3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmnU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "invlist_inline.h"
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char* const ident_too_long = "Identifier too long";
97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155 other token values are 258 or higher (see perly.h), so -1 should be
158 #define YYL_RETRY (-1)
161 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token. They all take an argument.
186 * TOKEN : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR : generic operator
188 * AOPERATOR : assignment operator
189 * PREBLOCK : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF : *EXPR where EXPR is not a simple identifier
192 * TERM : expression term
193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX : loop exiting command (goto, last, dump, etc)
195 * FTST : file test operator
196 * FUN0 : zero-argument function
197 * FUN0OP : zero-argument function, with its op created in this file
198 * FUN1 : not used, except for not, which isn't a UNIOP
199 * BOop : bitwise or or xor
201 * BCop : bitwise complement
202 * SHop : shift operator
203 * PWop : power operator
204 * PMop : pattern-matching operator
205 * Aop : addition-level operator
206 * AopNOASSIGN : addition-level operator that is never part of .=
207 * Mop : multiplication-level operator
208 * ChEop : chaining equality-testing operator
209 * NCEop : non-chaining comparison operator at equality precedence
210 * ChRop : chaining relational operator <= != gt
211 * NCRop : non-chaining relational operator isa
213 * Also see LOP and lop() below.
216 #ifdef DEBUGGING /* Serve -DT. */
217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
219 # define REPORT(retval) (retval)
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
253 /* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
258 #define UNI3(f,x,have_x) { \
259 pl_yylval.ival = f; \
260 if (have_x) PL_expect = x; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
265 return REPORT( (int)FUNC1 ); \
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
269 #define UNI(f) UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 if (optional) PL_last_uni = PL_oldbufptr; \
276 #define UNIBRACK(f) UNI3(f,0,0)
278 /* grandfather return to old style */
281 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
282 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
283 pl_yylval.ival = (f); \
289 #define COPLINE_INC_WITH_HERELINES \
291 CopLINE_inc(PL_curcop); \
292 if (PL_parser->herelines) \
293 CopLINE(PL_curcop) += PL_parser->herelines, \
294 PL_parser->herelines = 0; \
296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
297 * is no sublex_push to follow. */
298 #define COPLINE_SET_FROM_MULTI_END \
300 CopLINE_set(PL_curcop, PL_multi_end); \
301 if (PL_multi_end != PL_multi_start) \
302 PL_parser->herelines = 0; \
306 /* A file-local structure for passing around information about subroutines and
307 * related definable words */
317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
321 /* how to interpret the pl_yylval associated with the token */
325 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
330 static struct debug_tokens {
332 enum token_type type;
334 } const debug_tokens[] =
336 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
337 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
338 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
339 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
340 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
341 { ARROW, TOKENTYPE_NONE, "ARROW" },
342 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
343 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
344 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
345 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" },
346 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" },
347 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
348 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
349 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
350 { DO, TOKENTYPE_NONE, "DO" },
351 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
352 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
353 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
354 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
355 { ELSE, TOKENTYPE_NONE, "ELSE" },
356 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
357 { FOR, TOKENTYPE_IVAL, "FOR" },
358 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
359 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
360 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
361 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
362 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
363 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
364 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
365 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
366 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
367 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
368 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
369 { IF, TOKENTYPE_IVAL, "IF" },
370 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
371 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
372 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
373 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
374 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
375 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
376 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
377 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
378 { MY, TOKENTYPE_IVAL, "MY" },
379 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" },
380 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" },
381 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
382 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
383 { OROP, TOKENTYPE_IVAL, "OROP" },
384 { OROR, TOKENTYPE_NONE, "OROR" },
385 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
386 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
387 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
388 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
389 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
390 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
391 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
392 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
393 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
394 { PREINC, TOKENTYPE_NONE, "PREINC" },
395 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
396 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
397 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
398 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
399 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
400 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
401 { SUB, TOKENTYPE_NONE, "SUB" },
402 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
403 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
404 { THING, TOKENTYPE_OPVAL, "THING" },
405 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
406 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
407 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
408 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
409 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
410 { USE, TOKENTYPE_IVAL, "USE" },
411 { WHEN, TOKENTYPE_IVAL, "WHEN" },
412 { WHILE, TOKENTYPE_IVAL, "WHILE" },
413 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
414 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
415 { 0, TOKENTYPE_NONE, NULL }
418 /* dump the returned token in rv, plus any optional arg in pl_yylval */
421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
423 PERL_ARGS_ASSERT_TOKEREPORT;
426 const char *name = NULL;
427 enum token_type type = TOKENTYPE_NONE;
428 const struct debug_tokens *p;
429 SV* const report = newSVpvs("<== ");
431 for (p = debug_tokens; p->token; p++) {
432 if (p->token == (int)rv) {
439 Perl_sv_catpv(aTHX_ report, name);
440 else if (isGRAPH(rv))
442 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
444 sv_catpvs(report, " (pending identifier)");
447 sv_catpvs(report, "EOF");
449 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
454 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
456 case TOKENTYPE_OPNUM:
457 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
458 PL_op_name[lvalp->ival]);
461 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
463 case TOKENTYPE_OPVAL:
465 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
466 PL_op_name[lvalp->opval->op_type]);
467 if (lvalp->opval->op_type == OP_CONST) {
468 Perl_sv_catpvf(aTHX_ report, " %s",
469 SvPEEK(cSVOPx_sv(lvalp->opval)));
474 sv_catpvs(report, "(opval=null)");
477 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
483 /* print the buffer with suitable escapes */
486 S_printbuf(pTHX_ const char *const fmt, const char *const s)
488 SV* const tmp = newSVpvs("");
490 PERL_ARGS_ASSERT_PRINTBUF;
492 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
493 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 GCC_DIAG_RESTORE_STMT;
503 * This subroutine looks for an '=' next to the operator that has just been
504 * parsed and turns it into an ASSIGNOP if it finds one.
508 S_ao(pTHX_ int toketype)
510 if (*PL_bufptr == '=') {
512 if (toketype == ANDAND)
513 pl_yylval.ival = OP_ANDASSIGN;
514 else if (toketype == OROR)
515 pl_yylval.ival = OP_ORASSIGN;
516 else if (toketype == DORDOR)
517 pl_yylval.ival = OP_DORASSIGN;
520 return REPORT(toketype);
525 * When Perl expects an operator and finds something else, no_op
526 * prints the warning. It always prints "<something> found where
527 * operator expected. It prints "Missing semicolon on previous line?"
528 * if the surprise occurs at the start of the line. "do you need to
529 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
530 * where the compiler doesn't know if foo is a method call or a function.
531 * It prints "Missing operator before end of line" if there's nothing
532 * after the missing operator, or "... before <...>" if there is something
533 * after the missing operator.
535 * PL_bufptr is expected to point to the start of the thing that was found,
536 * and s after the next token or partial token.
540 S_no_op(pTHX_ const char *const what, char *s)
542 char * const oldbp = PL_bufptr;
543 const bool is_first = (PL_oldbufptr == PL_linestart);
545 PERL_ARGS_ASSERT_NO_OP;
551 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
552 if (ckWARN_d(WARN_SYNTAX)) {
554 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
555 "\t(Missing semicolon on previous line?)\n");
556 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
561 for (t = PL_oldoldbufptr;
562 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
563 t += UTF ? UTF8SKIP(t) : 1)
567 if (t < PL_bufptr && isSPACE(*t))
568 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
569 "\t(Do you need to predeclare %" UTF8f "?)\n",
570 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
574 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
575 "\t(Missing operator before %" UTF8f "?)\n",
576 UTF8fARG(UTF, s - oldbp, oldbp));
584 * Complain about missing quote/regexp/heredoc terminator.
585 * If it's called with NULL then it cauterizes the line buffer.
586 * If we're in a delimited string and the delimiter is a control
587 * character, it's reformatted into a two-char sequence like ^C.
592 S_missingterm(pTHX_ char *s, STRLEN len)
594 char tmpbuf[UTF8_MAXBYTES + 1];
599 char * const nl = (char *) my_memrchr(s, '\n', len);
606 else if (PL_multi_close < 32) {
608 tmpbuf[1] = (char)toCTRL(PL_multi_close);
614 if (LIKELY(PL_multi_close < 256)) {
615 *tmpbuf = (char)PL_multi_close;
620 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
627 q = memchr(s, '"', len) ? '\'' : '"';
628 sv = sv_2mortal(newSVpvn(s, len));
631 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
632 " anywhere before EOF", q, SVfARG(sv), q);
638 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
639 * utf16-to-utf8-reversed.
642 #ifdef PERL_CR_FILTER
646 const char *s = SvPVX_const(sv);
647 const char * const e = s + SvCUR(sv);
649 PERL_ARGS_ASSERT_STRIP_RETURN;
651 /* outer loop optimized to do nothing if there are no CR-LFs */
653 if (*s++ == '\r' && *s == '\n') {
654 /* hit a CR-LF, need to copy the rest */
658 if (*s == '\r' && s[1] == '\n')
669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
671 const I32 count = FILTER_READ(idx+1, sv, maxlen);
672 if (count > 0 && !maxlen)
679 =for apidoc lex_start
681 Creates and initialises a new lexer/parser state object, supplying
682 a context in which to lex and parse from a new source of Perl code.
683 A pointer to the new state object is placed in L</PL_parser>. An entry
684 is made on the save stack so that upon unwinding, the new state object
685 will be destroyed and the former value of L</PL_parser> will be restored.
686 Nothing else need be done to clean up the parsing context.
688 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
689 non-null, provides a string (in SV form) containing code to be parsed.
690 A copy of the string is made, so subsequent modification of C<line>
691 does not affect parsing. C<rsfp>, if non-null, provides an input stream
692 from which code will be read to be parsed. If both are non-null, the
693 code in C<line> comes first and must consist of complete lines of input,
694 and C<rsfp> supplies the remainder of the source.
696 The C<flags> parameter is reserved for future use. Currently it is only
697 used by perl internally, so extensions should always pass zero.
702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
703 can share filters with the current parser.
704 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
705 caller, hence isn't owned by the parser, so shouldn't be closed on parser
706 destruction. This is used to handle the case of defaulting to reading the
707 script from the standard input because no filename was given on the command
708 line (without getting confused by situation where STDIN has been closed, so
709 the script handle is opened on fd 0) */
712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
714 const char *s = NULL;
715 yy_parser *parser, *oparser;
717 if (flags && flags & ~LEX_START_FLAGS)
718 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
720 /* create and initialise a parser */
722 Newxz(parser, 1, yy_parser);
723 parser->old_parser = oparser = PL_parser;
726 parser->stack = NULL;
727 parser->stack_max1 = NULL;
730 /* on scope exit, free this parser and restore any outer one */
732 parser->saved_curcop = PL_curcop;
734 /* initialise lexer state */
736 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
742 parser->recheck_utf8_validity = TRUE;
743 parser->rsfp_filters =
744 !(flags & LEX_START_SAME_FILTER) || !oparser
746 : MUTABLE_AV(SvREFCNT_inc(
747 oparser->rsfp_filters
748 ? oparser->rsfp_filters
749 : (oparser->rsfp_filters = newAV())
752 Newx(parser->lex_brackstack, 120, char);
753 Newx(parser->lex_casestack, 12, char);
754 *parser->lex_casestack = '\0';
755 Newxz(parser->lex_shared, 1, LEXSHARED);
759 const U8* first_bad_char_loc;
761 s = SvPV_const(line, len);
764 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
766 &first_bad_char_loc)))
768 _force_out_malformed_utf8_message(first_bad_char_loc,
769 (U8 *) s + SvCUR(line),
771 1 /* 1 means die */ );
772 NOT_REACHED; /* NOTREACHED */
775 parser->linestr = flags & LEX_START_COPIED
776 ? SvREFCNT_inc_simple_NN(line)
777 : newSVpvn_flags(s, len, SvUTF8(line));
779 sv_catpvs(parser->linestr, "\n;");
781 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
784 parser->oldoldbufptr =
787 parser->linestart = SvPVX(parser->linestr);
788 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
789 parser->last_lop = parser->last_uni = NULL;
791 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
792 |LEX_DONT_CLOSE_RSFP));
793 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
794 |LEX_DONT_CLOSE_RSFP));
796 parser->in_pod = parser->filtered = 0;
800 /* delete a parser object */
803 Perl_parser_free(pTHX_ const yy_parser *parser)
805 PERL_ARGS_ASSERT_PARSER_FREE;
807 PL_curcop = parser->saved_curcop;
808 SvREFCNT_dec(parser->linestr);
810 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
811 PerlIO_clearerr(parser->rsfp);
812 else if (parser->rsfp && (!parser->old_parser
813 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
814 PerlIO_close(parser->rsfp);
815 SvREFCNT_dec(parser->rsfp_filters);
816 SvREFCNT_dec(parser->lex_stuff);
817 SvREFCNT_dec(parser->lex_sub_repl);
819 Safefree(parser->lex_brackstack);
820 Safefree(parser->lex_casestack);
821 Safefree(parser->lex_shared);
822 PL_parser = parser->old_parser;
827 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
829 I32 nexttoke = parser->nexttoke;
830 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
832 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
833 && parser->nextval[nexttoke].opval
834 && parser->nextval[nexttoke].opval->op_slabbed
835 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
836 op_free(parser->nextval[nexttoke].opval);
837 parser->nextval[nexttoke].opval = NULL;
844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
846 Buffer scalar containing the chunk currently under consideration of the
847 text currently being lexed. This is always a plain string scalar (for
848 which C<SvPOK> is true). It is not intended to be used as a scalar by
849 normal scalar means; instead refer to the buffer directly by the pointer
850 variables described below.
852 The lexer maintains various C<char*> pointers to things in the
853 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
854 reallocated, all of these pointers must be updated. Don't attempt to
855 do this manually, but rather use L</lex_grow_linestr> if you need to
856 reallocate the buffer.
858 The content of the text chunk in the buffer is commonly exactly one
859 complete line of input, up to and including a newline terminator,
860 but there are situations where it is otherwise. The octets of the
861 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
862 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
863 flag on this scalar, which may disagree with it.
865 For direct examination of the buffer, the variable
866 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
868 of these pointers is usually preferable to examination of the scalar
869 through normal scalar means.
871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
873 Direct pointer to the end of the chunk of text currently being lexed, the
874 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
875 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
876 always located at the end of the buffer, and does not count as part of
877 the buffer's contents.
879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
881 Points to the current position of lexing inside the lexer buffer.
882 Characters around this point may be freely examined, within
883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
884 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
887 Lexing code (whether in the Perl core or not) moves this pointer past
888 the characters that it consumes. It is also expected to perform some
889 bookkeeping whenever a newline character is consumed. This movement
890 can be more conveniently performed by the function L</lex_read_to>,
891 which handles newlines appropriately.
893 Interpretation of the buffer's octets can be abstracted out by
894 using the slightly higher-level functions L</lex_peek_unichar> and
895 L</lex_read_unichar>.
897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
899 Points to the start of the current line inside the lexer buffer.
900 This is useful for indicating at which column an error occurred, and
901 not much else. This must be updated by any lexing code that consumes
902 a newline; the function L</lex_read_to> handles this detail.
908 =for apidoc lex_bufutf8
910 Indicates whether the octets in the lexer buffer
911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
912 of Unicode characters. If not, they should be interpreted as Latin-1
913 characters. This is analogous to the C<SvUTF8> flag for scalars.
915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
916 contains valid UTF-8. Lexing code must be robust in the face of invalid
919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
920 is significant, but not the whole story regarding the input character
921 encoding. Normally, when a file is being read, the scalar contains octets
922 and its C<SvUTF8> flag is off, but the octets should be interpreted as
923 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
924 however, the scalar may have the C<SvUTF8> flag on, and in this case its
925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
926 is in effect. This logic may change in the future; use this function
927 instead of implementing the logic yourself.
933 Perl_lex_bufutf8(pTHX)
939 =for apidoc lex_grow_linestr
941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
942 at least C<len> octets (including terminating C<NUL>). Returns a
943 pointer to the reallocated buffer. This is necessary before making
944 any direct modification of the buffer that would increase its length.
945 L</lex_stuff_pvn> provides a more convenient way to insert text into
948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
949 this function updates all of the lexer's variables that point directly
956 Perl_lex_grow_linestr(pTHX_ STRLEN len)
960 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
961 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
964 linestr = PL_parser->linestr;
965 buf = SvPVX(linestr);
966 if (len <= SvLEN(linestr))
969 /* Is the lex_shared linestr SV the same as the current linestr SV?
970 * Only in this case does re_eval_start need adjusting, since it
971 * points within lex_shared->ls_linestr's buffer */
972 current = ( !PL_parser->lex_shared->ls_linestr
973 || linestr == PL_parser->lex_shared->ls_linestr);
975 bufend_pos = PL_parser->bufend - buf;
976 bufptr_pos = PL_parser->bufptr - buf;
977 oldbufptr_pos = PL_parser->oldbufptr - buf;
978 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
979 linestart_pos = PL_parser->linestart - buf;
980 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
981 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
982 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
983 PL_parser->lex_shared->re_eval_start - buf : 0;
985 buf = sv_grow(linestr, len);
987 PL_parser->bufend = buf + bufend_pos;
988 PL_parser->bufptr = buf + bufptr_pos;
989 PL_parser->oldbufptr = buf + oldbufptr_pos;
990 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
991 PL_parser->linestart = buf + linestart_pos;
992 if (PL_parser->last_uni)
993 PL_parser->last_uni = buf + last_uni_pos;
994 if (PL_parser->last_lop)
995 PL_parser->last_lop = buf + last_lop_pos;
996 if (current && PL_parser->lex_shared->re_eval_start)
997 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1002 =for apidoc lex_stuff_pvn
1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1006 reallocating the buffer if necessary. This means that lexing code that
1007 runs later will see the characters as if they had appeared in the input.
1008 It is not recommended to do this as part of normal parsing, and most
1009 uses of this facility run the risk of the inserted characters being
1010 interpreted in an unintended manner.
1012 The string to be inserted is represented by C<len> octets starting
1013 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1015 The characters are recoded for the lexer buffer, according to how the
1016 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1018 function is more convenient.
1020 =for apidoc Amnh||LEX_STUFF_UTF8
1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1029 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1030 if (flags & ~(LEX_STUFF_UTF8))
1031 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1033 if (flags & LEX_STUFF_UTF8) {
1036 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1038 const char *p, *e = pv+len;;
1041 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1042 bufptr = PL_parser->bufptr;
1043 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1044 SvCUR_set(PL_parser->linestr,
1045 SvCUR(PL_parser->linestr) + len+highhalf);
1046 PL_parser->bufend += len+highhalf;
1047 for (p = pv; p != e; p++) {
1048 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1052 if (flags & LEX_STUFF_UTF8) {
1053 STRLEN highhalf = 0;
1054 const char *p, *e = pv+len;
1055 for (p = pv; p != e; p++) {
1057 if (UTF8_IS_ABOVE_LATIN1(c)) {
1058 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1059 "non-Latin-1 character into Latin-1 input");
1060 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1063 } else assert(UTF8_IS_INVARIANT(c));
1067 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1068 bufptr = PL_parser->bufptr;
1069 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1070 SvCUR_set(PL_parser->linestr,
1071 SvCUR(PL_parser->linestr) + len-highhalf);
1072 PL_parser->bufend += len-highhalf;
1075 if (UTF8_IS_INVARIANT(*p)) {
1081 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1087 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1088 bufptr = PL_parser->bufptr;
1089 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1090 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1091 PL_parser->bufend += len;
1092 Copy(pv, bufptr, len, char);
1098 =for apidoc lex_stuff_pv
1100 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1101 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1102 reallocating the buffer if necessary. This means that lexing code that
1103 runs later will see the characters as if they had appeared in the input.
1104 It is not recommended to do this as part of normal parsing, and most
1105 uses of this facility run the risk of the inserted characters being
1106 interpreted in an unintended manner.
1108 The string to be inserted is represented by octets starting at C<pv>
1109 and continuing to the first nul. These octets are interpreted as either
1110 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1111 in C<flags>. The characters are recoded for the lexer buffer, according
1112 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1113 If it is not convenient to nul-terminate a string to be inserted, the
1114 L</lex_stuff_pvn> function is more appropriate.
1120 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1122 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1123 lex_stuff_pvn(pv, strlen(pv), flags);
1127 =for apidoc lex_stuff_sv
1129 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1130 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1131 reallocating the buffer if necessary. This means that lexing code that
1132 runs later will see the characters as if they had appeared in the input.
1133 It is not recommended to do this as part of normal parsing, and most
1134 uses of this facility run the risk of the inserted characters being
1135 interpreted in an unintended manner.
1137 The string to be inserted is the string value of C<sv>. The characters
1138 are recoded for the lexer buffer, according to how the buffer is currently
1139 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1140 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1141 need to construct a scalar.
1147 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1151 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1153 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1155 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1159 =for apidoc lex_unstuff
1161 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1162 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1163 This hides the discarded text from any lexing code that runs later,
1164 as if the text had never appeared.
1166 This is not the normal way to consume lexed text. For that, use
1173 Perl_lex_unstuff(pTHX_ char *ptr)
1177 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1178 buf = PL_parser->bufptr;
1180 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1183 bufend = PL_parser->bufend;
1185 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1186 unstuff_len = ptr - buf;
1187 Move(ptr, buf, bufend+1-ptr, char);
1188 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1189 PL_parser->bufend = bufend - unstuff_len;
1193 =for apidoc lex_read_to
1195 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1196 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1197 performing the correct bookkeeping whenever a newline character is passed.
1198 This is the normal way to consume lexed text.
1200 Interpretation of the buffer's octets can be abstracted out by
1201 using the slightly higher-level functions L</lex_peek_unichar> and
1202 L</lex_read_unichar>.
1208 Perl_lex_read_to(pTHX_ char *ptr)
1211 PERL_ARGS_ASSERT_LEX_READ_TO;
1212 s = PL_parser->bufptr;
1213 if (ptr < s || ptr > PL_parser->bufend)
1214 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1215 for (; s != ptr; s++)
1217 COPLINE_INC_WITH_HERELINES;
1218 PL_parser->linestart = s+1;
1220 PL_parser->bufptr = ptr;
1224 =for apidoc lex_discard_to
1226 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1227 up to C<ptr>. The remaining content of the buffer will be moved, and
1228 all pointers into the buffer updated appropriately. C<ptr> must not
1229 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1230 it is not permitted to discard text that has yet to be lexed.
1232 Normally it is not necessarily to do this directly, because it suffices to
1233 use the implicit discarding behaviour of L</lex_next_chunk> and things
1234 based on it. However, if a token stretches across multiple lines,
1235 and the lexing code has kept multiple lines of text in the buffer for
1236 that purpose, then after completion of the token it would be wise to
1237 explicitly discard the now-unneeded earlier lines, to avoid future
1238 multi-line tokens growing the buffer without bound.
1244 Perl_lex_discard_to(pTHX_ char *ptr)
1248 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1249 buf = SvPVX(PL_parser->linestr);
1251 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1254 if (ptr > PL_parser->bufptr)
1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1256 discard_len = ptr - buf;
1257 if (PL_parser->oldbufptr < ptr)
1258 PL_parser->oldbufptr = ptr;
1259 if (PL_parser->oldoldbufptr < ptr)
1260 PL_parser->oldoldbufptr = ptr;
1261 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1262 PL_parser->last_uni = NULL;
1263 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1264 PL_parser->last_lop = NULL;
1265 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1266 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1267 PL_parser->bufend -= discard_len;
1268 PL_parser->bufptr -= discard_len;
1269 PL_parser->oldbufptr -= discard_len;
1270 PL_parser->oldoldbufptr -= discard_len;
1271 if (PL_parser->last_uni)
1272 PL_parser->last_uni -= discard_len;
1273 if (PL_parser->last_lop)
1274 PL_parser->last_lop -= discard_len;
1278 Perl_notify_parser_that_changed_to_utf8(pTHX)
1280 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1281 * off to on. At compile time, this has the effect of entering a 'use
1282 * utf8' section. This means that any input was not previously checked for
1283 * UTF-8 (because it was off), but now we do need to check it, or our
1284 * assumptions about the input being sane could be wrong, and we could
1285 * segfault. This routine just sets a flag so that the next time we look
1286 * at the input we do the well-formed UTF-8 check. If we aren't in the
1287 * proper phase, there may not be a parser object, but if there is, setting
1288 * the flag is harmless */
1291 PL_parser->recheck_utf8_validity = TRUE;
1296 =for apidoc lex_next_chunk
1298 Reads in the next chunk of text to be lexed, appending it to
1299 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1300 looked to the end of the current chunk and wants to know more. It is
1301 usual, but not necessary, for lexing to have consumed the entirety of
1302 the current chunk at this time.
1304 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1305 chunk (i.e., the current chunk has been entirely consumed), normally the
1306 current chunk will be discarded at the same time that the new chunk is
1307 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1308 will not be discarded. If the current chunk has not been entirely
1309 consumed, then it will not be discarded regardless of the flag.
1311 Returns true if some new text was added to the buffer, or false if the
1312 buffer has reached the end of the input text.
1314 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1319 #define LEX_FAKE_EOF 0x80000000
1320 #define LEX_NO_TERM 0x40000000 /* here-doc */
1323 Perl_lex_next_chunk(pTHX_ U32 flags)
1327 STRLEN old_bufend_pos, new_bufend_pos;
1328 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1329 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1330 bool got_some_for_debugger = 0;
1333 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1334 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1335 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1337 linestr = PL_parser->linestr;
1338 buf = SvPVX(linestr);
1339 if (!(flags & LEX_KEEP_PREVIOUS)
1340 && PL_parser->bufptr == PL_parser->bufend)
1342 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1344 if (PL_parser->last_uni != PL_parser->bufend)
1345 PL_parser->last_uni = NULL;
1346 if (PL_parser->last_lop != PL_parser->bufend)
1347 PL_parser->last_lop = NULL;
1348 last_uni_pos = last_lop_pos = 0;
1350 SvCUR_set(linestr, 0);
1352 old_bufend_pos = PL_parser->bufend - buf;
1353 bufptr_pos = PL_parser->bufptr - buf;
1354 oldbufptr_pos = PL_parser->oldbufptr - buf;
1355 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1356 linestart_pos = PL_parser->linestart - buf;
1357 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1358 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1360 if (flags & LEX_FAKE_EOF) {
1362 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1364 } else if (filter_gets(linestr, old_bufend_pos)) {
1366 got_some_for_debugger = 1;
1367 } else if (flags & LEX_NO_TERM) {
1370 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1373 /* End of real input. Close filehandle (unless it was STDIN),
1374 * then add implicit termination.
1376 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1377 PerlIO_clearerr(PL_parser->rsfp);
1378 else if (PL_parser->rsfp)
1379 (void)PerlIO_close(PL_parser->rsfp);
1380 PL_parser->rsfp = NULL;
1381 PL_parser->in_pod = PL_parser->filtered = 0;
1382 if (!PL_in_eval && PL_minus_p) {
1384 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1385 PL_minus_n = PL_minus_p = 0;
1386 } else if (!PL_in_eval && PL_minus_n) {
1387 sv_catpvs(linestr, /*{*/";}");
1390 sv_catpvs(linestr, ";");
1393 buf = SvPVX(linestr);
1394 new_bufend_pos = SvCUR(linestr);
1395 PL_parser->bufend = buf + new_bufend_pos;
1396 PL_parser->bufptr = buf + bufptr_pos;
1399 const U8* first_bad_char_loc;
1400 if (UNLIKELY(! is_utf8_string_loc(
1401 (U8 *) PL_parser->bufptr,
1402 PL_parser->bufend - PL_parser->bufptr,
1403 &first_bad_char_loc)))
1405 _force_out_malformed_utf8_message(first_bad_char_loc,
1406 (U8 *) PL_parser->bufend,
1408 1 /* 1 means die */ );
1409 NOT_REACHED; /* NOTREACHED */
1413 PL_parser->oldbufptr = buf + oldbufptr_pos;
1414 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1415 PL_parser->linestart = buf + linestart_pos;
1416 if (PL_parser->last_uni)
1417 PL_parser->last_uni = buf + last_uni_pos;
1418 if (PL_parser->last_lop)
1419 PL_parser->last_lop = buf + last_lop_pos;
1420 if (PL_parser->preambling != NOLINE) {
1421 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1422 PL_parser->preambling = NOLINE;
1424 if ( got_some_for_debugger
1425 && PERLDB_LINE_OR_SAVESRC
1426 && PL_curstash != PL_debstash)
1428 /* debugger active and we're not compiling the debugger code,
1429 * so store the line into the debugger's array of lines
1431 update_debugger_info(NULL, buf+old_bufend_pos,
1432 new_bufend_pos-old_bufend_pos);
1438 =for apidoc lex_peek_unichar
1440 Looks ahead one (Unicode) character in the text currently being lexed.
1441 Returns the codepoint (unsigned integer value) of the next character,
1442 or -1 if lexing has reached the end of the input text. To consume the
1443 peeked character, use L</lex_read_unichar>.
1445 If the next character is in (or extends into) the next chunk of input
1446 text, the next chunk will be read in. Normally the current chunk will be
1447 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1448 bit set, then the current chunk will not be discarded.
1450 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1451 is encountered, an exception is generated.
1457 Perl_lex_peek_unichar(pTHX_ U32 flags)
1460 if (flags & ~(LEX_KEEP_PREVIOUS))
1461 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1462 s = PL_parser->bufptr;
1463 bufend = PL_parser->bufend;
1469 if (!lex_next_chunk(flags))
1471 s = PL_parser->bufptr;
1472 bufend = PL_parser->bufend;
1475 if (UTF8_IS_INVARIANT(head))
1477 if (UTF8_IS_START(head)) {
1478 len = UTF8SKIP(&head);
1479 while ((STRLEN)(bufend-s) < len) {
1480 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1482 s = PL_parser->bufptr;
1483 bufend = PL_parser->bufend;
1486 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1487 if (retlen == (STRLEN)-1) {
1488 _force_out_malformed_utf8_message((U8 *) s,
1491 1 /* 1 means die */ );
1492 NOT_REACHED; /* NOTREACHED */
1497 if (!lex_next_chunk(flags))
1499 s = PL_parser->bufptr;
1506 =for apidoc lex_read_unichar
1508 Reads the next (Unicode) character in the text currently being lexed.
1509 Returns the codepoint (unsigned integer value) of the character read,
1510 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1511 if lexing has reached the end of the input text. To non-destructively
1512 examine the next character, use L</lex_peek_unichar> instead.
1514 If the next character is in (or extends into) the next chunk of input
1515 text, the next chunk will be read in. Normally the current chunk will be
1516 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1517 bit set, then the current chunk will not be discarded.
1519 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1520 is encountered, an exception is generated.
1526 Perl_lex_read_unichar(pTHX_ U32 flags)
1529 if (flags & ~(LEX_KEEP_PREVIOUS))
1530 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1531 c = lex_peek_unichar(flags);
1534 COPLINE_INC_WITH_HERELINES;
1536 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1538 ++(PL_parser->bufptr);
1544 =for apidoc lex_read_space
1546 Reads optional spaces, in Perl style, in the text currently being
1547 lexed. The spaces may include ordinary whitespace characters and
1548 Perl-style comments. C<#line> directives are processed if encountered.
1549 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1550 at a non-space character (or the end of the input text).
1552 If spaces extend into the next chunk of input text, the next chunk will
1553 be read in. Normally the current chunk will be discarded at the same
1554 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1555 chunk will not be discarded.
1560 #define LEX_NO_INCLINE 0x40000000
1561 #define LEX_NO_NEXT_CHUNK 0x80000000
1564 Perl_lex_read_space(pTHX_ U32 flags)
1567 const bool can_incline = !(flags & LEX_NO_INCLINE);
1568 bool need_incline = 0;
1569 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1570 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1571 s = PL_parser->bufptr;
1572 bufend = PL_parser->bufend;
1578 } while (!(c == '\n' || (c == 0 && s == bufend)));
1579 } else if (c == '\n') {
1582 PL_parser->linestart = s;
1588 } else if (isSPACE(c)) {
1590 } else if (c == 0 && s == bufend) {
1593 if (flags & LEX_NO_NEXT_CHUNK)
1595 PL_parser->bufptr = s;
1596 l = CopLINE(PL_curcop);
1597 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1598 got_more = lex_next_chunk(flags);
1599 CopLINE_set(PL_curcop, l);
1600 s = PL_parser->bufptr;
1601 bufend = PL_parser->bufend;
1604 if (can_incline && need_incline && PL_parser->rsfp) {
1614 PL_parser->bufptr = s;
1619 =for apidoc validate_proto
1621 This function performs syntax checking on a prototype, C<proto>.
1622 If C<warn> is true, any illegal characters or mismatched brackets
1623 will trigger illegalproto warnings, declaring that they were
1624 detected in the prototype for C<name>.
1626 The return value is C<true> if this is a valid prototype, and
1627 C<false> if it is not, regardless of whether C<warn> was C<true> or
1630 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1637 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1639 STRLEN len, origlen;
1641 bool bad_proto = FALSE;
1642 bool in_brackets = FALSE;
1643 bool after_slash = FALSE;
1644 char greedy_proto = ' ';
1645 bool proto_after_greedy_proto = FALSE;
1646 bool must_be_last = FALSE;
1647 bool underscore = FALSE;
1648 bool bad_proto_after_underscore = FALSE;
1650 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1655 p = SvPV(proto, len);
1657 for (; len--; p++) {
1660 proto_after_greedy_proto = TRUE;
1662 if (!memCHRs(";@%", *p))
1663 bad_proto_after_underscore = TRUE;
1666 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1673 in_brackets = FALSE;
1674 else if ((*p == '@' || *p == '%')
1678 must_be_last = TRUE;
1687 after_slash = FALSE;
1692 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1695 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1696 origlen, UNI_DISPLAY_ISPRINT)
1697 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1699 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1700 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1701 sv_catpvs(name2, "::");
1702 sv_catsv(name2, (SV *)name);
1706 if (proto_after_greedy_proto)
1707 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1708 "Prototype after '%c' for %" SVf " : %s",
1709 greedy_proto, SVfARG(name), p);
1711 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1712 "Missing ']' in prototype for %" SVf " : %s",
1715 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1716 "Illegal character in prototype for %" SVf " : %s",
1718 if (bad_proto_after_underscore)
1719 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1720 "Illegal character after '_' in prototype for %" SVf " : %s",
1724 return (! (proto_after_greedy_proto || bad_proto) );
1729 * This subroutine has nothing to do with tilting, whether at windmills
1730 * or pinball tables. Its name is short for "increment line". It
1731 * increments the current line number in CopLINE(PL_curcop) and checks
1732 * to see whether the line starts with a comment of the form
1733 * # line 500 "foo.pm"
1734 * If so, it sets the current line number and file to the values in the comment.
1738 S_incline(pTHX_ const char *s, const char *end)
1746 PERL_ARGS_ASSERT_INCLINE;
1750 COPLINE_INC_WITH_HERELINES;
1751 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1752 && s+1 == PL_bufend && *s == ';') {
1753 /* fake newline in string eval */
1754 CopLINE_dec(PL_curcop);
1759 while (SPACE_OR_TAB(*s))
1761 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1762 s += sizeof("line") - 1;
1765 if (SPACE_OR_TAB(*s))
1769 while (SPACE_OR_TAB(*s))
1777 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1779 while (SPACE_OR_TAB(*s))
1781 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1787 while (*t && !isSPACE(*t))
1791 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1793 if (*e != '\n' && *e != '\0')
1794 return; /* false alarm */
1796 if (!grok_atoUV(n, &uv, &e))
1798 line_num = ((line_t)uv) - 1;
1801 const STRLEN len = t - s;
1803 if (!PL_rsfp && !PL_parser->filtered) {
1804 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1805 * to *{"::_<newfilename"} */
1806 /* However, the long form of evals is only turned on by the
1807 debugger - usually they're "(eval %lu)" */
1808 GV * const cfgv = CopFILEGV(PL_curcop);
1811 STRLEN tmplen2 = len;
1815 if (tmplen2 + 2 <= sizeof smallbuf)
1818 Newx(tmpbuf2, tmplen2 + 2, char);
1823 memcpy(tmpbuf2 + 2, s, tmplen2);
1826 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1828 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1829 /* adjust ${"::_<newfilename"} to store the new file name */
1830 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1831 /* The line number may differ. If that is the case,
1832 alias the saved lines that are in the array.
1833 Otherwise alias the whole array. */
1834 if (CopLINE(PL_curcop) == line_num) {
1835 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1836 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1838 else if (GvAV(cfgv)) {
1839 AV * const av = GvAV(cfgv);
1840 const line_t start = CopLINE(PL_curcop)+1;
1841 SSize_t items = AvFILLp(av) - start;
1843 AV * const av2 = GvAVn(gv2);
1844 SV **svp = AvARRAY(av) + start;
1845 Size_t l = line_num+1;
1846 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1847 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1852 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1855 CopFILE_free(PL_curcop);
1856 CopFILE_setn(PL_curcop, s, len);
1858 CopLINE_set(PL_curcop, line_num);
1862 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1864 AV *av = CopFILEAVx(PL_curcop);
1867 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1869 sv = *av_fetch(av, 0, 1);
1870 SvUPGRADE(sv, SVt_PVMG);
1872 if (!SvPOK(sv)) SvPVCLEAR(sv);
1874 sv_catsv(sv, orig_sv);
1876 sv_catpvn(sv, buf, len);
1881 if (PL_parser->preambling == NOLINE)
1882 av_store(av, CopLINE(PL_curcop), sv);
1888 * Called to gobble the appropriate amount and type of whitespace.
1889 * Skips comments as well.
1890 * Returns the next character after the whitespace that is skipped.
1893 * Same thing, but look ahead without incrementing line numbers or
1894 * adjusting PL_linestart.
1897 #define skipspace(s) skipspace_flags(s, 0)
1898 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1901 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1903 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1904 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1905 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1908 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1910 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1911 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1912 LEX_NO_NEXT_CHUNK : 0));
1914 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1915 if (PL_linestart > PL_bufptr)
1916 PL_bufptr = PL_linestart;
1924 * Check the unary operators to ensure there's no ambiguity in how they're
1925 * used. An ambiguous piece of code would be:
1927 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1928 * the +5 is its argument.
1936 if (PL_oldoldbufptr != PL_last_uni)
1938 while (isSPACE(*PL_last_uni))
1941 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1942 s += UTF ? UTF8SKIP(s) : 1;
1943 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1946 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1947 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1948 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1952 * LOP : macro to build a list operator. Its behaviour has been replaced
1953 * with a subroutine, S_lop() for which LOP is just another name.
1956 #define LOP(f,x) return lop(f,x,s)
1960 * Build a list operator (or something that might be one). The rules:
1961 * - if we have a next token, then it's a list operator (no parens) for
1962 * which the next token has already been parsed; e.g.,
1965 * - if the next thing is an opening paren, then it's a function
1966 * - else it's a list operator
1970 S_lop(pTHX_ I32 f, U8 x, char *s)
1972 PERL_ARGS_ASSERT_LOP;
1977 PL_last_lop = PL_oldbufptr;
1978 PL_last_lop_op = (OPCODE)f;
1983 return REPORT(FUNC);
1986 return REPORT(FUNC);
1989 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1990 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1991 return REPORT(LSTOP);
1997 * When the lexer realizes it knows the next token (for instance,
1998 * it is reordering tokens for the parser) then it can call S_force_next
1999 * to know what token to return the next time the lexer is called. Caller
2000 * will need to set PL_nextval[] and possibly PL_expect to ensure
2001 * the lexer handles the token correctly.
2005 S_force_next(pTHX_ I32 type)
2009 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2010 tokereport(type, &NEXTVAL_NEXTTOKE);
2013 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2014 PL_nexttype[PL_nexttoke] = type;
2021 * This subroutine handles postfix deref syntax after the arrow has already
2022 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2023 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2024 * only the first, leaving yylex to find the next.
2028 S_postderef(pTHX_ int const funny, char const next)
2030 assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
2032 PL_expect = XOPERATOR;
2033 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2034 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2035 PL_lex_state = LEX_INTERPEND;
2037 force_next(POSTJOIN);
2043 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2044 && !PL_lex_brackets)
2046 PL_expect = XOPERATOR;
2055 int yyc = PL_parser->yychar;
2056 if (yyc != YYEMPTY) {
2058 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2059 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2060 PL_lex_allbrackets--;
2062 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2063 } else if (yyc == '('/*)*/) {
2064 PL_lex_allbrackets--;
2069 PL_parser->yychar = YYEMPTY;
2074 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2076 SV * const sv = newSVpvn_utf8(start, len,
2080 && is_utf8_non_invariant_string((const U8*)start, len));
2086 * When the lexer knows the next thing is a word (for instance, it has
2087 * just seen -> and it knows that the next char is a word char, then
2088 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2092 * char *start : buffer position (must be within PL_linestr)
2093 * int token : PL_next* will be this type of bare word
2094 * (e.g., METHOD,BAREWORD)
2095 * int check_keyword : if true, Perl checks to make sure the word isn't
2096 * a keyword (do this if the word is a label, e.g. goto FOO)
2097 * int allow_pack : if true, : characters will also be allowed (require,
2098 * use, etc. do this)
2102 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2107 PERL_ARGS_ASSERT_FORCE_WORD;
2109 start = skipspace(start);
2111 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2112 || (allow_pack && *s == ':' && s[1] == ':') )
2114 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2115 if (check_keyword) {
2116 char *s2 = PL_tokenbuf;
2118 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2119 s2 += sizeof("CORE::") - 1;
2120 len2 -= sizeof("CORE::") - 1;
2122 if (keyword(s2, len2, 0))
2125 if (token == METHOD) {
2130 PL_expect = XOPERATOR;
2133 NEXTVAL_NEXTTOKE.opval
2134 = newSVOP(OP_CONST,0,
2135 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2136 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2144 * Called when the lexer wants $foo *foo &foo etc, but the program
2145 * text only contains the "foo" portion. The first argument is a pointer
2146 * to the "foo", and the second argument is the type symbol to prefix.
2147 * Forces the next token to be a "BAREWORD".
2148 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2152 S_force_ident(pTHX_ const char *s, int kind)
2154 PERL_ARGS_ASSERT_FORCE_IDENT;
2157 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2158 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2159 UTF ? SVf_UTF8 : 0));
2160 NEXTVAL_NEXTTOKE.opval = o;
2161 force_next(BAREWORD);
2163 o->op_private = OPpCONST_ENTERED;
2164 /* XXX see note in pp_entereval() for why we forgo typo
2165 warnings if the symbol must be introduced in an eval.
2167 gv_fetchpvn_flags(s, len,
2168 (PL_in_eval ? GV_ADDMULTI
2169 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2170 kind == '$' ? SVt_PV :
2171 kind == '@' ? SVt_PVAV :
2172 kind == '%' ? SVt_PVHV :
2180 S_force_ident_maybe_lex(pTHX_ char pit)
2182 NEXTVAL_NEXTTOKE.ival = pit;
2187 Perl_str_to_version(pTHX_ SV *sv)
2192 const char *start = SvPV_const(sv,len);
2193 const char * const end = start + len;
2194 const bool utf = cBOOL(SvUTF8(sv));
2196 PERL_ARGS_ASSERT_STR_TO_VERSION;
2198 while (start < end) {
2202 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2207 retval += ((NV)n)/nshift;
2216 * Forces the next token to be a version number.
2217 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2218 * and if "guessing" is TRUE, then no new token is created (and the caller
2219 * must use an alternative parsing method).
2223 S_force_version(pTHX_ char *s, int guessing)
2228 PERL_ARGS_ASSERT_FORCE_VERSION;
2236 while (isDIGIT(*d) || *d == '_' || *d == '.')
2238 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2240 s = scan_num(s, &pl_yylval);
2241 version = pl_yylval.opval;
2242 ver = cSVOPx(version)->op_sv;
2243 if (SvPOK(ver) && !SvNIOK(ver)) {
2244 SvUPGRADE(ver, SVt_PVNV);
2245 SvNV_set(ver, str_to_version(ver));
2246 SvNOK_on(ver); /* hint that it is a version */
2249 else if (guessing) {
2254 /* NOTE: The parser sees the package name and the VERSION swapped */
2255 NEXTVAL_NEXTTOKE.opval = version;
2256 force_next(BAREWORD);
2262 * S_force_strict_version
2263 * Forces the next token to be a version number using strict syntax rules.
2267 S_force_strict_version(pTHX_ char *s)
2270 const char *errstr = NULL;
2272 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2274 while (isSPACE(*s)) /* leading whitespace */
2277 if (is_STRICT_VERSION(s,&errstr)) {
2279 s = (char *)scan_version(s, ver, 0);
2280 version = newSVOP(OP_CONST, 0, ver);
2282 else if ((*s != ';' && *s != '{' && *s != '}' )
2283 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2287 yyerror(errstr); /* version required */
2291 /* NOTE: The parser sees the package name and the VERSION swapped */
2292 NEXTVAL_NEXTTOKE.opval = version;
2293 force_next(BAREWORD);
2300 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2301 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2302 * unchanged, and a new SV containing the modified input is returned.
2306 S_tokeq(pTHX_ SV *sv)
2313 PERL_ARGS_ASSERT_TOKEQ;
2317 assert (!SvIsCOW(sv));
2318 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2322 /* This is relying on the SV being "well formed" with a trailing '\0' */
2323 while (s < send && !(*s == '\\' && s[1] == '\\'))
2328 if ( PL_hints & HINT_NEW_STRING ) {
2329 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2330 SVs_TEMP | SvUTF8(sv));
2334 if (s + 1 < send && (s[1] == '\\'))
2335 s++; /* all that, just for this */
2340 SvCUR_set(sv, d - SvPVX_const(sv));
2342 if ( PL_hints & HINT_NEW_STRING )
2343 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2348 * Now come three functions related to double-quote context,
2349 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2350 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2351 * interact with PL_lex_state, and create fake ( ... ) argument lists
2352 * to handle functions and concatenation.
2356 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2361 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2363 * Pattern matching will set PL_lex_op to the pattern-matching op to
2364 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2366 * OP_CONST is easy--just make the new op and return.
2368 * Everything else becomes a FUNC.
2370 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2371 * had an OP_CONST. This just sets us up for a
2372 * call to S_sublex_push().
2376 S_sublex_start(pTHX)
2378 const I32 op_type = pl_yylval.ival;
2380 if (op_type == OP_NULL) {
2381 pl_yylval.opval = PL_lex_op;
2385 if (op_type == OP_CONST) {
2386 SV *sv = PL_lex_stuff;
2387 PL_lex_stuff = NULL;
2390 if (SvTYPE(sv) == SVt_PVIV) {
2391 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2393 const char * const p = SvPV_const(sv, len);
2394 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2398 pl_yylval.opval = newSVOP(op_type, 0, sv);
2402 PL_parser->lex_super_state = PL_lex_state;
2403 PL_parser->lex_sub_inwhat = (U16)op_type;
2404 PL_parser->lex_sub_op = PL_lex_op;
2405 PL_parser->sub_no_recover = FALSE;
2406 PL_parser->sub_error_count = PL_error_count;
2407 PL_lex_state = LEX_INTERPPUSH;
2411 pl_yylval.opval = PL_lex_op;
2421 * Create a new scope to save the lexing state. The scope will be
2422 * ended in S_sublex_done. Returns a '(', starting the function arguments
2423 * to the uc, lc, etc. found before.
2424 * Sets PL_lex_state to LEX_INTERPCONCAT.
2431 const bool is_heredoc = PL_multi_close == '<';
2434 PL_lex_state = PL_parser->lex_super_state;
2435 SAVEI8(PL_lex_dojoin);
2436 SAVEI32(PL_lex_brackets);
2437 SAVEI32(PL_lex_allbrackets);
2438 SAVEI32(PL_lex_formbrack);
2439 SAVEI8(PL_lex_fakeeof);
2440 SAVEI32(PL_lex_casemods);
2441 SAVEI32(PL_lex_starts);
2442 SAVEI8(PL_lex_state);
2443 SAVESPTR(PL_lex_repl);
2444 SAVEVPTR(PL_lex_inpat);
2445 SAVEI16(PL_lex_inwhat);
2448 SAVECOPLINE(PL_curcop);
2449 SAVEI32(PL_multi_end);
2450 SAVEI32(PL_parser->herelines);
2451 PL_parser->herelines = 0;
2453 SAVEIV(PL_multi_close);
2454 SAVEPPTR(PL_bufptr);
2455 SAVEPPTR(PL_bufend);
2456 SAVEPPTR(PL_oldbufptr);
2457 SAVEPPTR(PL_oldoldbufptr);
2458 SAVEPPTR(PL_last_lop);
2459 SAVEPPTR(PL_last_uni);
2460 SAVEPPTR(PL_linestart);
2461 SAVESPTR(PL_linestr);
2462 SAVEGENERICPV(PL_lex_brackstack);
2463 SAVEGENERICPV(PL_lex_casestack);
2464 SAVEGENERICPV(PL_parser->lex_shared);
2465 SAVEBOOL(PL_parser->lex_re_reparsing);
2466 SAVEI32(PL_copline);
2468 /* The here-doc parser needs to be able to peek into outer lexing
2469 scopes to find the body of the here-doc. So we put PL_linestr and
2470 PL_bufptr into lex_shared, to ‘share’ those values.
2472 PL_parser->lex_shared->ls_linestr = PL_linestr;
2473 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2475 PL_linestr = PL_lex_stuff;
2476 PL_lex_repl = PL_parser->lex_sub_repl;
2477 PL_lex_stuff = NULL;
2478 PL_parser->lex_sub_repl = NULL;
2480 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2481 set for an inner quote-like operator and then an error causes scope-
2482 popping. We must not have a PL_lex_stuff value left dangling, as
2483 that breaks assumptions elsewhere. See bug #123617. */
2484 SAVEGENERICSV(PL_lex_stuff);
2485 SAVEGENERICSV(PL_parser->lex_sub_repl);
2487 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2488 = SvPVX(PL_linestr);
2489 PL_bufend += SvCUR(PL_linestr);
2490 PL_last_lop = PL_last_uni = NULL;
2491 SAVEFREESV(PL_linestr);
2492 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2494 PL_lex_dojoin = FALSE;
2495 PL_lex_brackets = PL_lex_formbrack = 0;
2496 PL_lex_allbrackets = 0;
2497 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2498 Newx(PL_lex_brackstack, 120, char);
2499 Newx(PL_lex_casestack, 12, char);
2500 PL_lex_casemods = 0;
2501 *PL_lex_casestack = '\0';
2503 PL_lex_state = LEX_INTERPCONCAT;
2505 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2506 PL_copline = NOLINE;
2508 Newxz(shared, 1, LEXSHARED);
2509 shared->ls_prev = PL_parser->lex_shared;
2510 PL_parser->lex_shared = shared;
2512 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2513 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2514 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2515 PL_lex_inpat = PL_parser->lex_sub_op;
2517 PL_lex_inpat = NULL;
2519 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2520 PL_in_eval &= ~EVAL_RE_REPARSING;
2527 * Restores lexer state after a S_sublex_push.
2533 if (!PL_lex_starts++) {
2534 SV * const sv = newSVpvs("");
2535 if (SvUTF8(PL_linestr))
2537 PL_expect = XOPERATOR;
2538 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2542 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2543 PL_lex_state = LEX_INTERPCASEMOD;
2547 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2548 assert(PL_lex_inwhat != OP_TRANSR);
2550 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2551 PL_linestr = PL_lex_repl;
2553 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2554 PL_bufend += SvCUR(PL_linestr);
2555 PL_last_lop = PL_last_uni = NULL;
2556 PL_lex_dojoin = FALSE;
2557 PL_lex_brackets = 0;
2558 PL_lex_allbrackets = 0;
2559 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2560 PL_lex_casemods = 0;
2561 *PL_lex_casestack = '\0';
2563 if (SvEVALED(PL_lex_repl)) {
2564 PL_lex_state = LEX_INTERPNORMAL;
2566 /* we don't clear PL_lex_repl here, so that we can check later
2567 whether this is an evalled subst; that means we rely on the
2568 logic to ensure sublex_done() is called again only via the
2569 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2572 PL_lex_state = LEX_INTERPCONCAT;
2575 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2576 CopLINE(PL_curcop) +=
2577 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2578 + PL_parser->herelines;
2579 PL_parser->herelines = 0;
2584 const line_t l = CopLINE(PL_curcop);
2586 if (PL_parser->sub_error_count != PL_error_count) {
2587 if (PL_parser->sub_no_recover) {
2592 if (PL_multi_close == '<')
2593 PL_parser->herelines += l - PL_multi_end;
2594 PL_bufend = SvPVX(PL_linestr);
2595 PL_bufend += SvCUR(PL_linestr);
2596 PL_expect = XOPERATOR;
2602 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2603 const STRLEN context_len, const char ** error_msg)
2605 /* Load the official _charnames module if not already there. The
2606 * parameters are just to give info for any error messages generated:
2607 * char_name a name to look up which is the reason for loading this
2608 * context 'char_name' in the context in the input in which it appears
2609 * context_len how many bytes 'context' occupies
2610 * error_msg *error_msg will be set to any error
2612 * Returns the ^H table if success; otherwise NULL */
2619 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2621 /* This loop is executed 1 1/2 times. On the first time through, if it
2622 * isn't already loaded, try loading it, and iterate just once to see if it
2624 for (i = 0; i < 2; i++) {
2625 table = GvHV(PL_hintgv); /* ^H */
2628 && (PL_hints & HINT_LOCALIZE_HH)
2629 && (cvp = hv_fetchs(table, "charnames", FALSE))
2632 return table; /* Quit if already loaded */
2636 Perl_load_module(aTHX_
2638 newSVpvs("_charnames"),
2640 /* version parameter; no need to specify it, as if we get too early
2641 * a version, will fail anyway, not being able to find 'charnames'
2650 /* Here, it failed; new_constant will give appropriate error messages */
2652 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2653 context, context_len, error_msg);
2660 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2662 /* This justs wraps get_and_check_backslash_N_name() to output any error
2663 * message it returns. */
2665 const char * error_msg = NULL;
2668 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2670 /* charnames doesn't work well if there have been errors found */
2671 if (PL_error_count > 0) {
2675 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2678 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2685 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2686 const char* const e,
2688 const char ** error_msg)
2690 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2691 * interior, hence to the "}". Finds what the name resolves to, returning
2692 * an SV* containing it; NULL if no valid one found.
2694 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2695 * doesn't have to be. */
2705 /* Points to the beginning of the \N{... so that any messages include the
2706 * context of what's failing*/
2707 const char* context = s - 3;
2708 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2711 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2714 assert(s > (char *) 3);
2716 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2718 if (!SvCUR(char_name)) {
2719 SvREFCNT_dec_NN(char_name);
2720 /* diag_listed_as: Unknown charname '%s' */
2721 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2725 /* Autoload the charnames module */
2727 table = load_charnames(char_name, context, context_len, error_msg);
2728 if (table == NULL) {
2733 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2734 context, context_len, error_msg);
2736 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2742 /* See if the charnames handler is the Perl core's, and if so, we can skip
2743 * the validation needed for a user-supplied one, as Perl's does its own
2745 cvp = hv_fetchs(table, "charnames", FALSE);
2746 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2747 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2749 const char * const name = HvNAME(stash);
2750 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2755 /* Here, it isn't Perl's charname handler. We can't rely on a
2756 * user-supplied handler to validate the input name. For non-ut8 input,
2757 * look to see that the first character is legal. Then loop through the
2758 * rest checking that each is a continuation */
2760 /* This code makes the reasonable assumption that the only Latin1-range
2761 * characters that begin a character name alias are alphabetic, otherwise
2762 * would have to create a isCHARNAME_BEGIN macro */
2765 if (! isALPHAU(*s)) {
2770 if (! isCHARNAME_CONT(*s)) {
2773 if (*s == ' ' && *(s-1) == ' ') {
2780 /* Similarly for utf8. For invariants can check directly; for other
2781 * Latin1, can calculate their code point and check; otherwise use an
2783 if (UTF8_IS_INVARIANT(*s)) {
2784 if (! isALPHAU(*s)) {
2788 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2789 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2795 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2796 utf8_to_uvchr_buf((U8 *) s,
2806 if (UTF8_IS_INVARIANT(*s)) {
2807 if (! isCHARNAME_CONT(*s)) {
2810 if (*s == ' ' && *(s-1) == ' ') {
2815 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2816 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2823 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2824 utf8_to_uvchr_buf((U8 *) s,
2834 if (*(s-1) == ' ') {
2835 /* diag_listed_as: charnames alias definitions may not contain
2836 trailing white-space; marked by <-- HERE in %s
2838 *error_msg = Perl_form(aTHX_
2839 "charnames alias definitions may not contain trailing "
2840 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2841 (int)(s - context + 1), context,
2842 (int)(e - s + 1), s + 1);
2846 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2847 const U8* first_bad_char_loc;
2849 const char* const str = SvPV_const(res, len);
2850 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2851 &first_bad_char_loc)))
2853 _force_out_malformed_utf8_message(first_bad_char_loc,
2854 (U8 *) PL_parser->bufend,
2856 0 /* 0 means don't die */ );
2857 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2858 immediately after '%s' */
2859 *error_msg = Perl_form(aTHX_
2860 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2861 (int) context_len, context,
2862 (int) ((char *) first_bad_char_loc - str), str);
2871 /* The final %.*s makes sure that should the trailing NUL be missing
2872 * that this print won't run off the end of the string */
2873 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2875 *error_msg = Perl_form(aTHX_
2876 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2877 (int)(s - context + 1), context,
2878 (int)(e - s + 1), s + 1);
2883 /* diag_listed_as: charnames alias definitions may not contain a
2884 sequence of multiple spaces; marked by <-- HERE
2886 *error_msg = Perl_form(aTHX_
2887 "charnames alias definitions may not contain a sequence of "
2888 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2889 (int)(s - context + 1), context,
2890 (int)(e - s + 1), s + 1);
2897 Extracts the next constant part of a pattern, double-quoted string,
2898 or transliteration. This is terrifying code.
2900 For example, in parsing the double-quoted string "ab\x63$d", it would
2901 stop at the '$' and return an OP_CONST containing 'abc'.
2903 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2904 processing a pattern (PL_lex_inpat is true), a transliteration
2905 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2907 Returns a pointer to the character scanned up to. If this is
2908 advanced from the start pointer supplied (i.e. if anything was
2909 successfully parsed), will leave an OP_CONST for the substring scanned
2910 in pl_yylval. Caller must intuit reason for not parsing further
2911 by looking at the next characters herself.
2915 \N{FOO} => \N{U+hex_for_character_FOO}
2916 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2919 all other \-char, including \N and \N{ apart from \N{ABC}
2922 @ and $ where it appears to be a var, but not for $ as tail anchor
2926 In transliterations:
2927 characters are VERY literal, except for - not at the start or end
2928 of the string, which indicates a range. However some backslash sequences
2929 are recognized: \r, \n, and the like
2930 \007 \o{}, \x{}, \N{}
2931 If all elements in the transliteration are below 256,
2932 scan_const expands the range to the full set of intermediate
2933 characters. If the range is in utf8, the hyphen is replaced with
2934 a certain range mark which will be handled by pmtrans() in op.c.
2936 In double-quoted strings:
2938 all those recognized in transliterations
2939 deprecated backrefs: \1 (in substitution replacements)
2940 case and quoting: \U \Q \E
2943 scan_const does *not* construct ops to handle interpolated strings.
2944 It stops processing as soon as it finds an embedded $ or @ variable
2945 and leaves it to the caller to work out what's going on.
2947 embedded arrays (whether in pattern or not) could be:
2948 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2950 $ in double-quoted strings must be the symbol of an embedded scalar.
2952 $ in pattern could be $foo or could be tail anchor. Assumption:
2953 it's a tail anchor if $ is the last thing in the string, or if it's
2954 followed by one of "()| \r\n\t"
2956 \1 (backreferences) are turned into $1 in substitutions
2958 The structure of the code is
2959 while (there's a character to process) {
2960 handle transliteration ranges
2961 skip regexp comments /(?#comment)/ and codes /(?{code})/
2962 skip #-initiated comments in //x patterns
2963 check for embedded arrays
2964 check for embedded scalars
2966 deprecate \1 in substitution replacements
2967 handle string-changing backslashes \l \U \Q \E, etc.
2968 switch (what was escaped) {
2969 handle \- in a transliteration (becomes a literal -)
2970 if a pattern and not \N{, go treat as regular character
2971 handle \132 (octal characters)
2972 handle \x15 and \x{1234} (hex characters)
2973 handle \N{name} (named characters, also \N{3,5} in a pattern)
2974 handle \cV (control characters)
2975 handle printf-style backslashes (\f, \r, \n, etc)
2978 } (end if backslash)
2979 handle regular character
2980 } (end while character to read)
2985 S_scan_const(pTHX_ char *start)
2987 char *send = PL_bufend; /* end of the constant */
2988 SV *sv = newSV(send - start); /* sv for the constant. See note below
2990 char *s = start; /* start of the constant */
2991 char *d = SvPVX(sv); /* destination for copies */
2992 bool dorange = FALSE; /* are we in a translit range? */
2993 bool didrange = FALSE; /* did we just finish a range? */
2994 bool in_charclass = FALSE; /* within /[...]/ */
2995 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2996 UTF8? But, this can show as true
2997 when the source isn't utf8, as for
2998 example when it is entirely composed
3000 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3001 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3002 number of characters found so far
3003 that will expand (into 2 bytes)
3004 should we have to convert to
3006 SV *res; /* result from charnames */
3007 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3008 high-end character is temporarily placed */
3010 /* Does something require special handling in tr/// ? This avoids extra
3011 * work in a less likely case. As such, khw didn't feel it was worth
3012 * adding any branches to the more mainline code to handle this, which
3013 * means that this doesn't get set in some circumstances when things like
3014 * \x{100} get expanded out. As a result there needs to be extra testing
3015 * done in the tr code */
3016 bool has_above_latin1 = FALSE;
3018 /* Note on sizing: The scanned constant is placed into sv, which is
3019 * initialized by newSV() assuming one byte of output for every byte of
3020 * input. This routine expects newSV() to allocate an extra byte for a
3021 * trailing NUL, which this routine will append if it gets to the end of
3022 * the input. There may be more bytes of input than output (eg., \N{LATIN
3023 * CAPITAL LETTER A}), or more output than input if the constant ends up
3024 * recoded to utf8, but each time a construct is found that might increase
3025 * the needed size, SvGROW() is called. Its size parameter each time is
3026 * based on the best guess estimate at the time, namely the length used so
3027 * far, plus the length the current construct will occupy, plus room for
3028 * the trailing NUL, plus one byte for every input byte still unscanned */
3030 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3033 int backslash_N = 0; /* ? was the character from \N{} */
3034 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3035 platform-specific like \x65 */
3038 PERL_ARGS_ASSERT_SCAN_CONST;
3040 assert(PL_lex_inwhat != OP_TRANSR);
3042 /* Protect sv from errors and fatal warnings. */
3043 ENTER_with_name("scan_const");
3046 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3047 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3049 assert(*send == '\0');
3052 || dorange /* Handle tr/// range at right edge of input */
3055 /* get transliterations out of the way (they're most literal) */
3056 if (PL_lex_inwhat == OP_TRANS) {
3058 /* But there isn't any special handling necessary unless there is a
3059 * range, so for most cases we just drop down and handle the value
3060 * as any other. There are two exceptions.
3062 * 1. A hyphen indicates that we are actually going to have a
3063 * range. In this case, skip the '-', set a flag, then drop
3064 * down to handle what should be the end range value.
3065 * 2. After we've handled that value, the next time through, that
3066 * flag is set and we fix up the range.
3068 * Ranges entirely within Latin1 are expanded out entirely, in
3069 * order to make the transliteration a simple table look-up.
3070 * Ranges that extend above Latin1 have to be done differently, so
3071 * there is no advantage to expanding them here, so they are
3072 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3073 * a byte that can't occur in legal UTF-8, and hence can signify a
3074 * hyphen without any possible ambiguity. On EBCDIC machines, if
3075 * the range is expressed as Unicode, the Latin1 portion is
3076 * expanded out even if the range extends above Latin1. This is
3077 * because each code point in it has to be processed here
3078 * individually to get its native translation */
3082 /* Here, we don't think we're in a range. If the new character
3083 * is not a hyphen; or if it is a hyphen, but it's too close to
3084 * either edge to indicate a range, or if we haven't output any
3085 * characters yet then it's a regular character. */
3086 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3089 /* A regular character. Process like any other, but first
3090 * clear any flags */
3094 non_portable_endpoint = 0;
3097 /* The tests here for being above Latin1 and similar ones
3098 * in the following 'else' suffice to find all such
3099 * occurences in the constant, except those added by a
3100 * backslash escape sequence, like \x{100}. Mostly, those
3101 * set 'has_above_latin1' as appropriate */
3102 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3103 has_above_latin1 = TRUE;
3106 /* Drops down to generic code to process current byte */
3108 else { /* Is a '-' in the context where it means a range */
3109 if (didrange) { /* Something like y/A-C-Z// */
3110 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3116 s++; /* Skip past the hyphen */
3118 /* d now points to where the end-range character will be
3119 * placed. Drop down to get that character. We'll finish
3120 * processing the range the next time through the loop */
3122 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3123 has_above_latin1 = TRUE;
3126 /* Drops down to generic code to process current byte */
3128 } /* End of not a range */
3130 /* Here we have parsed a range. Now must handle it. At this
3132 * 'sv' is a SV* that contains the output string we are
3133 * constructing. The final two characters in that string
3134 * are the range start and range end, in order.
3135 * 'd' points to just beyond the range end in the 'sv' string,
3136 * where we would next place something
3141 IV range_max; /* last character in range */
3143 Size_t offset_to_min = 0;
3146 bool convert_unicode;
3147 IV real_range_max = 0;
3149 /* Get the code point values of the range ends. */
3150 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3151 offset_to_max = max_ptr - SvPVX_const(sv);
3153 /* We know the utf8 is valid, because we just constructed
3154 * it ourselves in previous loop iterations */
3155 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3156 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3157 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3159 /* This compensates for not all code setting
3160 * 'has_above_latin1', so that we don't skip stuff that
3161 * should be executed */
3162 if (range_max > 255) {
3163 has_above_latin1 = TRUE;
3167 min_ptr = max_ptr - 1;
3168 range_min = * (U8*) min_ptr;
3169 range_max = * (U8*) max_ptr;
3172 /* If the range is just a single code point, like tr/a-a/.../,
3173 * that code point is already in the output, twice. We can
3174 * just back up over the second instance and avoid all the rest
3175 * of the work. But if it is a variant character, it's been
3176 * counted twice, so decrement. (This unlikely scenario is
3177 * special cased, like the one for a range of 2 code points
3178 * below, only because the main-line code below needs a range
3179 * of 3 or more to work without special casing. Might as well
3180 * get it out of the way now.) */
3181 if (UNLIKELY(range_max == range_min)) {
3183 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3184 utf8_variant_count--;
3190 /* On EBCDIC platforms, we may have to deal with portable
3191 * ranges. These happen if at least one range endpoint is a
3192 * Unicode value (\N{...}), or if the range is a subset of
3193 * [A-Z] or [a-z], and both ends are literal characters,
3194 * like 'A', and not like \x{C1} */
3196 cBOOL(backslash_N) /* \N{} forces Unicode,
3197 hence portable range */
3198 || ( ! non_portable_endpoint
3199 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3200 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3201 if (convert_unicode) {
3203 /* Special handling is needed for these portable ranges.
3204 * They are defined to be in Unicode terms, which includes
3205 * all the Unicode code points between the end points.
3206 * Convert to Unicode to get the Unicode range. Later we
3207 * will convert each code point in the range back to
3209 range_min = NATIVE_TO_UNI(range_min);
3210 range_max = NATIVE_TO_UNI(range_max);
3214 if (range_min > range_max) {
3216 if (convert_unicode) {
3217 /* Need to convert back to native for meaningful
3218 * messages for this platform */
3219 range_min = UNI_TO_NATIVE(range_min);
3220 range_max = UNI_TO_NATIVE(range_max);
3223 /* Use the characters themselves for the error message if
3224 * ASCII printables; otherwise some visible representation
3226 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3228 "Invalid range \"%c-%c\" in transliteration operator",
3229 (char)range_min, (char)range_max);
3232 else if (convert_unicode) {
3233 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3235 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3236 UVXf "}\" in transliteration operator",
3237 range_min, range_max);
3241 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3243 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3244 " in transliteration operator",
3245 range_min, range_max);
3249 /* If the range is exactly two code points long, they are
3250 * already both in the output */
3251 if (UNLIKELY(range_min + 1 == range_max)) {
3255 /* Here the range contains at least 3 code points */
3259 /* If everything in the transliteration is below 256, we
3260 * can avoid special handling later. A translation table
3261 * for each of those bytes is created by op.c. So we
3262 * expand out all ranges to their constituent code points.
3263 * But if we've encountered something above 255, the
3264 * expanding won't help, so skip doing that. But if it's
3265 * EBCDIC, we may have to look at each character below 256
3266 * if we have to convert to/from Unicode values */
3267 if ( has_above_latin1
3269 && (range_min > 255 || ! convert_unicode)
3272 const STRLEN off = d - SvPVX(sv);
3273 const STRLEN extra = 1 + (send - s) + 1;
3276 /* Move the high character one byte to the right; then
3277 * insert between it and the range begin, an illegal
3278 * byte which serves to indicate this is a range (using
3279 * a '-' would be ambiguous). */
3281 if (off + extra > SvLEN(sv)) {
3282 d = off + SvGROW(sv, off + extra);
3283 max_ptr = d - off + offset_to_max;
3287 while (e-- > max_ptr) {
3290 *(e + 1) = (char) RANGE_INDICATOR;
3294 /* Here, we're going to expand out the range. For EBCDIC
3295 * the range can extend above 255 (not so in ASCII), so
3296 * for EBCDIC, split it into the parts above and below
3299 if (range_max > 255) {
3300 real_range_max = range_max;
3306 /* Here we need to expand out the string to contain each
3307 * character in the range. Grow the output to handle this.
3308 * For non-UTF8, we need a byte for each code point in the
3309 * range, minus the three that we've already allocated for: the
3310 * hyphen, the min, and the max. For UTF-8, we need this
3311 * plus an extra byte for each code point that occupies two
3312 * bytes (is variant) when in UTF-8 (except we've already
3313 * allocated for the end points, including if they are
3314 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3315 * platforms, it's easy to calculate a precise number. To
3316 * start, we count the variants in the range, which we need
3317 * elsewhere in this function anyway. (For the case where it
3318 * isn't easy to calculate, 'extras' has been initialized to 0,
3319 * and the calculation is done in a loop further down.) */
3321 if (convert_unicode)
3324 /* This is executed unconditionally on ASCII, and for
3325 * Unicode ranges on EBCDIC. Under these conditions, all
3326 * code points above a certain value are variant; and none
3327 * under that value are. We just need to find out how much
3328 * of the range is above that value. We don't count the
3329 * end points here, as they will already have been counted
3330 * as they were parsed. */
3331 if (range_min >= UTF_CONTINUATION_MARK) {
3333 /* The whole range is made up of variants */
3334 extras = (range_max - 1) - (range_min + 1) + 1;
3336 else if (range_max >= UTF_CONTINUATION_MARK) {
3338 /* Only the higher portion of the range is variants */
3339 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3342 utf8_variant_count += extras;
3345 /* The base growth is the number of code points in the range,
3346 * not including the endpoints, which have already been sized
3347 * for (and output). We don't subtract for the hyphen, as it
3348 * has been parsed but not output, and the SvGROW below is
3349 * based only on what's been output plus what's left to parse.
3351 grow = (range_max - 1) - (range_min + 1) + 1;
3355 /* In some cases in EBCDIC, we haven't yet calculated a
3356 * precise amount needed for the UTF-8 variants. Just
3357 * assume the worst case, that everything will expand by a
3359 if (! convert_unicode) {
3365 /* Otherwise we know exactly how many variants there
3366 * are in the range. */
3371 /* Grow, but position the output to overwrite the range min end
3372 * point, because in some cases we overwrite that */
3373 SvCUR_set(sv, d - SvPVX_const(sv));
3374 offset_to_min = min_ptr - SvPVX_const(sv);
3376 /* See Note on sizing above. */
3377 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3380 + 1 /* Trailing NUL */ );
3382 /* Now, we can expand out the range. */
3384 if (convert_unicode) {
3387 /* Recall that the min and max are now in Unicode terms, so
3388 * we have to convert each character to its native
3391 for (i = range_min; i <= range_max; i++) {
3392 append_utf8_from_native_byte(
3393 LATIN1_TO_NATIVE((U8) i),
3398 for (i = range_min; i <= range_max; i++) {
3399 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3405 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3407 /* Here, no conversions are necessary, which means that the
3408 * first character in the range is already in 'd' and
3409 * valid, so we can skip overwriting it */
3413 for (i = range_min + 1; i <= range_max; i++) {
3414 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3420 assert(range_min + 1 <= range_max);
3421 for (i = range_min + 1; i < range_max; i++) {
3423 /* In this case on EBCDIC, we haven't calculated
3424 * the variants. Do it here, as we go along */
3425 if (! UVCHR_IS_INVARIANT(i)) {
3426 utf8_variant_count++;
3432 /* The range_max is done outside the loop so as to
3433 * avoid having to special case not incrementing
3434 * 'utf8_variant_count' on EBCDIC (it's already been
3435 * counted when originally parsed) */
3436 *d++ = (char) range_max;
3441 /* If the original range extended above 255, add in that
3443 if (real_range_max) {
3444 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3445 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3446 if (real_range_max > 0x100) {
3447 if (real_range_max > 0x101) {
3448 *d++ = (char) RANGE_INDICATOR;
3450 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3456 /* mark the range as done, and continue */
3460 non_portable_endpoint = 0;
3464 } /* End of is a range */
3465 } /* End of transliteration. Joins main code after these else's */
3466 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3469 while (s1 >= start && *s1-- == '\\')
3472 in_charclass = TRUE;
3474 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3477 while (s1 >= start && *s1-- == '\\')
3480 in_charclass = FALSE;
3482 /* skip for regexp comments /(?#comment)/, except for the last
3483 * char, which will be done separately. Stop on (?{..}) and
3485 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3488 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3490 while (s + len < send && *s != ')') {
3491 Copy(s, d, len, U8);
3494 len = UTF8_SAFE_SKIP(s, send);
3497 else while (s+1 < send && *s != ')') {
3501 else if (!PL_lex_casemods
3502 && ( s[2] == '{' /* This should match regcomp.c */
3503 || (s[2] == '?' && s[3] == '{')))
3508 /* likewise skip #-initiated comments in //x patterns */
3512 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3514 while (s < send && *s != '\n')
3517 /* no further processing of single-quoted regex */
3518 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3519 goto default_action;
3521 /* check for embedded arrays
3522 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3524 else if (*s == '@' && s[1]) {
3526 ? isIDFIRST_utf8_safe(s+1, send)
3527 : isWORDCHAR_A(s[1]))
3531 if (memCHRs(":'{$", s[1]))
3533 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3534 break; /* in regexp, neither @+ nor @- are interpolated */
3536 /* check for embedded scalars. only stop if we're sure it's a
3538 else if (*s == '$') {
3539 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3541 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3543 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3544 "Possible unintended interpolation of $\\ in regex");
3546 break; /* in regexp, $ might be tail anchor */
3550 /* End of else if chain - OP_TRANS rejoin rest */
3552 if (UNLIKELY(s >= send)) {
3558 if (*s == '\\' && s+1 < send) {
3559 char* e; /* Can be used for ending '}', etc. */
3563 /* warn on \1 - \9 in substitution replacements, but note that \11
3564 * is an octal; and \19 is \1 followed by '9' */
3565 if (PL_lex_inwhat == OP_SUBST
3571 /* diag_listed_as: \%d better written as $%d */
3572 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3577 /* string-change backslash escapes */
3578 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3582 /* In a pattern, process \N, but skip any other backslash escapes.
3583 * This is because we don't want to translate an escape sequence
3584 * into a meta symbol and have the regex compiler use the meta
3585 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3586 * in spite of this, we do have to process \N here while the proper
3587 * charnames handler is in scope. See bugs #56444 and #62056.
3589 * There is a complication because \N in a pattern may also stand
3590 * for 'match a non-nl', and not mean a charname, in which case its
3591 * processing should be deferred to the regex compiler. To be a
3592 * charname it must be followed immediately by a '{', and not look
3593 * like \N followed by a curly quantifier, i.e., not something like
3594 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3596 else if (PL_lex_inpat
3599 || regcurly(s + 1)))
3602 goto default_action;
3608 if ((isALPHANUMERIC(*s)))
3609 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3610 "Unrecognized escape \\%c passed through",
3612 /* default action is to copy the quoted character */
3613 goto default_action;
3616 /* eg. \132 indicates the octal constant 0132 */
3617 case '0': case '1': case '2': case '3':
3618 case '4': case '5': case '6': case '7':
3620 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3621 | PERL_SCAN_NOTIFY_ILLDIGIT;
3623 uv = grok_oct(s, &len, &flags, NULL);
3625 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3627 && isDIGIT(*s) /* like \08, \178 */
3628 && ckWARN(WARN_MISC))
3630 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3631 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3634 goto NUM_ESCAPE_INSERT;
3636 /* eg. \o{24} indicates the octal constant \024 */
3641 if (! grok_bslash_o(&s, send,
3644 FALSE, /* Not strict */
3645 FALSE, /* No illegal cp's */
3649 uv = 0; /* drop through to ensure range ends are set */
3651 goto NUM_ESCAPE_INSERT;
3654 /* eg. \x24 indicates the hex constant 0x24 */
3659 if (! grok_bslash_x(&s, send,
3662 FALSE, /* Not strict */
3663 FALSE, /* No illegal cp's */
3667 uv = 0; /* drop through to ensure range ends are set */
3672 /* Insert oct or hex escaped character. */
3674 /* Here uv is the ordinal of the next character being added */
3675 if (UVCHR_IS_INVARIANT(uv)) {
3679 if (!d_is_utf8 && uv > 255) {
3681 /* Here, 'uv' won't fit unless we convert to UTF-8.
3682 * If we've only seen invariants so far, all we have to
3683 * do is turn on the flag */
3684 if (utf8_variant_count == 0) {
3688 SvCUR_set(sv, d - SvPVX_const(sv));
3692 sv_utf8_upgrade_flags_grow(
3694 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3696 /* Since we're having to grow here,
3697 * make sure we have enough room for
3698 * this escape and a NUL, so the
3699 * code immediately below won't have
3700 * to actually grow again */
3702 + (STRLEN)(send - s) + 1);
3703 d = SvPVX(sv) + SvCUR(sv);
3706 has_above_latin1 = TRUE;
3712 utf8_variant_count++;
3715 /* Usually, there will already be enough room in 'sv'
3716 * since such escapes are likely longer than any UTF-8
3717 * sequence they can end up as. This isn't the case on
3718 * EBCDIC where \x{40000000} contains 12 bytes, and the
3719 * UTF-8 for it contains 14. And, we have to allow for
3720 * a trailing NUL. It probably can't happen on ASCII
3721 * platforms, but be safe. See Note on sizing above. */
3722 const STRLEN needed = d - SvPVX(sv)
3726 if (UNLIKELY(needed > SvLEN(sv))) {
3727 SvCUR_set(sv, d - SvPVX_const(sv));
3728 d = SvCUR(sv) + SvGROW(sv, needed);
3731 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3732 (ckWARN(WARN_PORTABLE))
3733 ? UNICODE_WARN_PERL_EXTENDED
3738 non_portable_endpoint++;
3743 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3744 * named character, like \N{LATIN SMALL LETTER A}, or a named
3745 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3746 * GRAVE} (except y/// can't handle the latter, croaking). For
3747 * convenience all three forms are referred to as "named
3748 * characters" below.
3750 * For patterns, \N also can mean to match a non-newline. Code
3751 * before this 'switch' statement should already have handled
3752 * this situation, and hence this code only has to deal with
3753 * the named character cases.
3755 * For non-patterns, the named characters are converted to
3756 * their string equivalents. In patterns, named characters are
3757 * not converted to their ultimate forms for the same reasons
3758 * that other escapes aren't (mainly that the ultimate
3759 * character could be considered a meta-symbol by the regex
3760 * compiler). Instead, they are converted to the \N{U+...}
3761 * form to get the value from the charnames that is in effect
3762 * right now, while preserving the fact that it was a named
3763 * character, so that the regex compiler knows this.
3765 * The structure of this section of code (besides checking for
3766 * errors and upgrading to utf8) is:
3767 * If the named character is of the form \N{U+...}, pass it
3768 * through if a pattern; otherwise convert the code point
3770 * Otherwise must be some \N{NAME}: convert to
3771 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3773 * Transliteration is an exception. The conversion to utf8 is
3774 * only done if the code point requires it to be representable.
3776 * Here, 's' points to the 'N'; the test below is guaranteed to
3777 * succeed if we are being called on a pattern, as we already
3778 * know from a test above that the next character is a '{'. A
3779 * non-pattern \N must mean 'named character', which requires
3783 yyerror("Missing braces on \\N{}");
3789 /* If there is no matching '}', it is an error. */
3790 if (! (e = (char *) memchr(s, '}', send - s))) {
3791 if (! PL_lex_inpat) {
3792 yyerror("Missing right brace on \\N{}");
3794 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3796 yyquit(); /* Have exhausted the input. */
3799 /* Here it looks like a named character */
3801 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3802 s += 2; /* Skip to next char after the 'U+' */
3805 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3806 /* Check the syntax. */
3809 if (!isXDIGIT(*s)) {
3812 "Invalid hexadecimal number in \\N{U+...}"
3821 else if ((*s == '.' || *s == '_')
3827 /* Pass everything through unchanged.
3828 * +1 is for the '}' */
3829 Copy(orig_s, d, e - orig_s + 1, char);
3830 d += e - orig_s + 1;
3832 else { /* Not a pattern: convert the hex to string */
3833 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3834 | PERL_SCAN_SILENT_ILLDIGIT
3835 | PERL_SCAN_SILENT_OVERFLOW
3836 | PERL_SCAN_DISALLOW_PREFIX;
3839 uv = grok_hex(s, &len, &flags, NULL);
3840 if (len == 0 || (len != (STRLEN)(e - s)))
3843 if ( uv > MAX_LEGAL_CP
3844 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3846 yyerror(form_cp_too_large_msg(16, s, len, 0));
3847 uv = 0; /* drop through to ensure range ends are
3851 /* For non-tr///, if the destination is not in utf8,
3852 * unconditionally recode it to be so. This is
3853 * because \N{} implies Unicode semantics, and scalars
3854 * have to be in utf8 to guarantee those semantics.
3855 * tr/// doesn't care about Unicode rules, so no need
3856 * there to upgrade to UTF-8 for small enough code
3858 if (! d_is_utf8 && ( uv > 0xFF
3859 || PL_lex_inwhat != OP_TRANS))
3861 /* See Note on sizing above. */
3862 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3864 SvCUR_set(sv, d - SvPVX_const(sv));
3868 if (utf8_variant_count == 0) {
3870 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3873 sv_utf8_upgrade_flags_grow(
3875 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3877 d = SvPVX(sv) + SvCUR(sv);
3881 has_above_latin1 = TRUE;
3884 /* Add the (Unicode) code point to the output. */
3885 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3886 *d++ = (char) LATIN1_TO_NATIVE(uv);
3889 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3890 (ckWARN(WARN_PORTABLE))
3891 ? UNICODE_WARN_PERL_EXTENDED
3896 else /* Here is \N{NAME} but not \N{U+...}. */
3897 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3898 { /* Failed. We should die eventually, but for now use a NUL
3902 else { /* Successfully evaluated the name */
3904 const char *str = SvPV_const(res, len);
3907 if (! len) { /* The name resolved to an empty string */
3908 const char empty_N[] = "\\N{_}";
3909 Copy(empty_N, d, sizeof(empty_N) - 1, char);
3910 d += sizeof(empty_N) - 1;
3913 /* In order to not lose information for the regex
3914 * compiler, pass the result in the specially made
3915 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3916 * the code points in hex of each character
3917 * returned by charnames */
3919 const char *str_end = str + len;
3920 const STRLEN off = d - SvPVX_const(sv);
3922 if (! SvUTF8(res)) {
3923 /* For the non-UTF-8 case, we can determine the
3924 * exact length needed without having to parse
3925 * through the string. Each character takes up
3926 * 2 hex digits plus either a trailing dot or
3928 const char initial_text[] = "\\N{U+";
3929 const STRLEN initial_len = sizeof(initial_text)
3931 d = off + SvGROW(sv, off
3934 /* +1 for trailing NUL */
3937 + (STRLEN)(send - e));
3938 Copy(initial_text, d, initial_len, char);
3940 while (str < str_end) {
3943 my_snprintf(hex_string,
3947 /* The regex compiler is
3948 * expecting Unicode, not
3950 NATIVE_TO_LATIN1(*str));
3951 PERL_MY_SNPRINTF_POST_GUARD(len,
3952 sizeof(hex_string));
3953 Copy(hex_string, d, 3, char);
3957 d--; /* Below, we will overwrite the final
3958 dot with a right brace */
3961 STRLEN char_length; /* cur char's byte length */
3963 /* and the number of bytes after this is
3964 * translated into hex digits */
3965 STRLEN output_length;
3967 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3968 * for max('U+', '.'); and 1 for NUL */
3969 char hex_string[2 * UTF8_MAXBYTES + 5];
3971 /* Get the first character of the result. */
3972 U32 uv = utf8n_to_uvchr((U8 *) str,
3976 /* Convert first code point to Unicode hex,
3977 * including the boiler plate before it. */
3979 my_snprintf(hex_string, sizeof(hex_string),
3981 (unsigned int) NATIVE_TO_UNI(uv));
3983 /* Make sure there is enough space to hold it */
3984 d = off + SvGROW(sv, off
3986 + (STRLEN)(send - e)
3987 + 2); /* '}' + NUL */
3989 Copy(hex_string, d, output_length, char);
3992 /* For each subsequent character, append dot and
3993 * its Unicode code point in hex */
3994 while ((str += char_length) < str_end) {
3995 const STRLEN off = d - SvPVX_const(sv);
3996 U32 uv = utf8n_to_uvchr((U8 *) str,
4001 my_snprintf(hex_string,
4004 (unsigned int) NATIVE_TO_UNI(uv));
4006 d = off + SvGROW(sv, off
4008 + (STRLEN)(send - e)
4009 + 2); /* '}' + NUL */
4010 Copy(hex_string, d, output_length, char);
4015 *d++ = '}'; /* Done. Add the trailing brace */
4018 else { /* Here, not in a pattern. Convert the name to a
4021 if (PL_lex_inwhat == OP_TRANS) {
4022 str = SvPV_const(res, len);
4023 if (len > ((SvUTF8(res))
4027 yyerror(Perl_form(aTHX_
4028 "%.*s must not be a named sequence"
4029 " in transliteration operator",
4030 /* +1 to include the "}" */
4031 (int) (e + 1 - start), start));
4033 goto end_backslash_N;
4036 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4037 has_above_latin1 = TRUE;
4041 else if (! SvUTF8(res)) {
4042 /* Make sure \N{} return is UTF-8. This is because
4043 * \N{} implies Unicode semantics, and scalars have
4044 * to be in utf8 to guarantee those semantics; but
4045 * not needed in tr/// */
4046 sv_utf8_upgrade_flags(res, 0);
4047 str = SvPV_const(res, len);
4050 /* Upgrade destination to be utf8 if this new
4052 if (! d_is_utf8 && SvUTF8(res)) {
4053 /* See Note on sizing above. */
4054 const STRLEN extra = len + (send - s) + 1;
4056 SvCUR_set(sv, d - SvPVX_const(sv));
4060 if (utf8_variant_count == 0) {
4062 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4065 sv_utf8_upgrade_flags_grow(sv,
4066 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4068 d = SvPVX(sv) + SvCUR(sv);
4071 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4073 /* See Note on sizing above. (NOTE: SvCUR() is not
4074 * set correctly here). */
4075 const STRLEN extra = len + (send - e) + 1;
4076 const STRLEN off = d - SvPVX_const(sv);
4077 d = off + SvGROW(sv, off + extra);
4079 Copy(str, d, len, char);
4085 } /* End \N{NAME} */
4089 backslash_N++; /* \N{} is defined to be Unicode */
4091 s = e + 1; /* Point to just after the '}' */
4094 /* \c is a control character */
4098 const char * message;
4100 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4102 yyquit(); /* Have always immediately croaked on
4108 yyerror("Missing control char name in \\c");
4109 yyquit(); /* Are at end of input, no sense continuing */
4112 non_portable_endpoint++;
4116 /* printf-style backslashes, formfeeds, newlines, etc */
4142 } /* end if (backslash) */
4145 /* Just copy the input to the output, though we may have to convert
4148 * If the input has the same representation in UTF-8 as not, it will be
4149 * a single byte, and we don't care about UTF8ness; just copy the byte */
4150 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4153 else if (! s_is_utf8 && ! d_is_utf8) {
4154 /* If neither source nor output is UTF-8, is also a single byte,
4155 * just copy it; but this byte counts should we later have to
4156 * convert to UTF-8 */
4158 utf8_variant_count++;
4160 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4161 const STRLEN len = UTF8SKIP(s);
4163 /* We expect the source to have already been checked for
4165 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4167 Copy(s, d, len, U8);
4171 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4172 STRLEN need = send - s + 1; /* See Note on sizing above. */
4174 SvCUR_set(sv, d - SvPVX_const(sv));
4178 if (utf8_variant_count == 0) {
4180 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4183 sv_utf8_upgrade_flags_grow(sv,
4184 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4186 d = SvPVX(sv) + SvCUR(sv);
4189 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4191 else { /* UTF8ness matters: convert this non-UTF8 source char to
4192 UTF-8 for output. It will occupy 2 bytes, but don't include
4193 the input byte since we haven't incremented 's' yet. See
4194 Note on sizing above. */
4195 const STRLEN off = d - SvPVX(sv);
4196 const STRLEN extra = 2 + (send - s - 1) + 1;
4197 if (off + extra > SvLEN(sv)) {
4198 d = off + SvGROW(sv, off + extra);
4200 *d++ = UTF8_EIGHT_BIT_HI(*s);
4201 *d++ = UTF8_EIGHT_BIT_LO(*s);
4204 } /* while loop to process each character */
4207 const STRLEN off = d - SvPVX(sv);
4209 /* See if room for the terminating NUL */
4210 if (UNLIKELY(off >= SvLEN(sv))) {
4214 if (off > SvLEN(sv))
4216 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4217 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4219 /* Whew! Here we don't have room for the terminating NUL, but
4220 * everything else so far has fit. It's not too late to grow
4221 * to fit the NUL and continue on. But it is a bug, as the code
4222 * above was supposed to have made room for this, so under
4223 * DEBUGGING builds, we panic anyway. */
4224 d = off + SvGROW(sv, off + 1);
4228 /* terminate the string and set up the sv */
4230 SvCUR_set(sv, d - SvPVX_const(sv));
4237 /* shrink the sv if we allocated more than we used */
4238 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4239 SvPV_shrink_to_cur(sv);
4242 /* return the substring (via pl_yylval) only if we parsed anything */
4245 for (; s2 < s; s2++) {
4247 COPLINE_INC_WITH_HERELINES;
4249 SvREFCNT_inc_simple_void_NN(sv);
4250 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4251 && ! PL_parser->lex_re_reparsing)
4253 const char *const key = PL_lex_inpat ? "qr" : "q";
4254 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4258 if (PL_lex_inwhat == OP_TRANS) {
4261 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4264 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4272 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4273 type, typelen, NULL);
4275 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4277 LEAVE_with_name("scan_const");
4282 * Returns TRUE if there's more to the expression (e.g., a subscript),
4285 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4287 * ->[ and ->{ return TRUE
4288 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4289 * { and [ outside a pattern are always subscripts, so return TRUE
4290 * if we're outside a pattern and it's not { or [, then return FALSE
4291 * if we're in a pattern and the first char is a {
4292 * {4,5} (any digits around the comma) returns FALSE
4293 * if we're in a pattern and the first char is a [
4295 * [SOMETHING] has a funky algorithm to decide whether it's a
4296 * character class or not. It has to deal with things like
4297 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4298 * anything else returns TRUE
4301 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4304 S_intuit_more(pTHX_ char *s, char *e)
4306 PERL_ARGS_ASSERT_INTUIT_MORE;
4308 if (PL_lex_brackets)
4310 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4312 if (*s == '-' && s[1] == '>'
4313 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4314 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4315 ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4317 if (*s != '{' && *s != '[')
4319 PL_parser->sub_no_recover = TRUE;
4323 /* In a pattern, so maybe we have {n,m}. */
4331 /* On the other hand, maybe we have a character class */
4334 if (*s == ']' || *s == '^')
4337 /* this is terrifying, and it works */
4340 const char * const send = (char *) memchr(s, ']', e - s);
4341 unsigned char un_char, last_un_char;
4342 char tmpbuf[sizeof PL_tokenbuf * 4];
4344 if (!send) /* has to be an expression */
4346 weight = 2; /* let's weigh the evidence */
4350 else if (isDIGIT(*s)) {
4352 if (isDIGIT(s[1]) && s[2] == ']')
4358 Zero(seen,256,char);
4360 for (; s < send; s++) {
4361 last_un_char = un_char;
4362 un_char = (unsigned char)*s;
4367 weight -= seen[un_char] * 10;
4368 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4370 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4371 len = (int)strlen(tmpbuf);
4372 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4373 UTF ? SVf_UTF8 : 0, SVt_PV))
4380 && memCHRs("[#!%*<>()-=",s[1]))
4382 if (/*{*/ memCHRs("])} =",s[2]))
4391 if (memCHRs("wds]",s[1]))
4393 else if (seen[(U8)'\''] || seen[(U8)'"'])
4395 else if (memCHRs("rnftbxcav",s[1]))
4397 else if (isDIGIT(s[1])) {
4399 while (s[1] && isDIGIT(s[1]))
4409 if (memCHRs("aA01! ",last_un_char))
4411 if (memCHRs("zZ79~",s[1]))
4413 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4414 weight -= 5; /* cope with negative subscript */
4417 if (!isWORDCHAR(last_un_char)
4418 && !(last_un_char == '$' || last_un_char == '@'
4419 || last_un_char == '&')
4420 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4424 if (keyword(d, s - d, 0))
4427 if (un_char == last_un_char + 1)
4429 weight -= seen[un_char];
4434 if (weight >= 0) /* probably a character class */
4444 * Does all the checking to disambiguate
4446 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4447 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4449 * First argument is the stuff after the first token, e.g. "bar".
4451 * Not a method if foo is a filehandle.
4452 * Not a method if foo is a subroutine prototyped to take a filehandle.
4453 * Not a method if it's really "Foo $bar"
4454 * Method if it's "foo $bar"
4455 * Not a method if it's really "print foo $bar"
4456 * Method if it's really "foo package::" (interpreted as package->foo)
4457 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4458 * Not a method if bar is a filehandle or package, but is quoted with
4463 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4465 char *s = start + (*start == '$');
4466 char tmpbuf[sizeof PL_tokenbuf];
4469 /* Mustn't actually add anything to a symbol table.
4470 But also don't want to "initialise" any placeholder
4471 constants that might already be there into full
4472 blown PVGVs with attached PVCV. */
4474 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4476 PERL_ARGS_ASSERT_INTUIT_METHOD;
4478 if (!FEATURE_INDIRECT_IS_ENABLED)
4481 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4483 if (cv && SvPOK(cv)) {
4484 const char *proto = CvPROTO(cv);
4486 while (*proto && (isSPACE(*proto) || *proto == ';'))
4493 if (*start == '$') {
4494 SSize_t start_off = start - SvPVX(PL_linestr);
4495 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4496 || isUPPER(*PL_tokenbuf))
4498 /* this could be $# */
4501 PL_bufptr = SvPVX(PL_linestr) + start_off;
4503 return *s == '(' ? FUNCMETH : METHOD;
4506 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4507 /* start is the beginning of the possible filehandle/object,
4508 * and s is the end of it
4509 * tmpbuf is a copy of it (but with single quotes as double colons)
4512 if (!keyword(tmpbuf, len, 0)) {
4513 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4518 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4519 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4521 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4522 && (!isGV(indirgv) || GvCVu(indirgv)))
4524 /* filehandle or package name makes it a method */
4525 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4527 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4528 return 0; /* no assumptions -- "=>" quotes bareword */
4530 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4531 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4532 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4534 force_next(BAREWORD);
4536 return *s == '(' ? FUNCMETH : METHOD;
4542 /* Encoded script support. filter_add() effectively inserts a
4543 * 'pre-processing' function into the current source input stream.
4544 * Note that the filter function only applies to the current source file
4545 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4547 * The datasv parameter (which may be NULL) can be used to pass
4548 * private data to this instance of the filter. The filter function
4549 * can recover the SV using the FILTER_DATA macro and use it to
4550 * store private buffers and state information.
4552 * The supplied datasv parameter is upgraded to a PVIO type
4553 * and the IoDIRP/IoANY field is used to store the function pointer,
4554 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4555 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4556 * private use must be set using malloc'd pointers.
4560 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4568 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4569 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4571 if (!PL_rsfp_filters)
4572 PL_rsfp_filters = newAV();
4575 SvUPGRADE(datasv, SVt_PVIO);
4576 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4577 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4578 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4579 FPTR2DPTR(void *, IoANY(datasv)),
4580 SvPV_nolen(datasv)));
4581 av_unshift(PL_rsfp_filters, 1);
4582 av_store(PL_rsfp_filters, 0, datasv) ;
4584 !PL_parser->filtered
4585 && PL_parser->lex_flags & LEX_EVALBYTES
4586 && PL_bufptr < PL_bufend
4588 const char *s = PL_bufptr;
4589 while (s < PL_bufend) {
4591 SV *linestr = PL_parser->linestr;
4592 char *buf = SvPVX(linestr);
4593 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4594 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4595 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4596 STRLEN const linestart_pos = PL_parser->linestart - buf;
4597 STRLEN const last_uni_pos =
4598 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4599 STRLEN const last_lop_pos =
4600 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4601 av_push(PL_rsfp_filters, linestr);
4602 PL_parser->linestr =
4603 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4604 buf = SvPVX(PL_parser->linestr);
4605 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4606 PL_parser->bufptr = buf + bufptr_pos;
4607 PL_parser->oldbufptr = buf + oldbufptr_pos;
4608 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4609 PL_parser->linestart = buf + linestart_pos;
4610 if (PL_parser->last_uni)
4611 PL_parser->last_uni = buf + last_uni_pos;
4612 if (PL_parser->last_lop)
4613 PL_parser->last_lop = buf + last_lop_pos;
4614 SvLEN_set(linestr, SvCUR(linestr));
4615 SvCUR_set(linestr, s - SvPVX(linestr));
4616 PL_parser->filtered = 1;
4626 /* Delete most recently added instance of this filter function. */
4628 Perl_filter_del(pTHX_ filter_t funcp)
4632 PERL_ARGS_ASSERT_FILTER_DEL;
4635 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4636 FPTR2DPTR(void*, funcp)));
4638 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4640 /* if filter is on top of stack (usual case) just pop it off */
4641 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4642 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4643 sv_free(av_pop(PL_rsfp_filters));
4647 /* we need to search for the correct entry and clear it */
4648 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4652 /* Invoke the idxth filter function for the current rsfp. */
4653 /* maxlen 0 = read one text line */
4655 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4660 /* This API is bad. It should have been using unsigned int for maxlen.
4661 Not sure if we want to change the API, but if not we should sanity
4662 check the value here. */
4663 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4665 PERL_ARGS_ASSERT_FILTER_READ;
4667 if (!PL_parser || !PL_rsfp_filters)
4669 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4670 /* Provide a default input filter to make life easy. */
4671 /* Note that we append to the line. This is handy. */
4672 DEBUG_P(PerlIO_printf(Perl_debug_log,
4673 "filter_read %d: from rsfp\n", idx));
4674 if (correct_length) {
4677 const int old_len = SvCUR(buf_sv);
4679 /* ensure buf_sv is large enough */
4680 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4681 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4682 correct_length)) <= 0) {
4683 if (PerlIO_error(PL_rsfp))
4684 return -1; /* error */
4686 return 0 ; /* end of file */
4688 SvCUR_set(buf_sv, old_len + len) ;
4689 SvPVX(buf_sv)[old_len + len] = '\0';
4692 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4693 if (PerlIO_error(PL_rsfp))
4694 return -1; /* error */
4696 return 0 ; /* end of file */
4699 return SvCUR(buf_sv);
4701 /* Skip this filter slot if filter has been deleted */
4702 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4703 DEBUG_P(PerlIO_printf(Perl_debug_log,
4704 "filter_read %d: skipped (filter deleted)\n",
4706 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4708 if (SvTYPE(datasv) != SVt_PVIO) {
4709 if (correct_length) {
4711 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4712 if (!remainder) return 0; /* eof */
4713 if (correct_length > remainder) correct_length = remainder;
4714 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4715 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4718 const char *s = SvEND(datasv);
4719 const char *send = SvPVX(datasv) + SvLEN(datasv);
4727 if (s == send) return 0; /* eof */
4728 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4729 SvCUR_set(datasv, s-SvPVX(datasv));
4731 return SvCUR(buf_sv);
4733 /* Get function pointer hidden within datasv */
4734 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4735 DEBUG_P(PerlIO_printf(Perl_debug_log,
4736 "filter_read %d: via function %p (%s)\n",
4737 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4738 /* Call function. The function is expected to */
4739 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4740 /* Return: <0:error, =0:eof, >0:not eof */
4742 save_scalar(PL_errgv);
4743 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4749 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4751 PERL_ARGS_ASSERT_FILTER_GETS;
4753 #ifdef PERL_CR_FILTER
4754 if (!PL_rsfp_filters) {
4755 filter_add(S_cr_textfilter,NULL);
4758 if (PL_rsfp_filters) {
4760 SvCUR_set(sv, 0); /* start with empty line */
4761 if (FILTER_READ(0, sv, 0) > 0)
4762 return ( SvPVX(sv) ) ;
4767 return (sv_gets(sv, PL_rsfp, append));
4771 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4775 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4777 if (memEQs(pkgname, len, "__PACKAGE__"))
4781 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4782 && (gv = gv_fetchpvn_flags(pkgname,
4784 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4786 return GvHV(gv); /* Foo:: */
4789 /* use constant CLASS => 'MyClass' */
4790 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4791 if (gv && GvCV(gv)) {
4792 SV * const sv = cv_const_sv(GvCV(gv));
4794 return gv_stashsv(sv, 0);
4797 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4802 S_tokenize_use(pTHX_ int is_use, char *s) {
4803 PERL_ARGS_ASSERT_TOKENIZE_USE;
4805 if (PL_expect != XSTATE)
4806 /* diag_listed_as: "use" not allowed in expression */
4807 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4808 is_use ? "use" : "no"));
4811 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4812 s = force_version(s, TRUE);
4813 if (*s == ';' || *s == '}'
4814 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4815 NEXTVAL_NEXTTOKE.opval = NULL;
4816 force_next(BAREWORD);
4818 else if (*s == 'v') {
4819 s = force_word(s,BAREWORD,FALSE,TRUE);
4820 s = force_version(s, FALSE);
4824 s = force_word(s,BAREWORD,FALSE,TRUE);
4825 s = force_version(s, FALSE);
4827 pl_yylval.ival = is_use;
4831 static const char* const exp_name[] =
4832 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4833 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4834 "SIGVAR", "TERMORDORDOR"
4838 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4840 S_word_takes_any_delimiter(char *p, STRLEN len)
4842 return (len == 1 && memCHRs("msyq", p[0]))
4844 && ((p[0] == 't' && p[1] == 'r')
4845 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4849 S_check_scalar_slice(pTHX_ char *s)
4852 while (SPACE_OR_TAB(*s)) s++;
4853 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4859 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4860 || (*s && memCHRs(" \t$#+-'\"", *s)))
4862 s += UTF ? UTF8SKIP(s) : 1;
4864 if (*s == '}' || *s == ']')
4865 pl_yylval.ival = OPpSLICEWARNING;
4868 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4870 S_lex_token_boundary(pTHX)
4872 PL_oldoldbufptr = PL_oldbufptr;
4873 PL_oldbufptr = PL_bufptr;
4876 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4878 S_vcs_conflict_marker(pTHX_ char *s)
4880 lex_token_boundary();
4882 yyerror("Version control conflict marker");
4883 while (s < PL_bufend && *s != '\n')
4889 yyl_sigvar(pTHX_ char *s)
4891 /* we expect the sigil and optional var name part of a
4892 * signature element here. Since a '$' is not necessarily
4893 * followed by a var name, handle it specially here; the general
4894 * yylex code would otherwise try to interpret whatever follows
4895 * as a var; e.g. ($, ...) would be seen as the var '$,'
4902 PL_bufptr = s; /* for error reporting */
4907 /* spot stuff that looks like an prototype */
4908 if (memCHRs("$:@%&*;\\[]", *s)) {
4909 yyerror("Illegal character following sigil in a subroutine signature");
4912 /* '$#' is banned, while '$ # comment' isn't */
4914 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4918 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4919 char *dest = PL_tokenbuf + 1;
4920 /* read var name, including sigil, into PL_tokenbuf */
4921 PL_tokenbuf[0] = sigil;
4922 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4923 0, cBOOL(UTF), FALSE, FALSE);
4925 assert(PL_tokenbuf[1]); /* we have a variable name */
4933 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4934 * as the ASSIGNOP, and exclude other tokens that start with =
4936 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4937 /* save now to report with the same context as we did when
4938 * all ASSIGNOPS were accepted */
4942 NEXTVAL_NEXTTOKE.ival = 0;
4943 force_next(ASSIGNOP);
4946 else if (*s == ',' || *s == ')') {
4947 PL_expect = XOPERATOR;
4950 /* make sure the context shows the unexpected character and
4951 * hopefully a bit more */
4953 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4955 PL_bufptr = s; /* for error reporting */
4956 yyerror("Illegal operator following parameter in a subroutine signature");
4960 NEXTVAL_NEXTTOKE.ival = sigil;
4961 force_next('p'); /* force a signature pending identifier */
4968 case ',': /* handle ($a,,$b) */
4973 yyerror("A signature parameter must start with '$', '@' or '%'");
4974 /* very crude error recovery: skip to likely next signature
4976 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4985 yyl_dollar(pTHX_ char *s)
4989 if (PL_expect == XPOSTDEREF) {
4992 POSTDEREF(DOLSHARP);
4998 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
4999 || memCHRs("{$:+-@", s[2])))
5001 PL_tokenbuf[0] = '@';
5002 s = scan_ident(s + 1, PL_tokenbuf + 1,
5003 sizeof PL_tokenbuf - 1, FALSE);
5004 if (PL_expect == XOPERATOR) {
5006 if (PL_bufptr > s) {
5008 PL_bufptr = PL_oldbufptr;
5010 no_op("Array length", d);
5012 if (!PL_tokenbuf[1])
5014 PL_expect = XOPERATOR;
5015 force_ident_maybe_lex('#');
5019 PL_tokenbuf[0] = '$';
5020 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5021 if (PL_expect == XOPERATOR) {
5023 if (PL_bufptr > s) {
5025 PL_bufptr = PL_oldbufptr;
5029 if (!PL_tokenbuf[1]) {
5031 yyerror("Final $ should be \\$ or $name");
5036 const char tmp = *s;
5037 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5040 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5041 && intuit_more(s, PL_bufend)) {
5043 PL_tokenbuf[0] = '@';
5044 if (ckWARN(WARN_SYNTAX)) {
5047 while ( t < PL_bufend ) {
5049 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5050 /* consumed one or more space chars */
5051 } else if (*t == '$' || *t == '@') {
5052 /* could be more than one '$' like $$ref or @$ref */
5053 do { t++; } while (t < PL_bufend && *t == '$');
5055 /* could be an abigail style identifier like $ foo */
5056 while (t < PL_bufend && *t == ' ') t++;
5058 /* strip off the name of the var */
5059 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5060 t += UTF ? UTF8SKIP(t) : 1;
5061 /* consumed a varname */
5062 } else if (isDIGIT(*t)) {
5063 /* deal with hex constants like 0x11 */
5064 if (t[0] == '0' && t[1] == 'x') {
5066 while (t < PL_bufend && isXDIGIT(*t)) t++;
5068 /* deal with decimal/octal constants like 1 and 0123 */
5069 do { t++; } while (isDIGIT(*t));
5070 if (t<PL_bufend && *t == '.') {
5071 do { t++; } while (isDIGIT(*t));
5074 /* consumed a number */
5076 /* not a var nor a space nor a number */
5080 if (t < PL_bufend && *t++ == ',') {
5081 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5082 while (t < PL_bufend && *t != ']')
5084 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5085 "Multidimensional syntax %" UTF8f " not supported",
5086 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5090 else if (*s == '{') {
5092 PL_tokenbuf[0] = '%';
5093 if ( strEQ(PL_tokenbuf+1, "SIG")
5094 && ckWARN(WARN_SYNTAX)
5095 && (t = (char *) memchr(s, '}', PL_bufend - s))
5096 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5098 char tmpbuf[sizeof PL_tokenbuf];
5101 } while (isSPACE(*t));
5102 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5104 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5109 && get_cvn_flags(tmpbuf, len, UTF
5113 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5114 "You need to quote \"%" UTF8f "\"",
5115 UTF8fARG(UTF, len, tmpbuf));
5122 PL_expect = XOPERATOR;
5123 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5124 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5125 if (!islop || PL_last_lop_op == OP_GREPSTART)
5126 PL_expect = XOPERATOR;
5127 else if (memCHRs("$@\"'`q", *s))
5128 PL_expect = XTERM; /* e.g. print $fh "foo" */
5129 else if ( memCHRs("&*<%", *s)
5130 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5132 PL_expect = XTERM; /* e.g. print $fh &sub */
5134 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5135 char tmpbuf[sizeof PL_tokenbuf];
5138 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5139 if ((t2 = keyword(tmpbuf, len, 0))) {
5140 /* binary operators exclude handle interpretations */
5152 PL_expect = XTERM; /* e.g. print $fh length() */
5157 PL_expect = XTERM; /* e.g. print $fh subr() */
5160 else if (isDIGIT(*s))
5161 PL_expect = XTERM; /* e.g. print $fh 3 */
5162 else if (*s == '.' && isDIGIT(s[1]))
5163 PL_expect = XTERM; /* e.g. print $fh .3 */
5164 else if ((*s == '?' || *s == '-' || *s == '+')
5165 && !isSPACE(s[1]) && s[1] != '=')
5166 PL_expect = XTERM; /* e.g. print $fh -1 */
5167 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5169 PL_expect = XTERM; /* e.g. print $fh /.../
5170 XXX except DORDOR operator
5172 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5174 PL_expect = XTERM; /* print $fh <<"EOF" */
5177 force_ident_maybe_lex('$');
5182 yyl_sub(pTHX_ char *s, const int key)
5184 char * const tmpbuf = PL_tokenbuf + 1;
5185 bool have_name, have_proto;
5187 SV *format_name = NULL;
5188 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5190 SSize_t off = s-SvPVX(PL_linestr);
5193 s = skipspace(s); /* can move PL_linestr */
5195 d = SvPVX(PL_linestr)+off;
5197 SAVEBOOL(PL_parser->sig_seen);
5198 PL_parser->sig_seen = FALSE;
5200 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5202 || (*s == ':' && s[1] == ':'))
5205 PL_expect = XATTRBLOCK;
5206 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5208 if (key == KEY_format)
5209 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5211 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5213 PL_tokenbuf, len + 1, 0
5215 sv_setpvn(PL_subname, tmpbuf, len);
5217 sv_setsv(PL_subname,PL_curstname);
5218 sv_catpvs(PL_subname,"::");
5219 sv_catpvn(PL_subname,tmpbuf,len);
5221 if (SvUTF8(PL_linestr))
5222 SvUTF8_on(PL_subname);
5228 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5230 /* diag_listed_as: Missing name in "%s sub" */
5232 "Missing name in \"%s\"", PL_bufptr);
5234 PL_expect = XATTRTERM;
5235 sv_setpvs(PL_subname,"?");
5239 if (key == KEY_format) {
5241 NEXTVAL_NEXTTOKE.opval
5242 = newSVOP(OP_CONST,0, format_name);
5243 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5244 force_next(BAREWORD);
5249 /* Look for a prototype */
5250 if (*s == '(' && !is_sigsub) {
5251 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5253 Perl_croak(aTHX_ "Prototype not terminated");
5254 COPLINE_SET_FROM_MULTI_END;
5255 (void)validate_proto(PL_subname, PL_lex_stuff,
5256 ckWARN(WARN_ILLEGALPROTO), 0);
5264 if ( !(*s == ':' && s[1] != ':')
5265 && (*s != '{' && *s != '(') && key != KEY_format)
5267 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5268 key == KEY_DESTROY || key == KEY_BEGIN ||
5269 key == KEY_UNITCHECK || key == KEY_CHECK ||
5270 key == KEY_INIT || key == KEY_END ||
5271 key == KEY_my || key == KEY_state ||
5274 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5275 else if (*s != ';' && *s != '}')
5276 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5280 NEXTVAL_NEXTTOKE.opval =
5281 newSVOP(OP_CONST, 0, PL_lex_stuff);
5282 PL_lex_stuff = NULL;
5287 sv_setpvs(PL_subname, "__ANON__");
5289 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5295 force_ident_maybe_lex('&');
5303 yyl_interpcasemod(pTHX_ char *s)
5306 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5308 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5309 PL_bufptr, PL_bufend, *PL_bufptr);
5312 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5314 if (PL_lex_casemods) {
5315 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5316 PL_lex_casestack[PL_lex_casemods] = '\0';
5318 if (PL_bufptr != PL_bufend
5319 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5320 || oldmod == 'F')) {
5322 PL_lex_state = LEX_INTERPCONCAT;
5324 PL_lex_allbrackets--;
5327 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5328 /* Got an unpaired \E */
5329 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5330 "Useless use of \\E");
5332 if (PL_bufptr != PL_bufend)
5334 PL_lex_state = LEX_INTERPCONCAT;
5339 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5342 if (s[1] == '\\' && s[2] == 'E') {
5344 PL_lex_state = LEX_INTERPCONCAT;
5349 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5350 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5352 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5354 if ((*s == 'L' || *s == 'U' || *s == 'F')
5355 && (strpbrk(PL_lex_casestack, "LUF")))
5357 PL_lex_casestack[--PL_lex_casemods] = '\0';
5358 PL_lex_allbrackets--;
5361 if (PL_lex_casemods > 10)
5362 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5363 PL_lex_casestack[PL_lex_casemods++] = *s;
5364 PL_lex_casestack[PL_lex_casemods] = '\0';
5365 PL_lex_state = LEX_INTERPCONCAT;
5366 NEXTVAL_NEXTTOKE.ival = 0;
5367 force_next((2<<24)|'(');
5369 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5371 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5373 NEXTVAL_NEXTTOKE.ival = OP_LC;
5375 NEXTVAL_NEXTTOKE.ival = OP_UC;
5377 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5379 NEXTVAL_NEXTTOKE.ival = OP_FC;
5381 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5385 if (PL_lex_starts) {
5388 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5389 if (PL_lex_casemods == 1 && PL_lex_inpat)
5392 AopNOASSIGN(OP_CONCAT);
5400 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5401 GV **pgv, GV ***pgvp)
5403 GV *ogv = NULL; /* override (winner) */
5404 GV *hgv = NULL; /* hidden (loser) */
5407 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5409 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5410 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5412 && (cv = GvCVu(gv)))
5414 if (GvIMPORTED_CV(gv))
5416 else if (! CvMETHOD(cv))
5420 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5422 && (isGV_with_GP(gv)
5423 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5424 : SvPCS_IMPORTED(gv)
5425 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5435 *orig_keyword = key;
5436 return 0; /* overridden by import or by GLOBAL */
5438 else if (gv && !*pgvp
5439 && -key==KEY_lock /* XXX generalizable kludge */
5442 return 0; /* any sub overrides "weak" keyword */
5444 else { /* no override */
5446 if (key == KEY_dump) {
5447 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5451 if (hgv && key != KEY_x) /* never ambiguous */
5452 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5453 "Ambiguous call resolved as CORE::%s(), "
5454 "qualify as such or use &",
5461 yyl_qw(pTHX_ char *s, STRLEN len)
5465 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5467 missingterm(NULL, 0);
5469 COPLINE_SET_FROM_MULTI_END;
5470 PL_expect = XOPERATOR;
5471 if (SvCUR(PL_lex_stuff)) {
5472 int warned_comma = !ckWARN(WARN_QW);
5473 int warned_comment = warned_comma;
5474 char *d = SvPV_force(PL_lex_stuff, len);
5476 for (; isSPACE(*d) && len; --len, ++d)
5481 if (!warned_comma || !warned_comment) {
5482 for (; !isSPACE(*d) && len; --len, ++d) {
5483 if (!warned_comma && *d == ',') {
5484 Perl_warner(aTHX_ packWARN(WARN_QW),
5485 "Possible attempt to separate words with commas");
5488 else if (!warned_comment && *d == '#') {
5489 Perl_warner(aTHX_ packWARN(WARN_QW),
5490 "Possible attempt to put comments in qw() list");
5496 for (; !isSPACE(*d) && len; --len, ++d)
5499 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5500 words = op_append_elem(OP_LIST, words,
5501 newSVOP(OP_CONST, 0, tokeq(sv)));
5506 words = newNULLLIST();
5507 SvREFCNT_dec_NN(PL_lex_stuff);
5508 PL_lex_stuff = NULL;
5509 PL_expect = XOPERATOR;
5510 pl_yylval.opval = sawparens(words);
5515 yyl_hyphen(pTHX_ char *s)
5517 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5525 while (s < PL_bufend && SPACE_OR_TAB(*s))
5528 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5529 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5530 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5531 OPERATOR('-'); /* unary minus */
5534 case 'r': ftst = OP_FTEREAD; break;
5535 case 'w': ftst = OP_FTEWRITE; break;
5536 case 'x': ftst = OP_FTEEXEC; break;
5537 case 'o': ftst = OP_FTEOWNED; break;
5538 case 'R': ftst = OP_FTRREAD; break;
5539 case 'W': ftst = OP_FTRWRITE; break;
5540 case 'X': ftst = OP_FTREXEC; break;
5541 case 'O': ftst = OP_FTROWNED; break;
5542 case 'e': ftst = OP_FTIS; break;
5543 case 'z': ftst = OP_FTZERO; break;
5544 case 's': ftst = OP_FTSIZE; break;
5545 case 'f': ftst = OP_FTFILE; break;
5546 case 'd': ftst = OP_FTDIR; break;
5547 case 'l': ftst = OP_FTLINK; break;
5548 case 'p': ftst = OP_FTPIPE; break;
5549 case 'S': ftst = OP_FTSOCK; break;
5550 case 'u': ftst = OP_FTSUID; break;
5551 case 'g': ftst = OP_FTSGID; break;
5552 case 'k': ftst = OP_FTSVTX; break;
5553 case 'b': ftst = OP_FTBLK; break;
5554 case 'c': ftst = OP_FTCHR; break;
5555 case 't': ftst = OP_FTTTY; break;
5556 case 'T': ftst = OP_FTTEXT; break;
5557 case 'B': ftst = OP_FTBINARY; break;
5558 case 'M': case 'A': case 'C':
5559 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5561 case 'M': ftst = OP_FTMTIME; break;
5562 case 'A': ftst = OP_FTATIME; break;
5563 case 'C': ftst = OP_FTCTIME; break;
5571 PL_last_uni = PL_oldbufptr;
5572 PL_last_lop_op = (OPCODE)ftst;
5574 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5579 /* Assume it was a minus followed by a one-letter named
5580 * subroutine call (or a -bareword), then. */
5582 PerlIO_printf(Perl_debug_log,
5583 "### '-%c' looked like a file test but was not\n",
5590 const char tmp = *s++;
5593 if (PL_expect == XOPERATOR)
5598 else if (*s == '>') {
5601 if (((*s == '$' || *s == '&') && s[1] == '*')
5602 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5603 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5604 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5607 PL_expect = XPOSTDEREF;
5610 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5611 s = force_word(s,METHOD,FALSE,TRUE);
5619 if (PL_expect == XOPERATOR) {
5621 && !PL_lex_allbrackets
5622 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5630 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5632 OPERATOR('-'); /* unary minus */
5638 yyl_plus(pTHX_ char *s)
5640 const char tmp = *s++;
5643 if (PL_expect == XOPERATOR)
5648 if (PL_expect == XOPERATOR) {
5650 && !PL_lex_allbrackets
5651 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5659 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5666 yyl_star(pTHX_ char *s)
5668 if (PL_expect == XPOSTDEREF)
5671 if (PL_expect != XOPERATOR) {
5672 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5673 PL_expect = XOPERATOR;
5674 force_ident(PL_tokenbuf, '*');
5683 if (*s == '=' && !PL_lex_allbrackets
5684 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5693 && !PL_lex_allbrackets
5694 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5704 yyl_percent(pTHX_ char *s)
5706 if (PL_expect == XOPERATOR) {
5708 && !PL_lex_allbrackets
5709 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5716 else if (PL_expect == XPOSTDEREF)
5719 PL_tokenbuf[0] = '%';
5720 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5722 if (!PL_tokenbuf[1]) {
5725 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5726 && intuit_more(s, PL_bufend)) {
5728 PL_tokenbuf[0] = '@';
5730 PL_expect = XOPERATOR;
5731 force_ident_maybe_lex('%');
5736 yyl_caret(pTHX_ char *s)
5739 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5740 if (bof && s[1] == '.')
5742 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5743 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5749 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5753 yyl_colon(pTHX_ char *s)
5757 switch (PL_expect) {
5759 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5761 PL_bufptr = s; /* update in case we back off */
5764 "Use of := for an empty attribute list is not allowed");
5771 PL_expect = XTERMBLOCK;
5773 /* NB: as well as parsing normal attributes, we also end up
5774 * here if there is something looking like attributes
5775 * following a signature (which is illegal, but used to be
5776 * legal in 5.20..5.26). If the latter, we still parse the
5777 * attributes so that error messages(s) are less confusing,
5778 * but ignore them (parser->sig_seen).
5782 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5783 bool sig = PL_parser->sig_seen;
5787 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5788 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5789 if (tmp < 0) tmp = -tmp;
5804 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5806 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5811 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5813 COPLINE_SET_FROM_MULTI_END;
5816 sv_catsv(sv, PL_lex_stuff);
5817 attrs = op_append_elem(OP_LIST, attrs,
5818 newSVOP(OP_CONST, 0, sv));
5819 SvREFCNT_dec_NN(PL_lex_stuff);
5820 PL_lex_stuff = NULL;
5823 /* NOTE: any CV attrs applied here need to be part of
5824 the CVf_BUILTIN_ATTRS define in cv.h! */
5825 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5828 CvLVALUE_on(PL_compcv);
5830 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5833 CvMETHOD_on(PL_compcv);
5835 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5838 Perl_ck_warner_d(aTHX_
5839 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5840 ":const is experimental"
5842 CvANONCONST_on(PL_compcv);
5843 if (!CvANON(PL_compcv))
5844 yyerror(":const is not permitted on named "
5848 /* After we've set the flags, it could be argued that
5849 we don't need to do the attributes.pm-based setting
5850 process, and shouldn't bother appending recognized
5851 flags. To experiment with that, uncomment the
5852 following "else". (Note that's already been
5853 uncommented. That keeps the above-applied built-in
5854 attributes from being intercepted (and possibly
5855 rejected) by a package's attribute routines, but is
5856 justified by the performance win for the common case
5857 of applying only built-in attributes.) */
5859 attrs = op_append_elem(OP_LIST, attrs,
5860 newSVOP(OP_CONST, 0,
5864 if (*s == ':' && s[1] != ':')
5867 break; /* require real whitespace or :'s */
5868 /* XXX losing whitespace on sequential attributes here */
5873 && !(PL_expect == XOPERATOR
5874 ? (*s == '=' || *s == ')')
5875 : (*s == '{' || *s == '(')))
5877 const char q = ((*s == '\'') ? '"' : '\'');
5878 /* If here for an expression, and parsed no attrs, back off. */
5879 if (PL_expect == XOPERATOR && !attrs) {
5883 /* MUST advance bufptr here to avoid bogus "at end of line"
5884 context messages from yyerror().
5887 yyerror( (const char *)
5889 ? Perl_form(aTHX_ "Invalid separator character "
5890 "%c%c%c in attribute list", q, *s, q)
5891 : "Unterminated attribute list" ) );
5898 if (PL_parser->sig_seen) {
5899 /* see comment about about sig_seen and parser error
5903 Perl_croak(aTHX_ "Subroutine attributes must come "
5904 "before the signature");
5907 NEXTVAL_NEXTTOKE.opval = attrs;
5913 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5918 PL_lex_allbrackets--;
5923 yyl_subproto(pTHX_ char *s, CV *cv)
5925 STRLEN protolen = CvPROTOLEN(cv);
5926 const char *proto = CvPROTO(cv);
5929 proto = S_strip_spaces(aTHX_ proto, &protolen);
5932 if ((optional = *proto == ';')) {
5935 } while (*proto == ';');
5941 *proto == '$' || *proto == '_'
5942 || *proto == '*' || *proto == '+'
5947 *proto == '\\' && proto[1] && proto[2] == '\0'
5950 UNIPROTO(UNIOPSUB,optional);
5953 if (*proto == '\\' && proto[1] == '[') {
5954 const char *p = proto + 2;
5955 while(*p && *p != ']')
5957 if(*p == ']' && !p[1])
5958 UNIPROTO(UNIOPSUB,optional);
5961 if (*proto == '&' && *s == '{') {
5963 sv_setpvs(PL_subname, "__ANON__");
5965 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5966 if (!PL_lex_allbrackets
5967 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5969 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5978 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5981 if (PL_lex_brackets > 100) {
5982 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5985 switch (PL_expect) {
5988 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5989 PL_lex_allbrackets++;
5990 OPERATOR(HASHBRACK);
5992 while (s < PL_bufend && SPACE_OR_TAB(*s))
5995 PL_tokenbuf[0] = '\0';
5996 if (d < PL_bufend && *d == '-') {
5997 PL_tokenbuf[0] = '-';
5999 while (d < PL_bufend && SPACE_OR_TAB(*d))
6002 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6004 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6006 while (d < PL_bufend && SPACE_OR_TAB(*d))
6009 const char minus = (PL_tokenbuf[0] == '-');
6010 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6018 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6019 PL_lex_allbrackets++;
6024 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6025 PL_lex_allbrackets++;
6029 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6030 PL_lex_allbrackets++;
6035 if (PL_oldoldbufptr == PL_last_lop)
6036 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6038 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6039 PL_lex_allbrackets++;
6042 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6044 /* This hack is to get the ${} in the message. */
6046 yyerror("syntax error");
6049 OPERATOR(HASHBRACK);
6051 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6052 /* ${...} or @{...} etc., but not print {...}
6053 * Skip the disambiguation and treat this as a block.
6055 goto block_expectation;
6057 /* This hack serves to disambiguate a pair of curlies
6058 * as being a block or an anon hash. Normally, expectation
6059 * determines that, but in cases where we're not in a
6060 * position to expect anything in particular (like inside
6061 * eval"") we have to resolve the ambiguity. This code
6062 * covers the case where the first term in the curlies is a
6063 * quoted string. Most other cases need to be explicitly
6064 * disambiguated by prepending a "+" before the opening
6065 * curly in order to force resolution as an anon hash.
6067 * XXX should probably propagate the outer expectation
6068 * into eval"" to rely less on this hack, but that could
6069 * potentially break current behavior of eval"".
6073 if (*s == '\'' || *s == '"' || *s == '`') {
6074 /* common case: get past first string, handling escapes */
6075 for (t++; t < PL_bufend && *t != *s;)
6080 else if (*s == 'q') {
6083 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6084 && !isWORDCHAR(*t))))
6086 /* skip q//-like construct */
6088 char open, close, term;
6091 while (t < PL_bufend && isSPACE(*t))
6093 /* check for q => */
6094 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6095 OPERATOR(HASHBRACK);
6099 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6103 for (t++; t < PL_bufend; t++) {
6104 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6106 else if (*t == open)
6110 for (t++; t < PL_bufend; t++) {
6111 if (*t == '\\' && t+1 < PL_bufend)
6113 else if (*t == close && --brackets <= 0)
6115 else if (*t == open)
6122 /* skip plain q word */
6123 while ( t < PL_bufend
6124 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6126 t += UTF ? UTF8SKIP(t) : 1;
6129 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6130 t += UTF ? UTF8SKIP(t) : 1;
6131 while ( t < PL_bufend
6132 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6134 t += UTF ? UTF8SKIP(t) : 1;
6137 while (t < PL_bufend && isSPACE(*t))
6139 /* if comma follows first term, call it an anon hash */
6140 /* XXX it could be a comma expression with loop modifiers */
6141 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6142 || (*t == '=' && t[1] == '>')))
6143 OPERATOR(HASHBRACK);
6144 if (PL_expect == XREF) {
6146 /* If there is an opening brace or 'sub:', treat it
6147 as a term to make ${{...}}{k} and &{sub:attr...}
6148 dwim. Otherwise, treat it as a statement, so
6149 map {no strict; ...} works.
6156 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6169 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6176 pl_yylval.ival = CopLINE(PL_curcop);
6177 PL_copline = NOLINE; /* invalidate current command line number */
6178 TOKEN(formbrack ? '=' : '{');
6182 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6184 assert(s != PL_bufend);
6187 if (PL_lex_brackets <= 0)
6188 /* diag_listed_as: Unmatched right %s bracket */
6189 yyerror("Unmatched right curly bracket");
6191 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6193 PL_lex_allbrackets--;
6195 if (PL_lex_state == LEX_INTERPNORMAL) {
6196 if (PL_lex_brackets == 0) {
6197 if (PL_expect & XFAKEBRACK) {
6198 PL_expect &= XENUMMASK;
6199 PL_lex_state = LEX_INTERPEND;
6201 return yylex(); /* ignore fake brackets */
6203 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6204 && SvEVALED(PL_lex_repl))
6205 PL_lex_state = LEX_INTERPEND;
6206 else if (*s == '-' && s[1] == '>')
6207 PL_lex_state = LEX_INTERPENDMAYBE;
6208 else if (*s != '[' && *s != '{')
6209 PL_lex_state = LEX_INTERPEND;
6213 if (PL_expect & XFAKEBRACK) {
6214 PL_expect &= XENUMMASK;
6216 return yylex(); /* ignore fake brackets */
6219 force_next(formbrack ? '.' : '}');
6220 if (formbrack) LEAVE_with_name("lex_format");
6221 if (formbrack == 2) { /* means . where arguments were expected */
6230 yyl_ampersand(pTHX_ char *s)
6232 if (PL_expect == XPOSTDEREF)
6237 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6238 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6246 if (PL_expect == XOPERATOR) {
6249 if ( PL_bufptr == PL_linestart
6250 && ckWARN(WARN_SEMICOLON)
6251 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6253 CopLINE_dec(PL_curcop);
6254 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6255 CopLINE_inc(PL_curcop);
6258 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6260 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6261 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6267 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6272 PL_tokenbuf[0] = '&';
6273 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6274 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6277 force_ident_maybe_lex('&');
6285 yyl_verticalbar(pTHX_ char *s)
6292 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6293 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6302 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6305 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6306 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6311 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6315 yyl_bang(pTHX_ char *s)
6317 const char tmp = *s++;
6319 /* was this !=~ where !~ was meant?
6320 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6322 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6323 const char *t = s+1;
6325 while (t < PL_bufend && isSPACE(*t))
6328 if (*t == '/' || *t == '?'
6329 || ((*t == 'm' || *t == 's' || *t == 'y')
6330 && !isWORDCHAR(t[1]))
6331 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6332 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6333 "!=~ should be !~");
6336 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6352 yyl_snail(pTHX_ char *s)
6354 if (PL_expect == XPOSTDEREF)
6356 PL_tokenbuf[0] = '@';
6357 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6358 if (PL_expect == XOPERATOR) {
6360 if (PL_bufptr > s) {
6362 PL_bufptr = PL_oldbufptr;
6367 if (!PL_tokenbuf[1]) {
6370 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6372 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6373 && intuit_more(s, PL_bufend))
6376 PL_tokenbuf[0] = '%';
6378 /* Warn about @ where they meant $. */
6379 if (*s == '[' || *s == '{') {
6380 if (ckWARN(WARN_SYNTAX)) {
6381 S_check_scalar_slice(aTHX_ s);
6385 PL_expect = XOPERATOR;
6386 force_ident_maybe_lex('@');
6391 yyl_slash(pTHX_ char *s)
6393 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6394 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6400 else if (PL_expect == XOPERATOR) {
6402 if (*s == '=' && !PL_lex_allbrackets
6403 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6411 /* Disable warning on "study /blah/" */
6412 if ( PL_oldoldbufptr == PL_last_uni
6413 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6414 || memNE(PL_last_uni, "study", 5)
6415 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6418 s = scan_pat(s,OP_MATCH);
6419 TERM(sublex_start());
6424 yyl_leftsquare(pTHX_ char *s)
6428 if (PL_lex_brackets > 100)
6429 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6430 PL_lex_brackstack[PL_lex_brackets++] = 0;
6431 PL_lex_allbrackets++;
6437 yyl_rightsquare(pTHX_ char *s)
6439 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6442 if (PL_lex_brackets <= 0)
6443 /* diag_listed_as: Unmatched right %s bracket */
6444 yyerror("Unmatched right square bracket");
6447 PL_lex_allbrackets--;
6448 if (PL_lex_state == LEX_INTERPNORMAL) {
6449 if (PL_lex_brackets == 0) {
6450 if (*s == '-' && s[1] == '>')
6451 PL_lex_state = LEX_INTERPENDMAYBE;
6452 else if (*s != '[' && *s != '{')
6453 PL_lex_state = LEX_INTERPEND;
6460 yyl_tilde(pTHX_ char *s)
6463 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6464 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6467 Perl_ck_warner_d(aTHX_
6468 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6469 "Smartmatch is experimental");
6470 NCEop(OP_SMARTMATCH);
6473 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6475 BCop(OP_SCOMPLEMENT);
6477 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6481 yyl_leftparen(pTHX_ char *s)
6483 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6484 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6488 PL_lex_allbrackets++;
6493 yyl_rightparen(pTHX_ char *s)
6495 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6498 PL_lex_allbrackets--;
6506 yyl_leftpointy(pTHX_ char *s)
6510 if (PL_expect != XOPERATOR) {
6511 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6513 if (s[1] == '<' && s[2] != '>')
6514 s = scan_heredoc(s);
6516 s = scan_inputsymbol(s);
6517 PL_expect = XOPERATOR;
6518 TOKEN(sublex_start());
6525 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6529 SHop(OP_LEFT_SHIFT);
6534 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6541 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6549 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6558 yyl_rightpointy(pTHX_ char *s)
6560 const char tmp = *s++;
6563 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6567 SHop(OP_RIGHT_SHIFT);
6569 else if (tmp == '=') {
6570 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6578 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6587 yyl_sglquote(pTHX_ char *s)
6589 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6591 missingterm(NULL, 0);
6592 COPLINE_SET_FROM_MULTI_END;
6593 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6594 if (PL_expect == XOPERATOR) {
6597 pl_yylval.ival = OP_CONST;
6598 TERM(sublex_start());
6602 yyl_dblquote(pTHX_ char *s)
6606 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6609 printbuf("### Saw string before %s\n", s);
6611 PerlIO_printf(Perl_debug_log,
6612 "### Saw unterminated string\n");
6614 if (PL_expect == XOPERATOR) {
6618 missingterm(NULL, 0);
6619 pl_yylval.ival = OP_CONST;
6620 /* FIXME. I think that this can be const if char *d is replaced by
6621 more localised variables. */
6622 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6623 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6624 pl_yylval.ival = OP_STRINGIFY;
6628 if (pl_yylval.ival == OP_CONST)
6629 COPLINE_SET_FROM_MULTI_END;
6630 TERM(sublex_start());
6634 yyl_backtick(pTHX_ char *s)
6636 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6639 printbuf("### Saw backtick string before %s\n", s);
6641 PerlIO_printf(Perl_debug_log,
6642 "### Saw unterminated backtick string\n");
6644 if (PL_expect == XOPERATOR)
6645 no_op("Backticks",s);
6647 missingterm(NULL, 0);
6648 pl_yylval.ival = OP_BACKTICK;
6649 TERM(sublex_start());
6653 yyl_backslash(pTHX_ char *s)
6655 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6656 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6658 if (PL_expect == XOPERATOR)
6659 no_op("Backslash",s);
6664 yyl_data_handle(pTHX)
6666 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6669 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6672 gv_init(gv,stash,"DATA",4,0);
6676 GvIOp(gv) = newIO();
6677 IoIFP(GvIOp(gv)) = PL_rsfp;
6679 /* Mark this internal pseudo-handle as clean */
6680 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6681 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6682 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6684 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6686 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6687 /* if the script was opened in binmode, we need to revert
6688 * it to text mode for compatibility; but only iff it has CRs
6689 * XXX this is a questionable hack at best. */
6690 if (PL_bufend-PL_bufptr > 2
6691 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6694 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6695 loc = PerlIO_tell(PL_rsfp);
6696 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6698 if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6700 PerlIO_seek(PL_rsfp, loc, 0);
6705 #ifdef PERLIO_LAYERS
6708 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6715 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6716 __attribute__noreturn__;
6718 PERL_STATIC_NO_RET void
6719 yyl_croak_unrecognised(pTHX_ char *s)
6721 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6727 STRLEN skiplen = UTF8SKIP(s);
6728 STRLEN stravail = PL_bufend - s;
6729 c = sv_uni_display(dsv, newSVpvn_flags(s,
6730 skiplen > stravail ? stravail : skiplen,
6731 SVs_TEMP | SVf_UTF8),
6732 10, UNI_DISPLAY_ISPRINT);
6735 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6738 if (s >= PL_linestart) {
6742 /* somehow (probably due to a parse failure), PL_linestart has advanced
6743 * pass PL_bufptr, get a reasonable beginning of line
6746 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6749 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6750 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6751 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6754 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6755 UTF8fARG(UTF, (s - d), d),
6760 yyl_require(pTHX_ char *s, I32 orig_keyword)
6764 s = force_version(s, FALSE);
6766 else if (*s != 'v' || !isDIGIT(s[1])
6767 || (s = force_version(s, TRUE), *s == 'v'))
6769 *PL_tokenbuf = '\0';
6770 s = force_word(s,BAREWORD,TRUE,TRUE);
6771 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6772 PL_tokenbuf + sizeof(PL_tokenbuf),
6775 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6776 GV_ADD | (UTF ? SVf_UTF8 : 0));
6779 yyerror("<> at require-statement should be quotes");
6782 if (orig_keyword == KEY_require)
6787 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6789 PL_last_uni = PL_oldbufptr;
6790 PL_last_lop_op = OP_REQUIRE;
6792 return REPORT( (int)REQUIRE );
6796 yyl_foreach(pTHX_ char *s)
6798 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6800 pl_yylval.ival = CopLINE(PL_curcop);
6802 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6804 SSize_t s_off = s - SvPVX(PL_linestr);
6807 if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6810 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6815 /* skip optional package name, as in "for my abc $x (..)" */
6816 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6817 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6820 if (*p != '$' && *p != '\\')
6821 Perl_croak(aTHX_ "Missing $ on loop variable");
6823 /* The buffer may have been reallocated, update s */
6824 s = SvPVX(PL_linestr) + s_off;
6830 yyl_do(pTHX_ char *s, I32 orig_keyword)
6839 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6841 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6842 && !keyword(PL_tokenbuf + 1, len, 0)) {
6843 SSize_t off = s-SvPVX(PL_linestr);
6845 s = SvPVX(PL_linestr)+off;
6847 force_ident_maybe_lex('&');
6852 if (orig_keyword == KEY_do)
6860 yyl_my(pTHX_ char *s, I32 my)
6864 yyerror(Perl_form(aTHX_
6865 "Can't redeclare \"%s\" in \"%s\"",
6866 my == KEY_my ? "my" :
6867 my == KEY_state ? "state" : "our",
6868 PL_in_my == KEY_my ? "my" :
6869 PL_in_my == KEY_state ? "state" : "our"));
6873 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6875 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6876 if (memEQs(PL_tokenbuf, len, "sub"))
6877 return yyl_sub(aTHX_ s, my);
6878 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6879 if (!PL_in_my_stash) {
6883 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6884 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6885 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6888 else if (*s == '\\') {
6889 if (!FEATURE_MYREF_IS_ENABLED)
6890 Perl_croak(aTHX_ "The experimental declared_refs "
6891 "feature is not enabled");
6892 Perl_ck_warner_d(aTHX_
6893 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6894 "Declaring references is experimental");
6899 static int yyl_try(pTHX_ char*);
6902 yyl_eol_needs_semicolon(pTHX_ char **ps)
6905 if (PL_lex_state != LEX_NORMAL
6906 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6908 const bool in_comment = *s == '#';
6910 if (*s == '#' && s == PL_linestart && PL_in_eval
6911 && !PL_rsfp && !PL_parser->filtered) {
6912 /* handle eval qq[#line 1 "foo"\n ...] */
6913 CopLINE_dec(PL_curcop);
6914 incline(s, PL_bufend);
6917 while (d < PL_bufend && *d != '\n')
6922 if (in_comment && d == PL_bufend
6923 && PL_lex_state == LEX_INTERPNORMAL
6924 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6925 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6927 incline(s, PL_bufend);
6928 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6929 PL_lex_state = LEX_FORMLINE;
6930 force_next(FORMRBRACK);
6936 while (s < PL_bufend && *s != '\n')
6938 if (s < PL_bufend) {
6941 incline(s, PL_bufend);
6949 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6957 bof = cBOOL(PL_rsfp);
6960 PL_bufptr = PL_bufend;
6961 COPLINE_INC_WITH_HERELINES;
6962 if (!lex_next_chunk(fake_eof)) {
6963 CopLINE_dec(PL_curcop);
6965 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
6967 CopLINE_dec(PL_curcop);
6969 /* If it looks like the start of a BOM or raw UTF-16,
6970 * check if it in fact is. */
6973 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6977 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6978 bof = (offset == (Off_t)SvCUR(PL_linestr));
6979 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6980 /* offset may include swallowed CR */
6982 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6985 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6986 s = swallow_bom((U8*)s);
6989 if (PL_parser->in_pod) {
6990 /* Incest with pod. */
6991 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6994 SvPVCLEAR(PL_linestr);
6995 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6996 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6997 PL_last_lop = PL_last_uni = NULL;
6998 PL_parser->in_pod = 0;
7001 if (PL_rsfp || PL_parser->filtered)
7002 incline(s, PL_bufend);
7003 } while (PL_parser->in_pod);
7005 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7006 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7007 PL_last_lop = PL_last_uni = NULL;
7008 if (CopLINE(PL_curcop) == 1) {
7009 while (s < PL_bufend && isSPACE(*s))
7011 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7015 if (*s == '#' && *(s+1) == '!')
7017 #ifdef ALTERNATE_SHEBANG
7019 static char const as[] = ALTERNATE_SHEBANG;
7020 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7021 d = s + (sizeof(as) - 1);
7023 #endif /* ALTERNATE_SHEBANG */
7032 while (*d && !isSPACE(*d))
7036 #ifdef ARG_ZERO_IS_SCRIPT
7037 if (ipathend > ipath) {
7039 * HP-UX (at least) sets argv[0] to the script name,
7040 * which makes $^X incorrect. And Digital UNIX and Linux,
7041 * at least, set argv[0] to the basename of the Perl
7042 * interpreter. So, having found "#!", we'll set it right.
7044 SV* copfilesv = CopFILESV(PL_curcop);
7047 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7049 assert(SvPOK(x) || SvGMAGICAL(x));
7050 if (sv_eq(x, copfilesv)) {
7051 sv_setpvn(x, ipath, ipathend - ipath);
7057 const char *bstart = SvPV_const(copfilesv, blen);
7058 const char * const lstart = SvPV_const(x, llen);
7060 bstart += blen - llen;
7061 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7062 sv_setpvn(x, ipath, ipathend - ipath);
7069 /* Anything to do if no copfilesv? */
7071 TAINT_NOT; /* $^X is always tainted, but that's OK */
7073 #endif /* ARG_ZERO_IS_SCRIPT */
7078 d = instr(s,"perl -");
7080 d = instr(s,"perl");
7082 /* avoid getting into infinite loops when shebang
7083 * line contains "Perl" rather than "perl" */
7085 for (d = ipathend-4; d >= ipath; --d) {
7086 if (isALPHA_FOLD_EQ(*d, 'p')
7087 && !ibcmp(d, "perl", 4))
7097 #ifdef ALTERNATE_SHEBANG
7099 * If the ALTERNATE_SHEBANG on this system starts with a
7100 * character that can be part of a Perl expression, then if
7101 * we see it but not "perl", we're probably looking at the
7102 * start of Perl code, not a request to hand off to some
7103 * other interpreter. Similarly, if "perl" is there, but
7104 * not in the first 'word' of the line, we assume the line
7105 * contains the start of the Perl program.
7107 if (d && *s != '#') {
7108 const char *c = ipath;
7109 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7112 d = NULL; /* "perl" not in first word; ignore */
7114 *s = '#'; /* Don't try to parse shebang line */
7116 #endif /* ALTERNATE_SHEBANG */
7121 && !instr(s,"indir")
7122 && instr(PL_origargv[0],"perl"))
7128 while (s < PL_bufend && isSPACE(*s))
7130 if (s < PL_bufend) {
7131 Newx(newargv,PL_origargc+3,char*);
7133 while (s < PL_bufend && !isSPACE(*s))
7136 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7139 newargv = PL_origargv;
7142 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7144 Perl_croak(aTHX_ "Can't exec %s", ipath);
7147 while (*d && !isSPACE(*d))
7149 while (SPACE_OR_TAB(*d))
7153 const bool switches_done = PL_doswitches;
7154 const U32 oldpdb = PL_perldb;
7155 const bool oldn = PL_minus_n;
7156 const bool oldp = PL_minus_p;
7160 bool baduni = FALSE;
7162 const char *d2 = d1 + 1;
7163 if (parse_unicode_opts((const char **)&d2)
7167 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7168 const char * const m = d1;
7169 while (*d1 && !isSPACE(*d1))
7171 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7174 d1 = moreswitches(d1);
7176 if (PL_doswitches && !switches_done) {
7177 int argc = PL_origargc;
7178 char **argv = PL_origargv;
7181 } while (argc && argv[0][0] == '-' && argv[0][1]);
7182 init_argv_symbols(argc,argv);
7184 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7185 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7186 /* if we have already added "LINE: while (<>) {",
7187 we must not do it again */
7189 SvPVCLEAR(PL_linestr);
7190 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7191 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7192 PL_last_lop = PL_last_uni = NULL;
7193 PL_preambled = FALSE;
7194 if (PERLDB_LINE_OR_SAVESRC)
7195 (void)gv_fetchfile(PL_origfilename);
7203 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7204 PL_lex_state = LEX_FORMLINE;
7205 force_next(FORMRBRACK);
7214 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7218 = newSVOP(OP_CONST, 0,
7219 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7220 pl_yylval.opval->op_private = OPpCONST_BARE;
7225 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7227 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7228 && PL_parser->saw_infix_sigil)
7230 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7231 "Operator or semicolon missing before %c%" UTF8f,
7233 UTF8fARG(UTF, strlen(PL_tokenbuf),
7235 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7236 "Ambiguous use of %c resolved as operator %c",
7237 lastchar, lastchar);
7243 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7247 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7248 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7249 if (SvTYPE(sv) == SVt_PVAV)
7250 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7253 pl_yylval.opval->op_private = 0;
7254 pl_yylval.opval->op_folded = 1;
7255 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7260 op_free(pl_yylval.opval);
7262 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7263 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7264 PL_last_lop = PL_oldbufptr;
7265 PL_last_lop_op = OP_ENTERSUB;
7267 /* Is there a prototype? */
7269 int k = yyl_subproto(aTHX_ s, cv);
7274 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7276 force_next(off ? PRIVATEREF : BAREWORD);
7277 if (!PL_lex_allbrackets
7278 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7280 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7286 /* Honour "reserved word" warnings, and enforce strict subs */
7288 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7290 /* after "print" and similar functions (corresponding to
7291 * "F? L" in opcode.pl), whatever wasn't already parsed as
7292 * a filehandle should be subject to "strict subs".
7293 * Likewise for the optional indirect-object argument to system
7294 * or exec, which can't be a bareword */
7295 if ((PL_last_lop_op == OP_PRINT
7296 || PL_last_lop_op == OP_PRTF
7297 || PL_last_lop_op == OP_SAY
7298 || PL_last_lop_op == OP_SYSTEM
7299 || PL_last_lop_op == OP_EXEC)
7300 && (PL_hints & HINT_STRICT_SUBS))
7302 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7305 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7306 char *d = PL_tokenbuf;
7309 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7310 /* PL_warn_reserved is constant */
7311 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7312 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7314 GCC_DIAG_RESTORE_STMT;
7320 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7323 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7325 bool no_op_error = FALSE;
7326 /* Use this var to track whether intuit_method has been
7327 called. intuit_method returns 0 or > 255. */
7330 if (PL_expect == XOPERATOR) {
7331 if (PL_bufptr == PL_linestart) {
7332 CopLINE_dec(PL_curcop);
7333 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7334 CopLINE_inc(PL_curcop);
7337 /* We want to call no_op with s pointing after the
7338 bareword, so defer it. But we want it to come
7339 before the Bad name croak. */
7343 /* Get the rest if it looks like a package qualifier */
7345 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7347 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7350 no_op("Bareword",s);
7351 no_op_error = FALSE;
7354 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7355 UTF8fARG(UTF, len, PL_tokenbuf),
7356 *s == '\'' ? "'" : "::");
7362 no_op("Bareword",s);
7364 /* See if the name is "Foo::",
7365 in which case Foo is a bareword
7366 (and a package name). */
7368 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7369 if (ckWARN(WARN_BAREWORD)
7370 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7371 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7372 "Bareword \"%" UTF8f
7373 "\" refers to nonexistent package",
7374 UTF8fARG(UTF, len, PL_tokenbuf));
7376 PL_tokenbuf[len] = '\0';
7385 /* if we saw a global override before, get the right name */
7388 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7390 SV *sv = newSVpvs("CORE::GLOBAL::");
7396 /* Presume this is going to be a bareword of some sort. */
7398 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7399 pl_yylval.opval->op_private = OPpCONST_BARE;
7401 /* And if "Foo::", then that's what it certainly is. */
7403 return yyl_safe_bareword(aTHX_ s, lastchar);
7406 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7407 const_op->op_private = OPpCONST_BARE;
7408 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7412 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7415 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7418 /* See if it's the indirect object for a list operator. */
7421 && PL_oldoldbufptr < PL_bufptr
7422 && (PL_oldoldbufptr == PL_last_lop
7423 || PL_oldoldbufptr == PL_last_uni)
7424 && /* NO SKIPSPACE BEFORE HERE! */
7426 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7429 bool immediate_paren = *s == '(';
7432 /* (Now we can afford to cross potential line boundary.) */
7435 /* intuit_method() can indirectly call lex_next_chunk(),
7438 s_off = s - SvPVX(PL_linestr);
7439 /* Two barewords in a row may indicate method call. */
7440 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7442 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7444 /* the code at method: doesn't use s */
7447 s = SvPVX(PL_linestr) + s_off;
7449 /* If not a declared subroutine, it's an indirect object. */
7450 /* (But it's an indir obj regardless for sort.) */
7451 /* Also, if "_" follows a filetest operator, it's a bareword */
7454 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7456 && (PL_last_lop_op != OP_MAPSTART
7457 && PL_last_lop_op != OP_GREPSTART))))
7458 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7459 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7463 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7464 yyl_strictwarn_bareword(aTHX_ lastchar);
7465 op_free(c.rv2cv_op);
7466 return yyl_safe_bareword(aTHX_ s, lastchar);
7470 PL_expect = XOPERATOR;
7473 /* Is this a word before a => operator? */
7474 if (*s == '=' && s[1] == '>' && !pkgname) {
7475 op_free(c.rv2cv_op);
7477 if (c.gvp || (c.lex && !c.off)) {
7478 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7479 /* This is our own scalar, created a few lines
7480 above, so this is safe. */
7481 SvREADONLY_off(c.sv);
7482 sv_setpv(c.sv, PL_tokenbuf);
7483 if (UTF && !IN_BYTES
7484 && is_utf8_string((U8*)PL_tokenbuf, len))
7486 SvREADONLY_on(c.sv);
7491 /* If followed by a paren, it's certainly a subroutine. */
7496 while (SPACE_OR_TAB(*d))
7498 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7499 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7501 NEXTVAL_NEXTTOKE.opval =
7502 c.off ? c.rv2cv_op : pl_yylval.opval;
7504 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7505 else op_free(c.rv2cv_op), force_next(BAREWORD);
7510 /* If followed by var or block, call it a method (unless sub) */
7512 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7513 op_free(c.rv2cv_op);
7514 PL_last_lop = PL_oldbufptr;
7515 PL_last_lop_op = OP_METHOD;
7516 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7517 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7518 PL_expect = XBLOCKTERM;
7520 return REPORT(METHOD);
7523 /* If followed by a bareword, see if it looks like indir obj. */
7527 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7528 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7531 if (c.lex && !c.off) {
7532 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7533 SvREADONLY_off(c.sv);
7534 sv_setpvn(c.sv, PL_tokenbuf, len);
7535 if (UTF && !IN_BYTES
7536 && is_utf8_string((U8*)PL_tokenbuf, len))
7538 else SvUTF8_off(c.sv);
7540 op_free(c.rv2cv_op);
7541 if (key == METHOD && !PL_lex_allbrackets
7542 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7544 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7549 /* Not a method, so call it a subroutine (if defined) */
7552 /* Check for a constant sub */
7553 c.sv = cv_const_sv_or_av(c.cv);
7554 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7557 /* Call it a bare word */
7559 if (PL_hints & HINT_STRICT_SUBS)
7560 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7562 yyl_strictwarn_bareword(aTHX_ lastchar);
7564 op_free(c.rv2cv_op);
7566 return yyl_safe_bareword(aTHX_ s, lastchar);
7570 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7573 default: /* not a keyword */
7574 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7577 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7581 newSVOP(OP_CONST, 0,
7582 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7585 case KEY___PACKAGE__:
7587 newSVOP(OP_CONST, 0, (PL_curstash
7588 ? newSVhek(HvNAME_HEK(PL_curstash))
7594 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7595 yyl_data_handle(aTHX);
7596 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7599 FUN0OP(CvCLONE(PL_compcv)
7600 ? newOP(OP_RUNCV, 0)
7601 : newPVOP(OP_RUNCV,0,NULL));
7610 if (PL_expect == XSTATE)
7611 return yyl_sub(aTHX_ PL_bufptr, key);
7612 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7621 LOP(OP_ACCEPT,XTERM);
7624 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7629 LOP(OP_ATAN2,XTERM);
7635 LOP(OP_BINMODE,XTERM);
7638 LOP(OP_BLESS,XTERM);
7647 /* We have to disambiguate the two senses of
7648 "continue". If the next token is a '{' then
7649 treat it as the start of a continue block;
7650 otherwise treat it as a control operator.
7660 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7670 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7679 LOP(OP_CRYPT,XTERM);
7682 LOP(OP_CHMOD,XTERM);
7685 LOP(OP_CHOWN,XTERM);
7688 LOP(OP_CONNECT,XTERM);
7703 return yyl_do(aTHX_ s, orig_keyword);
7706 PL_hints |= HINT_BLOCK_SCOPE;
7716 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7717 STR_WITH_LEN("NDBM_File::"),
7718 STR_WITH_LEN("DB_File::"),
7719 STR_WITH_LEN("GDBM_File::"),
7720 STR_WITH_LEN("SDBM_File::"),
7721 STR_WITH_LEN("ODBM_File::"),
7723 LOP(OP_DBMOPEN,XTERM);
7735 pl_yylval.ival = CopLINE(PL_curcop);
7739 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7751 if (*s == '{') { /* block eval */
7752 PL_expect = XTERMBLOCK;
7753 UNIBRACK(OP_ENTERTRY);
7755 else { /* string eval */
7757 UNIBRACK(OP_ENTEREVAL);
7762 UNIBRACK(-OP_ENTEREVAL);
7776 case KEY_endhostent:
7782 case KEY_endservent:
7785 case KEY_endprotoent:
7796 return yyl_foreach(aTHX_ s);
7799 LOP(OP_FORMLINE,XTERM);
7808 LOP(OP_FCNTL,XTERM);
7814 LOP(OP_FLOCK,XTERM);
7817 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7822 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7827 LOP(OP_GREPSTART, XREF);
7844 case KEY_getpriority:
7845 LOP(OP_GETPRIORITY,XTERM);
7847 case KEY_getprotobyname:
7850 case KEY_getprotobynumber:
7851 LOP(OP_GPBYNUMBER,XTERM);
7853 case KEY_getprotoent:
7865 case KEY_getpeername:
7866 UNI(OP_GETPEERNAME);
7868 case KEY_gethostbyname:
7871 case KEY_gethostbyaddr:
7872 LOP(OP_GHBYADDR,XTERM);
7874 case KEY_gethostent:
7877 case KEY_getnetbyname:
7880 case KEY_getnetbyaddr:
7881 LOP(OP_GNBYADDR,XTERM);
7886 case KEY_getservbyname:
7887 LOP(OP_GSBYNAME,XTERM);
7889 case KEY_getservbyport:
7890 LOP(OP_GSBYPORT,XTERM);
7892 case KEY_getservent:
7895 case KEY_getsockname:
7896 UNI(OP_GETSOCKNAME);
7898 case KEY_getsockopt:
7899 LOP(OP_GSOCKOPT,XTERM);
7914 pl_yylval.ival = CopLINE(PL_curcop);
7915 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7916 "given is experimental");
7920 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7926 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7928 pl_yylval.ival = CopLINE(PL_curcop);
7932 LOP(OP_INDEX,XTERM);
7938 LOP(OP_IOCTL,XTERM);
7941 Perl_ck_warner_d(aTHX_
7942 packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7970 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7975 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7989 LOP(OP_LISTEN,XTERM);
7998 s = scan_pat(s,OP_MATCH);
7999 TERM(sublex_start());
8002 LOP(OP_MAPSTART, XREF);
8005 LOP(OP_MKDIR,XTERM);
8008 LOP(OP_MSGCTL,XTERM);
8011 LOP(OP_MSGGET,XTERM);
8014 LOP(OP_MSGRCV,XTERM);
8017 LOP(OP_MSGSND,XTERM);
8022 return yyl_my(aTHX_ s, key);
8028 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8033 s = tokenize_use(0, s);
8037 if (*s == '(' || (s = skipspace(s), *s == '('))
8040 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8041 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8047 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8049 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8050 for (t=d; isSPACE(*t);)
8052 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8054 && !(t[0] == '=' && t[1] == '>')
8055 && !(t[0] == ':' && t[1] == ':')
8056 && !keyword(s, d-s, 0)
8058 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8059 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8060 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8066 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8068 pl_yylval.ival = OP_OR;
8078 LOP(OP_OPEN_DIR,XTERM);
8081 checkcomma(s,PL_tokenbuf,"filehandle");
8085 checkcomma(s,PL_tokenbuf,"filehandle");
8104 s = force_word(s,BAREWORD,FALSE,TRUE);
8106 s = force_strict_version(s);
8110 LOP(OP_PIPE_OP,XTERM);
8113 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8115 missingterm(NULL, 0);
8116 COPLINE_SET_FROM_MULTI_END;
8117 pl_yylval.ival = OP_CONST;
8118 TERM(sublex_start());
8124 return yyl_qw(aTHX_ s, len);
8127 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8129 missingterm(NULL, 0);
8130 pl_yylval.ival = OP_STRINGIFY;
8131 if (SvIVX(PL_lex_stuff) == '\'')
8132 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8133 TERM(sublex_start());
8136 s = scan_pat(s,OP_QR);
8137 TERM(sublex_start());
8140 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8142 missingterm(NULL, 0);
8143 pl_yylval.ival = OP_BACKTICK;
8144 TERM(sublex_start());
8150 return yyl_require(aTHX_ s, orig_keyword);
8159 LOP(OP_RENAME,XTERM);
8168 LOP(OP_RINDEX,XTERM);
8177 UNIDOR(OP_READLINE);
8180 UNIDOR(OP_BACKTICK);
8189 LOP(OP_REVERSE,XTERM);
8192 UNIDOR(OP_READLINK);
8199 if (pl_yylval.opval)
8200 TERM(sublex_start());
8202 TOKEN(1); /* force error */
8205 checkcomma(s,PL_tokenbuf,"filehandle");
8215 LOP(OP_SELECT,XTERM);
8221 LOP(OP_SEMCTL,XTERM);
8224 LOP(OP_SEMGET,XTERM);
8227 LOP(OP_SEMOP,XTERM);
8233 LOP(OP_SETPGRP,XTERM);
8235 case KEY_setpriority:
8236 LOP(OP_SETPRIORITY,XTERM);
8238 case KEY_sethostent:
8244 case KEY_setservent:
8247 case KEY_setprotoent:
8257 LOP(OP_SEEKDIR,XTERM);
8259 case KEY_setsockopt:
8260 LOP(OP_SSOCKOPT,XTERM);
8266 LOP(OP_SHMCTL,XTERM);
8269 LOP(OP_SHMGET,XTERM);
8272 LOP(OP_SHMREAD,XTERM);
8275 LOP(OP_SHMWRITE,XTERM);
8278 LOP(OP_SHUTDOWN,XTERM);
8287 LOP(OP_SOCKET,XTERM);
8289 case KEY_socketpair:
8290 LOP(OP_SOCKPAIR,XTERM);
8293 checkcomma(s,PL_tokenbuf,"subroutine name");
8296 s = force_word(s,BAREWORD,TRUE,TRUE);
8300 LOP(OP_SPLIT,XTERM);
8303 LOP(OP_SPRINTF,XTERM);
8306 LOP(OP_SPLICE,XTERM);
8321 LOP(OP_SUBSTR,XTERM);
8325 return yyl_sub(aTHX_ s, key);
8328 LOP(OP_SYSTEM,XREF);
8331 LOP(OP_SYMLINK,XTERM);
8334 LOP(OP_SYSCALL,XTERM);
8337 LOP(OP_SYSOPEN,XTERM);
8340 LOP(OP_SYSSEEK,XTERM);
8343 LOP(OP_SYSREAD,XTERM);
8346 LOP(OP_SYSWRITE,XTERM);
8351 TERM(sublex_start());
8372 LOP(OP_TRUNCATE,XTERM);
8384 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8386 pl_yylval.ival = CopLINE(PL_curcop);
8390 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8392 pl_yylval.ival = CopLINE(PL_curcop);
8396 LOP(OP_UNLINK,XTERM);
8402 LOP(OP_UNPACK,XTERM);
8405 LOP(OP_UTIME,XTERM);
8411 LOP(OP_UNSHIFT,XTERM);
8414 s = tokenize_use(1, s);
8424 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8426 pl_yylval.ival = CopLINE(PL_curcop);
8427 Perl_ck_warner_d(aTHX_
8428 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8429 "when is experimental");
8433 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8435 pl_yylval.ival = CopLINE(PL_curcop);
8439 PL_hints |= HINT_BLOCK_SCOPE;
8446 LOP(OP_WAITPID,XTERM);
8452 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8453 * we use the same number on EBCDIC */
8454 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8458 if (PL_expect == XOPERATOR) {
8459 if (*s == '=' && !PL_lex_allbrackets
8460 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8467 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8470 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8472 pl_yylval.ival = OP_XOR;
8478 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8481 I32 orig_keyword = 0;
8485 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8486 if ((*s == ':' && s[1] == ':')
8487 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8489 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8490 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8493 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8494 UTF8fARG(UTF, len, PL_tokenbuf));
8497 else if (key == KEY_require || key == KEY_do
8499 /* that's a way to remember we saw "CORE::" */
8502 /* Known to be a reserved word at this point */
8503 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8507 yyl_keylookup(pTHX_ char *s, GV *gv)
8512 struct code c = no_code;
8513 I32 orig_keyword = 0;
8519 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8521 /* Some keywords can be followed by any delimiter, including ':' */
8522 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8524 /* x::* is just a word, unless x is "CORE" */
8525 if (!anydelim && *s == ':' && s[1] == ':') {
8526 if (memEQs(PL_tokenbuf, len, "CORE"))
8527 return yyl_key_core(aTHX_ s, len, c);
8528 return yyl_just_a_word(aTHX_ s, len, 0, c);
8532 while (d < PL_bufend && isSPACE(*d))
8533 d++; /* no comments skipped here, or s### is misparsed */
8535 /* Is this a word before a => operator? */
8536 if (*d == '=' && d[1] == '>') {
8537 return yyl_fatcomma(aTHX_ s, len);
8540 /* Check for plugged-in keyword */
8544 char *saved_bufptr = PL_bufptr;
8546 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8548 if (result == KEYWORD_PLUGIN_DECLINE) {
8549 /* not a plugged-in keyword */
8550 PL_bufptr = saved_bufptr;
8551 } else if (result == KEYWORD_PLUGIN_STMT) {
8552 pl_yylval.opval = o;
8554 if (!PL_nexttoke) PL_expect = XSTATE;
8555 return REPORT(PLUGSTMT);
8556 } else if (result == KEYWORD_PLUGIN_EXPR) {
8557 pl_yylval.opval = o;
8559 if (!PL_nexttoke) PL_expect = XOPERATOR;
8560 return REPORT(PLUGEXPR);
8562 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8566 /* Is this a label? */
8567 if (!anydelim && PL_expect == XSTATE
8568 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8571 newSVOP(OP_CONST, 0,
8572 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8577 /* Check for lexical sub */
8578 if (PL_expect != XOPERATOR) {
8579 char tmpbuf[sizeof PL_tokenbuf + 1];
8581 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8582 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8583 if (c.off != NOT_IN_PAD) {
8584 assert(c.off); /* we assume this is boolean-true below */
8585 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8586 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8587 HEK * const stashname = HvNAME_HEK(stash);
8588 c.sv = newSVhek(stashname);
8589 sv_catpvs(c.sv, "::");
8590 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8591 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8592 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8598 return yyl_just_a_word(aTHX_ s, len, 0, c);
8602 c.rv2cv_op = newOP(OP_PADANY, 0);
8603 c.rv2cv_op->op_targ = c.off;
8604 c.cv = find_lexical_cv(c.off);
8607 return yyl_just_a_word(aTHX_ s, len, 0, c);
8612 /* Check for built-in keyword */
8613 key = keyword(PL_tokenbuf, len, 0);
8616 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8618 if (key && key != KEY___DATA__ && key != KEY___END__
8619 && (!anydelim || *s != '#')) {
8620 /* no override, and not s### either; skipspace is safe here
8621 * check for => on following line */
8623 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8624 STRLEN soff = s - SvPVX(PL_linestr);
8626 arrow = *s == '=' && s[1] == '>';
8627 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8628 s = SvPVX(PL_linestr) + soff;
8630 return yyl_fatcomma(aTHX_ s, len);
8633 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8637 yyl_try(pTHX_ char *s)
8646 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8647 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8651 yyl_croak_unrecognised(aTHX_ s);
8655 /* emulate EOF on ^D or ^Z */
8656 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8663 if ((!PL_rsfp || PL_lex_inwhat)
8664 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8668 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8670 yyerror((const char *)
8672 ? "Format not terminated"
8673 : "Missing right curly or square bracket"));
8676 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8680 if (s++ < PL_bufend)
8681 goto retry; /* ignore stray nulls */
8684 if (!PL_in_eval && !PL_preambled) {
8685 PL_preambled = TRUE;
8687 /* Generate a string of Perl code to load the debugger.
8688 * If PERL5DB is set, it will return the contents of that,
8689 * otherwise a compile-time require of perl5db.pl. */
8691 const char * const pdb = PerlEnv_getenv("PERL5DB");
8694 sv_setpv(PL_linestr, pdb);
8695 sv_catpvs(PL_linestr,";");
8697 SETERRNO(0,SS_NORMAL);
8698 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8700 PL_parser->preambling = CopLINE(PL_curcop);
8702 SvPVCLEAR(PL_linestr);
8703 if (PL_preambleav) {
8704 SV **svp = AvARRAY(PL_preambleav);
8705 SV **const end = svp + AvFILLp(PL_preambleav);
8707 sv_catsv(PL_linestr, *svp);
8709 sv_catpvs(PL_linestr, ";");
8711 sv_free(MUTABLE_SV(PL_preambleav));
8712 PL_preambleav = NULL;
8715 sv_catpvs(PL_linestr,
8716 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8717 if (PL_minus_n || PL_minus_p) {
8718 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8720 sv_catpvs(PL_linestr,"chomp;");
8723 if ( ( *PL_splitstr == '/'
8724 || *PL_splitstr == '\''
8725 || *PL_splitstr == '"')
8726 && strchr(PL_splitstr + 1, *PL_splitstr))
8728 /* strchr is ok, because -F pattern can't contain
8730 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8733 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8734 bytes can be used as quoting characters. :-) */
8735 const char *splits = PL_splitstr;
8736 sv_catpvs(PL_linestr, "our @F=split(q\0");
8739 if (*splits == '\\')
8740 sv_catpvn(PL_linestr, splits, 1);
8741 sv_catpvn(PL_linestr, splits, 1);
8742 } while (*splits++);
8743 /* This loop will embed the trailing NUL of
8744 PL_linestr as the last thing it does before
8746 sv_catpvs(PL_linestr, ");");
8750 sv_catpvs(PL_linestr,"our @F=split(' ');");
8753 sv_catpvs(PL_linestr, "\n");
8754 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8755 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8756 PL_last_lop = PL_last_uni = NULL;
8757 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8758 update_debugger_info(PL_linestr, NULL, 0);
8761 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8766 #ifdef PERL_STRICT_CR
8767 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8769 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8771 case ' ': case '\t': case '\f': case '\v':
8777 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8778 if (needs_semicolon)
8785 return yyl_hyphen(aTHX_ s);
8788 return yyl_plus(aTHX_ s);
8791 return yyl_star(aTHX_ s);
8794 return yyl_percent(aTHX_ s);
8797 return yyl_caret(aTHX_ s);
8800 return yyl_leftsquare(aTHX_ s);
8803 return yyl_tilde(aTHX_ s);
8806 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8812 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8813 return yyl_colon(aTHX_ s + 1);
8816 return yyl_leftparen(aTHX_ s + 1);
8819 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8827 return yyl_rightparen(aTHX_ s);
8830 return yyl_rightsquare(aTHX_ s);
8833 return yyl_leftcurly(aTHX_ s + 1, 0);
8836 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8838 return yyl_rightcurly(aTHX_ s, 0);
8841 return yyl_ampersand(aTHX_ s);
8844 return yyl_verticalbar(aTHX_ s);
8847 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8848 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8850 s = vcs_conflict_marker(s + 7);
8856 const char tmp = *s++;
8858 if (!PL_lex_allbrackets
8859 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8867 if (!PL_lex_allbrackets
8868 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8877 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8878 && memCHRs("+-*/%.^&|<",tmp))
8879 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8880 "Reversed %c= operator",(int)tmp);
8882 if (PL_expect == XSTATE
8884 && (s == PL_linestart+1 || s[-2] == '\n') )
8886 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8887 || PL_lex_state != LEX_NORMAL)
8892 incline(s, PL_bufend);
8893 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8895 s = (char *) memchr(s,'\n', d - s);
8900 incline(s, PL_bufend);
8908 PL_parser->in_pod = 1;
8912 if (PL_expect == XBLOCK) {
8914 #ifdef PERL_STRICT_CR
8915 while (SPACE_OR_TAB(*t))
8917 while (SPACE_OR_TAB(*t) || *t == '\r')
8920 if (*t == '\n' || *t == '#') {
8921 ENTER_with_name("lex_format");
8922 SAVEI8(PL_parser->form_lex_state);
8923 SAVEI32(PL_lex_formbrack);
8924 PL_parser->form_lex_state = PL_lex_state;
8925 PL_lex_formbrack = PL_lex_brackets + 1;
8926 PL_parser->sub_error_count = PL_error_count;
8927 return yyl_leftcurly(aTHX_ s, 1);
8930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8938 return yyl_bang(aTHX_ s + 1);
8941 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8942 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8944 s = vcs_conflict_marker(s + 7);
8947 return yyl_leftpointy(aTHX_ s);
8950 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8951 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8953 s = vcs_conflict_marker(s + 7);
8956 return yyl_rightpointy(aTHX_ s + 1);
8959 return yyl_dollar(aTHX_ s);
8962 return yyl_snail(aTHX_ s);
8964 case '/': /* may be division, defined-or, or pattern */
8965 return yyl_slash(aTHX_ s);
8967 case '?': /* conditional */
8969 if (!PL_lex_allbrackets
8970 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8975 PL_lex_allbrackets++;
8979 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8980 #ifdef PERL_STRICT_CR
8983 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8985 && (s == PL_linestart || s[-1] == '\n') )
8988 /* formbrack==2 means dot seen where arguments expected */
8989 return yyl_rightcurly(aTHX_ s, 2);
8991 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
8995 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
8998 if (!PL_lex_allbrackets
8999 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9007 pl_yylval.ival = OPf_SPECIAL;
9013 if (*s == '=' && !PL_lex_allbrackets
9014 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9022 case '0': case '1': case '2': case '3': case '4':
9023 case '5': case '6': case '7': case '8': case '9':
9024 s = scan_num(s, &pl_yylval);
9025 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9026 if (PL_expect == XOPERATOR)
9031 return yyl_sglquote(aTHX_ s);
9034 return yyl_dblquote(aTHX_ s);
9037 return yyl_backtick(aTHX_ s);
9040 return yyl_backslash(aTHX_ s + 1);
9043 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9044 char *start = s + 2;
9045 while (isDIGIT(*start) || *start == '_')
9047 if (*start == '.' && isDIGIT(start[1])) {
9048 s = scan_num(s, &pl_yylval);
9051 else if ((*start == ':' && start[1] == ':')
9052 || (PL_expect == XSTATE && *start == ':')) {
9053 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9057 else if (PL_expect == XSTATE) {
9059 while (d < PL_bufend && isSPACE(*d)) d++;
9061 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9066 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9067 if (!isALPHA(*start) && (PL_expect == XTERM
9068 || PL_expect == XREF || PL_expect == XSTATE
9069 || PL_expect == XTERMORDORDOR)) {
9070 GV *const gv = gv_fetchpvn_flags(s, start - s,
9071 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9073 s = scan_num(s, &pl_yylval);
9078 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9083 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9087 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9118 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9128 Works out what to call the token just pulled out of the input
9129 stream. The yacc parser takes care of taking the ops we return and
9130 stitching them into a tree.
9133 The type of the next token
9136 Check if we have already built the token; if so, use it.
9137 Switch based on the current state:
9138 - if we have a case modifier in a string, deal with that
9139 - handle other cases of interpolation inside a string
9140 - scan the next line if we are inside a format
9141 In the normal state, switch on the next character:
9143 if alphabetic, go to key lookup
9144 unrecognized character - croak
9145 - 0/4/26: handle end-of-line or EOF
9146 - cases for whitespace
9147 - \n and #: handle comments and line numbers
9148 - various operators, brackets and sigils
9151 - 'v': vstrings (or go to key lookup)
9152 - 'x' repetition operator (or go to key lookup)
9153 - other ASCII alphanumerics (key lookup begins here):
9156 scan built-in keyword (but do nothing with it yet)
9157 check for statement label
9158 check for lexical subs
9159 return yyl_just_a_word if there is one
9160 see whether built-in keyword is overridden
9161 switch on keyword number:
9162 - default: return yyl_just_a_word:
9163 not a built-in keyword; handle bareword lookup
9164 disambiguate between method and sub call
9165 fall back to bareword
9166 - cases for built-in keywords
9170 #define RSFP_FILENO (PL_rsfp)
9172 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9179 char *s = PL_bufptr;
9181 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9182 const U8* first_bad_char_loc;
9183 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9184 PL_bufend - PL_bufptr,
9185 &first_bad_char_loc)))
9187 _force_out_malformed_utf8_message(first_bad_char_loc,
9190 1 /* 1 means die */ );
9191 NOT_REACHED; /* NOTREACHED */
9193 PL_parser->recheck_utf8_validity = FALSE;
9196 SV* tmp = newSVpvs("");
9197 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9198 (IV)CopLINE(PL_curcop),
9199 lex_state_names[PL_lex_state],
9200 exp_name[PL_expect],
9201 pv_display(tmp, s, strlen(s), 0, 60));
9205 /* when we've already built the next token, just pull it out of the queue */
9208 pl_yylval = PL_nextval[PL_nexttoke];
9211 next_type = PL_nexttype[PL_nexttoke];
9212 if (next_type & (7<<24)) {
9213 if (next_type & (1<<24)) {
9214 if (PL_lex_brackets > 100)
9215 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9216 PL_lex_brackstack[PL_lex_brackets++] =
9217 (char) ((next_type >> 16) & 0xff);
9219 if (next_type & (2<<24))
9220 PL_lex_allbrackets++;
9221 if (next_type & (4<<24))
9222 PL_lex_allbrackets--;
9223 next_type &= 0xffff;
9225 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9229 switch (PL_lex_state) {
9231 case LEX_INTERPNORMAL:
9234 /* interpolated case modifiers like \L \U, including \Q and \E.
9235 when we get here, PL_bufptr is at the \
9237 case LEX_INTERPCASEMOD:
9238 /* handle \E or end of string */
9239 return yyl_interpcasemod(aTHX_ s);
9241 case LEX_INTERPPUSH:
9242 return REPORT(sublex_push());
9244 case LEX_INTERPSTART:
9245 if (PL_bufptr == PL_bufend)
9246 return REPORT(sublex_done());
9248 if(*PL_bufptr != '(')
9249 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9252 /* for /@a/, we leave the joining for the regex engine to do
9253 * (unless we're within \Q etc) */
9254 PL_lex_dojoin = (*PL_bufptr == '@'
9255 && (!PL_lex_inpat || PL_lex_casemods));
9256 PL_lex_state = LEX_INTERPNORMAL;
9257 if (PL_lex_dojoin) {
9258 NEXTVAL_NEXTTOKE.ival = 0;
9260 force_ident("\"", '$');
9261 NEXTVAL_NEXTTOKE.ival = 0;
9263 NEXTVAL_NEXTTOKE.ival = 0;
9264 force_next((2<<24)|'(');
9265 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9268 /* Convert (?{...}) and friends to 'do {...}' */
9269 if (PL_lex_inpat && *PL_bufptr == '(') {
9270 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9272 if (*PL_bufptr != '{')
9274 PL_expect = XTERMBLOCK;
9278 if (PL_lex_starts++) {
9280 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9281 if (!PL_lex_casemods && PL_lex_inpat)
9284 AopNOASSIGN(OP_CONCAT);
9288 case LEX_INTERPENDMAYBE:
9289 if (intuit_more(PL_bufptr, PL_bufend)) {
9290 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9296 if (PL_lex_dojoin) {
9297 const U8 dojoin_was = PL_lex_dojoin;
9298 PL_lex_dojoin = FALSE;
9299 PL_lex_state = LEX_INTERPCONCAT;
9300 PL_lex_allbrackets--;
9301 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9303 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9304 && SvEVALED(PL_lex_repl))
9306 if (PL_bufptr != PL_bufend)
9307 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9310 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9311 re_eval_str. If the here-doc body’s length equals the previous
9312 value of re_eval_start, re_eval_start will now be null. So
9313 check re_eval_str as well. */
9314 if (PL_parser->lex_shared->re_eval_start
9315 || PL_parser->lex_shared->re_eval_str) {
9317 if (*PL_bufptr != ')')
9318 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9320 /* having compiled a (?{..}) expression, return the original
9321 * text too, as a const */
9322 if (PL_parser->lex_shared->re_eval_str) {
9323 sv = PL_parser->lex_shared->re_eval_str;
9324 PL_parser->lex_shared->re_eval_str = NULL;
9326 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9327 SvPV_shrink_to_cur(sv);
9329 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9330 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9331 NEXTVAL_NEXTTOKE.opval =
9332 newSVOP(OP_CONST, 0,
9335 PL_parser->lex_shared->re_eval_start = NULL;
9341 case LEX_INTERPCONCAT:
9343 if (PL_lex_brackets)
9344 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9345 (long) PL_lex_brackets);
9347 if (PL_bufptr == PL_bufend)
9348 return REPORT(sublex_done());
9350 /* m'foo' still needs to be parsed for possible (?{...}) */
9351 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9352 SV *sv = newSVsv(PL_linestr);
9354 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9358 int save_error_count = PL_error_count;
9360 s = scan_const(PL_bufptr);
9362 /* Set flag if this was a pattern and there were errors. op.c will
9363 * refuse to compile a pattern with this flag set. Otherwise, we
9364 * could get segfaults, etc. */
9365 if (PL_lex_inpat && PL_error_count > save_error_count) {
9366 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9369 PL_lex_state = LEX_INTERPCASEMOD;
9371 PL_lex_state = LEX_INTERPSTART;
9374 if (s != PL_bufptr) {
9375 NEXTVAL_NEXTTOKE = pl_yylval;
9378 if (PL_lex_starts++) {
9379 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9380 if (!PL_lex_casemods && PL_lex_inpat)
9383 AopNOASSIGN(OP_CONCAT);
9393 if (PL_parser->sub_error_count != PL_error_count) {
9394 /* There was an error parsing a formline, which tends to
9396 Unlike interpolated sub-parsing, we can't treat any of
9397 these as recoverable, so no need to check sub_no_recover.
9401 assert(PL_lex_formbrack);
9402 s = scan_formline(PL_bufptr);
9403 if (!PL_lex_formbrack)
9404 return yyl_rightcurly(aTHX_ s, 1);
9409 /* We really do *not* want PL_linestr ever becoming a COW. */
9410 assert (!SvIsCOW(PL_linestr));
9412 PL_oldoldbufptr = PL_oldbufptr;
9415 if (PL_in_my == KEY_sigvar) {
9416 PL_parser->saw_infix_sigil = 0;
9417 return yyl_sigvar(aTHX_ s);
9421 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9422 On its return, we then need to set it to indicate whether the token
9423 we just encountered was an infix operator that (if we hadn't been
9424 expecting an operator) have been a sigil.
9426 bool expected_operator = (PL_expect == XOPERATOR);
9427 int ret = yyl_try(aTHX_ s);
9428 switch (pl_yylval.ival) {
9433 if (expected_operator) {
9434 PL_parser->saw_infix_sigil = 1;
9439 PL_parser->saw_infix_sigil = 0;
9449 Looks up an identifier in the pad or in a package
9451 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9452 rather than a plain pad var.
9455 PRIVATEREF if this is a lexical name.
9456 BAREWORD if this belongs to a package.
9459 if we're in a my declaration
9460 croak if they tried to say my($foo::bar)
9461 build the ops for a my() declaration
9462 if it's an access to a my() variable
9463 build ops for access to a my() variable
9464 if in a dq string, and they've said @foo and we can't find @foo
9466 build ops for a bareword
9470 S_pending_ident(pTHX)
9473 const char pit = (char)pl_yylval.ival;
9474 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9475 /* All routes through this function want to know if there is a colon. */
9476 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9478 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9479 "### Pending identifier '%s'\n", PL_tokenbuf); });
9480 assert(tokenbuf_len >= 2);
9482 /* if we're in a my(), we can't allow dynamics here.
9483 $foo'bar has already been turned into $foo::bar, so
9484 just check for colons.
9486 if it's a legal name, the OP is a PADANY.
9489 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9491 /* diag_listed_as: No package name allowed for variable %s
9493 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9495 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9496 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9497 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9502 /* "my" variable %s can't be in a package */
9503 /* PL_no_myglob is constant */
9504 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9505 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9506 PL_in_my == KEY_my ? "my" : "state",
9507 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9509 UTF ? SVf_UTF8 : 0);
9510 GCC_DIAG_RESTORE_STMT;
9513 if (PL_in_my == KEY_sigvar) {
9514 /* A signature 'padop' needs in addition, an op_first to
9515 * point to a child sigdefelem, and an extra field to hold
9516 * the signature index. We can achieve both by using an
9517 * UNOP_AUX and (ab)using the op_aux field to hold the
9518 * index. If we ever need more fields, use a real malloced
9519 * aux strut instead.
9521 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9522 INT2PTR(UNOP_AUX_item *,
9523 (PL_parser->sig_elems)));
9524 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9525 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9529 o = newOP(OP_PADANY, 0);
9530 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9531 UTF ? SVf_UTF8 : 0);
9532 if (PL_in_my == KEY_sigvar)
9535 pl_yylval.opval = o;
9541 build the ops for accesses to a my() variable.
9546 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9548 if (tmp != NOT_IN_PAD) {
9549 /* might be an "our" variable" */
9550 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9551 /* build ops for a bareword */
9552 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9553 HEK * const stashname = HvNAME_HEK(stash);
9554 SV * const sym = newSVhek(stashname);
9555 sv_catpvs(sym, "::");
9556 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9557 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9558 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9562 ((PL_tokenbuf[0] == '$') ? SVt_PV
9563 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9568 pl_yylval.opval = newOP(OP_PADANY, 0);
9569 pl_yylval.opval->op_targ = tmp;
9575 Whine if they've said @foo or @foo{key} in a doublequoted string,
9576 and @foo (or %foo) isn't a variable we can find in the symbol
9579 if (ckWARN(WARN_AMBIGUOUS)
9581 && PL_lex_state != LEX_NORMAL
9582 && !PL_lex_brackets)
9584 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9585 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9587 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9590 /* Downgraded from fatal to warning 20000522 mjd */
9591 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9592 "Possible unintended interpolation of %" UTF8f
9594 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9598 /* build ops for a bareword */
9599 pl_yylval.opval = newSVOP(OP_CONST, 0,
9600 newSVpvn_flags(PL_tokenbuf + 1,
9601 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9602 UTF ? SVf_UTF8 : 0 ));
9603 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9605 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9606 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9607 | ( UTF ? SVf_UTF8 : 0 ),
9608 ((PL_tokenbuf[0] == '$') ? SVt_PV
9609 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9615 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9617 PERL_ARGS_ASSERT_CHECKCOMMA;
9619 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9620 if (ckWARN(WARN_SYNTAX)) {
9623 for (w = s+2; *w && level; w++) {
9631 /* the list of chars below is for end of statements or
9632 * block / parens, boolean operators (&&, ||, //) and branch
9633 * constructs (or, and, if, until, unless, while, err, for).
9634 * Not a very solid hack... */
9635 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9636 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9637 "%s (...) interpreted as function",name);
9640 while (s < PL_bufend && isSPACE(*s))
9644 while (s < PL_bufend && isSPACE(*s))
9646 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9647 const char * const w = s;
9648 s += UTF ? UTF8SKIP(s) : 1;
9649 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9650 s += UTF ? UTF8SKIP(s) : 1;
9651 while (s < PL_bufend && isSPACE(*s))
9655 if (keyword(w, s - w, 0))
9658 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9659 if (gv && GvCVu(gv))
9664 Copy(w, tmpbuf+1, s - w, char);
9666 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9667 if (off != NOT_IN_PAD) return;
9669 Perl_croak(aTHX_ "No comma allowed after %s", what);
9674 /* S_new_constant(): do any overload::constant lookup.
9676 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9677 Best used as sv=new_constant(..., sv, ...).
9678 If s, pv are NULL, calls subroutine with one argument,
9679 and <type> is used with error messages only.
9680 <type> is assumed to be well formed UTF-8.
9682 If error_msg is not NULL, *error_msg will be set to any error encountered.
9683 Otherwise yyerror() will be used to output it */
9686 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9687 SV *sv, SV *pv, const char *type, STRLEN typelen,
9688 const char ** error_msg)
9691 HV * table = GvHV(PL_hintgv); /* ^H */
9696 const char *why1 = "", *why2 = "", *why3 = "";
9697 const char * optional_colon = ":"; /* Only some messages have a colon */
9700 PERL_ARGS_ASSERT_NEW_CONSTANT;
9701 /* We assume that this is true: */
9704 sv_2mortal(sv); /* Parent created it permanently */
9707 || ! (PL_hints & HINT_LOCALIZE_HH))
9710 optional_colon = "";
9714 cvp = hv_fetch(table, key, keylen, FALSE);
9715 if (!cvp || !SvOK(*cvp)) {
9718 why3 = "} is not defined";
9724 pv = newSVpvn_flags(s, len, SVs_TEMP);
9726 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9728 typesv = &PL_sv_undef;
9730 PUSHSTACKi(PERLSI_OVERLOAD);
9742 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9746 /* Check the eval first */
9747 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9749 const char * errstr;
9750 sv_catpvs(errsv, "Propagated");
9751 errstr = SvPV_const(errsv, errlen);
9752 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9754 res = SvREFCNT_inc_simple_NN(sv);
9758 SvREFCNT_inc_simple_void_NN(res);
9771 (void)sv_2mortal(sv);
9773 why1 = "Call to &{$^H{";
9775 why3 = "}} did not return a defined value";
9779 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9780 (int)(type ? typelen : len),
9788 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9790 return SvREFCNT_inc_simple_NN(sv);
9793 PERL_STATIC_INLINE void
9794 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9795 bool is_utf8, bool check_dollar, bool tick_warn)
9798 const char *olds = *s;
9799 PERL_ARGS_ASSERT_PARSE_IDENT;
9801 while (*s < PL_bufend) {
9803 Perl_croak(aTHX_ "%s", ident_too_long);
9804 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9805 /* The UTF-8 case must come first, otherwise things
9806 * like c\N{COMBINING TILDE} would start failing, as the
9807 * isWORDCHAR_A case below would gobble the 'c' up.
9810 char *t = *s + UTF8SKIP(*s);
9811 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9814 if (*d + (t - *s) > e)
9815 Perl_croak(aTHX_ "%s", ident_too_long);
9816 Copy(*s, *d, t - *s, char);
9820 else if ( isWORDCHAR_A(**s) ) {
9823 } while (isWORDCHAR_A(**s) && *d < e);
9825 else if ( allow_package
9827 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9834 else if (allow_package && **s == ':' && (*s)[1] == ':'
9835 /* Disallow things like Foo::$bar. For the curious, this is
9836 * the code path that triggers the "Bad name after" warning
9837 * when looking for barewords.
9839 && !(check_dollar && (*s)[2] == '$')) {
9846 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9847 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9850 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9853 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9854 "Old package separator used in string");
9855 if (olds[-1] == '#')
9859 if (*olds == '\'') {
9866 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9867 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9868 UTF8fARG(is_utf8, d2-this_d, this_d));
9873 /* Returns a NUL terminated string, with the length of the string written to
9877 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9880 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9881 bool is_utf8 = cBOOL(UTF);
9883 PERL_ARGS_ASSERT_SCAN_WORD;
9885 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9891 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9892 * iff Unicode semantics are to be used. The legal ones are any of:
9893 * a) all ASCII characters except:
9894 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9896 * The final case currently doesn't get this far in the program, so we
9897 * don't test for it. If that were to change, it would be ok to allow it.
9898 * b) When not under Unicode rules, any upper Latin1 character
9899 * c) Otherwise, when unicode rules are used, all XIDS characters.
9901 * Because all ASCII characters have the same representation whether
9902 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9903 * '{' without knowing if is UTF-8 or not. */
9904 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9905 (isGRAPH_A(*(s)) || ((is_utf8) \
9906 ? isIDFIRST_utf8_safe(s, e) \
9908 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9911 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9913 I32 herelines = PL_parser->herelines;
9914 SSize_t bracket = -1;
9917 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9918 bool is_utf8 = cBOOL(UTF);
9919 I32 orig_copline = 0, tmp_copline = 0;
9921 PERL_ARGS_ASSERT_SCAN_IDENT;
9923 if (isSPACE(*s) || !*s)
9925 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9926 bool is_zero= *s == '0' ? TRUE : FALSE;
9927 char *digit_start= d;
9929 while (s < PL_bufend && isDIGIT(*s)) {
9931 Perl_croak(aTHX_ "%s", ident_too_long);
9934 if (is_zero && d - digit_start > 1)
9935 Perl_croak(aTHX_ ident_var_zero_multi_digit);
9937 else { /* See if it is a "normal" identifier */
9938 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9943 /* Either a digit variable, or parse_ident() found an identifier
9944 (anything valid as a bareword), so job done and return. */
9945 if (PL_lex_state != LEX_NORMAL)
9946 PL_lex_state = LEX_INTERPENDMAYBE;
9950 /* Here, it is not a run-of-the-mill identifier name */
9952 if (*s == '$' && s[1]
9953 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9954 || isDIGIT_A((U8)s[1])
9957 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9959 /* Dereferencing a value in a scalar variable.
9960 The alternatives are different syntaxes for a scalar variable.
9961 Using ' as a leading package separator isn't allowed. :: is. */
9964 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9966 bracket = s - SvPVX(PL_linestr);
9968 orig_copline = CopLINE(PL_curcop);
9969 if (s < PL_bufend && isSPACE(*s)) {
9973 if ((s <= PL_bufend - ((is_utf8)
9976 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9979 const STRLEN skip = UTF8SKIP(s);
9982 for ( i = 0; i < skip; i++ )
9987 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
9989 bool is_zero= *d == '0' ? TRUE : FALSE;
9990 char *digit_start= d;
9991 while (s < PL_bufend && isDIGIT(*s)) {
9994 Perl_croak(aTHX_ "%s", ident_too_long);
9997 if (is_zero && d - digit_start > 1)
9998 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10003 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10004 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10008 /* Warn about ambiguous code after unary operators if {...} notation isn't
10009 used. There's no difference in ambiguity; it's merely a heuristic
10010 about when not to warn. */
10011 else if (ck_uni && bracket == -1)
10013 if (bracket != -1) {
10016 /* If we were processing {...} notation then... */
10017 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10018 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10021 /* note we have to check for a normal identifier first,
10022 * as it handles utf8 symbols, and only after that has
10023 * been ruled out can we look at the caret words */
10024 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10025 /* if it starts as a valid identifier, assume that it is one.
10026 (the later check for } being at the expected point will trap
10027 cases where this doesn't pan out.) */
10028 d += is_utf8 ? UTF8SKIP(d) : 1;
10029 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10032 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10034 while (isWORDCHAR(*s) && d < e) {
10038 Perl_croak(aTHX_ "%s", ident_too_long);
10041 tmp_copline = CopLINE(PL_curcop);
10042 if (s < PL_bufend && isSPACE(*s)) {
10045 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10046 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10047 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10048 const char * const brack =
10050 ((*s == '[') ? "[...]" : "{...}");
10051 orig_copline = CopLINE(PL_curcop);
10052 CopLINE_set(PL_curcop, tmp_copline);
10053 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10054 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10055 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10056 funny, dest, brack, funny, dest, brack);
10057 CopLINE_set(PL_curcop, orig_copline);
10060 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10061 PL_lex_allbrackets++;
10066 if ( !tmp_copline )
10067 tmp_copline = CopLINE(PL_curcop);
10068 if ((skip = s < PL_bufend && isSPACE(*s))) {
10069 /* Avoid incrementing line numbers or resetting PL_linestart,
10070 in case we have to back up. */
10071 STRLEN s_off = s - SvPVX(PL_linestr);
10073 s = SvPVX(PL_linestr) + s_off;
10078 /* Expect to find a closing } after consuming any trailing whitespace.
10081 /* Now increment line numbers if applicable. */
10085 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10086 PL_lex_state = LEX_INTERPEND;
10089 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10090 if (ckWARN(WARN_AMBIGUOUS)
10091 && (keyword(dest, d - dest, 0)
10092 || get_cvn_flags(dest, d - dest, is_utf8
10096 SV *tmp = newSVpvn_flags( dest, d - dest,
10097 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10100 orig_copline = CopLINE(PL_curcop);
10101 CopLINE_set(PL_curcop, tmp_copline);
10102 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10103 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10104 funny, SVfARG(tmp), funny, SVfARG(tmp));
10105 CopLINE_set(PL_curcop, orig_copline);
10110 /* Didn't find the closing } at the point we expected, so restore
10111 state such that the next thing to process is the opening { and */
10112 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10113 CopLINE_set(PL_curcop, orig_copline);
10114 PL_parser->herelines = herelines;
10116 PL_parser->sub_no_recover = TRUE;
10119 else if ( PL_lex_state == LEX_INTERPNORMAL
10120 && !PL_lex_brackets
10121 && !intuit_more(s, PL_bufend))
10122 PL_lex_state = LEX_INTERPEND;
10127 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10129 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10130 * found in the parse starting at 's', based on the subset that are valid
10131 * in this context input to this routine in 'valid_flags'. Advances s.
10132 * Returns TRUE if the input should be treated as a valid flag, so the next
10133 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10134 * upon first call on the current regex. This routine will set it to any
10135 * charset modifier found. The caller shouldn't change it. This way,
10136 * another charset modifier encountered in the parse can be detected as an
10137 * error, as we have decided to allow only one */
10139 const char c = **s;
10140 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10142 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10143 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10144 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10145 UTF ? SVf_UTF8 : 0);
10147 /* Pretend that it worked, so will continue processing before
10156 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10157 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10158 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10159 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10160 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10161 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10162 case LOCALE_PAT_MOD:
10164 goto multiple_charsets;
10166 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10169 case UNICODE_PAT_MOD:
10171 goto multiple_charsets;
10173 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10176 case ASCII_RESTRICT_PAT_MOD:
10178 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10182 /* Error if previous modifier wasn't an 'a', but if it was, see
10183 * if, and accept, a second occurrence (only) */
10184 if (*charset != 'a'
10185 || get_regex_charset(*pmfl)
10186 != REGEX_ASCII_RESTRICTED_CHARSET)
10188 goto multiple_charsets;
10190 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10194 case DEPENDS_PAT_MOD:
10196 goto multiple_charsets;
10198 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10207 if (*charset != c) {
10208 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10210 else if (c == 'a') {
10211 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10212 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10215 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10218 /* Pretend that it worked, so will continue processing before dieing */
10224 S_scan_pat(pTHX_ char *start, I32 type)
10228 const char * const valid_flags =
10229 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10230 char charset = '\0'; /* character set modifier */
10231 unsigned int x_mod_count = 0;
10233 PERL_ARGS_ASSERT_SCAN_PAT;
10235 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10237 Perl_croak(aTHX_ "Search pattern not terminated");
10239 pm = (PMOP*)newPMOP(type, 0);
10240 if (PL_multi_open == '?') {
10241 /* This is the only point in the code that sets PMf_ONCE: */
10242 pm->op_pmflags |= PMf_ONCE;
10244 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10245 allows us to restrict the list needed by reset to just the ??
10247 assert(type != OP_TRANS);
10249 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10252 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10255 elements = mg->mg_len / sizeof(PMOP**);
10256 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10257 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10258 mg->mg_len = elements * sizeof(PMOP**);
10259 PmopSTASH_set(pm,PL_curstash);
10263 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10264 * anon CV. False positives like qr/[(?{]/ are harmless */
10266 if (type == OP_QR) {
10268 char *e, *p = SvPV(PL_lex_stuff, len);
10270 for (; p < e; p++) {
10271 if (p[0] == '(' && p[1] == '?'
10272 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10274 pm->op_pmflags |= PMf_HAS_CV;
10278 pm->op_pmflags |= PMf_IS_QR;
10281 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10282 &s, &charset, &x_mod_count))
10284 /* issue a warning if /c is specified,but /g is not */
10285 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10287 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10288 "Use of /c modifier is meaningless without /g" );
10291 PL_lex_op = (OP*)pm;
10292 pl_yylval.ival = OP_MATCH;
10297 S_scan_subst(pTHX_ char *start)
10303 line_t linediff = 0;
10305 char charset = '\0'; /* character set modifier */
10306 unsigned int x_mod_count = 0;
10309 PERL_ARGS_ASSERT_SCAN_SUBST;
10311 pl_yylval.ival = OP_NULL;
10313 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10316 Perl_croak(aTHX_ "Substitution pattern not terminated");
10320 first_start = PL_multi_start;
10321 first_line = CopLINE(PL_curcop);
10322 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10324 SvREFCNT_dec_NN(PL_lex_stuff);
10325 PL_lex_stuff = NULL;
10326 Perl_croak(aTHX_ "Substitution replacement not terminated");
10328 PL_multi_start = first_start; /* so whole substitution is taken together */
10330 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10334 if (*s == EXEC_PAT_MOD) {
10338 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10339 &s, &charset, &x_mod_count))
10345 if ((pm->op_pmflags & PMf_CONTINUE)) {
10346 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10350 SV * const repl = newSVpvs("");
10353 pm->op_pmflags |= PMf_EVAL;
10354 for (; es > 1; es--) {
10355 sv_catpvs(repl, "eval ");
10357 sv_catpvs(repl, "do {");
10358 sv_catsv(repl, PL_parser->lex_sub_repl);
10359 sv_catpvs(repl, "}");
10360 SvREFCNT_dec(PL_parser->lex_sub_repl);
10361 PL_parser->lex_sub_repl = repl;
10365 linediff = CopLINE(PL_curcop) - first_line;
10367 CopLINE_set(PL_curcop, first_line);
10369 if (linediff || es) {
10370 /* the IVX field indicates that the replacement string is a s///e;
10371 * the NVX field indicates how many src code lines the replacement
10373 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10374 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10375 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10379 PL_lex_op = (OP*)pm;
10380 pl_yylval.ival = OP_SUBST;
10385 S_scan_trans(pTHX_ char *start)
10392 bool nondestruct = 0;
10395 PERL_ARGS_ASSERT_SCAN_TRANS;
10397 pl_yylval.ival = OP_NULL;
10399 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10401 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10405 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10407 SvREFCNT_dec_NN(PL_lex_stuff);
10408 PL_lex_stuff = NULL;
10409 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10412 complement = del = squash = 0;
10416 complement = OPpTRANS_COMPLEMENT;
10419 del = OPpTRANS_DELETE;
10422 squash = OPpTRANS_SQUASH;
10434 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10435 o->op_private &= ~OPpTRANS_ALL;
10436 o->op_private |= del|squash|complement;
10439 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10446 Takes a pointer to the first < in <<FOO.
10447 Returns a pointer to the byte following <<FOO.
10449 This function scans a heredoc, which involves different methods
10450 depending on whether we are in a string eval, quoted construct, etc.
10451 This is because PL_linestr could containing a single line of input, or
10452 a whole string being evalled, or the contents of the current quote-
10455 The two basic methods are:
10456 - Steal lines from the input stream
10457 - Scan the heredoc in PL_linestr and remove it therefrom
10459 In a file scope or filtered eval, the first method is used; in a
10460 string eval, the second.
10462 In a quote-like operator, we have to choose between the two,
10463 depending on where we can find a newline. We peek into outer lex-
10464 ing scopes until we find one with a newline in it. If we reach the
10465 outermost lexing scope and it is a file, we use the stream method.
10466 Otherwise it is treated as an eval.
10470 S_scan_heredoc(pTHX_ char *s)
10472 I32 op_type = OP_SCALAR;
10480 I32 indent_len = 0;
10481 bool indented = FALSE;
10482 const bool infile = PL_rsfp || PL_parser->filtered;
10483 const line_t origline = CopLINE(PL_curcop);
10484 LEXSHARED *shared = PL_parser->lex_shared;
10486 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10489 d = PL_tokenbuf + 1;
10490 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10491 *PL_tokenbuf = '\n';
10494 if (*peek == '~') {
10499 while (SPACE_OR_TAB(*peek))
10502 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10505 s = delimcpy(d, e, s, PL_bufend, term, &len);
10506 if (s == PL_bufend)
10507 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10513 /* <<\FOO is equivalent to <<'FOO' */
10518 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10519 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10523 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10524 peek += UTF ? UTF8SKIP(peek) : 1;
10527 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10528 Copy(s, d, len, char);
10533 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10534 Perl_croak(aTHX_ "Delimiter for here document is too long");
10538 len = d - PL_tokenbuf;
10540 #ifndef PERL_STRICT_CR
10541 d = (char *) memchr(s, '\r', PL_bufend - s);
10543 char * const olds = s;
10545 while (s < PL_bufend) {
10551 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10560 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10565 tmpstr = newSV_type(SVt_PVIV);
10566 SvGROW(tmpstr, 80);
10567 if (term == '\'') {
10568 op_type = OP_CONST;
10569 SvIV_set(tmpstr, -1);
10571 else if (term == '`') {
10572 op_type = OP_BACKTICK;
10573 SvIV_set(tmpstr, '\\');
10576 PL_multi_start = origline + 1 + PL_parser->herelines;
10577 PL_multi_open = PL_multi_close = '<';
10579 /* inside a string eval or quote-like operator */
10580 if (!infile || PL_lex_inwhat) {
10583 char * const olds = s;
10584 PERL_CONTEXT * const cx = CX_CUR();
10585 /* These two fields are not set until an inner lexing scope is
10586 entered. But we need them set here. */
10587 shared->ls_bufptr = s;
10588 shared->ls_linestr = PL_linestr;
10590 if (PL_lex_inwhat) {
10591 /* Look for a newline. If the current buffer does not have one,
10592 peek into the line buffer of the parent lexing scope, going
10593 up as many levels as necessary to find one with a newline
10596 while (!(s = (char *)memchr(
10597 (void *)shared->ls_bufptr, '\n',
10598 SvEND(shared->ls_linestr)-shared->ls_bufptr
10601 shared = shared->ls_prev;
10602 /* shared is only null if we have gone beyond the outermost
10603 lexing scope. In a file, we will have broken out of the
10604 loop in the previous iteration. In an eval, the string buf-
10605 fer ends with "\n;", so the while condition above will have
10606 evaluated to false. So shared can never be null. Or so you
10607 might think. Odd syntax errors like s;@{<<; can gobble up
10608 the implicit semicolon at the end of a flie, causing the
10609 file handle to be closed even when we are not in a string
10610 eval. So shared may be null in that case.
10611 (Closing '>>}' here to balance the earlier open brace for
10612 editors that look for matched pairs.) */
10613 if (UNLIKELY(!shared))
10615 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10616 most lexing scope. In a file, shared->ls_linestr at that
10617 level is just one line, so there is no body to steal. */
10618 if (infile && !shared->ls_prev) {
10624 else { /* eval or we've already hit EOF */
10625 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10630 linestr = shared->ls_linestr;
10631 bufend = SvEND(linestr);
10636 while (s < bufend - len + 1) {
10638 ++PL_parser->herelines;
10640 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10644 /* Only valid if it's preceded by whitespace only */
10645 while (backup != myolds && --backup >= myolds) {
10646 if (! SPACE_OR_TAB(*backup)) {
10652 /* No whitespace or all! */
10653 if (backup == s || *backup == '\n') {
10654 Newx(indent, indent_len + 1, char);
10655 memcpy(indent, backup + 1, indent_len);
10656 indent[indent_len] = 0;
10657 s--; /* before our delimiter */
10658 PL_parser->herelines--; /* this line doesn't count */
10665 while (s < bufend - len + 1
10666 && memNE(s,PL_tokenbuf,len) )
10669 ++PL_parser->herelines;
10673 if (s >= bufend - len + 1) {
10677 sv_setpvn(tmpstr,d+1,s-d);
10679 /* the preceding stmt passes a newline */
10680 PL_parser->herelines++;
10682 /* s now points to the newline after the heredoc terminator.
10683 d points to the newline before the body of the heredoc.
10686 /* We are going to modify linestr in place here, so set
10687 aside copies of the string if necessary for re-evals or
10689 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10690 check shared->re_eval_str. */
10691 if (shared->re_eval_start || shared->re_eval_str) {
10692 /* Set aside the rest of the regexp */
10693 if (!shared->re_eval_str)
10694 shared->re_eval_str =
10695 newSVpvn(shared->re_eval_start,
10696 bufend - shared->re_eval_start);
10697 shared->re_eval_start -= s-d;
10700 if (cxstack_ix >= 0
10701 && CxTYPE(cx) == CXt_EVAL
10702 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10703 && cx->blk_eval.cur_text == linestr)
10705 cx->blk_eval.cur_text = newSVsv(linestr);
10706 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10709 /* Copy everything from s onwards back to d. */
10710 Move(s,d,bufend-s + 1,char);
10711 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10712 /* Setting PL_bufend only applies when we have not dug deeper
10713 into other scopes, because sublex_done sets PL_bufend to
10714 SvEND(PL_linestr). */
10715 if (shared == PL_parser->lex_shared)
10716 PL_bufend = SvEND(linestr);
10721 char *oldbufptr_save;
10722 char *oldoldbufptr_save;
10724 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10725 term = PL_tokenbuf[1];
10727 linestr_save = PL_linestr; /* must restore this afterwards */
10728 d = s; /* and this */
10729 oldbufptr_save = PL_oldbufptr;
10730 oldoldbufptr_save = PL_oldoldbufptr;
10731 PL_linestr = newSVpvs("");
10732 PL_bufend = SvPVX(PL_linestr);
10735 PL_bufptr = PL_bufend;
10736 CopLINE_set(PL_curcop,
10737 origline + 1 + PL_parser->herelines);
10739 if ( !lex_next_chunk(LEX_NO_TERM)
10740 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10742 /* Simply freeing linestr_save might seem simpler here, as it
10743 does not matter what PL_linestr points to, since we are
10744 about to croak; but in a quote-like op, linestr_save
10745 will have been prospectively freed already, via
10746 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10747 restore PL_linestr. */
10748 SvREFCNT_dec_NN(PL_linestr);
10749 PL_linestr = linestr_save;
10750 PL_oldbufptr = oldbufptr_save;
10751 PL_oldoldbufptr = oldoldbufptr_save;
10755 CopLINE_set(PL_curcop, origline);
10757 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10758 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10759 /* ^That should be enough to avoid this needing to grow: */
10760 sv_catpvs(PL_linestr, "\n\0");
10761 assert(s == SvPVX(PL_linestr));
10762 PL_bufend = SvEND(PL_linestr);
10766 PL_parser->herelines++;
10767 PL_last_lop = PL_last_uni = NULL;
10769 #ifndef PERL_STRICT_CR
10770 if (PL_bufend - PL_linestart >= 2) {
10771 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10772 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10774 PL_bufend[-2] = '\n';
10776 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10778 else if (PL_bufend[-1] == '\r')
10779 PL_bufend[-1] = '\n';
10781 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10782 PL_bufend[-1] = '\n';
10785 if (indented && (PL_bufend-s) >= len) {
10786 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10789 char *backup = found;
10792 /* Only valid if it's preceded by whitespace only */
10793 while (backup != s && --backup >= s) {
10794 if (! SPACE_OR_TAB(*backup)) {
10800 /* All whitespace or none! */
10801 if (backup == found || SPACE_OR_TAB(*backup)) {
10802 Newx(indent, indent_len + 1, char);
10803 memcpy(indent, backup, indent_len);
10804 indent[indent_len] = 0;
10805 SvREFCNT_dec(PL_linestr);
10806 PL_linestr = linestr_save;
10807 PL_linestart = SvPVX(linestr_save);
10808 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10809 PL_oldbufptr = oldbufptr_save;
10810 PL_oldoldbufptr = oldoldbufptr_save;
10816 /* Didn't find it */
10817 sv_catsv(tmpstr,PL_linestr);
10820 if (*s == term && PL_bufend-s >= len
10821 && memEQ(s,PL_tokenbuf + 1,len))
10823 SvREFCNT_dec(PL_linestr);
10824 PL_linestr = linestr_save;
10825 PL_linestart = SvPVX(linestr_save);
10826 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10827 PL_oldbufptr = oldbufptr_save;
10828 PL_oldoldbufptr = oldoldbufptr_save;
10833 sv_catsv(tmpstr,PL_linestr);
10839 PL_multi_end = origline + PL_parser->herelines;
10841 if (indented && indent) {
10842 STRLEN linecount = 1;
10843 STRLEN herelen = SvCUR(tmpstr);
10844 char *ss = SvPVX(tmpstr);
10845 char *se = ss + herelen;
10846 SV *newstr = newSV(herelen+1);
10849 /* Trim leading whitespace */
10851 /* newline only? Copy and move on */
10853 sv_catpvs(newstr,"\n");
10857 /* Found our indentation? Strip it */
10859 else if (se - ss >= indent_len
10860 && memEQ(ss, indent, indent_len))
10865 while ((ss + le) < se && *(ss + le) != '\n')
10868 sv_catpvn(newstr, ss, le);
10871 /* Line doesn't begin with our indentation? Croak */
10876 "Indentation on line %d of here-doc doesn't match delimiter",
10882 /* avoid sv_setsv() as we dont wan't to COW here */
10883 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10885 SvREFCNT_dec_NN(newstr);
10888 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10889 SvPV_shrink_to_cur(tmpstr);
10893 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10897 PL_lex_stuff = tmpstr;
10898 pl_yylval.ival = op_type;
10904 SvREFCNT_dec(tmpstr);
10905 CopLINE_set(PL_curcop, origline);
10906 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10910 /* scan_inputsymbol
10911 takes: position of first '<' in input buffer
10912 returns: position of first char following the matching '>' in
10914 side-effects: pl_yylval and lex_op are set.
10919 <<>> read from ARGV without magic open
10920 <FH> read from filehandle
10921 <pkg::FH> read from package qualified filehandle
10922 <pkg'FH> read from package qualified filehandle
10923 <$fh> read from filehandle in $fh
10924 <*.h> filename glob
10929 S_scan_inputsymbol(pTHX_ char *start)
10931 char *s = start; /* current position in buffer */
10934 bool nomagicopen = FALSE;
10935 char *d = PL_tokenbuf; /* start of temp holding space */
10936 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10938 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10940 end = (char *) memchr(s, '\n', PL_bufend - s);
10943 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10944 nomagicopen = TRUE;
10950 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10952 /* die if we didn't have space for the contents of the <>,
10953 or if it didn't end, or if we see a newline
10956 if (len >= (I32)sizeof PL_tokenbuf)
10957 Perl_croak(aTHX_ "Excessively long <> operator");
10959 Perl_croak(aTHX_ "Unterminated <> operator");
10964 Remember, only scalar variables are interpreted as filehandles by
10965 this code. Anything more complex (e.g., <$fh{$num}>) will be
10966 treated as a glob() call.
10967 This code makes use of the fact that except for the $ at the front,
10968 a scalar variable and a filehandle look the same.
10970 if (*d == '$' && d[1]) d++;
10972 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10973 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10974 d += UTF ? UTF8SKIP(d) : 1;
10977 /* If we've tried to read what we allow filehandles to look like, and
10978 there's still text left, then it must be a glob() and not a getline.
10979 Use scan_str to pull out the stuff between the <> and treat it
10980 as nothing more than a string.
10983 if (d - PL_tokenbuf != len) {
10984 pl_yylval.ival = OP_GLOB;
10985 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10987 Perl_croak(aTHX_ "Glob not terminated");
10991 bool readline_overriden = FALSE;
10993 /* we're in a filehandle read situation */
10996 /* turn <> into <ARGV> */
10998 Copy("ARGV",d,5,char);
11000 /* Check whether readline() is overriden */
11001 if ((gv_readline = gv_override("readline",8)))
11002 readline_overriden = TRUE;
11004 /* if <$fh>, create the ops to turn the variable into a
11008 /* try to find it in the pad for this block, otherwise find
11009 add symbol table ops
11011 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11012 if (tmp != NOT_IN_PAD) {
11013 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11014 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11015 HEK * const stashname = HvNAME_HEK(stash);
11016 SV * const sym = sv_2mortal(newSVhek(stashname));
11017 sv_catpvs(sym, "::");
11018 sv_catpv(sym, d+1);
11023 OP * const o = newOP(OP_PADSV, 0);
11025 PL_lex_op = readline_overriden
11026 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11027 op_append_elem(OP_LIST, o,
11028 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11029 : newUNOP(OP_READLINE, 0, o);
11037 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11039 PL_lex_op = readline_overriden
11040 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11041 op_append_elem(OP_LIST,
11042 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11043 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11044 : newUNOP(OP_READLINE, 0,
11045 newUNOP(OP_RV2SV, 0,
11046 newGVOP(OP_GV, 0, gv)));
11048 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11049 pl_yylval.ival = OP_NULL;
11052 /* If it's none of the above, it must be a literal filehandle
11053 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11055 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11056 PL_lex_op = readline_overriden
11057 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11058 op_append_elem(OP_LIST,
11059 newGVOP(OP_GV, 0, gv),
11060 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11061 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11062 pl_yylval.ival = OP_NULL;
11072 start position in buffer
11073 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11074 only if they are of the open/close form
11075 keep_delims preserve the delimiters around the string
11076 re_reparse compiling a run-time /(?{})/:
11077 collapse // to /, and skip encoding src
11078 delimp if non-null, this is set to the position of
11079 the closing delimiter, or just after it if
11080 the closing and opening delimiters differ
11081 (i.e., the opening delimiter of a substitu-
11083 returns: position to continue reading from buffer
11084 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11085 updates the read buffer.
11087 This subroutine pulls a string out of the input. It is called for:
11088 q single quotes q(literal text)
11089 ' single quotes 'literal text'
11090 qq double quotes qq(interpolate $here please)
11091 " double quotes "interpolate $here please"
11092 qx backticks qx(/bin/ls -l)
11093 ` backticks `/bin/ls -l`
11094 qw quote words @EXPORT_OK = qw( func() $spam )
11095 m// regexp match m/this/
11096 s/// regexp substitute s/this/that/
11097 tr/// string transliterate tr/this/that/
11098 y/// string transliterate y/this/that/
11099 ($*@) sub prototypes sub foo ($)
11100 (stuff) sub attr parameters sub foo : attr(stuff)
11101 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11103 In most of these cases (all but <>, patterns and transliterate)
11104 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11105 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11106 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11109 It skips whitespace before the string starts, and treats the first
11110 character as the delimiter. If the delimiter is one of ([{< then
11111 the corresponding "close" character )]}> is used as the closing
11112 delimiter. It allows quoting of delimiters, and if the string has
11113 balanced delimiters ([{<>}]) it allows nesting.
11115 On success, the SV with the resulting string is put into lex_stuff or,
11116 if that is already non-NULL, into lex_repl. The second case occurs only
11117 when parsing the RHS of the special constructs s/// and tr/// (y///).
11118 For convenience, the terminating delimiter character is stuffed into
11123 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11127 SV *sv; /* scalar value: string */
11128 const char *tmps; /* temp string, used for delimiter matching */
11129 char *s = start; /* current position in the buffer */
11130 char term; /* terminating character */
11131 char *to; /* current position in the sv's data */
11132 I32 brackets = 1; /* bracket nesting level */
11133 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11134 IV termcode; /* terminating char. code */
11135 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11136 STRLEN termlen; /* length of terminating string */
11139 /* The delimiters that have a mirror-image closing one */
11140 const char * opening_delims = "([{<";
11141 const char * closing_delims = ")]}>";
11143 /* The only non-UTF character that isn't a stand alone grapheme is
11144 * white-space, hence can't be a delimiter. */
11145 const char * non_grapheme_msg = "Use of unassigned code point or"
11146 " non-standalone grapheme for a delimiter"
11148 PERL_ARGS_ASSERT_SCAN_STR;
11150 /* skip space before the delimiter */
11155 /* mark where we are, in case we need to report errors */
11158 /* after skipping whitespace, the next character is the terminator */
11160 if (!UTF || UTF8_IS_INVARIANT(term)) {
11161 termcode = termstr[0] = term;
11165 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11166 if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11171 yyerror(non_grapheme_msg);
11174 Copy(s, termstr, termlen, U8);
11177 /* mark where we are */
11178 PL_multi_start = CopLINE(PL_curcop);
11179 PL_multi_open = termcode;
11180 herelines = PL_parser->herelines;
11182 /* If the delimiter has a mirror-image closing one, get it */
11183 if (term && (tmps = strchr(opening_delims, term))) {
11184 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11187 PL_multi_close = termcode;
11189 if (PL_multi_open == PL_multi_close) {
11190 keep_bracketed_quoted = FALSE;
11193 /* create a new SV to hold the contents. 79 is the SV's initial length.
11194 What a random number. */
11195 sv = newSV_type(SVt_PVIV);
11197 SvIV_set(sv, termcode);
11198 (void)SvPOK_only(sv); /* validate pointer */
11200 /* move past delimiter and try to read a complete string */
11202 sv_catpvn(sv, s, termlen);
11205 /* extend sv if need be */
11206 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11207 /* set 'to' to the next character in the sv's string */
11208 to = SvPVX(sv)+SvCUR(sv);
11210 /* if open delimiter is the close delimiter read unbridle */
11211 if (PL_multi_open == PL_multi_close) {
11212 for (; s < PL_bufend; s++,to++) {
11213 /* embedded newlines increment the current line number */
11214 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11215 COPLINE_INC_WITH_HERELINES;
11216 /* handle quoted delimiters */
11217 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11218 if (!keep_bracketed_quoted
11220 || (re_reparse && s[1] == '\\'))
11223 else /* any other quotes are simply copied straight through */
11226 /* terminate when run out of buffer (the for() condition), or
11227 have found the terminator */
11228 else if (*s == term) { /* First byte of terminator matches */
11229 if (termlen == 1) /* If is the only byte, are done */
11232 /* If the remainder of the terminator matches, also are
11233 * done, after checking that is a separate grapheme */
11234 if ( s + termlen <= PL_bufend
11235 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11238 && UNLIKELY(! is_grapheme((U8 *) start,
11243 yyerror(non_grapheme_msg);
11248 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11256 /* if the terminator isn't the same as the start character (e.g.,
11257 matched brackets), we have to allow more in the quoting, and
11258 be prepared for nested brackets.
11261 /* read until we run out of string, or we find the terminator */
11262 for (; s < PL_bufend; s++,to++) {
11263 /* embedded newlines increment the line count */
11264 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11265 COPLINE_INC_WITH_HERELINES;
11266 /* backslashes can escape the open or closing characters */
11267 if (*s == '\\' && s+1 < PL_bufend) {
11268 if (!keep_bracketed_quoted
11269 && ( ((UV)s[1] == PL_multi_open)
11270 || ((UV)s[1] == PL_multi_close) ))
11277 /* allow nested opens and closes */
11278 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11280 else if ((UV)*s == PL_multi_open)
11282 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11287 /* terminate the copied string and update the sv's end-of-string */
11289 SvCUR_set(sv, to - SvPVX_const(sv));
11292 * this next chunk reads more into the buffer if we're not done yet
11296 break; /* handle case where we are done yet :-) */
11298 #ifndef PERL_STRICT_CR
11299 if (to - SvPVX_const(sv) >= 2) {
11300 if ( (to[-2] == '\r' && to[-1] == '\n')
11301 || (to[-2] == '\n' && to[-1] == '\r'))
11305 SvCUR_set(sv, to - SvPVX_const(sv));
11307 else if (to[-1] == '\r')
11310 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11314 /* if we're out of file, or a read fails, bail and reset the current
11315 line marker so we can report where the unterminated string began
11317 COPLINE_INC_WITH_HERELINES;
11318 PL_bufptr = PL_bufend;
11319 if (!lex_next_chunk(0)) {
11321 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11324 s = start = PL_bufptr;
11327 /* at this point, we have successfully read the delimited string */
11330 sv_catpvn(sv, s, termlen);
11336 PL_multi_end = CopLINE(PL_curcop);
11337 CopLINE_set(PL_curcop, PL_multi_start);
11338 PL_parser->herelines = herelines;
11340 /* if we allocated too much space, give some back */
11341 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11342 SvLEN_set(sv, SvCUR(sv) + 1);
11343 SvPV_shrink_to_cur(sv);
11346 /* decide whether this is the first or second quoted string we've read
11351 PL_parser->lex_sub_repl = sv;
11354 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11360 takes: pointer to position in buffer
11361 returns: pointer to new position in buffer
11362 side-effects: builds ops for the constant in pl_yylval.op
11364 Read a number in any of the formats that Perl accepts:
11366 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11367 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11368 0b[01](_?[01])* binary integers
11369 0[0-7](_?[0-7])* octal integers
11370 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11371 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11373 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11376 If it reads a number without a decimal point or an exponent, it will
11377 try converting the number to an integer and see if it can do so
11378 without loss of precision.
11382 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11384 const char *s = start; /* current position in buffer */
11385 char *d; /* destination in temp buffer */
11386 char *e; /* end of temp buffer */
11387 NV nv; /* number read, as a double */
11388 SV *sv = NULL; /* place to put the converted number */
11389 bool floatit; /* boolean: int or float? */
11390 const char *lastub = NULL; /* position of last underbar */
11391 static const char* const number_too_long = "Number too long";
11392 bool warned_about_underscore = 0;
11393 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11394 #define WARN_ABOUT_UNDERSCORE() \
11396 if (!warned_about_underscore) { \
11397 warned_about_underscore = 1; \
11398 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11399 "Misplaced _ in number"); \
11402 /* Hexadecimal floating point.
11404 * In many places (where we have quads and NV is IEEE 754 double)
11405 * we can fit the mantissa bits of a NV into an unsigned quad.
11406 * (Note that UVs might not be quads even when we have quads.)
11407 * This will not work everywhere, though (either no quads, or
11408 * using long doubles), in which case we have to resort to NV,
11409 * which will probably mean horrible loss of precision due to
11410 * multiple fp operations. */
11411 bool hexfp = FALSE;
11412 int total_bits = 0;
11413 int significant_bits = 0;
11414 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11415 # define HEXFP_UQUAD
11416 Uquad_t hexfp_uquad = 0;
11417 int hexfp_frac_bits = 0;
11422 NV hexfp_mult = 1.0;
11423 UV high_non_zero = 0; /* highest digit */
11424 int non_zero_integer_digits = 0;
11426 PERL_ARGS_ASSERT_SCAN_NUM;
11428 /* We use the first character to decide what type of number this is */
11432 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11434 /* if it starts with a 0, it could be an octal number, a decimal in
11435 0.13 disguise, or a hexadecimal number, or a binary number. */
11439 u holds the "number so far"
11440 overflowed was the number more than we can hold?
11442 Shift is used when we add a digit. It also serves as an "are
11443 we in octal/hex/binary?" indicator to disallow hex characters
11444 when in octal mode.
11448 bool overflowed = FALSE;
11449 bool just_zero = TRUE; /* just plain 0 or binary number? */
11450 bool has_digs = FALSE;
11451 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11452 static const char* const bases[5] =
11453 { "", "binary", "", "octal", "hexadecimal" };
11454 static const char* const Bases[5] =
11455 { "", "Binary", "", "Octal", "Hexadecimal" };
11456 static const char* const maxima[5] =
11458 "0b11111111111111111111111111111111",
11462 const char *base, *Base, *max;
11464 /* check for hex */
11465 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11469 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11474 /* check for a decimal in disguise */
11475 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11477 /* so it must be octal */
11484 WARN_ABOUT_UNDERSCORE();
11488 base = bases[shift];
11489 Base = Bases[shift];
11490 max = maxima[shift];
11492 /* read the rest of the number */
11494 /* x is used in the overflow test,
11495 b is the digit we're adding on. */
11500 /* if we don't mention it, we're done */
11504 /* _ are ignored -- but warned about if consecutive */
11506 if (lastub && s == lastub + 1)
11507 WARN_ABOUT_UNDERSCORE();
11511 /* 8 and 9 are not octal */
11512 case '8': case '9':
11514 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11518 case '2': case '3': case '4':
11519 case '5': case '6': case '7':
11521 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11524 case '0': case '1':
11525 b = *s++ & 15; /* ASCII digit -> value of digit */
11529 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11530 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11531 /* make sure they said 0x */
11534 b = (*s++ & 7) + 9;
11536 /* Prepare to put the digit we have onto the end
11537 of the number so far. We check for overflows.
11544 assert(shift >= 0);
11545 x = u << shift; /* make room for the digit */
11547 total_bits += shift;
11549 if ((x >> shift) != u
11550 && !(PL_hints & HINT_NEW_BINARY)) {
11553 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11554 "Integer overflow in %s number",
11557 u = x | b; /* add the digit to the end */
11560 n *= nvshift[shift];
11561 /* If an NV has not enough bits in its
11562 * mantissa to represent an UV this summing of
11563 * small low-order numbers is a waste of time
11564 * (because the NV cannot preserve the
11565 * low-order bits anyway): we could just
11566 * remember when did we overflow and in the
11567 * end just multiply n by the right
11572 if (high_non_zero == 0 && b > 0)
11576 non_zero_integer_digits++;
11578 /* this could be hexfp, but peek ahead
11579 * to avoid matching ".." */
11580 if (UNLIKELY(HEXFP_PEEK(s))) {
11588 /* if we get here, we had success: make a scalar value from
11593 /* final misplaced underbar check */
11595 WARN_ABOUT_UNDERSCORE();
11597 if (UNLIKELY(HEXFP_PEEK(s))) {
11598 /* Do sloppy (on the underbars) but quick detection
11599 * (and value construction) for hexfp, the decimal
11600 * detection will shortly be more thorough with the
11601 * underbar checks. */
11603 significant_bits = non_zero_integer_digits * shift;
11606 #else /* HEXFP_NV */
11609 /* Ignore the leading zero bits of
11610 * the high (first) non-zero digit. */
11611 if (high_non_zero) {
11612 if (high_non_zero < 0x8)
11613 significant_bits--;
11614 if (high_non_zero < 0x4)
11615 significant_bits--;
11616 if (high_non_zero < 0x2)
11617 significant_bits--;
11624 bool accumulate = TRUE;
11626 int lim = 1 << shift;
11627 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11629 if (isXDIGIT(*h)) {
11630 significant_bits += shift;
11633 if (significant_bits < NV_MANT_DIG) {
11634 /* We are in the long "run" of xdigits,
11635 * accumulate the full four bits. */
11636 assert(shift >= 0);
11637 hexfp_uquad <<= shift;
11639 hexfp_frac_bits += shift;
11640 } else if (significant_bits - shift < NV_MANT_DIG) {
11641 /* We are at a hexdigit either at,
11642 * or straddling, the edge of mantissa.
11643 * We will try grabbing as many as
11644 * possible bits. */
11646 significant_bits - NV_MANT_DIG;
11650 hexfp_uquad <<= tail;
11651 assert((shift - tail) >= 0);
11652 hexfp_uquad |= b >> (shift - tail);
11653 hexfp_frac_bits += tail;
11655 /* Ignore the trailing zero bits
11656 * of the last non-zero xdigit.
11658 * The assumption here is that if
11659 * one has input of e.g. the xdigit
11660 * eight (0x8), there is only one
11661 * bit being input, not the full
11662 * four bits. Conversely, if one
11663 * specifies a zero xdigit, the
11664 * assumption is that one really
11665 * wants all those bits to be zero. */
11667 if ((b & 0x1) == 0x0) {
11668 significant_bits--;
11669 if ((b & 0x2) == 0x0) {
11670 significant_bits--;
11671 if ((b & 0x4) == 0x0) {
11672 significant_bits--;
11678 accumulate = FALSE;
11681 /* Keep skipping the xdigits, and
11682 * accumulating the significant bits,
11683 * but do not shift the uquad
11684 * (which would catastrophically drop
11685 * high-order bits) or accumulate the
11686 * xdigits anymore. */
11688 #else /* HEXFP_NV */
11690 nv_mult /= nvshift[shift];
11692 hexfp_nv += b * nv_mult;
11694 accumulate = FALSE;
11698 if (significant_bits >= NV_MANT_DIG)
11699 accumulate = FALSE;
11703 if ((total_bits > 0 || significant_bits > 0) &&
11704 isALPHA_FOLD_EQ(*h, 'p')) {
11705 bool negexp = FALSE;
11709 else if (*h == '-') {
11715 while (isDIGIT(*h) || *h == '_') {
11718 hexfp_exp += *h - '0';
11721 && -hexfp_exp < NV_MIN_EXP - 1) {
11722 /* NOTE: this means that the exponent
11723 * underflow warning happens for
11724 * the IEEE 754 subnormals (denormals),
11725 * because DBL_MIN_EXP etc are the lowest
11726 * possible binary (or, rather, DBL_RADIX-base)
11727 * exponent for normals, not subnormals.
11729 * This may or may not be a good thing. */
11730 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11731 "Hexadecimal float: exponent underflow");
11737 && hexfp_exp > NV_MAX_EXP - 1) {
11738 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11739 "Hexadecimal float: exponent overflow");
11747 hexfp_exp = -hexfp_exp;
11749 hexfp_exp -= hexfp_frac_bits;
11751 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11758 if (shift != 3 && !has_digs) {
11759 /* 0x or 0b with no digits, treat it as an error.
11760 Originally this backed up the parse before the b or
11761 x, but that has the potential for silent changes in
11762 behaviour, like for: "0x.3" and "0x+$foo".
11765 char *oldbp = PL_bufptr;
11766 if (*d) ++d; /* so the user sees the bad non-digit */
11767 PL_bufptr = (char *)d; /* so yyerror reports the context */
11768 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11769 shift == 4 ? "hexadecimal" : "binary"));
11774 if (n > 4294967295.0)
11775 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11776 "%s number > %s non-portable",
11782 if (u > 0xffffffff)
11783 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11784 "%s number > %s non-portable",
11789 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11790 sv = new_constant(start, s - start, "integer",
11791 sv, NULL, NULL, 0, NULL);
11792 else if (PL_hints & HINT_NEW_BINARY)
11793 sv = new_constant(start, s - start, "binary",
11794 sv, NULL, NULL, 0, NULL);
11799 handle decimal numbers.
11800 we're also sent here when we read a 0 as the first digit
11802 case '1': case '2': case '3': case '4': case '5':
11803 case '6': case '7': case '8': case '9': case '.':
11806 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11824 NOT_REACHED; /* NOTREACHED */
11828 /* read next group of digits and _ and copy into d */
11831 || UNLIKELY(hexfp && isXDIGIT(*s)))
11833 /* skip underscores, checking for misplaced ones
11837 if (lastub && s == lastub + 1)
11838 WARN_ABOUT_UNDERSCORE();
11842 /* check for end of fixed-length buffer */
11844 Perl_croak(aTHX_ "%s", number_too_long);
11845 /* if we're ok, copy the character */
11850 /* final misplaced underbar check */
11851 if (lastub && s == lastub + 1)
11852 WARN_ABOUT_UNDERSCORE();
11854 /* read a decimal portion if there is one. avoid
11855 3..5 being interpreted as the number 3. followed
11858 if (*s == '.' && s[1] != '.') {
11863 WARN_ABOUT_UNDERSCORE();
11867 /* copy, ignoring underbars, until we run out of digits.
11871 || UNLIKELY(hexfp && isXDIGIT(*s));
11874 /* fixed length buffer check */
11876 Perl_croak(aTHX_ "%s", number_too_long);
11878 if (lastub && s == lastub + 1)
11879 WARN_ABOUT_UNDERSCORE();
11885 /* fractional part ending in underbar? */
11887 WARN_ABOUT_UNDERSCORE();
11888 if (*s == '.' && isDIGIT(s[1])) {
11889 /* oops, it's really a v-string, but without the "v" */
11895 /* read exponent part, if present */
11896 if ((isALPHA_FOLD_EQ(*s, 'e')
11897 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11898 && memCHRs("+-0123456789_", s[1]))
11900 int exp_digits = 0;
11901 const char *save_s = s;
11904 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11905 ditto for p (hexfloats) */
11906 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11907 /* At least some Mach atof()s don't grok 'E' */
11910 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11917 /* stray preinitial _ */
11919 WARN_ABOUT_UNDERSCORE();
11923 /* allow positive or negative exponent */
11924 if (*s == '+' || *s == '-')
11927 /* stray initial _ */
11929 WARN_ABOUT_UNDERSCORE();
11933 /* read digits of exponent */
11934 while (isDIGIT(*s) || *s == '_') {
11938 Perl_croak(aTHX_ "%s", number_too_long);
11942 if (((lastub && s == lastub + 1)
11943 || (!isDIGIT(s[1]) && s[1] != '_')))
11944 WARN_ABOUT_UNDERSCORE();
11950 /* no exponent digits, the [eEpP] could be for something else,
11951 * though in practice we don't get here for p since that's preparsed
11952 * earlier, and results in only the 0xX being consumed, so behave similarly
11953 * for decimal floats and consume only the D.DD, leaving the [eE] to the
11966 We try to do an integer conversion first if no characters
11967 indicating "float" have been found.
11972 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11974 if (flags == IS_NUMBER_IN_UV) {
11976 sv = newSViv(uv); /* Prefer IVs over UVs. */
11979 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11980 if (uv <= (UV) IV_MIN)
11981 sv = newSViv(-(IV)uv);
11988 /* terminate the string */
11990 if (UNLIKELY(hexfp)) {
11991 # ifdef NV_MANT_DIG
11992 if (significant_bits > NV_MANT_DIG)
11993 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11994 "Hexadecimal float: mantissa overflow");
11997 nv = hexfp_uquad * hexfp_mult;
11998 #else /* HEXFP_NV */
11999 nv = hexfp_nv * hexfp_mult;
12002 nv = Atof(PL_tokenbuf);
12008 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12009 const char *const key = floatit ? "float" : "integer";
12010 const STRLEN keylen = floatit ? 5 : 7;
12011 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12012 key, keylen, sv, NULL, NULL, 0, NULL);
12016 /* if it starts with a v, it could be a v-string */
12019 sv = newSV(5); /* preallocate storage space */
12020 ENTER_with_name("scan_vstring");
12022 s = scan_vstring(s, PL_bufend, sv);
12023 SvREFCNT_inc_simple_void_NN(sv);
12024 LEAVE_with_name("scan_vstring");
12028 /* make the op for the constant and return */
12031 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12033 lvalp->opval = NULL;
12039 S_scan_formline(pTHX_ char *s)
12041 SV * const stuff = newSVpvs("");
12042 bool needargs = FALSE;
12043 bool eofmt = FALSE;
12045 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12047 while (!needargs) {
12051 #ifdef PERL_STRICT_CR
12052 while (SPACE_OR_TAB(*t))
12055 while (SPACE_OR_TAB(*t) || *t == '\r')
12058 if (*t == '\n' || t == PL_bufend) {
12063 eol = (char *) memchr(s,'\n',PL_bufend-s);
12068 for (t = s; t < eol; t++) {
12069 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12071 goto enough; /* ~~ must be first line in formline */
12073 if (*t == '@' || *t == '^')
12077 sv_catpvn(stuff, s, eol-s);
12078 #ifndef PERL_STRICT_CR
12079 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12080 char *end = SvPVX(stuff) + SvCUR(stuff);
12083 SvCUR_set(stuff, SvCUR(stuff) - 1);
12091 if ((PL_rsfp || PL_parser->filtered)
12092 && PL_parser->form_lex_state == LEX_NORMAL) {
12094 PL_bufptr = PL_bufend;
12095 COPLINE_INC_WITH_HERELINES;
12096 got_some = lex_next_chunk(0);
12097 CopLINE_dec(PL_curcop);
12102 incline(s, PL_bufend);
12105 if (!SvCUR(stuff) || needargs)
12106 PL_lex_state = PL_parser->form_lex_state;
12107 if (SvCUR(stuff)) {
12108 PL_expect = XSTATE;
12110 const char *s2 = s;
12111 while (isSPACE(*s2) && *s2 != '\n')
12114 PL_expect = XTERMBLOCK;
12115 NEXTVAL_NEXTTOKE.ival = 0;
12118 NEXTVAL_NEXTTOKE.ival = 0;
12119 force_next(FORMLBRACK);
12122 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12125 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12129 SvREFCNT_dec(stuff);
12131 PL_lex_formbrack = 0;
12137 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12139 const I32 oldsavestack_ix = PL_savestack_ix;
12140 CV* const outsidecv = PL_compcv;
12142 SAVEI32(PL_subline);
12143 save_item(PL_subname);
12144 SAVESPTR(PL_compcv);
12146 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12147 CvFLAGS(PL_compcv) |= flags;
12149 PL_subline = CopLINE(PL_curcop);
12150 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12151 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12152 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12153 if (outsidecv && CvPADLIST(outsidecv))
12154 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12156 return oldsavestack_ix;
12160 /* Do extra initialisation of a CV (typically one just created by
12161 * start_subparse()) if that CV is for a named sub
12165 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12167 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12169 if (nameop->op_type == OP_CONST) {
12170 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12171 if ( strEQ(name, "BEGIN")
12172 || strEQ(name, "END")
12173 || strEQ(name, "INIT")
12174 || strEQ(name, "CHECK")
12175 || strEQ(name, "UNITCHECK")
12180 /* State subs inside anonymous subs need to be
12181 clonable themselves. */
12182 if ( CvANON(CvOUTSIDE(cv))
12183 || CvCLONE(CvOUTSIDE(cv))
12184 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12186 ))[nameop->op_targ])
12193 S_yywarn(pTHX_ const char *const s, U32 flags)
12195 PERL_ARGS_ASSERT_YYWARN;
12197 PL_in_eval |= EVAL_WARNONLY;
12198 yyerror_pv(s, flags);
12203 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12205 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12208 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12211 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12213 NOT_REACHED; /* NOTREACHED */
12219 /* Called, after at least one error has been found, to abort the parse now,
12220 * instead of trying to forge ahead */
12222 yyerror_pvn(NULL, 0, 0);
12226 Perl_yyerror(pTHX_ const char *const s)
12228 PERL_ARGS_ASSERT_YYERROR;
12229 return yyerror_pvn(s, strlen(s), 0);
12233 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12235 PERL_ARGS_ASSERT_YYERROR_PV;
12236 return yyerror_pvn(s, strlen(s), flags);
12240 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12242 const char *context = NULL;
12245 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12246 int yychar = PL_parser->yychar;
12248 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12249 * apply. If the number of errors found is large enough, it abandons
12250 * parsing. If 's' is NULL, there is no message, and it abandons
12251 * processing unconditionally */
12254 if (!yychar || (yychar == ';' && !PL_rsfp))
12255 sv_catpvs(where_sv, "at EOF");
12256 else if ( PL_oldoldbufptr
12257 && PL_bufptr > PL_oldoldbufptr
12258 && PL_bufptr - PL_oldoldbufptr < 200
12259 && PL_oldoldbufptr != PL_oldbufptr
12260 && PL_oldbufptr != PL_bufptr)
12264 The code below is removed for NetWare because it
12265 abends/crashes on NetWare when the script has error such as
12266 not having the closing quotes like:
12267 if ($var eq "value)
12268 Checking of white spaces is anyway done in NetWare code.
12271 while (isSPACE(*PL_oldoldbufptr))
12274 context = PL_oldoldbufptr;
12275 contlen = PL_bufptr - PL_oldoldbufptr;
12277 else if ( PL_oldbufptr
12278 && PL_bufptr > PL_oldbufptr
12279 && PL_bufptr - PL_oldbufptr < 200
12280 && PL_oldbufptr != PL_bufptr) {
12283 The code below is removed for NetWare because it
12284 abends/crashes on NetWare when the script has error such as
12285 not having the closing quotes like:
12286 if ($var eq "value)
12287 Checking of white spaces is anyway done in NetWare code.
12290 while (isSPACE(*PL_oldbufptr))
12293 context = PL_oldbufptr;
12294 contlen = PL_bufptr - PL_oldbufptr;
12296 else if (yychar > 255)
12297 sv_catpvs(where_sv, "next token ???");
12298 else if (yychar == YYEMPTY) {
12299 if (PL_lex_state == LEX_NORMAL)
12300 sv_catpvs(where_sv, "at end of line");
12301 else if (PL_lex_inpat)
12302 sv_catpvs(where_sv, "within pattern");
12304 sv_catpvs(where_sv, "within string");
12307 sv_catpvs(where_sv, "next char ");
12309 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12310 else if (isPRINT_LC(yychar)) {
12311 const char string = yychar;
12312 sv_catpvn(where_sv, &string, 1);
12315 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12317 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12318 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12319 OutCopFILE(PL_curcop),
12320 (IV)(PL_parser->preambling == NOLINE
12321 ? CopLINE(PL_curcop)
12322 : PL_parser->preambling));
12324 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12325 UTF8fARG(UTF, contlen, context));
12327 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12328 if ( PL_multi_start < PL_multi_end
12329 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12331 Perl_sv_catpvf(aTHX_ msg,
12332 " (Might be a runaway multi-line %c%c string starting on"
12333 " line %" IVdf ")\n",
12334 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12337 if (PL_in_eval & EVAL_WARNONLY) {
12338 PL_in_eval &= ~EVAL_WARNONLY;
12339 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12345 if (s == NULL || PL_error_count >= 10) {
12346 const char * msg = "";
12347 const char * const name = OutCopFILE(PL_curcop);
12350 SV * errsv = ERRSV;
12351 if (SvCUR(errsv)) {
12352 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12357 abort_execution(msg, name);
12360 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12364 PL_in_my_stash = NULL;
12369 S_swallow_bom(pTHX_ U8 *s)
12371 const STRLEN slen = SvCUR(PL_linestr);
12373 PERL_ARGS_ASSERT_SWALLOW_BOM;
12377 if (s[1] == 0xFE) {
12378 /* UTF-16 little-endian? (or UTF-32LE?) */
12379 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12380 /* diag_listed_as: Unsupported script encoding %s */
12381 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12382 #ifndef PERL_NO_UTF16_FILTER
12384 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12387 if (PL_bufend > (char*)s) {
12388 s = add_utf16_textfilter(s, TRUE);
12391 /* diag_listed_as: Unsupported script encoding %s */
12392 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12397 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12398 #ifndef PERL_NO_UTF16_FILTER
12400 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12403 if (PL_bufend > (char *)s) {
12404 s = add_utf16_textfilter(s, FALSE);
12407 /* diag_listed_as: Unsupported script encoding %s */
12408 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12412 case BOM_UTF8_FIRST_BYTE: {
12413 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12415 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12417 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12424 if (s[2] == 0xFE && s[3] == 0xFF) {
12425 /* UTF-32 big-endian */
12426 /* diag_listed_as: Unsupported script encoding %s */
12427 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12430 else if (s[2] == 0 && s[3] != 0) {
12433 * are a good indicator of UTF-16BE. */
12434 #ifndef PERL_NO_UTF16_FILTER
12436 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12438 s = add_utf16_textfilter(s, FALSE);
12440 /* diag_listed_as: Unsupported script encoding %s */
12441 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12448 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12451 * are a good indicator of UTF-16LE. */
12452 #ifndef PERL_NO_UTF16_FILTER
12454 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12456 s = add_utf16_textfilter(s, TRUE);
12458 /* diag_listed_as: Unsupported script encoding %s */
12459 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12467 #ifndef PERL_NO_UTF16_FILTER
12469 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12471 SV *const filter = FILTER_DATA(idx);
12472 /* We re-use this each time round, throwing the contents away before we
12474 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12475 SV *const utf8_buffer = filter;
12476 IV status = IoPAGE(filter);
12477 const bool reverse = cBOOL(IoLINES(filter));
12480 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12482 /* As we're automatically added, at the lowest level, and hence only called
12483 from this file, we can be sure that we're not called in block mode. Hence
12484 don't bother writing code to deal with block mode. */
12486 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12489 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12491 DEBUG_P(PerlIO_printf(Perl_debug_log,
12492 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12493 FPTR2DPTR(void *, S_utf16_textfilter),
12494 reverse ? 'l' : 'b', idx, maxlen, status,
12495 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12502 /* First, look in our buffer of existing UTF-8 data: */
12503 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12507 } else if (status == 0) {
12509 IoPAGE(filter) = 0;
12510 nl = SvEND(utf8_buffer);
12513 STRLEN got = nl - SvPVX(utf8_buffer);
12514 /* Did we have anything to append? */
12516 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12517 /* Everything else in this code works just fine if SVp_POK isn't
12518 set. This, however, needs it, and we need it to work, else
12519 we loop infinitely because the buffer is never consumed. */
12520 sv_chop(utf8_buffer, nl);
12524 /* OK, not a complete line there, so need to read some more UTF-16.
12525 Read an extra octect if the buffer currently has an odd number. */
12529 if (SvCUR(utf16_buffer) >= 2) {
12530 /* Location of the high octet of the last complete code point.
12531 Gosh, UTF-16 is a pain. All the benefits of variable length,
12532 *coupled* with all the benefits of partial reads and
12534 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12535 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12537 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12541 /* We have the first half of a surrogate. Read more. */
12542 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12545 status = FILTER_READ(idx + 1, utf16_buffer,
12546 160 + (SvCUR(utf16_buffer) & 1));
12547 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12548 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12551 IoPAGE(filter) = status;
12556 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12557 * require 4 bytes per char */
12558 chars = SvCUR(utf16_buffer) >> 1;
12559 have = SvCUR(utf8_buffer);
12561 /* Assume the worst case size as noted by the functions: twice the
12562 * number of input bytes */
12563 SvGROW(utf8_buffer, have + chars * 4 + 1);
12566 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12567 (U8*)SvPVX_const(utf8_buffer) + have,
12568 chars * 2, &newlen);
12570 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12571 (U8*)SvPVX_const(utf8_buffer) + have,
12572 chars * 2, &newlen);
12574 SvCUR_set(utf8_buffer, have + newlen);
12577 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12578 it's private to us, and utf16_to_utf8{,reversed} take a
12579 (pointer,length) pair, rather than a NUL-terminated string. */
12580 if(SvCUR(utf16_buffer) & 1) {
12581 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12582 SvCUR_set(utf16_buffer, 1);
12584 SvCUR_set(utf16_buffer, 0);
12587 DEBUG_P(PerlIO_printf(Perl_debug_log,
12588 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12590 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12591 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12596 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12598 SV *filter = filter_add(S_utf16_textfilter, NULL);
12600 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12602 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12604 IoLINES(filter) = reversed;
12605 IoPAGE(filter) = 1; /* Not EOF */
12607 /* Sadly, we have to return a valid pointer, come what may, so we have to
12608 ignore any error return from this. */
12609 SvCUR_set(PL_linestr, 0);
12610 if (FILTER_READ(0, PL_linestr, 0)) {
12611 SvUTF8_on(PL_linestr);
12613 SvUTF8_on(PL_linestr);
12615 PL_bufend = SvEND(PL_linestr);
12616 return (U8*)SvPVX(PL_linestr);
12621 Returns a pointer to the next character after the parsed
12622 vstring, as well as updating the passed in sv.
12624 Function must be called like
12626 sv = sv_2mortal(newSV(5));
12627 s = scan_vstring(s,e,sv);
12629 where s and e are the start and end of the string.
12630 The sv should already be large enough to store the vstring
12631 passed in, for performance reasons.
12633 This function may croak if fatal warnings are enabled in the
12634 calling scope, hence the sv_2mortal in the example (to prevent
12635 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12641 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12643 const char *pos = s;
12644 const char *start = s;
12646 PERL_ARGS_ASSERT_SCAN_VSTRING;
12648 if (*pos == 'v') pos++; /* get past 'v' */
12649 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12651 if ( *pos != '.') {
12652 /* this may not be a v-string if followed by => */
12653 const char *next = pos;
12654 while (next < e && isSPACE(*next))
12656 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12657 /* return string not v-string */
12658 sv_setpvn(sv,(char *)s,pos-s);
12659 return (char *)pos;
12663 if (!isALPHA(*pos)) {
12664 U8 tmpbuf[UTF8_MAXBYTES+1];
12667 s++; /* get past 'v' */
12672 /* this is atoi() that tolerates underscores */
12675 const char *end = pos;
12677 while (--end >= s) {
12679 const UV orev = rev;
12680 rev += (*end - '0') * mult;
12683 /* diag_listed_as: Integer overflow in %s number */
12684 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12685 "Integer overflow in decimal number");
12689 /* Append native character for the rev point */
12690 tmpend = uvchr_to_utf8(tmpbuf, rev);
12691 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12692 if (!UVCHR_IS_INVARIANT(rev))
12694 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12700 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12704 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12711 Perl_keyword_plugin_standard(pTHX_
12712 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12714 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12715 PERL_UNUSED_CONTEXT;
12716 PERL_UNUSED_ARG(keyword_ptr);
12717 PERL_UNUSED_ARG(keyword_len);
12718 PERL_UNUSED_ARG(op_ptr);
12719 return KEYWORD_PLUGIN_DECLINE;
12723 =for apidoc wrap_keyword_plugin
12725 Puts a C function into the chain of keyword plugins. This is the
12726 preferred way to manipulate the L</PL_keyword_plugin> variable.
12727 C<new_plugin> is a pointer to the C function that is to be added to the
12728 keyword plugin chain, and C<old_plugin_p> points to the storage location
12729 where a pointer to the next function in the chain will be stored. The
12730 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12731 while the value previously stored there is written to C<*old_plugin_p>.
12733 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12734 to hook keyword parsing may find itself invoked more than once per
12735 process, typically in different threads. To handle that situation, this
12736 function is idempotent. The location C<*old_plugin_p> must initially
12737 (once per process) contain a null pointer. A C variable of static
12738 duration (declared at file scope, typically also marked C<static> to give
12739 it internal linkage) will be implicitly initialised appropriately, if it
12740 does not have an explicit initialiser. This function will only actually
12741 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12742 function is also thread safe on the small scale. It uses appropriate
12743 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12745 When this function is called, the function referenced by C<new_plugin>
12746 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12747 In a threading situation, C<new_plugin> may be called immediately, even
12748 before this function has returned. C<*old_plugin_p> will always be
12749 appropriately set before C<new_plugin> is called. If C<new_plugin>
12750 decides not to do anything special with the identifier that it is given
12751 (which is the usual case for most calls to a keyword plugin), it must
12752 chain the plugin function referenced by C<*old_plugin_p>.
12754 Taken all together, XS code to install a keyword plugin should typically
12755 look something like this:
12757 static Perl_keyword_plugin_t next_keyword_plugin;
12758 static OP *my_keyword_plugin(pTHX_
12759 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12761 if (memEQs(keyword_ptr, keyword_len,
12762 "my_new_keyword")) {
12765 return next_keyword_plugin(aTHX_
12766 keyword_ptr, keyword_len, op_ptr);
12770 wrap_keyword_plugin(my_keyword_plugin,
12771 &next_keyword_plugin);
12773 Direct access to L</PL_keyword_plugin> should be avoided.
12779 Perl_wrap_keyword_plugin(pTHX_
12780 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12783 PERL_UNUSED_CONTEXT;
12784 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12785 if (*old_plugin_p) return;
12786 KEYWORD_PLUGIN_MUTEX_LOCK;
12787 if (!*old_plugin_p) {
12788 *old_plugin_p = PL_keyword_plugin;
12789 PL_keyword_plugin = new_plugin;
12791 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12794 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12796 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12798 SAVEI32(PL_lex_brackets);
12799 if (PL_lex_brackets > 100)
12800 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12801 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12802 SAVEI32(PL_lex_allbrackets);
12803 PL_lex_allbrackets = 0;
12804 SAVEI8(PL_lex_fakeeof);
12805 PL_lex_fakeeof = (U8)fakeeof;
12806 if(yyparse(gramtype) && !PL_parser->error_count)
12807 qerror(Perl_mess(aTHX_ "Parse error"));
12810 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12812 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12816 SAVEVPTR(PL_eval_root);
12817 PL_eval_root = NULL;
12818 parse_recdescent(gramtype, fakeeof);
12824 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12826 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12829 if (flags & ~PARSE_OPTIONAL)
12830 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12831 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12832 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12833 if (!PL_parser->error_count)
12834 qerror(Perl_mess(aTHX_ "Parse error"));
12835 exprop = newOP(OP_NULL, 0);
12841 =for apidoc parse_arithexpr
12843 Parse a Perl arithmetic expression. This may contain operators of precedence
12844 down to the bit shift operators. The expression must be followed (and thus
12845 terminated) either by a comparison or lower-precedence operator or by
12846 something that would normally terminate an expression such as semicolon.
12847 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12848 otherwise it is mandatory. It is up to the caller to ensure that the
12849 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12850 the source of the code to be parsed and the lexical context for the
12853 The op tree representing the expression is returned. If an optional
12854 expression is absent, a null pointer is returned, otherwise the pointer
12857 If an error occurs in parsing or compilation, in most cases a valid op
12858 tree is returned anyway. The error is reflected in the parser state,
12859 normally resulting in a single exception at the top level of parsing
12860 which covers all the compilation errors that occurred. Some compilation
12861 errors, however, will throw an exception immediately.
12863 =for apidoc Amnh||PARSE_OPTIONAL
12870 Perl_parse_arithexpr(pTHX_ U32 flags)
12872 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12876 =for apidoc parse_termexpr
12878 Parse a Perl term expression. This may contain operators of precedence
12879 down to the assignment operators. The expression must be followed (and thus
12880 terminated) either by a comma or lower-precedence operator or by
12881 something that would normally terminate an expression such as semicolon.
12882 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12883 otherwise it is mandatory. It is up to the caller to ensure that the
12884 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12885 the source of the code to be parsed and the lexical context for the
12888 The op tree representing the expression is returned. If an optional
12889 expression is absent, a null pointer is returned, otherwise the pointer
12892 If an error occurs in parsing or compilation, in most cases a valid op
12893 tree is returned anyway. The error is reflected in the parser state,
12894 normally resulting in a single exception at the top level of parsing
12895 which covers all the compilation errors that occurred. Some compilation
12896 errors, however, will throw an exception immediately.
12902 Perl_parse_termexpr(pTHX_ U32 flags)
12904 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12908 =for apidoc parse_listexpr
12910 Parse a Perl list expression. This may contain operators of precedence
12911 down to the comma operator. The expression must be followed (and thus
12912 terminated) either by a low-precedence logic operator such as C<or> or by
12913 something that would normally terminate an expression such as semicolon.
12914 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12915 otherwise it is mandatory. It is up to the caller to ensure that the
12916 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12917 the source of the code to be parsed and the lexical context for the
12920 The op tree representing the expression is returned. If an optional
12921 expression is absent, a null pointer is returned, otherwise the pointer
12924 If an error occurs in parsing or compilation, in most cases a valid op
12925 tree is returned anyway. The error is reflected in the parser state,
12926 normally resulting in a single exception at the top level of parsing
12927 which covers all the compilation errors that occurred. Some compilation
12928 errors, however, will throw an exception immediately.
12934 Perl_parse_listexpr(pTHX_ U32 flags)
12936 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12940 =for apidoc parse_fullexpr
12942 Parse a single complete Perl expression. This allows the full
12943 expression grammar, including the lowest-precedence operators such
12944 as C<or>. The expression must be followed (and thus terminated) by a
12945 token that an expression would normally be terminated by: end-of-file,
12946 closing bracketing punctuation, semicolon, or one of the keywords that
12947 signals a postfix expression-statement modifier. If C<flags> has the
12948 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12949 mandatory. It is up to the caller to ensure that the dynamic parser
12950 state (L</PL_parser> et al) is correctly set to reflect the source of
12951 the code to be parsed and the lexical context for the expression.
12953 The op tree representing the expression is returned. If an optional
12954 expression is absent, a null pointer is returned, otherwise the pointer
12957 If an error occurs in parsing or compilation, in most cases a valid op
12958 tree is returned anyway. The error is reflected in the parser state,
12959 normally resulting in a single exception at the top level of parsing
12960 which covers all the compilation errors that occurred. Some compilation
12961 errors, however, will throw an exception immediately.
12967 Perl_parse_fullexpr(pTHX_ U32 flags)
12969 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12973 =for apidoc parse_block
12975 Parse a single complete Perl code block. This consists of an opening
12976 brace, a sequence of statements, and a closing brace. The block
12977 constitutes a lexical scope, so C<my> variables and various compile-time
12978 effects can be contained within it. It is up to the caller to ensure
12979 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12980 reflect the source of the code to be parsed and the lexical context for
12983 The op tree representing the code block is returned. This is always a
12984 real op, never a null pointer. It will normally be a C<lineseq> list,
12985 including C<nextstate> or equivalent ops. No ops to construct any kind
12986 of runtime scope are included by virtue of it being a block.
12988 If an error occurs in parsing or compilation, in most cases a valid op
12989 tree (most likely null) is returned anyway. The error is reflected in
12990 the parser state, normally resulting in a single exception at the top
12991 level of parsing which covers all the compilation errors that occurred.
12992 Some compilation errors, however, will throw an exception immediately.
12994 The C<flags> parameter is reserved for future use, and must always
13001 Perl_parse_block(pTHX_ U32 flags)
13004 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13005 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13009 =for apidoc parse_barestmt
13011 Parse a single unadorned Perl statement. This may be a normal imperative
13012 statement or a declaration that has compile-time effect. It does not
13013 include any label or other affixture. It is up to the caller to ensure
13014 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13015 reflect the source of the code to be parsed and the lexical context for
13018 The op tree representing the statement is returned. This may be a
13019 null pointer if the statement is null, for example if it was actually
13020 a subroutine definition (which has compile-time side effects). If not
13021 null, it will be ops directly implementing the statement, suitable to
13022 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13023 equivalent op (except for those embedded in a scope contained entirely
13024 within the statement).
13026 If an error occurs in parsing or compilation, in most cases a valid op
13027 tree (most likely null) is returned anyway. The error is reflected in
13028 the parser state, normally resulting in a single exception at the top
13029 level of parsing which covers all the compilation errors that occurred.
13030 Some compilation errors, however, will throw an exception immediately.
13032 The C<flags> parameter is reserved for future use, and must always
13039 Perl_parse_barestmt(pTHX_ U32 flags)
13042 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13043 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13047 =for apidoc parse_label
13049 Parse a single label, possibly optional, of the type that may prefix a
13050 Perl statement. It is up to the caller to ensure that the dynamic parser
13051 state (L</PL_parser> et al) is correctly set to reflect the source of
13052 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13053 label is optional, otherwise it is mandatory.
13055 The name of the label is returned in the form of a fresh scalar. If an
13056 optional label is absent, a null pointer is returned.
13058 If an error occurs in parsing, which can only occur if the label is
13059 mandatory, a valid label is returned anyway. The error is reflected in
13060 the parser state, normally resulting in a single exception at the top
13061 level of parsing which covers all the compilation errors that occurred.
13067 Perl_parse_label(pTHX_ U32 flags)
13069 if (flags & ~PARSE_OPTIONAL)
13070 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13072 PL_parser->yychar = yylex();
13073 if (PL_parser->yychar == LABEL) {
13074 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13075 PL_parser->yychar = YYEMPTY;
13076 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13077 op_free(pl_yylval.opval);
13085 STRLEN wlen, bufptr_pos;
13088 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13090 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13091 if (word_takes_any_delimiter(s, wlen))
13093 bufptr_pos = s - SvPVX(PL_linestr);
13095 lex_read_space(LEX_KEEP_PREVIOUS);
13097 s = SvPVX(PL_linestr) + bufptr_pos;
13098 if (t[0] == ':' && t[1] != ':') {
13099 PL_oldoldbufptr = PL_oldbufptr;
13102 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13106 if (flags & PARSE_OPTIONAL) {
13109 qerror(Perl_mess(aTHX_ "Parse error"));
13110 return newSVpvs("x");
13117 =for apidoc parse_fullstmt
13119 Parse a single complete Perl statement. This may be a normal imperative
13120 statement or a declaration that has compile-time effect, and may include
13121 optional labels. It is up to the caller to ensure that the dynamic
13122 parser state (L</PL_parser> et al) is correctly set to reflect the source
13123 of the code to be parsed and the lexical context for the statement.
13125 The op tree representing the statement is returned. This may be a
13126 null pointer if the statement is null, for example if it was actually
13127 a subroutine definition (which has compile-time side effects). If not
13128 null, it will be the result of a L</newSTATEOP> call, normally including
13129 a C<nextstate> or equivalent op.
13131 If an error occurs in parsing or compilation, in most cases a valid op
13132 tree (most likely null) is returned anyway. The error is reflected in
13133 the parser state, normally resulting in a single exception at the top
13134 level of parsing which covers all the compilation errors that occurred.
13135 Some compilation errors, however, will throw an exception immediately.
13137 The C<flags> parameter is reserved for future use, and must always
13144 Perl_parse_fullstmt(pTHX_ U32 flags)
13147 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13148 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13152 =for apidoc parse_stmtseq
13154 Parse a sequence of zero or more Perl statements. These may be normal
13155 imperative statements, including optional labels, or declarations
13156 that have compile-time effect, or any mixture thereof. The statement
13157 sequence ends when a closing brace or end-of-file is encountered in a
13158 place where a new statement could have validly started. It is up to
13159 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13160 is correctly set to reflect the source of the code to be parsed and the
13161 lexical context for the statements.
13163 The op tree representing the statement sequence is returned. This may
13164 be a null pointer if the statements were all null, for example if there
13165 were no statements or if there were only subroutine definitions (which
13166 have compile-time side effects). If not null, it will be a C<lineseq>
13167 list, normally including C<nextstate> or equivalent ops.
13169 If an error occurs in parsing or compilation, in most cases a valid op
13170 tree is returned anyway. The error is reflected in the parser state,
13171 normally resulting in a single exception at the top level of parsing
13172 which covers all the compilation errors that occurred. Some compilation
13173 errors, however, will throw an exception immediately.
13175 The C<flags> parameter is reserved for future use, and must always
13182 Perl_parse_stmtseq(pTHX_ U32 flags)
13187 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13188 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13189 c = lex_peek_unichar(0);
13190 if (c != -1 && c != /*{*/'}')
13191 qerror(Perl_mess(aTHX_ "Parse error"));
13196 =for apidoc parse_subsignature
13198 Parse a subroutine signature declaration. This is the contents of the
13199 parentheses following a named or anonymous subroutine declaration when the
13200 C<signatures> feature is enabled. Note that this function neither expects
13201 nor consumes the opening and closing parentheses around the signature; it
13202 is the caller's job to handle these.
13204 This function must only be called during parsing of a subroutine; after
13205 L</start_subparse> has been called. It might allocate lexical variables on
13206 the pad for the current subroutine.
13208 The op tree to unpack the arguments from the stack at runtime is returned.
13209 This op tree should appear at the beginning of the compiled function. The
13210 caller may wish to use L</op_append_list> to build their function body
13211 after it, or splice it together with the body before calling L</newATTRSUB>.
13213 The C<flags> parameter is reserved for future use, and must always
13220 Perl_parse_subsignature(pTHX_ U32 flags)
13223 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13224 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13228 * ex: set ts=8 sts=4 sw=4 et: