3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_brackstack (PL_parser->lex_brackstack)
52 #define PL_lex_casemods (PL_parser->lex_casemods)
53 #define PL_lex_casestack (PL_parser->lex_casestack)
54 #define PL_lex_defer (PL_parser->lex_defer)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_expect (PL_parser->lex_expect)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_pending_ident (PL_parser->pending_ident)
68 #define PL_preambled (PL_parser->preambled)
69 #define PL_sublex_info (PL_parser->sublex_info)
70 #define PL_linestr (PL_parser->linestr)
71 #define PL_expect (PL_parser->expect)
72 #define PL_copline (PL_parser->copline)
73 #define PL_bufptr (PL_parser->bufptr)
74 #define PL_oldbufptr (PL_parser->oldbufptr)
75 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76 #define PL_linestart (PL_parser->linestart)
77 #define PL_bufend (PL_parser->bufend)
78 #define PL_last_uni (PL_parser->last_uni)
79 #define PL_last_lop (PL_parser->last_lop)
80 #define PL_last_lop_op (PL_parser->last_lop_op)
81 #define PL_lex_state (PL_parser->lex_state)
82 #define PL_rsfp (PL_parser->rsfp)
83 #define PL_rsfp_filters (PL_parser->rsfp_filters)
84 #define PL_in_my (PL_parser->in_my)
85 #define PL_in_my_stash (PL_parser->in_my_stash)
86 #define PL_tokenbuf (PL_parser->tokenbuf)
87 #define PL_multi_end (PL_parser->multi_end)
88 #define PL_error_count (PL_parser->error_count)
91 # define PL_endwhite (PL_parser->endwhite)
92 # define PL_faketokens (PL_parser->faketokens)
93 # define PL_lasttoke (PL_parser->lasttoke)
94 # define PL_nextwhite (PL_parser->nextwhite)
95 # define PL_realtokenstart (PL_parser->realtokenstart)
96 # define PL_skipwhite (PL_parser->skipwhite)
97 # define PL_thisclose (PL_parser->thisclose)
98 # define PL_thismad (PL_parser->thismad)
99 # define PL_thisopen (PL_parser->thisopen)
100 # define PL_thisstuff (PL_parser->thisstuff)
101 # define PL_thistoken (PL_parser->thistoken)
102 # define PL_thiswhite (PL_parser->thiswhite)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_nexttoke (PL_parser->nexttoke)
105 # define PL_curforce (PL_parser->curforce)
107 # define PL_nexttoke (PL_parser->nexttoke)
108 # define PL_nexttype (PL_parser->nexttype)
109 # define PL_nextval (PL_parser->nextval)
112 /* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
115 S_pending_ident(pTHX);
117 static const char ident_too_long[] = "Identifier too long";
120 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
121 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
123 # define CURMAD(slot,sv)
124 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
127 #define XFAKEBRACK 128
128 #define XENUMMASK 127
130 #ifdef USE_UTF8_SCRIPTS
131 # define UTF (!IN_BYTES)
133 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
136 /* The maximum number of characters preceding the unrecognized one to display */
137 #define UNRECOGNIZED_PRECEDE_COUNT 10
139 /* In variables named $^X, these are the legal values for X.
140 * 1999-02-27 mjd-perl-patch@plover.com */
141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
145 /* LEX_* are values for PL_lex_state, the state of the lexer.
146 * They are arranged oddly so that the guard on the switch statement
147 * can get by with a single comparison (if the compiler is smart enough).
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN1 : not used, except for not, which isn't a UNIOP
225 * BOop : bitwise or or xor
227 * SHop : shift operator
228 * PWop : power operator
229 * PMop : pattern-matching operator
230 * Aop : addition-level operator
231 * Mop : multiplication-level operator
232 * Eop : equality-testing operator
233 * Rop : relational operator <= != gt
235 * Also see LOP and lop() below.
238 #ifdef DEBUGGING /* Serve -DT. */
239 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
241 # define REPORT(retval) (retval)
244 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
245 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
246 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
247 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
248 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
250 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
251 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
252 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
253 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
254 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
255 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
256 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
257 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
258 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
259 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
260 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
261 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
262 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
263 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
265 /* This bit of chicanery makes a unary function followed by
266 * a parenthesis into a function with one argument, highest precedence.
267 * The UNIDOR macro is for unary functions that can be followed by the //
268 * operator (such as C<shift // 0>).
270 #define UNI2(f,x) { \
271 pl_yylval.ival = f; \
274 PL_last_uni = PL_oldbufptr; \
275 PL_last_lop_op = f; \
277 return REPORT( (int)FUNC1 ); \
279 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
281 #define UNI(f) UNI2(f,XTERM)
282 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
284 #define UNIBRACK(f) { \
285 pl_yylval.ival = f; \
287 PL_last_uni = PL_oldbufptr; \
289 return REPORT( (int)FUNC1 ); \
291 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 /* grandfather return to old style */
295 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
299 /* how to interpret the pl_yylval associated with the token */
303 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
309 static struct debug_tokens {
311 enum token_type type;
313 } const debug_tokens[] =
315 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
316 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
317 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
318 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
319 { ARROW, TOKENTYPE_NONE, "ARROW" },
320 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
321 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
322 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
323 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
324 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
325 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
326 { DO, TOKENTYPE_NONE, "DO" },
327 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
328 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
329 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
330 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
331 { ELSE, TOKENTYPE_NONE, "ELSE" },
332 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
333 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
334 { FOR, TOKENTYPE_IVAL, "FOR" },
335 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
336 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
337 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
338 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
339 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
340 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
341 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
342 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
343 { IF, TOKENTYPE_IVAL, "IF" },
344 { LABEL, TOKENTYPE_PVAL, "LABEL" },
345 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
346 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
347 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
348 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
349 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
350 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
351 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
352 { MY, TOKENTYPE_IVAL, "MY" },
353 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
354 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
355 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
356 { OROP, TOKENTYPE_IVAL, "OROP" },
357 { OROR, TOKENTYPE_NONE, "OROR" },
358 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
359 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
360 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
361 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
362 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
363 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
364 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
365 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
366 { PREINC, TOKENTYPE_NONE, "PREINC" },
367 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
368 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
369 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
370 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
371 { SUB, TOKENTYPE_NONE, "SUB" },
372 { THING, TOKENTYPE_OPVAL, "THING" },
373 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
374 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
375 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
376 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
377 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
378 { USE, TOKENTYPE_IVAL, "USE" },
379 { WHEN, TOKENTYPE_IVAL, "WHEN" },
380 { WHILE, TOKENTYPE_IVAL, "WHILE" },
381 { WORD, TOKENTYPE_OPVAL, "WORD" },
382 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
383 { 0, TOKENTYPE_NONE, NULL }
386 /* dump the returned token in rv, plus any optional arg in pl_yylval */
389 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
393 PERL_ARGS_ASSERT_TOKEREPORT;
396 const char *name = NULL;
397 enum token_type type = TOKENTYPE_NONE;
398 const struct debug_tokens *p;
399 SV* const report = newSVpvs("<== ");
401 for (p = debug_tokens; p->token; p++) {
402 if (p->token == (int)rv) {
409 Perl_sv_catpv(aTHX_ report, name);
410 else if ((char)rv > ' ' && (char)rv < '~')
411 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
413 sv_catpvs(report, "EOF");
415 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
423 case TOKENTYPE_OPNUM:
424 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
425 PL_op_name[lvalp->ival]);
428 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
430 case TOKENTYPE_OPVAL:
432 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
433 PL_op_name[lvalp->opval->op_type]);
434 if (lvalp->opval->op_type == OP_CONST) {
435 Perl_sv_catpvf(aTHX_ report, " %s",
436 SvPEEK(cSVOPx_sv(lvalp->opval)));
441 sv_catpvs(report, "(opval=null)");
444 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
450 /* print the buffer with suitable escapes */
453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
455 SV* const tmp = newSVpvs("");
457 PERL_ARGS_ASSERT_PRINTBUF;
459 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 S_deprecate_commaless_var_list(pTHX) {
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
475 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
476 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
480 S_ao(pTHX_ int toketype)
483 if (*PL_bufptr == '=') {
485 if (toketype == ANDAND)
486 pl_yylval.ival = OP_ANDASSIGN;
487 else if (toketype == OROR)
488 pl_yylval.ival = OP_ORASSIGN;
489 else if (toketype == DORDOR)
490 pl_yylval.ival = OP_DORASSIGN;
498 * When Perl expects an operator and finds something else, no_op
499 * prints the warning. It always prints "<something> found where
500 * operator expected. It prints "Missing semicolon on previous line?"
501 * if the surprise occurs at the start of the line. "do you need to
502 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
503 * where the compiler doesn't know if foo is a method call or a function.
504 * It prints "Missing operator before end of line" if there's nothing
505 * after the missing operator, or "... before <...>" if there is something
506 * after the missing operator.
510 S_no_op(pTHX_ const char *const what, char *s)
513 char * const oldbp = PL_bufptr;
514 const bool is_first = (PL_oldbufptr == PL_linestart);
516 PERL_ARGS_ASSERT_NO_OP;
522 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
523 if (ckWARN_d(WARN_SYNTAX)) {
525 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
526 "\t(Missing semicolon on previous line?)\n");
527 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
529 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
531 if (t < PL_bufptr && isSPACE(*t))
532 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
533 "\t(Do you need to predeclare %.*s?)\n",
534 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
538 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
539 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
547 * Complain about missing quote/regexp/heredoc terminator.
548 * If it's called with NULL then it cauterizes the line buffer.
549 * If we're in a delimited string and the delimiter is a control
550 * character, it's reformatted into a two-char sequence like ^C.
555 S_missingterm(pTHX_ char *s)
561 char * const nl = strrchr(s,'\n');
565 else if (isCNTRL(PL_multi_close)) {
567 tmpbuf[1] = (char)toCTRL(PL_multi_close);
572 *tmpbuf = (char)PL_multi_close;
576 q = strchr(s,'"') ? '\'' : '"';
577 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
580 #define FEATURE_IS_ENABLED(name) \
581 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
582 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
583 /* The longest string we pass in. */
584 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
587 * S_feature_is_enabled
588 * Check whether the named feature is enabled.
591 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
594 HV * const hinthv = GvHV(PL_hintgv);
595 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
597 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
599 assert(namelen <= MAX_FEATURE_LEN);
600 memcpy(&he_name[8], name, namelen);
602 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
606 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
607 * utf16-to-utf8-reversed.
610 #ifdef PERL_CR_FILTER
614 register const char *s = SvPVX_const(sv);
615 register const char * const e = s + SvCUR(sv);
617 PERL_ARGS_ASSERT_STRIP_RETURN;
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 parser->stack = NULL;
678 parser->stack_size = 0;
680 /* on scope exit, free this parser and restore any outer one */
682 parser->saved_curcop = PL_curcop;
684 /* initialise lexer state */
687 parser->curforce = -1;
689 parser->nexttoke = 0;
691 parser->error_count = oparser ? oparser->error_count : 0;
692 parser->copline = NOLINE;
693 parser->lex_state = LEX_NORMAL;
694 parser->expect = XSTATE;
696 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
697 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
699 Newx(parser->lex_brackstack, 120, char);
700 Newx(parser->lex_casestack, 12, char);
701 *parser->lex_casestack = '\0';
704 s = SvPV_const(line, len);
710 parser->linestr = newSVpvs("\n;");
711 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
712 /* avoid tie/overload weirdness */
713 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
715 sv_catpvs(parser->linestr, "\n;");
718 SvREFCNT_inc_simple_void_NN(line);
719 parser->linestr = line;
721 parser->oldoldbufptr =
724 parser->linestart = SvPVX(parser->linestr);
725 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
726 parser->last_lop = parser->last_uni = NULL;
730 /* delete a parser object */
733 Perl_parser_free(pTHX_ const yy_parser *parser)
735 PERL_ARGS_ASSERT_PARSER_FREE;
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && (!parser->old_parser ||
743 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->lex_brackstack);
748 Safefree(parser->lex_casestack);
749 PL_parser = parser->old_parser;
756 * Finalizer for lexing operations. Must be called when the parser is
757 * done with the lexer.
764 PL_doextract = FALSE;
768 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
770 Buffer scalar containing the chunk currently under consideration of the
771 text currently being lexed. This is always a plain string scalar (for
772 which C<SvPOK> is true). It is not intended to be used as a scalar by
773 normal scalar means; instead refer to the buffer directly by the pointer
774 variables described below.
776 The lexer maintains various C<char*> pointers to things in the
777 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
778 reallocated, all of these pointers must be updated. Don't attempt to
779 do this manually, but rather use L</lex_grow_linestr> if you need to
780 reallocate the buffer.
782 The content of the text chunk in the buffer is commonly exactly one
783 complete line of input, up to and including a newline terminator,
784 but there are situations where it is otherwise. The octets of the
785 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
786 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
787 flag on this scalar, which may disagree with it.
789 For direct examination of the buffer, the variable
790 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
791 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
792 of these pointers is usually preferable to examination of the scalar
793 through normal scalar means.
795 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
797 Direct pointer to the end of the chunk of text currently being lexed, the
798 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
799 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
800 always located at the end of the buffer, and does not count as part of
801 the buffer's contents.
803 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
805 Points to the current position of lexing inside the lexer buffer.
806 Characters around this point may be freely examined, within
807 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
808 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
809 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
811 Lexing code (whether in the Perl core or not) moves this pointer past
812 the characters that it consumes. It is also expected to perform some
813 bookkeeping whenever a newline character is consumed. This movement
814 can be more conveniently performed by the function L</lex_read_to>,
815 which handles newlines appropriately.
817 Interpretation of the buffer's octets can be abstracted out by
818 using the slightly higher-level functions L</lex_peek_unichar> and
819 L</lex_read_unichar>.
821 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
823 Points to the start of the current line inside the lexer buffer.
824 This is useful for indicating at which column an error occurred, and
825 not much else. This must be updated by any lexing code that consumes
826 a newline; the function L</lex_read_to> handles this detail.
832 =for apidoc Amx|bool|lex_bufutf8
834 Indicates whether the octets in the lexer buffer
835 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
836 of Unicode characters. If not, they should be interpreted as Latin-1
837 characters. This is analogous to the C<SvUTF8> flag for scalars.
839 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
840 contains valid UTF-8. Lexing code must be robust in the face of invalid
843 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
844 is significant, but not the whole story regarding the input character
845 encoding. Normally, when a file is being read, the scalar contains octets
846 and its C<SvUTF8> flag is off, but the octets should be interpreted as
847 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
848 however, the scalar may have the C<SvUTF8> flag on, and in this case its
849 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
850 is in effect. This logic may change in the future; use this function
851 instead of implementing the logic yourself.
857 Perl_lex_bufutf8(pTHX)
863 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
865 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
866 at least I<len> octets (including terminating NUL). Returns a
867 pointer to the reallocated buffer. This is necessary before making
868 any direct modification of the buffer that would increase its length.
869 L</lex_stuff_pvn> provides a more convenient way to insert text into
872 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
873 this function updates all of the lexer's variables that point directly
880 Perl_lex_grow_linestr(pTHX_ STRLEN len)
884 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
885 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
886 linestr = PL_parser->linestr;
887 buf = SvPVX(linestr);
888 if (len <= SvLEN(linestr))
890 bufend_pos = PL_parser->bufend - buf;
891 bufptr_pos = PL_parser->bufptr - buf;
892 oldbufptr_pos = PL_parser->oldbufptr - buf;
893 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
894 linestart_pos = PL_parser->linestart - buf;
895 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
896 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
897 buf = sv_grow(linestr, len);
898 PL_parser->bufend = buf + bufend_pos;
899 PL_parser->bufptr = buf + bufptr_pos;
900 PL_parser->oldbufptr = buf + oldbufptr_pos;
901 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
902 PL_parser->linestart = buf + linestart_pos;
903 if (PL_parser->last_uni)
904 PL_parser->last_uni = buf + last_uni_pos;
905 if (PL_parser->last_lop)
906 PL_parser->last_lop = buf + last_lop_pos;
911 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
913 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
914 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
915 reallocating the buffer if necessary. This means that lexing code that
916 runs later will see the characters as if they had appeared in the input.
917 It is not recommended to do this as part of normal parsing, and most
918 uses of this facility run the risk of the inserted characters being
919 interpreted in an unintended manner.
921 The string to be inserted is represented by I<len> octets starting
922 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
923 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
924 The characters are recoded for the lexer buffer, according to how the
925 buffer is currently being interpreted (L</lex_bufutf8>). If a string
926 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
927 function is more convenient.
933 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
937 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
938 if (flags & ~(LEX_STUFF_UTF8))
939 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
941 if (flags & LEX_STUFF_UTF8) {
945 const char *p, *e = pv+len;
946 for (p = pv; p != e; p++)
947 highhalf += !!(((U8)*p) & 0x80);
950 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
951 bufptr = PL_parser->bufptr;
952 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
953 SvCUR_set(PL_parser->linestr,
954 SvCUR(PL_parser->linestr) + len+highhalf);
955 PL_parser->bufend += len+highhalf;
956 for (p = pv; p != e; p++) {
959 *bufptr++ = (char)(0xc0 | (c >> 6));
960 *bufptr++ = (char)(0x80 | (c & 0x3f));
967 if (flags & LEX_STUFF_UTF8) {
969 const char *p, *e = pv+len;
970 for (p = pv; p != e; p++) {
973 Perl_croak(aTHX_ "Lexing code attempted to stuff "
974 "non-Latin-1 character into Latin-1 input");
975 } else if (c >= 0xc2 && p+1 != e &&
976 (((U8)p[1]) & 0xc0) == 0x80) {
979 } else if (c >= 0x80) {
980 /* malformed UTF-8 */
982 SAVESPTR(PL_warnhook);
983 PL_warnhook = PERL_WARNHOOK_FATAL;
984 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
990 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
991 bufptr = PL_parser->bufptr;
992 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
993 SvCUR_set(PL_parser->linestr,
994 SvCUR(PL_parser->linestr) + len-highhalf);
995 PL_parser->bufend += len-highhalf;
996 for (p = pv; p != e; p++) {
999 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1002 *bufptr++ = (char)c;
1007 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008 bufptr = PL_parser->bufptr;
1009 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1010 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1018 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary. This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1028 The string to be inserted is represented by octets starting at I<pv>
1029 and continuing to the first nul. These octets are interpreted as either
1030 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1031 in I<flags>. The characters are recoded for the lexer buffer, according
1032 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1033 If it is not convenient to nul-terminate a string to be inserted, the
1034 L</lex_stuff_pvn> function is more appropriate.
1040 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1042 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1043 lex_stuff_pvn(pv, strlen(pv), flags);
1047 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1049 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1050 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1051 reallocating the buffer if necessary. This means that lexing code that
1052 runs later will see the characters as if they had appeared in the input.
1053 It is not recommended to do this as part of normal parsing, and most
1054 uses of this facility run the risk of the inserted characters being
1055 interpreted in an unintended manner.
1057 The string to be inserted is the string value of I<sv>. The characters
1058 are recoded for the lexer buffer, according to how the buffer is currently
1059 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1060 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1061 need to construct a scalar.
1067 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1071 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1073 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1075 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1079 =for apidoc Amx|void|lex_unstuff|char *ptr
1081 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1082 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1083 This hides the discarded text from any lexing code that runs later,
1084 as if the text had never appeared.
1086 This is not the normal way to consume lexed text. For that, use
1093 Perl_lex_unstuff(pTHX_ char *ptr)
1097 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1098 buf = PL_parser->bufptr;
1100 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1103 bufend = PL_parser->bufend;
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106 unstuff_len = ptr - buf;
1107 Move(ptr, buf, bufend+1-ptr, char);
1108 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1109 PL_parser->bufend = bufend - unstuff_len;
1113 =for apidoc Amx|void|lex_read_to|char *ptr
1115 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1116 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1117 performing the correct bookkeeping whenever a newline character is passed.
1118 This is the normal way to consume lexed text.
1120 Interpretation of the buffer's octets can be abstracted out by
1121 using the slightly higher-level functions L</lex_peek_unichar> and
1122 L</lex_read_unichar>.
1128 Perl_lex_read_to(pTHX_ char *ptr)
1131 PERL_ARGS_ASSERT_LEX_READ_TO;
1132 s = PL_parser->bufptr;
1133 if (ptr < s || ptr > PL_parser->bufend)
1134 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1135 for (; s != ptr; s++)
1137 CopLINE_inc(PL_curcop);
1138 PL_parser->linestart = s+1;
1140 PL_parser->bufptr = ptr;
1144 =for apidoc Amx|void|lex_discard_to|char *ptr
1146 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1147 up to I<ptr>. The remaining content of the buffer will be moved, and
1148 all pointers into the buffer updated appropriately. I<ptr> must not
1149 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1150 it is not permitted to discard text that has yet to be lexed.
1152 Normally it is not necessarily to do this directly, because it suffices to
1153 use the implicit discarding behaviour of L</lex_next_chunk> and things
1154 based on it. However, if a token stretches across multiple lines,
1155 and the lexing code has kept multiple lines of text in the buffer for
1156 that purpose, then after completion of the token it would be wise to
1157 explicitly discard the now-unneeded earlier lines, to avoid future
1158 multi-line tokens growing the buffer without bound.
1164 Perl_lex_discard_to(pTHX_ char *ptr)
1168 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1169 buf = SvPVX(PL_parser->linestr);
1171 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174 if (ptr > PL_parser->bufptr)
1175 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1176 discard_len = ptr - buf;
1177 if (PL_parser->oldbufptr < ptr)
1178 PL_parser->oldbufptr = ptr;
1179 if (PL_parser->oldoldbufptr < ptr)
1180 PL_parser->oldoldbufptr = ptr;
1181 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1182 PL_parser->last_uni = NULL;
1183 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1184 PL_parser->last_lop = NULL;
1185 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1186 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1187 PL_parser->bufend -= discard_len;
1188 PL_parser->bufptr -= discard_len;
1189 PL_parser->oldbufptr -= discard_len;
1190 PL_parser->oldoldbufptr -= discard_len;
1191 if (PL_parser->last_uni)
1192 PL_parser->last_uni -= discard_len;
1193 if (PL_parser->last_lop)
1194 PL_parser->last_lop -= discard_len;
1198 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1200 Reads in the next chunk of text to be lexed, appending it to
1201 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1202 looked to the end of the current chunk and wants to know more. It is
1203 usual, but not necessary, for lexing to have consumed the entirety of
1204 the current chunk at this time.
1206 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1207 chunk (i.e., the current chunk has been entirely consumed), normally the
1208 current chunk will be discarded at the same time that the new chunk is
1209 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1210 will not be discarded. If the current chunk has not been entirely
1211 consumed, then it will not be discarded regardless of the flag.
1213 Returns true if some new text was added to the buffer, or false if the
1214 buffer has reached the end of the input text.
1219 #define LEX_FAKE_EOF 0x80000000
1222 Perl_lex_next_chunk(pTHX_ U32 flags)
1226 STRLEN old_bufend_pos, new_bufend_pos;
1227 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1228 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1229 bool got_some_for_debugger = 0;
1231 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1232 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1233 linestr = PL_parser->linestr;
1234 buf = SvPVX(linestr);
1235 if (!(flags & LEX_KEEP_PREVIOUS) &&
1236 PL_parser->bufptr == PL_parser->bufend) {
1237 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1239 if (PL_parser->last_uni != PL_parser->bufend)
1240 PL_parser->last_uni = NULL;
1241 if (PL_parser->last_lop != PL_parser->bufend)
1242 PL_parser->last_lop = NULL;
1243 last_uni_pos = last_lop_pos = 0;
1247 old_bufend_pos = PL_parser->bufend - buf;
1248 bufptr_pos = PL_parser->bufptr - buf;
1249 oldbufptr_pos = PL_parser->oldbufptr - buf;
1250 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1251 linestart_pos = PL_parser->linestart - buf;
1252 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1253 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1255 if (flags & LEX_FAKE_EOF) {
1257 } else if (!PL_parser->rsfp) {
1259 } else if (filter_gets(linestr, old_bufend_pos)) {
1261 got_some_for_debugger = 1;
1263 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1264 sv_setpvs(linestr, "");
1266 /* End of real input. Close filehandle (unless it was STDIN),
1267 * then add implicit termination.
1269 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1270 PerlIO_clearerr(PL_parser->rsfp);
1271 else if (PL_parser->rsfp)
1272 (void)PerlIO_close(PL_parser->rsfp);
1273 PL_parser->rsfp = NULL;
1274 PL_doextract = FALSE;
1276 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1279 if (!PL_in_eval && PL_minus_p) {
1281 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1282 PL_minus_n = PL_minus_p = 0;
1283 } else if (!PL_in_eval && PL_minus_n) {
1284 sv_catpvs(linestr, /*{*/";}");
1287 sv_catpvs(linestr, ";");
1290 buf = SvPVX(linestr);
1291 new_bufend_pos = SvCUR(linestr);
1292 PL_parser->bufend = buf + new_bufend_pos;
1293 PL_parser->bufptr = buf + bufptr_pos;
1294 PL_parser->oldbufptr = buf + oldbufptr_pos;
1295 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1296 PL_parser->linestart = buf + linestart_pos;
1297 if (PL_parser->last_uni)
1298 PL_parser->last_uni = buf + last_uni_pos;
1299 if (PL_parser->last_lop)
1300 PL_parser->last_lop = buf + last_lop_pos;
1301 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1302 PL_curstash != PL_debstash) {
1303 /* debugger active and we're not compiling the debugger code,
1304 * so store the line into the debugger's array of lines
1306 update_debugger_info(NULL, buf+old_bufend_pos,
1307 new_bufend_pos-old_bufend_pos);
1313 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1315 Looks ahead one (Unicode) character in the text currently being lexed.
1316 Returns the codepoint (unsigned integer value) of the next character,
1317 or -1 if lexing has reached the end of the input text. To consume the
1318 peeked character, use L</lex_read_unichar>.
1320 If the next character is in (or extends into) the next chunk of input
1321 text, the next chunk will be read in. Normally the current chunk will be
1322 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1323 then the current chunk will not be discarded.
1325 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1326 is encountered, an exception is generated.
1332 Perl_lex_peek_unichar(pTHX_ U32 flags)
1336 if (flags & ~(LEX_KEEP_PREVIOUS))
1337 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1338 s = PL_parser->bufptr;
1339 bufend = PL_parser->bufend;
1345 if (!lex_next_chunk(flags))
1347 s = PL_parser->bufptr;
1348 bufend = PL_parser->bufend;
1354 len = PL_utf8skip[head];
1355 while ((STRLEN)(bufend-s) < len) {
1356 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1358 s = PL_parser->bufptr;
1359 bufend = PL_parser->bufend;
1362 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1363 if (retlen == (STRLEN)-1) {
1364 /* malformed UTF-8 */
1366 SAVESPTR(PL_warnhook);
1367 PL_warnhook = PERL_WARNHOOK_FATAL;
1368 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1374 if (!lex_next_chunk(flags))
1376 s = PL_parser->bufptr;
1383 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1385 Reads the next (Unicode) character in the text currently being lexed.
1386 Returns the codepoint (unsigned integer value) of the character read,
1387 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1388 if lexing has reached the end of the input text. To non-destructively
1389 examine the next character, use L</lex_peek_unichar> instead.
1391 If the next character is in (or extends into) the next chunk of input
1392 text, the next chunk will be read in. Normally the current chunk will be
1393 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1394 then the current chunk will not be discarded.
1396 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1397 is encountered, an exception is generated.
1403 Perl_lex_read_unichar(pTHX_ U32 flags)
1406 if (flags & ~(LEX_KEEP_PREVIOUS))
1407 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1408 c = lex_peek_unichar(flags);
1411 CopLINE_inc(PL_curcop);
1412 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1418 =for apidoc Amx|void|lex_read_space|U32 flags
1420 Reads optional spaces, in Perl style, in the text currently being
1421 lexed. The spaces may include ordinary whitespace characters and
1422 Perl-style comments. C<#line> directives are processed if encountered.
1423 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1424 at a non-space character (or the end of the input text).
1426 If spaces extend into the next chunk of input text, the next chunk will
1427 be read in. Normally the current chunk will be discarded at the same
1428 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1429 chunk will not be discarded.
1434 #define LEX_NO_NEXT_CHUNK 0x80000000
1437 Perl_lex_read_space(pTHX_ U32 flags)
1440 bool need_incline = 0;
1441 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1442 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1445 sv_free(PL_skipwhite);
1446 PL_skipwhite = NULL;
1449 PL_skipwhite = newSVpvs("");
1450 #endif /* PERL_MAD */
1451 s = PL_parser->bufptr;
1452 bufend = PL_parser->bufend;
1458 } while (!(c == '\n' || (c == 0 && s == bufend)));
1459 } else if (c == '\n') {
1461 PL_parser->linestart = s;
1466 } else if (isSPACE(c)) {
1468 } else if (c == 0 && s == bufend) {
1472 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473 #endif /* PERL_MAD */
1474 if (flags & LEX_NO_NEXT_CHUNK)
1476 PL_parser->bufptr = s;
1477 CopLINE_inc(PL_curcop);
1478 got_more = lex_next_chunk(flags);
1479 CopLINE_dec(PL_curcop);
1480 s = PL_parser->bufptr;
1481 bufend = PL_parser->bufend;
1484 if (need_incline && PL_parser->rsfp) {
1494 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1495 #endif /* PERL_MAD */
1496 PL_parser->bufptr = s;
1501 * This subroutine has nothing to do with tilting, whether at windmills
1502 * or pinball tables. Its name is short for "increment line". It
1503 * increments the current line number in CopLINE(PL_curcop) and checks
1504 * to see whether the line starts with a comment of the form
1505 * # line 500 "foo.pm"
1506 * If so, it sets the current line number and file to the values in the comment.
1510 S_incline(pTHX_ const char *s)
1517 PERL_ARGS_ASSERT_INCLINE;
1519 CopLINE_inc(PL_curcop);
1522 while (SPACE_OR_TAB(*s))
1524 if (strnEQ(s, "line", 4))
1528 if (SPACE_OR_TAB(*s))
1532 while (SPACE_OR_TAB(*s))
1540 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1542 while (SPACE_OR_TAB(*s))
1544 if (*s == '"' && (t = strchr(s+1, '"'))) {
1550 while (!isSPACE(*t))
1554 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1556 if (*e != '\n' && *e != '\0')
1557 return; /* false alarm */
1560 const STRLEN len = t - s;
1561 #ifndef USE_ITHREADS
1562 SV *const temp_sv = CopFILESV(PL_curcop);
1567 cf = SvPVX(temp_sv);
1568 tmplen = SvCUR(temp_sv);
1574 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1575 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576 * to *{"::_<newfilename"} */
1577 /* However, the long form of evals is only turned on by the
1578 debugger - usually they're "(eval %lu)" */
1582 STRLEN tmplen2 = len;
1583 if (tmplen + 2 <= sizeof smallbuf)
1586 Newx(tmpbuf, tmplen + 2, char);
1589 memcpy(tmpbuf + 2, cf, tmplen);
1591 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1596 if (tmplen2 + 2 <= sizeof smallbuf)
1599 Newx(tmpbuf2, tmplen2 + 2, char);
1601 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602 /* Either they malloc'd it, or we malloc'd it,
1603 so no prefix is present in ours. */
1608 memcpy(tmpbuf2 + 2, s, tmplen2);
1611 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1613 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1614 /* adjust ${"::_<newfilename"} to store the new file name */
1615 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1616 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1617 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1620 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1622 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1625 CopFILE_free(PL_curcop);
1626 CopFILE_setn(PL_curcop, s, len);
1628 CopLINE_set(PL_curcop, atoi(n)-1);
1632 /* skip space before PL_thistoken */
1635 S_skipspace0(pTHX_ register char *s)
1637 PERL_ARGS_ASSERT_SKIPSPACE0;
1644 PL_thiswhite = newSVpvs("");
1645 sv_catsv(PL_thiswhite, PL_skipwhite);
1646 sv_free(PL_skipwhite);
1649 PL_realtokenstart = s - SvPVX(PL_linestr);
1653 /* skip space after PL_thistoken */
1656 S_skipspace1(pTHX_ register char *s)
1658 const char *start = s;
1659 I32 startoff = start - SvPVX(PL_linestr);
1661 PERL_ARGS_ASSERT_SKIPSPACE1;
1666 start = SvPVX(PL_linestr) + startoff;
1667 if (!PL_thistoken && PL_realtokenstart >= 0) {
1668 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1669 PL_thistoken = newSVpvn(tstart, start - tstart);
1671 PL_realtokenstart = -1;
1674 PL_nextwhite = newSVpvs("");
1675 sv_catsv(PL_nextwhite, PL_skipwhite);
1676 sv_free(PL_skipwhite);
1683 S_skipspace2(pTHX_ register char *s, SV **svp)
1686 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1687 const I32 startoff = s - SvPVX(PL_linestr);
1689 PERL_ARGS_ASSERT_SKIPSPACE2;
1692 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1693 if (!PL_madskills || !svp)
1695 start = SvPVX(PL_linestr) + startoff;
1696 if (!PL_thistoken && PL_realtokenstart >= 0) {
1697 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698 PL_thistoken = newSVpvn(tstart, start - tstart);
1699 PL_realtokenstart = -1;
1703 *svp = newSVpvs("");
1704 sv_setsv(*svp, PL_skipwhite);
1705 sv_free(PL_skipwhite);
1714 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1716 AV *av = CopFILEAVx(PL_curcop);
1718 SV * const sv = newSV_type(SVt_PVMG);
1720 sv_setsv(sv, orig_sv);
1722 sv_setpvn(sv, buf, len);
1725 av_store(av, (I32)CopLINE(PL_curcop), sv);
1731 * Called to gobble the appropriate amount and type of whitespace.
1732 * Skips comments as well.
1736 S_skipspace(pTHX_ register char *s)
1740 #endif /* PERL_MAD */
1741 PERL_ARGS_ASSERT_SKIPSPACE;
1744 sv_free(PL_skipwhite);
1745 PL_skipwhite = NULL;
1747 #endif /* PERL_MAD */
1748 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1749 while (s < PL_bufend && SPACE_OR_TAB(*s))
1752 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1754 lex_read_space(LEX_KEEP_PREVIOUS |
1755 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1756 LEX_NO_NEXT_CHUNK : 0));
1758 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1759 if (PL_linestart > PL_bufptr)
1760 PL_bufptr = PL_linestart;
1765 PL_skipwhite = newSVpvn(start, s-start);
1766 #endif /* PERL_MAD */
1772 * Check the unary operators to ensure there's no ambiguity in how they're
1773 * used. An ambiguous piece of code would be:
1775 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1776 * the +5 is its argument.
1786 if (PL_oldoldbufptr != PL_last_uni)
1788 while (isSPACE(*PL_last_uni))
1791 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1793 if ((t = strchr(s, '(')) && t < PL_bufptr)
1796 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1797 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1798 (int)(s - PL_last_uni), PL_last_uni);
1802 * LOP : macro to build a list operator. Its behaviour has been replaced
1803 * with a subroutine, S_lop() for which LOP is just another name.
1806 #define LOP(f,x) return lop(f,x,s)
1810 * Build a list operator (or something that might be one). The rules:
1811 * - if we have a next token, then it's a list operator [why?]
1812 * - if the next thing is an opening paren, then it's a function
1813 * - else it's a list operator
1817 S_lop(pTHX_ I32 f, int x, char *s)
1821 PERL_ARGS_ASSERT_LOP;
1827 PL_last_lop = PL_oldbufptr;
1828 PL_last_lop_op = (OPCODE)f;
1831 return REPORT(LSTOP);
1834 return REPORT(LSTOP);
1837 return REPORT(FUNC);
1840 return REPORT(FUNC);
1842 return REPORT(LSTOP);
1848 * Sets up for an eventual force_next(). start_force(0) basically does
1849 * an unshift, while start_force(-1) does a push. yylex removes items
1854 S_start_force(pTHX_ int where)
1858 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1859 where = PL_lasttoke;
1860 assert(PL_curforce < 0 || PL_curforce == where);
1861 if (PL_curforce != where) {
1862 for (i = PL_lasttoke; i > where; --i) {
1863 PL_nexttoke[i] = PL_nexttoke[i-1];
1867 if (PL_curforce < 0) /* in case of duplicate start_force() */
1868 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1869 PL_curforce = where;
1872 curmad('^', newSVpvs(""));
1873 CURMAD('_', PL_nextwhite);
1878 S_curmad(pTHX_ char slot, SV *sv)
1884 if (PL_curforce < 0)
1885 where = &PL_thismad;
1887 where = &PL_nexttoke[PL_curforce].next_mad;
1893 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1895 else if (PL_encoding) {
1896 sv_recode_to_utf8(sv, PL_encoding);
1901 /* keep a slot open for the head of the list? */
1902 if (slot != '_' && *where && (*where)->mad_key == '^') {
1903 (*where)->mad_key = slot;
1904 sv_free(MUTABLE_SV(((*where)->mad_val)));
1905 (*where)->mad_val = (void*)sv;
1908 addmad(newMADsv(slot, sv), where, 0);
1911 # define start_force(where) NOOP
1912 # define curmad(slot, sv) NOOP
1917 * When the lexer realizes it knows the next token (for instance,
1918 * it is reordering tokens for the parser) then it can call S_force_next
1919 * to know what token to return the next time the lexer is called. Caller
1920 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1921 * and possibly PL_expect to ensure the lexer handles the token correctly.
1925 S_force_next(pTHX_ I32 type)
1930 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1931 tokereport(type, &NEXTVAL_NEXTTOKE);
1935 if (PL_curforce < 0)
1936 start_force(PL_lasttoke);
1937 PL_nexttoke[PL_curforce].next_type = type;
1938 if (PL_lex_state != LEX_KNOWNEXT)
1939 PL_lex_defer = PL_lex_state;
1940 PL_lex_state = LEX_KNOWNEXT;
1941 PL_lex_expect = PL_expect;
1944 PL_nexttype[PL_nexttoke] = type;
1946 if (PL_lex_state != LEX_KNOWNEXT) {
1947 PL_lex_defer = PL_lex_state;
1948 PL_lex_expect = PL_expect;
1949 PL_lex_state = LEX_KNOWNEXT;
1957 if (PL_parser->yychar != YYEMPTY) {
1959 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1960 force_next(PL_parser->yychar);
1961 PL_parser->yychar = YYEMPTY;
1966 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1969 SV * const sv = newSVpvn_utf8(start, len,
1972 && !is_ascii_string((const U8*)start, len)
1973 && is_utf8_string((const U8*)start, len));
1979 * When the lexer knows the next thing is a word (for instance, it has
1980 * just seen -> and it knows that the next char is a word char, then
1981 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1985 * char *start : buffer position (must be within PL_linestr)
1986 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1987 * int check_keyword : if true, Perl checks to make sure the word isn't
1988 * a keyword (do this if the word is a label, e.g. goto FOO)
1989 * int allow_pack : if true, : characters will also be allowed (require,
1990 * use, etc. do this)
1991 * int allow_initial_tick : used by the "sub" lexer only.
1995 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2001 PERL_ARGS_ASSERT_FORCE_WORD;
2003 start = SKIPSPACE1(start);
2005 if (isIDFIRST_lazy_if(s,UTF) ||
2006 (allow_pack && *s == ':') ||
2007 (allow_initial_tick && *s == '\'') )
2009 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2010 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2012 start_force(PL_curforce);
2014 curmad('X', newSVpvn(start,s-start));
2015 if (token == METHOD) {
2020 PL_expect = XOPERATOR;
2024 curmad('g', newSVpvs( "forced" ));
2025 NEXTVAL_NEXTTOKE.opval
2026 = (OP*)newSVOP(OP_CONST,0,
2027 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2028 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2036 * Called when the lexer wants $foo *foo &foo etc, but the program
2037 * text only contains the "foo" portion. The first argument is a pointer
2038 * to the "foo", and the second argument is the type symbol to prefix.
2039 * Forces the next token to be a "WORD".
2040 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2044 S_force_ident(pTHX_ register const char *s, int kind)
2048 PERL_ARGS_ASSERT_FORCE_IDENT;
2051 const STRLEN len = strlen(s);
2052 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2053 start_force(PL_curforce);
2054 NEXTVAL_NEXTTOKE.opval = o;
2057 o->op_private = OPpCONST_ENTERED;
2058 /* XXX see note in pp_entereval() for why we forgo typo
2059 warnings if the symbol must be introduced in an eval.
2061 gv_fetchpvn_flags(s, len,
2062 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2064 kind == '$' ? SVt_PV :
2065 kind == '@' ? SVt_PVAV :
2066 kind == '%' ? SVt_PVHV :
2074 Perl_str_to_version(pTHX_ SV *sv)
2079 const char *start = SvPV_const(sv,len);
2080 const char * const end = start + len;
2081 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2083 PERL_ARGS_ASSERT_STR_TO_VERSION;
2085 while (start < end) {
2089 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2094 retval += ((NV)n)/nshift;
2103 * Forces the next token to be a version number.
2104 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2105 * and if "guessing" is TRUE, then no new token is created (and the caller
2106 * must use an alternative parsing method).
2110 S_force_version(pTHX_ char *s, int guessing)
2116 I32 startoff = s - SvPVX(PL_linestr);
2119 PERL_ARGS_ASSERT_FORCE_VERSION;
2127 while (isDIGIT(*d) || *d == '_' || *d == '.')
2131 start_force(PL_curforce);
2132 curmad('X', newSVpvn(s,d-s));
2135 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2137 #ifdef USE_LOCALE_NUMERIC
2138 char *loc = setlocale(LC_NUMERIC, "C");
2140 s = scan_num(s, &pl_yylval);
2141 #ifdef USE_LOCALE_NUMERIC
2142 setlocale(LC_NUMERIC, loc);
2144 version = pl_yylval.opval;
2145 ver = cSVOPx(version)->op_sv;
2146 if (SvPOK(ver) && !SvNIOK(ver)) {
2147 SvUPGRADE(ver, SVt_PVNV);
2148 SvNV_set(ver, str_to_version(ver));
2149 SvNOK_on(ver); /* hint that it is a version */
2152 else if (guessing) {
2155 sv_free(PL_nextwhite); /* let next token collect whitespace */
2157 s = SvPVX(PL_linestr) + startoff;
2165 if (PL_madskills && !version) {
2166 sv_free(PL_nextwhite); /* let next token collect whitespace */
2168 s = SvPVX(PL_linestr) + startoff;
2171 /* NOTE: The parser sees the package name and the VERSION swapped */
2172 start_force(PL_curforce);
2173 NEXTVAL_NEXTTOKE.opval = version;
2180 * S_force_strict_version
2181 * Forces the next token to be a version number using strict syntax rules.
2185 S_force_strict_version(pTHX_ char *s)
2190 I32 startoff = s - SvPVX(PL_linestr);
2192 const char *errstr = NULL;
2194 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2196 while (isSPACE(*s)) /* leading whitespace */
2199 if (is_STRICT_VERSION(s,&errstr)) {
2201 s = (char *)scan_version(s, ver, 0);
2202 version = newSVOP(OP_CONST, 0, ver);
2204 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2205 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2209 yyerror(errstr); /* version required */
2214 if (PL_madskills && !version) {
2215 sv_free(PL_nextwhite); /* let next token collect whitespace */
2217 s = SvPVX(PL_linestr) + startoff;
2220 /* NOTE: The parser sees the package name and the VERSION swapped */
2221 start_force(PL_curforce);
2222 NEXTVAL_NEXTTOKE.opval = version;
2230 * Tokenize a quoted string passed in as an SV. It finds the next
2231 * chunk, up to end of string or a backslash. It may make a new
2232 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2237 S_tokeq(pTHX_ SV *sv)
2241 register char *send;
2246 PERL_ARGS_ASSERT_TOKEQ;
2251 s = SvPV_force(sv, len);
2252 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2255 while (s < send && *s != '\\')
2260 if ( PL_hints & HINT_NEW_STRING ) {
2261 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2265 if (s + 1 < send && (s[1] == '\\'))
2266 s++; /* all that, just for this */
2271 SvCUR_set(sv, d - SvPVX_const(sv));
2273 if ( PL_hints & HINT_NEW_STRING )
2274 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2279 * Now come three functions related to double-quote context,
2280 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2281 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2282 * interact with PL_lex_state, and create fake ( ... ) argument lists
2283 * to handle functions and concatenation.
2284 * They assume that whoever calls them will be setting up a fake
2285 * join call, because each subthing puts a ',' after it. This lets
2288 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2290 * (I'm not sure whether the spurious commas at the end of lcfirst's
2291 * arguments and join's arguments are created or not).
2296 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2298 * Pattern matching will set PL_lex_op to the pattern-matching op to
2299 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2301 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2303 * Everything else becomes a FUNC.
2305 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2306 * had an OP_CONST or OP_READLINE). This just sets us up for a
2307 * call to S_sublex_push().
2311 S_sublex_start(pTHX)
2314 register const I32 op_type = pl_yylval.ival;
2316 if (op_type == OP_NULL) {
2317 pl_yylval.opval = PL_lex_op;
2321 if (op_type == OP_CONST || op_type == OP_READLINE) {
2322 SV *sv = tokeq(PL_lex_stuff);
2324 if (SvTYPE(sv) == SVt_PVIV) {
2325 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2327 const char * const p = SvPV_const(sv, len);
2328 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2332 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2333 PL_lex_stuff = NULL;
2334 /* Allow <FH> // "foo" */
2335 if (op_type == OP_READLINE)
2336 PL_expect = XTERMORDORDOR;
2339 else if (op_type == OP_BACKTICK && PL_lex_op) {
2340 /* readpipe() vas overriden */
2341 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2342 pl_yylval.opval = PL_lex_op;
2344 PL_lex_stuff = NULL;
2348 PL_sublex_info.super_state = PL_lex_state;
2349 PL_sublex_info.sub_inwhat = (U16)op_type;
2350 PL_sublex_info.sub_op = PL_lex_op;
2351 PL_lex_state = LEX_INTERPPUSH;
2355 pl_yylval.opval = PL_lex_op;
2365 * Create a new scope to save the lexing state. The scope will be
2366 * ended in S_sublex_done. Returns a '(', starting the function arguments
2367 * to the uc, lc, etc. found before.
2368 * Sets PL_lex_state to LEX_INTERPCONCAT.
2377 PL_lex_state = PL_sublex_info.super_state;
2378 SAVEBOOL(PL_lex_dojoin);
2379 SAVEI32(PL_lex_brackets);
2380 SAVEI32(PL_lex_casemods);
2381 SAVEI32(PL_lex_starts);
2382 SAVEI8(PL_lex_state);
2383 SAVEVPTR(PL_lex_inpat);
2384 SAVEI16(PL_lex_inwhat);
2385 SAVECOPLINE(PL_curcop);
2386 SAVEPPTR(PL_bufptr);
2387 SAVEPPTR(PL_bufend);
2388 SAVEPPTR(PL_oldbufptr);
2389 SAVEPPTR(PL_oldoldbufptr);
2390 SAVEPPTR(PL_last_lop);
2391 SAVEPPTR(PL_last_uni);
2392 SAVEPPTR(PL_linestart);
2393 SAVESPTR(PL_linestr);
2394 SAVEGENERICPV(PL_lex_brackstack);
2395 SAVEGENERICPV(PL_lex_casestack);
2397 PL_linestr = PL_lex_stuff;
2398 PL_lex_stuff = NULL;
2400 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2401 = SvPVX(PL_linestr);
2402 PL_bufend += SvCUR(PL_linestr);
2403 PL_last_lop = PL_last_uni = NULL;
2404 SAVEFREESV(PL_linestr);
2406 PL_lex_dojoin = FALSE;
2407 PL_lex_brackets = 0;
2408 Newx(PL_lex_brackstack, 120, char);
2409 Newx(PL_lex_casestack, 12, char);
2410 PL_lex_casemods = 0;
2411 *PL_lex_casestack = '\0';
2413 PL_lex_state = LEX_INTERPCONCAT;
2414 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2416 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2417 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2418 PL_lex_inpat = PL_sublex_info.sub_op;
2420 PL_lex_inpat = NULL;
2427 * Restores lexer state after a S_sublex_push.
2434 if (!PL_lex_starts++) {
2435 SV * const sv = newSVpvs("");
2436 if (SvUTF8(PL_linestr))
2438 PL_expect = XOPERATOR;
2439 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2443 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2444 PL_lex_state = LEX_INTERPCASEMOD;
2448 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2449 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2450 PL_linestr = PL_lex_repl;
2452 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2453 PL_bufend += SvCUR(PL_linestr);
2454 PL_last_lop = PL_last_uni = NULL;
2455 SAVEFREESV(PL_linestr);
2456 PL_lex_dojoin = FALSE;
2457 PL_lex_brackets = 0;
2458 PL_lex_casemods = 0;
2459 *PL_lex_casestack = '\0';
2461 if (SvEVALED(PL_lex_repl)) {
2462 PL_lex_state = LEX_INTERPNORMAL;
2464 /* we don't clear PL_lex_repl here, so that we can check later
2465 whether this is an evalled subst; that means we rely on the
2466 logic to ensure sublex_done() is called again only via the
2467 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2470 PL_lex_state = LEX_INTERPCONCAT;
2480 PL_endwhite = newSVpvs("");
2481 sv_catsv(PL_endwhite, PL_thiswhite);
2485 sv_setpvs(PL_thistoken,"");
2487 PL_realtokenstart = -1;
2491 PL_bufend = SvPVX(PL_linestr);
2492 PL_bufend += SvCUR(PL_linestr);
2493 PL_expect = XOPERATOR;
2494 PL_sublex_info.sub_inwhat = 0;
2502 Extracts a pattern, double-quoted string, or transliteration. This
2505 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2506 processing a pattern (PL_lex_inpat is true), a transliteration
2507 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2509 Returns a pointer to the character scanned up to. If this is
2510 advanced from the start pointer supplied (i.e. if anything was
2511 successfully parsed), will leave an OP for the substring scanned
2512 in pl_yylval. Caller must intuit reason for not parsing further
2513 by looking at the next characters herself.
2517 constants: \N{NAME} only
2518 case and quoting: \U \Q \E
2519 stops on @ and $, but not for $ as tail anchor
2521 In transliterations:
2522 characters are VERY literal, except for - not at the start or end
2523 of the string, which indicates a range. If the range is in bytes,
2524 scan_const expands the range to the full set of intermediate
2525 characters. If the range is in utf8, the hyphen is replaced with
2526 a certain range mark which will be handled by pmtrans() in op.c.
2528 In double-quoted strings:
2530 double-quoted style: \r and \n
2531 constants: \x31, etc.
2532 deprecated backrefs: \1 (in substitution replacements)
2533 case and quoting: \U \Q \E
2536 scan_const does *not* construct ops to handle interpolated strings.
2537 It stops processing as soon as it finds an embedded $ or @ variable
2538 and leaves it to the caller to work out what's going on.
2540 embedded arrays (whether in pattern or not) could be:
2541 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2543 $ in double-quoted strings must be the symbol of an embedded scalar.
2545 $ in pattern could be $foo or could be tail anchor. Assumption:
2546 it's a tail anchor if $ is the last thing in the string, or if it's
2547 followed by one of "()| \r\n\t"
2549 \1 (backreferences) are turned into $1
2551 The structure of the code is
2552 while (there's a character to process) {
2553 handle transliteration ranges
2554 skip regexp comments /(?#comment)/ and codes /(?{code})/
2555 skip #-initiated comments in //x patterns
2556 check for embedded arrays
2557 check for embedded scalars
2559 deprecate \1 in substitution replacements
2560 handle string-changing backslashes \l \U \Q \E, etc.
2561 switch (what was escaped) {
2562 handle \- in a transliteration (becomes a literal -)
2563 if a pattern and not \N{, go treat as regular character
2564 handle \132 (octal characters)
2565 handle \x15 and \x{1234} (hex characters)
2566 handle \N{name} (named characters, also \N{3,5} in a pattern)
2567 handle \cV (control characters)
2568 handle printf-style backslashes (\f, \r, \n, etc)
2571 } (end if backslash)
2572 handle regular character
2573 } (end while character to read)
2578 S_scan_const(pTHX_ char *start)
2581 register char *send = PL_bufend; /* end of the constant */
2582 SV *sv = newSV(send - start); /* sv for the constant. See
2583 note below on sizing. */
2584 register char *s = start; /* start of the constant */
2585 register char *d = SvPVX(sv); /* destination for copies */
2586 bool dorange = FALSE; /* are we in a translit range? */
2587 bool didrange = FALSE; /* did we just finish a range? */
2588 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
2589 I32 this_utf8 = UTF; /* Is the source string assumed
2590 to be UTF8? But, this can
2591 show as true when the source
2592 isn't utf8, as for example
2593 when it is entirely composed
2596 /* Note on sizing: The scanned constant is placed into sv, which is
2597 * initialized by newSV() assuming one byte of output for every byte of
2598 * input. This routine expects newSV() to allocate an extra byte for a
2599 * trailing NUL, which this routine will append if it gets to the end of
2600 * the input. There may be more bytes of input than output (eg., \N{LATIN
2601 * CAPITAL LETTER A}), or more output than input if the constant ends up
2602 * recoded to utf8, but each time a construct is found that might increase
2603 * the needed size, SvGROW() is called. Its size parameter each time is
2604 * based on the best guess estimate at the time, namely the length used so
2605 * far, plus the length the current construct will occupy, plus room for
2606 * the trailing NUL, plus one byte for every input byte still unscanned */
2610 UV literal_endpoint = 0;
2611 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2614 PERL_ARGS_ASSERT_SCAN_CONST;
2616 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2617 /* If we are doing a trans and we know we want UTF8 set expectation */
2618 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2619 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2623 while (s < send || dorange) {
2625 /* get transliterations out of the way (they're most literal) */
2626 if (PL_lex_inwhat == OP_TRANS) {
2627 /* expand a range A-Z to the full set of characters. AIE! */
2629 I32 i; /* current expanded character */
2630 I32 min; /* first character in range */
2631 I32 max; /* last character in range */
2642 char * const c = (char*)utf8_hop((U8*)d, -1);
2646 *c = (char)UTF_TO_NATIVE(0xff);
2647 /* mark the range as done, and continue */
2653 i = d - SvPVX_const(sv); /* remember current offset */
2656 SvLEN(sv) + (has_utf8 ?
2657 (512 - UTF_CONTINUATION_MARK +
2660 /* How many two-byte within 0..255: 128 in UTF-8,
2661 * 96 in UTF-8-mod. */
2663 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2665 d = SvPVX(sv) + i; /* refresh d after realloc */
2669 for (j = 0; j <= 1; j++) {
2670 char * const c = (char*)utf8_hop((U8*)d, -1);
2671 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2677 max = (U8)0xff; /* only to \xff */
2678 uvmax = uv; /* \x{100} to uvmax */
2680 d = c; /* eat endpoint chars */
2685 d -= 2; /* eat the first char and the - */
2686 min = (U8)*d; /* first char in range */
2687 max = (U8)d[1]; /* last char in range */
2694 "Invalid range \"%c-%c\" in transliteration operator",
2695 (char)min, (char)max);
2699 if (literal_endpoint == 2 &&
2700 ((isLOWER(min) && isLOWER(max)) ||
2701 (isUPPER(min) && isUPPER(max)))) {
2703 for (i = min; i <= max; i++)
2705 *d++ = NATIVE_TO_NEED(has_utf8,i);
2707 for (i = min; i <= max; i++)
2709 *d++ = NATIVE_TO_NEED(has_utf8,i);
2714 for (i = min; i <= max; i++)
2717 const U8 ch = (U8)NATIVE_TO_UTF(i);
2718 if (UNI_IS_INVARIANT(ch))
2721 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2722 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2731 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2733 *d++ = (char)UTF_TO_NATIVE(0xff);
2735 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2739 /* mark the range as done, and continue */
2743 literal_endpoint = 0;
2748 /* range begins (ignore - as first or last char) */
2749 else if (*s == '-' && s+1 < send && s != start) {
2751 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2758 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2768 literal_endpoint = 0;
2769 native_range = TRUE;
2774 /* if we get here, we're not doing a transliteration */
2776 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2777 except for the last char, which will be done separately. */
2778 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2780 while (s+1 < send && *s != ')')
2781 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2783 else if (s[2] == '{' /* This should match regcomp.c */
2784 || (s[2] == '?' && s[3] == '{'))
2787 char *regparse = s + (s[2] == '{' ? 3 : 4);
2790 while (count && (c = *regparse)) {
2791 if (c == '\\' && regparse[1])
2799 if (*regparse != ')')
2800 regparse--; /* Leave one char for continuation. */
2801 while (s < regparse)
2802 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2806 /* likewise skip #-initiated comments in //x patterns */
2807 else if (*s == '#' && PL_lex_inpat &&
2808 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2809 while (s+1 < send && *s != '\n')
2810 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2813 /* check for embedded arrays
2814 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2816 else if (*s == '@' && s[1]) {
2817 if (isALNUM_lazy_if(s+1,UTF))
2819 if (strchr(":'{$", s[1]))
2821 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2822 break; /* in regexp, neither @+ nor @- are interpolated */
2825 /* check for embedded scalars. only stop if we're sure it's a
2828 else if (*s == '$') {
2829 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2831 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2833 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2834 "Possible unintended interpolation of $\\ in regex");
2836 break; /* in regexp, $ might be tail anchor */
2840 /* End of else if chain - OP_TRANS rejoin rest */
2843 if (*s == '\\' && s+1 < send) {
2844 char* e; /* Can be used for ending '}', etc. */
2848 /* warn on \1 - \9 in substitution replacements, but note that \11
2849 * is an octal; and \19 is \1 followed by '9' */
2850 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2851 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2853 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2858 /* string-change backslash escapes */
2859 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2863 /* In a pattern, process \N, but skip any other backslash escapes.
2864 * This is because we don't want to translate an escape sequence
2865 * into a meta symbol and have the regex compiler use the meta
2866 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2867 * in spite of this, we do have to process \N here while the proper
2868 * charnames handler is in scope. See bugs #56444 and #62056.
2869 * There is a complication because \N in a pattern may also stand
2870 * for 'match a non-nl', and not mean a charname, in which case its
2871 * processing should be deferred to the regex compiler. To be a
2872 * charname it must be followed immediately by a '{', and not look
2873 * like \N followed by a curly quantifier, i.e., not something like
2874 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2876 else if (PL_lex_inpat
2879 || regcurly(s + 1)))
2881 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2882 goto default_action;
2887 /* quoted - in transliterations */
2889 if (PL_lex_inwhat == OP_TRANS) {
2896 if ((isALPHA(*s) || isDIGIT(*s)))
2897 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2898 "Unrecognized escape \\%c passed through",
2900 /* default action is to copy the quoted character */
2901 goto default_action;
2904 /* eg. \132 indicates the octal constant 0132 */
2905 case '0': case '1': case '2': case '3':
2906 case '4': case '5': case '6': case '7':
2910 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2913 goto NUM_ESCAPE_INSERT;
2915 /* eg. \o{24} indicates the octal constant \024 */
2921 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2927 goto NUM_ESCAPE_INSERT;
2930 /* eg. \x24 indicates the hex constant 0x24 */
2934 char* const e = strchr(s, '}');
2935 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2936 PERL_SCAN_DISALLOW_PREFIX;
2941 yyerror("Missing right brace on \\x{}");
2945 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2951 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2952 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2958 /* Insert oct or hex escaped character. There will always be
2959 * enough room in sv since such escapes will be longer than any
2960 * UTF-8 sequence they can end up as, except if they force us
2961 * to recode the rest of the string into utf8 */
2963 /* Here uv is the ordinal of the next character being added in
2964 * unicode (converted from native). */
2965 if (!UNI_IS_INVARIANT(uv)) {
2966 if (!has_utf8 && uv > 255) {
2967 /* Might need to recode whatever we have accumulated so
2968 * far if it contains any chars variant in utf8 or
2971 SvCUR_set(sv, d - SvPVX_const(sv));
2974 /* See Note on sizing above. */
2975 sv_utf8_upgrade_flags_grow(sv,
2976 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2977 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2978 d = SvPVX(sv) + SvCUR(sv);
2983 d = (char*)uvuni_to_utf8((U8*)d, uv);
2984 if (PL_lex_inwhat == OP_TRANS &&
2985 PL_sublex_info.sub_op) {
2986 PL_sublex_info.sub_op->op_private |=
2987 (PL_lex_repl ? OPpTRANS_FROM_UTF
2991 if (uv > 255 && !dorange)
2992 native_range = FALSE;
3005 /* In a non-pattern \N must be a named character, like \N{LATIN
3006 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3007 * mean to match a non-newline. For non-patterns, named
3008 * characters are converted to their string equivalents. In
3009 * patterns, named characters are not converted to their
3010 * ultimate forms for the same reasons that other escapes
3011 * aren't. Instead, they are converted to the \N{U+...} form
3012 * to get the value from the charnames that is in effect right
3013 * now, while preserving the fact that it was a named character
3014 * so that the regex compiler knows this */
3016 /* This section of code doesn't generally use the
3017 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3018 * a close examination of this macro and determined it is a
3019 * no-op except on utfebcdic variant characters. Every
3020 * character generated by this that would normally need to be
3021 * enclosed by this macro is invariant, so the macro is not
3022 * needed, and would complicate use of copy(). There are other
3023 * parts of this file where the macro is used inconsistently,
3024 * but are saved by it being a no-op */
3026 /* The structure of this section of code (besides checking for
3027 * errors and upgrading to utf8) is:
3028 * Further disambiguate between the two meanings of \N, and if
3029 * not a charname, go process it elsewhere
3030 * If of form \N{U+...}, pass it through if a pattern;
3031 * otherwise convert to utf8
3032 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3033 * pattern; otherwise convert to utf8 */
3035 /* Here, s points to the 'N'; the test below is guaranteed to
3036 * succeed if we are being called on a pattern as we already
3037 * know from a test above that the next character is a '{'.
3038 * On a non-pattern \N must mean 'named sequence, which
3039 * requires braces */
3042 yyerror("Missing braces on \\N{}");
3047 /* If there is no matching '}', it is an error. */
3048 if (! (e = strchr(s, '}'))) {
3049 if (! PL_lex_inpat) {
3050 yyerror("Missing right brace on \\N{}");
3052 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3057 /* Here it looks like a named character */
3061 /* XXX This block is temporary code. \N{} implies that the
3062 * pattern is to have Unicode semantics, and therefore
3063 * currently has to be encoded in utf8. By putting it in
3064 * utf8 now, we save a whole pass in the regular expression
3065 * compiler. Once that code is changed so Unicode
3066 * semantics doesn't necessarily have to be in utf8, this
3067 * block should be removed */
3069 SvCUR_set(sv, d - SvPVX_const(sv));
3072 /* See Note on sizing above. */
3073 sv_utf8_upgrade_flags_grow(sv,
3074 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3075 /* 5 = '\N{' + cur char + NUL */
3076 (STRLEN)(send - s) + 5);
3077 d = SvPVX(sv) + SvCUR(sv);
3082 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3083 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3084 | PERL_SCAN_DISALLOW_PREFIX;
3087 /* For \N{U+...}, the '...' is a unicode value even on
3088 * EBCDIC machines */
3089 s += 2; /* Skip to next char after the 'U+' */
3091 uv = grok_hex(s, &len, &flags, NULL);
3092 if (len == 0 || len != (STRLEN)(e - s)) {
3093 yyerror("Invalid hexadecimal number in \\N{U+...}");
3100 /* Pass through to the regex compiler unchanged. The
3101 * reason we evaluated the number above is to make sure
3102 * there wasn't a syntax error. */
3103 s -= 5; /* Include the '\N{U+' */
3104 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3107 else { /* Not a pattern: convert the hex to string */
3109 /* If destination is not in utf8, unconditionally
3110 * recode it to be so. This is because \N{} implies
3111 * Unicode semantics, and scalars have to be in utf8
3112 * to guarantee those semantics */
3114 SvCUR_set(sv, d - SvPVX_const(sv));
3117 /* See Note on sizing above. */
3118 sv_utf8_upgrade_flags_grow(
3120 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3121 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3122 d = SvPVX(sv) + SvCUR(sv);
3126 /* Add the string to the output */
3127 if (UNI_IS_INVARIANT(uv)) {
3130 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3133 else { /* Here is \N{NAME} but not \N{U+...}. */
3135 SV *res; /* result from charnames */
3136 const char *str; /* the string in 'res' */
3137 STRLEN len; /* its length */
3139 /* Get the value for NAME */
3140 res = newSVpvn(s, e - s);
3141 res = new_constant( NULL, 0, "charnames",
3142 /* includes all of: \N{...} */
3143 res, NULL, s - 3, e - s + 4 );
3145 /* Most likely res will be in utf8 already since the
3146 * standard charnames uses pack U, but a custom translator
3147 * can leave it otherwise, so make sure. XXX This can be
3148 * revisited to not have charnames use utf8 for characters
3149 * that don't need it when regexes don't have to be in utf8
3150 * for Unicode semantics. If doing so, remember EBCDIC */
3151 sv_utf8_upgrade(res);
3152 str = SvPV_const(res, len);
3154 /* Don't accept malformed input */
3155 if (! is_utf8_string((U8 *) str, len)) {
3156 yyerror("Malformed UTF-8 returned by \\N");
3158 else if (PL_lex_inpat) {
3160 if (! len) { /* The name resolved to an empty string */
3161 Copy("\\N{}", d, 4, char);
3165 /* In order to not lose information for the regex
3166 * compiler, pass the result in the specially made
3167 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3168 * the code points in hex of each character
3169 * returned by charnames */
3171 const char *str_end = str + len;
3172 STRLEN char_length; /* cur char's byte length */
3173 STRLEN output_length; /* and the number of bytes
3174 after this is translated
3176 const STRLEN off = d - SvPVX_const(sv);
3178 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3179 * max('U+', '.'); and 1 for NUL */
3180 char hex_string[2 * UTF8_MAXBYTES + 5];
3182 /* Get the first character of the result. */
3183 U32 uv = utf8n_to_uvuni((U8 *) str,
3188 /* The call to is_utf8_string() above hopefully
3189 * guarantees that there won't be an error. But
3190 * it's easy here to make sure. The function just
3191 * above warns and returns 0 if invalid utf8, but
3192 * it can also return 0 if the input is validly a
3193 * NUL. Disambiguate */
3194 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3195 uv = UNICODE_REPLACEMENT;
3198 /* Convert first code point to hex, including the
3199 * boiler plate before it */
3200 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3201 output_length = strlen(hex_string);
3203 /* Make sure there is enough space to hold it */
3204 d = off + SvGROW(sv, off
3206 + (STRLEN)(send - e)
3207 + 2); /* '}' + NUL */
3209 Copy(hex_string, d, output_length, char);
3212 /* For each subsequent character, append dot and
3213 * its ordinal in hex */
3214 while ((str += char_length) < str_end) {
3215 const STRLEN off = d - SvPVX_const(sv);
3216 U32 uv = utf8n_to_uvuni((U8 *) str,
3220 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3221 uv = UNICODE_REPLACEMENT;
3224 sprintf(hex_string, ".%X", (unsigned int) uv);
3225 output_length = strlen(hex_string);
3227 d = off + SvGROW(sv, off
3229 + (STRLEN)(send - e)
3230 + 2); /* '}' + NUL */
3231 Copy(hex_string, d, output_length, char);
3235 *d++ = '}'; /* Done. Add the trailing brace */
3238 else { /* Here, not in a pattern. Convert the name to a
3241 /* If destination is not in utf8, unconditionally
3242 * recode it to be so. This is because \N{} implies
3243 * Unicode semantics, and scalars have to be in utf8
3244 * to guarantee those semantics */
3246 SvCUR_set(sv, d - SvPVX_const(sv));
3249 /* See Note on sizing above. */
3250 sv_utf8_upgrade_flags_grow(sv,
3251 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3252 len + (STRLEN)(send - s) + 1);
3253 d = SvPVX(sv) + SvCUR(sv);
3255 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3257 /* See Note on sizing above. (NOTE: SvCUR() is not
3258 * set correctly here). */
3259 const STRLEN off = d - SvPVX_const(sv);
3260 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3262 Copy(str, d, len, char);
3267 /* Deprecate non-approved name syntax */
3268 if (ckWARN_d(WARN_DEPRECATED)) {
3269 bool problematic = FALSE;
3272 /* For non-ut8 input, look to see that the first
3273 * character is an alpha, then loop through the rest
3274 * checking that each is a continuation */
3276 if (! isALPHAU(*i)) problematic = TRUE;
3277 else for (i = s + 1; i < e; i++) {
3278 if (isCHARNAME_CONT(*i)) continue;
3284 /* Similarly for utf8. For invariants can check
3285 * directly. We accept anything above the latin1
3286 * range because it is immaterial to Perl if it is
3287 * correct or not, and is expensive to check. But
3288 * it is fairly easy in the latin1 range to convert
3289 * the variants into a single character and check
3291 if (UTF8_IS_INVARIANT(*i)) {
3292 if (! isALPHAU(*i)) problematic = TRUE;
3293 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3294 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3300 if (! problematic) for (i = s + UTF8SKIP(s);
3304 if (UTF8_IS_INVARIANT(*i)) {
3305 if (isCHARNAME_CONT(*i)) continue;
3306 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3308 } else if (isCHARNAME_CONT(
3310 UTF8_ACCUMULATE(*i, *(i+1)))))
3319 /* The e-i passed to the final %.*s makes sure that
3320 * should the trailing NUL be missing that this
3321 * print won't run off the end of the string */
3322 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3323 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3324 (int)(i - s + 1), s, (int)(e - i), i + 1);
3327 } /* End \N{NAME} */
3330 native_range = FALSE; /* \N{} is defined to be Unicode */
3332 s = e + 1; /* Point to just after the '}' */
3335 /* \c is a control character */
3339 *d++ = grok_bslash_c(*s++, 1);
3342 yyerror("Missing control char name in \\c");
3346 /* printf-style backslashes, formfeeds, newlines, etc */
3348 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3351 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3354 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3357 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3360 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3363 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3366 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3372 } /* end if (backslash) */
3379 /* If we started with encoded form, or already know we want it,
3380 then encode the next character */
3381 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3385 /* One might think that it is wasted effort in the case of the
3386 * source being utf8 (this_utf8 == TRUE) to take the next character
3387 * in the source, convert it to an unsigned value, and then convert
3388 * it back again. But the source has not been validated here. The
3389 * routine that does the conversion checks for errors like
3392 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3393 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3395 SvCUR_set(sv, d - SvPVX_const(sv));
3398 /* See Note on sizing above. */
3399 sv_utf8_upgrade_flags_grow(sv,
3400 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3401 need + (STRLEN)(send - s) + 1);
3402 d = SvPVX(sv) + SvCUR(sv);
3404 } else if (need > len) {
3405 /* encoded value larger than old, may need extra space (NOTE:
3406 * SvCUR() is not set correctly here). See Note on sizing
3408 const STRLEN off = d - SvPVX_const(sv);
3409 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3413 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3415 if (uv > 255 && !dorange)
3416 native_range = FALSE;
3420 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3422 } /* while loop to process each character */
3424 /* terminate the string and set up the sv */
3426 SvCUR_set(sv, d - SvPVX_const(sv));
3427 if (SvCUR(sv) >= SvLEN(sv))
3428 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3431 if (PL_encoding && !has_utf8) {
3432 sv_recode_to_utf8(sv, PL_encoding);
3438 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3439 PL_sublex_info.sub_op->op_private |=
3440 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3444 /* shrink the sv if we allocated more than we used */
3445 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3446 SvPV_shrink_to_cur(sv);
3449 /* return the substring (via pl_yylval) only if we parsed anything */
3450 if (s > PL_bufptr) {
3451 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3452 const char *const key = PL_lex_inpat ? "qr" : "q";
3453 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3457 if (PL_lex_inwhat == OP_TRANS) {
3460 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3468 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3471 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3478 * Returns TRUE if there's more to the expression (e.g., a subscript),
3481 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3483 * ->[ and ->{ return TRUE
3484 * { and [ outside a pattern are always subscripts, so return TRUE
3485 * if we're outside a pattern and it's not { or [, then return FALSE
3486 * if we're in a pattern and the first char is a {
3487 * {4,5} (any digits around the comma) returns FALSE
3488 * if we're in a pattern and the first char is a [
3490 * [SOMETHING] has a funky algorithm to decide whether it's a
3491 * character class or not. It has to deal with things like
3492 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3493 * anything else returns TRUE
3496 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3499 S_intuit_more(pTHX_ register char *s)
3503 PERL_ARGS_ASSERT_INTUIT_MORE;
3505 if (PL_lex_brackets)
3507 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3509 if (*s != '{' && *s != '[')
3514 /* In a pattern, so maybe we have {n,m}. */
3522 /* On the other hand, maybe we have a character class */
3525 if (*s == ']' || *s == '^')
3528 /* this is terrifying, and it works */
3529 int weight = 2; /* let's weigh the evidence */
3531 unsigned char un_char = 255, last_un_char;
3532 const char * const send = strchr(s,']');
3533 char tmpbuf[sizeof PL_tokenbuf * 4];
3535 if (!send) /* has to be an expression */
3538 Zero(seen,256,char);
3541 else if (isDIGIT(*s)) {
3543 if (isDIGIT(s[1]) && s[2] == ']')
3549 for (; s < send; s++) {
3550 last_un_char = un_char;
3551 un_char = (unsigned char)*s;
3556 weight -= seen[un_char] * 10;
3557 if (isALNUM_lazy_if(s+1,UTF)) {
3559 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3560 len = (int)strlen(tmpbuf);
3561 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3566 else if (*s == '$' && s[1] &&
3567 strchr("[#!%*<>()-=",s[1])) {
3568 if (/*{*/ strchr("])} =",s[2]))
3577 if (strchr("wds]",s[1]))
3579 else if (seen[(U8)'\''] || seen[(U8)'"'])
3581 else if (strchr("rnftbxcav",s[1]))
3583 else if (isDIGIT(s[1])) {
3585 while (s[1] && isDIGIT(s[1]))
3595 if (strchr("aA01! ",last_un_char))
3597 if (strchr("zZ79~",s[1]))
3599 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3600 weight -= 5; /* cope with negative subscript */
3603 if (!isALNUM(last_un_char)
3604 && !(last_un_char == '$' || last_un_char == '@'
3605 || last_un_char == '&')
3606 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3611 if (keyword(tmpbuf, d - tmpbuf, 0))
3614 if (un_char == last_un_char + 1)
3616 weight -= seen[un_char];
3621 if (weight >= 0) /* probably a character class */
3631 * Does all the checking to disambiguate
3633 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3634 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3636 * First argument is the stuff after the first token, e.g. "bar".
3638 * Not a method if bar is a filehandle.
3639 * Not a method if foo is a subroutine prototyped to take a filehandle.
3640 * Not a method if it's really "Foo $bar"
3641 * Method if it's "foo $bar"
3642 * Not a method if it's really "print foo $bar"
3643 * Method if it's really "foo package::" (interpreted as package->foo)
3644 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3645 * Not a method if bar is a filehandle or package, but is quoted with
3650 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3653 char *s = start + (*start == '$');
3654 char tmpbuf[sizeof PL_tokenbuf];
3661 PERL_ARGS_ASSERT_INTUIT_METHOD;
3664 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3668 const char *proto = SvPVX_const(cv);
3679 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3680 /* start is the beginning of the possible filehandle/object,
3681 * and s is the end of it
3682 * tmpbuf is a copy of it
3685 if (*start == '$') {
3686 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3687 isUPPER(*PL_tokenbuf))
3690 len = start - SvPVX(PL_linestr);
3694 start = SvPVX(PL_linestr) + len;
3698 return *s == '(' ? FUNCMETH : METHOD;
3700 if (!keyword(tmpbuf, len, 0)) {
3701 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3705 soff = s - SvPVX(PL_linestr);
3709 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3710 if (indirgv && GvCVu(indirgv))
3712 /* filehandle or package name makes it a method */
3713 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3715 soff = s - SvPVX(PL_linestr);
3718 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3719 return 0; /* no assumptions -- "=>" quotes bearword */
3721 start_force(PL_curforce);
3722 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3723 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3724 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3726 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3731 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3733 return *s == '(' ? FUNCMETH : METHOD;
3739 /* Encoded script support. filter_add() effectively inserts a
3740 * 'pre-processing' function into the current source input stream.
3741 * Note that the filter function only applies to the current source file
3742 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3744 * The datasv parameter (which may be NULL) can be used to pass
3745 * private data to this instance of the filter. The filter function
3746 * can recover the SV using the FILTER_DATA macro and use it to
3747 * store private buffers and state information.
3749 * The supplied datasv parameter is upgraded to a PVIO type
3750 * and the IoDIRP/IoANY field is used to store the function pointer,
3751 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3752 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3753 * private use must be set using malloc'd pointers.
3757 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3766 if (!PL_rsfp_filters)
3767 PL_rsfp_filters = newAV();
3770 SvUPGRADE(datasv, SVt_PVIO);
3771 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3772 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3773 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3774 FPTR2DPTR(void *, IoANY(datasv)),
3775 SvPV_nolen(datasv)));
3776 av_unshift(PL_rsfp_filters, 1);
3777 av_store(PL_rsfp_filters, 0, datasv) ;
3782 /* Delete most recently added instance of this filter function. */
3784 Perl_filter_del(pTHX_ filter_t funcp)
3789 PERL_ARGS_ASSERT_FILTER_DEL;
3792 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3793 FPTR2DPTR(void*, funcp)));
3795 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3797 /* if filter is on top of stack (usual case) just pop it off */
3798 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3799 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3800 sv_free(av_pop(PL_rsfp_filters));
3804 /* we need to search for the correct entry and clear it */
3805 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3809 /* Invoke the idxth filter function for the current rsfp. */
3810 /* maxlen 0 = read one text line */
3812 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3817 /* This API is bad. It should have been using unsigned int for maxlen.
3818 Not sure if we want to change the API, but if not we should sanity
3819 check the value here. */
3820 const unsigned int correct_length
3829 PERL_ARGS_ASSERT_FILTER_READ;
3831 if (!PL_parser || !PL_rsfp_filters)
3833 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3834 /* Provide a default input filter to make life easy. */
3835 /* Note that we append to the line. This is handy. */
3836 DEBUG_P(PerlIO_printf(Perl_debug_log,
3837 "filter_read %d: from rsfp\n", idx));
3838 if (correct_length) {
3841 const int old_len = SvCUR(buf_sv);
3843 /* ensure buf_sv is large enough */
3844 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3845 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3846 correct_length)) <= 0) {
3847 if (PerlIO_error(PL_rsfp))
3848 return -1; /* error */
3850 return 0 ; /* end of file */
3852 SvCUR_set(buf_sv, old_len + len) ;
3853 SvPVX(buf_sv)[old_len + len] = '\0';
3856 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3857 if (PerlIO_error(PL_rsfp))
3858 return -1; /* error */
3860 return 0 ; /* end of file */
3863 return SvCUR(buf_sv);
3865 /* Skip this filter slot if filter has been deleted */
3866 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3867 DEBUG_P(PerlIO_printf(Perl_debug_log,
3868 "filter_read %d: skipped (filter deleted)\n",
3870 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3872 /* Get function pointer hidden within datasv */
3873 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3874 DEBUG_P(PerlIO_printf(Perl_debug_log,
3875 "filter_read %d: via function %p (%s)\n",
3876 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3877 /* Call function. The function is expected to */
3878 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3879 /* Return: <0:error, =0:eof, >0:not eof */
3880 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3884 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3888 PERL_ARGS_ASSERT_FILTER_GETS;
3890 #ifdef PERL_CR_FILTER
3891 if (!PL_rsfp_filters) {
3892 filter_add(S_cr_textfilter,NULL);
3895 if (PL_rsfp_filters) {
3897 SvCUR_set(sv, 0); /* start with empty line */
3898 if (FILTER_READ(0, sv, 0) > 0)
3899 return ( SvPVX(sv) ) ;
3904 return (sv_gets(sv, PL_rsfp, append));
3908 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3913 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3915 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3919 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3920 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3922 return GvHV(gv); /* Foo:: */
3925 /* use constant CLASS => 'MyClass' */
3926 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3927 if (gv && GvCV(gv)) {
3928 SV * const sv = cv_const_sv(GvCV(gv));
3930 pkgname = SvPV_const(sv, len);
3933 return gv_stashpvn(pkgname, len, 0);
3937 * S_readpipe_override
3938 * Check whether readpipe() is overriden, and generates the appropriate
3939 * optree, provided sublex_start() is called afterwards.
3942 S_readpipe_override(pTHX)
3945 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3946 pl_yylval.ival = OP_BACKTICK;
3948 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3950 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3951 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3952 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3954 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3955 append_elem(OP_LIST,
3956 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3957 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3964 * The intent of this yylex wrapper is to minimize the changes to the
3965 * tokener when we aren't interested in collecting madprops. It remains
3966 * to be seen how successful this strategy will be...
3973 char *s = PL_bufptr;
3975 /* make sure PL_thiswhite is initialized */
3979 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3980 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
3981 return S_pending_ident(aTHX);
3983 /* previous token ate up our whitespace? */
3984 if (!PL_lasttoke && PL_nextwhite) {
3985 PL_thiswhite = PL_nextwhite;
3989 /* isolate the token, and figure out where it is without whitespace */
3990 PL_realtokenstart = -1;
3994 assert(PL_curforce < 0);
3996 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3997 if (!PL_thistoken) {
3998 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3999 PL_thistoken = newSVpvs("");
4001 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4002 PL_thistoken = newSVpvn(tstart, s - tstart);
4005 if (PL_thismad) /* install head */
4006 CURMAD('X', PL_thistoken);
4009 /* last whitespace of a sublex? */
4010 if (optype == ')' && PL_endwhite) {
4011 CURMAD('X', PL_endwhite);
4016 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4017 if (!PL_thiswhite && !PL_endwhite && !optype) {
4018 sv_free(PL_thistoken);
4023 /* put off final whitespace till peg */
4024 if (optype == ';' && !PL_rsfp) {
4025 PL_nextwhite = PL_thiswhite;
4028 else if (PL_thisopen) {
4029 CURMAD('q', PL_thisopen);
4031 sv_free(PL_thistoken);
4035 /* Store actual token text as madprop X */
4036 CURMAD('X', PL_thistoken);
4040 /* add preceding whitespace as madprop _ */
4041 CURMAD('_', PL_thiswhite);
4045 /* add quoted material as madprop = */
4046 CURMAD('=', PL_thisstuff);
4050 /* add terminating quote as madprop Q */
4051 CURMAD('Q', PL_thisclose);
4055 /* special processing based on optype */
4059 /* opval doesn't need a TOKEN since it can already store mp */
4069 if (pl_yylval.opval)
4070 append_madprops(PL_thismad, pl_yylval.opval, 0);
4078 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4087 /* remember any fake bracket that lexer is about to discard */
4088 if (PL_lex_brackets == 1 &&
4089 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4092 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4095 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4096 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4099 break; /* don't bother looking for trailing comment */
4108 /* attach a trailing comment to its statement instead of next token */
4112 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4114 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4116 if (*s == '\n' || *s == '#') {
4117 while (s < PL_bufend && *s != '\n')
4121 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4122 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4139 /* Create new token struct. Note: opvals return early above. */
4140 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4147 S_tokenize_use(pTHX_ int is_use, char *s) {
4150 PERL_ARGS_ASSERT_TOKENIZE_USE;
4152 if (PL_expect != XSTATE)
4153 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4154 is_use ? "use" : "no"));
4156 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4157 s = force_version(s, TRUE);
4158 if (*s == ';' || *s == '}'
4159 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4160 start_force(PL_curforce);
4161 NEXTVAL_NEXTTOKE.opval = NULL;
4164 else if (*s == 'v') {
4165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4166 s = force_version(s, FALSE);
4170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4171 s = force_version(s, FALSE);
4173 pl_yylval.ival = is_use;
4177 static const char* const exp_name[] =
4178 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4179 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4186 Works out what to call the token just pulled out of the input
4187 stream. The yacc parser takes care of taking the ops we return and
4188 stitching them into a tree.
4194 if read an identifier
4195 if we're in a my declaration
4196 croak if they tried to say my($foo::bar)
4197 build the ops for a my() declaration
4198 if it's an access to a my() variable
4199 are we in a sort block?
4200 croak if my($a); $a <=> $b
4201 build ops for access to a my() variable
4202 if in a dq string, and they've said @foo and we can't find @foo
4204 build ops for a bareword
4205 if we already built the token before, use it.
4210 #pragma segment Perl_yylex
4216 register char *s = PL_bufptr;
4222 /* orig_keyword, gvp, and gv are initialized here because
4223 * jump to the label just_a_word_zero can bypass their
4224 * initialization later. */
4225 I32 orig_keyword = 0;
4230 SV* tmp = newSVpvs("");
4231 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4232 (IV)CopLINE(PL_curcop),
4233 lex_state_names[PL_lex_state],
4234 exp_name[PL_expect],
4235 pv_display(tmp, s, strlen(s), 0, 60));
4238 /* check if there's an identifier for us to look at */
4239 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4240 return REPORT(S_pending_ident(aTHX));
4242 /* no identifier pending identification */
4244 switch (PL_lex_state) {
4246 case LEX_NORMAL: /* Some compilers will produce faster */
4247 case LEX_INTERPNORMAL: /* code if we comment these out. */
4251 /* when we've already built the next token, just pull it out of the queue */
4255 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4257 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4258 PL_nexttoke[PL_lasttoke].next_mad = 0;
4259 if (PL_thismad && PL_thismad->mad_key == '_') {
4260 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4261 PL_thismad->mad_val = 0;
4262 mad_free(PL_thismad);
4267 PL_lex_state = PL_lex_defer;
4268 PL_expect = PL_lex_expect;
4269 PL_lex_defer = LEX_NORMAL;
4270 if (!PL_nexttoke[PL_lasttoke].next_type)
4275 pl_yylval = PL_nextval[PL_nexttoke];
4277 PL_lex_state = PL_lex_defer;
4278 PL_expect = PL_lex_expect;
4279 PL_lex_defer = LEX_NORMAL;
4283 /* FIXME - can these be merged? */
4284 return(PL_nexttoke[PL_lasttoke].next_type);
4286 return REPORT(PL_nexttype[PL_nexttoke]);
4289 /* interpolated case modifiers like \L \U, including \Q and \E.
4290 when we get here, PL_bufptr is at the \
4292 case LEX_INTERPCASEMOD:
4294 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4295 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4297 /* handle \E or end of string */
4298 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4300 if (PL_lex_casemods) {
4301 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4302 PL_lex_casestack[PL_lex_casemods] = '\0';
4304 if (PL_bufptr != PL_bufend
4305 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4307 PL_lex_state = LEX_INTERPCONCAT;
4310 PL_thistoken = newSVpvs("\\E");
4316 while (PL_bufptr != PL_bufend &&
4317 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4319 PL_thiswhite = newSVpvs("");
4320 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4324 if (PL_bufptr != PL_bufend)
4327 PL_lex_state = LEX_INTERPCONCAT;
4331 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4332 "### Saw case modifier\n"); });
4334 if (s[1] == '\\' && s[2] == 'E') {
4337 PL_thiswhite = newSVpvs("");
4338 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4341 PL_lex_state = LEX_INTERPCONCAT;
4346 if (!PL_madskills) /* when just compiling don't need correct */
4347 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4348 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4349 if ((*s == 'L' || *s == 'U') &&
4350 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4351 PL_lex_casestack[--PL_lex_casemods] = '\0';
4354 if (PL_lex_casemods > 10)
4355 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4356 PL_lex_casestack[PL_lex_casemods++] = *s;
4357 PL_lex_casestack[PL_lex_casemods] = '\0';
4358 PL_lex_state = LEX_INTERPCONCAT;
4359 start_force(PL_curforce);
4360 NEXTVAL_NEXTTOKE.ival = 0;
4362 start_force(PL_curforce);
4364 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4366 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4368 NEXTVAL_NEXTTOKE.ival = OP_LC;
4370 NEXTVAL_NEXTTOKE.ival = OP_UC;
4372 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4374 Perl_croak(aTHX_ "panic: yylex");
4376 SV* const tmpsv = newSVpvs("\\ ");
4377 /* replace the space with the character we want to escape
4379 SvPVX(tmpsv)[1] = *s;
4385 if (PL_lex_starts) {
4391 sv_free(PL_thistoken);
4392 PL_thistoken = newSVpvs("");
4395 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4396 if (PL_lex_casemods == 1 && PL_lex_inpat)
4405 case LEX_INTERPPUSH:
4406 return REPORT(sublex_push());
4408 case LEX_INTERPSTART:
4409 if (PL_bufptr == PL_bufend)
4410 return REPORT(sublex_done());
4411 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4412 "### Interpolated variable\n"); });
4414 PL_lex_dojoin = (*PL_bufptr == '@');
4415 PL_lex_state = LEX_INTERPNORMAL;
4416 if (PL_lex_dojoin) {
4417 start_force(PL_curforce);
4418 NEXTVAL_NEXTTOKE.ival = 0;
4420 start_force(PL_curforce);
4421 force_ident("\"", '$');
4422 start_force(PL_curforce);
4423 NEXTVAL_NEXTTOKE.ival = 0;
4425 start_force(PL_curforce);
4426 NEXTVAL_NEXTTOKE.ival = 0;
4428 start_force(PL_curforce);
4429 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4432 if (PL_lex_starts++) {
4437 sv_free(PL_thistoken);
4438 PL_thistoken = newSVpvs("");
4441 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4442 if (!PL_lex_casemods && PL_lex_inpat)
4449 case LEX_INTERPENDMAYBE:
4450 if (intuit_more(PL_bufptr)) {
4451 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4457 if (PL_lex_dojoin) {
4458 PL_lex_dojoin = FALSE;
4459 PL_lex_state = LEX_INTERPCONCAT;
4463 sv_free(PL_thistoken);
4464 PL_thistoken = newSVpvs("");
4469 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4470 && SvEVALED(PL_lex_repl))
4472 if (PL_bufptr != PL_bufend)
4473 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4477 case LEX_INTERPCONCAT:
4479 if (PL_lex_brackets)
4480 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4482 if (PL_bufptr == PL_bufend)
4483 return REPORT(sublex_done());
4485 if (SvIVX(PL_linestr) == '\'') {
4486 SV *sv = newSVsv(PL_linestr);
4489 else if ( PL_hints & HINT_NEW_RE )
4490 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4491 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4495 s = scan_const(PL_bufptr);
4497 PL_lex_state = LEX_INTERPCASEMOD;
4499 PL_lex_state = LEX_INTERPSTART;
4502 if (s != PL_bufptr) {
4503 start_force(PL_curforce);
4505 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4507 NEXTVAL_NEXTTOKE = pl_yylval;
4510 if (PL_lex_starts++) {
4514 sv_free(PL_thistoken);
4515 PL_thistoken = newSVpvs("");
4518 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4519 if (!PL_lex_casemods && PL_lex_inpat)
4532 PL_lex_state = LEX_NORMAL;
4533 s = scan_formline(PL_bufptr);
4534 if (!PL_lex_formbrack)
4540 PL_oldoldbufptr = PL_oldbufptr;
4546 sv_free(PL_thistoken);
4549 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4553 if (isIDFIRST_lazy_if(s,UTF))
4556 unsigned char c = *s;
4557 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4558 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4559 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4564 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4568 goto fake_eof; /* emulate EOF on ^D or ^Z */
4577 if (PL_lex_brackets) {
4578 yyerror((const char *)
4580 ? "Format not terminated"
4581 : "Missing right curly or square bracket"));
4583 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4584 "### Tokener got EOF\n");
4588 if (s++ < PL_bufend)
4589 goto retry; /* ignore stray nulls */
4592 if (!PL_in_eval && !PL_preambled) {
4593 PL_preambled = TRUE;
4599 /* Generate a string of Perl code to load the debugger.
4600 * If PERL5DB is set, it will return the contents of that,
4601 * otherwise a compile-time require of perl5db.pl. */
4603 const char * const pdb = PerlEnv_getenv("PERL5DB");
4606 sv_setpv(PL_linestr, pdb);
4607 sv_catpvs(PL_linestr,";");
4609 SETERRNO(0,SS_NORMAL);
4610 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4613 sv_setpvs(PL_linestr,"");
4614 if (PL_preambleav) {
4615 SV **svp = AvARRAY(PL_preambleav);
4616 SV **const end = svp + AvFILLp(PL_preambleav);
4618 sv_catsv(PL_linestr, *svp);
4620 sv_catpvs(PL_linestr, ";");
4622 sv_free(MUTABLE_SV(PL_preambleav));
4623 PL_preambleav = NULL;
4626 sv_catpvs(PL_linestr,
4627 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4628 if (PL_minus_n || PL_minus_p) {
4629 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4631 sv_catpvs(PL_linestr,"chomp;");
4634 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4635 || *PL_splitstr == '"')
4636 && strchr(PL_splitstr + 1, *PL_splitstr))
4637 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4639 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4640 bytes can be used as quoting characters. :-) */
4641 const char *splits = PL_splitstr;
4642 sv_catpvs(PL_linestr, "our @F=split(q\0");
4645 if (*splits == '\\')
4646 sv_catpvn(PL_linestr, splits, 1);
4647 sv_catpvn(PL_linestr, splits, 1);
4648 } while (*splits++);
4649 /* This loop will embed the trailing NUL of
4650 PL_linestr as the last thing it does before
4652 sv_catpvs(PL_linestr, ");");
4656 sv_catpvs(PL_linestr,"our @F=split(' ');");
4659 sv_catpvs(PL_linestr, "\n");
4660 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4661 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4662 PL_last_lop = PL_last_uni = NULL;
4663 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4664 update_debugger_info(PL_linestr, NULL, 0);
4669 bof = PL_rsfp ? TRUE : FALSE;
4672 fake_eof = LEX_FAKE_EOF;
4674 PL_bufptr = PL_bufend;
4675 CopLINE_inc(PL_curcop);
4676 if (!lex_next_chunk(fake_eof)) {
4677 CopLINE_dec(PL_curcop);
4679 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4681 CopLINE_dec(PL_curcop);
4684 PL_realtokenstart = -1;
4687 /* If it looks like the start of a BOM or raw UTF-16,
4688 * check if it in fact is. */
4689 if (bof && PL_rsfp &&
4694 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4696 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4697 s = swallow_bom((U8*)s);
4701 /* Incest with pod. */
4704 sv_catsv(PL_thiswhite, PL_linestr);
4706 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4707 sv_setpvs(PL_linestr, "");
4708 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4709 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4710 PL_last_lop = PL_last_uni = NULL;
4711 PL_doextract = FALSE;
4716 } while (PL_doextract);
4717 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4718 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4719 PL_last_lop = PL_last_uni = NULL;
4720 if (CopLINE(PL_curcop) == 1) {
4721 while (s < PL_bufend && isSPACE(*s))
4723 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4727 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4731 if (*s == '#' && *(s+1) == '!')
4733 #ifdef ALTERNATE_SHEBANG
4735 static char const as[] = ALTERNATE_SHEBANG;
4736 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4737 d = s + (sizeof(as) - 1);
4739 #endif /* ALTERNATE_SHEBANG */
4748 while (*d && !isSPACE(*d))
4752 #ifdef ARG_ZERO_IS_SCRIPT
4753 if (ipathend > ipath) {
4755 * HP-UX (at least) sets argv[0] to the script name,
4756 * which makes $^X incorrect. And Digital UNIX and Linux,
4757 * at least, set argv[0] to the basename of the Perl
4758 * interpreter. So, having found "#!", we'll set it right.
4760 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4762 assert(SvPOK(x) || SvGMAGICAL(x));
4763 if (sv_eq(x, CopFILESV(PL_curcop))) {
4764 sv_setpvn(x, ipath, ipathend - ipath);
4770 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4771 const char * const lstart = SvPV_const(x,llen);
4773 bstart += blen - llen;
4774 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4775 sv_setpvn(x, ipath, ipathend - ipath);
4780 TAINT_NOT; /* $^X is always tainted, but that's OK */
4782 #endif /* ARG_ZERO_IS_SCRIPT */
4787 d = instr(s,"perl -");
4789 d = instr(s,"perl");
4791 /* avoid getting into infinite loops when shebang
4792 * line contains "Perl" rather than "perl" */
4794 for (d = ipathend-4; d >= ipath; --d) {
4795 if ((*d == 'p' || *d == 'P')
4796 && !ibcmp(d, "perl", 4))
4806 #ifdef ALTERNATE_SHEBANG
4808 * If the ALTERNATE_SHEBANG on this system starts with a
4809 * character that can be part of a Perl expression, then if
4810 * we see it but not "perl", we're probably looking at the
4811 * start of Perl code, not a request to hand off to some
4812 * other interpreter. Similarly, if "perl" is there, but
4813 * not in the first 'word' of the line, we assume the line
4814 * contains the start of the Perl program.
4816 if (d && *s != '#') {
4817 const char *c = ipath;
4818 while (*c && !strchr("; \t\r\n\f\v#", *c))
4821 d = NULL; /* "perl" not in first word; ignore */
4823 *s = '#'; /* Don't try to parse shebang line */
4825 #endif /* ALTERNATE_SHEBANG */
4830 !instr(s,"indir") &&
4831 instr(PL_origargv[0],"perl"))
4838 while (s < PL_bufend && isSPACE(*s))
4840 if (s < PL_bufend) {
4841 Newx(newargv,PL_origargc+3,char*);
4843 while (s < PL_bufend && !isSPACE(*s))
4846 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4849 newargv = PL_origargv;
4852 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4854 Perl_croak(aTHX_ "Can't exec %s", ipath);
4857 while (*d && !isSPACE(*d))
4859 while (SPACE_OR_TAB(*d))
4863 const bool switches_done = PL_doswitches;
4864 const U32 oldpdb = PL_perldb;
4865 const bool oldn = PL_minus_n;
4866 const bool oldp = PL_minus_p;
4870 bool baduni = FALSE;
4872 const char *d2 = d1 + 1;
4873 if (parse_unicode_opts((const char **)&d2)
4877 if (baduni || *d1 == 'M' || *d1 == 'm') {
4878 const char * const m = d1;
4879 while (*d1 && !isSPACE(*d1))
4881 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4884 d1 = moreswitches(d1);
4886 if (PL_doswitches && !switches_done) {
4887 int argc = PL_origargc;
4888 char **argv = PL_origargv;
4891 } while (argc && argv[0][0] == '-' && argv[0][1]);
4892 init_argv_symbols(argc,argv);
4894 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4895 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4896 /* if we have already added "LINE: while (<>) {",
4897 we must not do it again */
4899 sv_setpvs(PL_linestr, "");
4900 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4901 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4902 PL_last_lop = PL_last_uni = NULL;
4903 PL_preambled = FALSE;
4904 if (PERLDB_LINE || PERLDB_SAVESRC)
4905 (void)gv_fetchfile(PL_origfilename);
4912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4914 PL_lex_state = LEX_FORMLINE;
4919 #ifdef PERL_STRICT_CR
4920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4924 case ' ': case '\t': case '\f': case 013:
4926 PL_realtokenstart = -1;
4928 PL_thiswhite = newSVpvs("");
4929 sv_catpvn(PL_thiswhite, s, 1);
4936 PL_realtokenstart = -1;
4940 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4941 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4942 /* handle eval qq[#line 1 "foo"\n ...] */
4943 CopLINE_dec(PL_curcop);
4946 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4948 if (!PL_in_eval || PL_rsfp)
4953 while (d < PL_bufend && *d != '\n')
4957 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4958 Perl_croak(aTHX_ "panic: input overflow");
4961 PL_thiswhite = newSVpvn(s, d - s);
4966 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4968 PL_lex_state = LEX_FORMLINE;
4974 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4975 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4978 TOKEN(PEG); /* make sure any #! line is accessible */
4983 /* if (PL_madskills && PL_lex_formbrack) { */
4985 while (d < PL_bufend && *d != '\n')
4989 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4990 Perl_croak(aTHX_ "panic: input overflow");
4991 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4993 PL_thiswhite = newSVpvs("");
4994 if (CopLINE(PL_curcop) == 1) {
4995 sv_setpvs(PL_thiswhite, "");
4998 sv_catpvn(PL_thiswhite, s, d - s);
5012 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5020 while (s < PL_bufend && SPACE_OR_TAB(*s))
5023 if (strnEQ(s,"=>",2)) {
5024 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5025 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5026 OPERATOR('-'); /* unary minus */
5028 PL_last_uni = PL_oldbufptr;
5030 case 'r': ftst = OP_FTEREAD; break;
5031 case 'w': ftst = OP_FTEWRITE; break;
5032 case 'x': ftst = OP_FTEEXEC; break;
5033 case 'o': ftst = OP_FTEOWNED; break;
5034 case 'R': ftst = OP_FTRREAD; break;
5035 case 'W': ftst = OP_FTRWRITE; break;
5036 case 'X': ftst = OP_FTREXEC; break;
5037 case 'O': ftst = OP_FTROWNED; break;
5038 case 'e': ftst = OP_FTIS; break;
5039 case 'z': ftst = OP_FTZERO; break;
5040 case 's': ftst = OP_FTSIZE; break;
5041 case 'f': ftst = OP_FTFILE; break;
5042 case 'd': ftst = OP_FTDIR; break;
5043 case 'l': ftst = OP_FTLINK; break;
5044 case 'p': ftst = OP_FTPIPE; break;
5045 case 'S': ftst = OP_FTSOCK; break;
5046 case 'u': ftst = OP_FTSUID; break;
5047 case 'g': ftst = OP_FTSGID; break;
5048 case 'k': ftst = OP_FTSVTX; break;
5049 case 'b': ftst = OP_FTBLK; break;
5050 case 'c': ftst = OP_FTCHR; break;
5051 case 't': ftst = OP_FTTTY; break;
5052 case 'T': ftst = OP_FTTEXT; break;
5053 case 'B': ftst = OP_FTBINARY; break;
5054 case 'M': case 'A': case 'C':
5055 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5057 case 'M': ftst = OP_FTMTIME; break;
5058 case 'A': ftst = OP_FTATIME; break;
5059 case 'C': ftst = OP_FTCTIME; break;
5067 PL_last_lop_op = (OPCODE)ftst;
5068 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5069 "### Saw file test %c\n", (int)tmp);
5074 /* Assume it was a minus followed by a one-letter named
5075 * subroutine call (or a -bareword), then. */
5076 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5077 "### '-%c' looked like a file test but was not\n",
5084 const char tmp = *s++;
5087 if (PL_expect == XOPERATOR)
5092 else if (*s == '>') {
5095 if (isIDFIRST_lazy_if(s,UTF)) {
5096 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5104 if (PL_expect == XOPERATOR)
5107 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5109 OPERATOR('-'); /* unary minus */
5115 const char tmp = *s++;
5118 if (PL_expect == XOPERATOR)
5123 if (PL_expect == XOPERATOR)
5126 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5133 if (PL_expect != XOPERATOR) {
5134 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5135 PL_expect = XOPERATOR;
5136 force_ident(PL_tokenbuf, '*');
5149 if (PL_expect == XOPERATOR) {
5153 PL_tokenbuf[0] = '%';
5154 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5155 sizeof PL_tokenbuf - 1, FALSE);
5156 if (!PL_tokenbuf[1]) {
5159 PL_pending_ident = '%';
5168 const char tmp = *s++;
5173 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5180 const char tmp = *s++;
5186 goto just_a_word_zero_gv;
5189 switch (PL_expect) {
5195 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5197 PL_bufptr = s; /* update in case we back off */
5199 deprecate(":= for an empty attribute list");
5206 PL_expect = XTERMBLOCK;
5209 stuffstart = s - SvPVX(PL_linestr) - 1;
5213 while (isIDFIRST_lazy_if(s,UTF)) {
5216 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5217 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5218 if (tmp < 0) tmp = -tmp;
5233 sv = newSVpvn(s, len);
5235 d = scan_str(d,TRUE,TRUE);
5237 /* MUST advance bufptr here to avoid bogus
5238 "at end of line" context messages from yyerror().
5240 PL_bufptr = s + len;
5241 yyerror("Unterminated attribute parameter in attribute list");
5245 return REPORT(0); /* EOF indicator */
5249 sv_catsv(sv, PL_lex_stuff);
5250 attrs = append_elem(OP_LIST, attrs,
5251 newSVOP(OP_CONST, 0, sv));
5252 SvREFCNT_dec(PL_lex_stuff);
5253 PL_lex_stuff = NULL;
5256 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5258 if (PL_in_my == KEY_our) {
5259 deprecate(":unique");
5262 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5265 /* NOTE: any CV attrs applied here need to be part of
5266 the CVf_BUILTIN_ATTRS define in cv.h! */
5267 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5269 CvLVALUE_on(PL_compcv);
5271 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5273 deprecate(":locked");
5275 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5277 CvMETHOD_on(PL_compcv);
5279 /* After we've set the flags, it could be argued that
5280 we don't need to do the attributes.pm-based setting
5281 process, and shouldn't bother appending recognized
5282 flags. To experiment with that, uncomment the
5283 following "else". (Note that's already been
5284 uncommented. That keeps the above-applied built-in
5285 attributes from being intercepted (and possibly
5286 rejected) by a package's attribute routines, but is
5287 justified by the performance win for the common case
5288 of applying only built-in attributes.) */
5290 attrs = append_elem(OP_LIST, attrs,
5291 newSVOP(OP_CONST, 0,
5295 if (*s == ':' && s[1] != ':')
5298 break; /* require real whitespace or :'s */
5299 /* XXX losing whitespace on sequential attributes here */
5303 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5304 if (*s != ';' && *s != '}' && *s != tmp
5305 && (tmp != '=' || *s != ')')) {
5306 const char q = ((*s == '\'') ? '"' : '\'');
5307 /* If here for an expression, and parsed no attrs, back
5309 if (tmp == '=' && !attrs) {
5313 /* MUST advance bufptr here to avoid bogus "at end of line"
5314 context messages from yyerror().
5317 yyerror( (const char *)
5319 ? Perl_form(aTHX_ "Invalid separator character "
5320 "%c%c%c in attribute list", q, *s, q)
5321 : "Unterminated attribute list" ) );
5329 start_force(PL_curforce);
5330 NEXTVAL_NEXTTOKE.opval = attrs;
5331 CURMAD('_', PL_nextwhite);
5336 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5337 (s - SvPVX(PL_linestr)) - stuffstart);
5345 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5346 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5354 const char tmp = *s++;
5359 const char tmp = *s++;
5367 if (PL_lex_brackets <= 0)
5368 yyerror("Unmatched right square bracket");
5371 if (PL_lex_state == LEX_INTERPNORMAL) {
5372 if (PL_lex_brackets == 0) {
5373 if (*s == '-' && s[1] == '>')
5374 PL_lex_state = LEX_INTERPENDMAYBE;
5375 else if (*s != '[' && *s != '{')
5376 PL_lex_state = LEX_INTERPEND;
5383 if (PL_lex_brackets > 100) {
5384 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5386 switch (PL_expect) {
5388 if (PL_lex_formbrack) {
5392 if (PL_oldoldbufptr == PL_last_lop)
5393 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5395 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5396 OPERATOR(HASHBRACK);
5398 while (s < PL_bufend && SPACE_OR_TAB(*s))
5401 PL_tokenbuf[0] = '\0';
5402 if (d < PL_bufend && *d == '-') {
5403 PL_tokenbuf[0] = '-';
5405 while (d < PL_bufend && SPACE_OR_TAB(*d))
5408 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5409 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5411 while (d < PL_bufend && SPACE_OR_TAB(*d))
5414 const char minus = (PL_tokenbuf[0] == '-');
5415 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5423 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5428 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5433 if (PL_oldoldbufptr == PL_last_lop)
5434 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5436 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5439 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5441 /* This hack is to get the ${} in the message. */
5443 yyerror("syntax error");
5446 OPERATOR(HASHBRACK);
5448 /* This hack serves to disambiguate a pair of curlies
5449 * as being a block or an anon hash. Normally, expectation
5450 * determines that, but in cases where we're not in a
5451 * position to expect anything in particular (like inside
5452 * eval"") we have to resolve the ambiguity. This code
5453 * covers the case where the first term in the curlies is a
5454 * quoted string. Most other cases need to be explicitly
5455 * disambiguated by prepending a "+" before the opening
5456 * curly in order to force resolution as an anon hash.
5458 * XXX should probably propagate the outer expectation
5459 * into eval"" to rely less on this hack, but that could
5460 * potentially break current behavior of eval"".
5464 if (*s == '\'' || *s == '"' || *s == '`') {
5465 /* common case: get past first string, handling escapes */
5466 for (t++; t < PL_bufend && *t != *s;)
5467 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5471 else if (*s == 'q') {
5474 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5477 /* skip q//-like construct */
5479 char open, close, term;
5482 while (t < PL_bufend && isSPACE(*t))
5484 /* check for q => */
5485 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5486 OPERATOR(HASHBRACK);
5490 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5494 for (t++; t < PL_bufend; t++) {
5495 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5497 else if (*t == open)
5501 for (t++; t < PL_bufend; t++) {
5502 if (*t == '\\' && t+1 < PL_bufend)
5504 else if (*t == close && --brackets <= 0)
5506 else if (*t == open)
5513 /* skip plain q word */
5514 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5517 else if (isALNUM_lazy_if(t,UTF)) {
5519 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5522 while (t < PL_bufend && isSPACE(*t))
5524 /* if comma follows first term, call it an anon hash */
5525 /* XXX it could be a comma expression with loop modifiers */
5526 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5527 || (*t == '=' && t[1] == '>')))
5528 OPERATOR(HASHBRACK);
5529 if (PL_expect == XREF)
5532 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5538 pl_yylval.ival = CopLINE(PL_curcop);
5539 if (isSPACE(*s) || *s == '#')
5540 PL_copline = NOLINE; /* invalidate current command line number */
5545 if (PL_lex_brackets <= 0)
5546 yyerror("Unmatched right curly bracket");
5548 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5549 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5550 PL_lex_formbrack = 0;
5551 if (PL_lex_state == LEX_INTERPNORMAL) {
5552 if (PL_lex_brackets == 0) {
5553 if (PL_expect & XFAKEBRACK) {
5554 PL_expect &= XENUMMASK;
5555 PL_lex_state = LEX_INTERPEND;
5560 PL_thiswhite = newSVpvs("");
5561 sv_catpvs(PL_thiswhite,"}");
5564 return yylex(); /* ignore fake brackets */
5566 if (*s == '-' && s[1] == '>')
5567 PL_lex_state = LEX_INTERPENDMAYBE;
5568 else if (*s != '[' && *s != '{')
5569 PL_lex_state = LEX_INTERPEND;
5572 if (PL_expect & XFAKEBRACK) {
5573 PL_expect &= XENUMMASK;
5575 return yylex(); /* ignore fake brackets */
5577 start_force(PL_curforce);
5579 curmad('X', newSVpvn(s-1,1));
5580 CURMAD('_', PL_thiswhite);
5585 PL_thistoken = newSVpvs("");
5593 if (PL_expect == XOPERATOR) {
5594 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5595 && isIDFIRST_lazy_if(s,UTF))
5597 CopLINE_dec(PL_curcop);
5598 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5599 CopLINE_inc(PL_curcop);
5604 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5606 PL_expect = XOPERATOR;
5607 force_ident(PL_tokenbuf, '&');
5611 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5623 const char tmp = *s++;
5630 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5631 && strchr("+-*/%.^&|<",tmp))
5632 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5633 "Reversed %c= operator",(int)tmp);
5635 if (PL_expect == XSTATE && isALPHA(tmp) &&
5636 (s == PL_linestart+1 || s[-2] == '\n') )
5638 if (PL_in_eval && !PL_rsfp) {
5643 if (strnEQ(s,"=cut",4)) {
5659 PL_thiswhite = newSVpvs("");
5660 sv_catpvn(PL_thiswhite, PL_linestart,
5661 PL_bufend - PL_linestart);
5665 PL_doextract = TRUE;
5669 if (PL_lex_brackets < PL_lex_formbrack) {
5671 #ifdef PERL_STRICT_CR
5672 while (SPACE_OR_TAB(*t))
5674 while (SPACE_OR_TAB(*t) || *t == '\r')
5677 if (*t == '\n' || *t == '#') {
5688 const char tmp = *s++;
5690 /* was this !=~ where !~ was meant?
5691 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5693 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5694 const char *t = s+1;
5696 while (t < PL_bufend && isSPACE(*t))
5699 if (*t == '/' || *t == '?' ||
5700 ((*t == 'm' || *t == 's' || *t == 'y')
5701 && !isALNUM(t[1])) ||
5702 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5703 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5704 "!=~ should be !~");
5714 if (PL_expect != XOPERATOR) {
5715 if (s[1] != '<' && !strchr(s,'>'))
5718 s = scan_heredoc(s);
5720 s = scan_inputsymbol(s);
5721 TERM(sublex_start());
5727 SHop(OP_LEFT_SHIFT);
5741 const char tmp = *s++;
5743 SHop(OP_RIGHT_SHIFT);
5744 else if (tmp == '=')
5753 if (PL_expect == XOPERATOR) {
5754 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5755 return deprecate_commaless_var_list();
5759 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5760 PL_tokenbuf[0] = '@';
5761 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5762 sizeof PL_tokenbuf - 1, FALSE);
5763 if (PL_expect == XOPERATOR)
5764 no_op("Array length", s);
5765 if (!PL_tokenbuf[1])
5767 PL_expect = XOPERATOR;
5768 PL_pending_ident = '#';
5772 PL_tokenbuf[0] = '$';
5773 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5774 sizeof PL_tokenbuf - 1, FALSE);
5775 if (PL_expect == XOPERATOR)
5777 if (!PL_tokenbuf[1]) {
5779 yyerror("Final $ should be \\$ or $name");
5783 /* This kludge not intended to be bulletproof. */
5784 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5785 pl_yylval.opval = newSVOP(OP_CONST, 0,
5786 newSViv(CopARYBASE_get(&PL_compiling)));
5787 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5793 const char tmp = *s;
5794 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5797 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5798 && intuit_more(s)) {
5800 PL_tokenbuf[0] = '@';
5801 if (ckWARN(WARN_SYNTAX)) {
5804 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5807 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5808 while (t < PL_bufend && *t != ']')
5810 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5811 "Multidimensional syntax %.*s not supported",
5812 (int)((t - PL_bufptr) + 1), PL_bufptr);
5816 else if (*s == '{') {
5818 PL_tokenbuf[0] = '%';
5819 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5820 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5822 char tmpbuf[sizeof PL_tokenbuf];
5825 } while (isSPACE(*t));
5826 if (isIDFIRST_lazy_if(t,UTF)) {
5828 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5832 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5833 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5834 "You need to quote \"%s\"",
5841 PL_expect = XOPERATOR;
5842 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5843 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5844 if (!islop || PL_last_lop_op == OP_GREPSTART)
5845 PL_expect = XOPERATOR;
5846 else if (strchr("$@\"'`q", *s))
5847 PL_expect = XTERM; /* e.g. print $fh "foo" */
5848 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5849 PL_expect = XTERM; /* e.g. print $fh &sub */
5850 else if (isIDFIRST_lazy_if(s,UTF)) {
5851 char tmpbuf[sizeof PL_tokenbuf];
5853 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5854 if ((t2 = keyword(tmpbuf, len, 0))) {
5855 /* binary operators exclude handle interpretations */
5867 PL_expect = XTERM; /* e.g. print $fh length() */
5872 PL_expect = XTERM; /* e.g. print $fh subr() */
5875 else if (isDIGIT(*s))
5876 PL_expect = XTERM; /* e.g. print $fh 3 */
5877 else if (*s == '.' && isDIGIT(s[1]))
5878 PL_expect = XTERM; /* e.g. print $fh .3 */
5879 else if ((*s == '?' || *s == '-' || *s == '+')
5880 && !isSPACE(s[1]) && s[1] != '=')
5881 PL_expect = XTERM; /* e.g. print $fh -1 */
5882 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5884 PL_expect = XTERM; /* e.g. print $fh /.../
5885 XXX except DORDOR operator
5887 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5889 PL_expect = XTERM; /* print $fh <<"EOF" */
5892 PL_pending_ident = '$';
5896 if (PL_expect == XOPERATOR)
5898 PL_tokenbuf[0] = '@';
5899 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5900 if (!PL_tokenbuf[1]) {
5903 if (PL_lex_state == LEX_NORMAL)
5905 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5907 PL_tokenbuf[0] = '%';
5909 /* Warn about @ where they meant $. */
5910 if (*s == '[' || *s == '{') {
5911 if (ckWARN(WARN_SYNTAX)) {
5912 const char *t = s + 1;
5913 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5915 if (*t == '}' || *t == ']') {
5917 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5918 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5919 "Scalar value %.*s better written as $%.*s",
5920 (int)(t-PL_bufptr), PL_bufptr,
5921 (int)(t-PL_bufptr-1), PL_bufptr+1);
5926 PL_pending_ident = '@';
5929 case '/': /* may be division, defined-or, or pattern */
5930 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5934 case '?': /* may either be conditional or pattern */
5935 if (PL_expect == XOPERATOR) {
5943 /* A // operator. */
5953 /* Disable warning on "study /blah/" */
5954 if (PL_oldoldbufptr == PL_last_uni
5955 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5956 || memNE(PL_last_uni, "study", 5)
5957 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5960 s = scan_pat(s,OP_MATCH);
5961 TERM(sublex_start());
5965 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5966 #ifdef PERL_STRICT_CR
5969 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5971 && (s == PL_linestart || s[-1] == '\n') )
5973 PL_lex_formbrack = 0;
5977 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5981 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5987 pl_yylval.ival = OPf_SPECIAL;
5996 case '0': case '1': case '2': case '3': case '4':
5997 case '5': case '6': case '7': case '8': case '9':
5998 s = scan_num(s, &pl_yylval);
5999 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6000 if (PL_expect == XOPERATOR)
6005 s = scan_str(s,!!PL_madskills,FALSE);
6006 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6007 if (PL_expect == XOPERATOR) {
6008 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6009 return deprecate_commaless_var_list();
6016 pl_yylval.ival = OP_CONST;
6017 TERM(sublex_start());
6020 s = scan_str(s,!!PL_madskills,FALSE);
6021 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6022 if (PL_expect == XOPERATOR) {
6023 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6024 return deprecate_commaless_var_list();
6031 pl_yylval.ival = OP_CONST;
6032 /* FIXME. I think that this can be const if char *d is replaced by
6033 more localised variables. */
6034 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6035 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6036 pl_yylval.ival = OP_STRINGIFY;
6040 TERM(sublex_start());
6043 s = scan_str(s,!!PL_madskills,FALSE);
6044 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6045 if (PL_expect == XOPERATOR)
6046 no_op("Backticks",s);
6049 readpipe_override();
6050 TERM(sublex_start());
6054 if (PL_lex_inwhat && isDIGIT(*s))
6055 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6057 if (PL_expect == XOPERATOR)
6058 no_op("Backslash",s);
6062 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6063 char *start = s + 2;
6064 while (isDIGIT(*start) || *start == '_')
6066 if (*start == '.' && isDIGIT(start[1])) {
6067 s = scan_num(s, &pl_yylval);
6070 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6071 else if (!isALPHA(*start) && (PL_expect == XTERM
6072 || PL_expect == XREF || PL_expect == XSTATE
6073 || PL_expect == XTERMORDORDOR)) {
6074 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6076 s = scan_num(s, &pl_yylval);
6083 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6126 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6128 /* Some keywords can be followed by any delimiter, including ':' */
6129 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6130 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6131 (PL_tokenbuf[0] == 'q' &&
6132 strchr("qwxr", PL_tokenbuf[1])))));
6134 /* x::* is just a word, unless x is "CORE" */
6135 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6139 while (d < PL_bufend && isSPACE(*d))
6140 d++; /* no comments skipped here, or s### is misparsed */
6142 /* Is this a word before a => operator? */
6143 if (*d == '=' && d[1] == '>') {
6146 = (OP*)newSVOP(OP_CONST, 0,
6147 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6148 pl_yylval.opval->op_private = OPpCONST_BARE;
6152 /* Check for plugged-in keyword */
6156 char *saved_bufptr = PL_bufptr;
6158 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6160 if (result == KEYWORD_PLUGIN_DECLINE) {
6161 /* not a plugged-in keyword */
6162 PL_bufptr = saved_bufptr;
6163 } else if (result == KEYWORD_PLUGIN_STMT) {
6164 pl_yylval.opval = o;
6167 return REPORT(PLUGSTMT);
6168 } else if (result == KEYWORD_PLUGIN_EXPR) {
6169 pl_yylval.opval = o;
6171 PL_expect = XOPERATOR;
6172 return REPORT(PLUGEXPR);
6174 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6179 /* Check for built-in keyword */
6180 tmp = keyword(PL_tokenbuf, len, 0);
6182 /* Is this a label? */
6183 if (!anydelim && PL_expect == XSTATE
6184 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6186 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6191 if (tmp < 0) { /* second-class keyword? */
6192 GV *ogv = NULL; /* override (winner) */
6193 GV *hgv = NULL; /* hidden (loser) */
6194 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6196 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6199 if (GvIMPORTED_CV(gv))
6201 else if (! CvMETHOD(cv))
6205 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6206 (gv = *gvp) && isGV_with_GP(gv) &&
6207 GvCVu(gv) && GvIMPORTED_CV(gv))
6214 tmp = 0; /* overridden by import or by GLOBAL */
6217 && -tmp==KEY_lock /* XXX generalizable kludge */
6220 tmp = 0; /* any sub overrides "weak" keyword */
6222 else { /* no override */
6224 if (tmp == KEY_dump) {
6225 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6226 "dump() better written as CORE::dump()");
6230 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6231 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6232 "Ambiguous call resolved as CORE::%s(), "
6233 "qualify as such or use &",
6241 default: /* not a keyword */
6242 /* Trade off - by using this evil construction we can pull the
6243 variable gv into the block labelled keylookup. If not, then
6244 we have to give it function scope so that the goto from the
6245 earlier ':' case doesn't bypass the initialisation. */
6247 just_a_word_zero_gv:
6255 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6259 SV *nextPL_nextwhite = 0;
6263 /* Get the rest if it looks like a package qualifier */
6265 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6267 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6270 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6271 *s == '\'' ? "'" : "::");
6276 if (PL_expect == XOPERATOR) {
6277 if (PL_bufptr == PL_linestart) {
6278 CopLINE_dec(PL_curcop);
6279 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6280 CopLINE_inc(PL_curcop);
6283 no_op("Bareword",s);
6286 /* Look for a subroutine with this name in current package,
6287 unless name is "Foo::", in which case Foo is a bearword
6288 (and a package name). */
6290 if (len > 2 && !PL_madskills &&
6291 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6293 if (ckWARN(WARN_BAREWORD)
6294 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6295 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6296 "Bareword \"%s\" refers to nonexistent package",
6299 PL_tokenbuf[len] = '\0';
6305 /* Mustn't actually add anything to a symbol table.
6306 But also don't want to "initialise" any placeholder
6307 constants that might already be there into full
6308 blown PVGVs with attached PVCV. */
6309 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6310 GV_NOADD_NOINIT, SVt_PVCV);
6315 /* if we saw a global override before, get the right name */
6317 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6318 len ? len : strlen(PL_tokenbuf));
6320 SV * const tmp_sv = sv;
6321 sv = newSVpvs("CORE::GLOBAL::");
6322 sv_catsv(sv, tmp_sv);
6323 SvREFCNT_dec(tmp_sv);
6327 if (PL_madskills && !PL_thistoken) {
6328 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6329 PL_thistoken = newSVpvn(start,s - start);
6330 PL_realtokenstart = s - SvPVX(PL_linestr);
6334 /* Presume this is going to be a bareword of some sort. */
6336 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6337 pl_yylval.opval->op_private = OPpCONST_BARE;
6339 /* And if "Foo::", then that's what it certainly is. */
6345 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6346 const_op->op_private = OPpCONST_BARE;
6347 rv2cv_op = newCVREF(0, const_op);
6349 if (rv2cv_op->op_type == OP_RV2CV &&
6350 (rv2cv_op->op_flags & OPf_KIDS)) {
6351 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6352 switch (rv_op->op_type) {
6354 SV *sv = cSVOPx_sv(rv_op);
6355 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6359 GV *gv = cGVOPx_gv(rv_op);
6360 CV *maybe_cv = GvCVu(gv);
6361 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6367 /* See if it's the indirect object for a list operator. */
6369 if (PL_oldoldbufptr &&
6370 PL_oldoldbufptr < PL_bufptr &&
6371 (PL_oldoldbufptr == PL_last_lop
6372 || PL_oldoldbufptr == PL_last_uni) &&
6373 /* NO SKIPSPACE BEFORE HERE! */
6374 (PL_expect == XREF ||
6375 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6377 bool immediate_paren = *s == '(';
6379 /* (Now we can afford to cross potential line boundary.) */
6380 s = SKIPSPACE2(s,nextPL_nextwhite);
6382 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6385 /* Two barewords in a row may indicate method call. */
6387 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6388 (tmp = intuit_method(s, gv, cv))) {
6393 /* If not a declared subroutine, it's an indirect object. */
6394 /* (But it's an indir obj regardless for sort.) */
6395 /* Also, if "_" follows a filetest operator, it's a bareword */
6398 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6400 (PL_last_lop_op != OP_MAPSTART &&
6401 PL_last_lop_op != OP_GREPSTART))))
6402 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6403 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6406 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6411 PL_expect = XOPERATOR;
6414 s = SKIPSPACE2(s,nextPL_nextwhite);
6415 PL_nextwhite = nextPL_nextwhite;
6420 /* Is this a word before a => operator? */
6421 if (*s == '=' && s[1] == '>' && !pkgname) {
6424 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6425 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6426 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6430 /* If followed by a paren, it's certainly a subroutine. */
6435 while (SPACE_OR_TAB(*d))
6437 if (*d == ')' && (sv = cv_const_sv(cv))) {
6444 PL_nextwhite = PL_thiswhite;
6447 start_force(PL_curforce);
6449 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6450 PL_expect = XOPERATOR;
6453 PL_nextwhite = nextPL_nextwhite;
6454 curmad('X', PL_thistoken);
6455 PL_thistoken = newSVpvs("");
6464 /* If followed by var or block, call it a method (unless sub) */
6466 if ((*s == '$' || *s == '{') && !cv) {
6468 PL_last_lop = PL_oldbufptr;
6469 PL_last_lop_op = OP_METHOD;
6473 /* If followed by a bareword, see if it looks like indir obj. */
6476 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6477 && (tmp = intuit_method(s, gv, cv))) {
6482 /* Not a method, so call it a subroutine (if defined) */
6485 if (lastchar == '-')
6486 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6487 "Ambiguous use of -%s resolved as -&%s()",
6488 PL_tokenbuf, PL_tokenbuf);
6489 /* Check for a constant sub */
6490 if ((sv = cv_const_sv(cv))) {
6493 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6494 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6495 pl_yylval.opval->op_private = 0;
6499 op_free(pl_yylval.opval);
6500 pl_yylval.opval = rv2cv_op;
6501 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6502 PL_last_lop = PL_oldbufptr;
6503 PL_last_lop_op = OP_ENTERSUB;
6504 /* Is there a prototype? */
6512 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6515 while (*proto == ';')
6520 *proto == '$' || *proto == '_'
6526 *proto == '\\' && proto[1] && proto[2] == '\0'
6530 if (*proto == '\\' && proto[1] == '[') {
6531 const char *p = proto + 2;
6532 while(*p && *p != ']')
6534 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6536 if (*proto == '&' && *s == '{') {
6538 sv_setpvs(PL_subname, "__ANON__");
6540 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6547 PL_nextwhite = PL_thiswhite;
6550 start_force(PL_curforce);
6551 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6554 PL_nextwhite = nextPL_nextwhite;
6555 curmad('X', PL_thistoken);
6556 PL_thistoken = newSVpvs("");
6563 /* Guess harder when madskills require "best effort". */
6564 if (PL_madskills && (!gv || !GvCVu(gv))) {
6565 int probable_sub = 0;
6566 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6568 else if (isALPHA(*s)) {
6572 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6573 if (!keyword(tmpbuf, tmplen, 0))
6576 while (d < PL_bufend && isSPACE(*d))
6578 if (*d == '=' && d[1] == '>')
6583 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6584 op_free(pl_yylval.opval);
6585 pl_yylval.opval = rv2cv_op;
6586 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6587 PL_last_lop = PL_oldbufptr;
6588 PL_last_lop_op = OP_ENTERSUB;
6589 PL_nextwhite = PL_thiswhite;
6591 start_force(PL_curforce);
6592 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6594 PL_nextwhite = nextPL_nextwhite;
6595 curmad('X', PL_thistoken);
6596 PL_thistoken = newSVpvs("");
6601 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6608 /* Call it a bare word */
6610 if (PL_hints & HINT_STRICT_SUBS)
6611 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6614 /* after "print" and similar functions (corresponding to
6615 * "F? L" in opcode.pl), whatever wasn't already parsed as
6616 * a filehandle should be subject to "strict subs".
6617 * Likewise for the optional indirect-object argument to system
6618 * or exec, which can't be a bareword */
6619 if ((PL_last_lop_op == OP_PRINT
6620 || PL_last_lop_op == OP_PRTF
6621 || PL_last_lop_op == OP_SAY
6622 || PL_last_lop_op == OP_SYSTEM
6623 || PL_last_lop_op == OP_EXEC)
6624 && (PL_hints & HINT_STRICT_SUBS))
6625 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6626 if (lastchar != '-') {
6627 if (ckWARN(WARN_RESERVED)) {
6631 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6632 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6640 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6641 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6642 "Operator or semicolon missing before %c%s",
6643 lastchar, PL_tokenbuf);
6644 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6645 "Ambiguous use of %c resolved as operator %c",
6646 lastchar, lastchar);
6652 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6653 newSVpv(CopFILE(PL_curcop),0));
6657 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6658 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6661 case KEY___PACKAGE__:
6662 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6664 ? newSVhek(HvNAME_HEK(PL_curstash))
6671 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6672 const char *pname = "main";
6673 if (PL_tokenbuf[2] == 'D')
6674 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6675 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6679 GvIOp(gv) = newIO();
6680 IoIFP(GvIOp(gv)) = PL_rsfp;
6681 #if defined(HAS_FCNTL) && defined(F_SETFD)
6683 const int fd = PerlIO_fileno(PL_rsfp);
6684 fcntl(fd,F_SETFD,fd >= 3);
6687 /* Mark this internal pseudo-handle as clean */
6688 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6689 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6690 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6692 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6693 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6694 /* if the script was opened in binmode, we need to revert
6695 * it to text mode for compatibility; but only iff it has CRs
6696 * XXX this is a questionable hack at best. */
6697 if (PL_bufend-PL_bufptr > 2
6698 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6701 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6702 loc = PerlIO_tell(PL_rsfp);
6703 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6706 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6708 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6709 #endif /* NETWARE */
6710 #ifdef PERLIO_IS_STDIO /* really? */
6711 # if defined(__BORLANDC__)
6712 /* XXX see note in do_binmode() */
6713 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6717 PerlIO_seek(PL_rsfp, loc, 0);
6721 #ifdef PERLIO_LAYERS
6724 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6725 else if (PL_encoding) {
6732 XPUSHs(PL_encoding);
6734 call_method("name", G_SCALAR);
6738 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6739 Perl_form(aTHX_ ":encoding(%"SVf")",
6748 if (PL_realtokenstart >= 0) {
6749 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6751 PL_endwhite = newSVpvs("");
6752 sv_catsv(PL_endwhite, PL_thiswhite);
6754 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6755 PL_realtokenstart = -1;
6757 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6773 if (PL_expect == XSTATE) {
6780 if (*s == ':' && s[1] == ':') {
6783 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6784 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6785 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6788 else if (tmp == KEY_require || tmp == KEY_do)
6789 /* that's a way to remember we saw "CORE::" */
6802 LOP(OP_ACCEPT,XTERM);
6808 LOP(OP_ATAN2,XTERM);
6814 LOP(OP_BINMODE,XTERM);
6817 LOP(OP_BLESS,XTERM);
6826 /* When 'use switch' is in effect, continue has a dual
6827 life as a control operator. */
6829 if (!FEATURE_IS_ENABLED("switch"))
6832 /* We have to disambiguate the two senses of
6833 "continue". If the next token is a '{' then
6834 treat it as the start of a continue block;
6835 otherwise treat it as a control operator.
6847 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6864 if (!PL_cryptseen) {
6865 PL_cryptseen = TRUE;
6869 LOP(OP_CRYPT,XTERM);
6872 LOP(OP_CHMOD,XTERM);
6875 LOP(OP_CHOWN,XTERM);
6878 LOP(OP_CONNECT,XTERM);
6897 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6898 if (orig_keyword == KEY_do) {
6907 PL_hints |= HINT_BLOCK_SCOPE;
6917 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6918 STR_WITH_LEN("NDBM_File::"),
6919 STR_WITH_LEN("DB_File::"),
6920 STR_WITH_LEN("GDBM_File::"),
6921 STR_WITH_LEN("SDBM_File::"),
6922 STR_WITH_LEN("ODBM_File::"),
6924 LOP(OP_DBMOPEN,XTERM);
6930 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6937 pl_yylval.ival = CopLINE(PL_curcop);
6953 if (*s == '{') { /* block eval */
6954 PL_expect = XTERMBLOCK;
6955 UNIBRACK(OP_ENTERTRY);
6957 else { /* string eval */
6959 UNIBRACK(OP_ENTEREVAL);
6974 case KEY_endhostent:
6980 case KEY_endservent:
6983 case KEY_endprotoent:
6994 pl_yylval.ival = CopLINE(PL_curcop);
6996 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6999 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7002 if ((PL_bufend - p) >= 3 &&
7003 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7005 else if ((PL_bufend - p) >= 4 &&
7006 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7009 if (isIDFIRST_lazy_if(p,UTF)) {
7010 p = scan_ident(p, PL_bufend,
7011 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7015 Perl_croak(aTHX_ "Missing $ on loop variable");
7017 s = SvPVX(PL_linestr) + soff;
7023 LOP(OP_FORMLINE,XTERM);
7029 LOP(OP_FCNTL,XTERM);
7035 LOP(OP_FLOCK,XTERM);
7044 LOP(OP_GREPSTART, XREF);
7047 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7062 case KEY_getpriority:
7063 LOP(OP_GETPRIORITY,XTERM);
7065 case KEY_getprotobyname:
7068 case KEY_getprotobynumber:
7069 LOP(OP_GPBYNUMBER,XTERM);
7071 case KEY_getprotoent:
7083 case KEY_getpeername:
7084 UNI(OP_GETPEERNAME);
7086 case KEY_gethostbyname:
7089 case KEY_gethostbyaddr:
7090 LOP(OP_GHBYADDR,XTERM);
7092 case KEY_gethostent:
7095 case KEY_getnetbyname:
7098 case KEY_getnetbyaddr:
7099 LOP(OP_GNBYADDR,XTERM);
7104 case KEY_getservbyname:
7105 LOP(OP_GSBYNAME,XTERM);
7107 case KEY_getservbyport:
7108 LOP(OP_GSBYPORT,XTERM);
7110 case KEY_getservent:
7113 case KEY_getsockname:
7114 UNI(OP_GETSOCKNAME);
7116 case KEY_getsockopt:
7117 LOP(OP_GSOCKOPT,XTERM);
7132 pl_yylval.ival = CopLINE(PL_curcop);
7142 pl_yylval.ival = CopLINE(PL_curcop);
7146 LOP(OP_INDEX,XTERM);
7152 LOP(OP_IOCTL,XTERM);
7164 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7196 LOP(OP_LISTEN,XTERM);
7205 s = scan_pat(s,OP_MATCH);
7206 TERM(sublex_start());
7209 LOP(OP_MAPSTART, XREF);
7212 LOP(OP_MKDIR,XTERM);
7215 LOP(OP_MSGCTL,XTERM);
7218 LOP(OP_MSGGET,XTERM);
7221 LOP(OP_MSGRCV,XTERM);
7224 LOP(OP_MSGSND,XTERM);
7229 PL_in_my = (U16)tmp;
7231 if (isIDFIRST_lazy_if(s,UTF)) {
7235 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7236 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7238 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7239 if (!PL_in_my_stash) {
7242 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7246 if (PL_madskills) { /* just add type to declarator token */
7247 sv_catsv(PL_thistoken, PL_nextwhite);
7249 sv_catpvn(PL_thistoken, start, s - start);
7257 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7264 s = tokenize_use(0, s);
7268 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7275 if (isIDFIRST_lazy_if(s,UTF)) {
7277 for (d = s; isALNUM_lazy_if(d,UTF);)
7279 for (t=d; isSPACE(*t);)
7281 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7283 && !(t[0] == '=' && t[1] == '>')
7285 int parms_len = (int)(d-s);
7286 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7287 "Precedence problem: open %.*s should be open(%.*s)",
7288 parms_len, s, parms_len, s);
7294 pl_yylval.ival = OP_OR;
7304 LOP(OP_OPEN_DIR,XTERM);
7307 checkcomma(s,PL_tokenbuf,"filehandle");
7311 checkcomma(s,PL_tokenbuf,"filehandle");
7330 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7332 s = force_strict_version(s);
7333 PL_lex_expect = XBLOCK;
7337 LOP(OP_PIPE_OP,XTERM);
7340 s = scan_str(s,!!PL_madskills,FALSE);
7343 pl_yylval.ival = OP_CONST;
7344 TERM(sublex_start());
7351 s = scan_str(s,!!PL_madskills,FALSE);
7354 PL_expect = XOPERATOR;
7355 if (SvCUR(PL_lex_stuff)) {
7357 d = SvPV_force(PL_lex_stuff, len);
7359 for (; isSPACE(*d) && len; --len, ++d)
7364 if (!warned && ckWARN(WARN_QW)) {
7365 for (; !isSPACE(*d) && len; --len, ++d) {
7367 Perl_warner(aTHX_ packWARN(WARN_QW),
7368 "Possible attempt to separate words with commas");
7371 else if (*d == '#') {
7372 Perl_warner(aTHX_ packWARN(WARN_QW),
7373 "Possible attempt to put comments in qw() list");
7379 for (; !isSPACE(*d) && len; --len, ++d)
7382 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7383 words = append_elem(OP_LIST, words,
7384 newSVOP(OP_CONST, 0, tokeq(sv)));
7389 words = newNULLLIST();
7391 SvREFCNT_dec(PL_lex_stuff);
7392 PL_lex_stuff = NULL;
7394 PL_expect = XOPERATOR;
7395 pl_yylval.opval = sawparens(words);
7400 s = scan_str(s,!!PL_madskills,FALSE);
7403 pl_yylval.ival = OP_STRINGIFY;
7404 if (SvIVX(PL_lex_stuff) == '\'')
7405 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
7406 TERM(sublex_start());
7409 s = scan_pat(s,OP_QR);
7410 TERM(sublex_start());
7413 s = scan_str(s,!!PL_madskills,FALSE);
7416 readpipe_override();
7417 TERM(sublex_start());
7425 s = force_version(s, FALSE);
7427 else if (*s != 'v' || !isDIGIT(s[1])
7428 || (s = force_version(s, TRUE), *s == 'v'))
7430 *PL_tokenbuf = '\0';
7431 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7432 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7433 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7435 yyerror("<> should be quotes");
7437 if (orig_keyword == KEY_require) {
7445 PL_last_uni = PL_oldbufptr;
7446 PL_last_lop_op = OP_REQUIRE;
7448 return REPORT( (int)REQUIRE );
7454 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7458 LOP(OP_RENAME,XTERM);
7467 LOP(OP_RINDEX,XTERM);
7476 UNIDOR(OP_READLINE);
7479 UNIDOR(OP_BACKTICK);
7488 LOP(OP_REVERSE,XTERM);
7491 UNIDOR(OP_READLINK);
7498 if (pl_yylval.opval)
7499 TERM(sublex_start());
7501 TOKEN(1); /* force error */
7504 checkcomma(s,PL_tokenbuf,"filehandle");
7514 LOP(OP_SELECT,XTERM);
7520 LOP(OP_SEMCTL,XTERM);
7523 LOP(OP_SEMGET,XTERM);
7526 LOP(OP_SEMOP,XTERM);
7532 LOP(OP_SETPGRP,XTERM);
7534 case KEY_setpriority:
7535 LOP(OP_SETPRIORITY,XTERM);
7537 case KEY_sethostent:
7543 case KEY_setservent:
7546 case KEY_setprotoent:
7556 LOP(OP_SEEKDIR,XTERM);
7558 case KEY_setsockopt:
7559 LOP(OP_SSOCKOPT,XTERM);
7565 LOP(OP_SHMCTL,XTERM);
7568 LOP(OP_SHMGET,XTERM);
7571 LOP(OP_SHMREAD,XTERM);
7574 LOP(OP_SHMWRITE,XTERM);
7577 LOP(OP_SHUTDOWN,XTERM);
7586 LOP(OP_SOCKET,XTERM);
7588 case KEY_socketpair:
7589 LOP(OP_SOCKPAIR,XTERM);
7592 checkcomma(s,PL_tokenbuf,"subroutine name");
7594 if (*s == ';' || *s == ')') /* probably a close */
7595 Perl_croak(aTHX_ "sort is now a reserved word");
7597 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7601 LOP(OP_SPLIT,XTERM);
7604 LOP(OP_SPRINTF,XTERM);
7607 LOP(OP_SPLICE,XTERM);
7622 LOP(OP_SUBSTR,XTERM);
7628 char tmpbuf[sizeof PL_tokenbuf];
7629 SSize_t tboffset = 0;
7630 expectation attrful;
7631 bool have_name, have_proto;
7632 const int key = tmp;
7637 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7638 SV *subtoken = newSVpvn(tstart, s - tstart);
7642 s = SKIPSPACE2(s,tmpwhite);
7647 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7648 (*s == ':' && s[1] == ':'))
7651 SV *nametoke = NULL;
7655 attrful = XATTRBLOCK;
7656 /* remember buffer pos'n for later force_word */
7657 tboffset = s - PL_oldbufptr;
7658 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7661 nametoke = newSVpvn(s, d - s);
7663 if (memchr(tmpbuf, ':', len))
7664 sv_setpvn(PL_subname, tmpbuf, len);
7666 sv_setsv(PL_subname,PL_curstname);
7667 sv_catpvs(PL_subname,"::");
7668 sv_catpvn(PL_subname,tmpbuf,len);
7675 CURMAD('X', nametoke);
7676 CURMAD('_', tmpwhite);
7677 (void) force_word(PL_oldbufptr + tboffset, WORD,
7680 s = SKIPSPACE2(d,tmpwhite);
7687 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7688 PL_expect = XTERMBLOCK;
7689 attrful = XATTRTERM;
7690 sv_setpvs(PL_subname,"?");
7694 if (key == KEY_format) {
7696 PL_lex_formbrack = PL_lex_brackets + 1;
7698 PL_thistoken = subtoken;
7702 (void) force_word(PL_oldbufptr + tboffset, WORD,
7708 /* Look for a prototype */
7711 bool bad_proto = FALSE;
7712 bool in_brackets = FALSE;
7713 char greedy_proto = ' ';
7714 bool proto_after_greedy_proto = FALSE;
7715 bool must_be_last = FALSE;
7716 bool underscore = FALSE;
7717 bool seen_underscore = FALSE;
7718 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7720 s = scan_str(s,!!PL_madskills,FALSE);
7722 Perl_croak(aTHX_ "Prototype not terminated");
7723 /* strip spaces and check for bad characters */
7724 d = SvPVX(PL_lex_stuff);
7726 for (p = d; *p; ++p) {
7730 if (warnillegalproto) {
7732 proto_after_greedy_proto = TRUE;
7733 if (!strchr("$@%*;[]&\\_", *p)) {
7745 else if ( *p == ']' ) {
7746 in_brackets = FALSE;
7748 else if ( (*p == '@' || *p == '%') &&
7749 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7751 must_be_last = TRUE;
7754 else if ( *p == '_' ) {
7755 underscore = seen_underscore = TRUE;
7762 if (proto_after_greedy_proto)
7763 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7764 "Prototype after '%c' for %"SVf" : %s",
7765 greedy_proto, SVfARG(PL_subname), d);
7767 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7768 "Illegal character %sin prototype for %"SVf" : %s",
7769 seen_underscore ? "after '_' " : "",
7770 SVfARG(PL_subname), d);
7771 SvCUR_set(PL_lex_stuff, tmp);
7776 CURMAD('q', PL_thisopen);
7777 CURMAD('_', tmpwhite);
7778 CURMAD('=', PL_thisstuff);
7779 CURMAD('Q', PL_thisclose);
7780 NEXTVAL_NEXTTOKE.opval =
7781 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7782 PL_lex_stuff = NULL;
7785 s = SKIPSPACE2(s,tmpwhite);
7793 if (*s == ':' && s[1] != ':')
7794 PL_expect = attrful;
7795 else if (*s != '{' && key == KEY_sub) {
7797 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7798 else if (*s != ';' && *s != '}')
7799 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7806 curmad('^', newSVpvs(""));
7807 CURMAD('_', tmpwhite);
7811 PL_thistoken = subtoken;
7814 NEXTVAL_NEXTTOKE.opval =
7815 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7816 PL_lex_stuff = NULL;
7822 sv_setpvs(PL_subname, "__ANON__");
7824 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7828 (void) force_word(PL_oldbufptr + tboffset, WORD,
7837 LOP(OP_SYSTEM,XREF);
7840 LOP(OP_SYMLINK,XTERM);
7843 LOP(OP_SYSCALL,XTERM);
7846 LOP(OP_SYSOPEN,XTERM);
7849 LOP(OP_SYSSEEK,XTERM);
7852 LOP(OP_SYSREAD,XTERM);
7855 LOP(OP_SYSWRITE,XTERM);
7859 TERM(sublex_start());
7880 LOP(OP_TRUNCATE,XTERM);
7892 pl_yylval.ival = CopLINE(PL_curcop);
7896 pl_yylval.ival = CopLINE(PL_curcop);
7900 LOP(OP_UNLINK,XTERM);
7906 LOP(OP_UNPACK,XTERM);
7909 LOP(OP_UTIME,XTERM);
7915 LOP(OP_UNSHIFT,XTERM);
7918 s = tokenize_use(1, s);
7928 pl_yylval.ival = CopLINE(PL_curcop);
7932 pl_yylval.ival = CopLINE(PL_curcop);
7936 PL_hints |= HINT_BLOCK_SCOPE;
7943 LOP(OP_WAITPID,XTERM);
7952 ctl_l[0] = toCTRL('L');
7954 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7957 /* Make sure $^L is defined */
7958 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7963 if (PL_expect == XOPERATOR)
7969 pl_yylval.ival = OP_XOR;
7974 TERM(sublex_start());
7979 #pragma segment Main
7983 S_pending_ident(pTHX)
7988 /* pit holds the identifier we read and pending_ident is reset */
7989 char pit = PL_pending_ident;
7990 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7991 /* All routes through this function want to know if there is a colon. */
7992 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7993 PL_pending_ident = 0;
7995 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7996 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7997 "### Pending identifier '%s'\n", PL_tokenbuf); });
7999 /* if we're in a my(), we can't allow dynamics here.
8000 $foo'bar has already been turned into $foo::bar, so
8001 just check for colons.
8003 if it's a legal name, the OP is a PADANY.
8006 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8008 yyerror(Perl_form(aTHX_ "No package name allowed for "
8009 "variable %s in \"our\"",
8011 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8015 yyerror(Perl_form(aTHX_ PL_no_myglob,
8016 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8018 pl_yylval.opval = newOP(OP_PADANY, 0);
8019 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8025 build the ops for accesses to a my() variable.
8027 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8028 then used in a comparison. This catches most, but not
8029 all cases. For instance, it catches
8030 sort { my($a); $a <=> $b }
8032 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8033 (although why you'd do that is anyone's guess).
8038 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8039 if (tmp != NOT_IN_PAD) {
8040 /* might be an "our" variable" */
8041 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8042 /* build ops for a bareword */
8043 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8044 HEK * const stashname = HvNAME_HEK(stash);
8045 SV * const sym = newSVhek(stashname);
8046 sv_catpvs(sym, "::");
8047 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8048 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8049 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8052 ? (GV_ADDMULTI | GV_ADDINEVAL)
8055 ((PL_tokenbuf[0] == '$') ? SVt_PV
8056 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8061 /* if it's a sort block and they're naming $a or $b */
8062 if (PL_last_lop_op == OP_SORT &&
8063 PL_tokenbuf[0] == '$' &&
8064 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8067 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8068 d < PL_bufend && *d != '\n';
8071 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8072 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8078 pl_yylval.opval = newOP(OP_PADANY, 0);
8079 pl_yylval.opval->op_targ = tmp;
8085 Whine if they've said @foo in a doublequoted string,
8086 and @foo isn't a variable we can find in the symbol
8089 if (ckWARN(WARN_AMBIGUOUS) &&
8090 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8091 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8093 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8094 /* DO NOT warn for @- and @+ */
8095 && !( PL_tokenbuf[2] == '\0' &&
8096 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8099 /* Downgraded from fatal to warning 20000522 mjd */
8100 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8101 "Possible unintended interpolation of %s in string",
8106 /* build ops for a bareword */
8107 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8109 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8110 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8111 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8112 ((PL_tokenbuf[0] == '$') ? SVt_PV
8113 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8119 * The following code was generated by perl_keyword.pl.
8123 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8127 PERL_ARGS_ASSERT_KEYWORD;
8131 case 1: /* 5 tokens of length 1 */
8163 case 2: /* 18 tokens of length 2 */
8309 case 3: /* 29 tokens of length 3 */
8313 if (name[1] == 'N' &&
8376 if (name[1] == 'i' &&
8408 if (name[1] == 'o' &&
8417 if (name[1] == 'e' &&
8426 if (name[1] == 'n' &&
8435 if (name[1] == 'o' &&
8444 if (name[1] == 'a' &&
8453 if (name[1] == 'o' &&
8515 if (name[1] == 'e' &&
8529 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8555 if (name[1] == 'i' &&
8564 if (name[1] == 's' &&
8573 if (name[1] == 'e' &&
8582 if (name[1] == 'o' &&
8594 case 4: /* 41 tokens of length 4 */
8598 if (name[1] == 'O' &&
8608 if (name[1] == 'N' &&
8618 if (name[1] == 'i' &&
8628 if (name[1] == 'h' &&
8638 if (name[1] == 'u' &&
8651 if (name[2] == 'c' &&
8660 if (name[2] == 's' &&
8669 if (name[2] == 'a' &&
8705 if (name[1] == 'o' &&
8718 if (name[2] == 't' &&
8727 if (name[2] == 'o' &&
8736 if (name[2] == 't' &&
8745 if (name[2] == 'e' &&
8758 if (name[1] == 'o' &&
8771 if (name[2] == 'y' &&
8780 if (name[2] == 'l' &&
8796 if (name[2] == 's' &&
8805 if (name[2] == 'n' &&
8814 if (name[2] == 'c' &&
8827 if (name[1] == 'e' &&
8837 if (name[1] == 'p' &&
8850 if (name[2] == 'c' &&
8859 if (name[2] == 'p' &&
8868 if (name[2] == 's' &&
8884 if (name[2] == 'n' &&
8954 if (name[2] == 'r' &&
8963 if (name[2] == 'r' &&
8972 if (name[2] == 'a' &&
8988 if (name[2] == 'l' &&
9050 if (name[2] == 'e' &&
9053 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9066 case 5: /* 39 tokens of length 5 */
9070 if (name[1] == 'E' &&
9081 if (name[1] == 'H' &&
9095 if (name[2] == 'a' &&
9105 if (name[2] == 'a' &&
9122 if (name[2] == 'e' &&
9132 if (name[2] == 'e' &&
9136 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9152 if (name[3] == 'i' &&
9161 if (name[3] == 'o' &&
9197 if (name[2] == 'o' &&
9207 if (name[2] == 'y' &&
9221 if (name[1] == 'l' &&
9235 if (name[2] == 'n' &&
9245 if (name[2] == 'o' &&
9259 if (name[1] == 'i' &&
9264 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9273 if (name[2] == 'd' &&
9283 if (name[2] == 'c' &&
9300 if (name[2] == 'c' &&
9310 if (name[2] == 't' &&
9324 if (name[1] == 'k' &&
9335 if (name[1] == 'r' &&
9349 if (name[2] == 's' &&
9359 if (name[2] == 'd' &&
9376 if (name[2] == 'm' &&
9386 if (name[2] == 'i' &&
9396 if (name[2] == 'e' &&
9406 if (name[2] == 'l' &&
9416 if (name[2] == 'a' &&
9429 if (name[3] == 't' &&
9432 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9438 if (name[3] == 'd' &&
9455 if (name[1] == 'i' &&
9469 if (name[2] == 'a' &&
9482 if (name[3] == 'e' &&
9517 if (name[2] == 'i' &&
9534 if (name[2] == 'i' &&
9544 if (name[2] == 'i' &&
9561 case 6: /* 33 tokens of length 6 */
9565 if (name[1] == 'c' &&
9580 if (name[2] == 'l' &&
9591 if (name[2] == 'r' &&
9606 if (name[1] == 'e' &&
9621 if (name[2] == 's' &&
9626 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9632 if (name[2] == 'i' &&
9650 if (name[2] == 'l' &&
9661 if (name[2] == 'r' &&
9676 if (name[1] == 'm' &&
9691 if (name[2] == 'n' &&
9702 if (name[2] == 's' &&
9717 if (name[1] == 's' &&
9723 if (name[4] == 't' &&
9732 if (name[4] == 'e' &&
9741 if (name[4] == 'c' &&
9750 if (name[4] == 'n' &&
9766 if (name[1] == 'r' &&
9784 if (name[3] == 'a' &&
9794 if (name[3] == 'u' &&
9808 if (name[2] == 'n' &&
9826 if (name[2] == 'a' &&
9840 if (name[3] == 'e' &&
9853 if (name[4] == 't' &&
9862 if (name[4] == 'e' &&
9884 if (name[4] == 't' &&
9893 if (name[4] == 'e' &&
9909 if (name[2] == 'c' &&
9920 if (name[2] == 'l' &&
9931 if (name[2] == 'b' &&
9942 if (name[2] == 's' &&
9965 if (name[4] == 's' &&
9974 if (name[4] == 'n' &&
9987 if (name[3] == 'a' &&
10004 if (name[1] == 'a' &&
10010 return -KEY_values;
10019 case 7: /* 29 tokens of length 7 */
10023 if (name[1] == 'E' &&
10030 return KEY_DESTROY;
10036 if (name[1] == '_' &&
10043 return KEY___END__;
10049 if (name[1] == 'i' &&
10056 return -KEY_binmode;
10062 if (name[1] == 'o' &&
10069 return -KEY_connect;
10078 if (name[2] == 'm' &&
10084 return -KEY_dbmopen;
10090 if (name[2] == 'f')
10095 if (name[4] == 'u' &&
10099 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10105 if (name[4] == 'n' &&
10109 return KEY_defined;
10126 if (name[1] == 'o' &&
10133 return KEY_foreach;
10139 if (name[1] == 'e' &&
10146 if (name[5] == 'r' &&
10149 return -KEY_getpgrp;
10155 if (name[5] == 'i' &&
10158 return -KEY_getppid;
10171 if (name[1] == 'c' &&
10178 return -KEY_lcfirst;
10184 if (name[1] == 'p' &&
10191 return -KEY_opendir;
10197 if (name[1] == 'a' &&
10204 return KEY_package;
10210 if (name[1] == 'e')
10215 if (name[3] == 'd' &&
10220 return -KEY_readdir;
10226 if (name[3] == 'u' &&
10231 return KEY_require;
10237 if (name[3] == 'e' &&
10242 return -KEY_reverse;
10261 if (name[3] == 'k' &&
10266 return -KEY_seekdir;
10272 if (name[3] == 'p' &&
10277 return -KEY_setpgrp;
10287 if (name[2] == 'm' &&
10293 return -KEY_shmread;
10299 if (name[2] == 'r' &&
10305 return -KEY_sprintf;
10314 if (name[3] == 'l' &&
10319 return -KEY_symlink;
10328 if (name[4] == 'a' &&
10332 return -KEY_syscall;
10338 if (name[4] == 'p' &&
10342 return -KEY_sysopen;
10348 if (name[4] == 'e' &&
10352 return -KEY_sysread;
10358 if (name[4] == 'e' &&
10362 return -KEY_sysseek;
10380 if (name[1] == 'e' &&
10387 return -KEY_telldir;
10396 if (name[2] == 'f' &&
10402 return -KEY_ucfirst;
10408 if (name[2] == 's' &&
10414 return -KEY_unshift;
10424 if (name[1] == 'a' &&
10431 return -KEY_waitpid;
10440 case 8: /* 26 tokens of length 8 */
10444 if (name[1] == 'U' &&
10452 return KEY_AUTOLOAD;
10458 if (name[1] == '_')
10463 if (name[3] == 'A' &&
10469 return KEY___DATA__;
10475 if (name[3] == 'I' &&
10481 return -KEY___FILE__;
10487 if (name[3] == 'I' &&
10493 return -KEY___LINE__;
10509 if (name[2] == 'o' &&
10516 return -KEY_closedir;
10522 if (name[2] == 'n' &&
10529 return -KEY_continue;
10539 if (name[1] == 'b' &&
10547 return -KEY_dbmclose;
10553 if (name[1] == 'n' &&
10559 if (name[4] == 'r' &&
10564 return -KEY_endgrent;
10570 if (name[4] == 'w' &&
10575 return -KEY_endpwent;
10588 if (name[1] == 'o' &&
10596 return -KEY_formline;
10602 if (name[1] == 'e' &&
10608 if (name[4] == 'r')
10613 if (name[6] == 'n' &&
10616 return -KEY_getgrent;
10622 if (name[6] == 'i' &&
10625 return -KEY_getgrgid;
10631 if (name[6] == 'a' &&
10634 return -KEY_getgrnam;
10647 if (name[4] == 'o' &&
10652 return -KEY_getlogin;
10658 if (name[4] == 'w')
10663 if (name[6] == 'n' &&
10666 return -KEY_getpwent;
10672 if (name[6] == 'a' &&
10675 return -KEY_getpwnam;
10681 if (name[6] == 'i' &&
10684 return -KEY_getpwuid;
10704 if (name[1] == 'e' &&
10711 if (name[5] == 'i' &&
10718 return -KEY_readline;
10723 return -KEY_readlink;
10734 if (name[5] == 'i' &&
10738 return -KEY_readpipe;
10754 if (name[2] == 't')
10759 if (name[4] == 'r' &&
10764 return -KEY_setgrent;
10770 if (name[4] == 'w' &&
10775 return -KEY_setpwent;
10791 if (name[3] == 'w' &&
10797 return -KEY_shmwrite;
10803 if (name[3] == 't' &&
10809 return -KEY_shutdown;
10819 if (name[2] == 's' &&
10826 return -KEY_syswrite;
10836 if (name[1] == 'r' &&
10844 return -KEY_truncate;
10853 case 9: /* 9 tokens of length 9 */
10857 if (name[1] == 'N' &&
10866 return KEY_UNITCHECK;
10872 if (name[1] == 'n' &&
10881 return -KEY_endnetent;
10887 if (name[1] == 'e' &&
10896 return -KEY_getnetent;
10902 if (name[1] == 'o' &&
10911 return -KEY_localtime;
10917 if (name[1] == 'r' &&
10926 return KEY_prototype;
10932 if (name[1] == 'u' &&
10941 return -KEY_quotemeta;
10947 if (name[1] == 'e' &&
10956 return -KEY_rewinddir;
10962 if (name[1] == 'e' &&
10971 return -KEY_setnetent;
10977 if (name[1] == 'a' &&
10986 return -KEY_wantarray;
10995 case 10: /* 9 tokens of length 10 */
10999 if (name[1] == 'n' &&
11005 if (name[4] == 'o' &&
11012 return -KEY_endhostent;
11018 if (name[4] == 'e' &&
11025 return -KEY_endservent;
11038 if (name[1] == 'e' &&
11044 if (name[4] == 'o' &&
11051 return -KEY_gethostent;
11060 if (name[5] == 'r' &&
11066 return -KEY_getservent;
11072 if (name[5] == 'c' &&
11078 return -KEY_getsockopt;
11098 if (name[2] == 't')
11103 if (name[4] == 'o' &&
11110 return -KEY_sethostent;
11119 if (name[5] == 'r' &&
11125 return -KEY_setservent;
11131 if (name[5] == 'c' &&
11137 return -KEY_setsockopt;
11154 if (name[2] == 'c' &&
11163 return -KEY_socketpair;
11176 case 11: /* 8 tokens of length 11 */
11180 if (name[1] == '_' &&
11190 { /* __PACKAGE__ */
11191 return -KEY___PACKAGE__;
11197 if (name[1] == 'n' &&
11207 { /* endprotoent */
11208 return -KEY_endprotoent;
11214 if (name[1] == 'e' &&
11223 if (name[5] == 'e' &&
11229 { /* getpeername */
11230 return -KEY_getpeername;
11239 if (name[6] == 'o' &&
11244 { /* getpriority */
11245 return -KEY_getpriority;
11251 if (name[6] == 't' &&
11256 { /* getprotoent */
11257 return -KEY_getprotoent;
11271 if (name[4] == 'o' &&
11278 { /* getsockname */
11279 return -KEY_getsockname;
11292 if (name[1] == 'e' &&
11300 if (name[6] == 'o' &&
11305 { /* setpriority */
11306 return -KEY_setpriority;
11312 if (name[6] == 't' &&
11317 { /* setprotoent */
11318 return -KEY_setprotoent;
11334 case 12: /* 2 tokens of length 12 */
11335 if (name[0] == 'g' &&
11347 if (name[9] == 'd' &&
11350 { /* getnetbyaddr */
11351 return -KEY_getnetbyaddr;
11357 if (name[9] == 'a' &&
11360 { /* getnetbyname */
11361 return -KEY_getnetbyname;
11373 case 13: /* 4 tokens of length 13 */
11374 if (name[0] == 'g' &&
11381 if (name[4] == 'o' &&
11390 if (name[10] == 'd' &&
11393 { /* gethostbyaddr */
11394 return -KEY_gethostbyaddr;
11400 if (name[10] == 'a' &&
11403 { /* gethostbyname */
11404 return -KEY_gethostbyname;
11417 if (name[4] == 'e' &&
11426 if (name[10] == 'a' &&
11429 { /* getservbyname */
11430 return -KEY_getservbyname;
11436 if (name[10] == 'o' &&
11439 { /* getservbyport */
11440 return -KEY_getservbyport;
11459 case 14: /* 1 tokens of length 14 */
11460 if (name[0] == 'g' &&
11474 { /* getprotobyname */
11475 return -KEY_getprotobyname;
11480 case 16: /* 1 tokens of length 16 */
11481 if (name[0] == 'g' &&
11497 { /* getprotobynumber */
11498 return -KEY_getprotobynumber;
11512 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11516 PERL_ARGS_ASSERT_CHECKCOMMA;
11518 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
11519 if (ckWARN(WARN_SYNTAX)) {
11522 for (w = s+2; *w && level; w++) {
11525 else if (*w == ')')
11528 while (isSPACE(*w))
11530 /* the list of chars below is for end of statements or
11531 * block / parens, boolean operators (&&, ||, //) and branch
11532 * constructs (or, and, if, until, unless, while, err, for).
11533 * Not a very solid hack... */
11534 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11535 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11536 "%s (...) interpreted as function",name);
11539 while (s < PL_bufend && isSPACE(*s))
11543 while (s < PL_bufend && isSPACE(*s))
11545 if (isIDFIRST_lazy_if(s,UTF)) {
11546 const char * const w = s++;
11547 while (isALNUM_lazy_if(s,UTF))
11549 while (s < PL_bufend && isSPACE(*s))
11553 if (keyword(w, s - w, 0))
11556 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11557 if (gv && GvCVu(gv))
11559 Perl_croak(aTHX_ "No comma allowed after %s", what);
11564 /* Either returns sv, or mortalizes sv and returns a new SV*.
11565 Best used as sv=new_constant(..., sv, ...).
11566 If s, pv are NULL, calls subroutine with one argument,
11567 and type is used with error messages only. */
11570 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11571 SV *sv, SV *pv, const char *type, STRLEN typelen)
11574 HV * const table = GvHV(PL_hintgv); /* ^H */
11578 const char *why1 = "", *why2 = "", *why3 = "";
11580 PERL_ARGS_ASSERT_NEW_CONSTANT;
11582 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11585 why2 = (const char *)
11586 (strEQ(key,"charnames")
11587 ? "(possibly a missing \"use charnames ...\")"
11589 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11590 (type ? type: "undef"), why2);
11592 /* This is convoluted and evil ("goto considered harmful")
11593 * but I do not understand the intricacies of all the different
11594 * failure modes of %^H in here. The goal here is to make
11595 * the most probable error message user-friendly. --jhi */
11600 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11601 (type ? type: "undef"), why1, why2, why3);
11603 yyerror(SvPVX_const(msg));
11608 /* charnames doesn't work well if there have been errors found */
11609 if (PL_error_count > 0 && strEQ(key,"charnames"))
11610 return &PL_sv_undef;
11612 cvp = hv_fetch(table, key, keylen, FALSE);
11613 if (!cvp || !SvOK(*cvp)) {
11616 why3 = "} is not defined";
11619 sv_2mortal(sv); /* Parent created it permanently */
11622 pv = newSVpvn_flags(s, len, SVs_TEMP);
11624 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11626 typesv = &PL_sv_undef;
11628 PUSHSTACKi(PERLSI_OVERLOAD);
11640 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11644 /* Check the eval first */
11645 if (!PL_in_eval && SvTRUE(ERRSV)) {
11646 sv_catpvs(ERRSV, "Propagated");
11647 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11649 res = SvREFCNT_inc_simple(sv);
11653 SvREFCNT_inc_simple_void(res);
11662 why1 = "Call to &{$^H{";
11664 why3 = "}} did not return a defined value";
11672 /* Returns a NUL terminated string, with the length of the string written to
11676 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11679 register char *d = dest;
11680 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11682 PERL_ARGS_ASSERT_SCAN_WORD;
11686 Perl_croak(aTHX_ ident_too_long);
11687 if (isALNUM(*s)) /* UTF handled below */
11689 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11694 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11698 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11699 char *t = s + UTF8SKIP(s);
11701 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11705 Perl_croak(aTHX_ ident_too_long);
11706 Copy(s, d, len, char);
11719 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11722 char *bracket = NULL;
11724 register char *d = dest;
11725 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11727 PERL_ARGS_ASSERT_SCAN_IDENT;
11732 while (isDIGIT(*s)) {
11734 Perl_croak(aTHX_ ident_too_long);
11741 Perl_croak(aTHX_ ident_too_long);
11742 if (isALNUM(*s)) /* UTF handled below */
11744 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11749 else if (*s == ':' && s[1] == ':') {
11753 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11754 char *t = s + UTF8SKIP(s);
11755 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11757 if (d + (t - s) > e)
11758 Perl_croak(aTHX_ ident_too_long);
11759 Copy(s, d, t - s, char);
11770 if (PL_lex_state != LEX_NORMAL)
11771 PL_lex_state = LEX_INTERPENDMAYBE;
11774 if (*s == '$' && s[1] &&
11775 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11788 if (*d == '^' && *s && isCONTROLVAR(*s)) {
11793 if (isSPACE(s[-1])) {
11795 const char ch = *s++;
11796 if (!SPACE_OR_TAB(ch)) {
11802 if (isIDFIRST_lazy_if(d,UTF)) {
11806 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11807 end += UTF8SKIP(end);
11808 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11809 end += UTF8SKIP(end);
11811 Copy(s, d, end - s, char);
11816 while ((isALNUM(*s) || *s == ':') && d < e)
11819 Perl_croak(aTHX_ ident_too_long);
11822 while (s < send && SPACE_OR_TAB(*s))
11824 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11825 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11826 const char * const brack =
11828 ((*s == '[') ? "[...]" : "{...}");
11829 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11830 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11831 funny, dest, brack, funny, dest, brack);
11834 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11838 /* Handle extended ${^Foo} variables
11839 * 1999-02-27 mjd-perl-patch@plover.com */
11840 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11844 while (isALNUM(*s) && d < e) {
11848 Perl_croak(aTHX_ ident_too_long);
11853 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11854 PL_lex_state = LEX_INTERPEND;
11857 if (PL_lex_state == LEX_NORMAL) {
11858 if (ckWARN(WARN_AMBIGUOUS) &&
11859 (keyword(dest, d - dest, 0)
11860 || get_cvn_flags(dest, d - dest, 0)))
11864 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11865 "Ambiguous use of %c{%s} resolved to %c%s",
11866 funny, dest, funny, dest);
11871 s = bracket; /* let the parser handle it */
11875 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11876 PL_lex_state = LEX_INTERPEND;
11881 S_pmflag(U32 pmfl, const char ch) {
11883 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11884 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11885 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11886 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11887 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11888 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11894 S_scan_pat(pTHX_ char *start, I32 type)
11898 char *s = scan_str(start,!!PL_madskills,FALSE);
11899 const char * const valid_flags =
11900 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11905 PERL_ARGS_ASSERT_SCAN_PAT;
11908 const char * const delimiter = skipspace(start);
11912 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11913 : "Search pattern not terminated" ));
11916 pm = (PMOP*)newPMOP(type, 0);
11917 if (PL_multi_open == '?') {
11918 /* This is the only point in the code that sets PMf_ONCE: */
11919 pm->op_pmflags |= PMf_ONCE;
11921 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11922 allows us to restrict the list needed by reset to just the ??
11924 assert(type != OP_TRANS);
11926 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11929 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11932 elements = mg->mg_len / sizeof(PMOP**);
11933 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11934 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11935 mg->mg_len = elements * sizeof(PMOP**);
11936 PmopSTASH_set(pm,PL_curstash);
11942 while (*s && strchr(valid_flags, *s))
11943 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11946 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11947 "Having no space between pattern and following word is deprecated");
11951 if (PL_madskills && modstart != s) {
11952 SV* tmptoken = newSVpvn(modstart, s - modstart);
11953 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11956 /* issue a warning if /c is specified,but /g is not */
11957 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11959 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11960 "Use of /c modifier is meaningless without /g" );
11963 PL_lex_op = (OP*)pm;
11964 pl_yylval.ival = OP_MATCH;
11969 S_scan_subst(pTHX_ char *start)
11980 PERL_ARGS_ASSERT_SCAN_SUBST;
11982 pl_yylval.ival = OP_NULL;
11984 s = scan_str(start,!!PL_madskills,FALSE);
11987 Perl_croak(aTHX_ "Substitution pattern not terminated");
11989 if (s[-1] == PL_multi_open)
11992 if (PL_madskills) {
11993 CURMAD('q', PL_thisopen);
11994 CURMAD('_', PL_thiswhite);
11995 CURMAD('E', PL_thisstuff);
11996 CURMAD('Q', PL_thisclose);
11997 PL_realtokenstart = s - SvPVX(PL_linestr);
12001 first_start = PL_multi_start;
12002 s = scan_str(s,!!PL_madskills,FALSE);
12004 if (PL_lex_stuff) {
12005 SvREFCNT_dec(PL_lex_stuff);
12006 PL_lex_stuff = NULL;
12008 Perl_croak(aTHX_ "Substitution replacement not terminated");
12010 PL_multi_start = first_start; /* so whole substitution is taken together */
12012 pm = (PMOP*)newPMOP(OP_SUBST, 0);
12015 if (PL_madskills) {
12016 CURMAD('z', PL_thisopen);
12017 CURMAD('R', PL_thisstuff);
12018 CURMAD('Z', PL_thisclose);
12024 if (*s == EXEC_PAT_MOD) {
12028 else if (strchr(S_PAT_MODS, *s))
12029 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12032 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12033 "Having no space between pattern and following word is deprecated");
12041 if (PL_madskills) {
12043 curmad('m', newSVpvn(modstart, s - modstart));
12044 append_madprops(PL_thismad, (OP*)pm, 0);
12048 if ((pm->op_pmflags & PMf_CONTINUE)) {
12049 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12053 SV * const repl = newSVpvs("");
12055 PL_sublex_info.super_bufptr = s;
12056 PL_sublex_info.super_bufend = PL_bufend;
12058 pm->op_pmflags |= PMf_EVAL;
12061 sv_catpvs(repl, "eval ");
12063 sv_catpvs(repl, "do ");
12065 sv_catpvs(repl, "{");
12066 sv_catsv(repl, PL_lex_repl);
12067 if (strchr(SvPVX(PL_lex_repl), '#'))
12068 sv_catpvs(repl, "\n");
12069 sv_catpvs(repl, "}");
12071 SvREFCNT_dec(PL_lex_repl);
12072 PL_lex_repl = repl;
12075 PL_lex_op = (OP*)pm;
12076 pl_yylval.ival = OP_SUBST;
12081 S_scan_trans(pTHX_ char *start)
12094 PERL_ARGS_ASSERT_SCAN_TRANS;
12096 pl_yylval.ival = OP_NULL;
12098 s = scan_str(start,!!PL_madskills,FALSE);
12100 Perl_croak(aTHX_ "Transliteration pattern not terminated");
12102 if (s[-1] == PL_multi_open)
12105 if (PL_madskills) {
12106 CURMAD('q', PL_thisopen);
12107 CURMAD('_', PL_thiswhite);
12108 CURMAD('E', PL_thisstuff);
12109 CURMAD('Q', PL_thisclose);
12110 PL_realtokenstart = s - SvPVX(PL_linestr);
12114 s = scan_str(s,!!PL_madskills,FALSE);
12116 if (PL_lex_stuff) {
12117 SvREFCNT_dec(PL_lex_stuff);
12118 PL_lex_stuff = NULL;
12120 Perl_croak(aTHX_ "Transliteration replacement not terminated");
12122 if (PL_madskills) {
12123 CURMAD('z', PL_thisopen);
12124 CURMAD('R', PL_thisstuff);
12125 CURMAD('Z', PL_thisclose);
12128 complement = del = squash = 0;
12135 complement = OPpTRANS_COMPLEMENT;
12138 del = OPpTRANS_DELETE;
12141 squash = OPpTRANS_SQUASH;
12150 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12151 o = newPVOP(OP_TRANS, 0, (char*)tbl);
12152 o->op_private &= ~OPpTRANS_ALL;
12153 o->op_private |= del|squash|complement|
12154 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12155 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
12158 pl_yylval.ival = OP_TRANS;
12161 if (PL_madskills) {
12163 curmad('m', newSVpvn(modstart, s - modstart));
12164 append_madprops(PL_thismad, o, 0);
12173 S_scan_heredoc(pTHX_ register char *s)
12177 I32 op_type = OP_SCALAR;
12181 const char *found_newline;
12185 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12187 I32 stuffstart = s - SvPVX(PL_linestr);
12190 PL_realtokenstart = -1;
12193 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12197 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12201 while (SPACE_OR_TAB(*peek))
12203 if (*peek == '`' || *peek == '\'' || *peek =='"') {
12206 s = delimcpy(d, e, s, PL_bufend, term, &len);
12216 if (!isALNUM_lazy_if(s,UTF))
12217 deprecate("bare << to mean <<\"\"");
12218 for (; isALNUM_lazy_if(s,UTF); s++) {
12223 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12224 Perl_croak(aTHX_ "Delimiter for here document is too long");
12227 len = d - PL_tokenbuf;
12230 if (PL_madskills) {
12231 tstart = PL_tokenbuf + !outer;
12232 PL_thisclose = newSVpvn(tstart, len - !outer);
12233 tstart = SvPVX(PL_linestr) + stuffstart;
12234 PL_thisopen = newSVpvn(tstart, s - tstart);
12235 stuffstart = s - SvPVX(PL_linestr);
12238 #ifndef PERL_STRICT_CR
12239 d = strchr(s, '\r');
12241 char * const olds = s;
12243 while (s < PL_bufend) {
12249 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12258 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12265 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12266 herewas = newSVpvn(s,PL_bufend-s);
12270 herewas = newSVpvn(s-1,found_newline-s+1);
12273 herewas = newSVpvn(s,found_newline-s);
12277 if (PL_madskills) {
12278 tstart = SvPVX(PL_linestr) + stuffstart;
12280 sv_catpvn(PL_thisstuff, tstart, s - tstart);
12282 PL_thisstuff = newSVpvn(tstart, s - tstart);
12285 s += SvCUR(herewas);
12288 stuffstart = s - SvPVX(PL_linestr);
12294 tmpstr = newSV_type(SVt_PVIV);
12295 SvGROW(tmpstr, 80);
12296 if (term == '\'') {
12297 op_type = OP_CONST;
12298 SvIV_set(tmpstr, -1);
12300 else if (term == '`') {
12301 op_type = OP_BACKTICK;
12302 SvIV_set(tmpstr, '\\');
12306 PL_multi_start = CopLINE(PL_curcop);
12307 PL_multi_open = PL_multi_close = '<';
12308 term = *PL_tokenbuf;
12309 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12310 char * const bufptr = PL_sublex_info.super_bufptr;
12311 char * const bufend = PL_sublex_info.super_bufend;
12312 char * const olds = s - SvCUR(herewas);
12313 s = strchr(bufptr, '\n');
12317 while (s < bufend &&
12318 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12320 CopLINE_inc(PL_curcop);
12323 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12324 missingterm(PL_tokenbuf);
12326 sv_setpvn(herewas,bufptr,d-bufptr+1);
12327 sv_setpvn(tmpstr,d+1,s-d);
12329 sv_catpvn(herewas,s,bufend-s);
12330 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12337 while (s < PL_bufend &&
12338 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12340 CopLINE_inc(PL_curcop);
12342 if (s >= PL_bufend) {
12343 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12344 missingterm(PL_tokenbuf);
12346 sv_setpvn(tmpstr,d+1,s-d);
12348 if (PL_madskills) {
12350 sv_catpvn(PL_thisstuff, d + 1, s - d);
12352 PL_thisstuff = newSVpvn(d + 1, s - d);
12353 stuffstart = s - SvPVX(PL_linestr);
12357 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12359 sv_catpvn(herewas,s,PL_bufend-s);
12360 sv_setsv(PL_linestr,herewas);
12361 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12362 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12363 PL_last_lop = PL_last_uni = NULL;
12366 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
12367 while (s >= PL_bufend) { /* multiple line string? */
12369 if (PL_madskills) {
12370 tstart = SvPVX(PL_linestr) + stuffstart;
12372 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12374 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12378 CopLINE_inc(PL_curcop);
12379 if (!outer || !lex_next_chunk(0)) {
12380 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12381 missingterm(PL_tokenbuf);
12383 CopLINE_dec(PL_curcop);
12386 stuffstart = s - SvPVX(PL_linestr);
12388 CopLINE_inc(PL_curcop);
12389 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12390 PL_last_lop = PL_last_uni = NULL;
12391 #ifndef PERL_STRICT_CR
12392 if (PL_bufend - PL_linestart >= 2) {
12393 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12394 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12396 PL_bufend[-2] = '\n';
12398 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12400 else if (PL_bufend[-1] == '\r')
12401 PL_bufend[-1] = '\n';
12403 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12404 PL_bufend[-1] = '\n';
12406 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12407 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12408 *(SvPVX(PL_linestr) + off ) = ' ';
12409 sv_catsv(PL_linestr,herewas);
12410 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12411 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12415 sv_catsv(tmpstr,PL_linestr);
12420 PL_multi_end = CopLINE(PL_curcop);
12421 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12422 SvPV_shrink_to_cur(tmpstr);
12424 SvREFCNT_dec(herewas);
12426 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12428 else if (PL_encoding)
12429 sv_recode_to_utf8(tmpstr, PL_encoding);
12431 PL_lex_stuff = tmpstr;
12432 pl_yylval.ival = op_type;
12436 /* scan_inputsymbol
12437 takes: current position in input buffer
12438 returns: new position in input buffer
12439 side-effects: pl_yylval and lex_op are set.
12444 <FH> read from filehandle
12445 <pkg::FH> read from package qualified filehandle
12446 <pkg'FH> read from package qualified filehandle
12447 <$fh> read from filehandle in $fh
12448 <*.h> filename glob
12453 S_scan_inputsymbol(pTHX_ char *start)
12456 register char *s = start; /* current position in buffer */
12459 char *d = PL_tokenbuf; /* start of temp holding space */
12460 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12462 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12464 end = strchr(s, '\n');
12467 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
12469 /* die if we didn't have space for the contents of the <>,
12470 or if it didn't end, or if we see a newline
12473 if (len >= (I32)sizeof PL_tokenbuf)
12474 Perl_croak(aTHX_ "Excessively long <> operator");
12476 Perl_croak(aTHX_ "Unterminated <> operator");
12481 Remember, only scalar variables are interpreted as filehandles by
12482 this code. Anything more complex (e.g., <$fh{$num}>) will be
12483 treated as a glob() call.
12484 This code makes use of the fact that except for the $ at the front,
12485 a scalar variable and a filehandle look the same.
12487 if (*d == '$' && d[1]) d++;
12489 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12490 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12493 /* If we've tried to read what we allow filehandles to look like, and
12494 there's still text left, then it must be a glob() and not a getline.
12495 Use scan_str to pull out the stuff between the <> and treat it
12496 as nothing more than a string.
12499 if (d - PL_tokenbuf != len) {
12500 pl_yylval.ival = OP_GLOB;
12501 s = scan_str(start,!!PL_madskills,FALSE);
12503 Perl_croak(aTHX_ "Glob not terminated");
12507 bool readline_overriden = FALSE;
12510 /* we're in a filehandle read situation */
12513 /* turn <> into <ARGV> */
12515 Copy("ARGV",d,5,char);
12517 /* Check whether readline() is overriden */
12518 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12520 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12522 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12523 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12524 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12525 readline_overriden = TRUE;
12527 /* if <$fh>, create the ops to turn the variable into a
12531 /* try to find it in the pad for this block, otherwise find
12532 add symbol table ops
12534 const PADOFFSET tmp = pad_findmy(d, len, 0);
12535 if (tmp != NOT_IN_PAD) {
12536 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12537 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12538 HEK * const stashname = HvNAME_HEK(stash);
12539 SV * const sym = sv_2mortal(newSVhek(stashname));
12540 sv_catpvs(sym, "::");
12541 sv_catpv(sym, d+1);
12546 OP * const o = newOP(OP_PADSV, 0);
12548 PL_lex_op = readline_overriden
12549 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12550 append_elem(OP_LIST, o,
12551 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12552 : (OP*)newUNOP(OP_READLINE, 0, o);
12561 ? (GV_ADDMULTI | GV_ADDINEVAL)
12564 PL_lex_op = readline_overriden
12565 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12566 append_elem(OP_LIST,
12567 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12568 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12569 : (OP*)newUNOP(OP_READLINE, 0,
12570 newUNOP(OP_RV2SV, 0,
12571 newGVOP(OP_GV, 0, gv)));
12573 if (!readline_overriden)
12574 PL_lex_op->op_flags |= OPf_SPECIAL;
12575 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12576 pl_yylval.ival = OP_NULL;
12579 /* If it's none of the above, it must be a literal filehandle
12580 (<Foo::BAR> or <FOO>) so build a simple readline OP */
12582 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12583 PL_lex_op = readline_overriden
12584 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12585 append_elem(OP_LIST,
12586 newGVOP(OP_GV, 0, gv),
12587 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12588 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12589 pl_yylval.ival = OP_NULL;
12598 takes: start position in buffer
12599 keep_quoted preserve \ on the embedded delimiter(s)
12600 keep_delims preserve the delimiters around the string
12601 returns: position to continue reading from buffer
12602 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12603 updates the read buffer.
12605 This subroutine pulls a string out of the input. It is called for:
12606 q single quotes q(literal text)
12607 ' single quotes 'literal text'
12608 qq double quotes qq(interpolate $here please)
12609 " double quotes "interpolate $here please"
12610 qx backticks qx(/bin/ls -l)
12611 ` backticks `/bin/ls -l`
12612 qw quote words @EXPORT_OK = qw( func() $spam )
12613 m// regexp match m/this/
12614 s/// regexp substitute s/this/that/
12615 tr/// string transliterate tr/this/that/
12616 y/// string transliterate y/this/that/
12617 ($*@) sub prototypes sub foo ($)
12618 (stuff) sub attr parameters sub foo : attr(stuff)
12619 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12621 In most of these cases (all but <>, patterns and transliterate)
12622 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12623 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12624 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12627 It skips whitespace before the string starts, and treats the first
12628 character as the delimiter. If the delimiter is one of ([{< then
12629 the corresponding "close" character )]}> is used as the closing
12630 delimiter. It allows quoting of delimiters, and if the string has
12631 balanced delimiters ([{<>}]) it allows nesting.
12633 On success, the SV with the resulting string is put into lex_stuff or,
12634 if that is already non-NULL, into lex_repl. The second case occurs only
12635 when parsing the RHS of the special constructs s/// and tr/// (y///).
12636 For convenience, the terminating delimiter character is stuffed into
12641 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12644 SV *sv; /* scalar value: string */
12645 const char *tmps; /* temp string, used for delimiter matching */
12646 register char *s = start; /* current position in the buffer */
12647 register char term; /* terminating character */
12648 register char *to; /* current position in the sv's data */
12649 I32 brackets = 1; /* bracket nesting level */
12650 bool has_utf8 = FALSE; /* is there any utf8 content? */
12651 I32 termcode; /* terminating char. code */
12652 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
12653 STRLEN termlen; /* length of terminating string */
12654 int last_off = 0; /* last position for nesting bracket */
12660 PERL_ARGS_ASSERT_SCAN_STR;
12662 /* skip space before the delimiter */
12668 if (PL_realtokenstart >= 0) {
12669 stuffstart = PL_realtokenstart;
12670 PL_realtokenstart = -1;
12673 stuffstart = start - SvPVX(PL_linestr);
12675 /* mark where we are, in case we need to report errors */
12678 /* after skipping whitespace, the next character is the terminator */
12681 termcode = termstr[0] = term;
12685 termcode = utf8_to_uvchr((U8*)s, &termlen);
12686 Copy(s, termstr, termlen, U8);
12687 if (!UTF8_IS_INVARIANT(term))
12691 /* mark where we are */
12692 PL_multi_start = CopLINE(PL_curcop);
12693 PL_multi_open = term;
12695 /* find corresponding closing delimiter */
12696 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12697 termcode = termstr[0] = term = tmps[5];
12699 PL_multi_close = term;
12701 /* create a new SV to hold the contents. 79 is the SV's initial length.
12702 What a random number. */
12703 sv = newSV_type(SVt_PVIV);
12705 SvIV_set(sv, termcode);
12706 (void)SvPOK_only(sv); /* validate pointer */
12708 /* move past delimiter and try to read a complete string */
12710 sv_catpvn(sv, s, termlen);
12713 tstart = SvPVX(PL_linestr) + stuffstart;
12714 if (!PL_thisopen && !keep_delims) {
12715 PL_thisopen = newSVpvn(tstart, s - tstart);
12716 stuffstart = s - SvPVX(PL_linestr);
12720 if (PL_encoding && !UTF) {
12724 int offset = s - SvPVX_const(PL_linestr);
12725 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12726 &offset, (char*)termstr, termlen);
12727 const char * const ns = SvPVX_const(PL_linestr) + offset;
12728 char * const svlast = SvEND(sv) - 1;
12730 for (; s < ns; s++) {
12731 if (*s == '\n' && !PL_rsfp)
12732 CopLINE_inc(PL_curcop);
12735 goto read_more_line;
12737 /* handle quoted delimiters */
12738 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12740 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12742 if ((svlast-1 - t) % 2) {
12743 if (!keep_quoted) {
12744 *(svlast-1) = term;
12746 SvCUR_set(sv, SvCUR(sv) - 1);
12751 if (PL_multi_open == PL_multi_close) {
12757 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12758 /* At here, all closes are "was quoted" one,
12759 so we don't check PL_multi_close. */
12761 if (!keep_quoted && *(t+1) == PL_multi_open)
12766 else if (*t == PL_multi_open)
12774 SvCUR_set(sv, w - SvPVX_const(sv));
12776 last_off = w - SvPVX(sv);
12777 if (--brackets <= 0)
12782 if (!keep_delims) {
12783 SvCUR_set(sv, SvCUR(sv) - 1);
12789 /* extend sv if need be */
12790 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12791 /* set 'to' to the next character in the sv's string */
12792 to = SvPVX(sv)+SvCUR(sv);
12794 /* if open delimiter is the close delimiter read unbridle */
12795 if (PL_multi_open == PL_multi_close) {
12796 for (; s < PL_bufend; s++,to++) {
12797 /* embedded newlines increment the current line number */
12798 if (*s == '\n' && !PL_rsfp)
12799 CopLINE_inc(PL_curcop);
12800 /* handle quoted delimiters */
12801 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12802 if (!keep_quoted && s[1] == term)
12804 /* any other quotes are simply copied straight through */
12808 /* terminate when run out of buffer (the for() condition), or
12809 have found the terminator */
12810 else if (*s == term) {
12813 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12816 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12822 /* if the terminator isn't the same as the start character (e.g.,
12823 matched brackets), we have to allow more in the quoting, and
12824 be prepared for nested brackets.
12827 /* read until we run out of string, or we find the terminator */
12828 for (; s < PL_bufend; s++,to++) {
12829 /* embedded newlines increment the line count */
12830 if (*s == '\n' && !PL_rsfp)
12831 CopLINE_inc(PL_curcop);
12832 /* backslashes can escape the open or closing characters */
12833 if (*s == '\\' && s+1 < PL_bufend) {
12834 if (!keep_quoted &&
12835 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12840 /* allow nested opens and closes */
12841 else if (*s == PL_multi_close && --brackets <= 0)
12843 else if (*s == PL_multi_open)
12845 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12850 /* terminate the copied string and update the sv's end-of-string */
12852 SvCUR_set(sv, to - SvPVX_const(sv));
12855 * this next chunk reads more into the buffer if we're not done yet
12859 break; /* handle case where we are done yet :-) */
12861 #ifndef PERL_STRICT_CR
12862 if (to - SvPVX_const(sv) >= 2) {
12863 if ((to[-2] == '\r' && to[-1] == '\n') ||
12864 (to[-2] == '\n' && to[-1] == '\r'))
12868 SvCUR_set(sv, to - SvPVX_const(sv));
12870 else if (to[-1] == '\r')
12873 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12878 /* if we're out of file, or a read fails, bail and reset the current
12879 line marker so we can report where the unterminated string began
12882 if (PL_madskills) {
12883 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12885 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12887 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12890 CopLINE_inc(PL_curcop);
12891 PL_bufptr = PL_bufend;
12892 if (!lex_next_chunk(0)) {
12894 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12903 /* at this point, we have successfully read the delimited string */
12905 if (!PL_encoding || UTF) {
12907 if (PL_madskills) {
12908 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12909 const int len = s - tstart;
12911 sv_catpvn(PL_thisstuff, tstart, len);
12913 PL_thisstuff = newSVpvn(tstart, len);
12914 if (!PL_thisclose && !keep_delims)
12915 PL_thisclose = newSVpvn(s,termlen);
12920 sv_catpvn(sv, s, termlen);
12925 if (PL_madskills) {
12926 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12927 const int len = s - tstart - termlen;
12929 sv_catpvn(PL_thisstuff, tstart, len);
12931 PL_thisstuff = newSVpvn(tstart, len);
12932 if (!PL_thisclose && !keep_delims)
12933 PL_thisclose = newSVpvn(s - termlen,termlen);
12937 if (has_utf8 || PL_encoding)
12940 PL_multi_end = CopLINE(PL_curcop);
12942 /* if we allocated too much space, give some back */
12943 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12944 SvLEN_set(sv, SvCUR(sv) + 1);
12945 SvPV_renew(sv, SvLEN(sv));
12948 /* decide whether this is the first or second quoted string we've read
12961 takes: pointer to position in buffer
12962 returns: pointer to new position in buffer
12963 side-effects: builds ops for the constant in pl_yylval.op
12965 Read a number in any of the formats that Perl accepts:
12967 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12968 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12971 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12973 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12976 If it reads a number without a decimal point or an exponent, it will
12977 try converting the number to an integer and see if it can do so
12978 without loss of precision.
12982 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12985 register const char *s = start; /* current position in buffer */
12986 register char *d; /* destination in temp buffer */
12987 register char *e; /* end of temp buffer */
12988 NV nv; /* number read, as a double */
12989 SV *sv = NULL; /* place to put the converted number */
12990 bool floatit; /* boolean: int or float? */
12991 const char *lastub = NULL; /* position of last underbar */
12992 static char const number_too_long[] = "Number too long";
12994 PERL_ARGS_ASSERT_SCAN_NUM;
12996 /* We use the first character to decide what type of number this is */
13000 Perl_croak(aTHX_ "panic: scan_num");
13002 /* if it starts with a 0, it could be an octal number, a decimal in
13003 0.13 disguise, or a hexadecimal number, or a binary number. */
13007 u holds the "number so far"
13008 shift the power of 2 of the base
13009 (hex == 4, octal == 3, binary == 1)
13010 overflowed was the number more than we can hold?
13012 Shift is used when we add a digit. It also serves as an "are
13013 we in octal/hex/binary?" indicator to disallow hex characters
13014 when in octal mode.
13019 bool overflowed = FALSE;
13020 bool just_zero = TRUE; /* just plain 0 or binary number? */
13021 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13022 static const char* const bases[5] =
13023 { "", "binary", "", "octal", "hexadecimal" };
13024 static const char* const Bases[5] =
13025 { "", "Binary", "", "Octal", "Hexadecimal" };
13026 static const char* const maxima[5] =
13028 "0b11111111111111111111111111111111",
13032 const char *base, *Base, *max;
13034 /* check for hex */
13035 if (s[1] == 'x' || s[1] == 'X') {
13039 } else if (s[1] == 'b' || s[1] == 'B') {
13044 /* check for a decimal in disguise */
13045 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13047 /* so it must be octal */
13054 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13055 "Misplaced _ in number");
13059 base = bases[shift];
13060 Base = Bases[shift];
13061 max = maxima[shift];
13063 /* read the rest of the number */
13065 /* x is used in the overflow test,
13066 b is the digit we're adding on. */
13071 /* if we don't mention it, we're done */
13075 /* _ are ignored -- but warned about if consecutive */
13077 if (lastub && s == lastub + 1)
13078 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13079 "Misplaced _ in number");
13083 /* 8 and 9 are not octal */
13084 case '8': case '9':
13086 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13090 case '2': case '3': case '4':
13091 case '5': case '6': case '7':
13093 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13096 case '0': case '1':
13097 b = *s++ & 15; /* ASCII digit -> value of digit */
13101 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13102 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13103 /* make sure they said 0x */
13106 b = (*s++ & 7) + 9;
13108 /* Prepare to put the digit we have onto the end
13109 of the number so far. We check for overflows.
13115 x = u << shift; /* make room for the digit */
13117 if ((x >> shift) != u
13118 && !(PL_hints & HINT_NEW_BINARY)) {
13121 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13122 "Integer overflow in %s number",
13125 u = x | b; /* add the digit to the end */
13128 n *= nvshift[shift];
13129 /* If an NV has not enough bits in its
13130 * mantissa to represent an UV this summing of
13131 * small low-order numbers is a waste of time
13132 * (because the NV cannot preserve the
13133 * low-order bits anyway): we could just
13134 * remember when did we overflow and in the
13135 * end just multiply n by the right
13143 /* if we get here, we had success: make a scalar value from
13148 /* final misplaced underbar check */
13149 if (s[-1] == '_') {
13150 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13154 if (n > 4294967295.0)
13155 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13156 "%s number > %s non-portable",
13162 if (u > 0xffffffff)
13163 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13164 "%s number > %s non-portable",
13169 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13170 sv = new_constant(start, s - start, "integer",
13171 sv, NULL, NULL, 0);
13172 else if (PL_hints & HINT_NEW_BINARY)
13173 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13178 handle decimal numbers.
13179 we're also sent here when we read a 0 as the first digit
13181 case '1': case '2': case '3': case '4': case '5':
13182 case '6': case '7': case '8': case '9': case '.':
13185 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13188 /* read next group of digits and _ and copy into d */
13189 while (isDIGIT(*s) || *s == '_') {
13190 /* skip underscores, checking for misplaced ones
13194 if (lastub && s == lastub + 1)
13195 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13196 "Misplaced _ in number");
13200 /* check for end of fixed-length buffer */
13202 Perl_croak(aTHX_ number_too_long);
13203 /* if we're ok, copy the character */
13208 /* final misplaced underbar check */
13209 if (lastub && s == lastub + 1) {
13210 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13213 /* read a decimal portion if there is one. avoid
13214 3..5 being interpreted as the number 3. followed
13217 if (*s == '.' && s[1] != '.') {
13222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13223 "Misplaced _ in number");
13227 /* copy, ignoring underbars, until we run out of digits.
13229 for (; isDIGIT(*s) || *s == '_'; s++) {
13230 /* fixed length buffer check */
13232 Perl_croak(aTHX_ number_too_long);
13234 if (lastub && s == lastub + 1)
13235 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13236 "Misplaced _ in number");
13242 /* fractional part ending in underbar? */
13243 if (s[-1] == '_') {
13244 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13245 "Misplaced _ in number");
13247 if (*s == '.' && isDIGIT(s[1])) {
13248 /* oops, it's really a v-string, but without the "v" */
13254 /* read exponent part, if present */
13255 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13259 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13260 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
13262 /* stray preinitial _ */
13264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13265 "Misplaced _ in number");
13269 /* allow positive or negative exponent */
13270 if (*s == '+' || *s == '-')
13273 /* stray initial _ */
13275 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13276 "Misplaced _ in number");
13280 /* read digits of exponent */
13281 while (isDIGIT(*s) || *s == '_') {
13284 Perl_croak(aTHX_ number_too_long);
13288 if (((lastub && s == lastub + 1) ||
13289 (!isDIGIT(s[1]) && s[1] != '_')))
13290 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13291 "Misplaced _ in number");
13299 We try to do an integer conversion first if no characters
13300 indicating "float" have been found.
13305 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13307 if (flags == IS_NUMBER_IN_UV) {
13309 sv = newSViv(uv); /* Prefer IVs over UVs. */
13312 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13313 if (uv <= (UV) IV_MIN)
13314 sv = newSViv(-(IV)uv);
13321 /* terminate the string */
13323 nv = Atof(PL_tokenbuf);
13328 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13329 const char *const key = floatit ? "float" : "integer";
13330 const STRLEN keylen = floatit ? 5 : 7;
13331 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13332 key, keylen, sv, NULL, NULL, 0);
13336 /* if it starts with a v, it could be a v-string */
13339 sv = newSV(5); /* preallocate storage space */
13340 s = scan_vstring(s, PL_bufend, sv);
13344 /* make the op for the constant and return */
13347 lvalp->opval = newSVOP(OP_CONST, 0, sv);
13349 lvalp->opval = NULL;
13355 S_scan_formline(pTHX_ register char *s)
13358 register char *eol;
13360 SV * const stuff = newSVpvs("");
13361 bool needargs = FALSE;
13362 bool eofmt = FALSE;
13364 char *tokenstart = s;
13365 SV* savewhite = NULL;
13367 if (PL_madskills) {
13368 savewhite = PL_thiswhite;
13373 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13375 while (!needargs) {
13378 #ifdef PERL_STRICT_CR
13379 while (SPACE_OR_TAB(*t))
13382 while (SPACE_OR_TAB(*t) || *t == '\r')
13385 if (*t == '\n' || t == PL_bufend) {
13390 if (PL_in_eval && !PL_rsfp) {
13391 eol = (char *) memchr(s,'\n',PL_bufend-s);
13396 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13398 for (t = s; t < eol; t++) {
13399 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13401 goto enough; /* ~~ must be first line in formline */
13403 if (*t == '@' || *t == '^')
13407 sv_catpvn(stuff, s, eol-s);
13408 #ifndef PERL_STRICT_CR
13409 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13410 char *end = SvPVX(stuff) + SvCUR(stuff);
13413 SvCUR_set(stuff, SvCUR(stuff) - 1);
13424 if (PL_madskills) {
13426 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13428 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13431 PL_bufptr = PL_bufend;
13432 CopLINE_inc(PL_curcop);
13433 got_some = lex_next_chunk(0);
13434 CopLINE_dec(PL_curcop);
13437 tokenstart = PL_bufptr;
13445 if (SvCUR(stuff)) {
13448 PL_lex_state = LEX_NORMAL;
13449 start_force(PL_curforce);
13450 NEXTVAL_NEXTTOKE.ival = 0;
13454 PL_lex_state = LEX_FORMLINE;
13456 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13458 else if (PL_encoding)
13459 sv_recode_to_utf8(stuff, PL_encoding);
13461 start_force(PL_curforce);
13462 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13464 start_force(PL_curforce);
13465 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13469 SvREFCNT_dec(stuff);
13471 PL_lex_formbrack = 0;
13475 if (PL_madskills) {
13477 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13479 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13480 PL_thiswhite = savewhite;
13487 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13490 const I32 oldsavestack_ix = PL_savestack_ix;
13491 CV* const outsidecv = PL_compcv;
13494 assert(SvTYPE(PL_compcv) == SVt_PVCV);
13496 SAVEI32(PL_subline);
13497 save_item(PL_subname);
13498 SAVESPTR(PL_compcv);
13500 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13501 CvFLAGS(PL_compcv) |= flags;
13503 PL_subline = CopLINE(PL_curcop);
13504 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13505 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13506 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13508 return oldsavestack_ix;
13512 #pragma segment Perl_yylex
13515 S_yywarn(pTHX_ const char *const s)
13519 PERL_ARGS_ASSERT_YYWARN;
13521 PL_in_eval |= EVAL_WARNONLY;
13523 PL_in_eval &= ~EVAL_WARNONLY;
13528 Perl_yyerror(pTHX_ const char *const s)
13531 const char *where = NULL;
13532 const char *context = NULL;
13535 int yychar = PL_parser->yychar;
13537 PERL_ARGS_ASSERT_YYERROR;
13539 if (!yychar || (yychar == ';' && !PL_rsfp))
13541 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13542 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13543 PL_oldbufptr != PL_bufptr) {
13546 The code below is removed for NetWare because it abends/crashes on NetWare
13547 when the script has error such as not having the closing quotes like:
13548 if ($var eq "value)
13549 Checking of white spaces is anyway done in NetWare code.
13552 while (isSPACE(*PL_oldoldbufptr))
13555 context = PL_oldoldbufptr;
13556 contlen = PL_bufptr - PL_oldoldbufptr;
13558 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13559 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13562 The code below is removed for NetWare because it abends/crashes on NetWare
13563 when the script has error such as not having the closing quotes like:
13564 if ($var eq "value)
13565 Checking of white spaces is anyway done in NetWare code.
13568 while (isSPACE(*PL_oldbufptr))
13571 context = PL_oldbufptr;
13572 contlen = PL_bufptr - PL_oldbufptr;
13574 else if (yychar > 255)
13575 where = "next token ???";
13576 else if (yychar == -2) { /* YYEMPTY */
13577 if (PL_lex_state == LEX_NORMAL ||
13578 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13579 where = "at end of line";
13580 else if (PL_lex_inpat)
13581 where = "within pattern";
13583 where = "within string";
13586 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13588 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13589 else if (isPRINT_LC(yychar)) {
13590 const char string = yychar;
13591 sv_catpvn(where_sv, &string, 1);
13594 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13595 where = SvPVX_const(where_sv);
13597 msg = sv_2mortal(newSVpv(s, 0));
13598 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13599 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13601 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13603 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13604 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13605 Perl_sv_catpvf(aTHX_ msg,
13606 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13607 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13610 if (PL_in_eval & EVAL_WARNONLY) {
13611 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13615 if (PL_error_count >= 10) {
13616 if (PL_in_eval && SvCUR(ERRSV))
13617 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13618 SVfARG(ERRSV), OutCopFILE(PL_curcop));
13620 Perl_croak(aTHX_ "%s has too many errors.\n",
13621 OutCopFILE(PL_curcop));
13624 PL_in_my_stash = NULL;
13628 #pragma segment Main
13632 S_swallow_bom(pTHX_ U8 *s)
13635 const STRLEN slen = SvCUR(PL_linestr);
13637 PERL_ARGS_ASSERT_SWALLOW_BOM;
13641 if (s[1] == 0xFE) {
13642 /* UTF-16 little-endian? (or UTF-32LE?) */
13643 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13644 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13645 #ifndef PERL_NO_UTF16_FILTER
13646 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13648 if (PL_bufend > (char*)s) {
13649 s = add_utf16_textfilter(s, TRUE);
13652 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13657 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13658 #ifndef PERL_NO_UTF16_FILTER
13659 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13661 if (PL_bufend > (char *)s) {
13662 s = add_utf16_textfilter(s, FALSE);
13665 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13670 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13671 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13672 s += 3; /* UTF-8 */
13678 if (s[2] == 0xFE && s[3] == 0xFF) {
13679 /* UTF-32 big-endian */
13680 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13683 else if (s[2] == 0 && s[3] != 0) {
13686 * are a good indicator of UTF-16BE. */
13687 #ifndef PERL_NO_UTF16_FILTER
13688 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13689 s = add_utf16_textfilter(s, FALSE);
13691 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13697 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13698 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13699 s += 4; /* UTF-8 */
13705 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13708 * are a good indicator of UTF-16LE. */
13709 #ifndef PERL_NO_UTF16_FILTER
13710 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13711 s = add_utf16_textfilter(s, TRUE);
13713 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13721 #ifndef PERL_NO_UTF16_FILTER
13723 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13726 SV *const filter = FILTER_DATA(idx);
13727 /* We re-use this each time round, throwing the contents away before we
13729 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13730 SV *const utf8_buffer = filter;
13731 IV status = IoPAGE(filter);
13732 const bool reverse = cBOOL(IoLINES(filter));
13735 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13737 /* As we're automatically added, at the lowest level, and hence only called
13738 from this file, we can be sure that we're not called in block mode. Hence
13739 don't bother writing code to deal with block mode. */
13741 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13744 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13746 DEBUG_P(PerlIO_printf(Perl_debug_log,
13747 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13748 FPTR2DPTR(void *, S_utf16_textfilter),
13749 reverse ? 'l' : 'b', idx, maxlen, status,
13750 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13757 /* First, look in our buffer of existing UTF-8 data: */
13758 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13762 } else if (status == 0) {
13764 IoPAGE(filter) = 0;
13765 nl = SvEND(utf8_buffer);
13768 STRLEN got = nl - SvPVX(utf8_buffer);
13769 /* Did we have anything to append? */
13771 sv_catpvn(sv, SvPVX(utf8_buffer), got);
13772 /* Everything else in this code works just fine if SVp_POK isn't
13773 set. This, however, needs it, and we need it to work, else
13774 we loop infinitely because the buffer is never consumed. */
13775 sv_chop(utf8_buffer, nl);
13779 /* OK, not a complete line there, so need to read some more UTF-16.
13780 Read an extra octect if the buffer currently has an odd number. */
13784 if (SvCUR(utf16_buffer) >= 2) {
13785 /* Location of the high octet of the last complete code point.
13786 Gosh, UTF-16 is a pain. All the benefits of variable length,
13787 *coupled* with all the benefits of partial reads and
13789 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13790 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13792 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13796 /* We have the first half of a surrogate. Read more. */
13797 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13800 status = FILTER_READ(idx + 1, utf16_buffer,
13801 160 + (SvCUR(utf16_buffer) & 1));
13802 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13803 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13806 IoPAGE(filter) = status;
13811 chars = SvCUR(utf16_buffer) >> 1;
13812 have = SvCUR(utf8_buffer);
13813 SvGROW(utf8_buffer, have + chars * 3 + 1);
13816 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13817 (U8*)SvPVX_const(utf8_buffer) + have,
13818 chars * 2, &newlen);
13820 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13821 (U8*)SvPVX_const(utf8_buffer) + have,
13822 chars * 2, &newlen);
13824 SvCUR_set(utf8_buffer, have + newlen);
13827 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13828 it's private to us, and utf16_to_utf8{,reversed} take a
13829 (pointer,length) pair, rather than a NUL-terminated string. */
13830 if(SvCUR(utf16_buffer) & 1) {
13831 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13832 SvCUR_set(utf16_buffer, 1);
13834 SvCUR_set(utf16_buffer, 0);
13837 DEBUG_P(PerlIO_printf(Perl_debug_log,
13838 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13840 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13841 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13846 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13848 SV *filter = filter_add(S_utf16_textfilter, NULL);
13850 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13852 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13853 sv_setpvs(filter, "");
13854 IoLINES(filter) = reversed;
13855 IoPAGE(filter) = 1; /* Not EOF */
13857 /* Sadly, we have to return a valid pointer, come what may, so we have to
13858 ignore any error return from this. */
13859 SvCUR_set(PL_linestr, 0);
13860 if (FILTER_READ(0, PL_linestr, 0)) {
13861 SvUTF8_on(PL_linestr);
13863 SvUTF8_on(PL_linestr);
13865 PL_bufend = SvEND(PL_linestr);
13866 return (U8*)SvPVX(PL_linestr);
13871 Returns a pointer to the next character after the parsed
13872 vstring, as well as updating the passed in sv.
13874 Function must be called like
13877 s = scan_vstring(s,e,sv);
13879 where s and e are the start and end of the string.
13880 The sv should already be large enough to store the vstring
13881 passed in, for performance reasons.
13886 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13889 const char *pos = s;
13890 const char *start = s;
13892 PERL_ARGS_ASSERT_SCAN_VSTRING;
13894 if (*pos == 'v') pos++; /* get past 'v' */
13895 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13897 if ( *pos != '.') {
13898 /* this may not be a v-string if followed by => */
13899 const char *next = pos;
13900 while (next < e && isSPACE(*next))
13902 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13903 /* return string not v-string */
13904 sv_setpvn(sv,(char *)s,pos-s);
13905 return (char *)pos;
13909 if (!isALPHA(*pos)) {
13910 U8 tmpbuf[UTF8_MAXBYTES+1];
13913 s++; /* get past 'v' */
13918 /* this is atoi() that tolerates underscores */
13921 const char *end = pos;
13923 while (--end >= s) {
13925 const UV orev = rev;
13926 rev += (*end - '0') * mult;
13929 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13930 "Integer overflow in decimal number");
13934 if (rev > 0x7FFFFFFF)
13935 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13937 /* Append native character for the rev point */
13938 tmpend = uvchr_to_utf8(tmpbuf, rev);
13939 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13940 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13942 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13948 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13952 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13959 Perl_keyword_plugin_standard(pTHX_
13960 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13962 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13963 PERL_UNUSED_CONTEXT;
13964 PERL_UNUSED_ARG(keyword_ptr);
13965 PERL_UNUSED_ARG(keyword_len);
13966 PERL_UNUSED_ARG(op_ptr);
13967 return KEYWORD_PLUGIN_DECLINE;
13971 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
13973 Parse a single complete Perl statement. This may be a normal imperative
13974 statement, including optional label, or a declaration that has
13975 compile-time effect. It is up to the caller to ensure that the dynamic
13976 parser state (L</PL_parser> et al) is correctly set to reflect the source
13977 of the code to be parsed and the lexical context for the statement.
13979 The op tree representing the statement is returned. This may be a
13980 null pointer if the statement is null, for example if it was actually
13981 a subroutine definition (which has compile-time side effects). If not
13982 null, it will be the result of a L</newSTATEOP> call, normally including
13983 a C<nextstate> or equivalent op.
13985 If an error occurs in parsing or compilation, in most cases a valid op
13986 tree (most likely null) is returned anyway. The error is reflected in
13987 the parser state, normally resulting in a single exception at the top
13988 level of parsing which covers all the compilation errors that occurred.
13989 Some compilation errors, however, will throw an exception immediately.
13991 The I<flags> parameter is reserved for future use, and must always
13998 Perl_parse_fullstmt(pTHX_ U32 flags)
14002 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14004 SAVEVPTR(PL_eval_root);
14005 PL_eval_root = NULL;
14006 if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
14007 qerror(Perl_mess(aTHX_ "Parse error"));
14008 fullstmtop = PL_eval_root;
14014 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
14016 Parse a sequence of zero or more Perl statements. These may be normal
14017 imperative statements, including optional labels, or declarations
14018 that have compile-time effect, or any mixture thereof. The statement
14019 sequence ends when a closing brace or end-of-file is encountered in a
14020 place where a new statement could have validly started. It is up to
14021 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14022 is correctly set to reflect the source of the code to be parsed and the
14023 lexical context for the statements.
14025 The op tree representing the statement sequence is returned. This may
14026 be a null pointer if the statements were all null, for example if there
14027 were no statements or if there were only subroutine definitions (which
14028 have compile-time side effects). If not null, it will be a C<lineseq>
14029 list, normally including C<nextstate> or equivalent ops.
14031 If an error occurs in parsing or compilation, in most cases a valid op
14032 tree is returned anyway. The error is reflected in the parser state,
14033 normally resulting in a single exception at the top level of parsing
14034 which covers all the compilation errors that occurred. Some compilation
14035 errors, however, will throw an exception immediately.
14037 The I<flags> parameter is reserved for future use, and must always
14044 Perl_parse_stmtseq(pTHX_ U32 flags)
14048 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14050 SAVEVPTR(PL_eval_root);
14051 PL_eval_root = NULL;
14052 if(yyparse(GRAMSTMTSEQ) && !PL_parser->error_count)
14053 qerror(Perl_mess(aTHX_ "Parse error"));
14054 stmtseqop = PL_eval_root;
14060 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
14062 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
14063 deprecate("qw(...) as parentheses");
14065 if (qwlist->op_type == OP_STUB) {
14069 start_force(PL_curforce);
14070 NEXTVAL_NEXTTOKE.opval = qwlist;
14078 * c-indentation-style: bsd
14079 * c-basic-offset: 4
14080 * indent-tabs-mode: t
14083 * ex: set ts=8 sts=4 sw=4 noet: