3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_brackstack (PL_parser->lex_brackstack)
52 #define PL_lex_casemods (PL_parser->lex_casemods)
53 #define PL_lex_casestack (PL_parser->lex_casestack)
54 #define PL_lex_defer (PL_parser->lex_defer)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_expect (PL_parser->lex_expect)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_pending_ident (PL_parser->pending_ident)
68 #define PL_preambled (PL_parser->preambled)
69 #define PL_sublex_info (PL_parser->sublex_info)
70 #define PL_linestr (PL_parser->linestr)
71 #define PL_expect (PL_parser->expect)
72 #define PL_copline (PL_parser->copline)
73 #define PL_bufptr (PL_parser->bufptr)
74 #define PL_oldbufptr (PL_parser->oldbufptr)
75 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76 #define PL_linestart (PL_parser->linestart)
77 #define PL_bufend (PL_parser->bufend)
78 #define PL_last_uni (PL_parser->last_uni)
79 #define PL_last_lop (PL_parser->last_lop)
80 #define PL_last_lop_op (PL_parser->last_lop_op)
81 #define PL_lex_state (PL_parser->lex_state)
82 #define PL_rsfp (PL_parser->rsfp)
83 #define PL_rsfp_filters (PL_parser->rsfp_filters)
84 #define PL_in_my (PL_parser->in_my)
85 #define PL_in_my_stash (PL_parser->in_my_stash)
86 #define PL_tokenbuf (PL_parser->tokenbuf)
87 #define PL_multi_end (PL_parser->multi_end)
88 #define PL_error_count (PL_parser->error_count)
91 # define PL_endwhite (PL_parser->endwhite)
92 # define PL_faketokens (PL_parser->faketokens)
93 # define PL_lasttoke (PL_parser->lasttoke)
94 # define PL_nextwhite (PL_parser->nextwhite)
95 # define PL_realtokenstart (PL_parser->realtokenstart)
96 # define PL_skipwhite (PL_parser->skipwhite)
97 # define PL_thisclose (PL_parser->thisclose)
98 # define PL_thismad (PL_parser->thismad)
99 # define PL_thisopen (PL_parser->thisopen)
100 # define PL_thisstuff (PL_parser->thisstuff)
101 # define PL_thistoken (PL_parser->thistoken)
102 # define PL_thiswhite (PL_parser->thiswhite)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_nexttoke (PL_parser->nexttoke)
105 # define PL_curforce (PL_parser->curforce)
107 # define PL_nexttoke (PL_parser->nexttoke)
108 # define PL_nexttype (PL_parser->nexttype)
109 # define PL_nextval (PL_parser->nextval)
112 /* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
115 S_pending_ident(pTHX);
117 static const char ident_too_long[] = "Identifier too long";
120 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
121 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
123 # define CURMAD(slot,sv)
124 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
127 #define XFAKEBRACK 128
128 #define XENUMMASK 127
130 #ifdef USE_UTF8_SCRIPTS
131 # define UTF (!IN_BYTES)
133 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
136 /* The maximum number of characters preceding the unrecognized one to display */
137 #define UNRECOGNIZED_PRECEDE_COUNT 10
139 /* In variables named $^X, these are the legal values for X.
140 * 1999-02-27 mjd-perl-patch@plover.com */
141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
145 /* LEX_* are values for PL_lex_state, the state of the lexer.
146 * They are arranged oddly so that the guard on the switch statement
147 * can get by with a single comparison (if the compiler is smart enough).
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN1 : not used, except for not, which isn't a UNIOP
225 * BOop : bitwise or or xor
227 * SHop : shift operator
228 * PWop : power operator
229 * PMop : pattern-matching operator
230 * Aop : addition-level operator
231 * Mop : multiplication-level operator
232 * Eop : equality-testing operator
233 * Rop : relational operator <= != gt
235 * Also see LOP and lop() below.
238 #ifdef DEBUGGING /* Serve -DT. */
239 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
241 # define REPORT(retval) (retval)
244 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
245 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
246 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
247 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
248 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
250 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
251 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
252 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
253 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
254 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
255 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
256 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
257 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
258 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
259 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
260 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
261 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
262 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
263 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
265 /* This bit of chicanery makes a unary function followed by
266 * a parenthesis into a function with one argument, highest precedence.
267 * The UNIDOR macro is for unary functions that can be followed by the //
268 * operator (such as C<shift // 0>).
270 #define UNI2(f,x) { \
271 pl_yylval.ival = f; \
274 PL_last_uni = PL_oldbufptr; \
275 PL_last_lop_op = f; \
277 return REPORT( (int)FUNC1 ); \
279 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
281 #define UNI(f) UNI2(f,XTERM)
282 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
284 #define UNIBRACK(f) { \
285 pl_yylval.ival = f; \
287 PL_last_uni = PL_oldbufptr; \
289 return REPORT( (int)FUNC1 ); \
291 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 /* grandfather return to old style */
295 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
299 /* how to interpret the pl_yylval associated with the token */
303 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
309 static struct debug_tokens {
311 enum token_type type;
313 } const debug_tokens[] =
315 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
316 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
317 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
318 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
319 { ARROW, TOKENTYPE_NONE, "ARROW" },
320 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
321 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
322 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
323 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
324 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
325 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
326 { DO, TOKENTYPE_NONE, "DO" },
327 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
328 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
329 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
330 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
331 { ELSE, TOKENTYPE_NONE, "ELSE" },
332 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
333 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
334 { FOR, TOKENTYPE_IVAL, "FOR" },
335 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
336 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
337 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
338 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
339 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
340 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
341 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
342 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
343 { IF, TOKENTYPE_IVAL, "IF" },
344 { LABEL, TOKENTYPE_PVAL, "LABEL" },
345 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
346 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
347 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
348 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
349 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
350 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
351 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
352 { MY, TOKENTYPE_IVAL, "MY" },
353 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
354 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
355 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
356 { OROP, TOKENTYPE_IVAL, "OROP" },
357 { OROR, TOKENTYPE_NONE, "OROR" },
358 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
359 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
360 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
361 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
362 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
363 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
364 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
365 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
366 { PREINC, TOKENTYPE_NONE, "PREINC" },
367 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
368 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
369 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
370 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
371 { SUB, TOKENTYPE_NONE, "SUB" },
372 { THING, TOKENTYPE_OPVAL, "THING" },
373 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
374 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
375 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
376 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
377 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
378 { USE, TOKENTYPE_IVAL, "USE" },
379 { WHEN, TOKENTYPE_IVAL, "WHEN" },
380 { WHILE, TOKENTYPE_IVAL, "WHILE" },
381 { WORD, TOKENTYPE_OPVAL, "WORD" },
382 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
383 { 0, TOKENTYPE_NONE, NULL }
386 /* dump the returned token in rv, plus any optional arg in pl_yylval */
389 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
393 PERL_ARGS_ASSERT_TOKEREPORT;
396 const char *name = NULL;
397 enum token_type type = TOKENTYPE_NONE;
398 const struct debug_tokens *p;
399 SV* const report = newSVpvs("<== ");
401 for (p = debug_tokens; p->token; p++) {
402 if (p->token == (int)rv) {
409 Perl_sv_catpv(aTHX_ report, name);
410 else if ((char)rv > ' ' && (char)rv < '~')
411 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
413 sv_catpvs(report, "EOF");
415 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
423 case TOKENTYPE_OPNUM:
424 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
425 PL_op_name[lvalp->ival]);
428 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
430 case TOKENTYPE_OPVAL:
432 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
433 PL_op_name[lvalp->opval->op_type]);
434 if (lvalp->opval->op_type == OP_CONST) {
435 Perl_sv_catpvf(aTHX_ report, " %s",
436 SvPEEK(cSVOPx_sv(lvalp->opval)));
441 sv_catpvs(report, "(opval=null)");
444 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
450 /* print the buffer with suitable escapes */
453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
455 SV* const tmp = newSVpvs("");
457 PERL_ARGS_ASSERT_PRINTBUF;
459 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 S_deprecate_commaless_var_list(pTHX) {
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
475 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
476 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
480 S_ao(pTHX_ int toketype)
483 if (*PL_bufptr == '=') {
485 if (toketype == ANDAND)
486 pl_yylval.ival = OP_ANDASSIGN;
487 else if (toketype == OROR)
488 pl_yylval.ival = OP_ORASSIGN;
489 else if (toketype == DORDOR)
490 pl_yylval.ival = OP_DORASSIGN;
498 * When Perl expects an operator and finds something else, no_op
499 * prints the warning. It always prints "<something> found where
500 * operator expected. It prints "Missing semicolon on previous line?"
501 * if the surprise occurs at the start of the line. "do you need to
502 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
503 * where the compiler doesn't know if foo is a method call or a function.
504 * It prints "Missing operator before end of line" if there's nothing
505 * after the missing operator, or "... before <...>" if there is something
506 * after the missing operator.
510 S_no_op(pTHX_ const char *const what, char *s)
513 char * const oldbp = PL_bufptr;
514 const bool is_first = (PL_oldbufptr == PL_linestart);
516 PERL_ARGS_ASSERT_NO_OP;
522 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
523 if (ckWARN_d(WARN_SYNTAX)) {
525 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
526 "\t(Missing semicolon on previous line?)\n");
527 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
529 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
531 if (t < PL_bufptr && isSPACE(*t))
532 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
533 "\t(Do you need to predeclare %.*s?)\n",
534 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
538 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
539 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
547 * Complain about missing quote/regexp/heredoc terminator.
548 * If it's called with NULL then it cauterizes the line buffer.
549 * If we're in a delimited string and the delimiter is a control
550 * character, it's reformatted into a two-char sequence like ^C.
555 S_missingterm(pTHX_ char *s)
561 char * const nl = strrchr(s,'\n');
565 else if (isCNTRL(PL_multi_close)) {
567 tmpbuf[1] = (char)toCTRL(PL_multi_close);
572 *tmpbuf = (char)PL_multi_close;
576 q = strchr(s,'"') ? '\'' : '"';
577 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
580 #define FEATURE_IS_ENABLED(name) \
581 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
582 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
583 /* The longest string we pass in. */
584 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
587 * S_feature_is_enabled
588 * Check whether the named feature is enabled.
591 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
594 HV * const hinthv = GvHV(PL_hintgv);
595 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
597 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
599 assert(namelen <= MAX_FEATURE_LEN);
600 memcpy(&he_name[8], name, namelen);
602 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
606 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
607 * utf16-to-utf8-reversed.
610 #ifdef PERL_CR_FILTER
614 register const char *s = SvPVX_const(sv);
615 register const char * const e = s + SvCUR(sv);
617 PERL_ARGS_ASSERT_STRIP_RETURN;
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 parser->stack = NULL;
678 parser->stack_size = 0;
680 /* on scope exit, free this parser and restore any outer one */
682 parser->saved_curcop = PL_curcop;
684 /* initialise lexer state */
687 parser->curforce = -1;
689 parser->nexttoke = 0;
691 parser->error_count = oparser ? oparser->error_count : 0;
692 parser->copline = NOLINE;
693 parser->lex_state = LEX_NORMAL;
694 parser->expect = XSTATE;
696 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
697 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
699 Newx(parser->lex_brackstack, 120, char);
700 Newx(parser->lex_casestack, 12, char);
701 *parser->lex_casestack = '\0';
704 s = SvPV_const(line, len);
710 parser->linestr = newSVpvs("\n;");
711 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
712 /* avoid tie/overload weirdness */
713 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
715 sv_catpvs(parser->linestr, "\n;");
718 SvREFCNT_inc_simple_void_NN(line);
719 parser->linestr = line;
721 parser->oldoldbufptr =
724 parser->linestart = SvPVX(parser->linestr);
725 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
726 parser->last_lop = parser->last_uni = NULL;
730 /* delete a parser object */
733 Perl_parser_free(pTHX_ const yy_parser *parser)
735 PERL_ARGS_ASSERT_PARSER_FREE;
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && (!parser->old_parser ||
743 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->lex_brackstack);
748 Safefree(parser->lex_casestack);
749 PL_parser = parser->old_parser;
756 * Finalizer for lexing operations. Must be called when the parser is
757 * done with the lexer.
764 PL_doextract = FALSE;
768 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
770 Buffer scalar containing the chunk currently under consideration of the
771 text currently being lexed. This is always a plain string scalar (for
772 which C<SvPOK> is true). It is not intended to be used as a scalar by
773 normal scalar means; instead refer to the buffer directly by the pointer
774 variables described below.
776 The lexer maintains various C<char*> pointers to things in the
777 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
778 reallocated, all of these pointers must be updated. Don't attempt to
779 do this manually, but rather use L</lex_grow_linestr> if you need to
780 reallocate the buffer.
782 The content of the text chunk in the buffer is commonly exactly one
783 complete line of input, up to and including a newline terminator,
784 but there are situations where it is otherwise. The octets of the
785 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
786 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
787 flag on this scalar, which may disagree with it.
789 For direct examination of the buffer, the variable
790 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
791 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
792 of these pointers is usually preferable to examination of the scalar
793 through normal scalar means.
795 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
797 Direct pointer to the end of the chunk of text currently being lexed, the
798 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
799 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
800 always located at the end of the buffer, and does not count as part of
801 the buffer's contents.
803 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
805 Points to the current position of lexing inside the lexer buffer.
806 Characters around this point may be freely examined, within
807 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
808 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
809 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
811 Lexing code (whether in the Perl core or not) moves this pointer past
812 the characters that it consumes. It is also expected to perform some
813 bookkeeping whenever a newline character is consumed. This movement
814 can be more conveniently performed by the function L</lex_read_to>,
815 which handles newlines appropriately.
817 Interpretation of the buffer's octets can be abstracted out by
818 using the slightly higher-level functions L</lex_peek_unichar> and
819 L</lex_read_unichar>.
821 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
823 Points to the start of the current line inside the lexer buffer.
824 This is useful for indicating at which column an error occurred, and
825 not much else. This must be updated by any lexing code that consumes
826 a newline; the function L</lex_read_to> handles this detail.
832 =for apidoc Amx|bool|lex_bufutf8
834 Indicates whether the octets in the lexer buffer
835 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
836 of Unicode characters. If not, they should be interpreted as Latin-1
837 characters. This is analogous to the C<SvUTF8> flag for scalars.
839 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
840 contains valid UTF-8. Lexing code must be robust in the face of invalid
843 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
844 is significant, but not the whole story regarding the input character
845 encoding. Normally, when a file is being read, the scalar contains octets
846 and its C<SvUTF8> flag is off, but the octets should be interpreted as
847 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
848 however, the scalar may have the C<SvUTF8> flag on, and in this case its
849 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
850 is in effect. This logic may change in the future; use this function
851 instead of implementing the logic yourself.
857 Perl_lex_bufutf8(pTHX)
863 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
865 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
866 at least I<len> octets (including terminating NUL). Returns a
867 pointer to the reallocated buffer. This is necessary before making
868 any direct modification of the buffer that would increase its length.
869 L</lex_stuff_pvn> provides a more convenient way to insert text into
872 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
873 this function updates all of the lexer's variables that point directly
880 Perl_lex_grow_linestr(pTHX_ STRLEN len)
884 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
885 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
886 linestr = PL_parser->linestr;
887 buf = SvPVX(linestr);
888 if (len <= SvLEN(linestr))
890 bufend_pos = PL_parser->bufend - buf;
891 bufptr_pos = PL_parser->bufptr - buf;
892 oldbufptr_pos = PL_parser->oldbufptr - buf;
893 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
894 linestart_pos = PL_parser->linestart - buf;
895 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
896 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
897 buf = sv_grow(linestr, len);
898 PL_parser->bufend = buf + bufend_pos;
899 PL_parser->bufptr = buf + bufptr_pos;
900 PL_parser->oldbufptr = buf + oldbufptr_pos;
901 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
902 PL_parser->linestart = buf + linestart_pos;
903 if (PL_parser->last_uni)
904 PL_parser->last_uni = buf + last_uni_pos;
905 if (PL_parser->last_lop)
906 PL_parser->last_lop = buf + last_lop_pos;
911 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
913 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
914 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
915 reallocating the buffer if necessary. This means that lexing code that
916 runs later will see the characters as if they had appeared in the input.
917 It is not recommended to do this as part of normal parsing, and most
918 uses of this facility run the risk of the inserted characters being
919 interpreted in an unintended manner.
921 The string to be inserted is represented by I<len> octets starting
922 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
923 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
924 The characters are recoded for the lexer buffer, according to how the
925 buffer is currently being interpreted (L</lex_bufutf8>). If a string
926 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
927 function is more convenient.
933 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
937 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
938 if (flags & ~(LEX_STUFF_UTF8))
939 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
941 if (flags & LEX_STUFF_UTF8) {
945 const char *p, *e = pv+len;
946 for (p = pv; p != e; p++)
947 highhalf += !!(((U8)*p) & 0x80);
950 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
951 bufptr = PL_parser->bufptr;
952 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
953 SvCUR_set(PL_parser->linestr,
954 SvCUR(PL_parser->linestr) + len+highhalf);
955 PL_parser->bufend += len+highhalf;
956 for (p = pv; p != e; p++) {
959 *bufptr++ = (char)(0xc0 | (c >> 6));
960 *bufptr++ = (char)(0x80 | (c & 0x3f));
967 if (flags & LEX_STUFF_UTF8) {
969 const char *p, *e = pv+len;
970 for (p = pv; p != e; p++) {
973 Perl_croak(aTHX_ "Lexing code attempted to stuff "
974 "non-Latin-1 character into Latin-1 input");
975 } else if (c >= 0xc2 && p+1 != e &&
976 (((U8)p[1]) & 0xc0) == 0x80) {
979 } else if (c >= 0x80) {
980 /* malformed UTF-8 */
982 SAVESPTR(PL_warnhook);
983 PL_warnhook = PERL_WARNHOOK_FATAL;
984 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
990 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
991 bufptr = PL_parser->bufptr;
992 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
993 SvCUR_set(PL_parser->linestr,
994 SvCUR(PL_parser->linestr) + len-highhalf);
995 PL_parser->bufend += len-highhalf;
996 for (p = pv; p != e; p++) {
999 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1002 *bufptr++ = (char)c;
1007 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008 bufptr = PL_parser->bufptr;
1009 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1010 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1018 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary. This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1028 The string to be inserted is represented by octets starting at I<pv>
1029 and continuing to the first nul. These octets are interpreted as either
1030 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1031 in I<flags>. The characters are recoded for the lexer buffer, according
1032 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1033 If it is not convenient to nul-terminate a string to be inserted, the
1034 L</lex_stuff_pvn> function is more appropriate.
1040 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1042 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1043 lex_stuff_pvn(pv, strlen(pv), flags);
1047 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1049 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1050 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1051 reallocating the buffer if necessary. This means that lexing code that
1052 runs later will see the characters as if they had appeared in the input.
1053 It is not recommended to do this as part of normal parsing, and most
1054 uses of this facility run the risk of the inserted characters being
1055 interpreted in an unintended manner.
1057 The string to be inserted is the string value of I<sv>. The characters
1058 are recoded for the lexer buffer, according to how the buffer is currently
1059 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1060 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1061 need to construct a scalar.
1067 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1071 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1073 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1075 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1079 =for apidoc Amx|void|lex_unstuff|char *ptr
1081 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1082 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1083 This hides the discarded text from any lexing code that runs later,
1084 as if the text had never appeared.
1086 This is not the normal way to consume lexed text. For that, use
1093 Perl_lex_unstuff(pTHX_ char *ptr)
1097 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1098 buf = PL_parser->bufptr;
1100 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1103 bufend = PL_parser->bufend;
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106 unstuff_len = ptr - buf;
1107 Move(ptr, buf, bufend+1-ptr, char);
1108 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1109 PL_parser->bufend = bufend - unstuff_len;
1113 =for apidoc Amx|void|lex_read_to|char *ptr
1115 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1116 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1117 performing the correct bookkeeping whenever a newline character is passed.
1118 This is the normal way to consume lexed text.
1120 Interpretation of the buffer's octets can be abstracted out by
1121 using the slightly higher-level functions L</lex_peek_unichar> and
1122 L</lex_read_unichar>.
1128 Perl_lex_read_to(pTHX_ char *ptr)
1131 PERL_ARGS_ASSERT_LEX_READ_TO;
1132 s = PL_parser->bufptr;
1133 if (ptr < s || ptr > PL_parser->bufend)
1134 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1135 for (; s != ptr; s++)
1137 CopLINE_inc(PL_curcop);
1138 PL_parser->linestart = s+1;
1140 PL_parser->bufptr = ptr;
1144 =for apidoc Amx|void|lex_discard_to|char *ptr
1146 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1147 up to I<ptr>. The remaining content of the buffer will be moved, and
1148 all pointers into the buffer updated appropriately. I<ptr> must not
1149 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1150 it is not permitted to discard text that has yet to be lexed.
1152 Normally it is not necessarily to do this directly, because it suffices to
1153 use the implicit discarding behaviour of L</lex_next_chunk> and things
1154 based on it. However, if a token stretches across multiple lines,
1155 and the lexing code has kept multiple lines of text in the buffer for
1156 that purpose, then after completion of the token it would be wise to
1157 explicitly discard the now-unneeded earlier lines, to avoid future
1158 multi-line tokens growing the buffer without bound.
1164 Perl_lex_discard_to(pTHX_ char *ptr)
1168 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1169 buf = SvPVX(PL_parser->linestr);
1171 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174 if (ptr > PL_parser->bufptr)
1175 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1176 discard_len = ptr - buf;
1177 if (PL_parser->oldbufptr < ptr)
1178 PL_parser->oldbufptr = ptr;
1179 if (PL_parser->oldoldbufptr < ptr)
1180 PL_parser->oldoldbufptr = ptr;
1181 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1182 PL_parser->last_uni = NULL;
1183 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1184 PL_parser->last_lop = NULL;
1185 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1186 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1187 PL_parser->bufend -= discard_len;
1188 PL_parser->bufptr -= discard_len;
1189 PL_parser->oldbufptr -= discard_len;
1190 PL_parser->oldoldbufptr -= discard_len;
1191 if (PL_parser->last_uni)
1192 PL_parser->last_uni -= discard_len;
1193 if (PL_parser->last_lop)
1194 PL_parser->last_lop -= discard_len;
1198 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1200 Reads in the next chunk of text to be lexed, appending it to
1201 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1202 looked to the end of the current chunk and wants to know more. It is
1203 usual, but not necessary, for lexing to have consumed the entirety of
1204 the current chunk at this time.
1206 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1207 chunk (i.e., the current chunk has been entirely consumed), normally the
1208 current chunk will be discarded at the same time that the new chunk is
1209 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1210 will not be discarded. If the current chunk has not been entirely
1211 consumed, then it will not be discarded regardless of the flag.
1213 Returns true if some new text was added to the buffer, or false if the
1214 buffer has reached the end of the input text.
1219 #define LEX_FAKE_EOF 0x80000000
1222 Perl_lex_next_chunk(pTHX_ U32 flags)
1226 STRLEN old_bufend_pos, new_bufend_pos;
1227 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1228 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1229 bool got_some_for_debugger = 0;
1231 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1232 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1233 linestr = PL_parser->linestr;
1234 buf = SvPVX(linestr);
1235 if (!(flags & LEX_KEEP_PREVIOUS) &&
1236 PL_parser->bufptr == PL_parser->bufend) {
1237 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1239 if (PL_parser->last_uni != PL_parser->bufend)
1240 PL_parser->last_uni = NULL;
1241 if (PL_parser->last_lop != PL_parser->bufend)
1242 PL_parser->last_lop = NULL;
1243 last_uni_pos = last_lop_pos = 0;
1247 old_bufend_pos = PL_parser->bufend - buf;
1248 bufptr_pos = PL_parser->bufptr - buf;
1249 oldbufptr_pos = PL_parser->oldbufptr - buf;
1250 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1251 linestart_pos = PL_parser->linestart - buf;
1252 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1253 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1255 if (flags & LEX_FAKE_EOF) {
1257 } else if (!PL_parser->rsfp) {
1259 } else if (filter_gets(linestr, old_bufend_pos)) {
1261 got_some_for_debugger = 1;
1263 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1264 sv_setpvs(linestr, "");
1266 /* End of real input. Close filehandle (unless it was STDIN),
1267 * then add implicit termination.
1269 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1270 PerlIO_clearerr(PL_parser->rsfp);
1271 else if (PL_parser->rsfp)
1272 (void)PerlIO_close(PL_parser->rsfp);
1273 PL_parser->rsfp = NULL;
1274 PL_doextract = FALSE;
1276 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1279 if (!PL_in_eval && PL_minus_p) {
1281 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1282 PL_minus_n = PL_minus_p = 0;
1283 } else if (!PL_in_eval && PL_minus_n) {
1284 sv_catpvs(linestr, /*{*/";}");
1287 sv_catpvs(linestr, ";");
1290 buf = SvPVX(linestr);
1291 new_bufend_pos = SvCUR(linestr);
1292 PL_parser->bufend = buf + new_bufend_pos;
1293 PL_parser->bufptr = buf + bufptr_pos;
1294 PL_parser->oldbufptr = buf + oldbufptr_pos;
1295 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1296 PL_parser->linestart = buf + linestart_pos;
1297 if (PL_parser->last_uni)
1298 PL_parser->last_uni = buf + last_uni_pos;
1299 if (PL_parser->last_lop)
1300 PL_parser->last_lop = buf + last_lop_pos;
1301 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1302 PL_curstash != PL_debstash) {
1303 /* debugger active and we're not compiling the debugger code,
1304 * so store the line into the debugger's array of lines
1306 update_debugger_info(NULL, buf+old_bufend_pos,
1307 new_bufend_pos-old_bufend_pos);
1313 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1315 Looks ahead one (Unicode) character in the text currently being lexed.
1316 Returns the codepoint (unsigned integer value) of the next character,
1317 or -1 if lexing has reached the end of the input text. To consume the
1318 peeked character, use L</lex_read_unichar>.
1320 If the next character is in (or extends into) the next chunk of input
1321 text, the next chunk will be read in. Normally the current chunk will be
1322 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1323 then the current chunk will not be discarded.
1325 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1326 is encountered, an exception is generated.
1332 Perl_lex_peek_unichar(pTHX_ U32 flags)
1336 if (flags & ~(LEX_KEEP_PREVIOUS))
1337 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1338 s = PL_parser->bufptr;
1339 bufend = PL_parser->bufend;
1345 if (!lex_next_chunk(flags))
1347 s = PL_parser->bufptr;
1348 bufend = PL_parser->bufend;
1354 len = PL_utf8skip[head];
1355 while ((STRLEN)(bufend-s) < len) {
1356 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1358 s = PL_parser->bufptr;
1359 bufend = PL_parser->bufend;
1362 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1363 if (retlen == (STRLEN)-1) {
1364 /* malformed UTF-8 */
1366 SAVESPTR(PL_warnhook);
1367 PL_warnhook = PERL_WARNHOOK_FATAL;
1368 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1374 if (!lex_next_chunk(flags))
1376 s = PL_parser->bufptr;
1383 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1385 Reads the next (Unicode) character in the text currently being lexed.
1386 Returns the codepoint (unsigned integer value) of the character read,
1387 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1388 if lexing has reached the end of the input text. To non-destructively
1389 examine the next character, use L</lex_peek_unichar> instead.
1391 If the next character is in (or extends into) the next chunk of input
1392 text, the next chunk will be read in. Normally the current chunk will be
1393 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1394 then the current chunk will not be discarded.
1396 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1397 is encountered, an exception is generated.
1403 Perl_lex_read_unichar(pTHX_ U32 flags)
1406 if (flags & ~(LEX_KEEP_PREVIOUS))
1407 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1408 c = lex_peek_unichar(flags);
1411 CopLINE_inc(PL_curcop);
1412 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1418 =for apidoc Amx|void|lex_read_space|U32 flags
1420 Reads optional spaces, in Perl style, in the text currently being
1421 lexed. The spaces may include ordinary whitespace characters and
1422 Perl-style comments. C<#line> directives are processed if encountered.
1423 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1424 at a non-space character (or the end of the input text).
1426 If spaces extend into the next chunk of input text, the next chunk will
1427 be read in. Normally the current chunk will be discarded at the same
1428 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1429 chunk will not be discarded.
1434 #define LEX_NO_NEXT_CHUNK 0x80000000
1437 Perl_lex_read_space(pTHX_ U32 flags)
1440 bool need_incline = 0;
1441 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1442 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1445 sv_free(PL_skipwhite);
1446 PL_skipwhite = NULL;
1449 PL_skipwhite = newSVpvs("");
1450 #endif /* PERL_MAD */
1451 s = PL_parser->bufptr;
1452 bufend = PL_parser->bufend;
1458 } while (!(c == '\n' || (c == 0 && s == bufend)));
1459 } else if (c == '\n') {
1461 PL_parser->linestart = s;
1466 } else if (isSPACE(c)) {
1468 } else if (c == 0 && s == bufend) {
1472 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473 #endif /* PERL_MAD */
1474 if (flags & LEX_NO_NEXT_CHUNK)
1476 PL_parser->bufptr = s;
1477 CopLINE_inc(PL_curcop);
1478 got_more = lex_next_chunk(flags);
1479 CopLINE_dec(PL_curcop);
1480 s = PL_parser->bufptr;
1481 bufend = PL_parser->bufend;
1484 if (need_incline && PL_parser->rsfp) {
1494 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1495 #endif /* PERL_MAD */
1496 PL_parser->bufptr = s;
1501 * This subroutine has nothing to do with tilting, whether at windmills
1502 * or pinball tables. Its name is short for "increment line". It
1503 * increments the current line number in CopLINE(PL_curcop) and checks
1504 * to see whether the line starts with a comment of the form
1505 * # line 500 "foo.pm"
1506 * If so, it sets the current line number and file to the values in the comment.
1510 S_incline(pTHX_ const char *s)
1517 PERL_ARGS_ASSERT_INCLINE;
1519 CopLINE_inc(PL_curcop);
1522 while (SPACE_OR_TAB(*s))
1524 if (strnEQ(s, "line", 4))
1528 if (SPACE_OR_TAB(*s))
1532 while (SPACE_OR_TAB(*s))
1540 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1542 while (SPACE_OR_TAB(*s))
1544 if (*s == '"' && (t = strchr(s+1, '"'))) {
1550 while (!isSPACE(*t))
1554 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1556 if (*e != '\n' && *e != '\0')
1557 return; /* false alarm */
1560 const STRLEN len = t - s;
1561 #ifndef USE_ITHREADS
1562 SV *const temp_sv = CopFILESV(PL_curcop);
1567 cf = SvPVX(temp_sv);
1568 tmplen = SvCUR(temp_sv);
1574 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1575 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576 * to *{"::_<newfilename"} */
1577 /* However, the long form of evals is only turned on by the
1578 debugger - usually they're "(eval %lu)" */
1582 STRLEN tmplen2 = len;
1583 if (tmplen + 2 <= sizeof smallbuf)
1586 Newx(tmpbuf, tmplen + 2, char);
1589 memcpy(tmpbuf + 2, cf, tmplen);
1591 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1596 if (tmplen2 + 2 <= sizeof smallbuf)
1599 Newx(tmpbuf2, tmplen2 + 2, char);
1601 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602 /* Either they malloc'd it, or we malloc'd it,
1603 so no prefix is present in ours. */
1608 memcpy(tmpbuf2 + 2, s, tmplen2);
1611 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1613 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1614 /* adjust ${"::_<newfilename"} to store the new file name */
1615 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1616 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1617 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1620 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1622 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1625 CopFILE_free(PL_curcop);
1626 CopFILE_setn(PL_curcop, s, len);
1628 CopLINE_set(PL_curcop, atoi(n)-1);
1632 /* skip space before PL_thistoken */
1635 S_skipspace0(pTHX_ register char *s)
1637 PERL_ARGS_ASSERT_SKIPSPACE0;
1644 PL_thiswhite = newSVpvs("");
1645 sv_catsv(PL_thiswhite, PL_skipwhite);
1646 sv_free(PL_skipwhite);
1649 PL_realtokenstart = s - SvPVX(PL_linestr);
1653 /* skip space after PL_thistoken */
1656 S_skipspace1(pTHX_ register char *s)
1658 const char *start = s;
1659 I32 startoff = start - SvPVX(PL_linestr);
1661 PERL_ARGS_ASSERT_SKIPSPACE1;
1666 start = SvPVX(PL_linestr) + startoff;
1667 if (!PL_thistoken && PL_realtokenstart >= 0) {
1668 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1669 PL_thistoken = newSVpvn(tstart, start - tstart);
1671 PL_realtokenstart = -1;
1674 PL_nextwhite = newSVpvs("");
1675 sv_catsv(PL_nextwhite, PL_skipwhite);
1676 sv_free(PL_skipwhite);
1683 S_skipspace2(pTHX_ register char *s, SV **svp)
1686 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1687 const I32 startoff = s - SvPVX(PL_linestr);
1689 PERL_ARGS_ASSERT_SKIPSPACE2;
1692 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1693 if (!PL_madskills || !svp)
1695 start = SvPVX(PL_linestr) + startoff;
1696 if (!PL_thistoken && PL_realtokenstart >= 0) {
1697 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1698 PL_thistoken = newSVpvn(tstart, start - tstart);
1699 PL_realtokenstart = -1;
1703 *svp = newSVpvs("");
1704 sv_setsv(*svp, PL_skipwhite);
1705 sv_free(PL_skipwhite);
1714 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1716 AV *av = CopFILEAVx(PL_curcop);
1718 SV * const sv = newSV_type(SVt_PVMG);
1720 sv_setsv(sv, orig_sv);
1722 sv_setpvn(sv, buf, len);
1725 av_store(av, (I32)CopLINE(PL_curcop), sv);
1731 * Called to gobble the appropriate amount and type of whitespace.
1732 * Skips comments as well.
1736 S_skipspace(pTHX_ register char *s)
1740 #endif /* PERL_MAD */
1741 PERL_ARGS_ASSERT_SKIPSPACE;
1744 sv_free(PL_skipwhite);
1745 PL_skipwhite = NULL;
1747 #endif /* PERL_MAD */
1748 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1749 while (s < PL_bufend && SPACE_OR_TAB(*s))
1752 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1754 lex_read_space(LEX_KEEP_PREVIOUS |
1755 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1756 LEX_NO_NEXT_CHUNK : 0));
1758 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1759 if (PL_linestart > PL_bufptr)
1760 PL_bufptr = PL_linestart;
1765 PL_skipwhite = newSVpvn(start, s-start);
1766 #endif /* PERL_MAD */
1772 * Check the unary operators to ensure there's no ambiguity in how they're
1773 * used. An ambiguous piece of code would be:
1775 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1776 * the +5 is its argument.
1786 if (PL_oldoldbufptr != PL_last_uni)
1788 while (isSPACE(*PL_last_uni))
1791 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1793 if ((t = strchr(s, '(')) && t < PL_bufptr)
1796 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1797 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1798 (int)(s - PL_last_uni), PL_last_uni);
1802 * LOP : macro to build a list operator. Its behaviour has been replaced
1803 * with a subroutine, S_lop() for which LOP is just another name.
1806 #define LOP(f,x) return lop(f,x,s)
1810 * Build a list operator (or something that might be one). The rules:
1811 * - if we have a next token, then it's a list operator [why?]
1812 * - if the next thing is an opening paren, then it's a function
1813 * - else it's a list operator
1817 S_lop(pTHX_ I32 f, int x, char *s)
1821 PERL_ARGS_ASSERT_LOP;
1827 PL_last_lop = PL_oldbufptr;
1828 PL_last_lop_op = (OPCODE)f;
1831 return REPORT(LSTOP);
1834 return REPORT(LSTOP);
1837 return REPORT(FUNC);
1840 return REPORT(FUNC);
1842 return REPORT(LSTOP);
1848 * Sets up for an eventual force_next(). start_force(0) basically does
1849 * an unshift, while start_force(-1) does a push. yylex removes items
1854 S_start_force(pTHX_ int where)
1858 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1859 where = PL_lasttoke;
1860 assert(PL_curforce < 0 || PL_curforce == where);
1861 if (PL_curforce != where) {
1862 for (i = PL_lasttoke; i > where; --i) {
1863 PL_nexttoke[i] = PL_nexttoke[i-1];
1867 if (PL_curforce < 0) /* in case of duplicate start_force() */
1868 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1869 PL_curforce = where;
1872 curmad('^', newSVpvs(""));
1873 CURMAD('_', PL_nextwhite);
1878 S_curmad(pTHX_ char slot, SV *sv)
1884 if (PL_curforce < 0)
1885 where = &PL_thismad;
1887 where = &PL_nexttoke[PL_curforce].next_mad;
1893 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1895 else if (PL_encoding) {
1896 sv_recode_to_utf8(sv, PL_encoding);
1901 /* keep a slot open for the head of the list? */
1902 if (slot != '_' && *where && (*where)->mad_key == '^') {
1903 (*where)->mad_key = slot;
1904 sv_free(MUTABLE_SV(((*where)->mad_val)));
1905 (*where)->mad_val = (void*)sv;
1908 addmad(newMADsv(slot, sv), where, 0);
1911 # define start_force(where) NOOP
1912 # define curmad(slot, sv) NOOP
1917 * When the lexer realizes it knows the next token (for instance,
1918 * it is reordering tokens for the parser) then it can call S_force_next
1919 * to know what token to return the next time the lexer is called. Caller
1920 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1921 * and possibly PL_expect to ensure the lexer handles the token correctly.
1925 S_force_next(pTHX_ I32 type)
1930 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1931 tokereport(type, &NEXTVAL_NEXTTOKE);
1935 if (PL_curforce < 0)
1936 start_force(PL_lasttoke);
1937 PL_nexttoke[PL_curforce].next_type = type;
1938 if (PL_lex_state != LEX_KNOWNEXT)
1939 PL_lex_defer = PL_lex_state;
1940 PL_lex_state = LEX_KNOWNEXT;
1941 PL_lex_expect = PL_expect;
1944 PL_nexttype[PL_nexttoke] = type;
1946 if (PL_lex_state != LEX_KNOWNEXT) {
1947 PL_lex_defer = PL_lex_state;
1948 PL_lex_expect = PL_expect;
1949 PL_lex_state = LEX_KNOWNEXT;
1957 if (PL_parser->yychar != YYEMPTY) {
1959 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1960 force_next(PL_parser->yychar);
1961 PL_parser->yychar = YYEMPTY;
1966 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1969 SV * const sv = newSVpvn_utf8(start, len,
1972 && !is_ascii_string((const U8*)start, len)
1973 && is_utf8_string((const U8*)start, len));
1979 * When the lexer knows the next thing is a word (for instance, it has
1980 * just seen -> and it knows that the next char is a word char, then
1981 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1985 * char *start : buffer position (must be within PL_linestr)
1986 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1987 * int check_keyword : if true, Perl checks to make sure the word isn't
1988 * a keyword (do this if the word is a label, e.g. goto FOO)
1989 * int allow_pack : if true, : characters will also be allowed (require,
1990 * use, etc. do this)
1991 * int allow_initial_tick : used by the "sub" lexer only.
1995 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2001 PERL_ARGS_ASSERT_FORCE_WORD;
2003 start = SKIPSPACE1(start);
2005 if (isIDFIRST_lazy_if(s,UTF) ||
2006 (allow_pack && *s == ':') ||
2007 (allow_initial_tick && *s == '\'') )
2009 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2010 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2012 start_force(PL_curforce);
2014 curmad('X', newSVpvn(start,s-start));
2015 if (token == METHOD) {
2020 PL_expect = XOPERATOR;
2024 curmad('g', newSVpvs( "forced" ));
2025 NEXTVAL_NEXTTOKE.opval
2026 = (OP*)newSVOP(OP_CONST,0,
2027 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2028 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2036 * Called when the lexer wants $foo *foo &foo etc, but the program
2037 * text only contains the "foo" portion. The first argument is a pointer
2038 * to the "foo", and the second argument is the type symbol to prefix.
2039 * Forces the next token to be a "WORD".
2040 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2044 S_force_ident(pTHX_ register const char *s, int kind)
2048 PERL_ARGS_ASSERT_FORCE_IDENT;
2051 const STRLEN len = strlen(s);
2052 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2053 start_force(PL_curforce);
2054 NEXTVAL_NEXTTOKE.opval = o;
2057 o->op_private = OPpCONST_ENTERED;
2058 /* XXX see note in pp_entereval() for why we forgo typo
2059 warnings if the symbol must be introduced in an eval.
2061 gv_fetchpvn_flags(s, len,
2062 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2064 kind == '$' ? SVt_PV :
2065 kind == '@' ? SVt_PVAV :
2066 kind == '%' ? SVt_PVHV :
2074 Perl_str_to_version(pTHX_ SV *sv)
2079 const char *start = SvPV_const(sv,len);
2080 const char * const end = start + len;
2081 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2083 PERL_ARGS_ASSERT_STR_TO_VERSION;
2085 while (start < end) {
2089 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2094 retval += ((NV)n)/nshift;
2103 * Forces the next token to be a version number.
2104 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2105 * and if "guessing" is TRUE, then no new token is created (and the caller
2106 * must use an alternative parsing method).
2110 S_force_version(pTHX_ char *s, int guessing)
2116 I32 startoff = s - SvPVX(PL_linestr);
2119 PERL_ARGS_ASSERT_FORCE_VERSION;
2127 while (isDIGIT(*d) || *d == '_' || *d == '.')
2131 start_force(PL_curforce);
2132 curmad('X', newSVpvn(s,d-s));
2135 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2137 #ifdef USE_LOCALE_NUMERIC
2138 char *loc = setlocale(LC_NUMERIC, "C");
2140 s = scan_num(s, &pl_yylval);
2141 #ifdef USE_LOCALE_NUMERIC
2142 setlocale(LC_NUMERIC, loc);
2144 version = pl_yylval.opval;
2145 ver = cSVOPx(version)->op_sv;
2146 if (SvPOK(ver) && !SvNIOK(ver)) {
2147 SvUPGRADE(ver, SVt_PVNV);
2148 SvNV_set(ver, str_to_version(ver));
2149 SvNOK_on(ver); /* hint that it is a version */
2152 else if (guessing) {
2155 sv_free(PL_nextwhite); /* let next token collect whitespace */
2157 s = SvPVX(PL_linestr) + startoff;
2165 if (PL_madskills && !version) {
2166 sv_free(PL_nextwhite); /* let next token collect whitespace */
2168 s = SvPVX(PL_linestr) + startoff;
2171 /* NOTE: The parser sees the package name and the VERSION swapped */
2172 start_force(PL_curforce);
2173 NEXTVAL_NEXTTOKE.opval = version;
2180 * S_force_strict_version
2181 * Forces the next token to be a version number using strict syntax rules.
2185 S_force_strict_version(pTHX_ char *s)
2190 I32 startoff = s - SvPVX(PL_linestr);
2192 const char *errstr = NULL;
2194 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2196 while (isSPACE(*s)) /* leading whitespace */
2199 if (is_STRICT_VERSION(s,&errstr)) {
2201 s = (char *)scan_version(s, ver, 0);
2202 version = newSVOP(OP_CONST, 0, ver);
2204 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2205 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2209 yyerror(errstr); /* version required */
2214 if (PL_madskills && !version) {
2215 sv_free(PL_nextwhite); /* let next token collect whitespace */
2217 s = SvPVX(PL_linestr) + startoff;
2220 /* NOTE: The parser sees the package name and the VERSION swapped */
2221 start_force(PL_curforce);
2222 NEXTVAL_NEXTTOKE.opval = version;
2230 * Tokenize a quoted string passed in as an SV. It finds the next
2231 * chunk, up to end of string or a backslash. It may make a new
2232 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2237 S_tokeq(pTHX_ SV *sv)
2241 register char *send;
2246 PERL_ARGS_ASSERT_TOKEQ;
2251 s = SvPV_force(sv, len);
2252 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2255 while (s < send && *s != '\\')
2260 if ( PL_hints & HINT_NEW_STRING ) {
2261 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2265 if (s + 1 < send && (s[1] == '\\'))
2266 s++; /* all that, just for this */
2271 SvCUR_set(sv, d - SvPVX_const(sv));
2273 if ( PL_hints & HINT_NEW_STRING )
2274 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2279 * Now come three functions related to double-quote context,
2280 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2281 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2282 * interact with PL_lex_state, and create fake ( ... ) argument lists
2283 * to handle functions and concatenation.
2284 * They assume that whoever calls them will be setting up a fake
2285 * join call, because each subthing puts a ',' after it. This lets
2288 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2290 * (I'm not sure whether the spurious commas at the end of lcfirst's
2291 * arguments and join's arguments are created or not).
2296 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2298 * Pattern matching will set PL_lex_op to the pattern-matching op to
2299 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2301 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2303 * Everything else becomes a FUNC.
2305 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2306 * had an OP_CONST or OP_READLINE). This just sets us up for a
2307 * call to S_sublex_push().
2311 S_sublex_start(pTHX)
2314 register const I32 op_type = pl_yylval.ival;
2316 if (op_type == OP_NULL) {
2317 pl_yylval.opval = PL_lex_op;
2321 if (op_type == OP_CONST || op_type == OP_READLINE) {
2322 SV *sv = tokeq(PL_lex_stuff);
2324 if (SvTYPE(sv) == SVt_PVIV) {
2325 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2327 const char * const p = SvPV_const(sv, len);
2328 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2332 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2333 PL_lex_stuff = NULL;
2334 /* Allow <FH> // "foo" */
2335 if (op_type == OP_READLINE)
2336 PL_expect = XTERMORDORDOR;
2339 else if (op_type == OP_BACKTICK && PL_lex_op) {
2340 /* readpipe() vas overriden */
2341 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2342 pl_yylval.opval = PL_lex_op;
2344 PL_lex_stuff = NULL;
2348 PL_sublex_info.super_state = PL_lex_state;
2349 PL_sublex_info.sub_inwhat = (U16)op_type;
2350 PL_sublex_info.sub_op = PL_lex_op;
2351 PL_lex_state = LEX_INTERPPUSH;
2355 pl_yylval.opval = PL_lex_op;
2365 * Create a new scope to save the lexing state. The scope will be
2366 * ended in S_sublex_done. Returns a '(', starting the function arguments
2367 * to the uc, lc, etc. found before.
2368 * Sets PL_lex_state to LEX_INTERPCONCAT.
2377 PL_lex_state = PL_sublex_info.super_state;
2378 SAVEBOOL(PL_lex_dojoin);
2379 SAVEI32(PL_lex_brackets);
2380 SAVEI32(PL_lex_casemods);
2381 SAVEI32(PL_lex_starts);
2382 SAVEI8(PL_lex_state);
2383 SAVEVPTR(PL_lex_inpat);
2384 SAVEI16(PL_lex_inwhat);
2385 SAVECOPLINE(PL_curcop);
2386 SAVEPPTR(PL_bufptr);
2387 SAVEPPTR(PL_bufend);
2388 SAVEPPTR(PL_oldbufptr);
2389 SAVEPPTR(PL_oldoldbufptr);
2390 SAVEPPTR(PL_last_lop);
2391 SAVEPPTR(PL_last_uni);
2392 SAVEPPTR(PL_linestart);
2393 SAVESPTR(PL_linestr);
2394 SAVEGENERICPV(PL_lex_brackstack);
2395 SAVEGENERICPV(PL_lex_casestack);
2397 PL_linestr = PL_lex_stuff;
2398 PL_lex_stuff = NULL;
2400 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2401 = SvPVX(PL_linestr);
2402 PL_bufend += SvCUR(PL_linestr);
2403 PL_last_lop = PL_last_uni = NULL;
2404 SAVEFREESV(PL_linestr);
2406 PL_lex_dojoin = FALSE;
2407 PL_lex_brackets = 0;
2408 Newx(PL_lex_brackstack, 120, char);
2409 Newx(PL_lex_casestack, 12, char);
2410 PL_lex_casemods = 0;
2411 *PL_lex_casestack = '\0';
2413 PL_lex_state = LEX_INTERPCONCAT;
2414 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2416 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2417 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2418 PL_lex_inpat = PL_sublex_info.sub_op;
2420 PL_lex_inpat = NULL;
2427 * Restores lexer state after a S_sublex_push.
2434 if (!PL_lex_starts++) {
2435 SV * const sv = newSVpvs("");
2436 if (SvUTF8(PL_linestr))
2438 PL_expect = XOPERATOR;
2439 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2443 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2444 PL_lex_state = LEX_INTERPCASEMOD;
2448 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2449 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2450 PL_linestr = PL_lex_repl;
2452 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2453 PL_bufend += SvCUR(PL_linestr);
2454 PL_last_lop = PL_last_uni = NULL;
2455 SAVEFREESV(PL_linestr);
2456 PL_lex_dojoin = FALSE;
2457 PL_lex_brackets = 0;
2458 PL_lex_casemods = 0;
2459 *PL_lex_casestack = '\0';
2461 if (SvEVALED(PL_lex_repl)) {
2462 PL_lex_state = LEX_INTERPNORMAL;
2464 /* we don't clear PL_lex_repl here, so that we can check later
2465 whether this is an evalled subst; that means we rely on the
2466 logic to ensure sublex_done() is called again only via the
2467 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2470 PL_lex_state = LEX_INTERPCONCAT;
2480 PL_endwhite = newSVpvs("");
2481 sv_catsv(PL_endwhite, PL_thiswhite);
2485 sv_setpvs(PL_thistoken,"");
2487 PL_realtokenstart = -1;
2491 PL_bufend = SvPVX(PL_linestr);
2492 PL_bufend += SvCUR(PL_linestr);
2493 PL_expect = XOPERATOR;
2494 PL_sublex_info.sub_inwhat = 0;
2502 Extracts a pattern, double-quoted string, or transliteration. This
2505 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2506 processing a pattern (PL_lex_inpat is true), a transliteration
2507 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2509 Returns a pointer to the character scanned up to. If this is
2510 advanced from the start pointer supplied (i.e. if anything was
2511 successfully parsed), will leave an OP for the substring scanned
2512 in pl_yylval. Caller must intuit reason for not parsing further
2513 by looking at the next characters herself.
2517 constants: \N{NAME} only
2518 case and quoting: \U \Q \E
2519 stops on @ and $, but not for $ as tail anchor
2521 In transliterations:
2522 characters are VERY literal, except for - not at the start or end
2523 of the string, which indicates a range. If the range is in bytes,
2524 scan_const expands the range to the full set of intermediate
2525 characters. If the range is in utf8, the hyphen is replaced with
2526 a certain range mark which will be handled by pmtrans() in op.c.
2528 In double-quoted strings:
2530 double-quoted style: \r and \n
2531 constants: \x31, etc.
2532 deprecated backrefs: \1 (in substitution replacements)
2533 case and quoting: \U \Q \E
2536 scan_const does *not* construct ops to handle interpolated strings.
2537 It stops processing as soon as it finds an embedded $ or @ variable
2538 and leaves it to the caller to work out what's going on.
2540 embedded arrays (whether in pattern or not) could be:
2541 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2543 $ in double-quoted strings must be the symbol of an embedded scalar.
2545 $ in pattern could be $foo or could be tail anchor. Assumption:
2546 it's a tail anchor if $ is the last thing in the string, or if it's
2547 followed by one of "()| \r\n\t"
2549 \1 (backreferences) are turned into $1
2551 The structure of the code is
2552 while (there's a character to process) {
2553 handle transliteration ranges
2554 skip regexp comments /(?#comment)/ and codes /(?{code})/
2555 skip #-initiated comments in //x patterns
2556 check for embedded arrays
2557 check for embedded scalars
2559 deprecate \1 in substitution replacements
2560 handle string-changing backslashes \l \U \Q \E, etc.
2561 switch (what was escaped) {
2562 handle \- in a transliteration (becomes a literal -)
2563 if a pattern and not \N{, go treat as regular character
2564 handle \132 (octal characters)
2565 handle \x15 and \x{1234} (hex characters)
2566 handle \N{name} (named characters, also \N{3,5} in a pattern)
2567 handle \cV (control characters)
2568 handle printf-style backslashes (\f, \r, \n, etc)
2571 } (end if backslash)
2572 handle regular character
2573 } (end while character to read)
2578 S_scan_const(pTHX_ char *start)
2581 register char *send = PL_bufend; /* end of the constant */
2582 SV *sv = newSV(send - start); /* sv for the constant. See
2583 note below on sizing. */
2584 register char *s = start; /* start of the constant */
2585 register char *d = SvPVX(sv); /* destination for copies */
2586 bool dorange = FALSE; /* are we in a translit range? */
2587 bool didrange = FALSE; /* did we just finish a range? */
2588 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
2589 I32 this_utf8 = UTF; /* Is the source string assumed
2590 to be UTF8? But, this can
2591 show as true when the source
2592 isn't utf8, as for example
2593 when it is entirely composed
2596 /* Note on sizing: The scanned constant is placed into sv, which is
2597 * initialized by newSV() assuming one byte of output for every byte of
2598 * input. This routine expects newSV() to allocate an extra byte for a
2599 * trailing NUL, which this routine will append if it gets to the end of
2600 * the input. There may be more bytes of input than output (eg., \N{LATIN
2601 * CAPITAL LETTER A}), or more output than input if the constant ends up
2602 * recoded to utf8, but each time a construct is found that might increase
2603 * the needed size, SvGROW() is called. Its size parameter each time is
2604 * based on the best guess estimate at the time, namely the length used so
2605 * far, plus the length the current construct will occupy, plus room for
2606 * the trailing NUL, plus one byte for every input byte still unscanned */
2610 UV literal_endpoint = 0;
2611 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2614 PERL_ARGS_ASSERT_SCAN_CONST;
2616 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2617 /* If we are doing a trans and we know we want UTF8 set expectation */
2618 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2619 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2623 while (s < send || dorange) {
2625 /* get transliterations out of the way (they're most literal) */
2626 if (PL_lex_inwhat == OP_TRANS) {
2627 /* expand a range A-Z to the full set of characters. AIE! */
2629 I32 i; /* current expanded character */
2630 I32 min; /* first character in range */
2631 I32 max; /* last character in range */
2642 char * const c = (char*)utf8_hop((U8*)d, -1);
2646 *c = (char)UTF_TO_NATIVE(0xff);
2647 /* mark the range as done, and continue */
2653 i = d - SvPVX_const(sv); /* remember current offset */
2656 SvLEN(sv) + (has_utf8 ?
2657 (512 - UTF_CONTINUATION_MARK +
2660 /* How many two-byte within 0..255: 128 in UTF-8,
2661 * 96 in UTF-8-mod. */
2663 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2665 d = SvPVX(sv) + i; /* refresh d after realloc */
2669 for (j = 0; j <= 1; j++) {
2670 char * const c = (char*)utf8_hop((U8*)d, -1);
2671 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2677 max = (U8)0xff; /* only to \xff */
2678 uvmax = uv; /* \x{100} to uvmax */
2680 d = c; /* eat endpoint chars */
2685 d -= 2; /* eat the first char and the - */
2686 min = (U8)*d; /* first char in range */
2687 max = (U8)d[1]; /* last char in range */
2694 "Invalid range \"%c-%c\" in transliteration operator",
2695 (char)min, (char)max);
2699 if (literal_endpoint == 2 &&
2700 ((isLOWER(min) && isLOWER(max)) ||
2701 (isUPPER(min) && isUPPER(max)))) {
2703 for (i = min; i <= max; i++)
2705 *d++ = NATIVE_TO_NEED(has_utf8,i);
2707 for (i = min; i <= max; i++)
2709 *d++ = NATIVE_TO_NEED(has_utf8,i);
2714 for (i = min; i <= max; i++)
2717 const U8 ch = (U8)NATIVE_TO_UTF(i);
2718 if (UNI_IS_INVARIANT(ch))
2721 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2722 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2731 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2733 *d++ = (char)UTF_TO_NATIVE(0xff);
2735 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2739 /* mark the range as done, and continue */
2743 literal_endpoint = 0;
2748 /* range begins (ignore - as first or last char) */
2749 else if (*s == '-' && s+1 < send && s != start) {
2751 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2758 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2768 literal_endpoint = 0;
2769 native_range = TRUE;
2774 /* if we get here, we're not doing a transliteration */
2776 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2777 except for the last char, which will be done separately. */
2778 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2780 while (s+1 < send && *s != ')')
2781 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2783 else if (s[2] == '{' /* This should match regcomp.c */
2784 || (s[2] == '?' && s[3] == '{'))
2787 char *regparse = s + (s[2] == '{' ? 3 : 4);
2790 while (count && (c = *regparse)) {
2791 if (c == '\\' && regparse[1])
2799 if (*regparse != ')')
2800 regparse--; /* Leave one char for continuation. */
2801 while (s < regparse)
2802 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2806 /* likewise skip #-initiated comments in //x patterns */
2807 else if (*s == '#' && PL_lex_inpat &&
2808 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2809 while (s+1 < send && *s != '\n')
2810 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2813 /* check for embedded arrays
2814 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2816 else if (*s == '@' && s[1]) {
2817 if (isALNUM_lazy_if(s+1,UTF))
2819 if (strchr(":'{$", s[1]))
2821 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2822 break; /* in regexp, neither @+ nor @- are interpolated */
2825 /* check for embedded scalars. only stop if we're sure it's a
2828 else if (*s == '$') {
2829 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2831 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2833 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2834 "Possible unintended interpolation of $\\ in regex");
2836 break; /* in regexp, $ might be tail anchor */
2840 /* End of else if chain - OP_TRANS rejoin rest */
2843 if (*s == '\\' && s+1 < send) {
2844 char* e; /* Can be used for ending '}', etc. */
2848 /* warn on \1 - \9 in substitution replacements, but note that \11
2849 * is an octal; and \19 is \1 followed by '9' */
2850 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2851 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2853 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2858 /* string-change backslash escapes */
2859 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2863 /* In a pattern, process \N, but skip any other backslash escapes.
2864 * This is because we don't want to translate an escape sequence
2865 * into a meta symbol and have the regex compiler use the meta
2866 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2867 * in spite of this, we do have to process \N here while the proper
2868 * charnames handler is in scope. See bugs #56444 and #62056.
2869 * There is a complication because \N in a pattern may also stand
2870 * for 'match a non-nl', and not mean a charname, in which case its
2871 * processing should be deferred to the regex compiler. To be a
2872 * charname it must be followed immediately by a '{', and not look
2873 * like \N followed by a curly quantifier, i.e., not something like
2874 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2876 else if (PL_lex_inpat
2879 || regcurly(s + 1)))
2881 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2882 goto default_action;
2887 /* quoted - in transliterations */
2889 if (PL_lex_inwhat == OP_TRANS) {
2896 if ((isALPHA(*s) || isDIGIT(*s)))
2897 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2898 "Unrecognized escape \\%c passed through",
2900 /* default action is to copy the quoted character */
2901 goto default_action;
2904 /* eg. \132 indicates the octal constant 0132 */
2905 case '0': case '1': case '2': case '3':
2906 case '4': case '5': case '6': case '7':
2910 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2913 goto NUM_ESCAPE_INSERT;
2915 /* eg. \o{24} indicates the octal constant \024 */
2921 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2927 goto NUM_ESCAPE_INSERT;
2930 /* eg. \x24 indicates the hex constant 0x24 */
2934 char* const e = strchr(s, '}');
2935 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2936 PERL_SCAN_DISALLOW_PREFIX;
2941 yyerror("Missing right brace on \\x{}");
2945 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2951 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2952 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2958 /* Insert oct or hex escaped character. There will always be
2959 * enough room in sv since such escapes will be longer than any
2960 * UTF-8 sequence they can end up as, except if they force us
2961 * to recode the rest of the string into utf8 */
2963 /* Here uv is the ordinal of the next character being added in
2964 * unicode (converted from native). */
2965 if (!UNI_IS_INVARIANT(uv)) {
2966 if (!has_utf8 && uv > 255) {
2967 /* Might need to recode whatever we have accumulated so
2968 * far if it contains any chars variant in utf8 or
2971 SvCUR_set(sv, d - SvPVX_const(sv));
2974 /* See Note on sizing above. */
2975 sv_utf8_upgrade_flags_grow(sv,
2976 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2977 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2978 d = SvPVX(sv) + SvCUR(sv);
2983 d = (char*)uvuni_to_utf8((U8*)d, uv);
2984 if (PL_lex_inwhat == OP_TRANS &&
2985 PL_sublex_info.sub_op) {
2986 PL_sublex_info.sub_op->op_private |=
2987 (PL_lex_repl ? OPpTRANS_FROM_UTF
2991 if (uv > 255 && !dorange)
2992 native_range = FALSE;
3005 /* In a non-pattern \N must be a named character, like \N{LATIN
3006 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3007 * mean to match a non-newline. For non-patterns, named
3008 * characters are converted to their string equivalents. In
3009 * patterns, named characters are not converted to their
3010 * ultimate forms for the same reasons that other escapes
3011 * aren't. Instead, they are converted to the \N{U+...} form
3012 * to get the value from the charnames that is in effect right
3013 * now, while preserving the fact that it was a named character
3014 * so that the regex compiler knows this */
3016 /* This section of code doesn't generally use the
3017 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3018 * a close examination of this macro and determined it is a
3019 * no-op except on utfebcdic variant characters. Every
3020 * character generated by this that would normally need to be
3021 * enclosed by this macro is invariant, so the macro is not
3022 * needed, and would complicate use of copy(). There are other
3023 * parts of this file where the macro is used inconsistently,
3024 * but are saved by it being a no-op */
3026 /* The structure of this section of code (besides checking for
3027 * errors and upgrading to utf8) is:
3028 * Further disambiguate between the two meanings of \N, and if
3029 * not a charname, go process it elsewhere
3030 * If of form \N{U+...}, pass it through if a pattern;
3031 * otherwise convert to utf8
3032 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3033 * pattern; otherwise convert to utf8 */
3035 /* Here, s points to the 'N'; the test below is guaranteed to
3036 * succeed if we are being called on a pattern as we already
3037 * know from a test above that the next character is a '{'.
3038 * On a non-pattern \N must mean 'named sequence, which
3039 * requires braces */
3042 yyerror("Missing braces on \\N{}");
3047 /* If there is no matching '}', it is an error. */
3048 if (! (e = strchr(s, '}'))) {
3049 if (! PL_lex_inpat) {
3050 yyerror("Missing right brace on \\N{}");
3052 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3057 /* Here it looks like a named character */
3061 /* XXX This block is temporary code. \N{} implies that the
3062 * pattern is to have Unicode semantics, and therefore
3063 * currently has to be encoded in utf8. By putting it in
3064 * utf8 now, we save a whole pass in the regular expression
3065 * compiler. Once that code is changed so Unicode
3066 * semantics doesn't necessarily have to be in utf8, this
3067 * block should be removed */
3069 SvCUR_set(sv, d - SvPVX_const(sv));
3072 /* See Note on sizing above. */
3073 sv_utf8_upgrade_flags_grow(sv,
3074 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3075 /* 5 = '\N{' + cur char + NUL */
3076 (STRLEN)(send - s) + 5);
3077 d = SvPVX(sv) + SvCUR(sv);
3082 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3083 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3084 | PERL_SCAN_DISALLOW_PREFIX;
3087 /* For \N{U+...}, the '...' is a unicode value even on
3088 * EBCDIC machines */
3089 s += 2; /* Skip to next char after the 'U+' */
3091 uv = grok_hex(s, &len, &flags, NULL);
3092 if (len == 0 || len != (STRLEN)(e - s)) {
3093 yyerror("Invalid hexadecimal number in \\N{U+...}");
3100 /* Pass through to the regex compiler unchanged. The
3101 * reason we evaluated the number above is to make sure
3102 * there wasn't a syntax error. */
3103 s -= 5; /* Include the '\N{U+' */
3104 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3107 else { /* Not a pattern: convert the hex to string */
3109 /* If destination is not in utf8, unconditionally
3110 * recode it to be so. This is because \N{} implies
3111 * Unicode semantics, and scalars have to be in utf8
3112 * to guarantee those semantics */
3114 SvCUR_set(sv, d - SvPVX_const(sv));
3117 /* See Note on sizing above. */
3118 sv_utf8_upgrade_flags_grow(
3120 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3121 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3122 d = SvPVX(sv) + SvCUR(sv);
3126 /* Add the string to the output */
3127 if (UNI_IS_INVARIANT(uv)) {
3130 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3133 else { /* Here is \N{NAME} but not \N{U+...}. */
3135 SV *res; /* result from charnames */
3136 const char *str; /* the string in 'res' */
3137 STRLEN len; /* its length */
3139 /* Get the value for NAME */
3140 res = newSVpvn(s, e - s);
3141 res = new_constant( NULL, 0, "charnames",
3142 /* includes all of: \N{...} */
3143 res, NULL, s - 3, e - s + 4 );
3145 /* Most likely res will be in utf8 already since the
3146 * standard charnames uses pack U, but a custom translator
3147 * can leave it otherwise, so make sure. XXX This can be
3148 * revisited to not have charnames use utf8 for characters
3149 * that don't need it when regexes don't have to be in utf8
3150 * for Unicode semantics. If doing so, remember EBCDIC */
3151 sv_utf8_upgrade(res);
3152 str = SvPV_const(res, len);
3154 /* Don't accept malformed input */
3155 if (! is_utf8_string((U8 *) str, len)) {
3156 yyerror("Malformed UTF-8 returned by \\N");
3158 else if (PL_lex_inpat) {
3160 if (! len) { /* The name resolved to an empty string */
3161 Copy("\\N{}", d, 4, char);
3165 /* In order to not lose information for the regex
3166 * compiler, pass the result in the specially made
3167 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3168 * the code points in hex of each character
3169 * returned by charnames */
3171 const char *str_end = str + len;
3172 STRLEN char_length; /* cur char's byte length */
3173 STRLEN output_length; /* and the number of bytes
3174 after this is translated
3176 const STRLEN off = d - SvPVX_const(sv);
3178 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3179 * max('U+', '.'); and 1 for NUL */
3180 char hex_string[2 * UTF8_MAXBYTES + 5];
3182 /* Get the first character of the result. */
3183 U32 uv = utf8n_to_uvuni((U8 *) str,
3188 /* The call to is_utf8_string() above hopefully
3189 * guarantees that there won't be an error. But
3190 * it's easy here to make sure. The function just
3191 * above warns and returns 0 if invalid utf8, but
3192 * it can also return 0 if the input is validly a
3193 * NUL. Disambiguate */
3194 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3195 uv = UNICODE_REPLACEMENT;
3198 /* Convert first code point to hex, including the
3199 * boiler plate before it */
3200 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3201 output_length = strlen(hex_string);
3203 /* Make sure there is enough space to hold it */
3204 d = off + SvGROW(sv, off
3206 + (STRLEN)(send - e)
3207 + 2); /* '}' + NUL */
3209 Copy(hex_string, d, output_length, char);
3212 /* For each subsequent character, append dot and
3213 * its ordinal in hex */
3214 while ((str += char_length) < str_end) {
3215 const STRLEN off = d - SvPVX_const(sv);
3216 U32 uv = utf8n_to_uvuni((U8 *) str,
3220 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3221 uv = UNICODE_REPLACEMENT;
3224 sprintf(hex_string, ".%X", (unsigned int) uv);
3225 output_length = strlen(hex_string);
3227 d = off + SvGROW(sv, off
3229 + (STRLEN)(send - e)
3230 + 2); /* '}' + NUL */
3231 Copy(hex_string, d, output_length, char);
3235 *d++ = '}'; /* Done. Add the trailing brace */
3238 else { /* Here, not in a pattern. Convert the name to a
3241 /* If destination is not in utf8, unconditionally
3242 * recode it to be so. This is because \N{} implies
3243 * Unicode semantics, and scalars have to be in utf8
3244 * to guarantee those semantics */
3246 SvCUR_set(sv, d - SvPVX_const(sv));
3249 /* See Note on sizing above. */
3250 sv_utf8_upgrade_flags_grow(sv,
3251 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3252 len + (STRLEN)(send - s) + 1);
3253 d = SvPVX(sv) + SvCUR(sv);
3255 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3257 /* See Note on sizing above. (NOTE: SvCUR() is not
3258 * set correctly here). */
3259 const STRLEN off = d - SvPVX_const(sv);
3260 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3262 Copy(str, d, len, char);
3267 /* Deprecate non-approved name syntax */
3268 if (ckWARN_d(WARN_DEPRECATED)) {
3269 bool problematic = FALSE;
3272 /* For non-ut8 input, look to see that the first
3273 * character is an alpha, then loop through the rest
3274 * checking that each is a continuation */
3276 if (! isALPHAU(*i)) problematic = TRUE;
3277 else for (i = s + 1; i < e; i++) {
3278 if (isCHARNAME_CONT(*i)) continue;
3284 /* Similarly for utf8. For invariants can check
3285 * directly. We accept anything above the latin1
3286 * range because it is immaterial to Perl if it is
3287 * correct or not, and is expensive to check. But
3288 * it is fairly easy in the latin1 range to convert
3289 * the variants into a single character and check
3291 if (UTF8_IS_INVARIANT(*i)) {
3292 if (! isALPHAU(*i)) problematic = TRUE;
3293 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3294 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3300 if (! problematic) for (i = s + UTF8SKIP(s);
3304 if (UTF8_IS_INVARIANT(*i)) {
3305 if (isCHARNAME_CONT(*i)) continue;
3306 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3308 } else if (isCHARNAME_CONT(
3310 UTF8_ACCUMULATE(*i, *(i+1)))))
3319 /* The e-i passed to the final %.*s makes sure that
3320 * should the trailing NUL be missing that this
3321 * print won't run off the end of the string */
3322 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3323 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3324 (int)(i - s + 1), s, (int)(e - i), i + 1);
3327 } /* End \N{NAME} */
3330 native_range = FALSE; /* \N{} is defined to be Unicode */
3332 s = e + 1; /* Point to just after the '}' */
3335 /* \c is a control character */
3339 *d++ = grok_bslash_c(*s++, 1);
3342 yyerror("Missing control char name in \\c");
3346 /* printf-style backslashes, formfeeds, newlines, etc */
3348 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3351 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3354 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3357 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3360 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3363 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3366 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3372 } /* end if (backslash) */
3379 /* If we started with encoded form, or already know we want it,
3380 then encode the next character */
3381 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3385 /* One might think that it is wasted effort in the case of the
3386 * source being utf8 (this_utf8 == TRUE) to take the next character
3387 * in the source, convert it to an unsigned value, and then convert
3388 * it back again. But the source has not been validated here. The
3389 * routine that does the conversion checks for errors like
3392 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3393 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3395 SvCUR_set(sv, d - SvPVX_const(sv));
3398 /* See Note on sizing above. */
3399 sv_utf8_upgrade_flags_grow(sv,
3400 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3401 need + (STRLEN)(send - s) + 1);
3402 d = SvPVX(sv) + SvCUR(sv);
3404 } else if (need > len) {
3405 /* encoded value larger than old, may need extra space (NOTE:
3406 * SvCUR() is not set correctly here). See Note on sizing
3408 const STRLEN off = d - SvPVX_const(sv);
3409 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3413 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3415 if (uv > 255 && !dorange)
3416 native_range = FALSE;
3420 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3422 } /* while loop to process each character */
3424 /* terminate the string and set up the sv */
3426 SvCUR_set(sv, d - SvPVX_const(sv));
3427 if (SvCUR(sv) >= SvLEN(sv))
3428 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3431 if (PL_encoding && !has_utf8) {
3432 sv_recode_to_utf8(sv, PL_encoding);
3438 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3439 PL_sublex_info.sub_op->op_private |=
3440 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3444 /* shrink the sv if we allocated more than we used */
3445 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3446 SvPV_shrink_to_cur(sv);
3449 /* return the substring (via pl_yylval) only if we parsed anything */
3450 if (s > PL_bufptr) {
3451 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3452 const char *const key = PL_lex_inpat ? "qr" : "q";
3453 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3457 if (PL_lex_inwhat == OP_TRANS) {
3460 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3468 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3471 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3478 * Returns TRUE if there's more to the expression (e.g., a subscript),
3481 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3483 * ->[ and ->{ return TRUE
3484 * { and [ outside a pattern are always subscripts, so return TRUE
3485 * if we're outside a pattern and it's not { or [, then return FALSE
3486 * if we're in a pattern and the first char is a {
3487 * {4,5} (any digits around the comma) returns FALSE
3488 * if we're in a pattern and the first char is a [
3490 * [SOMETHING] has a funky algorithm to decide whether it's a
3491 * character class or not. It has to deal with things like
3492 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3493 * anything else returns TRUE
3496 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3499 S_intuit_more(pTHX_ register char *s)
3503 PERL_ARGS_ASSERT_INTUIT_MORE;
3505 if (PL_lex_brackets)
3507 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3509 if (*s != '{' && *s != '[')
3514 /* In a pattern, so maybe we have {n,m}. */
3522 /* On the other hand, maybe we have a character class */
3525 if (*s == ']' || *s == '^')
3528 /* this is terrifying, and it works */
3529 int weight = 2; /* let's weigh the evidence */
3531 unsigned char un_char = 255, last_un_char;
3532 const char * const send = strchr(s,']');
3533 char tmpbuf[sizeof PL_tokenbuf * 4];
3535 if (!send) /* has to be an expression */
3538 Zero(seen,256,char);
3541 else if (isDIGIT(*s)) {
3543 if (isDIGIT(s[1]) && s[2] == ']')
3549 for (; s < send; s++) {
3550 last_un_char = un_char;
3551 un_char = (unsigned char)*s;
3556 weight -= seen[un_char] * 10;
3557 if (isALNUM_lazy_if(s+1,UTF)) {
3559 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3560 len = (int)strlen(tmpbuf);
3561 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3566 else if (*s == '$' && s[1] &&
3567 strchr("[#!%*<>()-=",s[1])) {
3568 if (/*{*/ strchr("])} =",s[2]))
3577 if (strchr("wds]",s[1]))
3579 else if (seen[(U8)'\''] || seen[(U8)'"'])
3581 else if (strchr("rnftbxcav",s[1]))
3583 else if (isDIGIT(s[1])) {
3585 while (s[1] && isDIGIT(s[1]))
3595 if (strchr("aA01! ",last_un_char))
3597 if (strchr("zZ79~",s[1]))
3599 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3600 weight -= 5; /* cope with negative subscript */
3603 if (!isALNUM(last_un_char)
3604 && !(last_un_char == '$' || last_un_char == '@'
3605 || last_un_char == '&')
3606 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3611 if (keyword(tmpbuf, d - tmpbuf, 0))
3614 if (un_char == last_un_char + 1)
3616 weight -= seen[un_char];
3621 if (weight >= 0) /* probably a character class */
3631 * Does all the checking to disambiguate
3633 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3634 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3636 * First argument is the stuff after the first token, e.g. "bar".
3638 * Not a method if bar is a filehandle.
3639 * Not a method if foo is a subroutine prototyped to take a filehandle.
3640 * Not a method if it's really "Foo $bar"
3641 * Method if it's "foo $bar"
3642 * Not a method if it's really "print foo $bar"
3643 * Method if it's really "foo package::" (interpreted as package->foo)
3644 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3645 * Not a method if bar is a filehandle or package, but is quoted with
3650 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3653 char *s = start + (*start == '$');
3654 char tmpbuf[sizeof PL_tokenbuf];
3661 PERL_ARGS_ASSERT_INTUIT_METHOD;
3664 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3668 const char *proto = SvPVX_const(cv);
3679 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3680 /* start is the beginning of the possible filehandle/object,
3681 * and s is the end of it
3682 * tmpbuf is a copy of it
3685 if (*start == '$') {
3686 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3687 isUPPER(*PL_tokenbuf))
3690 len = start - SvPVX(PL_linestr);
3694 start = SvPVX(PL_linestr) + len;
3698 return *s == '(' ? FUNCMETH : METHOD;
3700 if (!keyword(tmpbuf, len, 0)) {
3701 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3705 soff = s - SvPVX(PL_linestr);
3709 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3710 if (indirgv && GvCVu(indirgv))
3712 /* filehandle or package name makes it a method */
3713 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3715 soff = s - SvPVX(PL_linestr);
3718 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3719 return 0; /* no assumptions -- "=>" quotes bearword */
3721 start_force(PL_curforce);
3722 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3723 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3724 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3726 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3731 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3733 return *s == '(' ? FUNCMETH : METHOD;
3739 /* Encoded script support. filter_add() effectively inserts a
3740 * 'pre-processing' function into the current source input stream.
3741 * Note that the filter function only applies to the current source file
3742 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3744 * The datasv parameter (which may be NULL) can be used to pass
3745 * private data to this instance of the filter. The filter function
3746 * can recover the SV using the FILTER_DATA macro and use it to
3747 * store private buffers and state information.
3749 * The supplied datasv parameter is upgraded to a PVIO type
3750 * and the IoDIRP/IoANY field is used to store the function pointer,
3751 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3752 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3753 * private use must be set using malloc'd pointers.
3757 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3766 if (!PL_rsfp_filters)
3767 PL_rsfp_filters = newAV();
3770 SvUPGRADE(datasv, SVt_PVIO);
3771 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3772 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3773 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3774 FPTR2DPTR(void *, IoANY(datasv)),
3775 SvPV_nolen(datasv)));
3776 av_unshift(PL_rsfp_filters, 1);
3777 av_store(PL_rsfp_filters, 0, datasv) ;
3782 /* Delete most recently added instance of this filter function. */
3784 Perl_filter_del(pTHX_ filter_t funcp)
3789 PERL_ARGS_ASSERT_FILTER_DEL;
3792 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3793 FPTR2DPTR(void*, funcp)));
3795 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3797 /* if filter is on top of stack (usual case) just pop it off */
3798 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3799 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3800 sv_free(av_pop(PL_rsfp_filters));
3804 /* we need to search for the correct entry and clear it */
3805 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3809 /* Invoke the idxth filter function for the current rsfp. */
3810 /* maxlen 0 = read one text line */
3812 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3817 /* This API is bad. It should have been using unsigned int for maxlen.
3818 Not sure if we want to change the API, but if not we should sanity
3819 check the value here. */
3820 const unsigned int correct_length
3829 PERL_ARGS_ASSERT_FILTER_READ;
3831 if (!PL_parser || !PL_rsfp_filters)
3833 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3834 /* Provide a default input filter to make life easy. */
3835 /* Note that we append to the line. This is handy. */
3836 DEBUG_P(PerlIO_printf(Perl_debug_log,
3837 "filter_read %d: from rsfp\n", idx));
3838 if (correct_length) {
3841 const int old_len = SvCUR(buf_sv);
3843 /* ensure buf_sv is large enough */
3844 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3845 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3846 correct_length)) <= 0) {
3847 if (PerlIO_error(PL_rsfp))
3848 return -1; /* error */
3850 return 0 ; /* end of file */
3852 SvCUR_set(buf_sv, old_len + len) ;
3853 SvPVX(buf_sv)[old_len + len] = '\0';
3856 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3857 if (PerlIO_error(PL_rsfp))
3858 return -1; /* error */
3860 return 0 ; /* end of file */
3863 return SvCUR(buf_sv);
3865 /* Skip this filter slot if filter has been deleted */
3866 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3867 DEBUG_P(PerlIO_printf(Perl_debug_log,
3868 "filter_read %d: skipped (filter deleted)\n",
3870 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3872 /* Get function pointer hidden within datasv */
3873 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3874 DEBUG_P(PerlIO_printf(Perl_debug_log,
3875 "filter_read %d: via function %p (%s)\n",
3876 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3877 /* Call function. The function is expected to */
3878 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3879 /* Return: <0:error, =0:eof, >0:not eof */
3880 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3884 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3888 PERL_ARGS_ASSERT_FILTER_GETS;
3890 #ifdef PERL_CR_FILTER
3891 if (!PL_rsfp_filters) {
3892 filter_add(S_cr_textfilter,NULL);
3895 if (PL_rsfp_filters) {
3897 SvCUR_set(sv, 0); /* start with empty line */
3898 if (FILTER_READ(0, sv, 0) > 0)
3899 return ( SvPVX(sv) ) ;
3904 return (sv_gets(sv, PL_rsfp, append));
3908 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3913 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3915 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3919 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3920 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3922 return GvHV(gv); /* Foo:: */
3925 /* use constant CLASS => 'MyClass' */
3926 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3927 if (gv && GvCV(gv)) {
3928 SV * const sv = cv_const_sv(GvCV(gv));
3930 pkgname = SvPV_const(sv, len);
3933 return gv_stashpvn(pkgname, len, 0);
3937 * S_readpipe_override
3938 * Check whether readpipe() is overriden, and generates the appropriate
3939 * optree, provided sublex_start() is called afterwards.
3942 S_readpipe_override(pTHX)
3945 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3946 pl_yylval.ival = OP_BACKTICK;
3948 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3950 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3951 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3952 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3954 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3955 op_append_elem(OP_LIST,
3956 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3957 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3964 * The intent of this yylex wrapper is to minimize the changes to the
3965 * tokener when we aren't interested in collecting madprops. It remains
3966 * to be seen how successful this strategy will be...
3973 char *s = PL_bufptr;
3975 /* make sure PL_thiswhite is initialized */
3979 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3980 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
3981 return S_pending_ident(aTHX);
3983 /* previous token ate up our whitespace? */
3984 if (!PL_lasttoke && PL_nextwhite) {
3985 PL_thiswhite = PL_nextwhite;
3989 /* isolate the token, and figure out where it is without whitespace */
3990 PL_realtokenstart = -1;
3994 assert(PL_curforce < 0);
3996 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3997 if (!PL_thistoken) {
3998 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3999 PL_thistoken = newSVpvs("");
4001 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4002 PL_thistoken = newSVpvn(tstart, s - tstart);
4005 if (PL_thismad) /* install head */
4006 CURMAD('X', PL_thistoken);
4009 /* last whitespace of a sublex? */
4010 if (optype == ')' && PL_endwhite) {
4011 CURMAD('X', PL_endwhite);
4016 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4017 if (!PL_thiswhite && !PL_endwhite && !optype) {
4018 sv_free(PL_thistoken);
4023 /* put off final whitespace till peg */
4024 if (optype == ';' && !PL_rsfp) {
4025 PL_nextwhite = PL_thiswhite;
4028 else if (PL_thisopen) {
4029 CURMAD('q', PL_thisopen);
4031 sv_free(PL_thistoken);
4035 /* Store actual token text as madprop X */
4036 CURMAD('X', PL_thistoken);
4040 /* add preceding whitespace as madprop _ */
4041 CURMAD('_', PL_thiswhite);
4045 /* add quoted material as madprop = */
4046 CURMAD('=', PL_thisstuff);
4050 /* add terminating quote as madprop Q */
4051 CURMAD('Q', PL_thisclose);
4055 /* special processing based on optype */
4059 /* opval doesn't need a TOKEN since it can already store mp */
4069 if (pl_yylval.opval)
4070 append_madprops(PL_thismad, pl_yylval.opval, 0);
4078 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4087 /* remember any fake bracket that lexer is about to discard */
4088 if (PL_lex_brackets == 1 &&
4089 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4092 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4095 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4096 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4099 break; /* don't bother looking for trailing comment */
4108 /* attach a trailing comment to its statement instead of next token */
4112 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4114 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4116 if (*s == '\n' || *s == '#') {
4117 while (s < PL_bufend && *s != '\n')
4121 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4122 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4139 /* Create new token struct. Note: opvals return early above. */
4140 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4147 S_tokenize_use(pTHX_ int is_use, char *s) {
4150 PERL_ARGS_ASSERT_TOKENIZE_USE;
4152 if (PL_expect != XSTATE)
4153 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4154 is_use ? "use" : "no"));
4156 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4157 s = force_version(s, TRUE);
4158 if (*s == ';' || *s == '}'
4159 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4160 start_force(PL_curforce);
4161 NEXTVAL_NEXTTOKE.opval = NULL;
4164 else if (*s == 'v') {
4165 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4166 s = force_version(s, FALSE);
4170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4171 s = force_version(s, FALSE);
4173 pl_yylval.ival = is_use;
4177 static const char* const exp_name[] =
4178 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4179 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4186 Works out what to call the token just pulled out of the input
4187 stream. The yacc parser takes care of taking the ops we return and
4188 stitching them into a tree.
4194 if read an identifier
4195 if we're in a my declaration
4196 croak if they tried to say my($foo::bar)
4197 build the ops for a my() declaration
4198 if it's an access to a my() variable
4199 are we in a sort block?
4200 croak if my($a); $a <=> $b
4201 build ops for access to a my() variable
4202 if in a dq string, and they've said @foo and we can't find @foo
4204 build ops for a bareword
4205 if we already built the token before, use it.
4210 #pragma segment Perl_yylex
4216 register char *s = PL_bufptr;
4222 /* orig_keyword, gvp, and gv are initialized here because
4223 * jump to the label just_a_word_zero can bypass their
4224 * initialization later. */
4225 I32 orig_keyword = 0;
4230 SV* tmp = newSVpvs("");
4231 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4232 (IV)CopLINE(PL_curcop),
4233 lex_state_names[PL_lex_state],
4234 exp_name[PL_expect],
4235 pv_display(tmp, s, strlen(s), 0, 60));
4238 /* check if there's an identifier for us to look at */
4239 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4240 return REPORT(S_pending_ident(aTHX));
4242 /* no identifier pending identification */
4244 switch (PL_lex_state) {
4246 case LEX_NORMAL: /* Some compilers will produce faster */
4247 case LEX_INTERPNORMAL: /* code if we comment these out. */
4251 /* when we've already built the next token, just pull it out of the queue */
4255 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4257 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4258 PL_nexttoke[PL_lasttoke].next_mad = 0;
4259 if (PL_thismad && PL_thismad->mad_key == '_') {
4260 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4261 PL_thismad->mad_val = 0;
4262 mad_free(PL_thismad);
4267 PL_lex_state = PL_lex_defer;
4268 PL_expect = PL_lex_expect;
4269 PL_lex_defer = LEX_NORMAL;
4270 if (!PL_nexttoke[PL_lasttoke].next_type)
4275 pl_yylval = PL_nextval[PL_nexttoke];
4277 PL_lex_state = PL_lex_defer;
4278 PL_expect = PL_lex_expect;
4279 PL_lex_defer = LEX_NORMAL;
4283 /* FIXME - can these be merged? */
4284 return(PL_nexttoke[PL_lasttoke].next_type);
4286 return REPORT(PL_nexttype[PL_nexttoke]);
4289 /* interpolated case modifiers like \L \U, including \Q and \E.
4290 when we get here, PL_bufptr is at the \
4292 case LEX_INTERPCASEMOD:
4294 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4295 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4297 /* handle \E or end of string */
4298 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4300 if (PL_lex_casemods) {
4301 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4302 PL_lex_casestack[PL_lex_casemods] = '\0';
4304 if (PL_bufptr != PL_bufend
4305 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4307 PL_lex_state = LEX_INTERPCONCAT;
4310 PL_thistoken = newSVpvs("\\E");
4316 while (PL_bufptr != PL_bufend &&
4317 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4319 PL_thiswhite = newSVpvs("");
4320 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4324 if (PL_bufptr != PL_bufend)
4327 PL_lex_state = LEX_INTERPCONCAT;
4331 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4332 "### Saw case modifier\n"); });
4334 if (s[1] == '\\' && s[2] == 'E') {
4337 PL_thiswhite = newSVpvs("");
4338 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4341 PL_lex_state = LEX_INTERPCONCAT;
4346 if (!PL_madskills) /* when just compiling don't need correct */
4347 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4348 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4349 if ((*s == 'L' || *s == 'U') &&
4350 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4351 PL_lex_casestack[--PL_lex_casemods] = '\0';
4354 if (PL_lex_casemods > 10)
4355 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4356 PL_lex_casestack[PL_lex_casemods++] = *s;
4357 PL_lex_casestack[PL_lex_casemods] = '\0';
4358 PL_lex_state = LEX_INTERPCONCAT;
4359 start_force(PL_curforce);
4360 NEXTVAL_NEXTTOKE.ival = 0;
4362 start_force(PL_curforce);
4364 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4366 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4368 NEXTVAL_NEXTTOKE.ival = OP_LC;
4370 NEXTVAL_NEXTTOKE.ival = OP_UC;
4372 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4374 Perl_croak(aTHX_ "panic: yylex");
4376 SV* const tmpsv = newSVpvs("\\ ");
4377 /* replace the space with the character we want to escape
4379 SvPVX(tmpsv)[1] = *s;
4385 if (PL_lex_starts) {
4391 sv_free(PL_thistoken);
4392 PL_thistoken = newSVpvs("");
4395 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4396 if (PL_lex_casemods == 1 && PL_lex_inpat)
4405 case LEX_INTERPPUSH:
4406 return REPORT(sublex_push());
4408 case LEX_INTERPSTART:
4409 if (PL_bufptr == PL_bufend)
4410 return REPORT(sublex_done());
4411 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4412 "### Interpolated variable\n"); });
4414 PL_lex_dojoin = (*PL_bufptr == '@');
4415 PL_lex_state = LEX_INTERPNORMAL;
4416 if (PL_lex_dojoin) {
4417 start_force(PL_curforce);
4418 NEXTVAL_NEXTTOKE.ival = 0;
4420 start_force(PL_curforce);
4421 force_ident("\"", '$');
4422 start_force(PL_curforce);
4423 NEXTVAL_NEXTTOKE.ival = 0;
4425 start_force(PL_curforce);
4426 NEXTVAL_NEXTTOKE.ival = 0;
4428 start_force(PL_curforce);
4429 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4432 if (PL_lex_starts++) {
4437 sv_free(PL_thistoken);
4438 PL_thistoken = newSVpvs("");
4441 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4442 if (!PL_lex_casemods && PL_lex_inpat)
4449 case LEX_INTERPENDMAYBE:
4450 if (intuit_more(PL_bufptr)) {
4451 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4457 if (PL_lex_dojoin) {
4458 PL_lex_dojoin = FALSE;
4459 PL_lex_state = LEX_INTERPCONCAT;
4463 sv_free(PL_thistoken);
4464 PL_thistoken = newSVpvs("");
4469 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4470 && SvEVALED(PL_lex_repl))
4472 if (PL_bufptr != PL_bufend)
4473 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4477 case LEX_INTERPCONCAT:
4479 if (PL_lex_brackets)
4480 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4482 if (PL_bufptr == PL_bufend)
4483 return REPORT(sublex_done());
4485 if (SvIVX(PL_linestr) == '\'') {
4486 SV *sv = newSVsv(PL_linestr);
4489 else if ( PL_hints & HINT_NEW_RE )
4490 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4491 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4495 s = scan_const(PL_bufptr);
4497 PL_lex_state = LEX_INTERPCASEMOD;
4499 PL_lex_state = LEX_INTERPSTART;
4502 if (s != PL_bufptr) {
4503 start_force(PL_curforce);
4505 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4507 NEXTVAL_NEXTTOKE = pl_yylval;
4510 if (PL_lex_starts++) {
4514 sv_free(PL_thistoken);
4515 PL_thistoken = newSVpvs("");
4518 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4519 if (!PL_lex_casemods && PL_lex_inpat)
4532 PL_lex_state = LEX_NORMAL;
4533 s = scan_formline(PL_bufptr);
4534 if (!PL_lex_formbrack)
4540 PL_oldoldbufptr = PL_oldbufptr;
4546 sv_free(PL_thistoken);
4549 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4553 if (isIDFIRST_lazy_if(s,UTF))
4556 unsigned char c = *s;
4557 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4558 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4559 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4564 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4568 goto fake_eof; /* emulate EOF on ^D or ^Z */
4577 if (PL_lex_brackets) {
4578 yyerror((const char *)
4580 ? "Format not terminated"
4581 : "Missing right curly or square bracket"));
4583 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4584 "### Tokener got EOF\n");
4588 if (s++ < PL_bufend)
4589 goto retry; /* ignore stray nulls */
4592 if (!PL_in_eval && !PL_preambled) {
4593 PL_preambled = TRUE;
4599 /* Generate a string of Perl code to load the debugger.
4600 * If PERL5DB is set, it will return the contents of that,
4601 * otherwise a compile-time require of perl5db.pl. */
4603 const char * const pdb = PerlEnv_getenv("PERL5DB");
4606 sv_setpv(PL_linestr, pdb);
4607 sv_catpvs(PL_linestr,";");
4609 SETERRNO(0,SS_NORMAL);
4610 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4613 sv_setpvs(PL_linestr,"");
4614 if (PL_preambleav) {
4615 SV **svp = AvARRAY(PL_preambleav);
4616 SV **const end = svp + AvFILLp(PL_preambleav);
4618 sv_catsv(PL_linestr, *svp);
4620 sv_catpvs(PL_linestr, ";");
4622 sv_free(MUTABLE_SV(PL_preambleav));
4623 PL_preambleav = NULL;
4626 sv_catpvs(PL_linestr,
4627 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4628 if (PL_minus_n || PL_minus_p) {
4629 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4631 sv_catpvs(PL_linestr,"chomp;");
4634 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4635 || *PL_splitstr == '"')
4636 && strchr(PL_splitstr + 1, *PL_splitstr))
4637 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4639 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4640 bytes can be used as quoting characters. :-) */
4641 const char *splits = PL_splitstr;
4642 sv_catpvs(PL_linestr, "our @F=split(q\0");
4645 if (*splits == '\\')
4646 sv_catpvn(PL_linestr, splits, 1);
4647 sv_catpvn(PL_linestr, splits, 1);
4648 } while (*splits++);
4649 /* This loop will embed the trailing NUL of
4650 PL_linestr as the last thing it does before
4652 sv_catpvs(PL_linestr, ");");
4656 sv_catpvs(PL_linestr,"our @F=split(' ');");
4659 sv_catpvs(PL_linestr, "\n");
4660 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4661 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4662 PL_last_lop = PL_last_uni = NULL;
4663 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4664 update_debugger_info(PL_linestr, NULL, 0);
4669 bof = PL_rsfp ? TRUE : FALSE;
4672 fake_eof = LEX_FAKE_EOF;
4674 PL_bufptr = PL_bufend;
4675 CopLINE_inc(PL_curcop);
4676 if (!lex_next_chunk(fake_eof)) {
4677 CopLINE_dec(PL_curcop);
4679 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4681 CopLINE_dec(PL_curcop);
4684 PL_realtokenstart = -1;
4687 /* If it looks like the start of a BOM or raw UTF-16,
4688 * check if it in fact is. */
4689 if (bof && PL_rsfp &&
4694 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4696 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4697 s = swallow_bom((U8*)s);
4701 /* Incest with pod. */
4704 sv_catsv(PL_thiswhite, PL_linestr);
4706 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4707 sv_setpvs(PL_linestr, "");
4708 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4709 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4710 PL_last_lop = PL_last_uni = NULL;
4711 PL_doextract = FALSE;
4716 } while (PL_doextract);
4717 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4718 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4719 PL_last_lop = PL_last_uni = NULL;
4720 if (CopLINE(PL_curcop) == 1) {
4721 while (s < PL_bufend && isSPACE(*s))
4723 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4727 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4731 if (*s == '#' && *(s+1) == '!')
4733 #ifdef ALTERNATE_SHEBANG
4735 static char const as[] = ALTERNATE_SHEBANG;
4736 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4737 d = s + (sizeof(as) - 1);
4739 #endif /* ALTERNATE_SHEBANG */
4748 while (*d && !isSPACE(*d))
4752 #ifdef ARG_ZERO_IS_SCRIPT
4753 if (ipathend > ipath) {
4755 * HP-UX (at least) sets argv[0] to the script name,
4756 * which makes $^X incorrect. And Digital UNIX and Linux,
4757 * at least, set argv[0] to the basename of the Perl
4758 * interpreter. So, having found "#!", we'll set it right.
4760 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4762 assert(SvPOK(x) || SvGMAGICAL(x));
4763 if (sv_eq(x, CopFILESV(PL_curcop))) {
4764 sv_setpvn(x, ipath, ipathend - ipath);
4770 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4771 const char * const lstart = SvPV_const(x,llen);
4773 bstart += blen - llen;
4774 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4775 sv_setpvn(x, ipath, ipathend - ipath);
4780 TAINT_NOT; /* $^X is always tainted, but that's OK */
4782 #endif /* ARG_ZERO_IS_SCRIPT */
4787 d = instr(s,"perl -");
4789 d = instr(s,"perl");
4791 /* avoid getting into infinite loops when shebang
4792 * line contains "Perl" rather than "perl" */
4794 for (d = ipathend-4; d >= ipath; --d) {
4795 if ((*d == 'p' || *d == 'P')
4796 && !ibcmp(d, "perl", 4))
4806 #ifdef ALTERNATE_SHEBANG
4808 * If the ALTERNATE_SHEBANG on this system starts with a
4809 * character that can be part of a Perl expression, then if
4810 * we see it but not "perl", we're probably looking at the
4811 * start of Perl code, not a request to hand off to some
4812 * other interpreter. Similarly, if "perl" is there, but
4813 * not in the first 'word' of the line, we assume the line
4814 * contains the start of the Perl program.
4816 if (d && *s != '#') {
4817 const char *c = ipath;
4818 while (*c && !strchr("; \t\r\n\f\v#", *c))
4821 d = NULL; /* "perl" not in first word; ignore */
4823 *s = '#'; /* Don't try to parse shebang line */
4825 #endif /* ALTERNATE_SHEBANG */
4830 !instr(s,"indir") &&
4831 instr(PL_origargv[0],"perl"))
4838 while (s < PL_bufend && isSPACE(*s))
4840 if (s < PL_bufend) {
4841 Newx(newargv,PL_origargc+3,char*);
4843 while (s < PL_bufend && !isSPACE(*s))
4846 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4849 newargv = PL_origargv;
4852 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4854 Perl_croak(aTHX_ "Can't exec %s", ipath);
4857 while (*d && !isSPACE(*d))
4859 while (SPACE_OR_TAB(*d))
4863 const bool switches_done = PL_doswitches;
4864 const U32 oldpdb = PL_perldb;
4865 const bool oldn = PL_minus_n;
4866 const bool oldp = PL_minus_p;
4870 bool baduni = FALSE;
4872 const char *d2 = d1 + 1;
4873 if (parse_unicode_opts((const char **)&d2)
4877 if (baduni || *d1 == 'M' || *d1 == 'm') {
4878 const char * const m = d1;
4879 while (*d1 && !isSPACE(*d1))
4881 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4884 d1 = moreswitches(d1);
4886 if (PL_doswitches && !switches_done) {
4887 int argc = PL_origargc;
4888 char **argv = PL_origargv;
4891 } while (argc && argv[0][0] == '-' && argv[0][1]);
4892 init_argv_symbols(argc,argv);
4894 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4895 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4896 /* if we have already added "LINE: while (<>) {",
4897 we must not do it again */
4899 sv_setpvs(PL_linestr, "");
4900 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4901 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4902 PL_last_lop = PL_last_uni = NULL;
4903 PL_preambled = FALSE;
4904 if (PERLDB_LINE || PERLDB_SAVESRC)
4905 (void)gv_fetchfile(PL_origfilename);
4912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4914 PL_lex_state = LEX_FORMLINE;
4919 #ifdef PERL_STRICT_CR
4920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4924 case ' ': case '\t': case '\f': case 013:
4926 PL_realtokenstart = -1;
4928 PL_thiswhite = newSVpvs("");
4929 sv_catpvn(PL_thiswhite, s, 1);
4936 PL_realtokenstart = -1;
4940 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4941 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4942 /* handle eval qq[#line 1 "foo"\n ...] */
4943 CopLINE_dec(PL_curcop);
4946 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4948 if (!PL_in_eval || PL_rsfp)
4953 while (d < PL_bufend && *d != '\n')
4957 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4958 Perl_croak(aTHX_ "panic: input overflow");
4961 PL_thiswhite = newSVpvn(s, d - s);
4966 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4968 PL_lex_state = LEX_FORMLINE;
4974 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4975 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4978 TOKEN(PEG); /* make sure any #! line is accessible */
4983 /* if (PL_madskills && PL_lex_formbrack) { */
4985 while (d < PL_bufend && *d != '\n')
4989 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4990 Perl_croak(aTHX_ "panic: input overflow");
4991 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4993 PL_thiswhite = newSVpvs("");
4994 if (CopLINE(PL_curcop) == 1) {
4995 sv_setpvs(PL_thiswhite, "");
4998 sv_catpvn(PL_thiswhite, s, d - s);
5012 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5020 while (s < PL_bufend && SPACE_OR_TAB(*s))
5023 if (strnEQ(s,"=>",2)) {
5024 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5025 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5026 OPERATOR('-'); /* unary minus */
5028 PL_last_uni = PL_oldbufptr;
5030 case 'r': ftst = OP_FTEREAD; break;
5031 case 'w': ftst = OP_FTEWRITE; break;
5032 case 'x': ftst = OP_FTEEXEC; break;
5033 case 'o': ftst = OP_FTEOWNED; break;
5034 case 'R': ftst = OP_FTRREAD; break;
5035 case 'W': ftst = OP_FTRWRITE; break;
5036 case 'X': ftst = OP_FTREXEC; break;
5037 case 'O': ftst = OP_FTROWNED; break;
5038 case 'e': ftst = OP_FTIS; break;
5039 case 'z': ftst = OP_FTZERO; break;
5040 case 's': ftst = OP_FTSIZE; break;
5041 case 'f': ftst = OP_FTFILE; break;
5042 case 'd': ftst = OP_FTDIR; break;
5043 case 'l': ftst = OP_FTLINK; break;
5044 case 'p': ftst = OP_FTPIPE; break;
5045 case 'S': ftst = OP_FTSOCK; break;
5046 case 'u': ftst = OP_FTSUID; break;
5047 case 'g': ftst = OP_FTSGID; break;
5048 case 'k': ftst = OP_FTSVTX; break;
5049 case 'b': ftst = OP_FTBLK; break;
5050 case 'c': ftst = OP_FTCHR; break;
5051 case 't': ftst = OP_FTTTY; break;
5052 case 'T': ftst = OP_FTTEXT; break;
5053 case 'B': ftst = OP_FTBINARY; break;
5054 case 'M': case 'A': case 'C':
5055 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5057 case 'M': ftst = OP_FTMTIME; break;
5058 case 'A': ftst = OP_FTATIME; break;
5059 case 'C': ftst = OP_FTCTIME; break;
5067 PL_last_lop_op = (OPCODE)ftst;
5068 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5069 "### Saw file test %c\n", (int)tmp);
5074 /* Assume it was a minus followed by a one-letter named
5075 * subroutine call (or a -bareword), then. */
5076 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5077 "### '-%c' looked like a file test but was not\n",
5084 const char tmp = *s++;
5087 if (PL_expect == XOPERATOR)
5092 else if (*s == '>') {
5095 if (isIDFIRST_lazy_if(s,UTF)) {
5096 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5104 if (PL_expect == XOPERATOR)
5107 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5109 OPERATOR('-'); /* unary minus */
5115 const char tmp = *s++;
5118 if (PL_expect == XOPERATOR)
5123 if (PL_expect == XOPERATOR)
5126 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5133 if (PL_expect != XOPERATOR) {
5134 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5135 PL_expect = XOPERATOR;
5136 force_ident(PL_tokenbuf, '*');
5149 if (PL_expect == XOPERATOR) {
5153 PL_tokenbuf[0] = '%';
5154 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5155 sizeof PL_tokenbuf - 1, FALSE);
5156 if (!PL_tokenbuf[1]) {
5159 PL_pending_ident = '%';
5168 const char tmp = *s++;
5173 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5180 const char tmp = *s++;
5186 goto just_a_word_zero_gv;
5189 switch (PL_expect) {
5195 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5197 PL_bufptr = s; /* update in case we back off */
5199 deprecate(":= for an empty attribute list");
5206 PL_expect = XTERMBLOCK;
5209 stuffstart = s - SvPVX(PL_linestr) - 1;
5213 while (isIDFIRST_lazy_if(s,UTF)) {
5216 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5217 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5218 if (tmp < 0) tmp = -tmp;
5233 sv = newSVpvn(s, len);
5235 d = scan_str(d,TRUE,TRUE);
5237 /* MUST advance bufptr here to avoid bogus
5238 "at end of line" context messages from yyerror().
5240 PL_bufptr = s + len;
5241 yyerror("Unterminated attribute parameter in attribute list");
5245 return REPORT(0); /* EOF indicator */
5249 sv_catsv(sv, PL_lex_stuff);
5250 attrs = op_append_elem(OP_LIST, attrs,
5251 newSVOP(OP_CONST, 0, sv));
5252 SvREFCNT_dec(PL_lex_stuff);
5253 PL_lex_stuff = NULL;
5256 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5258 if (PL_in_my == KEY_our) {
5259 deprecate(":unique");
5262 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5265 /* NOTE: any CV attrs applied here need to be part of
5266 the CVf_BUILTIN_ATTRS define in cv.h! */
5267 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5269 CvLVALUE_on(PL_compcv);
5271 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5273 deprecate(":locked");
5275 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5277 CvMETHOD_on(PL_compcv);
5279 /* After we've set the flags, it could be argued that
5280 we don't need to do the attributes.pm-based setting
5281 process, and shouldn't bother appending recognized
5282 flags. To experiment with that, uncomment the
5283 following "else". (Note that's already been
5284 uncommented. That keeps the above-applied built-in
5285 attributes from being intercepted (and possibly
5286 rejected) by a package's attribute routines, but is
5287 justified by the performance win for the common case
5288 of applying only built-in attributes.) */
5290 attrs = op_append_elem(OP_LIST, attrs,
5291 newSVOP(OP_CONST, 0,
5295 if (*s == ':' && s[1] != ':')
5298 break; /* require real whitespace or :'s */
5299 /* XXX losing whitespace on sequential attributes here */
5303 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5304 if (*s != ';' && *s != '}' && *s != tmp
5305 && (tmp != '=' || *s != ')')) {
5306 const char q = ((*s == '\'') ? '"' : '\'');
5307 /* If here for an expression, and parsed no attrs, back
5309 if (tmp == '=' && !attrs) {
5313 /* MUST advance bufptr here to avoid bogus "at end of line"
5314 context messages from yyerror().
5317 yyerror( (const char *)
5319 ? Perl_form(aTHX_ "Invalid separator character "
5320 "%c%c%c in attribute list", q, *s, q)
5321 : "Unterminated attribute list" ) );
5329 start_force(PL_curforce);
5330 NEXTVAL_NEXTTOKE.opval = attrs;
5331 CURMAD('_', PL_nextwhite);
5336 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5337 (s - SvPVX(PL_linestr)) - stuffstart);
5345 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5346 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5354 const char tmp = *s++;
5359 const char tmp = *s++;
5367 if (PL_lex_brackets <= 0)
5368 yyerror("Unmatched right square bracket");
5371 if (PL_lex_state == LEX_INTERPNORMAL) {
5372 if (PL_lex_brackets == 0) {
5373 if (*s == '-' && s[1] == '>')
5374 PL_lex_state = LEX_INTERPENDMAYBE;
5375 else if (*s != '[' && *s != '{')
5376 PL_lex_state = LEX_INTERPEND;
5383 if (PL_lex_brackets > 100) {
5384 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5386 switch (PL_expect) {
5388 if (PL_lex_formbrack) {
5392 if (PL_oldoldbufptr == PL_last_lop)
5393 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5395 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5396 OPERATOR(HASHBRACK);
5398 while (s < PL_bufend && SPACE_OR_TAB(*s))
5401 PL_tokenbuf[0] = '\0';
5402 if (d < PL_bufend && *d == '-') {
5403 PL_tokenbuf[0] = '-';
5405 while (d < PL_bufend && SPACE_OR_TAB(*d))
5408 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5409 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5411 while (d < PL_bufend && SPACE_OR_TAB(*d))
5414 const char minus = (PL_tokenbuf[0] == '-');
5415 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5423 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5428 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5433 if (PL_oldoldbufptr == PL_last_lop)
5434 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5436 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5439 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5441 /* This hack is to get the ${} in the message. */
5443 yyerror("syntax error");
5446 OPERATOR(HASHBRACK);
5448 /* This hack serves to disambiguate a pair of curlies
5449 * as being a block or an anon hash. Normally, expectation
5450 * determines that, but in cases where we're not in a
5451 * position to expect anything in particular (like inside
5452 * eval"") we have to resolve the ambiguity. This code
5453 * covers the case where the first term in the curlies is a
5454 * quoted string. Most other cases need to be explicitly
5455 * disambiguated by prepending a "+" before the opening
5456 * curly in order to force resolution as an anon hash.
5458 * XXX should probably propagate the outer expectation
5459 * into eval"" to rely less on this hack, but that could
5460 * potentially break current behavior of eval"".
5464 if (*s == '\'' || *s == '"' || *s == '`') {
5465 /* common case: get past first string, handling escapes */
5466 for (t++; t < PL_bufend && *t != *s;)
5467 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5471 else if (*s == 'q') {
5474 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5477 /* skip q//-like construct */
5479 char open, close, term;
5482 while (t < PL_bufend && isSPACE(*t))
5484 /* check for q => */
5485 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5486 OPERATOR(HASHBRACK);
5490 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5494 for (t++; t < PL_bufend; t++) {
5495 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5497 else if (*t == open)
5501 for (t++; t < PL_bufend; t++) {
5502 if (*t == '\\' && t+1 < PL_bufend)
5504 else if (*t == close && --brackets <= 0)
5506 else if (*t == open)
5513 /* skip plain q word */
5514 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5517 else if (isALNUM_lazy_if(t,UTF)) {
5519 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5522 while (t < PL_bufend && isSPACE(*t))
5524 /* if comma follows first term, call it an anon hash */
5525 /* XXX it could be a comma expression with loop modifiers */
5526 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5527 || (*t == '=' && t[1] == '>')))
5528 OPERATOR(HASHBRACK);
5529 if (PL_expect == XREF)
5532 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5538 pl_yylval.ival = CopLINE(PL_curcop);
5539 if (isSPACE(*s) || *s == '#')
5540 PL_copline = NOLINE; /* invalidate current command line number */
5545 if (PL_lex_brackets <= 0)
5546 yyerror("Unmatched right curly bracket");
5548 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5549 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5550 PL_lex_formbrack = 0;
5551 if (PL_lex_state == LEX_INTERPNORMAL) {
5552 if (PL_lex_brackets == 0) {
5553 if (PL_expect & XFAKEBRACK) {
5554 PL_expect &= XENUMMASK;
5555 PL_lex_state = LEX_INTERPEND;
5560 PL_thiswhite = newSVpvs("");
5561 sv_catpvs(PL_thiswhite,"}");
5564 return yylex(); /* ignore fake brackets */
5566 if (*s == '-' && s[1] == '>')
5567 PL_lex_state = LEX_INTERPENDMAYBE;
5568 else if (*s != '[' && *s != '{')
5569 PL_lex_state = LEX_INTERPEND;
5572 if (PL_expect & XFAKEBRACK) {
5573 PL_expect &= XENUMMASK;
5575 return yylex(); /* ignore fake brackets */
5577 start_force(PL_curforce);
5579 curmad('X', newSVpvn(s-1,1));
5580 CURMAD('_', PL_thiswhite);
5585 PL_thistoken = newSVpvs("");
5593 if (PL_expect == XOPERATOR) {
5594 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5595 && isIDFIRST_lazy_if(s,UTF))
5597 CopLINE_dec(PL_curcop);
5598 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5599 CopLINE_inc(PL_curcop);
5604 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5606 PL_expect = XOPERATOR;
5607 force_ident(PL_tokenbuf, '&');
5611 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5623 const char tmp = *s++;
5630 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5631 && strchr("+-*/%.^&|<",tmp))
5632 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5633 "Reversed %c= operator",(int)tmp);
5635 if (PL_expect == XSTATE && isALPHA(tmp) &&
5636 (s == PL_linestart+1 || s[-2] == '\n') )
5638 if (PL_in_eval && !PL_rsfp) {
5643 if (strnEQ(s,"=cut",4)) {
5659 PL_thiswhite = newSVpvs("");
5660 sv_catpvn(PL_thiswhite, PL_linestart,
5661 PL_bufend - PL_linestart);
5665 PL_doextract = TRUE;
5669 if (PL_lex_brackets < PL_lex_formbrack) {
5671 #ifdef PERL_STRICT_CR
5672 while (SPACE_OR_TAB(*t))
5674 while (SPACE_OR_TAB(*t) || *t == '\r')
5677 if (*t == '\n' || *t == '#') {
5688 const char tmp = *s++;
5690 /* was this !=~ where !~ was meant?
5691 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5693 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5694 const char *t = s+1;
5696 while (t < PL_bufend && isSPACE(*t))
5699 if (*t == '/' || *t == '?' ||
5700 ((*t == 'm' || *t == 's' || *t == 'y')
5701 && !isALNUM(t[1])) ||
5702 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5703 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5704 "!=~ should be !~");
5714 if (PL_expect != XOPERATOR) {
5715 if (s[1] != '<' && !strchr(s,'>'))
5718 s = scan_heredoc(s);
5720 s = scan_inputsymbol(s);
5721 TERM(sublex_start());
5727 SHop(OP_LEFT_SHIFT);
5741 const char tmp = *s++;
5743 SHop(OP_RIGHT_SHIFT);
5744 else if (tmp == '=')
5753 if (PL_expect == XOPERATOR) {
5754 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5755 return deprecate_commaless_var_list();
5759 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5760 PL_tokenbuf[0] = '@';
5761 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5762 sizeof PL_tokenbuf - 1, FALSE);
5763 if (PL_expect == XOPERATOR)
5764 no_op("Array length", s);
5765 if (!PL_tokenbuf[1])
5767 PL_expect = XOPERATOR;
5768 PL_pending_ident = '#';
5772 PL_tokenbuf[0] = '$';
5773 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5774 sizeof PL_tokenbuf - 1, FALSE);
5775 if (PL_expect == XOPERATOR)
5777 if (!PL_tokenbuf[1]) {
5779 yyerror("Final $ should be \\$ or $name");
5783 /* This kludge not intended to be bulletproof. */
5784 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5785 pl_yylval.opval = newSVOP(OP_CONST, 0,
5786 newSViv(CopARYBASE_get(&PL_compiling)));
5787 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5793 const char tmp = *s;
5794 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5797 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5798 && intuit_more(s)) {
5800 PL_tokenbuf[0] = '@';
5801 if (ckWARN(WARN_SYNTAX)) {
5804 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5807 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5808 while (t < PL_bufend && *t != ']')
5810 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5811 "Multidimensional syntax %.*s not supported",
5812 (int)((t - PL_bufptr) + 1), PL_bufptr);
5816 else if (*s == '{') {
5818 PL_tokenbuf[0] = '%';
5819 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5820 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5822 char tmpbuf[sizeof PL_tokenbuf];
5825 } while (isSPACE(*t));
5826 if (isIDFIRST_lazy_if(t,UTF)) {
5828 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5832 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5833 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5834 "You need to quote \"%s\"",
5841 PL_expect = XOPERATOR;
5842 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5843 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5844 if (!islop || PL_last_lop_op == OP_GREPSTART)
5845 PL_expect = XOPERATOR;
5846 else if (strchr("$@\"'`q", *s))
5847 PL_expect = XTERM; /* e.g. print $fh "foo" */
5848 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5849 PL_expect = XTERM; /* e.g. print $fh &sub */
5850 else if (isIDFIRST_lazy_if(s,UTF)) {
5851 char tmpbuf[sizeof PL_tokenbuf];
5853 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5854 if ((t2 = keyword(tmpbuf, len, 0))) {
5855 /* binary operators exclude handle interpretations */
5867 PL_expect = XTERM; /* e.g. print $fh length() */
5872 PL_expect = XTERM; /* e.g. print $fh subr() */
5875 else if (isDIGIT(*s))
5876 PL_expect = XTERM; /* e.g. print $fh 3 */
5877 else if (*s == '.' && isDIGIT(s[1]))
5878 PL_expect = XTERM; /* e.g. print $fh .3 */
5879 else if ((*s == '?' || *s == '-' || *s == '+')
5880 && !isSPACE(s[1]) && s[1] != '=')
5881 PL_expect = XTERM; /* e.g. print $fh -1 */
5882 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5884 PL_expect = XTERM; /* e.g. print $fh /.../
5885 XXX except DORDOR operator
5887 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5889 PL_expect = XTERM; /* print $fh <<"EOF" */
5892 PL_pending_ident = '$';
5896 if (PL_expect == XOPERATOR)
5898 PL_tokenbuf[0] = '@';
5899 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5900 if (!PL_tokenbuf[1]) {
5903 if (PL_lex_state == LEX_NORMAL)
5905 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5907 PL_tokenbuf[0] = '%';
5909 /* Warn about @ where they meant $. */
5910 if (*s == '[' || *s == '{') {
5911 if (ckWARN(WARN_SYNTAX)) {
5912 const char *t = s + 1;
5913 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5915 if (*t == '}' || *t == ']') {
5917 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5918 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5919 "Scalar value %.*s better written as $%.*s",
5920 (int)(t-PL_bufptr), PL_bufptr,
5921 (int)(t-PL_bufptr-1), PL_bufptr+1);
5926 PL_pending_ident = '@';
5929 case '/': /* may be division, defined-or, or pattern */
5930 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5934 case '?': /* may either be conditional or pattern */
5935 if (PL_expect == XOPERATOR) {
5943 /* A // operator. */
5953 /* Disable warning on "study /blah/" */
5954 if (PL_oldoldbufptr == PL_last_uni
5955 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5956 || memNE(PL_last_uni, "study", 5)
5957 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5960 s = scan_pat(s,OP_MATCH);
5961 TERM(sublex_start());
5965 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5966 #ifdef PERL_STRICT_CR
5969 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5971 && (s == PL_linestart || s[-1] == '\n') )
5973 PL_lex_formbrack = 0;
5977 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5981 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5987 pl_yylval.ival = OPf_SPECIAL;
5996 case '0': case '1': case '2': case '3': case '4':
5997 case '5': case '6': case '7': case '8': case '9':
5998 s = scan_num(s, &pl_yylval);
5999 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6000 if (PL_expect == XOPERATOR)
6005 s = scan_str(s,!!PL_madskills,FALSE);
6006 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6007 if (PL_expect == XOPERATOR) {
6008 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6009 return deprecate_commaless_var_list();
6016 pl_yylval.ival = OP_CONST;
6017 TERM(sublex_start());
6020 s = scan_str(s,!!PL_madskills,FALSE);
6021 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6022 if (PL_expect == XOPERATOR) {
6023 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6024 return deprecate_commaless_var_list();
6031 pl_yylval.ival = OP_CONST;
6032 /* FIXME. I think that this can be const if char *d is replaced by
6033 more localised variables. */
6034 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6035 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6036 pl_yylval.ival = OP_STRINGIFY;
6040 TERM(sublex_start());
6043 s = scan_str(s,!!PL_madskills,FALSE);
6044 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6045 if (PL_expect == XOPERATOR)
6046 no_op("Backticks",s);
6049 readpipe_override();
6050 TERM(sublex_start());
6054 if (PL_lex_inwhat && isDIGIT(*s))
6055 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6057 if (PL_expect == XOPERATOR)
6058 no_op("Backslash",s);
6062 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6063 char *start = s + 2;
6064 while (isDIGIT(*start) || *start == '_')
6066 if (*start == '.' && isDIGIT(start[1])) {
6067 s = scan_num(s, &pl_yylval);
6070 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6071 else if (!isALPHA(*start) && (PL_expect == XTERM
6072 || PL_expect == XREF || PL_expect == XSTATE
6073 || PL_expect == XTERMORDORDOR)) {
6074 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6076 s = scan_num(s, &pl_yylval);
6083 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6126 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6128 /* Some keywords can be followed by any delimiter, including ':' */
6129 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6130 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6131 (PL_tokenbuf[0] == 'q' &&
6132 strchr("qwxr", PL_tokenbuf[1])))));
6134 /* x::* is just a word, unless x is "CORE" */
6135 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6139 while (d < PL_bufend && isSPACE(*d))
6140 d++; /* no comments skipped here, or s### is misparsed */
6142 /* Is this a word before a => operator? */
6143 if (*d == '=' && d[1] == '>') {
6146 = (OP*)newSVOP(OP_CONST, 0,
6147 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6148 pl_yylval.opval->op_private = OPpCONST_BARE;
6152 /* Check for plugged-in keyword */
6156 char *saved_bufptr = PL_bufptr;
6158 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6160 if (result == KEYWORD_PLUGIN_DECLINE) {
6161 /* not a plugged-in keyword */
6162 PL_bufptr = saved_bufptr;
6163 } else if (result == KEYWORD_PLUGIN_STMT) {
6164 pl_yylval.opval = o;
6167 return REPORT(PLUGSTMT);
6168 } else if (result == KEYWORD_PLUGIN_EXPR) {
6169 pl_yylval.opval = o;
6171 PL_expect = XOPERATOR;
6172 return REPORT(PLUGEXPR);
6174 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6179 /* Check for built-in keyword */
6180 tmp = keyword(PL_tokenbuf, len, 0);
6182 /* Is this a label? */
6183 if (!anydelim && PL_expect == XSTATE
6184 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6186 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6191 if (tmp < 0) { /* second-class keyword? */
6192 GV *ogv = NULL; /* override (winner) */
6193 GV *hgv = NULL; /* hidden (loser) */
6194 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6196 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6199 if (GvIMPORTED_CV(gv))
6201 else if (! CvMETHOD(cv))
6205 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6206 (gv = *gvp) && isGV_with_GP(gv) &&
6207 GvCVu(gv) && GvIMPORTED_CV(gv))
6214 tmp = 0; /* overridden by import or by GLOBAL */
6217 && -tmp==KEY_lock /* XXX generalizable kludge */
6220 tmp = 0; /* any sub overrides "weak" keyword */
6222 else { /* no override */
6224 if (tmp == KEY_dump) {
6225 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6226 "dump() better written as CORE::dump()");
6230 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6231 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6232 "Ambiguous call resolved as CORE::%s(), "
6233 "qualify as such or use &",
6241 default: /* not a keyword */
6242 /* Trade off - by using this evil construction we can pull the
6243 variable gv into the block labelled keylookup. If not, then
6244 we have to give it function scope so that the goto from the
6245 earlier ':' case doesn't bypass the initialisation. */
6247 just_a_word_zero_gv:
6255 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6259 SV *nextPL_nextwhite = 0;
6263 /* Get the rest if it looks like a package qualifier */
6265 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6267 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6270 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6271 *s == '\'' ? "'" : "::");
6276 if (PL_expect == XOPERATOR) {
6277 if (PL_bufptr == PL_linestart) {
6278 CopLINE_dec(PL_curcop);
6279 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6280 CopLINE_inc(PL_curcop);
6283 no_op("Bareword",s);
6286 /* Look for a subroutine with this name in current package,
6287 unless name is "Foo::", in which case Foo is a bearword
6288 (and a package name). */
6290 if (len > 2 && !PL_madskills &&
6291 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6293 if (ckWARN(WARN_BAREWORD)
6294 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6295 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6296 "Bareword \"%s\" refers to nonexistent package",
6299 PL_tokenbuf[len] = '\0';
6305 /* Mustn't actually add anything to a symbol table.
6306 But also don't want to "initialise" any placeholder
6307 constants that might already be there into full
6308 blown PVGVs with attached PVCV. */
6309 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6310 GV_NOADD_NOINIT, SVt_PVCV);
6315 /* if we saw a global override before, get the right name */
6317 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6318 len ? len : strlen(PL_tokenbuf));
6320 SV * const tmp_sv = sv;
6321 sv = newSVpvs("CORE::GLOBAL::");
6322 sv_catsv(sv, tmp_sv);
6323 SvREFCNT_dec(tmp_sv);
6327 if (PL_madskills && !PL_thistoken) {
6328 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6329 PL_thistoken = newSVpvn(start,s - start);
6330 PL_realtokenstart = s - SvPVX(PL_linestr);
6334 /* Presume this is going to be a bareword of some sort. */
6336 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6337 pl_yylval.opval->op_private = OPpCONST_BARE;
6339 /* And if "Foo::", then that's what it certainly is. */
6344 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6345 const_op->op_private = OPpCONST_BARE;
6346 rv2cv_op = newCVREF(0, const_op);
6348 cv = rv2cv_op_cv(rv2cv_op, 0);
6350 /* See if it's the indirect object for a list operator. */
6352 if (PL_oldoldbufptr &&
6353 PL_oldoldbufptr < PL_bufptr &&
6354 (PL_oldoldbufptr == PL_last_lop
6355 || PL_oldoldbufptr == PL_last_uni) &&
6356 /* NO SKIPSPACE BEFORE HERE! */
6357 (PL_expect == XREF ||
6358 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6360 bool immediate_paren = *s == '(';
6362 /* (Now we can afford to cross potential line boundary.) */
6363 s = SKIPSPACE2(s,nextPL_nextwhite);
6365 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6368 /* Two barewords in a row may indicate method call. */
6370 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6371 (tmp = intuit_method(s, gv, cv))) {
6376 /* If not a declared subroutine, it's an indirect object. */
6377 /* (But it's an indir obj regardless for sort.) */
6378 /* Also, if "_" follows a filetest operator, it's a bareword */
6381 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6383 (PL_last_lop_op != OP_MAPSTART &&
6384 PL_last_lop_op != OP_GREPSTART))))
6385 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6386 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6389 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6394 PL_expect = XOPERATOR;
6397 s = SKIPSPACE2(s,nextPL_nextwhite);
6398 PL_nextwhite = nextPL_nextwhite;
6403 /* Is this a word before a => operator? */
6404 if (*s == '=' && s[1] == '>' && !pkgname) {
6407 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6408 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6409 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6413 /* If followed by a paren, it's certainly a subroutine. */
6418 while (SPACE_OR_TAB(*d))
6420 if (*d == ')' && (sv = cv_const_sv(cv))) {
6427 PL_nextwhite = PL_thiswhite;
6430 start_force(PL_curforce);
6432 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6433 PL_expect = XOPERATOR;
6436 PL_nextwhite = nextPL_nextwhite;
6437 curmad('X', PL_thistoken);
6438 PL_thistoken = newSVpvs("");
6447 /* If followed by var or block, call it a method (unless sub) */
6449 if ((*s == '$' || *s == '{') && !cv) {
6451 PL_last_lop = PL_oldbufptr;
6452 PL_last_lop_op = OP_METHOD;
6456 /* If followed by a bareword, see if it looks like indir obj. */
6459 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6460 && (tmp = intuit_method(s, gv, cv))) {
6465 /* Not a method, so call it a subroutine (if defined) */
6468 if (lastchar == '-')
6469 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6470 "Ambiguous use of -%s resolved as -&%s()",
6471 PL_tokenbuf, PL_tokenbuf);
6472 /* Check for a constant sub */
6473 if ((sv = cv_const_sv(cv))) {
6476 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6477 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6478 pl_yylval.opval->op_private = 0;
6482 op_free(pl_yylval.opval);
6483 pl_yylval.opval = rv2cv_op;
6484 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6485 PL_last_lop = PL_oldbufptr;
6486 PL_last_lop_op = OP_ENTERSUB;
6487 /* Is there a prototype? */
6495 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6498 while (*proto == ';')
6503 *proto == '$' || *proto == '_'
6509 *proto == '\\' && proto[1] && proto[2] == '\0'
6513 if (*proto == '\\' && proto[1] == '[') {
6514 const char *p = proto + 2;
6515 while(*p && *p != ']')
6517 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6519 if (*proto == '&' && *s == '{') {
6521 sv_setpvs(PL_subname, "__ANON__");
6523 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6530 PL_nextwhite = PL_thiswhite;
6533 start_force(PL_curforce);
6534 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6537 PL_nextwhite = nextPL_nextwhite;
6538 curmad('X', PL_thistoken);
6539 PL_thistoken = newSVpvs("");
6546 /* Guess harder when madskills require "best effort". */
6547 if (PL_madskills && (!gv || !GvCVu(gv))) {
6548 int probable_sub = 0;
6549 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6551 else if (isALPHA(*s)) {
6555 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6556 if (!keyword(tmpbuf, tmplen, 0))
6559 while (d < PL_bufend && isSPACE(*d))
6561 if (*d == '=' && d[1] == '>')
6566 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6567 op_free(pl_yylval.opval);
6568 pl_yylval.opval = rv2cv_op;
6569 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6570 PL_last_lop = PL_oldbufptr;
6571 PL_last_lop_op = OP_ENTERSUB;
6572 PL_nextwhite = PL_thiswhite;
6574 start_force(PL_curforce);
6575 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6577 PL_nextwhite = nextPL_nextwhite;
6578 curmad('X', PL_thistoken);
6579 PL_thistoken = newSVpvs("");
6584 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6591 /* Call it a bare word */
6593 if (PL_hints & HINT_STRICT_SUBS)
6594 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6597 /* after "print" and similar functions (corresponding to
6598 * "F? L" in opcode.pl), whatever wasn't already parsed as
6599 * a filehandle should be subject to "strict subs".
6600 * Likewise for the optional indirect-object argument to system
6601 * or exec, which can't be a bareword */
6602 if ((PL_last_lop_op == OP_PRINT
6603 || PL_last_lop_op == OP_PRTF
6604 || PL_last_lop_op == OP_SAY
6605 || PL_last_lop_op == OP_SYSTEM
6606 || PL_last_lop_op == OP_EXEC)
6607 && (PL_hints & HINT_STRICT_SUBS))
6608 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6609 if (lastchar != '-') {
6610 if (ckWARN(WARN_RESERVED)) {
6614 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6615 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6623 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6624 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625 "Operator or semicolon missing before %c%s",
6626 lastchar, PL_tokenbuf);
6627 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6628 "Ambiguous use of %c resolved as operator %c",
6629 lastchar, lastchar);
6635 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6636 newSVpv(CopFILE(PL_curcop),0));
6640 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6641 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6644 case KEY___PACKAGE__:
6645 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6647 ? newSVhek(HvNAME_HEK(PL_curstash))
6654 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6655 const char *pname = "main";
6656 if (PL_tokenbuf[2] == 'D')
6657 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6658 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6662 GvIOp(gv) = newIO();
6663 IoIFP(GvIOp(gv)) = PL_rsfp;
6664 #if defined(HAS_FCNTL) && defined(F_SETFD)
6666 const int fd = PerlIO_fileno(PL_rsfp);
6667 fcntl(fd,F_SETFD,fd >= 3);
6670 /* Mark this internal pseudo-handle as clean */
6671 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6672 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6673 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6675 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6676 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6677 /* if the script was opened in binmode, we need to revert
6678 * it to text mode for compatibility; but only iff it has CRs
6679 * XXX this is a questionable hack at best. */
6680 if (PL_bufend-PL_bufptr > 2
6681 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6684 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6685 loc = PerlIO_tell(PL_rsfp);
6686 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6689 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6691 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6692 #endif /* NETWARE */
6693 #ifdef PERLIO_IS_STDIO /* really? */
6694 # if defined(__BORLANDC__)
6695 /* XXX see note in do_binmode() */
6696 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6700 PerlIO_seek(PL_rsfp, loc, 0);
6704 #ifdef PERLIO_LAYERS
6707 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6708 else if (PL_encoding) {
6715 XPUSHs(PL_encoding);
6717 call_method("name", G_SCALAR);
6721 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6722 Perl_form(aTHX_ ":encoding(%"SVf")",
6731 if (PL_realtokenstart >= 0) {
6732 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6734 PL_endwhite = newSVpvs("");
6735 sv_catsv(PL_endwhite, PL_thiswhite);
6737 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6738 PL_realtokenstart = -1;
6740 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6756 if (PL_expect == XSTATE) {
6763 if (*s == ':' && s[1] == ':') {
6766 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6767 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6768 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6771 else if (tmp == KEY_require || tmp == KEY_do)
6772 /* that's a way to remember we saw "CORE::" */
6785 LOP(OP_ACCEPT,XTERM);
6791 LOP(OP_ATAN2,XTERM);
6797 LOP(OP_BINMODE,XTERM);
6800 LOP(OP_BLESS,XTERM);
6809 /* When 'use switch' is in effect, continue has a dual
6810 life as a control operator. */
6812 if (!FEATURE_IS_ENABLED("switch"))
6815 /* We have to disambiguate the two senses of
6816 "continue". If the next token is a '{' then
6817 treat it as the start of a continue block;
6818 otherwise treat it as a control operator.
6830 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6847 if (!PL_cryptseen) {
6848 PL_cryptseen = TRUE;
6852 LOP(OP_CRYPT,XTERM);
6855 LOP(OP_CHMOD,XTERM);
6858 LOP(OP_CHOWN,XTERM);
6861 LOP(OP_CONNECT,XTERM);
6880 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6881 if (orig_keyword == KEY_do) {
6890 PL_hints |= HINT_BLOCK_SCOPE;
6900 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6901 STR_WITH_LEN("NDBM_File::"),
6902 STR_WITH_LEN("DB_File::"),
6903 STR_WITH_LEN("GDBM_File::"),
6904 STR_WITH_LEN("SDBM_File::"),
6905 STR_WITH_LEN("ODBM_File::"),
6907 LOP(OP_DBMOPEN,XTERM);
6913 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6920 pl_yylval.ival = CopLINE(PL_curcop);
6936 if (*s == '{') { /* block eval */
6937 PL_expect = XTERMBLOCK;
6938 UNIBRACK(OP_ENTERTRY);
6940 else { /* string eval */
6942 UNIBRACK(OP_ENTEREVAL);
6957 case KEY_endhostent:
6963 case KEY_endservent:
6966 case KEY_endprotoent:
6977 pl_yylval.ival = CopLINE(PL_curcop);
6979 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6982 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6985 if ((PL_bufend - p) >= 3 &&
6986 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6988 else if ((PL_bufend - p) >= 4 &&
6989 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6992 if (isIDFIRST_lazy_if(p,UTF)) {
6993 p = scan_ident(p, PL_bufend,
6994 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6998 Perl_croak(aTHX_ "Missing $ on loop variable");
7000 s = SvPVX(PL_linestr) + soff;
7006 LOP(OP_FORMLINE,XTERM);
7012 LOP(OP_FCNTL,XTERM);
7018 LOP(OP_FLOCK,XTERM);
7027 LOP(OP_GREPSTART, XREF);
7030 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7045 case KEY_getpriority:
7046 LOP(OP_GETPRIORITY,XTERM);
7048 case KEY_getprotobyname:
7051 case KEY_getprotobynumber:
7052 LOP(OP_GPBYNUMBER,XTERM);
7054 case KEY_getprotoent:
7066 case KEY_getpeername:
7067 UNI(OP_GETPEERNAME);
7069 case KEY_gethostbyname:
7072 case KEY_gethostbyaddr:
7073 LOP(OP_GHBYADDR,XTERM);
7075 case KEY_gethostent:
7078 case KEY_getnetbyname:
7081 case KEY_getnetbyaddr:
7082 LOP(OP_GNBYADDR,XTERM);
7087 case KEY_getservbyname:
7088 LOP(OP_GSBYNAME,XTERM);
7090 case KEY_getservbyport:
7091 LOP(OP_GSBYPORT,XTERM);
7093 case KEY_getservent:
7096 case KEY_getsockname:
7097 UNI(OP_GETSOCKNAME);
7099 case KEY_getsockopt:
7100 LOP(OP_GSOCKOPT,XTERM);
7115 pl_yylval.ival = CopLINE(PL_curcop);
7125 pl_yylval.ival = CopLINE(PL_curcop);
7129 LOP(OP_INDEX,XTERM);
7135 LOP(OP_IOCTL,XTERM);
7147 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7179 LOP(OP_LISTEN,XTERM);
7188 s = scan_pat(s,OP_MATCH);
7189 TERM(sublex_start());
7192 LOP(OP_MAPSTART, XREF);
7195 LOP(OP_MKDIR,XTERM);
7198 LOP(OP_MSGCTL,XTERM);
7201 LOP(OP_MSGGET,XTERM);
7204 LOP(OP_MSGRCV,XTERM);
7207 LOP(OP_MSGSND,XTERM);
7212 PL_in_my = (U16)tmp;
7214 if (isIDFIRST_lazy_if(s,UTF)) {
7218 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7219 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7221 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7222 if (!PL_in_my_stash) {
7225 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7229 if (PL_madskills) { /* just add type to declarator token */
7230 sv_catsv(PL_thistoken, PL_nextwhite);
7232 sv_catpvn(PL_thistoken, start, s - start);
7240 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7247 s = tokenize_use(0, s);
7251 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7258 if (isIDFIRST_lazy_if(s,UTF)) {
7260 for (d = s; isALNUM_lazy_if(d,UTF);)
7262 for (t=d; isSPACE(*t);)
7264 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7266 && !(t[0] == '=' && t[1] == '>')
7268 int parms_len = (int)(d-s);
7269 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7270 "Precedence problem: open %.*s should be open(%.*s)",
7271 parms_len, s, parms_len, s);
7277 pl_yylval.ival = OP_OR;
7287 LOP(OP_OPEN_DIR,XTERM);
7290 checkcomma(s,PL_tokenbuf,"filehandle");
7294 checkcomma(s,PL_tokenbuf,"filehandle");
7313 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7315 s = force_strict_version(s);
7316 PL_lex_expect = XBLOCK;
7320 LOP(OP_PIPE_OP,XTERM);
7323 s = scan_str(s,!!PL_madskills,FALSE);
7326 pl_yylval.ival = OP_CONST;
7327 TERM(sublex_start());
7334 s = scan_str(s,!!PL_madskills,FALSE);
7337 PL_expect = XOPERATOR;
7338 if (SvCUR(PL_lex_stuff)) {
7340 d = SvPV_force(PL_lex_stuff, len);
7342 for (; isSPACE(*d) && len; --len, ++d)
7347 if (!warned && ckWARN(WARN_QW)) {
7348 for (; !isSPACE(*d) && len; --len, ++d) {
7350 Perl_warner(aTHX_ packWARN(WARN_QW),
7351 "Possible attempt to separate words with commas");
7354 else if (*d == '#') {
7355 Perl_warner(aTHX_ packWARN(WARN_QW),
7356 "Possible attempt to put comments in qw() list");
7362 for (; !isSPACE(*d) && len; --len, ++d)
7365 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7366 words = op_append_elem(OP_LIST, words,
7367 newSVOP(OP_CONST, 0, tokeq(sv)));
7372 words = newNULLLIST();
7374 SvREFCNT_dec(PL_lex_stuff);
7375 PL_lex_stuff = NULL;
7377 PL_expect = XOPERATOR;
7378 pl_yylval.opval = sawparens(words);
7383 s = scan_str(s,!!PL_madskills,FALSE);
7386 pl_yylval.ival = OP_STRINGIFY;
7387 if (SvIVX(PL_lex_stuff) == '\'')
7388 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
7389 TERM(sublex_start());
7392 s = scan_pat(s,OP_QR);
7393 TERM(sublex_start());
7396 s = scan_str(s,!!PL_madskills,FALSE);
7399 readpipe_override();
7400 TERM(sublex_start());
7408 s = force_version(s, FALSE);
7410 else if (*s != 'v' || !isDIGIT(s[1])
7411 || (s = force_version(s, TRUE), *s == 'v'))
7413 *PL_tokenbuf = '\0';
7414 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7415 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7416 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7418 yyerror("<> should be quotes");
7420 if (orig_keyword == KEY_require) {
7428 PL_last_uni = PL_oldbufptr;
7429 PL_last_lop_op = OP_REQUIRE;
7431 return REPORT( (int)REQUIRE );
7437 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7441 LOP(OP_RENAME,XTERM);
7450 LOP(OP_RINDEX,XTERM);
7459 UNIDOR(OP_READLINE);
7462 UNIDOR(OP_BACKTICK);
7471 LOP(OP_REVERSE,XTERM);
7474 UNIDOR(OP_READLINK);
7481 if (pl_yylval.opval)
7482 TERM(sublex_start());
7484 TOKEN(1); /* force error */
7487 checkcomma(s,PL_tokenbuf,"filehandle");
7497 LOP(OP_SELECT,XTERM);
7503 LOP(OP_SEMCTL,XTERM);
7506 LOP(OP_SEMGET,XTERM);
7509 LOP(OP_SEMOP,XTERM);
7515 LOP(OP_SETPGRP,XTERM);
7517 case KEY_setpriority:
7518 LOP(OP_SETPRIORITY,XTERM);
7520 case KEY_sethostent:
7526 case KEY_setservent:
7529 case KEY_setprotoent:
7539 LOP(OP_SEEKDIR,XTERM);
7541 case KEY_setsockopt:
7542 LOP(OP_SSOCKOPT,XTERM);
7548 LOP(OP_SHMCTL,XTERM);
7551 LOP(OP_SHMGET,XTERM);
7554 LOP(OP_SHMREAD,XTERM);
7557 LOP(OP_SHMWRITE,XTERM);
7560 LOP(OP_SHUTDOWN,XTERM);
7569 LOP(OP_SOCKET,XTERM);
7571 case KEY_socketpair:
7572 LOP(OP_SOCKPAIR,XTERM);
7575 checkcomma(s,PL_tokenbuf,"subroutine name");
7577 if (*s == ';' || *s == ')') /* probably a close */
7578 Perl_croak(aTHX_ "sort is now a reserved word");
7580 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7584 LOP(OP_SPLIT,XTERM);
7587 LOP(OP_SPRINTF,XTERM);
7590 LOP(OP_SPLICE,XTERM);
7605 LOP(OP_SUBSTR,XTERM);
7611 char tmpbuf[sizeof PL_tokenbuf];
7612 SSize_t tboffset = 0;
7613 expectation attrful;
7614 bool have_name, have_proto;
7615 const int key = tmp;
7620 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7621 SV *subtoken = newSVpvn(tstart, s - tstart);
7625 s = SKIPSPACE2(s,tmpwhite);
7630 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7631 (*s == ':' && s[1] == ':'))
7634 SV *nametoke = NULL;
7638 attrful = XATTRBLOCK;
7639 /* remember buffer pos'n for later force_word */
7640 tboffset = s - PL_oldbufptr;
7641 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7644 nametoke = newSVpvn(s, d - s);
7646 if (memchr(tmpbuf, ':', len))
7647 sv_setpvn(PL_subname, tmpbuf, len);
7649 sv_setsv(PL_subname,PL_curstname);
7650 sv_catpvs(PL_subname,"::");
7651 sv_catpvn(PL_subname,tmpbuf,len);
7658 CURMAD('X', nametoke);
7659 CURMAD('_', tmpwhite);
7660 (void) force_word(PL_oldbufptr + tboffset, WORD,
7663 s = SKIPSPACE2(d,tmpwhite);
7670 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7671 PL_expect = XTERMBLOCK;
7672 attrful = XATTRTERM;
7673 sv_setpvs(PL_subname,"?");
7677 if (key == KEY_format) {
7679 PL_lex_formbrack = PL_lex_brackets + 1;
7681 PL_thistoken = subtoken;
7685 (void) force_word(PL_oldbufptr + tboffset, WORD,
7691 /* Look for a prototype */
7694 bool bad_proto = FALSE;
7695 bool in_brackets = FALSE;
7696 char greedy_proto = ' ';
7697 bool proto_after_greedy_proto = FALSE;
7698 bool must_be_last = FALSE;
7699 bool underscore = FALSE;
7700 bool seen_underscore = FALSE;
7701 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7703 s = scan_str(s,!!PL_madskills,FALSE);
7705 Perl_croak(aTHX_ "Prototype not terminated");
7706 /* strip spaces and check for bad characters */
7707 d = SvPVX(PL_lex_stuff);
7709 for (p = d; *p; ++p) {
7713 if (warnillegalproto) {
7715 proto_after_greedy_proto = TRUE;
7716 if (!strchr("$@%*;[]&\\_", *p)) {
7728 else if ( *p == ']' ) {
7729 in_brackets = FALSE;
7731 else if ( (*p == '@' || *p == '%') &&
7732 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7734 must_be_last = TRUE;
7737 else if ( *p == '_' ) {
7738 underscore = seen_underscore = TRUE;
7745 if (proto_after_greedy_proto)
7746 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7747 "Prototype after '%c' for %"SVf" : %s",
7748 greedy_proto, SVfARG(PL_subname), d);
7750 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7751 "Illegal character %sin prototype for %"SVf" : %s",
7752 seen_underscore ? "after '_' " : "",
7753 SVfARG(PL_subname), d);
7754 SvCUR_set(PL_lex_stuff, tmp);
7759 CURMAD('q', PL_thisopen);
7760 CURMAD('_', tmpwhite);
7761 CURMAD('=', PL_thisstuff);
7762 CURMAD('Q', PL_thisclose);
7763 NEXTVAL_NEXTTOKE.opval =
7764 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7765 PL_lex_stuff = NULL;
7768 s = SKIPSPACE2(s,tmpwhite);
7776 if (*s == ':' && s[1] != ':')
7777 PL_expect = attrful;
7778 else if (*s != '{' && key == KEY_sub) {
7780 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7781 else if (*s != ';' && *s != '}')
7782 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7789 curmad('^', newSVpvs(""));
7790 CURMAD('_', tmpwhite);
7794 PL_thistoken = subtoken;
7797 NEXTVAL_NEXTTOKE.opval =
7798 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7799 PL_lex_stuff = NULL;
7805 sv_setpvs(PL_subname, "__ANON__");
7807 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7811 (void) force_word(PL_oldbufptr + tboffset, WORD,
7820 LOP(OP_SYSTEM,XREF);
7823 LOP(OP_SYMLINK,XTERM);
7826 LOP(OP_SYSCALL,XTERM);
7829 LOP(OP_SYSOPEN,XTERM);
7832 LOP(OP_SYSSEEK,XTERM);
7835 LOP(OP_SYSREAD,XTERM);
7838 LOP(OP_SYSWRITE,XTERM);
7842 TERM(sublex_start());
7863 LOP(OP_TRUNCATE,XTERM);
7875 pl_yylval.ival = CopLINE(PL_curcop);
7879 pl_yylval.ival = CopLINE(PL_curcop);
7883 LOP(OP_UNLINK,XTERM);
7889 LOP(OP_UNPACK,XTERM);
7892 LOP(OP_UTIME,XTERM);
7898 LOP(OP_UNSHIFT,XTERM);
7901 s = tokenize_use(1, s);
7911 pl_yylval.ival = CopLINE(PL_curcop);
7915 pl_yylval.ival = CopLINE(PL_curcop);
7919 PL_hints |= HINT_BLOCK_SCOPE;
7926 LOP(OP_WAITPID,XTERM);
7935 ctl_l[0] = toCTRL('L');
7937 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7940 /* Make sure $^L is defined */
7941 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7946 if (PL_expect == XOPERATOR)
7952 pl_yylval.ival = OP_XOR;
7957 TERM(sublex_start());
7962 #pragma segment Main
7966 S_pending_ident(pTHX)
7971 /* pit holds the identifier we read and pending_ident is reset */
7972 char pit = PL_pending_ident;
7973 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7974 /* All routes through this function want to know if there is a colon. */
7975 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7976 PL_pending_ident = 0;
7978 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7979 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7980 "### Pending identifier '%s'\n", PL_tokenbuf); });
7982 /* if we're in a my(), we can't allow dynamics here.
7983 $foo'bar has already been turned into $foo::bar, so
7984 just check for colons.
7986 if it's a legal name, the OP is a PADANY.
7989 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7991 yyerror(Perl_form(aTHX_ "No package name allowed for "
7992 "variable %s in \"our\"",
7994 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7998 yyerror(Perl_form(aTHX_ PL_no_myglob,
7999 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8001 pl_yylval.opval = newOP(OP_PADANY, 0);
8002 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8008 build the ops for accesses to a my() variable.
8010 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8011 then used in a comparison. This catches most, but not
8012 all cases. For instance, it catches
8013 sort { my($a); $a <=> $b }
8015 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8016 (although why you'd do that is anyone's guess).
8021 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8022 if (tmp != NOT_IN_PAD) {
8023 /* might be an "our" variable" */
8024 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8025 /* build ops for a bareword */
8026 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8027 HEK * const stashname = HvNAME_HEK(stash);
8028 SV * const sym = newSVhek(stashname);
8029 sv_catpvs(sym, "::");
8030 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8031 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8032 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8035 ? (GV_ADDMULTI | GV_ADDINEVAL)
8038 ((PL_tokenbuf[0] == '$') ? SVt_PV
8039 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8044 /* if it's a sort block and they're naming $a or $b */
8045 if (PL_last_lop_op == OP_SORT &&
8046 PL_tokenbuf[0] == '$' &&
8047 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8050 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8051 d < PL_bufend && *d != '\n';
8054 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8055 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8061 pl_yylval.opval = newOP(OP_PADANY, 0);
8062 pl_yylval.opval->op_targ = tmp;
8068 Whine if they've said @foo in a doublequoted string,
8069 and @foo isn't a variable we can find in the symbol
8072 if (ckWARN(WARN_AMBIGUOUS) &&
8073 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8074 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8076 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8077 /* DO NOT warn for @- and @+ */
8078 && !( PL_tokenbuf[2] == '\0' &&
8079 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8082 /* Downgraded from fatal to warning 20000522 mjd */
8083 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8084 "Possible unintended interpolation of %s in string",
8089 /* build ops for a bareword */
8090 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8092 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8093 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8094 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8095 ((PL_tokenbuf[0] == '$') ? SVt_PV
8096 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8102 * The following code was generated by perl_keyword.pl.
8106 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8110 PERL_ARGS_ASSERT_KEYWORD;
8114 case 1: /* 5 tokens of length 1 */
8146 case 2: /* 18 tokens of length 2 */
8292 case 3: /* 29 tokens of length 3 */
8296 if (name[1] == 'N' &&
8359 if (name[1] == 'i' &&
8391 if (name[1] == 'o' &&
8400 if (name[1] == 'e' &&
8409 if (name[1] == 'n' &&
8418 if (name[1] == 'o' &&
8427 if (name[1] == 'a' &&
8436 if (name[1] == 'o' &&
8498 if (name[1] == 'e' &&
8512 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8538 if (name[1] == 'i' &&
8547 if (name[1] == 's' &&
8556 if (name[1] == 'e' &&
8565 if (name[1] == 'o' &&
8577 case 4: /* 41 tokens of length 4 */
8581 if (name[1] == 'O' &&
8591 if (name[1] == 'N' &&
8601 if (name[1] == 'i' &&
8611 if (name[1] == 'h' &&
8621 if (name[1] == 'u' &&
8634 if (name[2] == 'c' &&
8643 if (name[2] == 's' &&
8652 if (name[2] == 'a' &&
8688 if (name[1] == 'o' &&
8701 if (name[2] == 't' &&
8710 if (name[2] == 'o' &&
8719 if (name[2] == 't' &&
8728 if (name[2] == 'e' &&
8741 if (name[1] == 'o' &&
8754 if (name[2] == 'y' &&
8763 if (name[2] == 'l' &&
8779 if (name[2] == 's' &&
8788 if (name[2] == 'n' &&
8797 if (name[2] == 'c' &&
8810 if (name[1] == 'e' &&
8820 if (name[1] == 'p' &&
8833 if (name[2] == 'c' &&
8842 if (name[2] == 'p' &&
8851 if (name[2] == 's' &&
8867 if (name[2] == 'n' &&
8937 if (name[2] == 'r' &&
8946 if (name[2] == 'r' &&
8955 if (name[2] == 'a' &&
8971 if (name[2] == 'l' &&
9033 if (name[2] == 'e' &&
9036 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9049 case 5: /* 39 tokens of length 5 */
9053 if (name[1] == 'E' &&
9064 if (name[1] == 'H' &&
9078 if (name[2] == 'a' &&
9088 if (name[2] == 'a' &&
9105 if (name[2] == 'e' &&
9115 if (name[2] == 'e' &&
9119 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9135 if (name[3] == 'i' &&
9144 if (name[3] == 'o' &&
9180 if (name[2] == 'o' &&
9190 if (name[2] == 'y' &&
9204 if (name[1] == 'l' &&
9218 if (name[2] == 'n' &&
9228 if (name[2] == 'o' &&
9242 if (name[1] == 'i' &&
9247 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9256 if (name[2] == 'd' &&
9266 if (name[2] == 'c' &&
9283 if (name[2] == 'c' &&
9293 if (name[2] == 't' &&
9307 if (name[1] == 'k' &&
9318 if (name[1] == 'r' &&
9332 if (name[2] == 's' &&
9342 if (name[2] == 'd' &&
9359 if (name[2] == 'm' &&
9369 if (name[2] == 'i' &&
9379 if (name[2] == 'e' &&
9389 if (name[2] == 'l' &&
9399 if (name[2] == 'a' &&
9412 if (name[3] == 't' &&
9415 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9421 if (name[3] == 'd' &&
9438 if (name[1] == 'i' &&
9452 if (name[2] == 'a' &&
9465 if (name[3] == 'e' &&
9500 if (name[2] == 'i' &&
9517 if (name[2] == 'i' &&
9527 if (name[2] == 'i' &&
9544 case 6: /* 33 tokens of length 6 */
9548 if (name[1] == 'c' &&
9563 if (name[2] == 'l' &&
9574 if (name[2] == 'r' &&
9589 if (name[1] == 'e' &&
9604 if (name[2] == 's' &&
9609 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9615 if (name[2] == 'i' &&
9633 if (name[2] == 'l' &&
9644 if (name[2] == 'r' &&
9659 if (name[1] == 'm' &&
9674 if (name[2] == 'n' &&
9685 if (name[2] == 's' &&
9700 if (name[1] == 's' &&
9706 if (name[4] == 't' &&
9715 if (name[4] == 'e' &&
9724 if (name[4] == 'c' &&
9733 if (name[4] == 'n' &&
9749 if (name[1] == 'r' &&
9767 if (name[3] == 'a' &&
9777 if (name[3] == 'u' &&
9791 if (name[2] == 'n' &&
9809 if (name[2] == 'a' &&
9823 if (name[3] == 'e' &&
9836 if (name[4] == 't' &&
9845 if (name[4] == 'e' &&
9867 if (name[4] == 't' &&
9876 if (name[4] == 'e' &&
9892 if (name[2] == 'c' &&
9903 if (name[2] == 'l' &&
9914 if (name[2] == 'b' &&
9925 if (name[2] == 's' &&
9948 if (name[4] == 's' &&
9957 if (name[4] == 'n' &&
9970 if (name[3] == 'a' &&
9987 if (name[1] == 'a' &&
10002 case 7: /* 29 tokens of length 7 */
10006 if (name[1] == 'E' &&
10013 return KEY_DESTROY;
10019 if (name[1] == '_' &&
10026 return KEY___END__;
10032 if (name[1] == 'i' &&
10039 return -KEY_binmode;
10045 if (name[1] == 'o' &&
10052 return -KEY_connect;
10061 if (name[2] == 'm' &&
10067 return -KEY_dbmopen;
10073 if (name[2] == 'f')
10078 if (name[4] == 'u' &&
10082 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10088 if (name[4] == 'n' &&
10092 return KEY_defined;
10109 if (name[1] == 'o' &&
10116 return KEY_foreach;
10122 if (name[1] == 'e' &&
10129 if (name[5] == 'r' &&
10132 return -KEY_getpgrp;
10138 if (name[5] == 'i' &&
10141 return -KEY_getppid;
10154 if (name[1] == 'c' &&
10161 return -KEY_lcfirst;
10167 if (name[1] == 'p' &&
10174 return -KEY_opendir;
10180 if (name[1] == 'a' &&
10187 return KEY_package;
10193 if (name[1] == 'e')
10198 if (name[3] == 'd' &&
10203 return -KEY_readdir;
10209 if (name[3] == 'u' &&
10214 return KEY_require;
10220 if (name[3] == 'e' &&
10225 return -KEY_reverse;
10244 if (name[3] == 'k' &&
10249 return -KEY_seekdir;
10255 if (name[3] == 'p' &&
10260 return -KEY_setpgrp;
10270 if (name[2] == 'm' &&
10276 return -KEY_shmread;
10282 if (name[2] == 'r' &&
10288 return -KEY_sprintf;
10297 if (name[3] == 'l' &&
10302 return -KEY_symlink;
10311 if (name[4] == 'a' &&
10315 return -KEY_syscall;
10321 if (name[4] == 'p' &&
10325 return -KEY_sysopen;
10331 if (name[4] == 'e' &&
10335 return -KEY_sysread;
10341 if (name[4] == 'e' &&
10345 return -KEY_sysseek;
10363 if (name[1] == 'e' &&
10370 return -KEY_telldir;
10379 if (name[2] == 'f' &&
10385 return -KEY_ucfirst;
10391 if (name[2] == 's' &&
10397 return -KEY_unshift;
10407 if (name[1] == 'a' &&
10414 return -KEY_waitpid;
10423 case 8: /* 26 tokens of length 8 */
10427 if (name[1] == 'U' &&
10435 return KEY_AUTOLOAD;
10441 if (name[1] == '_')
10446 if (name[3] == 'A' &&
10452 return KEY___DATA__;
10458 if (name[3] == 'I' &&
10464 return -KEY___FILE__;
10470 if (name[3] == 'I' &&
10476 return -KEY___LINE__;
10492 if (name[2] == 'o' &&
10499 return -KEY_closedir;
10505 if (name[2] == 'n' &&
10512 return -KEY_continue;
10522 if (name[1] == 'b' &&
10530 return -KEY_dbmclose;
10536 if (name[1] == 'n' &&
10542 if (name[4] == 'r' &&
10547 return -KEY_endgrent;
10553 if (name[4] == 'w' &&
10558 return -KEY_endpwent;
10571 if (name[1] == 'o' &&
10579 return -KEY_formline;
10585 if (name[1] == 'e' &&
10591 if (name[4] == 'r')
10596 if (name[6] == 'n' &&
10599 return -KEY_getgrent;
10605 if (name[6] == 'i' &&
10608 return -KEY_getgrgid;
10614 if (name[6] == 'a' &&
10617 return -KEY_getgrnam;
10630 if (name[4] == 'o' &&
10635 return -KEY_getlogin;
10641 if (name[4] == 'w')
10646 if (name[6] == 'n' &&
10649 return -KEY_getpwent;
10655 if (name[6] == 'a' &&
10658 return -KEY_getpwnam;
10664 if (name[6] == 'i' &&
10667 return -KEY_getpwuid;
10687 if (name[1] == 'e' &&
10694 if (name[5] == 'i' &&
10701 return -KEY_readline;
10706 return -KEY_readlink;
10717 if (name[5] == 'i' &&
10721 return -KEY_readpipe;
10737 if (name[2] == 't')
10742 if (name[4] == 'r' &&
10747 return -KEY_setgrent;
10753 if (name[4] == 'w' &&
10758 return -KEY_setpwent;
10774 if (name[3] == 'w' &&
10780 return -KEY_shmwrite;
10786 if (name[3] == 't' &&
10792 return -KEY_shutdown;
10802 if (name[2] == 's' &&
10809 return -KEY_syswrite;
10819 if (name[1] == 'r' &&
10827 return -KEY_truncate;
10836 case 9: /* 9 tokens of length 9 */
10840 if (name[1] == 'N' &&
10849 return KEY_UNITCHECK;
10855 if (name[1] == 'n' &&
10864 return -KEY_endnetent;
10870 if (name[1] == 'e' &&
10879 return -KEY_getnetent;
10885 if (name[1] == 'o' &&
10894 return -KEY_localtime;
10900 if (name[1] == 'r' &&
10909 return KEY_prototype;
10915 if (name[1] == 'u' &&
10924 return -KEY_quotemeta;
10930 if (name[1] == 'e' &&
10939 return -KEY_rewinddir;
10945 if (name[1] == 'e' &&
10954 return -KEY_setnetent;
10960 if (name[1] == 'a' &&
10969 return -KEY_wantarray;
10978 case 10: /* 9 tokens of length 10 */
10982 if (name[1] == 'n' &&
10988 if (name[4] == 'o' &&
10995 return -KEY_endhostent;
11001 if (name[4] == 'e' &&
11008 return -KEY_endservent;
11021 if (name[1] == 'e' &&
11027 if (name[4] == 'o' &&
11034 return -KEY_gethostent;
11043 if (name[5] == 'r' &&
11049 return -KEY_getservent;
11055 if (name[5] == 'c' &&
11061 return -KEY_getsockopt;
11081 if (name[2] == 't')
11086 if (name[4] == 'o' &&
11093 return -KEY_sethostent;
11102 if (name[5] == 'r' &&
11108 return -KEY_setservent;
11114 if (name[5] == 'c' &&
11120 return -KEY_setsockopt;
11137 if (name[2] == 'c' &&
11146 return -KEY_socketpair;
11159 case 11: /* 8 tokens of length 11 */
11163 if (name[1] == '_' &&
11173 { /* __PACKAGE__ */
11174 return -KEY___PACKAGE__;
11180 if (name[1] == 'n' &&
11190 { /* endprotoent */
11191 return -KEY_endprotoent;
11197 if (name[1] == 'e' &&
11206 if (name[5] == 'e' &&
11212 { /* getpeername */
11213 return -KEY_getpeername;
11222 if (name[6] == 'o' &&
11227 { /* getpriority */
11228 return -KEY_getpriority;
11234 if (name[6] == 't' &&
11239 { /* getprotoent */
11240 return -KEY_getprotoent;
11254 if (name[4] == 'o' &&
11261 { /* getsockname */
11262 return -KEY_getsockname;
11275 if (name[1] == 'e' &&
11283 if (name[6] == 'o' &&
11288 { /* setpriority */
11289 return -KEY_setpriority;
11295 if (name[6] == 't' &&
11300 { /* setprotoent */
11301 return -KEY_setprotoent;
11317 case 12: /* 2 tokens of length 12 */
11318 if (name[0] == 'g' &&
11330 if (name[9] == 'd' &&
11333 { /* getnetbyaddr */
11334 return -KEY_getnetbyaddr;
11340 if (name[9] == 'a' &&
11343 { /* getnetbyname */
11344 return -KEY_getnetbyname;
11356 case 13: /* 4 tokens of length 13 */
11357 if (name[0] == 'g' &&
11364 if (name[4] == 'o' &&
11373 if (name[10] == 'd' &&
11376 { /* gethostbyaddr */
11377 return -KEY_gethostbyaddr;
11383 if (name[10] == 'a' &&
11386 { /* gethostbyname */
11387 return -KEY_gethostbyname;
11400 if (name[4] == 'e' &&
11409 if (name[10] == 'a' &&
11412 { /* getservbyname */
11413 return -KEY_getservbyname;
11419 if (name[10] == 'o' &&
11422 { /* getservbyport */
11423 return -KEY_getservbyport;
11442 case 14: /* 1 tokens of length 14 */
11443 if (name[0] == 'g' &&
11457 { /* getprotobyname */
11458 return -KEY_getprotobyname;
11463 case 16: /* 1 tokens of length 16 */
11464 if (name[0] == 'g' &&
11480 { /* getprotobynumber */
11481 return -KEY_getprotobynumber;
11495 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11499 PERL_ARGS_ASSERT_CHECKCOMMA;
11501 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
11502 if (ckWARN(WARN_SYNTAX)) {
11505 for (w = s+2; *w && level; w++) {
11508 else if (*w == ')')
11511 while (isSPACE(*w))
11513 /* the list of chars below is for end of statements or
11514 * block / parens, boolean operators (&&, ||, //) and branch
11515 * constructs (or, and, if, until, unless, while, err, for).
11516 * Not a very solid hack... */
11517 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11518 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11519 "%s (...) interpreted as function",name);
11522 while (s < PL_bufend && isSPACE(*s))
11526 while (s < PL_bufend && isSPACE(*s))
11528 if (isIDFIRST_lazy_if(s,UTF)) {
11529 const char * const w = s++;
11530 while (isALNUM_lazy_if(s,UTF))
11532 while (s < PL_bufend && isSPACE(*s))
11536 if (keyword(w, s - w, 0))
11539 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11540 if (gv && GvCVu(gv))
11542 Perl_croak(aTHX_ "No comma allowed after %s", what);
11547 /* Either returns sv, or mortalizes sv and returns a new SV*.
11548 Best used as sv=new_constant(..., sv, ...).
11549 If s, pv are NULL, calls subroutine with one argument,
11550 and type is used with error messages only. */
11553 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11554 SV *sv, SV *pv, const char *type, STRLEN typelen)
11557 HV * const table = GvHV(PL_hintgv); /* ^H */
11561 const char *why1 = "", *why2 = "", *why3 = "";
11563 PERL_ARGS_ASSERT_NEW_CONSTANT;
11565 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11568 why2 = (const char *)
11569 (strEQ(key,"charnames")
11570 ? "(possibly a missing \"use charnames ...\")"
11572 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11573 (type ? type: "undef"), why2);
11575 /* This is convoluted and evil ("goto considered harmful")
11576 * but I do not understand the intricacies of all the different
11577 * failure modes of %^H in here. The goal here is to make
11578 * the most probable error message user-friendly. --jhi */
11583 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11584 (type ? type: "undef"), why1, why2, why3);
11586 yyerror(SvPVX_const(msg));
11591 /* charnames doesn't work well if there have been errors found */
11592 if (PL_error_count > 0 && strEQ(key,"charnames"))
11593 return &PL_sv_undef;
11595 cvp = hv_fetch(table, key, keylen, FALSE);
11596 if (!cvp || !SvOK(*cvp)) {
11599 why3 = "} is not defined";
11602 sv_2mortal(sv); /* Parent created it permanently */
11605 pv = newSVpvn_flags(s, len, SVs_TEMP);
11607 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11609 typesv = &PL_sv_undef;
11611 PUSHSTACKi(PERLSI_OVERLOAD);
11623 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11627 /* Check the eval first */
11628 if (!PL_in_eval && SvTRUE(ERRSV)) {
11629 sv_catpvs(ERRSV, "Propagated");
11630 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11632 res = SvREFCNT_inc_simple(sv);
11636 SvREFCNT_inc_simple_void(res);
11645 why1 = "Call to &{$^H{";
11647 why3 = "}} did not return a defined value";
11655 /* Returns a NUL terminated string, with the length of the string written to
11659 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11662 register char *d = dest;
11663 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11665 PERL_ARGS_ASSERT_SCAN_WORD;
11669 Perl_croak(aTHX_ ident_too_long);
11670 if (isALNUM(*s)) /* UTF handled below */
11672 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11677 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11681 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11682 char *t = s + UTF8SKIP(s);
11684 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11688 Perl_croak(aTHX_ ident_too_long);
11689 Copy(s, d, len, char);
11702 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11705 char *bracket = NULL;
11707 register char *d = dest;
11708 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11710 PERL_ARGS_ASSERT_SCAN_IDENT;
11715 while (isDIGIT(*s)) {
11717 Perl_croak(aTHX_ ident_too_long);
11724 Perl_croak(aTHX_ ident_too_long);
11725 if (isALNUM(*s)) /* UTF handled below */
11727 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11732 else if (*s == ':' && s[1] == ':') {
11736 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11737 char *t = s + UTF8SKIP(s);
11738 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11740 if (d + (t - s) > e)
11741 Perl_croak(aTHX_ ident_too_long);
11742 Copy(s, d, t - s, char);
11753 if (PL_lex_state != LEX_NORMAL)
11754 PL_lex_state = LEX_INTERPENDMAYBE;
11757 if (*s == '$' && s[1] &&
11758 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11771 if (*d == '^' && *s && isCONTROLVAR(*s)) {
11776 if (isSPACE(s[-1])) {
11778 const char ch = *s++;
11779 if (!SPACE_OR_TAB(ch)) {
11785 if (isIDFIRST_lazy_if(d,UTF)) {
11789 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11790 end += UTF8SKIP(end);
11791 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11792 end += UTF8SKIP(end);
11794 Copy(s, d, end - s, char);
11799 while ((isALNUM(*s) || *s == ':') && d < e)
11802 Perl_croak(aTHX_ ident_too_long);
11805 while (s < send && SPACE_OR_TAB(*s))
11807 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11808 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11809 const char * const brack =
11811 ((*s == '[') ? "[...]" : "{...}");
11812 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11813 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11814 funny, dest, brack, funny, dest, brack);
11817 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11821 /* Handle extended ${^Foo} variables
11822 * 1999-02-27 mjd-perl-patch@plover.com */
11823 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11827 while (isALNUM(*s) && d < e) {
11831 Perl_croak(aTHX_ ident_too_long);
11836 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11837 PL_lex_state = LEX_INTERPEND;
11840 if (PL_lex_state == LEX_NORMAL) {
11841 if (ckWARN(WARN_AMBIGUOUS) &&
11842 (keyword(dest, d - dest, 0)
11843 || get_cvn_flags(dest, d - dest, 0)))
11847 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11848 "Ambiguous use of %c{%s} resolved to %c%s",
11849 funny, dest, funny, dest);
11854 s = bracket; /* let the parser handle it */
11858 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11859 PL_lex_state = LEX_INTERPEND;
11864 S_pmflag(U32 pmfl, const char ch) {
11866 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11867 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11868 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11869 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11870 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11871 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11877 S_scan_pat(pTHX_ char *start, I32 type)
11881 char *s = scan_str(start,!!PL_madskills,FALSE);
11882 const char * const valid_flags =
11883 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11888 PERL_ARGS_ASSERT_SCAN_PAT;
11891 const char * const delimiter = skipspace(start);
11895 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11896 : "Search pattern not terminated" ));
11899 pm = (PMOP*)newPMOP(type, 0);
11900 if (PL_multi_open == '?') {
11901 /* This is the only point in the code that sets PMf_ONCE: */
11902 pm->op_pmflags |= PMf_ONCE;
11904 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11905 allows us to restrict the list needed by reset to just the ??
11907 assert(type != OP_TRANS);
11909 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11912 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11915 elements = mg->mg_len / sizeof(PMOP**);
11916 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11917 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11918 mg->mg_len = elements * sizeof(PMOP**);
11919 PmopSTASH_set(pm,PL_curstash);
11925 while (*s && strchr(valid_flags, *s))
11926 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11929 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11930 "Having no space between pattern and following word is deprecated");
11934 if (PL_madskills && modstart != s) {
11935 SV* tmptoken = newSVpvn(modstart, s - modstart);
11936 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11939 /* issue a warning if /c is specified,but /g is not */
11940 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11942 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11943 "Use of /c modifier is meaningless without /g" );
11946 PL_lex_op = (OP*)pm;
11947 pl_yylval.ival = OP_MATCH;
11952 S_scan_subst(pTHX_ char *start)
11963 PERL_ARGS_ASSERT_SCAN_SUBST;
11965 pl_yylval.ival = OP_NULL;
11967 s = scan_str(start,!!PL_madskills,FALSE);
11970 Perl_croak(aTHX_ "Substitution pattern not terminated");
11972 if (s[-1] == PL_multi_open)
11975 if (PL_madskills) {
11976 CURMAD('q', PL_thisopen);
11977 CURMAD('_', PL_thiswhite);
11978 CURMAD('E', PL_thisstuff);
11979 CURMAD('Q', PL_thisclose);
11980 PL_realtokenstart = s - SvPVX(PL_linestr);
11984 first_start = PL_multi_start;
11985 s = scan_str(s,!!PL_madskills,FALSE);
11987 if (PL_lex_stuff) {
11988 SvREFCNT_dec(PL_lex_stuff);
11989 PL_lex_stuff = NULL;
11991 Perl_croak(aTHX_ "Substitution replacement not terminated");
11993 PL_multi_start = first_start; /* so whole substitution is taken together */
11995 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11998 if (PL_madskills) {
11999 CURMAD('z', PL_thisopen);
12000 CURMAD('R', PL_thisstuff);
12001 CURMAD('Z', PL_thisclose);
12007 if (*s == EXEC_PAT_MOD) {
12011 else if (strchr(S_PAT_MODS, *s))
12012 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12015 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12016 "Having no space between pattern and following word is deprecated");
12024 if (PL_madskills) {
12026 curmad('m', newSVpvn(modstart, s - modstart));
12027 append_madprops(PL_thismad, (OP*)pm, 0);
12031 if ((pm->op_pmflags & PMf_CONTINUE)) {
12032 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12036 SV * const repl = newSVpvs("");
12038 PL_sublex_info.super_bufptr = s;
12039 PL_sublex_info.super_bufend = PL_bufend;
12041 pm->op_pmflags |= PMf_EVAL;
12044 sv_catpvs(repl, "eval ");
12046 sv_catpvs(repl, "do ");
12048 sv_catpvs(repl, "{");
12049 sv_catsv(repl, PL_lex_repl);
12050 if (strchr(SvPVX(PL_lex_repl), '#'))
12051 sv_catpvs(repl, "\n");
12052 sv_catpvs(repl, "}");
12054 SvREFCNT_dec(PL_lex_repl);
12055 PL_lex_repl = repl;
12058 PL_lex_op = (OP*)pm;
12059 pl_yylval.ival = OP_SUBST;
12064 S_scan_trans(pTHX_ char *start)
12077 PERL_ARGS_ASSERT_SCAN_TRANS;
12079 pl_yylval.ival = OP_NULL;
12081 s = scan_str(start,!!PL_madskills,FALSE);
12083 Perl_croak(aTHX_ "Transliteration pattern not terminated");
12085 if (s[-1] == PL_multi_open)
12088 if (PL_madskills) {
12089 CURMAD('q', PL_thisopen);
12090 CURMAD('_', PL_thiswhite);
12091 CURMAD('E', PL_thisstuff);
12092 CURMAD('Q', PL_thisclose);
12093 PL_realtokenstart = s - SvPVX(PL_linestr);
12097 s = scan_str(s,!!PL_madskills,FALSE);
12099 if (PL_lex_stuff) {
12100 SvREFCNT_dec(PL_lex_stuff);
12101 PL_lex_stuff = NULL;
12103 Perl_croak(aTHX_ "Transliteration replacement not terminated");
12105 if (PL_madskills) {
12106 CURMAD('z', PL_thisopen);
12107 CURMAD('R', PL_thisstuff);
12108 CURMAD('Z', PL_thisclose);
12111 complement = del = squash = 0;
12118 complement = OPpTRANS_COMPLEMENT;
12121 del = OPpTRANS_DELETE;
12124 squash = OPpTRANS_SQUASH;
12133 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12134 o = newPVOP(OP_TRANS, 0, (char*)tbl);
12135 o->op_private &= ~OPpTRANS_ALL;
12136 o->op_private |= del|squash|complement|
12137 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12138 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
12141 pl_yylval.ival = OP_TRANS;
12144 if (PL_madskills) {
12146 curmad('m', newSVpvn(modstart, s - modstart));
12147 append_madprops(PL_thismad, o, 0);
12156 S_scan_heredoc(pTHX_ register char *s)
12160 I32 op_type = OP_SCALAR;
12164 const char *found_newline;
12168 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12170 I32 stuffstart = s - SvPVX(PL_linestr);
12173 PL_realtokenstart = -1;
12176 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12180 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12184 while (SPACE_OR_TAB(*peek))
12186 if (*peek == '`' || *peek == '\'' || *peek =='"') {
12189 s = delimcpy(d, e, s, PL_bufend, term, &len);
12199 if (!isALNUM_lazy_if(s,UTF))
12200 deprecate("bare << to mean <<\"\"");
12201 for (; isALNUM_lazy_if(s,UTF); s++) {
12206 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12207 Perl_croak(aTHX_ "Delimiter for here document is too long");
12210 len = d - PL_tokenbuf;
12213 if (PL_madskills) {
12214 tstart = PL_tokenbuf + !outer;
12215 PL_thisclose = newSVpvn(tstart, len - !outer);
12216 tstart = SvPVX(PL_linestr) + stuffstart;
12217 PL_thisopen = newSVpvn(tstart, s - tstart);
12218 stuffstart = s - SvPVX(PL_linestr);
12221 #ifndef PERL_STRICT_CR
12222 d = strchr(s, '\r');
12224 char * const olds = s;
12226 while (s < PL_bufend) {
12232 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12241 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12248 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12249 herewas = newSVpvn(s,PL_bufend-s);
12253 herewas = newSVpvn(s-1,found_newline-s+1);
12256 herewas = newSVpvn(s,found_newline-s);
12260 if (PL_madskills) {
12261 tstart = SvPVX(PL_linestr) + stuffstart;
12263 sv_catpvn(PL_thisstuff, tstart, s - tstart);
12265 PL_thisstuff = newSVpvn(tstart, s - tstart);
12268 s += SvCUR(herewas);
12271 stuffstart = s - SvPVX(PL_linestr);
12277 tmpstr = newSV_type(SVt_PVIV);
12278 SvGROW(tmpstr, 80);
12279 if (term == '\'') {
12280 op_type = OP_CONST;
12281 SvIV_set(tmpstr, -1);
12283 else if (term == '`') {
12284 op_type = OP_BACKTICK;
12285 SvIV_set(tmpstr, '\\');
12289 PL_multi_start = CopLINE(PL_curcop);
12290 PL_multi_open = PL_multi_close = '<';
12291 term = *PL_tokenbuf;
12292 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12293 char * const bufptr = PL_sublex_info.super_bufptr;
12294 char * const bufend = PL_sublex_info.super_bufend;
12295 char * const olds = s - SvCUR(herewas);
12296 s = strchr(bufptr, '\n');
12300 while (s < bufend &&
12301 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12303 CopLINE_inc(PL_curcop);
12306 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12307 missingterm(PL_tokenbuf);
12309 sv_setpvn(herewas,bufptr,d-bufptr+1);
12310 sv_setpvn(tmpstr,d+1,s-d);
12312 sv_catpvn(herewas,s,bufend-s);
12313 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12320 while (s < PL_bufend &&
12321 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12323 CopLINE_inc(PL_curcop);
12325 if (s >= PL_bufend) {
12326 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12327 missingterm(PL_tokenbuf);
12329 sv_setpvn(tmpstr,d+1,s-d);
12331 if (PL_madskills) {
12333 sv_catpvn(PL_thisstuff, d + 1, s - d);
12335 PL_thisstuff = newSVpvn(d + 1, s - d);
12336 stuffstart = s - SvPVX(PL_linestr);
12340 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12342 sv_catpvn(herewas,s,PL_bufend-s);
12343 sv_setsv(PL_linestr,herewas);
12344 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12345 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12346 PL_last_lop = PL_last_uni = NULL;
12349 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
12350 while (s >= PL_bufend) { /* multiple line string? */
12352 if (PL_madskills) {
12353 tstart = SvPVX(PL_linestr) + stuffstart;
12355 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12357 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12361 CopLINE_inc(PL_curcop);
12362 if (!outer || !lex_next_chunk(0)) {
12363 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12364 missingterm(PL_tokenbuf);
12366 CopLINE_dec(PL_curcop);
12369 stuffstart = s - SvPVX(PL_linestr);
12371 CopLINE_inc(PL_curcop);
12372 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12373 PL_last_lop = PL_last_uni = NULL;
12374 #ifndef PERL_STRICT_CR
12375 if (PL_bufend - PL_linestart >= 2) {
12376 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12377 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12379 PL_bufend[-2] = '\n';
12381 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12383 else if (PL_bufend[-1] == '\r')
12384 PL_bufend[-1] = '\n';
12386 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12387 PL_bufend[-1] = '\n';
12389 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12390 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12391 *(SvPVX(PL_linestr) + off ) = ' ';
12392 sv_catsv(PL_linestr,herewas);
12393 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12394 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12398 sv_catsv(tmpstr,PL_linestr);
12403 PL_multi_end = CopLINE(PL_curcop);
12404 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12405 SvPV_shrink_to_cur(tmpstr);
12407 SvREFCNT_dec(herewas);
12409 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12411 else if (PL_encoding)
12412 sv_recode_to_utf8(tmpstr, PL_encoding);
12414 PL_lex_stuff = tmpstr;
12415 pl_yylval.ival = op_type;
12419 /* scan_inputsymbol
12420 takes: current position in input buffer
12421 returns: new position in input buffer
12422 side-effects: pl_yylval and lex_op are set.
12427 <FH> read from filehandle
12428 <pkg::FH> read from package qualified filehandle
12429 <pkg'FH> read from package qualified filehandle
12430 <$fh> read from filehandle in $fh
12431 <*.h> filename glob
12436 S_scan_inputsymbol(pTHX_ char *start)
12439 register char *s = start; /* current position in buffer */
12442 char *d = PL_tokenbuf; /* start of temp holding space */
12443 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12445 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12447 end = strchr(s, '\n');
12450 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
12452 /* die if we didn't have space for the contents of the <>,
12453 or if it didn't end, or if we see a newline
12456 if (len >= (I32)sizeof PL_tokenbuf)
12457 Perl_croak(aTHX_ "Excessively long <> operator");
12459 Perl_croak(aTHX_ "Unterminated <> operator");
12464 Remember, only scalar variables are interpreted as filehandles by
12465 this code. Anything more complex (e.g., <$fh{$num}>) will be
12466 treated as a glob() call.
12467 This code makes use of the fact that except for the $ at the front,
12468 a scalar variable and a filehandle look the same.
12470 if (*d == '$' && d[1]) d++;
12472 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12473 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12476 /* If we've tried to read what we allow filehandles to look like, and
12477 there's still text left, then it must be a glob() and not a getline.
12478 Use scan_str to pull out the stuff between the <> and treat it
12479 as nothing more than a string.
12482 if (d - PL_tokenbuf != len) {
12483 pl_yylval.ival = OP_GLOB;
12484 s = scan_str(start,!!PL_madskills,FALSE);
12486 Perl_croak(aTHX_ "Glob not terminated");
12490 bool readline_overriden = FALSE;
12493 /* we're in a filehandle read situation */
12496 /* turn <> into <ARGV> */
12498 Copy("ARGV",d,5,char);
12500 /* Check whether readline() is overriden */
12501 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12503 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12505 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12506 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12507 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12508 readline_overriden = TRUE;
12510 /* if <$fh>, create the ops to turn the variable into a
12514 /* try to find it in the pad for this block, otherwise find
12515 add symbol table ops
12517 const PADOFFSET tmp = pad_findmy(d, len, 0);
12518 if (tmp != NOT_IN_PAD) {
12519 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12520 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12521 HEK * const stashname = HvNAME_HEK(stash);
12522 SV * const sym = sv_2mortal(newSVhek(stashname));
12523 sv_catpvs(sym, "::");
12524 sv_catpv(sym, d+1);
12529 OP * const o = newOP(OP_PADSV, 0);
12531 PL_lex_op = readline_overriden
12532 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12533 op_append_elem(OP_LIST, o,
12534 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12535 : (OP*)newUNOP(OP_READLINE, 0, o);
12544 ? (GV_ADDMULTI | GV_ADDINEVAL)
12547 PL_lex_op = readline_overriden
12548 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12549 op_append_elem(OP_LIST,
12550 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12551 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12552 : (OP*)newUNOP(OP_READLINE, 0,
12553 newUNOP(OP_RV2SV, 0,
12554 newGVOP(OP_GV, 0, gv)));
12556 if (!readline_overriden)
12557 PL_lex_op->op_flags |= OPf_SPECIAL;
12558 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12559 pl_yylval.ival = OP_NULL;
12562 /* If it's none of the above, it must be a literal filehandle
12563 (<Foo::BAR> or <FOO>) so build a simple readline OP */
12565 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12566 PL_lex_op = readline_overriden
12567 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12568 op_append_elem(OP_LIST,
12569 newGVOP(OP_GV, 0, gv),
12570 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12571 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12572 pl_yylval.ival = OP_NULL;
12581 takes: start position in buffer
12582 keep_quoted preserve \ on the embedded delimiter(s)
12583 keep_delims preserve the delimiters around the string
12584 returns: position to continue reading from buffer
12585 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12586 updates the read buffer.
12588 This subroutine pulls a string out of the input. It is called for:
12589 q single quotes q(literal text)
12590 ' single quotes 'literal text'
12591 qq double quotes qq(interpolate $here please)
12592 " double quotes "interpolate $here please"
12593 qx backticks qx(/bin/ls -l)
12594 ` backticks `/bin/ls -l`
12595 qw quote words @EXPORT_OK = qw( func() $spam )
12596 m// regexp match m/this/
12597 s/// regexp substitute s/this/that/
12598 tr/// string transliterate tr/this/that/
12599 y/// string transliterate y/this/that/
12600 ($*@) sub prototypes sub foo ($)
12601 (stuff) sub attr parameters sub foo : attr(stuff)
12602 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12604 In most of these cases (all but <>, patterns and transliterate)
12605 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12606 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12607 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12610 It skips whitespace before the string starts, and treats the first
12611 character as the delimiter. If the delimiter is one of ([{< then
12612 the corresponding "close" character )]}> is used as the closing
12613 delimiter. It allows quoting of delimiters, and if the string has
12614 balanced delimiters ([{<>}]) it allows nesting.
12616 On success, the SV with the resulting string is put into lex_stuff or,
12617 if that is already non-NULL, into lex_repl. The second case occurs only
12618 when parsing the RHS of the special constructs s/// and tr/// (y///).
12619 For convenience, the terminating delimiter character is stuffed into
12624 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12627 SV *sv; /* scalar value: string */
12628 const char *tmps; /* temp string, used for delimiter matching */
12629 register char *s = start; /* current position in the buffer */
12630 register char term; /* terminating character */
12631 register char *to; /* current position in the sv's data */
12632 I32 brackets = 1; /* bracket nesting level */
12633 bool has_utf8 = FALSE; /* is there any utf8 content? */
12634 I32 termcode; /* terminating char. code */
12635 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
12636 STRLEN termlen; /* length of terminating string */
12637 int last_off = 0; /* last position for nesting bracket */
12643 PERL_ARGS_ASSERT_SCAN_STR;
12645 /* skip space before the delimiter */
12651 if (PL_realtokenstart >= 0) {
12652 stuffstart = PL_realtokenstart;
12653 PL_realtokenstart = -1;
12656 stuffstart = start - SvPVX(PL_linestr);
12658 /* mark where we are, in case we need to report errors */
12661 /* after skipping whitespace, the next character is the terminator */
12664 termcode = termstr[0] = term;
12668 termcode = utf8_to_uvchr((U8*)s, &termlen);
12669 Copy(s, termstr, termlen, U8);
12670 if (!UTF8_IS_INVARIANT(term))
12674 /* mark where we are */
12675 PL_multi_start = CopLINE(PL_curcop);
12676 PL_multi_open = term;
12678 /* find corresponding closing delimiter */
12679 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12680 termcode = termstr[0] = term = tmps[5];
12682 PL_multi_close = term;
12684 /* create a new SV to hold the contents. 79 is the SV's initial length.
12685 What a random number. */
12686 sv = newSV_type(SVt_PVIV);
12688 SvIV_set(sv, termcode);
12689 (void)SvPOK_only(sv); /* validate pointer */
12691 /* move past delimiter and try to read a complete string */
12693 sv_catpvn(sv, s, termlen);
12696 tstart = SvPVX(PL_linestr) + stuffstart;
12697 if (!PL_thisopen && !keep_delims) {
12698 PL_thisopen = newSVpvn(tstart, s - tstart);
12699 stuffstart = s - SvPVX(PL_linestr);
12703 if (PL_encoding && !UTF) {
12707 int offset = s - SvPVX_const(PL_linestr);
12708 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12709 &offset, (char*)termstr, termlen);
12710 const char * const ns = SvPVX_const(PL_linestr) + offset;
12711 char * const svlast = SvEND(sv) - 1;
12713 for (; s < ns; s++) {
12714 if (*s == '\n' && !PL_rsfp)
12715 CopLINE_inc(PL_curcop);
12718 goto read_more_line;
12720 /* handle quoted delimiters */
12721 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12723 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12725 if ((svlast-1 - t) % 2) {
12726 if (!keep_quoted) {
12727 *(svlast-1) = term;
12729 SvCUR_set(sv, SvCUR(sv) - 1);
12734 if (PL_multi_open == PL_multi_close) {
12740 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12741 /* At here, all closes are "was quoted" one,
12742 so we don't check PL_multi_close. */
12744 if (!keep_quoted && *(t+1) == PL_multi_open)
12749 else if (*t == PL_multi_open)
12757 SvCUR_set(sv, w - SvPVX_const(sv));
12759 last_off = w - SvPVX(sv);
12760 if (--brackets <= 0)
12765 if (!keep_delims) {
12766 SvCUR_set(sv, SvCUR(sv) - 1);
12772 /* extend sv if need be */
12773 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12774 /* set 'to' to the next character in the sv's string */
12775 to = SvPVX(sv)+SvCUR(sv);
12777 /* if open delimiter is the close delimiter read unbridle */
12778 if (PL_multi_open == PL_multi_close) {
12779 for (; s < PL_bufend; s++,to++) {
12780 /* embedded newlines increment the current line number */
12781 if (*s == '\n' && !PL_rsfp)
12782 CopLINE_inc(PL_curcop);
12783 /* handle quoted delimiters */
12784 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12785 if (!keep_quoted && s[1] == term)
12787 /* any other quotes are simply copied straight through */
12791 /* terminate when run out of buffer (the for() condition), or
12792 have found the terminator */
12793 else if (*s == term) {
12796 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12799 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12805 /* if the terminator isn't the same as the start character (e.g.,
12806 matched brackets), we have to allow more in the quoting, and
12807 be prepared for nested brackets.
12810 /* read until we run out of string, or we find the terminator */
12811 for (; s < PL_bufend; s++,to++) {
12812 /* embedded newlines increment the line count */
12813 if (*s == '\n' && !PL_rsfp)
12814 CopLINE_inc(PL_curcop);
12815 /* backslashes can escape the open or closing characters */
12816 if (*s == '\\' && s+1 < PL_bufend) {
12817 if (!keep_quoted &&
12818 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12823 /* allow nested opens and closes */
12824 else if (*s == PL_multi_close && --brackets <= 0)
12826 else if (*s == PL_multi_open)
12828 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12833 /* terminate the copied string and update the sv's end-of-string */
12835 SvCUR_set(sv, to - SvPVX_const(sv));
12838 * this next chunk reads more into the buffer if we're not done yet
12842 break; /* handle case where we are done yet :-) */
12844 #ifndef PERL_STRICT_CR
12845 if (to - SvPVX_const(sv) >= 2) {
12846 if ((to[-2] == '\r' && to[-1] == '\n') ||
12847 (to[-2] == '\n' && to[-1] == '\r'))
12851 SvCUR_set(sv, to - SvPVX_const(sv));
12853 else if (to[-1] == '\r')
12856 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12861 /* if we're out of file, or a read fails, bail and reset the current
12862 line marker so we can report where the unterminated string began
12865 if (PL_madskills) {
12866 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12868 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12870 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12873 CopLINE_inc(PL_curcop);
12874 PL_bufptr = PL_bufend;
12875 if (!lex_next_chunk(0)) {
12877 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12886 /* at this point, we have successfully read the delimited string */
12888 if (!PL_encoding || UTF) {
12890 if (PL_madskills) {
12891 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12892 const int len = s - tstart;
12894 sv_catpvn(PL_thisstuff, tstart, len);
12896 PL_thisstuff = newSVpvn(tstart, len);
12897 if (!PL_thisclose && !keep_delims)
12898 PL_thisclose = newSVpvn(s,termlen);
12903 sv_catpvn(sv, s, termlen);
12908 if (PL_madskills) {
12909 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12910 const int len = s - tstart - termlen;
12912 sv_catpvn(PL_thisstuff, tstart, len);
12914 PL_thisstuff = newSVpvn(tstart, len);
12915 if (!PL_thisclose && !keep_delims)
12916 PL_thisclose = newSVpvn(s - termlen,termlen);
12920 if (has_utf8 || PL_encoding)
12923 PL_multi_end = CopLINE(PL_curcop);
12925 /* if we allocated too much space, give some back */
12926 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12927 SvLEN_set(sv, SvCUR(sv) + 1);
12928 SvPV_renew(sv, SvLEN(sv));
12931 /* decide whether this is the first or second quoted string we've read
12944 takes: pointer to position in buffer
12945 returns: pointer to new position in buffer
12946 side-effects: builds ops for the constant in pl_yylval.op
12948 Read a number in any of the formats that Perl accepts:
12950 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12951 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12954 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12956 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12959 If it reads a number without a decimal point or an exponent, it will
12960 try converting the number to an integer and see if it can do so
12961 without loss of precision.
12965 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12968 register const char *s = start; /* current position in buffer */
12969 register char *d; /* destination in temp buffer */
12970 register char *e; /* end of temp buffer */
12971 NV nv; /* number read, as a double */
12972 SV *sv = NULL; /* place to put the converted number */
12973 bool floatit; /* boolean: int or float? */
12974 const char *lastub = NULL; /* position of last underbar */
12975 static char const number_too_long[] = "Number too long";
12977 PERL_ARGS_ASSERT_SCAN_NUM;
12979 /* We use the first character to decide what type of number this is */
12983 Perl_croak(aTHX_ "panic: scan_num");
12985 /* if it starts with a 0, it could be an octal number, a decimal in
12986 0.13 disguise, or a hexadecimal number, or a binary number. */
12990 u holds the "number so far"
12991 shift the power of 2 of the base
12992 (hex == 4, octal == 3, binary == 1)
12993 overflowed was the number more than we can hold?
12995 Shift is used when we add a digit. It also serves as an "are
12996 we in octal/hex/binary?" indicator to disallow hex characters
12997 when in octal mode.
13002 bool overflowed = FALSE;
13003 bool just_zero = TRUE; /* just plain 0 or binary number? */
13004 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13005 static const char* const bases[5] =
13006 { "", "binary", "", "octal", "hexadecimal" };
13007 static const char* const Bases[5] =
13008 { "", "Binary", "", "Octal", "Hexadecimal" };
13009 static const char* const maxima[5] =
13011 "0b11111111111111111111111111111111",
13015 const char *base, *Base, *max;
13017 /* check for hex */
13018 if (s[1] == 'x' || s[1] == 'X') {
13022 } else if (s[1] == 'b' || s[1] == 'B') {
13027 /* check for a decimal in disguise */
13028 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13030 /* so it must be octal */
13037 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13038 "Misplaced _ in number");
13042 base = bases[shift];
13043 Base = Bases[shift];
13044 max = maxima[shift];
13046 /* read the rest of the number */
13048 /* x is used in the overflow test,
13049 b is the digit we're adding on. */
13054 /* if we don't mention it, we're done */
13058 /* _ are ignored -- but warned about if consecutive */
13060 if (lastub && s == lastub + 1)
13061 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13062 "Misplaced _ in number");
13066 /* 8 and 9 are not octal */
13067 case '8': case '9':
13069 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13073 case '2': case '3': case '4':
13074 case '5': case '6': case '7':
13076 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13079 case '0': case '1':
13080 b = *s++ & 15; /* ASCII digit -> value of digit */
13084 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13085 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13086 /* make sure they said 0x */
13089 b = (*s++ & 7) + 9;
13091 /* Prepare to put the digit we have onto the end
13092 of the number so far. We check for overflows.
13098 x = u << shift; /* make room for the digit */
13100 if ((x >> shift) != u
13101 && !(PL_hints & HINT_NEW_BINARY)) {
13104 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13105 "Integer overflow in %s number",
13108 u = x | b; /* add the digit to the end */
13111 n *= nvshift[shift];
13112 /* If an NV has not enough bits in its
13113 * mantissa to represent an UV this summing of
13114 * small low-order numbers is a waste of time
13115 * (because the NV cannot preserve the
13116 * low-order bits anyway): we could just
13117 * remember when did we overflow and in the
13118 * end just multiply n by the right
13126 /* if we get here, we had success: make a scalar value from
13131 /* final misplaced underbar check */
13132 if (s[-1] == '_') {
13133 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13137 if (n > 4294967295.0)
13138 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13139 "%s number > %s non-portable",
13145 if (u > 0xffffffff)
13146 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13147 "%s number > %s non-portable",
13152 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13153 sv = new_constant(start, s - start, "integer",
13154 sv, NULL, NULL, 0);
13155 else if (PL_hints & HINT_NEW_BINARY)
13156 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13161 handle decimal numbers.
13162 we're also sent here when we read a 0 as the first digit
13164 case '1': case '2': case '3': case '4': case '5':
13165 case '6': case '7': case '8': case '9': case '.':
13168 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13171 /* read next group of digits and _ and copy into d */
13172 while (isDIGIT(*s) || *s == '_') {
13173 /* skip underscores, checking for misplaced ones
13177 if (lastub && s == lastub + 1)
13178 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13179 "Misplaced _ in number");
13183 /* check for end of fixed-length buffer */
13185 Perl_croak(aTHX_ number_too_long);
13186 /* if we're ok, copy the character */
13191 /* final misplaced underbar check */
13192 if (lastub && s == lastub + 1) {
13193 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13196 /* read a decimal portion if there is one. avoid
13197 3..5 being interpreted as the number 3. followed
13200 if (*s == '.' && s[1] != '.') {
13205 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13206 "Misplaced _ in number");
13210 /* copy, ignoring underbars, until we run out of digits.
13212 for (; isDIGIT(*s) || *s == '_'; s++) {
13213 /* fixed length buffer check */
13215 Perl_croak(aTHX_ number_too_long);
13217 if (lastub && s == lastub + 1)
13218 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13219 "Misplaced _ in number");
13225 /* fractional part ending in underbar? */
13226 if (s[-1] == '_') {
13227 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13228 "Misplaced _ in number");
13230 if (*s == '.' && isDIGIT(s[1])) {
13231 /* oops, it's really a v-string, but without the "v" */
13237 /* read exponent part, if present */
13238 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13242 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13243 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
13245 /* stray preinitial _ */
13247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13248 "Misplaced _ in number");
13252 /* allow positive or negative exponent */
13253 if (*s == '+' || *s == '-')
13256 /* stray initial _ */
13258 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13259 "Misplaced _ in number");
13263 /* read digits of exponent */
13264 while (isDIGIT(*s) || *s == '_') {
13267 Perl_croak(aTHX_ number_too_long);
13271 if (((lastub && s == lastub + 1) ||
13272 (!isDIGIT(s[1]) && s[1] != '_')))
13273 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13274 "Misplaced _ in number");
13282 We try to do an integer conversion first if no characters
13283 indicating "float" have been found.
13288 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13290 if (flags == IS_NUMBER_IN_UV) {
13292 sv = newSViv(uv); /* Prefer IVs over UVs. */
13295 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13296 if (uv <= (UV) IV_MIN)
13297 sv = newSViv(-(IV)uv);
13304 /* terminate the string */
13306 nv = Atof(PL_tokenbuf);
13311 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13312 const char *const key = floatit ? "float" : "integer";
13313 const STRLEN keylen = floatit ? 5 : 7;
13314 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13315 key, keylen, sv, NULL, NULL, 0);
13319 /* if it starts with a v, it could be a v-string */
13322 sv = newSV(5); /* preallocate storage space */
13323 s = scan_vstring(s, PL_bufend, sv);
13327 /* make the op for the constant and return */
13330 lvalp->opval = newSVOP(OP_CONST, 0, sv);
13332 lvalp->opval = NULL;
13338 S_scan_formline(pTHX_ register char *s)
13341 register char *eol;
13343 SV * const stuff = newSVpvs("");
13344 bool needargs = FALSE;
13345 bool eofmt = FALSE;
13347 char *tokenstart = s;
13348 SV* savewhite = NULL;
13350 if (PL_madskills) {
13351 savewhite = PL_thiswhite;
13356 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13358 while (!needargs) {
13361 #ifdef PERL_STRICT_CR
13362 while (SPACE_OR_TAB(*t))
13365 while (SPACE_OR_TAB(*t) || *t == '\r')
13368 if (*t == '\n' || t == PL_bufend) {
13373 if (PL_in_eval && !PL_rsfp) {
13374 eol = (char *) memchr(s,'\n',PL_bufend-s);
13379 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13381 for (t = s; t < eol; t++) {
13382 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13384 goto enough; /* ~~ must be first line in formline */
13386 if (*t == '@' || *t == '^')
13390 sv_catpvn(stuff, s, eol-s);
13391 #ifndef PERL_STRICT_CR
13392 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13393 char *end = SvPVX(stuff) + SvCUR(stuff);
13396 SvCUR_set(stuff, SvCUR(stuff) - 1);
13407 if (PL_madskills) {
13409 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13411 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13414 PL_bufptr = PL_bufend;
13415 CopLINE_inc(PL_curcop);
13416 got_some = lex_next_chunk(0);
13417 CopLINE_dec(PL_curcop);
13420 tokenstart = PL_bufptr;
13428 if (SvCUR(stuff)) {
13431 PL_lex_state = LEX_NORMAL;
13432 start_force(PL_curforce);
13433 NEXTVAL_NEXTTOKE.ival = 0;
13437 PL_lex_state = LEX_FORMLINE;
13439 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13441 else if (PL_encoding)
13442 sv_recode_to_utf8(stuff, PL_encoding);
13444 start_force(PL_curforce);
13445 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13447 start_force(PL_curforce);
13448 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13452 SvREFCNT_dec(stuff);
13454 PL_lex_formbrack = 0;
13458 if (PL_madskills) {
13460 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13462 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13463 PL_thiswhite = savewhite;
13470 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13473 const I32 oldsavestack_ix = PL_savestack_ix;
13474 CV* const outsidecv = PL_compcv;
13477 assert(SvTYPE(PL_compcv) == SVt_PVCV);
13479 SAVEI32(PL_subline);
13480 save_item(PL_subname);
13481 SAVESPTR(PL_compcv);
13483 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13484 CvFLAGS(PL_compcv) |= flags;
13486 PL_subline = CopLINE(PL_curcop);
13487 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13488 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13489 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13491 return oldsavestack_ix;
13495 #pragma segment Perl_yylex
13498 S_yywarn(pTHX_ const char *const s)
13502 PERL_ARGS_ASSERT_YYWARN;
13504 PL_in_eval |= EVAL_WARNONLY;
13506 PL_in_eval &= ~EVAL_WARNONLY;
13511 Perl_yyerror(pTHX_ const char *const s)
13514 const char *where = NULL;
13515 const char *context = NULL;
13518 int yychar = PL_parser->yychar;
13520 PERL_ARGS_ASSERT_YYERROR;
13522 if (!yychar || (yychar == ';' && !PL_rsfp))
13524 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13525 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13526 PL_oldbufptr != PL_bufptr) {
13529 The code below is removed for NetWare because it abends/crashes on NetWare
13530 when the script has error such as not having the closing quotes like:
13531 if ($var eq "value)
13532 Checking of white spaces is anyway done in NetWare code.
13535 while (isSPACE(*PL_oldoldbufptr))
13538 context = PL_oldoldbufptr;
13539 contlen = PL_bufptr - PL_oldoldbufptr;
13541 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13542 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13545 The code below is removed for NetWare because it abends/crashes on NetWare
13546 when the script has error such as not having the closing quotes like:
13547 if ($var eq "value)
13548 Checking of white spaces is anyway done in NetWare code.
13551 while (isSPACE(*PL_oldbufptr))
13554 context = PL_oldbufptr;
13555 contlen = PL_bufptr - PL_oldbufptr;
13557 else if (yychar > 255)
13558 where = "next token ???";
13559 else if (yychar == -2) { /* YYEMPTY */
13560 if (PL_lex_state == LEX_NORMAL ||
13561 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13562 where = "at end of line";
13563 else if (PL_lex_inpat)
13564 where = "within pattern";
13566 where = "within string";
13569 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13571 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13572 else if (isPRINT_LC(yychar)) {
13573 const char string = yychar;
13574 sv_catpvn(where_sv, &string, 1);
13577 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13578 where = SvPVX_const(where_sv);
13580 msg = sv_2mortal(newSVpv(s, 0));
13581 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13582 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13584 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13586 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13587 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13588 Perl_sv_catpvf(aTHX_ msg,
13589 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13590 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13593 if (PL_in_eval & EVAL_WARNONLY) {
13594 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13598 if (PL_error_count >= 10) {
13599 if (PL_in_eval && SvCUR(ERRSV))
13600 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13601 SVfARG(ERRSV), OutCopFILE(PL_curcop));
13603 Perl_croak(aTHX_ "%s has too many errors.\n",
13604 OutCopFILE(PL_curcop));
13607 PL_in_my_stash = NULL;
13611 #pragma segment Main
13615 S_swallow_bom(pTHX_ U8 *s)
13618 const STRLEN slen = SvCUR(PL_linestr);
13620 PERL_ARGS_ASSERT_SWALLOW_BOM;
13624 if (s[1] == 0xFE) {
13625 /* UTF-16 little-endian? (or UTF-32LE?) */
13626 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13627 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13628 #ifndef PERL_NO_UTF16_FILTER
13629 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13631 if (PL_bufend > (char*)s) {
13632 s = add_utf16_textfilter(s, TRUE);
13635 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13640 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13641 #ifndef PERL_NO_UTF16_FILTER
13642 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13644 if (PL_bufend > (char *)s) {
13645 s = add_utf16_textfilter(s, FALSE);
13648 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13653 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13654 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13655 s += 3; /* UTF-8 */
13661 if (s[2] == 0xFE && s[3] == 0xFF) {
13662 /* UTF-32 big-endian */
13663 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13666 else if (s[2] == 0 && s[3] != 0) {
13669 * are a good indicator of UTF-16BE. */
13670 #ifndef PERL_NO_UTF16_FILTER
13671 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13672 s = add_utf16_textfilter(s, FALSE);
13674 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13680 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13681 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13682 s += 4; /* UTF-8 */
13688 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13691 * are a good indicator of UTF-16LE. */
13692 #ifndef PERL_NO_UTF16_FILTER
13693 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13694 s = add_utf16_textfilter(s, TRUE);
13696 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13704 #ifndef PERL_NO_UTF16_FILTER
13706 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13709 SV *const filter = FILTER_DATA(idx);
13710 /* We re-use this each time round, throwing the contents away before we
13712 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13713 SV *const utf8_buffer = filter;
13714 IV status = IoPAGE(filter);
13715 const bool reverse = cBOOL(IoLINES(filter));
13718 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13720 /* As we're automatically added, at the lowest level, and hence only called
13721 from this file, we can be sure that we're not called in block mode. Hence
13722 don't bother writing code to deal with block mode. */
13724 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13727 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13729 DEBUG_P(PerlIO_printf(Perl_debug_log,
13730 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13731 FPTR2DPTR(void *, S_utf16_textfilter),
13732 reverse ? 'l' : 'b', idx, maxlen, status,
13733 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13740 /* First, look in our buffer of existing UTF-8 data: */
13741 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13745 } else if (status == 0) {
13747 IoPAGE(filter) = 0;
13748 nl = SvEND(utf8_buffer);
13751 STRLEN got = nl - SvPVX(utf8_buffer);
13752 /* Did we have anything to append? */
13754 sv_catpvn(sv, SvPVX(utf8_buffer), got);
13755 /* Everything else in this code works just fine if SVp_POK isn't
13756 set. This, however, needs it, and we need it to work, else
13757 we loop infinitely because the buffer is never consumed. */
13758 sv_chop(utf8_buffer, nl);
13762 /* OK, not a complete line there, so need to read some more UTF-16.
13763 Read an extra octect if the buffer currently has an odd number. */
13767 if (SvCUR(utf16_buffer) >= 2) {
13768 /* Location of the high octet of the last complete code point.
13769 Gosh, UTF-16 is a pain. All the benefits of variable length,
13770 *coupled* with all the benefits of partial reads and
13772 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13773 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13775 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13779 /* We have the first half of a surrogate. Read more. */
13780 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13783 status = FILTER_READ(idx + 1, utf16_buffer,
13784 160 + (SvCUR(utf16_buffer) & 1));
13785 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13786 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13789 IoPAGE(filter) = status;
13794 chars = SvCUR(utf16_buffer) >> 1;
13795 have = SvCUR(utf8_buffer);
13796 SvGROW(utf8_buffer, have + chars * 3 + 1);
13799 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13800 (U8*)SvPVX_const(utf8_buffer) + have,
13801 chars * 2, &newlen);
13803 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13804 (U8*)SvPVX_const(utf8_buffer) + have,
13805 chars * 2, &newlen);
13807 SvCUR_set(utf8_buffer, have + newlen);
13810 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13811 it's private to us, and utf16_to_utf8{,reversed} take a
13812 (pointer,length) pair, rather than a NUL-terminated string. */
13813 if(SvCUR(utf16_buffer) & 1) {
13814 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13815 SvCUR_set(utf16_buffer, 1);
13817 SvCUR_set(utf16_buffer, 0);
13820 DEBUG_P(PerlIO_printf(Perl_debug_log,
13821 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13823 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13824 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13829 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13831 SV *filter = filter_add(S_utf16_textfilter, NULL);
13833 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13835 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13836 sv_setpvs(filter, "");
13837 IoLINES(filter) = reversed;
13838 IoPAGE(filter) = 1; /* Not EOF */
13840 /* Sadly, we have to return a valid pointer, come what may, so we have to
13841 ignore any error return from this. */
13842 SvCUR_set(PL_linestr, 0);
13843 if (FILTER_READ(0, PL_linestr, 0)) {
13844 SvUTF8_on(PL_linestr);
13846 SvUTF8_on(PL_linestr);
13848 PL_bufend = SvEND(PL_linestr);
13849 return (U8*)SvPVX(PL_linestr);
13854 Returns a pointer to the next character after the parsed
13855 vstring, as well as updating the passed in sv.
13857 Function must be called like
13860 s = scan_vstring(s,e,sv);
13862 where s and e are the start and end of the string.
13863 The sv should already be large enough to store the vstring
13864 passed in, for performance reasons.
13869 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13872 const char *pos = s;
13873 const char *start = s;
13875 PERL_ARGS_ASSERT_SCAN_VSTRING;
13877 if (*pos == 'v') pos++; /* get past 'v' */
13878 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13880 if ( *pos != '.') {
13881 /* this may not be a v-string if followed by => */
13882 const char *next = pos;
13883 while (next < e && isSPACE(*next))
13885 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13886 /* return string not v-string */
13887 sv_setpvn(sv,(char *)s,pos-s);
13888 return (char *)pos;
13892 if (!isALPHA(*pos)) {
13893 U8 tmpbuf[UTF8_MAXBYTES+1];
13896 s++; /* get past 'v' */
13901 /* this is atoi() that tolerates underscores */
13904 const char *end = pos;
13906 while (--end >= s) {
13908 const UV orev = rev;
13909 rev += (*end - '0') * mult;
13912 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13913 "Integer overflow in decimal number");
13917 if (rev > 0x7FFFFFFF)
13918 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13920 /* Append native character for the rev point */
13921 tmpend = uvchr_to_utf8(tmpbuf, rev);
13922 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13923 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13925 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13931 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13935 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13942 Perl_keyword_plugin_standard(pTHX_
13943 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13945 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13946 PERL_UNUSED_CONTEXT;
13947 PERL_UNUSED_ARG(keyword_ptr);
13948 PERL_UNUSED_ARG(keyword_len);
13949 PERL_UNUSED_ARG(op_ptr);
13950 return KEYWORD_PLUGIN_DECLINE;
13954 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
13956 Parse a single complete Perl statement. This may be a normal imperative
13957 statement, including optional label, or a declaration that has
13958 compile-time effect. It is up to the caller to ensure that the dynamic
13959 parser state (L</PL_parser> et al) is correctly set to reflect the source
13960 of the code to be parsed and the lexical context for the statement.
13962 The op tree representing the statement is returned. This may be a
13963 null pointer if the statement is null, for example if it was actually
13964 a subroutine definition (which has compile-time side effects). If not
13965 null, it will be the result of a L</newSTATEOP> call, normally including
13966 a C<nextstate> or equivalent op.
13968 If an error occurs in parsing or compilation, in most cases a valid op
13969 tree (most likely null) is returned anyway. The error is reflected in
13970 the parser state, normally resulting in a single exception at the top
13971 level of parsing which covers all the compilation errors that occurred.
13972 Some compilation errors, however, will throw an exception immediately.
13974 The I<flags> parameter is reserved for future use, and must always
13981 Perl_parse_fullstmt(pTHX_ U32 flags)
13985 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13987 SAVEVPTR(PL_eval_root);
13988 PL_eval_root = NULL;
13989 if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
13990 qerror(Perl_mess(aTHX_ "Parse error"));
13991 fullstmtop = PL_eval_root;
13997 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
13999 Parse a sequence of zero or more Perl statements. These may be normal
14000 imperative statements, including optional labels, or declarations
14001 that have compile-time effect, or any mixture thereof. The statement
14002 sequence ends when a closing brace or end-of-file is encountered in a
14003 place where a new statement could have validly started. It is up to
14004 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14005 is correctly set to reflect the source of the code to be parsed and the
14006 lexical context for the statements.
14008 The op tree representing the statement sequence is returned. This may
14009 be a null pointer if the statements were all null, for example if there
14010 were no statements or if there were only subroutine definitions (which
14011 have compile-time side effects). If not null, it will be a C<lineseq>
14012 list, normally including C<nextstate> or equivalent ops.
14014 If an error occurs in parsing or compilation, in most cases a valid op
14015 tree is returned anyway. The error is reflected in the parser state,
14016 normally resulting in a single exception at the top level of parsing
14017 which covers all the compilation errors that occurred. Some compilation
14018 errors, however, will throw an exception immediately.
14020 The I<flags> parameter is reserved for future use, and must always
14027 Perl_parse_stmtseq(pTHX_ U32 flags)
14031 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14033 SAVEVPTR(PL_eval_root);
14034 PL_eval_root = NULL;
14035 if(yyparse(GRAMSTMTSEQ) && !PL_parser->error_count)
14036 qerror(Perl_mess(aTHX_ "Parse error"));
14037 stmtseqop = PL_eval_root;
14043 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
14045 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
14046 deprecate("qw(...) as parentheses");
14048 if (qwlist->op_type == OP_STUB) {
14052 start_force(PL_curforce);
14053 NEXTVAL_NEXTTOKE.opval = qwlist;
14061 * c-indentation-style: bsd
14062 * c-basic-offset: 4
14063 * indent-tabs-mode: t
14066 * ex: set ts=8 sts=4 sw=4 noet: