3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_brackstack (PL_parser->lex_brackstack)
52 #define PL_lex_casemods (PL_parser->lex_casemods)
53 #define PL_lex_casestack (PL_parser->lex_casestack)
54 #define PL_lex_defer (PL_parser->lex_defer)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_expect (PL_parser->lex_expect)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_pending_ident (PL_parser->pending_ident)
68 #define PL_preambled (PL_parser->preambled)
69 #define PL_sublex_info (PL_parser->sublex_info)
70 #define PL_linestr (PL_parser->linestr)
71 #define PL_expect (PL_parser->expect)
72 #define PL_copline (PL_parser->copline)
73 #define PL_bufptr (PL_parser->bufptr)
74 #define PL_oldbufptr (PL_parser->oldbufptr)
75 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76 #define PL_linestart (PL_parser->linestart)
77 #define PL_bufend (PL_parser->bufend)
78 #define PL_last_uni (PL_parser->last_uni)
79 #define PL_last_lop (PL_parser->last_lop)
80 #define PL_last_lop_op (PL_parser->last_lop_op)
81 #define PL_lex_state (PL_parser->lex_state)
82 #define PL_rsfp (PL_parser->rsfp)
83 #define PL_rsfp_filters (PL_parser->rsfp_filters)
84 #define PL_in_my (PL_parser->in_my)
85 #define PL_in_my_stash (PL_parser->in_my_stash)
86 #define PL_tokenbuf (PL_parser->tokenbuf)
87 #define PL_multi_end (PL_parser->multi_end)
88 #define PL_error_count (PL_parser->error_count)
91 # define PL_endwhite (PL_parser->endwhite)
92 # define PL_faketokens (PL_parser->faketokens)
93 # define PL_lasttoke (PL_parser->lasttoke)
94 # define PL_nextwhite (PL_parser->nextwhite)
95 # define PL_realtokenstart (PL_parser->realtokenstart)
96 # define PL_skipwhite (PL_parser->skipwhite)
97 # define PL_thisclose (PL_parser->thisclose)
98 # define PL_thismad (PL_parser->thismad)
99 # define PL_thisopen (PL_parser->thisopen)
100 # define PL_thisstuff (PL_parser->thisstuff)
101 # define PL_thistoken (PL_parser->thistoken)
102 # define PL_thiswhite (PL_parser->thiswhite)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_nexttoke (PL_parser->nexttoke)
105 # define PL_curforce (PL_parser->curforce)
107 # define PL_nexttoke (PL_parser->nexttoke)
108 # define PL_nexttype (PL_parser->nexttype)
109 # define PL_nextval (PL_parser->nextval)
112 /* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
115 S_pending_ident(pTHX);
117 static const char ident_too_long[] = "Identifier too long";
120 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
121 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
123 # define CURMAD(slot,sv)
124 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
127 #define XFAKEBRACK 128
128 #define XENUMMASK 127
130 #ifdef USE_UTF8_SCRIPTS
131 # define UTF (!IN_BYTES)
133 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
136 /* The maximum number of characters preceding the unrecognized one to display */
137 #define UNRECOGNIZED_PRECEDE_COUNT 10
139 /* In variables named $^X, these are the legal values for X.
140 * 1999-02-27 mjd-perl-patch@plover.com */
141 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
143 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
145 /* LEX_* are values for PL_lex_state, the state of the lexer.
146 * They are arranged oddly so that the guard on the switch statement
147 * can get by with a single comparison (if the compiler is smart enough).
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN1 : not used, except for not, which isn't a UNIOP
225 * BOop : bitwise or or xor
227 * SHop : shift operator
228 * PWop : power operator
229 * PMop : pattern-matching operator
230 * Aop : addition-level operator
231 * Mop : multiplication-level operator
232 * Eop : equality-testing operator
233 * Rop : relational operator <= != gt
235 * Also see LOP and lop() below.
238 #ifdef DEBUGGING /* Serve -DT. */
239 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
241 # define REPORT(retval) (retval)
244 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
245 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
246 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
247 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
248 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
250 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
251 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
252 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
253 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
254 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
255 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
256 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
257 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
258 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
259 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
260 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
261 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
262 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
263 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
265 /* This bit of chicanery makes a unary function followed by
266 * a parenthesis into a function with one argument, highest precedence.
267 * The UNIDOR macro is for unary functions that can be followed by the //
268 * operator (such as C<shift // 0>).
270 #define UNI2(f,x) { \
271 pl_yylval.ival = f; \
274 PL_last_uni = PL_oldbufptr; \
275 PL_last_lop_op = f; \
277 return REPORT( (int)FUNC1 ); \
279 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
281 #define UNI(f) UNI2(f,XTERM)
282 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
284 #define UNIBRACK(f) { \
285 pl_yylval.ival = f; \
287 PL_last_uni = PL_oldbufptr; \
289 return REPORT( (int)FUNC1 ); \
291 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 /* grandfather return to old style */
295 #define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
299 /* how to interpret the pl_yylval associated with the token */
303 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
309 static struct debug_tokens {
311 enum token_type type;
313 } const debug_tokens[] =
315 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
316 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
317 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
318 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
319 { ARROW, TOKENTYPE_NONE, "ARROW" },
320 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
321 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
322 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
323 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
324 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
325 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
326 { DO, TOKENTYPE_NONE, "DO" },
327 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
328 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
329 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
330 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
331 { ELSE, TOKENTYPE_NONE, "ELSE" },
332 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
333 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
334 { FOR, TOKENTYPE_IVAL, "FOR" },
335 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
336 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
337 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
338 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
339 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
340 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
341 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
342 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
343 { IF, TOKENTYPE_IVAL, "IF" },
344 { LABEL, TOKENTYPE_PVAL, "LABEL" },
345 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
346 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
347 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
348 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
349 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
350 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
351 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
352 { MY, TOKENTYPE_IVAL, "MY" },
353 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
354 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
355 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
356 { OROP, TOKENTYPE_IVAL, "OROP" },
357 { OROR, TOKENTYPE_NONE, "OROR" },
358 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
359 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
360 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
361 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
362 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
363 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
364 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
365 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
366 { PREINC, TOKENTYPE_NONE, "PREINC" },
367 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
368 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
369 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
370 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
371 { SUB, TOKENTYPE_NONE, "SUB" },
372 { THING, TOKENTYPE_OPVAL, "THING" },
373 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
374 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
375 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
376 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
377 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
378 { USE, TOKENTYPE_IVAL, "USE" },
379 { WHEN, TOKENTYPE_IVAL, "WHEN" },
380 { WHILE, TOKENTYPE_IVAL, "WHILE" },
381 { WORD, TOKENTYPE_OPVAL, "WORD" },
382 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
383 { 0, TOKENTYPE_NONE, NULL }
386 /* dump the returned token in rv, plus any optional arg in pl_yylval */
389 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
393 PERL_ARGS_ASSERT_TOKEREPORT;
396 const char *name = NULL;
397 enum token_type type = TOKENTYPE_NONE;
398 const struct debug_tokens *p;
399 SV* const report = newSVpvs("<== ");
401 for (p = debug_tokens; p->token; p++) {
402 if (p->token == (int)rv) {
409 Perl_sv_catpv(aTHX_ report, name);
410 else if ((char)rv > ' ' && (char)rv < '~')
411 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
413 sv_catpvs(report, "EOF");
415 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
423 case TOKENTYPE_OPNUM:
424 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
425 PL_op_name[lvalp->ival]);
428 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
430 case TOKENTYPE_OPVAL:
432 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
433 PL_op_name[lvalp->opval->op_type]);
434 if (lvalp->opval->op_type == OP_CONST) {
435 Perl_sv_catpvf(aTHX_ report, " %s",
436 SvPEEK(cSVOPx_sv(lvalp->opval)));
441 sv_catpvs(report, "(opval=null)");
444 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
450 /* print the buffer with suitable escapes */
453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
455 SV* const tmp = newSVpvs("");
457 PERL_ARGS_ASSERT_PRINTBUF;
459 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 S_deprecate_commaless_var_list(pTHX) {
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
475 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
476 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
480 S_ao(pTHX_ int toketype)
483 if (*PL_bufptr == '=') {
485 if (toketype == ANDAND)
486 pl_yylval.ival = OP_ANDASSIGN;
487 else if (toketype == OROR)
488 pl_yylval.ival = OP_ORASSIGN;
489 else if (toketype == DORDOR)
490 pl_yylval.ival = OP_DORASSIGN;
498 * When Perl expects an operator and finds something else, no_op
499 * prints the warning. It always prints "<something> found where
500 * operator expected. It prints "Missing semicolon on previous line?"
501 * if the surprise occurs at the start of the line. "do you need to
502 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
503 * where the compiler doesn't know if foo is a method call or a function.
504 * It prints "Missing operator before end of line" if there's nothing
505 * after the missing operator, or "... before <...>" if there is something
506 * after the missing operator.
510 S_no_op(pTHX_ const char *const what, char *s)
513 char * const oldbp = PL_bufptr;
514 const bool is_first = (PL_oldbufptr == PL_linestart);
516 PERL_ARGS_ASSERT_NO_OP;
522 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
523 if (ckWARN_d(WARN_SYNTAX)) {
525 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
526 "\t(Missing semicolon on previous line?)\n");
527 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
529 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
531 if (t < PL_bufptr && isSPACE(*t))
532 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
533 "\t(Do you need to predeclare %.*s?)\n",
534 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
538 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
539 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
547 * Complain about missing quote/regexp/heredoc terminator.
548 * If it's called with NULL then it cauterizes the line buffer.
549 * If we're in a delimited string and the delimiter is a control
550 * character, it's reformatted into a two-char sequence like ^C.
555 S_missingterm(pTHX_ char *s)
561 char * const nl = strrchr(s,'\n');
565 else if (isCNTRL(PL_multi_close)) {
567 tmpbuf[1] = (char)toCTRL(PL_multi_close);
572 *tmpbuf = (char)PL_multi_close;
576 q = strchr(s,'"') ? '\'' : '"';
577 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
580 #define FEATURE_IS_ENABLED(name) \
581 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
582 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
583 /* The longest string we pass in. */
584 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
587 * S_feature_is_enabled
588 * Check whether the named feature is enabled.
591 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
594 HV * const hinthv = GvHV(PL_hintgv);
595 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
597 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
599 assert(namelen <= MAX_FEATURE_LEN);
600 memcpy(&he_name[8], name, namelen);
602 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
606 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
607 * utf16-to-utf8-reversed.
610 #ifdef PERL_CR_FILTER
614 register const char *s = SvPVX_const(sv);
615 register const char * const e = s + SvCUR(sv);
617 PERL_ARGS_ASSERT_STRIP_RETURN;
619 /* outer loop optimized to do nothing if there are no CR-LFs */
621 if (*s++ == '\r' && *s == '\n') {
622 /* hit a CR-LF, need to copy the rest */
623 register char *d = s - 1;
626 if (*s == '\r' && s[1] == '\n')
637 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
639 const I32 count = FILTER_READ(idx+1, sv, maxlen);
640 if (count > 0 && !maxlen)
651 * Create a parser object and initialise its parser and lexer fields
653 * rsfp is the opened file handle to read from (if any),
655 * line holds any initial content already read from the file (or in
656 * the case of no file, such as an eval, the whole contents);
658 * new_filter indicates that this is a new file and it shouldn't inherit
659 * the filters from the current parser (ie require).
663 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
666 const char *s = NULL;
668 yy_parser *parser, *oparser;
670 /* create and initialise a parser */
672 Newxz(parser, 1, yy_parser);
673 parser->old_parser = oparser = PL_parser;
676 parser->stack = NULL;
678 parser->stack_size = 0;
680 /* on scope exit, free this parser and restore any outer one */
682 parser->saved_curcop = PL_curcop;
684 /* initialise lexer state */
687 parser->curforce = -1;
689 parser->nexttoke = 0;
691 parser->error_count = oparser ? oparser->error_count : 0;
692 parser->copline = NOLINE;
693 parser->lex_state = LEX_NORMAL;
694 parser->expect = XSTATE;
696 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
697 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
699 Newx(parser->lex_brackstack, 120, char);
700 Newx(parser->lex_casestack, 12, char);
701 *parser->lex_casestack = '\0';
704 s = SvPV_const(line, len);
710 parser->linestr = newSVpvs("\n;");
711 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
712 /* avoid tie/overload weirdness */
713 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
715 sv_catpvs(parser->linestr, "\n;");
718 SvREFCNT_inc_simple_void_NN(line);
719 parser->linestr = line;
721 parser->oldoldbufptr =
724 parser->linestart = SvPVX(parser->linestr);
725 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
726 parser->last_lop = parser->last_uni = NULL;
730 /* delete a parser object */
733 Perl_parser_free(pTHX_ const yy_parser *parser)
735 PERL_ARGS_ASSERT_PARSER_FREE;
737 PL_curcop = parser->saved_curcop;
738 SvREFCNT_dec(parser->linestr);
740 if (parser->rsfp == PerlIO_stdin())
741 PerlIO_clearerr(parser->rsfp);
742 else if (parser->rsfp && (!parser->old_parser ||
743 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
744 PerlIO_close(parser->rsfp);
745 SvREFCNT_dec(parser->rsfp_filters);
747 Safefree(parser->lex_brackstack);
748 Safefree(parser->lex_casestack);
749 PL_parser = parser->old_parser;
756 * Finalizer for lexing operations. Must be called when the parser is
757 * done with the lexer.
764 PL_doextract = FALSE;
768 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
770 Buffer scalar containing the chunk currently under consideration of the
771 text currently being lexed. This is always a plain string scalar (for
772 which C<SvPOK> is true). It is not intended to be used as a scalar by
773 normal scalar means; instead refer to the buffer directly by the pointer
774 variables described below.
776 The lexer maintains various C<char*> pointers to things in the
777 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
778 reallocated, all of these pointers must be updated. Don't attempt to
779 do this manually, but rather use L</lex_grow_linestr> if you need to
780 reallocate the buffer.
782 The content of the text chunk in the buffer is commonly exactly one
783 complete line of input, up to and including a newline terminator,
784 but there are situations where it is otherwise. The octets of the
785 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
786 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
787 flag on this scalar, which may disagree with it.
789 For direct examination of the buffer, the variable
790 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
791 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
792 of these pointers is usually preferable to examination of the scalar
793 through normal scalar means.
795 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
797 Direct pointer to the end of the chunk of text currently being lexed, the
798 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
799 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
800 always located at the end of the buffer, and does not count as part of
801 the buffer's contents.
803 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
805 Points to the current position of lexing inside the lexer buffer.
806 Characters around this point may be freely examined, within
807 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
808 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
809 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
811 Lexing code (whether in the Perl core or not) moves this pointer past
812 the characters that it consumes. It is also expected to perform some
813 bookkeeping whenever a newline character is consumed. This movement
814 can be more conveniently performed by the function L</lex_read_to>,
815 which handles newlines appropriately.
817 Interpretation of the buffer's octets can be abstracted out by
818 using the slightly higher-level functions L</lex_peek_unichar> and
819 L</lex_read_unichar>.
821 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
823 Points to the start of the current line inside the lexer buffer.
824 This is useful for indicating at which column an error occurred, and
825 not much else. This must be updated by any lexing code that consumes
826 a newline; the function L</lex_read_to> handles this detail.
832 =for apidoc Amx|bool|lex_bufutf8
834 Indicates whether the octets in the lexer buffer
835 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
836 of Unicode characters. If not, they should be interpreted as Latin-1
837 characters. This is analogous to the C<SvUTF8> flag for scalars.
839 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
840 contains valid UTF-8. Lexing code must be robust in the face of invalid
843 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
844 is significant, but not the whole story regarding the input character
845 encoding. Normally, when a file is being read, the scalar contains octets
846 and its C<SvUTF8> flag is off, but the octets should be interpreted as
847 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
848 however, the scalar may have the C<SvUTF8> flag on, and in this case its
849 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
850 is in effect. This logic may change in the future; use this function
851 instead of implementing the logic yourself.
857 Perl_lex_bufutf8(pTHX)
863 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
865 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
866 at least I<len> octets (including terminating NUL). Returns a
867 pointer to the reallocated buffer. This is necessary before making
868 any direct modification of the buffer that would increase its length.
869 L</lex_stuff_pvn> provides a more convenient way to insert text into
872 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
873 this function updates all of the lexer's variables that point directly
880 Perl_lex_grow_linestr(pTHX_ STRLEN len)
884 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
885 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
886 linestr = PL_parser->linestr;
887 buf = SvPVX(linestr);
888 if (len <= SvLEN(linestr))
890 bufend_pos = PL_parser->bufend - buf;
891 bufptr_pos = PL_parser->bufptr - buf;
892 oldbufptr_pos = PL_parser->oldbufptr - buf;
893 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
894 linestart_pos = PL_parser->linestart - buf;
895 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
896 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
897 buf = sv_grow(linestr, len);
898 PL_parser->bufend = buf + bufend_pos;
899 PL_parser->bufptr = buf + bufptr_pos;
900 PL_parser->oldbufptr = buf + oldbufptr_pos;
901 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
902 PL_parser->linestart = buf + linestart_pos;
903 if (PL_parser->last_uni)
904 PL_parser->last_uni = buf + last_uni_pos;
905 if (PL_parser->last_lop)
906 PL_parser->last_lop = buf + last_lop_pos;
911 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
913 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
914 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
915 reallocating the buffer if necessary. This means that lexing code that
916 runs later will see the characters as if they had appeared in the input.
917 It is not recommended to do this as part of normal parsing, and most
918 uses of this facility run the risk of the inserted characters being
919 interpreted in an unintended manner.
921 The string to be inserted is represented by I<len> octets starting
922 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
923 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
924 The characters are recoded for the lexer buffer, according to how the
925 buffer is currently being interpreted (L</lex_bufutf8>). If a string
926 to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
927 function is more convenient.
933 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
937 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
938 if (flags & ~(LEX_STUFF_UTF8))
939 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
941 if (flags & LEX_STUFF_UTF8) {
945 const char *p, *e = pv+len;
946 for (p = pv; p != e; p++)
947 highhalf += !!(((U8)*p) & 0x80);
950 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
951 bufptr = PL_parser->bufptr;
952 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
953 SvCUR_set(PL_parser->linestr,
954 SvCUR(PL_parser->linestr) + len+highhalf);
955 PL_parser->bufend += len+highhalf;
956 for (p = pv; p != e; p++) {
959 *bufptr++ = (char)(0xc0 | (c >> 6));
960 *bufptr++ = (char)(0x80 | (c & 0x3f));
967 if (flags & LEX_STUFF_UTF8) {
969 const char *p, *e = pv+len;
970 for (p = pv; p != e; p++) {
973 Perl_croak(aTHX_ "Lexing code attempted to stuff "
974 "non-Latin-1 character into Latin-1 input");
975 } else if (c >= 0xc2 && p+1 != e &&
976 (((U8)p[1]) & 0xc0) == 0x80) {
979 } else if (c >= 0x80) {
980 /* malformed UTF-8 */
982 SAVESPTR(PL_warnhook);
983 PL_warnhook = PERL_WARNHOOK_FATAL;
984 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
990 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
991 bufptr = PL_parser->bufptr;
992 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
993 SvCUR_set(PL_parser->linestr,
994 SvCUR(PL_parser->linestr) + len-highhalf);
995 PL_parser->bufend += len-highhalf;
996 for (p = pv; p != e; p++) {
999 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1002 *bufptr++ = (char)c;
1007 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1008 bufptr = PL_parser->bufptr;
1009 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1010 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1018 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1020 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022 reallocating the buffer if necessary. This means that lexing code that
1023 runs later will see the characters as if they had appeared in the input.
1024 It is not recommended to do this as part of normal parsing, and most
1025 uses of this facility run the risk of the inserted characters being
1026 interpreted in an unintended manner.
1028 The string to be inserted is the string value of I<sv>. The characters
1029 are recoded for the lexer buffer, according to how the buffer is currently
1030 being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1031 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032 need to construct a scalar.
1038 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1042 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1044 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1046 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1050 =for apidoc Amx|void|lex_unstuff|char *ptr
1052 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1054 This hides the discarded text from any lexing code that runs later,
1055 as if the text had never appeared.
1057 This is not the normal way to consume lexed text. For that, use
1064 Perl_lex_unstuff(pTHX_ char *ptr)
1068 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069 buf = PL_parser->bufptr;
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1074 bufend = PL_parser->bufend;
1076 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077 unstuff_len = ptr - buf;
1078 Move(ptr, buf, bufend+1-ptr, char);
1079 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080 PL_parser->bufend = bufend - unstuff_len;
1084 =for apidoc Amx|void|lex_read_to|char *ptr
1086 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088 performing the correct bookkeeping whenever a newline character is passed.
1089 This is the normal way to consume lexed text.
1091 Interpretation of the buffer's octets can be abstracted out by
1092 using the slightly higher-level functions L</lex_peek_unichar> and
1093 L</lex_read_unichar>.
1099 Perl_lex_read_to(pTHX_ char *ptr)
1102 PERL_ARGS_ASSERT_LEX_READ_TO;
1103 s = PL_parser->bufptr;
1104 if (ptr < s || ptr > PL_parser->bufend)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106 for (; s != ptr; s++)
1108 CopLINE_inc(PL_curcop);
1109 PL_parser->linestart = s+1;
1111 PL_parser->bufptr = ptr;
1115 =for apidoc Amx|void|lex_discard_to|char *ptr
1117 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118 up to I<ptr>. The remaining content of the buffer will be moved, and
1119 all pointers into the buffer updated appropriately. I<ptr> must not
1120 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121 it is not permitted to discard text that has yet to be lexed.
1123 Normally it is not necessarily to do this directly, because it suffices to
1124 use the implicit discarding behaviour of L</lex_next_chunk> and things
1125 based on it. However, if a token stretches across multiple lines,
1126 and the lexing code has kept multiple lines of text in the buffer for
1127 that purpose, then after completion of the token it would be wise to
1128 explicitly discard the now-unneeded earlier lines, to avoid future
1129 multi-line tokens growing the buffer without bound.
1135 Perl_lex_discard_to(pTHX_ char *ptr)
1139 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140 buf = SvPVX(PL_parser->linestr);
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1145 if (ptr > PL_parser->bufptr)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147 discard_len = ptr - buf;
1148 if (PL_parser->oldbufptr < ptr)
1149 PL_parser->oldbufptr = ptr;
1150 if (PL_parser->oldoldbufptr < ptr)
1151 PL_parser->oldoldbufptr = ptr;
1152 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153 PL_parser->last_uni = NULL;
1154 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155 PL_parser->last_lop = NULL;
1156 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158 PL_parser->bufend -= discard_len;
1159 PL_parser->bufptr -= discard_len;
1160 PL_parser->oldbufptr -= discard_len;
1161 PL_parser->oldoldbufptr -= discard_len;
1162 if (PL_parser->last_uni)
1163 PL_parser->last_uni -= discard_len;
1164 if (PL_parser->last_lop)
1165 PL_parser->last_lop -= discard_len;
1169 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1171 Reads in the next chunk of text to be lexed, appending it to
1172 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1173 looked to the end of the current chunk and wants to know more. It is
1174 usual, but not necessary, for lexing to have consumed the entirety of
1175 the current chunk at this time.
1177 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178 chunk (i.e., the current chunk has been entirely consumed), normally the
1179 current chunk will be discarded at the same time that the new chunk is
1180 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181 will not be discarded. If the current chunk has not been entirely
1182 consumed, then it will not be discarded regardless of the flag.
1184 Returns true if some new text was added to the buffer, or false if the
1185 buffer has reached the end of the input text.
1190 #define LEX_FAKE_EOF 0x80000000
1193 Perl_lex_next_chunk(pTHX_ U32 flags)
1197 STRLEN old_bufend_pos, new_bufend_pos;
1198 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200 bool got_some_for_debugger = 0;
1202 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1204 linestr = PL_parser->linestr;
1205 buf = SvPVX(linestr);
1206 if (!(flags & LEX_KEEP_PREVIOUS) &&
1207 PL_parser->bufptr == PL_parser->bufend) {
1208 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1210 if (PL_parser->last_uni != PL_parser->bufend)
1211 PL_parser->last_uni = NULL;
1212 if (PL_parser->last_lop != PL_parser->bufend)
1213 PL_parser->last_lop = NULL;
1214 last_uni_pos = last_lop_pos = 0;
1218 old_bufend_pos = PL_parser->bufend - buf;
1219 bufptr_pos = PL_parser->bufptr - buf;
1220 oldbufptr_pos = PL_parser->oldbufptr - buf;
1221 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222 linestart_pos = PL_parser->linestart - buf;
1223 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1226 if (flags & LEX_FAKE_EOF) {
1228 } else if (!PL_parser->rsfp) {
1230 } else if (filter_gets(linestr, old_bufend_pos)) {
1232 got_some_for_debugger = 1;
1234 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1235 sv_setpvs(linestr, "");
1237 /* End of real input. Close filehandle (unless it was STDIN),
1238 * then add implicit termination.
1240 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241 PerlIO_clearerr(PL_parser->rsfp);
1242 else if (PL_parser->rsfp)
1243 (void)PerlIO_close(PL_parser->rsfp);
1244 PL_parser->rsfp = NULL;
1245 PL_doextract = FALSE;
1247 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1250 if (!PL_in_eval && PL_minus_p) {
1252 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253 PL_minus_n = PL_minus_p = 0;
1254 } else if (!PL_in_eval && PL_minus_n) {
1255 sv_catpvs(linestr, /*{*/";}");
1258 sv_catpvs(linestr, ";");
1261 buf = SvPVX(linestr);
1262 new_bufend_pos = SvCUR(linestr);
1263 PL_parser->bufend = buf + new_bufend_pos;
1264 PL_parser->bufptr = buf + bufptr_pos;
1265 PL_parser->oldbufptr = buf + oldbufptr_pos;
1266 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267 PL_parser->linestart = buf + linestart_pos;
1268 if (PL_parser->last_uni)
1269 PL_parser->last_uni = buf + last_uni_pos;
1270 if (PL_parser->last_lop)
1271 PL_parser->last_lop = buf + last_lop_pos;
1272 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1273 PL_curstash != PL_debstash) {
1274 /* debugger active and we're not compiling the debugger code,
1275 * so store the line into the debugger's array of lines
1277 update_debugger_info(NULL, buf+old_bufend_pos,
1278 new_bufend_pos-old_bufend_pos);
1284 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1286 Looks ahead one (Unicode) character in the text currently being lexed.
1287 Returns the codepoint (unsigned integer value) of the next character,
1288 or -1 if lexing has reached the end of the input text. To consume the
1289 peeked character, use L</lex_read_unichar>.
1291 If the next character is in (or extends into) the next chunk of input
1292 text, the next chunk will be read in. Normally the current chunk will be
1293 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294 then the current chunk will not be discarded.
1296 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297 is encountered, an exception is generated.
1303 Perl_lex_peek_unichar(pTHX_ U32 flags)
1307 if (flags & ~(LEX_KEEP_PREVIOUS))
1308 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1309 s = PL_parser->bufptr;
1310 bufend = PL_parser->bufend;
1316 if (!lex_next_chunk(flags))
1318 s = PL_parser->bufptr;
1319 bufend = PL_parser->bufend;
1325 len = PL_utf8skip[head];
1326 while ((STRLEN)(bufend-s) < len) {
1327 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1329 s = PL_parser->bufptr;
1330 bufend = PL_parser->bufend;
1333 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1334 if (retlen == (STRLEN)-1) {
1335 /* malformed UTF-8 */
1337 SAVESPTR(PL_warnhook);
1338 PL_warnhook = PERL_WARNHOOK_FATAL;
1339 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1345 if (!lex_next_chunk(flags))
1347 s = PL_parser->bufptr;
1354 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1356 Reads the next (Unicode) character in the text currently being lexed.
1357 Returns the codepoint (unsigned integer value) of the character read,
1358 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1359 if lexing has reached the end of the input text. To non-destructively
1360 examine the next character, use L</lex_peek_unichar> instead.
1362 If the next character is in (or extends into) the next chunk of input
1363 text, the next chunk will be read in. Normally the current chunk will be
1364 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1365 then the current chunk will not be discarded.
1367 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1368 is encountered, an exception is generated.
1374 Perl_lex_read_unichar(pTHX_ U32 flags)
1377 if (flags & ~(LEX_KEEP_PREVIOUS))
1378 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1379 c = lex_peek_unichar(flags);
1382 CopLINE_inc(PL_curcop);
1383 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1389 =for apidoc Amx|void|lex_read_space|U32 flags
1391 Reads optional spaces, in Perl style, in the text currently being
1392 lexed. The spaces may include ordinary whitespace characters and
1393 Perl-style comments. C<#line> directives are processed if encountered.
1394 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1395 at a non-space character (or the end of the input text).
1397 If spaces extend into the next chunk of input text, the next chunk will
1398 be read in. Normally the current chunk will be discarded at the same
1399 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1400 chunk will not be discarded.
1405 #define LEX_NO_NEXT_CHUNK 0x80000000
1408 Perl_lex_read_space(pTHX_ U32 flags)
1411 bool need_incline = 0;
1412 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1416 sv_free(PL_skipwhite);
1417 PL_skipwhite = NULL;
1420 PL_skipwhite = newSVpvs("");
1421 #endif /* PERL_MAD */
1422 s = PL_parser->bufptr;
1423 bufend = PL_parser->bufend;
1429 } while (!(c == '\n' || (c == 0 && s == bufend)));
1430 } else if (c == '\n') {
1432 PL_parser->linestart = s;
1437 } else if (isSPACE(c)) {
1439 } else if (c == 0 && s == bufend) {
1443 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1444 #endif /* PERL_MAD */
1445 if (flags & LEX_NO_NEXT_CHUNK)
1447 PL_parser->bufptr = s;
1448 CopLINE_inc(PL_curcop);
1449 got_more = lex_next_chunk(flags);
1450 CopLINE_dec(PL_curcop);
1451 s = PL_parser->bufptr;
1452 bufend = PL_parser->bufend;
1455 if (need_incline && PL_parser->rsfp) {
1465 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1466 #endif /* PERL_MAD */
1467 PL_parser->bufptr = s;
1472 * This subroutine has nothing to do with tilting, whether at windmills
1473 * or pinball tables. Its name is short for "increment line". It
1474 * increments the current line number in CopLINE(PL_curcop) and checks
1475 * to see whether the line starts with a comment of the form
1476 * # line 500 "foo.pm"
1477 * If so, it sets the current line number and file to the values in the comment.
1481 S_incline(pTHX_ const char *s)
1488 PERL_ARGS_ASSERT_INCLINE;
1490 CopLINE_inc(PL_curcop);
1493 while (SPACE_OR_TAB(*s))
1495 if (strnEQ(s, "line", 4))
1499 if (SPACE_OR_TAB(*s))
1503 while (SPACE_OR_TAB(*s))
1511 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1513 while (SPACE_OR_TAB(*s))
1515 if (*s == '"' && (t = strchr(s+1, '"'))) {
1521 while (!isSPACE(*t))
1525 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1527 if (*e != '\n' && *e != '\0')
1528 return; /* false alarm */
1531 const STRLEN len = t - s;
1532 #ifndef USE_ITHREADS
1533 SV *const temp_sv = CopFILESV(PL_curcop);
1538 cf = SvPVX(temp_sv);
1539 tmplen = SvCUR(temp_sv);
1545 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1546 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1547 * to *{"::_<newfilename"} */
1548 /* However, the long form of evals is only turned on by the
1549 debugger - usually they're "(eval %lu)" */
1553 STRLEN tmplen2 = len;
1554 if (tmplen + 2 <= sizeof smallbuf)
1557 Newx(tmpbuf, tmplen + 2, char);
1560 memcpy(tmpbuf + 2, cf, tmplen);
1562 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1567 if (tmplen2 + 2 <= sizeof smallbuf)
1570 Newx(tmpbuf2, tmplen2 + 2, char);
1572 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1573 /* Either they malloc'd it, or we malloc'd it,
1574 so no prefix is present in ours. */
1579 memcpy(tmpbuf2 + 2, s, tmplen2);
1582 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1584 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1585 /* adjust ${"::_<newfilename"} to store the new file name */
1586 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1587 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1588 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1591 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1593 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1596 CopFILE_free(PL_curcop);
1597 CopFILE_setn(PL_curcop, s, len);
1599 CopLINE_set(PL_curcop, atoi(n)-1);
1603 /* skip space before PL_thistoken */
1606 S_skipspace0(pTHX_ register char *s)
1608 PERL_ARGS_ASSERT_SKIPSPACE0;
1615 PL_thiswhite = newSVpvs("");
1616 sv_catsv(PL_thiswhite, PL_skipwhite);
1617 sv_free(PL_skipwhite);
1620 PL_realtokenstart = s - SvPVX(PL_linestr);
1624 /* skip space after PL_thistoken */
1627 S_skipspace1(pTHX_ register char *s)
1629 const char *start = s;
1630 I32 startoff = start - SvPVX(PL_linestr);
1632 PERL_ARGS_ASSERT_SKIPSPACE1;
1637 start = SvPVX(PL_linestr) + startoff;
1638 if (!PL_thistoken && PL_realtokenstart >= 0) {
1639 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1640 PL_thistoken = newSVpvn(tstart, start - tstart);
1642 PL_realtokenstart = -1;
1645 PL_nextwhite = newSVpvs("");
1646 sv_catsv(PL_nextwhite, PL_skipwhite);
1647 sv_free(PL_skipwhite);
1654 S_skipspace2(pTHX_ register char *s, SV **svp)
1657 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1658 const I32 startoff = s - SvPVX(PL_linestr);
1660 PERL_ARGS_ASSERT_SKIPSPACE2;
1663 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1664 if (!PL_madskills || !svp)
1666 start = SvPVX(PL_linestr) + startoff;
1667 if (!PL_thistoken && PL_realtokenstart >= 0) {
1668 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1669 PL_thistoken = newSVpvn(tstart, start - tstart);
1670 PL_realtokenstart = -1;
1674 *svp = newSVpvs("");
1675 sv_setsv(*svp, PL_skipwhite);
1676 sv_free(PL_skipwhite);
1685 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1687 AV *av = CopFILEAVx(PL_curcop);
1689 SV * const sv = newSV_type(SVt_PVMG);
1691 sv_setsv(sv, orig_sv);
1693 sv_setpvn(sv, buf, len);
1696 av_store(av, (I32)CopLINE(PL_curcop), sv);
1702 * Called to gobble the appropriate amount and type of whitespace.
1703 * Skips comments as well.
1707 S_skipspace(pTHX_ register char *s)
1711 #endif /* PERL_MAD */
1712 PERL_ARGS_ASSERT_SKIPSPACE;
1715 sv_free(PL_skipwhite);
1716 PL_skipwhite = NULL;
1718 #endif /* PERL_MAD */
1719 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1720 while (s < PL_bufend && SPACE_OR_TAB(*s))
1723 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1725 lex_read_space(LEX_KEEP_PREVIOUS |
1726 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1727 LEX_NO_NEXT_CHUNK : 0));
1729 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1730 if (PL_linestart > PL_bufptr)
1731 PL_bufptr = PL_linestart;
1736 PL_skipwhite = newSVpvn(start, s-start);
1737 #endif /* PERL_MAD */
1743 * Check the unary operators to ensure there's no ambiguity in how they're
1744 * used. An ambiguous piece of code would be:
1746 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1747 * the +5 is its argument.
1757 if (PL_oldoldbufptr != PL_last_uni)
1759 while (isSPACE(*PL_last_uni))
1762 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1764 if ((t = strchr(s, '(')) && t < PL_bufptr)
1767 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1768 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1769 (int)(s - PL_last_uni), PL_last_uni);
1773 * LOP : macro to build a list operator. Its behaviour has been replaced
1774 * with a subroutine, S_lop() for which LOP is just another name.
1777 #define LOP(f,x) return lop(f,x,s)
1781 * Build a list operator (or something that might be one). The rules:
1782 * - if we have a next token, then it's a list operator [why?]
1783 * - if the next thing is an opening paren, then it's a function
1784 * - else it's a list operator
1788 S_lop(pTHX_ I32 f, int x, char *s)
1792 PERL_ARGS_ASSERT_LOP;
1798 PL_last_lop = PL_oldbufptr;
1799 PL_last_lop_op = (OPCODE)f;
1802 return REPORT(LSTOP);
1805 return REPORT(LSTOP);
1808 return REPORT(FUNC);
1811 return REPORT(FUNC);
1813 return REPORT(LSTOP);
1819 * Sets up for an eventual force_next(). start_force(0) basically does
1820 * an unshift, while start_force(-1) does a push. yylex removes items
1825 S_start_force(pTHX_ int where)
1829 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1830 where = PL_lasttoke;
1831 assert(PL_curforce < 0 || PL_curforce == where);
1832 if (PL_curforce != where) {
1833 for (i = PL_lasttoke; i > where; --i) {
1834 PL_nexttoke[i] = PL_nexttoke[i-1];
1838 if (PL_curforce < 0) /* in case of duplicate start_force() */
1839 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1840 PL_curforce = where;
1843 curmad('^', newSVpvs(""));
1844 CURMAD('_', PL_nextwhite);
1849 S_curmad(pTHX_ char slot, SV *sv)
1855 if (PL_curforce < 0)
1856 where = &PL_thismad;
1858 where = &PL_nexttoke[PL_curforce].next_mad;
1864 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1866 else if (PL_encoding) {
1867 sv_recode_to_utf8(sv, PL_encoding);
1872 /* keep a slot open for the head of the list? */
1873 if (slot != '_' && *where && (*where)->mad_key == '^') {
1874 (*where)->mad_key = slot;
1875 sv_free(MUTABLE_SV(((*where)->mad_val)));
1876 (*where)->mad_val = (void*)sv;
1879 addmad(newMADsv(slot, sv), where, 0);
1882 # define start_force(where) NOOP
1883 # define curmad(slot, sv) NOOP
1888 * When the lexer realizes it knows the next token (for instance,
1889 * it is reordering tokens for the parser) then it can call S_force_next
1890 * to know what token to return the next time the lexer is called. Caller
1891 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1892 * and possibly PL_expect to ensure the lexer handles the token correctly.
1896 S_force_next(pTHX_ I32 type)
1901 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1902 tokereport(type, &NEXTVAL_NEXTTOKE);
1906 if (PL_curforce < 0)
1907 start_force(PL_lasttoke);
1908 PL_nexttoke[PL_curforce].next_type = type;
1909 if (PL_lex_state != LEX_KNOWNEXT)
1910 PL_lex_defer = PL_lex_state;
1911 PL_lex_state = LEX_KNOWNEXT;
1912 PL_lex_expect = PL_expect;
1915 PL_nexttype[PL_nexttoke] = type;
1917 if (PL_lex_state != LEX_KNOWNEXT) {
1918 PL_lex_defer = PL_lex_state;
1919 PL_lex_expect = PL_expect;
1920 PL_lex_state = LEX_KNOWNEXT;
1928 if (PL_parser->yychar != YYEMPTY) {
1930 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1931 force_next(PL_parser->yychar);
1932 PL_parser->yychar = YYEMPTY;
1937 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1940 SV * const sv = newSVpvn_utf8(start, len,
1943 && !is_ascii_string((const U8*)start, len)
1944 && is_utf8_string((const U8*)start, len));
1950 * When the lexer knows the next thing is a word (for instance, it has
1951 * just seen -> and it knows that the next char is a word char, then
1952 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1956 * char *start : buffer position (must be within PL_linestr)
1957 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
1958 * int check_keyword : if true, Perl checks to make sure the word isn't
1959 * a keyword (do this if the word is a label, e.g. goto FOO)
1960 * int allow_pack : if true, : characters will also be allowed (require,
1961 * use, etc. do this)
1962 * int allow_initial_tick : used by the "sub" lexer only.
1966 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1972 PERL_ARGS_ASSERT_FORCE_WORD;
1974 start = SKIPSPACE1(start);
1976 if (isIDFIRST_lazy_if(s,UTF) ||
1977 (allow_pack && *s == ':') ||
1978 (allow_initial_tick && *s == '\'') )
1980 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1981 if (check_keyword && keyword(PL_tokenbuf, len, 0))
1983 start_force(PL_curforce);
1985 curmad('X', newSVpvn(start,s-start));
1986 if (token == METHOD) {
1991 PL_expect = XOPERATOR;
1995 curmad('g', newSVpvs( "forced" ));
1996 NEXTVAL_NEXTTOKE.opval
1997 = (OP*)newSVOP(OP_CONST,0,
1998 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1999 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2007 * Called when the lexer wants $foo *foo &foo etc, but the program
2008 * text only contains the "foo" portion. The first argument is a pointer
2009 * to the "foo", and the second argument is the type symbol to prefix.
2010 * Forces the next token to be a "WORD".
2011 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2015 S_force_ident(pTHX_ register const char *s, int kind)
2019 PERL_ARGS_ASSERT_FORCE_IDENT;
2022 const STRLEN len = strlen(s);
2023 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2024 start_force(PL_curforce);
2025 NEXTVAL_NEXTTOKE.opval = o;
2028 o->op_private = OPpCONST_ENTERED;
2029 /* XXX see note in pp_entereval() for why we forgo typo
2030 warnings if the symbol must be introduced in an eval.
2032 gv_fetchpvn_flags(s, len,
2033 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2035 kind == '$' ? SVt_PV :
2036 kind == '@' ? SVt_PVAV :
2037 kind == '%' ? SVt_PVHV :
2045 Perl_str_to_version(pTHX_ SV *sv)
2050 const char *start = SvPV_const(sv,len);
2051 const char * const end = start + len;
2052 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2054 PERL_ARGS_ASSERT_STR_TO_VERSION;
2056 while (start < end) {
2060 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2065 retval += ((NV)n)/nshift;
2074 * Forces the next token to be a version number.
2075 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2076 * and if "guessing" is TRUE, then no new token is created (and the caller
2077 * must use an alternative parsing method).
2081 S_force_version(pTHX_ char *s, int guessing)
2087 I32 startoff = s - SvPVX(PL_linestr);
2090 PERL_ARGS_ASSERT_FORCE_VERSION;
2098 while (isDIGIT(*d) || *d == '_' || *d == '.')
2102 start_force(PL_curforce);
2103 curmad('X', newSVpvn(s,d-s));
2106 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2108 #ifdef USE_LOCALE_NUMERIC
2109 char *loc = setlocale(LC_NUMERIC, "C");
2111 s = scan_num(s, &pl_yylval);
2112 #ifdef USE_LOCALE_NUMERIC
2113 setlocale(LC_NUMERIC, loc);
2115 version = pl_yylval.opval;
2116 ver = cSVOPx(version)->op_sv;
2117 if (SvPOK(ver) && !SvNIOK(ver)) {
2118 SvUPGRADE(ver, SVt_PVNV);
2119 SvNV_set(ver, str_to_version(ver));
2120 SvNOK_on(ver); /* hint that it is a version */
2123 else if (guessing) {
2126 sv_free(PL_nextwhite); /* let next token collect whitespace */
2128 s = SvPVX(PL_linestr) + startoff;
2136 if (PL_madskills && !version) {
2137 sv_free(PL_nextwhite); /* let next token collect whitespace */
2139 s = SvPVX(PL_linestr) + startoff;
2142 /* NOTE: The parser sees the package name and the VERSION swapped */
2143 start_force(PL_curforce);
2144 NEXTVAL_NEXTTOKE.opval = version;
2151 * S_force_strict_version
2152 * Forces the next token to be a version number using strict syntax rules.
2156 S_force_strict_version(pTHX_ char *s)
2161 I32 startoff = s - SvPVX(PL_linestr);
2163 const char *errstr = NULL;
2165 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2167 while (isSPACE(*s)) /* leading whitespace */
2170 if (is_STRICT_VERSION(s,&errstr)) {
2172 s = (char *)scan_version(s, ver, 0);
2173 version = newSVOP(OP_CONST, 0, ver);
2175 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2176 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2180 yyerror(errstr); /* version required */
2185 if (PL_madskills && !version) {
2186 sv_free(PL_nextwhite); /* let next token collect whitespace */
2188 s = SvPVX(PL_linestr) + startoff;
2191 /* NOTE: The parser sees the package name and the VERSION swapped */
2192 start_force(PL_curforce);
2193 NEXTVAL_NEXTTOKE.opval = version;
2201 * Tokenize a quoted string passed in as an SV. It finds the next
2202 * chunk, up to end of string or a backslash. It may make a new
2203 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2208 S_tokeq(pTHX_ SV *sv)
2212 register char *send;
2217 PERL_ARGS_ASSERT_TOKEQ;
2222 s = SvPV_force(sv, len);
2223 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2226 while (s < send && *s != '\\')
2231 if ( PL_hints & HINT_NEW_STRING ) {
2232 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2236 if (s + 1 < send && (s[1] == '\\'))
2237 s++; /* all that, just for this */
2242 SvCUR_set(sv, d - SvPVX_const(sv));
2244 if ( PL_hints & HINT_NEW_STRING )
2245 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2250 * Now come three functions related to double-quote context,
2251 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2252 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2253 * interact with PL_lex_state, and create fake ( ... ) argument lists
2254 * to handle functions and concatenation.
2255 * They assume that whoever calls them will be setting up a fake
2256 * join call, because each subthing puts a ',' after it. This lets
2259 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2261 * (I'm not sure whether the spurious commas at the end of lcfirst's
2262 * arguments and join's arguments are created or not).
2267 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2269 * Pattern matching will set PL_lex_op to the pattern-matching op to
2270 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2272 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2274 * Everything else becomes a FUNC.
2276 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2277 * had an OP_CONST or OP_READLINE). This just sets us up for a
2278 * call to S_sublex_push().
2282 S_sublex_start(pTHX)
2285 register const I32 op_type = pl_yylval.ival;
2287 if (op_type == OP_NULL) {
2288 pl_yylval.opval = PL_lex_op;
2292 if (op_type == OP_CONST || op_type == OP_READLINE) {
2293 SV *sv = tokeq(PL_lex_stuff);
2295 if (SvTYPE(sv) == SVt_PVIV) {
2296 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2298 const char * const p = SvPV_const(sv, len);
2299 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2303 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2304 PL_lex_stuff = NULL;
2305 /* Allow <FH> // "foo" */
2306 if (op_type == OP_READLINE)
2307 PL_expect = XTERMORDORDOR;
2310 else if (op_type == OP_BACKTICK && PL_lex_op) {
2311 /* readpipe() vas overriden */
2312 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2313 pl_yylval.opval = PL_lex_op;
2315 PL_lex_stuff = NULL;
2319 PL_sublex_info.super_state = PL_lex_state;
2320 PL_sublex_info.sub_inwhat = (U16)op_type;
2321 PL_sublex_info.sub_op = PL_lex_op;
2322 PL_lex_state = LEX_INTERPPUSH;
2326 pl_yylval.opval = PL_lex_op;
2336 * Create a new scope to save the lexing state. The scope will be
2337 * ended in S_sublex_done. Returns a '(', starting the function arguments
2338 * to the uc, lc, etc. found before.
2339 * Sets PL_lex_state to LEX_INTERPCONCAT.
2348 PL_lex_state = PL_sublex_info.super_state;
2349 SAVEBOOL(PL_lex_dojoin);
2350 SAVEI32(PL_lex_brackets);
2351 SAVEI32(PL_lex_casemods);
2352 SAVEI32(PL_lex_starts);
2353 SAVEI8(PL_lex_state);
2354 SAVEVPTR(PL_lex_inpat);
2355 SAVEI16(PL_lex_inwhat);
2356 SAVECOPLINE(PL_curcop);
2357 SAVEPPTR(PL_bufptr);
2358 SAVEPPTR(PL_bufend);
2359 SAVEPPTR(PL_oldbufptr);
2360 SAVEPPTR(PL_oldoldbufptr);
2361 SAVEPPTR(PL_last_lop);
2362 SAVEPPTR(PL_last_uni);
2363 SAVEPPTR(PL_linestart);
2364 SAVESPTR(PL_linestr);
2365 SAVEGENERICPV(PL_lex_brackstack);
2366 SAVEGENERICPV(PL_lex_casestack);
2368 PL_linestr = PL_lex_stuff;
2369 PL_lex_stuff = NULL;
2371 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2372 = SvPVX(PL_linestr);
2373 PL_bufend += SvCUR(PL_linestr);
2374 PL_last_lop = PL_last_uni = NULL;
2375 SAVEFREESV(PL_linestr);
2377 PL_lex_dojoin = FALSE;
2378 PL_lex_brackets = 0;
2379 Newx(PL_lex_brackstack, 120, char);
2380 Newx(PL_lex_casestack, 12, char);
2381 PL_lex_casemods = 0;
2382 *PL_lex_casestack = '\0';
2384 PL_lex_state = LEX_INTERPCONCAT;
2385 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2387 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2388 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2389 PL_lex_inpat = PL_sublex_info.sub_op;
2391 PL_lex_inpat = NULL;
2398 * Restores lexer state after a S_sublex_push.
2405 if (!PL_lex_starts++) {
2406 SV * const sv = newSVpvs("");
2407 if (SvUTF8(PL_linestr))
2409 PL_expect = XOPERATOR;
2410 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2414 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2415 PL_lex_state = LEX_INTERPCASEMOD;
2419 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2420 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2421 PL_linestr = PL_lex_repl;
2423 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2424 PL_bufend += SvCUR(PL_linestr);
2425 PL_last_lop = PL_last_uni = NULL;
2426 SAVEFREESV(PL_linestr);
2427 PL_lex_dojoin = FALSE;
2428 PL_lex_brackets = 0;
2429 PL_lex_casemods = 0;
2430 *PL_lex_casestack = '\0';
2432 if (SvEVALED(PL_lex_repl)) {
2433 PL_lex_state = LEX_INTERPNORMAL;
2435 /* we don't clear PL_lex_repl here, so that we can check later
2436 whether this is an evalled subst; that means we rely on the
2437 logic to ensure sublex_done() is called again only via the
2438 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2441 PL_lex_state = LEX_INTERPCONCAT;
2451 PL_endwhite = newSVpvs("");
2452 sv_catsv(PL_endwhite, PL_thiswhite);
2456 sv_setpvs(PL_thistoken,"");
2458 PL_realtokenstart = -1;
2462 PL_bufend = SvPVX(PL_linestr);
2463 PL_bufend += SvCUR(PL_linestr);
2464 PL_expect = XOPERATOR;
2465 PL_sublex_info.sub_inwhat = 0;
2473 Extracts a pattern, double-quoted string, or transliteration. This
2476 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2477 processing a pattern (PL_lex_inpat is true), a transliteration
2478 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2480 Returns a pointer to the character scanned up to. If this is
2481 advanced from the start pointer supplied (i.e. if anything was
2482 successfully parsed), will leave an OP for the substring scanned
2483 in pl_yylval. Caller must intuit reason for not parsing further
2484 by looking at the next characters herself.
2488 constants: \N{NAME} only
2489 case and quoting: \U \Q \E
2490 stops on @ and $, but not for $ as tail anchor
2492 In transliterations:
2493 characters are VERY literal, except for - not at the start or end
2494 of the string, which indicates a range. If the range is in bytes,
2495 scan_const expands the range to the full set of intermediate
2496 characters. If the range is in utf8, the hyphen is replaced with
2497 a certain range mark which will be handled by pmtrans() in op.c.
2499 In double-quoted strings:
2501 double-quoted style: \r and \n
2502 constants: \x31, etc.
2503 deprecated backrefs: \1 (in substitution replacements)
2504 case and quoting: \U \Q \E
2507 scan_const does *not* construct ops to handle interpolated strings.
2508 It stops processing as soon as it finds an embedded $ or @ variable
2509 and leaves it to the caller to work out what's going on.
2511 embedded arrays (whether in pattern or not) could be:
2512 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2514 $ in double-quoted strings must be the symbol of an embedded scalar.
2516 $ in pattern could be $foo or could be tail anchor. Assumption:
2517 it's a tail anchor if $ is the last thing in the string, or if it's
2518 followed by one of "()| \r\n\t"
2520 \1 (backreferences) are turned into $1
2522 The structure of the code is
2523 while (there's a character to process) {
2524 handle transliteration ranges
2525 skip regexp comments /(?#comment)/ and codes /(?{code})/
2526 skip #-initiated comments in //x patterns
2527 check for embedded arrays
2528 check for embedded scalars
2530 deprecate \1 in substitution replacements
2531 handle string-changing backslashes \l \U \Q \E, etc.
2532 switch (what was escaped) {
2533 handle \- in a transliteration (becomes a literal -)
2534 if a pattern and not \N{, go treat as regular character
2535 handle \132 (octal characters)
2536 handle \x15 and \x{1234} (hex characters)
2537 handle \N{name} (named characters, also \N{3,5} in a pattern)
2538 handle \cV (control characters)
2539 handle printf-style backslashes (\f, \r, \n, etc)
2542 } (end if backslash)
2543 handle regular character
2544 } (end while character to read)
2549 S_scan_const(pTHX_ char *start)
2552 register char *send = PL_bufend; /* end of the constant */
2553 SV *sv = newSV(send - start); /* sv for the constant. See
2554 note below on sizing. */
2555 register char *s = start; /* start of the constant */
2556 register char *d = SvPVX(sv); /* destination for copies */
2557 bool dorange = FALSE; /* are we in a translit range? */
2558 bool didrange = FALSE; /* did we just finish a range? */
2559 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
2560 I32 this_utf8 = UTF; /* Is the source string assumed
2561 to be UTF8? But, this can
2562 show as true when the source
2563 isn't utf8, as for example
2564 when it is entirely composed
2567 /* Note on sizing: The scanned constant is placed into sv, which is
2568 * initialized by newSV() assuming one byte of output for every byte of
2569 * input. This routine expects newSV() to allocate an extra byte for a
2570 * trailing NUL, which this routine will append if it gets to the end of
2571 * the input. There may be more bytes of input than output (eg., \N{LATIN
2572 * CAPITAL LETTER A}), or more output than input if the constant ends up
2573 * recoded to utf8, but each time a construct is found that might increase
2574 * the needed size, SvGROW() is called. Its size parameter each time is
2575 * based on the best guess estimate at the time, namely the length used so
2576 * far, plus the length the current construct will occupy, plus room for
2577 * the trailing NUL, plus one byte for every input byte still unscanned */
2581 UV literal_endpoint = 0;
2582 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2585 PERL_ARGS_ASSERT_SCAN_CONST;
2587 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2588 /* If we are doing a trans and we know we want UTF8 set expectation */
2589 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2590 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2594 while (s < send || dorange) {
2596 /* get transliterations out of the way (they're most literal) */
2597 if (PL_lex_inwhat == OP_TRANS) {
2598 /* expand a range A-Z to the full set of characters. AIE! */
2600 I32 i; /* current expanded character */
2601 I32 min; /* first character in range */
2602 I32 max; /* last character in range */
2613 char * const c = (char*)utf8_hop((U8*)d, -1);
2617 *c = (char)UTF_TO_NATIVE(0xff);
2618 /* mark the range as done, and continue */
2624 i = d - SvPVX_const(sv); /* remember current offset */
2627 SvLEN(sv) + (has_utf8 ?
2628 (512 - UTF_CONTINUATION_MARK +
2631 /* How many two-byte within 0..255: 128 in UTF-8,
2632 * 96 in UTF-8-mod. */
2634 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2636 d = SvPVX(sv) + i; /* refresh d after realloc */
2640 for (j = 0; j <= 1; j++) {
2641 char * const c = (char*)utf8_hop((U8*)d, -1);
2642 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2648 max = (U8)0xff; /* only to \xff */
2649 uvmax = uv; /* \x{100} to uvmax */
2651 d = c; /* eat endpoint chars */
2656 d -= 2; /* eat the first char and the - */
2657 min = (U8)*d; /* first char in range */
2658 max = (U8)d[1]; /* last char in range */
2665 "Invalid range \"%c-%c\" in transliteration operator",
2666 (char)min, (char)max);
2670 if (literal_endpoint == 2 &&
2671 ((isLOWER(min) && isLOWER(max)) ||
2672 (isUPPER(min) && isUPPER(max)))) {
2674 for (i = min; i <= max; i++)
2676 *d++ = NATIVE_TO_NEED(has_utf8,i);
2678 for (i = min; i <= max; i++)
2680 *d++ = NATIVE_TO_NEED(has_utf8,i);
2685 for (i = min; i <= max; i++)
2688 const U8 ch = (U8)NATIVE_TO_UTF(i);
2689 if (UNI_IS_INVARIANT(ch))
2692 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2693 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2702 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2704 *d++ = (char)UTF_TO_NATIVE(0xff);
2706 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2710 /* mark the range as done, and continue */
2714 literal_endpoint = 0;
2719 /* range begins (ignore - as first or last char) */
2720 else if (*s == '-' && s+1 < send && s != start) {
2722 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2729 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2739 literal_endpoint = 0;
2740 native_range = TRUE;
2745 /* if we get here, we're not doing a transliteration */
2747 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2748 except for the last char, which will be done separately. */
2749 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2751 while (s+1 < send && *s != ')')
2752 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2754 else if (s[2] == '{' /* This should match regcomp.c */
2755 || (s[2] == '?' && s[3] == '{'))
2758 char *regparse = s + (s[2] == '{' ? 3 : 4);
2761 while (count && (c = *regparse)) {
2762 if (c == '\\' && regparse[1])
2770 if (*regparse != ')')
2771 regparse--; /* Leave one char for continuation. */
2772 while (s < regparse)
2773 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2777 /* likewise skip #-initiated comments in //x patterns */
2778 else if (*s == '#' && PL_lex_inpat &&
2779 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
2780 while (s+1 < send && *s != '\n')
2781 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2784 /* check for embedded arrays
2785 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2787 else if (*s == '@' && s[1]) {
2788 if (isALNUM_lazy_if(s+1,UTF))
2790 if (strchr(":'{$", s[1]))
2792 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2793 break; /* in regexp, neither @+ nor @- are interpolated */
2796 /* check for embedded scalars. only stop if we're sure it's a
2799 else if (*s == '$') {
2800 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2802 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2804 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2805 "Possible unintended interpolation of $\\ in regex");
2807 break; /* in regexp, $ might be tail anchor */
2811 /* End of else if chain - OP_TRANS rejoin rest */
2814 if (*s == '\\' && s+1 < send) {
2815 char* e; /* Can be used for ending '}', etc. */
2819 /* warn on \1 - \9 in substitution replacements, but note that \11
2820 * is an octal; and \19 is \1 followed by '9' */
2821 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2822 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2824 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2829 /* string-change backslash escapes */
2830 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2834 /* In a pattern, process \N, but skip any other backslash escapes.
2835 * This is because we don't want to translate an escape sequence
2836 * into a meta symbol and have the regex compiler use the meta
2837 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2838 * in spite of this, we do have to process \N here while the proper
2839 * charnames handler is in scope. See bugs #56444 and #62056.
2840 * There is a complication because \N in a pattern may also stand
2841 * for 'match a non-nl', and not mean a charname, in which case its
2842 * processing should be deferred to the regex compiler. To be a
2843 * charname it must be followed immediately by a '{', and not look
2844 * like \N followed by a curly quantifier, i.e., not something like
2845 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2847 else if (PL_lex_inpat
2850 || regcurly(s + 1)))
2852 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2853 goto default_action;
2858 /* quoted - in transliterations */
2860 if (PL_lex_inwhat == OP_TRANS) {
2867 if ((isALPHA(*s) || isDIGIT(*s)))
2868 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2869 "Unrecognized escape \\%c passed through",
2871 /* default action is to copy the quoted character */
2872 goto default_action;
2875 /* eg. \132 indicates the octal constant 0132 */
2876 case '0': case '1': case '2': case '3':
2877 case '4': case '5': case '6': case '7':
2881 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2884 goto NUM_ESCAPE_INSERT;
2886 /* eg. \o{24} indicates the octal constant \024 */
2892 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2898 goto NUM_ESCAPE_INSERT;
2901 /* eg. \x24 indicates the hex constant 0x24 */
2905 char* const e = strchr(s, '}');
2906 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2907 PERL_SCAN_DISALLOW_PREFIX;
2912 yyerror("Missing right brace on \\x{}");
2916 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2922 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2923 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2929 /* Insert oct or hex escaped character. There will always be
2930 * enough room in sv since such escapes will be longer than any
2931 * UTF-8 sequence they can end up as, except if they force us
2932 * to recode the rest of the string into utf8 */
2934 /* Here uv is the ordinal of the next character being added in
2935 * unicode (converted from native). */
2936 if (!UNI_IS_INVARIANT(uv)) {
2937 if (!has_utf8 && uv > 255) {
2938 /* Might need to recode whatever we have accumulated so
2939 * far if it contains any chars variant in utf8 or
2942 SvCUR_set(sv, d - SvPVX_const(sv));
2945 /* See Note on sizing above. */
2946 sv_utf8_upgrade_flags_grow(sv,
2947 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2948 UNISKIP(uv) + (STRLEN)(send - s) + 1);
2949 d = SvPVX(sv) + SvCUR(sv);
2954 d = (char*)uvuni_to_utf8((U8*)d, uv);
2955 if (PL_lex_inwhat == OP_TRANS &&
2956 PL_sublex_info.sub_op) {
2957 PL_sublex_info.sub_op->op_private |=
2958 (PL_lex_repl ? OPpTRANS_FROM_UTF
2962 if (uv > 255 && !dorange)
2963 native_range = FALSE;
2976 /* In a non-pattern \N must be a named character, like \N{LATIN
2977 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2978 * mean to match a non-newline. For non-patterns, named
2979 * characters are converted to their string equivalents. In
2980 * patterns, named characters are not converted to their
2981 * ultimate forms for the same reasons that other escapes
2982 * aren't. Instead, they are converted to the \N{U+...} form
2983 * to get the value from the charnames that is in effect right
2984 * now, while preserving the fact that it was a named character
2985 * so that the regex compiler knows this */
2987 /* This section of code doesn't generally use the
2988 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2989 * a close examination of this macro and determined it is a
2990 * no-op except on utfebcdic variant characters. Every
2991 * character generated by this that would normally need to be
2992 * enclosed by this macro is invariant, so the macro is not
2993 * needed, and would complicate use of copy(). There are other
2994 * parts of this file where the macro is used inconsistently,
2995 * but are saved by it being a no-op */
2997 /* The structure of this section of code (besides checking for
2998 * errors and upgrading to utf8) is:
2999 * Further disambiguate between the two meanings of \N, and if
3000 * not a charname, go process it elsewhere
3001 * If of form \N{U+...}, pass it through if a pattern;
3002 * otherwise convert to utf8
3003 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3004 * pattern; otherwise convert to utf8 */
3006 /* Here, s points to the 'N'; the test below is guaranteed to
3007 * succeed if we are being called on a pattern as we already
3008 * know from a test above that the next character is a '{'.
3009 * On a non-pattern \N must mean 'named sequence, which
3010 * requires braces */
3013 yyerror("Missing braces on \\N{}");
3018 /* If there is no matching '}', it is an error. */
3019 if (! (e = strchr(s, '}'))) {
3020 if (! PL_lex_inpat) {
3021 yyerror("Missing right brace on \\N{}");
3023 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3028 /* Here it looks like a named character */
3032 /* XXX This block is temporary code. \N{} implies that the
3033 * pattern is to have Unicode semantics, and therefore
3034 * currently has to be encoded in utf8. By putting it in
3035 * utf8 now, we save a whole pass in the regular expression
3036 * compiler. Once that code is changed so Unicode
3037 * semantics doesn't necessarily have to be in utf8, this
3038 * block should be removed */
3040 SvCUR_set(sv, d - SvPVX_const(sv));
3043 /* See Note on sizing above. */
3044 sv_utf8_upgrade_flags_grow(sv,
3045 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3046 /* 5 = '\N{' + cur char + NUL */
3047 (STRLEN)(send - s) + 5);
3048 d = SvPVX(sv) + SvCUR(sv);
3053 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3054 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3055 | PERL_SCAN_DISALLOW_PREFIX;
3058 /* For \N{U+...}, the '...' is a unicode value even on
3059 * EBCDIC machines */
3060 s += 2; /* Skip to next char after the 'U+' */
3062 uv = grok_hex(s, &len, &flags, NULL);
3063 if (len == 0 || len != (STRLEN)(e - s)) {
3064 yyerror("Invalid hexadecimal number in \\N{U+...}");
3071 /* Pass through to the regex compiler unchanged. The
3072 * reason we evaluated the number above is to make sure
3073 * there wasn't a syntax error. */
3074 s -= 5; /* Include the '\N{U+' */
3075 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3078 else { /* Not a pattern: convert the hex to string */
3080 /* If destination is not in utf8, unconditionally
3081 * recode it to be so. This is because \N{} implies
3082 * Unicode semantics, and scalars have to be in utf8
3083 * to guarantee those semantics */
3085 SvCUR_set(sv, d - SvPVX_const(sv));
3088 /* See Note on sizing above. */
3089 sv_utf8_upgrade_flags_grow(
3091 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3092 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3093 d = SvPVX(sv) + SvCUR(sv);
3097 /* Add the string to the output */
3098 if (UNI_IS_INVARIANT(uv)) {
3101 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3104 else { /* Here is \N{NAME} but not \N{U+...}. */
3106 SV *res; /* result from charnames */
3107 const char *str; /* the string in 'res' */
3108 STRLEN len; /* its length */
3110 /* Get the value for NAME */
3111 res = newSVpvn(s, e - s);
3112 res = new_constant( NULL, 0, "charnames",
3113 /* includes all of: \N{...} */
3114 res, NULL, s - 3, e - s + 4 );
3116 /* Most likely res will be in utf8 already since the
3117 * standard charnames uses pack U, but a custom translator
3118 * can leave it otherwise, so make sure. XXX This can be
3119 * revisited to not have charnames use utf8 for characters
3120 * that don't need it when regexes don't have to be in utf8
3121 * for Unicode semantics. If doing so, remember EBCDIC */
3122 sv_utf8_upgrade(res);
3123 str = SvPV_const(res, len);
3125 /* Don't accept malformed input */
3126 if (! is_utf8_string((U8 *) str, len)) {
3127 yyerror("Malformed UTF-8 returned by \\N");
3129 else if (PL_lex_inpat) {
3131 if (! len) { /* The name resolved to an empty string */
3132 Copy("\\N{}", d, 4, char);
3136 /* In order to not lose information for the regex
3137 * compiler, pass the result in the specially made
3138 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3139 * the code points in hex of each character
3140 * returned by charnames */
3142 const char *str_end = str + len;
3143 STRLEN char_length; /* cur char's byte length */
3144 STRLEN output_length; /* and the number of bytes
3145 after this is translated
3147 const STRLEN off = d - SvPVX_const(sv);
3149 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3150 * max('U+', '.'); and 1 for NUL */
3151 char hex_string[2 * UTF8_MAXBYTES + 5];
3153 /* Get the first character of the result. */
3154 U32 uv = utf8n_to_uvuni((U8 *) str,
3159 /* The call to is_utf8_string() above hopefully
3160 * guarantees that there won't be an error. But
3161 * it's easy here to make sure. The function just
3162 * above warns and returns 0 if invalid utf8, but
3163 * it can also return 0 if the input is validly a
3164 * NUL. Disambiguate */
3165 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3166 uv = UNICODE_REPLACEMENT;
3169 /* Convert first code point to hex, including the
3170 * boiler plate before it */
3171 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3172 output_length = strlen(hex_string);
3174 /* Make sure there is enough space to hold it */
3175 d = off + SvGROW(sv, off
3177 + (STRLEN)(send - e)
3178 + 2); /* '}' + NUL */
3180 Copy(hex_string, d, output_length, char);
3183 /* For each subsequent character, append dot and
3184 * its ordinal in hex */
3185 while ((str += char_length) < str_end) {
3186 const STRLEN off = d - SvPVX_const(sv);
3187 U32 uv = utf8n_to_uvuni((U8 *) str,
3191 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3192 uv = UNICODE_REPLACEMENT;
3195 sprintf(hex_string, ".%X", (unsigned int) uv);
3196 output_length = strlen(hex_string);
3198 d = off + SvGROW(sv, off
3200 + (STRLEN)(send - e)
3201 + 2); /* '}' + NUL */
3202 Copy(hex_string, d, output_length, char);
3206 *d++ = '}'; /* Done. Add the trailing brace */
3209 else { /* Here, not in a pattern. Convert the name to a
3212 /* If destination is not in utf8, unconditionally
3213 * recode it to be so. This is because \N{} implies
3214 * Unicode semantics, and scalars have to be in utf8
3215 * to guarantee those semantics */
3217 SvCUR_set(sv, d - SvPVX_const(sv));
3220 /* See Note on sizing above. */
3221 sv_utf8_upgrade_flags_grow(sv,
3222 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3223 len + (STRLEN)(send - s) + 1);
3224 d = SvPVX(sv) + SvCUR(sv);
3226 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3228 /* See Note on sizing above. (NOTE: SvCUR() is not
3229 * set correctly here). */
3230 const STRLEN off = d - SvPVX_const(sv);
3231 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3233 Copy(str, d, len, char);
3238 /* Deprecate non-approved name syntax */
3239 if (ckWARN_d(WARN_DEPRECATED)) {
3240 bool problematic = FALSE;
3243 /* For non-ut8 input, look to see that the first
3244 * character is an alpha, then loop through the rest
3245 * checking that each is a continuation */
3247 if (! isALPHAU(*i)) problematic = TRUE;
3248 else for (i = s + 1; i < e; i++) {
3249 if (isCHARNAME_CONT(*i)) continue;
3255 /* Similarly for utf8. For invariants can check
3256 * directly. We accept anything above the latin1
3257 * range because it is immaterial to Perl if it is
3258 * correct or not, and is expensive to check. But
3259 * it is fairly easy in the latin1 range to convert
3260 * the variants into a single character and check
3262 if (UTF8_IS_INVARIANT(*i)) {
3263 if (! isALPHAU(*i)) problematic = TRUE;
3264 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3265 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3271 if (! problematic) for (i = s + UTF8SKIP(s);
3275 if (UTF8_IS_INVARIANT(*i)) {
3276 if (isCHARNAME_CONT(*i)) continue;
3277 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3279 } else if (isCHARNAME_CONT(
3281 UTF8_ACCUMULATE(*i, *(i+1)))))
3290 /* The e-i passed to the final %.*s makes sure that
3291 * should the trailing NUL be missing that this
3292 * print won't run off the end of the string */
3293 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3294 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3295 (int)(i - s + 1), s, (int)(e - i), i + 1);
3298 } /* End \N{NAME} */
3301 native_range = FALSE; /* \N{} is defined to be Unicode */
3303 s = e + 1; /* Point to just after the '}' */
3306 /* \c is a control character */
3310 *d++ = grok_bslash_c(*s++, 1);
3313 yyerror("Missing control char name in \\c");
3317 /* printf-style backslashes, formfeeds, newlines, etc */
3319 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3322 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3325 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3328 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3331 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3334 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3337 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3343 } /* end if (backslash) */
3350 /* If we started with encoded form, or already know we want it,
3351 then encode the next character */
3352 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3356 /* One might think that it is wasted effort in the case of the
3357 * source being utf8 (this_utf8 == TRUE) to take the next character
3358 * in the source, convert it to an unsigned value, and then convert
3359 * it back again. But the source has not been validated here. The
3360 * routine that does the conversion checks for errors like
3363 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3364 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3366 SvCUR_set(sv, d - SvPVX_const(sv));
3369 /* See Note on sizing above. */
3370 sv_utf8_upgrade_flags_grow(sv,
3371 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3372 need + (STRLEN)(send - s) + 1);
3373 d = SvPVX(sv) + SvCUR(sv);
3375 } else if (need > len) {
3376 /* encoded value larger than old, may need extra space (NOTE:
3377 * SvCUR() is not set correctly here). See Note on sizing
3379 const STRLEN off = d - SvPVX_const(sv);
3380 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3384 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3386 if (uv > 255 && !dorange)
3387 native_range = FALSE;
3391 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3393 } /* while loop to process each character */
3395 /* terminate the string and set up the sv */
3397 SvCUR_set(sv, d - SvPVX_const(sv));
3398 if (SvCUR(sv) >= SvLEN(sv))
3399 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3402 if (PL_encoding && !has_utf8) {
3403 sv_recode_to_utf8(sv, PL_encoding);
3409 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3410 PL_sublex_info.sub_op->op_private |=
3411 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3415 /* shrink the sv if we allocated more than we used */
3416 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3417 SvPV_shrink_to_cur(sv);
3420 /* return the substring (via pl_yylval) only if we parsed anything */
3421 if (s > PL_bufptr) {
3422 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3423 const char *const key = PL_lex_inpat ? "qr" : "q";
3424 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3428 if (PL_lex_inwhat == OP_TRANS) {
3431 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3439 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3442 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3449 * Returns TRUE if there's more to the expression (e.g., a subscript),
3452 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3454 * ->[ and ->{ return TRUE
3455 * { and [ outside a pattern are always subscripts, so return TRUE
3456 * if we're outside a pattern and it's not { or [, then return FALSE
3457 * if we're in a pattern and the first char is a {
3458 * {4,5} (any digits around the comma) returns FALSE
3459 * if we're in a pattern and the first char is a [
3461 * [SOMETHING] has a funky algorithm to decide whether it's a
3462 * character class or not. It has to deal with things like
3463 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3464 * anything else returns TRUE
3467 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3470 S_intuit_more(pTHX_ register char *s)
3474 PERL_ARGS_ASSERT_INTUIT_MORE;
3476 if (PL_lex_brackets)
3478 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3480 if (*s != '{' && *s != '[')
3485 /* In a pattern, so maybe we have {n,m}. */
3502 /* On the other hand, maybe we have a character class */
3505 if (*s == ']' || *s == '^')
3508 /* this is terrifying, and it works */
3509 int weight = 2; /* let's weigh the evidence */
3511 unsigned char un_char = 255, last_un_char;
3512 const char * const send = strchr(s,']');
3513 char tmpbuf[sizeof PL_tokenbuf * 4];
3515 if (!send) /* has to be an expression */
3518 Zero(seen,256,char);
3521 else if (isDIGIT(*s)) {
3523 if (isDIGIT(s[1]) && s[2] == ']')
3529 for (; s < send; s++) {
3530 last_un_char = un_char;
3531 un_char = (unsigned char)*s;
3536 weight -= seen[un_char] * 10;
3537 if (isALNUM_lazy_if(s+1,UTF)) {
3539 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3540 len = (int)strlen(tmpbuf);
3541 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3546 else if (*s == '$' && s[1] &&
3547 strchr("[#!%*<>()-=",s[1])) {
3548 if (/*{*/ strchr("])} =",s[2]))
3557 if (strchr("wds]",s[1]))
3559 else if (seen[(U8)'\''] || seen[(U8)'"'])
3561 else if (strchr("rnftbxcav",s[1]))
3563 else if (isDIGIT(s[1])) {
3565 while (s[1] && isDIGIT(s[1]))
3575 if (strchr("aA01! ",last_un_char))
3577 if (strchr("zZ79~",s[1]))
3579 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3580 weight -= 5; /* cope with negative subscript */
3583 if (!isALNUM(last_un_char)
3584 && !(last_un_char == '$' || last_un_char == '@'
3585 || last_un_char == '&')
3586 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3591 if (keyword(tmpbuf, d - tmpbuf, 0))
3594 if (un_char == last_un_char + 1)
3596 weight -= seen[un_char];
3601 if (weight >= 0) /* probably a character class */
3611 * Does all the checking to disambiguate
3613 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3614 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3616 * First argument is the stuff after the first token, e.g. "bar".
3618 * Not a method if bar is a filehandle.
3619 * Not a method if foo is a subroutine prototyped to take a filehandle.
3620 * Not a method if it's really "Foo $bar"
3621 * Method if it's "foo $bar"
3622 * Not a method if it's really "print foo $bar"
3623 * Method if it's really "foo package::" (interpreted as package->foo)
3624 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3625 * Not a method if bar is a filehandle or package, but is quoted with
3630 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3633 char *s = start + (*start == '$');
3634 char tmpbuf[sizeof PL_tokenbuf];
3641 PERL_ARGS_ASSERT_INTUIT_METHOD;
3644 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3648 const char *proto = SvPVX_const(cv);
3659 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3660 /* start is the beginning of the possible filehandle/object,
3661 * and s is the end of it
3662 * tmpbuf is a copy of it
3665 if (*start == '$') {
3666 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3667 isUPPER(*PL_tokenbuf))
3670 len = start - SvPVX(PL_linestr);
3674 start = SvPVX(PL_linestr) + len;
3678 return *s == '(' ? FUNCMETH : METHOD;
3680 if (!keyword(tmpbuf, len, 0)) {
3681 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3685 soff = s - SvPVX(PL_linestr);
3689 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3690 if (indirgv && GvCVu(indirgv))
3692 /* filehandle or package name makes it a method */
3693 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3695 soff = s - SvPVX(PL_linestr);
3698 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3699 return 0; /* no assumptions -- "=>" quotes bearword */
3701 start_force(PL_curforce);
3702 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3703 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3704 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3706 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3711 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3713 return *s == '(' ? FUNCMETH : METHOD;
3719 /* Encoded script support. filter_add() effectively inserts a
3720 * 'pre-processing' function into the current source input stream.
3721 * Note that the filter function only applies to the current source file
3722 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3724 * The datasv parameter (which may be NULL) can be used to pass
3725 * private data to this instance of the filter. The filter function
3726 * can recover the SV using the FILTER_DATA macro and use it to
3727 * store private buffers and state information.
3729 * The supplied datasv parameter is upgraded to a PVIO type
3730 * and the IoDIRP/IoANY field is used to store the function pointer,
3731 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3732 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3733 * private use must be set using malloc'd pointers.
3737 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3746 if (!PL_rsfp_filters)
3747 PL_rsfp_filters = newAV();
3750 SvUPGRADE(datasv, SVt_PVIO);
3751 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3752 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3753 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3754 FPTR2DPTR(void *, IoANY(datasv)),
3755 SvPV_nolen(datasv)));
3756 av_unshift(PL_rsfp_filters, 1);
3757 av_store(PL_rsfp_filters, 0, datasv) ;
3762 /* Delete most recently added instance of this filter function. */
3764 Perl_filter_del(pTHX_ filter_t funcp)
3769 PERL_ARGS_ASSERT_FILTER_DEL;
3772 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3773 FPTR2DPTR(void*, funcp)));
3775 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3777 /* if filter is on top of stack (usual case) just pop it off */
3778 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3779 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3780 sv_free(av_pop(PL_rsfp_filters));
3784 /* we need to search for the correct entry and clear it */
3785 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3789 /* Invoke the idxth filter function for the current rsfp. */
3790 /* maxlen 0 = read one text line */
3792 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3797 /* This API is bad. It should have been using unsigned int for maxlen.
3798 Not sure if we want to change the API, but if not we should sanity
3799 check the value here. */
3800 const unsigned int correct_length
3809 PERL_ARGS_ASSERT_FILTER_READ;
3811 if (!PL_parser || !PL_rsfp_filters)
3813 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3814 /* Provide a default input filter to make life easy. */
3815 /* Note that we append to the line. This is handy. */
3816 DEBUG_P(PerlIO_printf(Perl_debug_log,
3817 "filter_read %d: from rsfp\n", idx));
3818 if (correct_length) {
3821 const int old_len = SvCUR(buf_sv);
3823 /* ensure buf_sv is large enough */
3824 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3825 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3826 correct_length)) <= 0) {
3827 if (PerlIO_error(PL_rsfp))
3828 return -1; /* error */
3830 return 0 ; /* end of file */
3832 SvCUR_set(buf_sv, old_len + len) ;
3833 SvPVX(buf_sv)[old_len + len] = '\0';
3836 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3837 if (PerlIO_error(PL_rsfp))
3838 return -1; /* error */
3840 return 0 ; /* end of file */
3843 return SvCUR(buf_sv);
3845 /* Skip this filter slot if filter has been deleted */
3846 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3847 DEBUG_P(PerlIO_printf(Perl_debug_log,
3848 "filter_read %d: skipped (filter deleted)\n",
3850 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3852 /* Get function pointer hidden within datasv */
3853 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3854 DEBUG_P(PerlIO_printf(Perl_debug_log,
3855 "filter_read %d: via function %p (%s)\n",
3856 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3857 /* Call function. The function is expected to */
3858 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3859 /* Return: <0:error, =0:eof, >0:not eof */
3860 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3864 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3868 PERL_ARGS_ASSERT_FILTER_GETS;
3870 #ifdef PERL_CR_FILTER
3871 if (!PL_rsfp_filters) {
3872 filter_add(S_cr_textfilter,NULL);
3875 if (PL_rsfp_filters) {
3877 SvCUR_set(sv, 0); /* start with empty line */
3878 if (FILTER_READ(0, sv, 0) > 0)
3879 return ( SvPVX(sv) ) ;
3884 return (sv_gets(sv, PL_rsfp, append));
3888 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3893 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3895 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3899 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3900 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3902 return GvHV(gv); /* Foo:: */
3905 /* use constant CLASS => 'MyClass' */
3906 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3907 if (gv && GvCV(gv)) {
3908 SV * const sv = cv_const_sv(GvCV(gv));
3910 pkgname = SvPV_const(sv, len);
3913 return gv_stashpvn(pkgname, len, 0);
3917 * S_readpipe_override
3918 * Check whether readpipe() is overriden, and generates the appropriate
3919 * optree, provided sublex_start() is called afterwards.
3922 S_readpipe_override(pTHX)
3925 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3926 pl_yylval.ival = OP_BACKTICK;
3928 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3930 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3931 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3932 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3934 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3935 append_elem(OP_LIST,
3936 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3937 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3944 * The intent of this yylex wrapper is to minimize the changes to the
3945 * tokener when we aren't interested in collecting madprops. It remains
3946 * to be seen how successful this strategy will be...
3953 char *s = PL_bufptr;
3955 /* make sure PL_thiswhite is initialized */
3959 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
3960 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
3961 return S_pending_ident(aTHX);
3963 /* previous token ate up our whitespace? */
3964 if (!PL_lasttoke && PL_nextwhite) {
3965 PL_thiswhite = PL_nextwhite;
3969 /* isolate the token, and figure out where it is without whitespace */
3970 PL_realtokenstart = -1;
3974 assert(PL_curforce < 0);
3976 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3977 if (!PL_thistoken) {
3978 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
3979 PL_thistoken = newSVpvs("");
3981 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
3982 PL_thistoken = newSVpvn(tstart, s - tstart);
3985 if (PL_thismad) /* install head */
3986 CURMAD('X', PL_thistoken);
3989 /* last whitespace of a sublex? */
3990 if (optype == ')' && PL_endwhite) {
3991 CURMAD('X', PL_endwhite);
3996 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
3997 if (!PL_thiswhite && !PL_endwhite && !optype) {
3998 sv_free(PL_thistoken);
4003 /* put off final whitespace till peg */
4004 if (optype == ';' && !PL_rsfp) {
4005 PL_nextwhite = PL_thiswhite;
4008 else if (PL_thisopen) {
4009 CURMAD('q', PL_thisopen);
4011 sv_free(PL_thistoken);
4015 /* Store actual token text as madprop X */
4016 CURMAD('X', PL_thistoken);
4020 /* add preceding whitespace as madprop _ */
4021 CURMAD('_', PL_thiswhite);
4025 /* add quoted material as madprop = */
4026 CURMAD('=', PL_thisstuff);
4030 /* add terminating quote as madprop Q */
4031 CURMAD('Q', PL_thisclose);
4035 /* special processing based on optype */
4039 /* opval doesn't need a TOKEN since it can already store mp */
4049 if (pl_yylval.opval)
4050 append_madprops(PL_thismad, pl_yylval.opval, 0);
4058 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4067 /* remember any fake bracket that lexer is about to discard */
4068 if (PL_lex_brackets == 1 &&
4069 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4072 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4075 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4076 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4079 break; /* don't bother looking for trailing comment */
4088 /* attach a trailing comment to its statement instead of next token */
4092 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4094 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4096 if (*s == '\n' || *s == '#') {
4097 while (s < PL_bufend && *s != '\n')
4101 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4102 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4119 /* Create new token struct. Note: opvals return early above. */
4120 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4127 S_tokenize_use(pTHX_ int is_use, char *s) {
4130 PERL_ARGS_ASSERT_TOKENIZE_USE;
4132 if (PL_expect != XSTATE)
4133 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4134 is_use ? "use" : "no"));
4136 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4137 s = force_version(s, TRUE);
4138 if (*s == ';' || *s == '}'
4139 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4140 start_force(PL_curforce);
4141 NEXTVAL_NEXTTOKE.opval = NULL;
4144 else if (*s == 'v') {
4145 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4146 s = force_version(s, FALSE);
4150 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4151 s = force_version(s, FALSE);
4153 pl_yylval.ival = is_use;
4157 static const char* const exp_name[] =
4158 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4159 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4166 Works out what to call the token just pulled out of the input
4167 stream. The yacc parser takes care of taking the ops we return and
4168 stitching them into a tree.
4174 if read an identifier
4175 if we're in a my declaration
4176 croak if they tried to say my($foo::bar)
4177 build the ops for a my() declaration
4178 if it's an access to a my() variable
4179 are we in a sort block?
4180 croak if my($a); $a <=> $b
4181 build ops for access to a my() variable
4182 if in a dq string, and they've said @foo and we can't find @foo
4184 build ops for a bareword
4185 if we already built the token before, use it.
4190 #pragma segment Perl_yylex
4196 register char *s = PL_bufptr;
4202 /* orig_keyword, gvp, and gv are initialized here because
4203 * jump to the label just_a_word_zero can bypass their
4204 * initialization later. */
4205 I32 orig_keyword = 0;
4210 SV* tmp = newSVpvs("");
4211 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4212 (IV)CopLINE(PL_curcop),
4213 lex_state_names[PL_lex_state],
4214 exp_name[PL_expect],
4215 pv_display(tmp, s, strlen(s), 0, 60));
4218 /* check if there's an identifier for us to look at */
4219 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4220 return REPORT(S_pending_ident(aTHX));
4222 /* no identifier pending identification */
4224 switch (PL_lex_state) {
4226 case LEX_NORMAL: /* Some compilers will produce faster */
4227 case LEX_INTERPNORMAL: /* code if we comment these out. */
4231 /* when we've already built the next token, just pull it out of the queue */
4235 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4237 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4238 PL_nexttoke[PL_lasttoke].next_mad = 0;
4239 if (PL_thismad && PL_thismad->mad_key == '_') {
4240 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4241 PL_thismad->mad_val = 0;
4242 mad_free(PL_thismad);
4247 PL_lex_state = PL_lex_defer;
4248 PL_expect = PL_lex_expect;
4249 PL_lex_defer = LEX_NORMAL;
4250 if (!PL_nexttoke[PL_lasttoke].next_type)
4255 pl_yylval = PL_nextval[PL_nexttoke];
4257 PL_lex_state = PL_lex_defer;
4258 PL_expect = PL_lex_expect;
4259 PL_lex_defer = LEX_NORMAL;
4263 /* FIXME - can these be merged? */
4264 return(PL_nexttoke[PL_lasttoke].next_type);
4266 return REPORT(PL_nexttype[PL_nexttoke]);
4269 /* interpolated case modifiers like \L \U, including \Q and \E.
4270 when we get here, PL_bufptr is at the \
4272 case LEX_INTERPCASEMOD:
4274 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4275 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4277 /* handle \E or end of string */
4278 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4280 if (PL_lex_casemods) {
4281 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4282 PL_lex_casestack[PL_lex_casemods] = '\0';
4284 if (PL_bufptr != PL_bufend
4285 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4287 PL_lex_state = LEX_INTERPCONCAT;
4290 PL_thistoken = newSVpvs("\\E");
4296 while (PL_bufptr != PL_bufend &&
4297 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4299 PL_thiswhite = newSVpvs("");
4300 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4304 if (PL_bufptr != PL_bufend)
4307 PL_lex_state = LEX_INTERPCONCAT;
4311 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4312 "### Saw case modifier\n"); });
4314 if (s[1] == '\\' && s[2] == 'E') {
4317 PL_thiswhite = newSVpvs("");
4318 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4321 PL_lex_state = LEX_INTERPCONCAT;
4326 if (!PL_madskills) /* when just compiling don't need correct */
4327 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4328 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4329 if ((*s == 'L' || *s == 'U') &&
4330 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4331 PL_lex_casestack[--PL_lex_casemods] = '\0';
4334 if (PL_lex_casemods > 10)
4335 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4336 PL_lex_casestack[PL_lex_casemods++] = *s;
4337 PL_lex_casestack[PL_lex_casemods] = '\0';
4338 PL_lex_state = LEX_INTERPCONCAT;
4339 start_force(PL_curforce);
4340 NEXTVAL_NEXTTOKE.ival = 0;
4342 start_force(PL_curforce);
4344 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4346 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4348 NEXTVAL_NEXTTOKE.ival = OP_LC;
4350 NEXTVAL_NEXTTOKE.ival = OP_UC;
4352 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4354 Perl_croak(aTHX_ "panic: yylex");
4356 SV* const tmpsv = newSVpvs("\\ ");
4357 /* replace the space with the character we want to escape
4359 SvPVX(tmpsv)[1] = *s;
4365 if (PL_lex_starts) {
4371 sv_free(PL_thistoken);
4372 PL_thistoken = newSVpvs("");
4375 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4376 if (PL_lex_casemods == 1 && PL_lex_inpat)
4385 case LEX_INTERPPUSH:
4386 return REPORT(sublex_push());
4388 case LEX_INTERPSTART:
4389 if (PL_bufptr == PL_bufend)
4390 return REPORT(sublex_done());
4391 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4392 "### Interpolated variable\n"); });
4394 PL_lex_dojoin = (*PL_bufptr == '@');
4395 PL_lex_state = LEX_INTERPNORMAL;
4396 if (PL_lex_dojoin) {
4397 start_force(PL_curforce);
4398 NEXTVAL_NEXTTOKE.ival = 0;
4400 start_force(PL_curforce);
4401 force_ident("\"", '$');
4402 start_force(PL_curforce);
4403 NEXTVAL_NEXTTOKE.ival = 0;
4405 start_force(PL_curforce);
4406 NEXTVAL_NEXTTOKE.ival = 0;
4408 start_force(PL_curforce);
4409 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4412 if (PL_lex_starts++) {
4417 sv_free(PL_thistoken);
4418 PL_thistoken = newSVpvs("");
4421 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4422 if (!PL_lex_casemods && PL_lex_inpat)
4429 case LEX_INTERPENDMAYBE:
4430 if (intuit_more(PL_bufptr)) {
4431 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4437 if (PL_lex_dojoin) {
4438 PL_lex_dojoin = FALSE;
4439 PL_lex_state = LEX_INTERPCONCAT;
4443 sv_free(PL_thistoken);
4444 PL_thistoken = newSVpvs("");
4449 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4450 && SvEVALED(PL_lex_repl))
4452 if (PL_bufptr != PL_bufend)
4453 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4457 case LEX_INTERPCONCAT:
4459 if (PL_lex_brackets)
4460 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4462 if (PL_bufptr == PL_bufend)
4463 return REPORT(sublex_done());
4465 if (SvIVX(PL_linestr) == '\'') {
4466 SV *sv = newSVsv(PL_linestr);
4469 else if ( PL_hints & HINT_NEW_RE )
4470 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4471 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4475 s = scan_const(PL_bufptr);
4477 PL_lex_state = LEX_INTERPCASEMOD;
4479 PL_lex_state = LEX_INTERPSTART;
4482 if (s != PL_bufptr) {
4483 start_force(PL_curforce);
4485 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4487 NEXTVAL_NEXTTOKE = pl_yylval;
4490 if (PL_lex_starts++) {
4494 sv_free(PL_thistoken);
4495 PL_thistoken = newSVpvs("");
4498 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4499 if (!PL_lex_casemods && PL_lex_inpat)
4512 PL_lex_state = LEX_NORMAL;
4513 s = scan_formline(PL_bufptr);
4514 if (!PL_lex_formbrack)
4520 PL_oldoldbufptr = PL_oldbufptr;
4526 sv_free(PL_thistoken);
4529 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4533 if (isIDFIRST_lazy_if(s,UTF))
4536 unsigned char c = *s;
4537 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4538 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4539 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4544 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4548 goto fake_eof; /* emulate EOF on ^D or ^Z */
4557 if (PL_lex_brackets) {
4558 yyerror((const char *)
4560 ? "Format not terminated"
4561 : "Missing right curly or square bracket"));
4563 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4564 "### Tokener got EOF\n");
4568 if (s++ < PL_bufend)
4569 goto retry; /* ignore stray nulls */
4572 if (!PL_in_eval && !PL_preambled) {
4573 PL_preambled = TRUE;
4579 /* Generate a string of Perl code to load the debugger.
4580 * If PERL5DB is set, it will return the contents of that,
4581 * otherwise a compile-time require of perl5db.pl. */
4583 const char * const pdb = PerlEnv_getenv("PERL5DB");
4586 sv_setpv(PL_linestr, pdb);
4587 sv_catpvs(PL_linestr,";");
4589 SETERRNO(0,SS_NORMAL);
4590 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4593 sv_setpvs(PL_linestr,"");
4594 if (PL_preambleav) {
4595 SV **svp = AvARRAY(PL_preambleav);
4596 SV **const end = svp + AvFILLp(PL_preambleav);
4598 sv_catsv(PL_linestr, *svp);
4600 sv_catpvs(PL_linestr, ";");
4602 sv_free(MUTABLE_SV(PL_preambleav));
4603 PL_preambleav = NULL;
4606 sv_catpvs(PL_linestr,
4607 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4608 if (PL_minus_n || PL_minus_p) {
4609 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4611 sv_catpvs(PL_linestr,"chomp;");
4614 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4615 || *PL_splitstr == '"')
4616 && strchr(PL_splitstr + 1, *PL_splitstr))
4617 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4619 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4620 bytes can be used as quoting characters. :-) */
4621 const char *splits = PL_splitstr;
4622 sv_catpvs(PL_linestr, "our @F=split(q\0");
4625 if (*splits == '\\')
4626 sv_catpvn(PL_linestr, splits, 1);
4627 sv_catpvn(PL_linestr, splits, 1);
4628 } while (*splits++);
4629 /* This loop will embed the trailing NUL of
4630 PL_linestr as the last thing it does before
4632 sv_catpvs(PL_linestr, ");");
4636 sv_catpvs(PL_linestr,"our @F=split(' ');");
4639 sv_catpvs(PL_linestr, "\n");
4640 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4641 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4642 PL_last_lop = PL_last_uni = NULL;
4643 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4644 update_debugger_info(PL_linestr, NULL, 0);
4649 bof = PL_rsfp ? TRUE : FALSE;
4652 fake_eof = LEX_FAKE_EOF;
4654 PL_bufptr = PL_bufend;
4655 CopLINE_inc(PL_curcop);
4656 if (!lex_next_chunk(fake_eof)) {
4657 CopLINE_dec(PL_curcop);
4659 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4661 CopLINE_dec(PL_curcop);
4664 PL_realtokenstart = -1;
4667 /* If it looks like the start of a BOM or raw UTF-16,
4668 * check if it in fact is. */
4669 if (bof && PL_rsfp &&
4674 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4676 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4677 s = swallow_bom((U8*)s);
4681 /* Incest with pod. */
4684 sv_catsv(PL_thiswhite, PL_linestr);
4686 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4687 sv_setpvs(PL_linestr, "");
4688 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4689 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4690 PL_last_lop = PL_last_uni = NULL;
4691 PL_doextract = FALSE;
4696 } while (PL_doextract);
4697 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4698 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4699 PL_last_lop = PL_last_uni = NULL;
4700 if (CopLINE(PL_curcop) == 1) {
4701 while (s < PL_bufend && isSPACE(*s))
4703 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4707 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4711 if (*s == '#' && *(s+1) == '!')
4713 #ifdef ALTERNATE_SHEBANG
4715 static char const as[] = ALTERNATE_SHEBANG;
4716 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4717 d = s + (sizeof(as) - 1);
4719 #endif /* ALTERNATE_SHEBANG */
4728 while (*d && !isSPACE(*d))
4732 #ifdef ARG_ZERO_IS_SCRIPT
4733 if (ipathend > ipath) {
4735 * HP-UX (at least) sets argv[0] to the script name,
4736 * which makes $^X incorrect. And Digital UNIX and Linux,
4737 * at least, set argv[0] to the basename of the Perl
4738 * interpreter. So, having found "#!", we'll set it right.
4740 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4742 assert(SvPOK(x) || SvGMAGICAL(x));
4743 if (sv_eq(x, CopFILESV(PL_curcop))) {
4744 sv_setpvn(x, ipath, ipathend - ipath);
4750 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4751 const char * const lstart = SvPV_const(x,llen);
4753 bstart += blen - llen;
4754 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4755 sv_setpvn(x, ipath, ipathend - ipath);
4760 TAINT_NOT; /* $^X is always tainted, but that's OK */
4762 #endif /* ARG_ZERO_IS_SCRIPT */
4767 d = instr(s,"perl -");
4769 d = instr(s,"perl");
4771 /* avoid getting into infinite loops when shebang
4772 * line contains "Perl" rather than "perl" */
4774 for (d = ipathend-4; d >= ipath; --d) {
4775 if ((*d == 'p' || *d == 'P')
4776 && !ibcmp(d, "perl", 4))
4786 #ifdef ALTERNATE_SHEBANG
4788 * If the ALTERNATE_SHEBANG on this system starts with a
4789 * character that can be part of a Perl expression, then if
4790 * we see it but not "perl", we're probably looking at the
4791 * start of Perl code, not a request to hand off to some
4792 * other interpreter. Similarly, if "perl" is there, but
4793 * not in the first 'word' of the line, we assume the line
4794 * contains the start of the Perl program.
4796 if (d && *s != '#') {
4797 const char *c = ipath;
4798 while (*c && !strchr("; \t\r\n\f\v#", *c))
4801 d = NULL; /* "perl" not in first word; ignore */
4803 *s = '#'; /* Don't try to parse shebang line */
4805 #endif /* ALTERNATE_SHEBANG */
4810 !instr(s,"indir") &&
4811 instr(PL_origargv[0],"perl"))
4818 while (s < PL_bufend && isSPACE(*s))
4820 if (s < PL_bufend) {
4821 Newx(newargv,PL_origargc+3,char*);
4823 while (s < PL_bufend && !isSPACE(*s))
4826 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4829 newargv = PL_origargv;
4832 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4834 Perl_croak(aTHX_ "Can't exec %s", ipath);
4837 while (*d && !isSPACE(*d))
4839 while (SPACE_OR_TAB(*d))
4843 const bool switches_done = PL_doswitches;
4844 const U32 oldpdb = PL_perldb;
4845 const bool oldn = PL_minus_n;
4846 const bool oldp = PL_minus_p;
4850 bool baduni = FALSE;
4852 const char *d2 = d1 + 1;
4853 if (parse_unicode_opts((const char **)&d2)
4857 if (baduni || *d1 == 'M' || *d1 == 'm') {
4858 const char * const m = d1;
4859 while (*d1 && !isSPACE(*d1))
4861 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4864 d1 = moreswitches(d1);
4866 if (PL_doswitches && !switches_done) {
4867 int argc = PL_origargc;
4868 char **argv = PL_origargv;
4871 } while (argc && argv[0][0] == '-' && argv[0][1]);
4872 init_argv_symbols(argc,argv);
4874 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4875 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4876 /* if we have already added "LINE: while (<>) {",
4877 we must not do it again */
4879 sv_setpvs(PL_linestr, "");
4880 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4881 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4882 PL_last_lop = PL_last_uni = NULL;
4883 PL_preambled = FALSE;
4884 if (PERLDB_LINE || PERLDB_SAVESRC)
4885 (void)gv_fetchfile(PL_origfilename);
4892 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4894 PL_lex_state = LEX_FORMLINE;
4899 #ifdef PERL_STRICT_CR
4900 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4902 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4904 case ' ': case '\t': case '\f': case 013:
4906 PL_realtokenstart = -1;
4908 PL_thiswhite = newSVpvs("");
4909 sv_catpvn(PL_thiswhite, s, 1);
4916 PL_realtokenstart = -1;
4920 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
4921 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4922 /* handle eval qq[#line 1 "foo"\n ...] */
4923 CopLINE_dec(PL_curcop);
4926 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4928 if (!PL_in_eval || PL_rsfp)
4933 while (d < PL_bufend && *d != '\n')
4937 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4938 Perl_croak(aTHX_ "panic: input overflow");
4941 PL_thiswhite = newSVpvn(s, d - s);
4946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4948 PL_lex_state = LEX_FORMLINE;
4954 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4955 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
4958 TOKEN(PEG); /* make sure any #! line is accessible */
4963 /* if (PL_madskills && PL_lex_formbrack) { */
4965 while (d < PL_bufend && *d != '\n')
4969 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4970 Perl_croak(aTHX_ "panic: input overflow");
4971 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
4973 PL_thiswhite = newSVpvs("");
4974 if (CopLINE(PL_curcop) == 1) {
4975 sv_setpvs(PL_thiswhite, "");
4978 sv_catpvn(PL_thiswhite, s, d - s);
4992 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5000 while (s < PL_bufend && SPACE_OR_TAB(*s))
5003 if (strnEQ(s,"=>",2)) {
5004 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5005 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5006 OPERATOR('-'); /* unary minus */
5008 PL_last_uni = PL_oldbufptr;
5010 case 'r': ftst = OP_FTEREAD; break;
5011 case 'w': ftst = OP_FTEWRITE; break;
5012 case 'x': ftst = OP_FTEEXEC; break;
5013 case 'o': ftst = OP_FTEOWNED; break;
5014 case 'R': ftst = OP_FTRREAD; break;
5015 case 'W': ftst = OP_FTRWRITE; break;
5016 case 'X': ftst = OP_FTREXEC; break;
5017 case 'O': ftst = OP_FTROWNED; break;
5018 case 'e': ftst = OP_FTIS; break;
5019 case 'z': ftst = OP_FTZERO; break;
5020 case 's': ftst = OP_FTSIZE; break;
5021 case 'f': ftst = OP_FTFILE; break;
5022 case 'd': ftst = OP_FTDIR; break;
5023 case 'l': ftst = OP_FTLINK; break;
5024 case 'p': ftst = OP_FTPIPE; break;
5025 case 'S': ftst = OP_FTSOCK; break;
5026 case 'u': ftst = OP_FTSUID; break;
5027 case 'g': ftst = OP_FTSGID; break;
5028 case 'k': ftst = OP_FTSVTX; break;
5029 case 'b': ftst = OP_FTBLK; break;
5030 case 'c': ftst = OP_FTCHR; break;
5031 case 't': ftst = OP_FTTTY; break;
5032 case 'T': ftst = OP_FTTEXT; break;
5033 case 'B': ftst = OP_FTBINARY; break;
5034 case 'M': case 'A': case 'C':
5035 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5037 case 'M': ftst = OP_FTMTIME; break;
5038 case 'A': ftst = OP_FTATIME; break;
5039 case 'C': ftst = OP_FTCTIME; break;
5047 PL_last_lop_op = (OPCODE)ftst;
5048 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5049 "### Saw file test %c\n", (int)tmp);
5054 /* Assume it was a minus followed by a one-letter named
5055 * subroutine call (or a -bareword), then. */
5056 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5057 "### '-%c' looked like a file test but was not\n",
5064 const char tmp = *s++;
5067 if (PL_expect == XOPERATOR)
5072 else if (*s == '>') {
5075 if (isIDFIRST_lazy_if(s,UTF)) {
5076 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5084 if (PL_expect == XOPERATOR)
5087 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5089 OPERATOR('-'); /* unary minus */
5095 const char tmp = *s++;
5098 if (PL_expect == XOPERATOR)
5103 if (PL_expect == XOPERATOR)
5106 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5113 if (PL_expect != XOPERATOR) {
5114 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5115 PL_expect = XOPERATOR;
5116 force_ident(PL_tokenbuf, '*');
5129 if (PL_expect == XOPERATOR) {
5133 PL_tokenbuf[0] = '%';
5134 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5135 sizeof PL_tokenbuf - 1, FALSE);
5136 if (!PL_tokenbuf[1]) {
5139 PL_pending_ident = '%';
5148 const char tmp = *s++;
5153 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5160 const char tmp = *s++;
5166 goto just_a_word_zero_gv;
5169 switch (PL_expect) {
5175 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5177 PL_bufptr = s; /* update in case we back off */
5179 deprecate(":= for an empty attribute list");
5186 PL_expect = XTERMBLOCK;
5189 stuffstart = s - SvPVX(PL_linestr) - 1;
5193 while (isIDFIRST_lazy_if(s,UTF)) {
5196 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5197 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5198 if (tmp < 0) tmp = -tmp;
5213 sv = newSVpvn(s, len);
5215 d = scan_str(d,TRUE,TRUE);
5217 /* MUST advance bufptr here to avoid bogus
5218 "at end of line" context messages from yyerror().
5220 PL_bufptr = s + len;
5221 yyerror("Unterminated attribute parameter in attribute list");
5225 return REPORT(0); /* EOF indicator */
5229 sv_catsv(sv, PL_lex_stuff);
5230 attrs = append_elem(OP_LIST, attrs,
5231 newSVOP(OP_CONST, 0, sv));
5232 SvREFCNT_dec(PL_lex_stuff);
5233 PL_lex_stuff = NULL;
5236 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5238 if (PL_in_my == KEY_our) {
5239 deprecate(":unique");
5242 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5245 /* NOTE: any CV attrs applied here need to be part of
5246 the CVf_BUILTIN_ATTRS define in cv.h! */
5247 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5249 CvLVALUE_on(PL_compcv);
5251 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5253 deprecate(":locked");
5255 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5257 CvMETHOD_on(PL_compcv);
5259 /* After we've set the flags, it could be argued that
5260 we don't need to do the attributes.pm-based setting
5261 process, and shouldn't bother appending recognized
5262 flags. To experiment with that, uncomment the
5263 following "else". (Note that's already been
5264 uncommented. That keeps the above-applied built-in
5265 attributes from being intercepted (and possibly
5266 rejected) by a package's attribute routines, but is
5267 justified by the performance win for the common case
5268 of applying only built-in attributes.) */
5270 attrs = append_elem(OP_LIST, attrs,
5271 newSVOP(OP_CONST, 0,
5275 if (*s == ':' && s[1] != ':')
5278 break; /* require real whitespace or :'s */
5279 /* XXX losing whitespace on sequential attributes here */
5283 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5284 if (*s != ';' && *s != '}' && *s != tmp
5285 && (tmp != '=' || *s != ')')) {
5286 const char q = ((*s == '\'') ? '"' : '\'');
5287 /* If here for an expression, and parsed no attrs, back
5289 if (tmp == '=' && !attrs) {
5293 /* MUST advance bufptr here to avoid bogus "at end of line"
5294 context messages from yyerror().
5297 yyerror( (const char *)
5299 ? Perl_form(aTHX_ "Invalid separator character "
5300 "%c%c%c in attribute list", q, *s, q)
5301 : "Unterminated attribute list" ) );
5309 start_force(PL_curforce);
5310 NEXTVAL_NEXTTOKE.opval = attrs;
5311 CURMAD('_', PL_nextwhite);
5316 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5317 (s - SvPVX(PL_linestr)) - stuffstart);
5325 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5326 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5334 const char tmp = *s++;
5339 const char tmp = *s++;
5347 if (PL_lex_brackets <= 0)
5348 yyerror("Unmatched right square bracket");
5351 if (PL_lex_state == LEX_INTERPNORMAL) {
5352 if (PL_lex_brackets == 0) {
5353 if (*s == '-' && s[1] == '>')
5354 PL_lex_state = LEX_INTERPENDMAYBE;
5355 else if (*s != '[' && *s != '{')
5356 PL_lex_state = LEX_INTERPEND;
5363 if (PL_lex_brackets > 100) {
5364 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5366 switch (PL_expect) {
5368 if (PL_lex_formbrack) {
5372 if (PL_oldoldbufptr == PL_last_lop)
5373 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5375 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5376 OPERATOR(HASHBRACK);
5378 while (s < PL_bufend && SPACE_OR_TAB(*s))
5381 PL_tokenbuf[0] = '\0';
5382 if (d < PL_bufend && *d == '-') {
5383 PL_tokenbuf[0] = '-';
5385 while (d < PL_bufend && SPACE_OR_TAB(*d))
5388 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5389 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5391 while (d < PL_bufend && SPACE_OR_TAB(*d))
5394 const char minus = (PL_tokenbuf[0] == '-');
5395 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5403 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5408 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5413 if (PL_oldoldbufptr == PL_last_lop)
5414 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5416 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5419 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5421 /* This hack is to get the ${} in the message. */
5423 yyerror("syntax error");
5426 OPERATOR(HASHBRACK);
5428 /* This hack serves to disambiguate a pair of curlies
5429 * as being a block or an anon hash. Normally, expectation
5430 * determines that, but in cases where we're not in a
5431 * position to expect anything in particular (like inside
5432 * eval"") we have to resolve the ambiguity. This code
5433 * covers the case where the first term in the curlies is a
5434 * quoted string. Most other cases need to be explicitly
5435 * disambiguated by prepending a "+" before the opening
5436 * curly in order to force resolution as an anon hash.
5438 * XXX should probably propagate the outer expectation
5439 * into eval"" to rely less on this hack, but that could
5440 * potentially break current behavior of eval"".
5444 if (*s == '\'' || *s == '"' || *s == '`') {
5445 /* common case: get past first string, handling escapes */
5446 for (t++; t < PL_bufend && *t != *s;)
5447 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5451 else if (*s == 'q') {
5454 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5457 /* skip q//-like construct */
5459 char open, close, term;
5462 while (t < PL_bufend && isSPACE(*t))
5464 /* check for q => */
5465 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5466 OPERATOR(HASHBRACK);
5470 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5474 for (t++; t < PL_bufend; t++) {
5475 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5477 else if (*t == open)
5481 for (t++; t < PL_bufend; t++) {
5482 if (*t == '\\' && t+1 < PL_bufend)
5484 else if (*t == close && --brackets <= 0)
5486 else if (*t == open)
5493 /* skip plain q word */
5494 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5497 else if (isALNUM_lazy_if(t,UTF)) {
5499 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5502 while (t < PL_bufend && isSPACE(*t))
5504 /* if comma follows first term, call it an anon hash */
5505 /* XXX it could be a comma expression with loop modifiers */
5506 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5507 || (*t == '=' && t[1] == '>')))
5508 OPERATOR(HASHBRACK);
5509 if (PL_expect == XREF)
5512 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5518 pl_yylval.ival = CopLINE(PL_curcop);
5519 if (isSPACE(*s) || *s == '#')
5520 PL_copline = NOLINE; /* invalidate current command line number */
5525 if (PL_lex_brackets <= 0)
5526 yyerror("Unmatched right curly bracket");
5528 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5529 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5530 PL_lex_formbrack = 0;
5531 if (PL_lex_state == LEX_INTERPNORMAL) {
5532 if (PL_lex_brackets == 0) {
5533 if (PL_expect & XFAKEBRACK) {
5534 PL_expect &= XENUMMASK;
5535 PL_lex_state = LEX_INTERPEND;
5540 PL_thiswhite = newSVpvs("");
5541 sv_catpvs(PL_thiswhite,"}");
5544 return yylex(); /* ignore fake brackets */
5546 if (*s == '-' && s[1] == '>')
5547 PL_lex_state = LEX_INTERPENDMAYBE;
5548 else if (*s != '[' && *s != '{')
5549 PL_lex_state = LEX_INTERPEND;
5552 if (PL_expect & XFAKEBRACK) {
5553 PL_expect &= XENUMMASK;
5555 return yylex(); /* ignore fake brackets */
5557 start_force(PL_curforce);
5559 curmad('X', newSVpvn(s-1,1));
5560 CURMAD('_', PL_thiswhite);
5565 PL_thistoken = newSVpvs("");
5573 if (PL_expect == XOPERATOR) {
5574 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5575 && isIDFIRST_lazy_if(s,UTF))
5577 CopLINE_dec(PL_curcop);
5578 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5579 CopLINE_inc(PL_curcop);
5584 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5586 PL_expect = XOPERATOR;
5587 force_ident(PL_tokenbuf, '&');
5591 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5603 const char tmp = *s++;
5610 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5611 && strchr("+-*/%.^&|<",tmp))
5612 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5613 "Reversed %c= operator",(int)tmp);
5615 if (PL_expect == XSTATE && isALPHA(tmp) &&
5616 (s == PL_linestart+1 || s[-2] == '\n') )
5618 if (PL_in_eval && !PL_rsfp) {
5623 if (strnEQ(s,"=cut",4)) {
5639 PL_thiswhite = newSVpvs("");
5640 sv_catpvn(PL_thiswhite, PL_linestart,
5641 PL_bufend - PL_linestart);
5645 PL_doextract = TRUE;
5649 if (PL_lex_brackets < PL_lex_formbrack) {
5651 #ifdef PERL_STRICT_CR
5652 while (SPACE_OR_TAB(*t))
5654 while (SPACE_OR_TAB(*t) || *t == '\r')
5657 if (*t == '\n' || *t == '#') {
5668 const char tmp = *s++;
5670 /* was this !=~ where !~ was meant?
5671 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5673 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5674 const char *t = s+1;
5676 while (t < PL_bufend && isSPACE(*t))
5679 if (*t == '/' || *t == '?' ||
5680 ((*t == 'm' || *t == 's' || *t == 'y')
5681 && !isALNUM(t[1])) ||
5682 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5683 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5684 "!=~ should be !~");
5694 if (PL_expect != XOPERATOR) {
5695 if (s[1] != '<' && !strchr(s,'>'))
5698 s = scan_heredoc(s);
5700 s = scan_inputsymbol(s);
5701 TERM(sublex_start());
5707 SHop(OP_LEFT_SHIFT);
5721 const char tmp = *s++;
5723 SHop(OP_RIGHT_SHIFT);
5724 else if (tmp == '=')
5733 if (PL_expect == XOPERATOR) {
5734 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5735 return deprecate_commaless_var_list();
5739 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5740 PL_tokenbuf[0] = '@';
5741 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5742 sizeof PL_tokenbuf - 1, FALSE);
5743 if (PL_expect == XOPERATOR)
5744 no_op("Array length", s);
5745 if (!PL_tokenbuf[1])
5747 PL_expect = XOPERATOR;
5748 PL_pending_ident = '#';
5752 PL_tokenbuf[0] = '$';
5753 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5754 sizeof PL_tokenbuf - 1, FALSE);
5755 if (PL_expect == XOPERATOR)
5757 if (!PL_tokenbuf[1]) {
5759 yyerror("Final $ should be \\$ or $name");
5763 /* This kludge not intended to be bulletproof. */
5764 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5765 pl_yylval.opval = newSVOP(OP_CONST, 0,
5766 newSViv(CopARYBASE_get(&PL_compiling)));
5767 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5773 const char tmp = *s;
5774 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5777 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5778 && intuit_more(s)) {
5780 PL_tokenbuf[0] = '@';
5781 if (ckWARN(WARN_SYNTAX)) {
5784 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5787 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5788 while (t < PL_bufend && *t != ']')
5790 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5791 "Multidimensional syntax %.*s not supported",
5792 (int)((t - PL_bufptr) + 1), PL_bufptr);
5796 else if (*s == '{') {
5798 PL_tokenbuf[0] = '%';
5799 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5800 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5802 char tmpbuf[sizeof PL_tokenbuf];
5805 } while (isSPACE(*t));
5806 if (isIDFIRST_lazy_if(t,UTF)) {
5808 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5812 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
5813 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5814 "You need to quote \"%s\"",
5821 PL_expect = XOPERATOR;
5822 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5823 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5824 if (!islop || PL_last_lop_op == OP_GREPSTART)
5825 PL_expect = XOPERATOR;
5826 else if (strchr("$@\"'`q", *s))
5827 PL_expect = XTERM; /* e.g. print $fh "foo" */
5828 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5829 PL_expect = XTERM; /* e.g. print $fh &sub */
5830 else if (isIDFIRST_lazy_if(s,UTF)) {
5831 char tmpbuf[sizeof PL_tokenbuf];
5833 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5834 if ((t2 = keyword(tmpbuf, len, 0))) {
5835 /* binary operators exclude handle interpretations */
5847 PL_expect = XTERM; /* e.g. print $fh length() */
5852 PL_expect = XTERM; /* e.g. print $fh subr() */
5855 else if (isDIGIT(*s))
5856 PL_expect = XTERM; /* e.g. print $fh 3 */
5857 else if (*s == '.' && isDIGIT(s[1]))
5858 PL_expect = XTERM; /* e.g. print $fh .3 */
5859 else if ((*s == '?' || *s == '-' || *s == '+')
5860 && !isSPACE(s[1]) && s[1] != '=')
5861 PL_expect = XTERM; /* e.g. print $fh -1 */
5862 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5864 PL_expect = XTERM; /* e.g. print $fh /.../
5865 XXX except DORDOR operator
5867 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5869 PL_expect = XTERM; /* print $fh <<"EOF" */
5872 PL_pending_ident = '$';
5876 if (PL_expect == XOPERATOR)
5878 PL_tokenbuf[0] = '@';
5879 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5880 if (!PL_tokenbuf[1]) {
5883 if (PL_lex_state == LEX_NORMAL)
5885 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5887 PL_tokenbuf[0] = '%';
5889 /* Warn about @ where they meant $. */
5890 if (*s == '[' || *s == '{') {
5891 if (ckWARN(WARN_SYNTAX)) {
5892 const char *t = s + 1;
5893 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
5895 if (*t == '}' || *t == ']') {
5897 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
5898 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5899 "Scalar value %.*s better written as $%.*s",
5900 (int)(t-PL_bufptr), PL_bufptr,
5901 (int)(t-PL_bufptr-1), PL_bufptr+1);
5906 PL_pending_ident = '@';
5909 case '/': /* may be division, defined-or, or pattern */
5910 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5914 case '?': /* may either be conditional or pattern */
5915 if (PL_expect == XOPERATOR) {
5923 /* A // operator. */
5933 /* Disable warning on "study /blah/" */
5934 if (PL_oldoldbufptr == PL_last_uni
5935 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5936 || memNE(PL_last_uni, "study", 5)
5937 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5940 s = scan_pat(s,OP_MATCH);
5941 TERM(sublex_start());
5945 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5946 #ifdef PERL_STRICT_CR
5949 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5951 && (s == PL_linestart || s[-1] == '\n') )
5953 PL_lex_formbrack = 0;
5957 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5961 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
5967 pl_yylval.ival = OPf_SPECIAL;
5976 case '0': case '1': case '2': case '3': case '4':
5977 case '5': case '6': case '7': case '8': case '9':
5978 s = scan_num(s, &pl_yylval);
5979 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
5980 if (PL_expect == XOPERATOR)
5985 s = scan_str(s,!!PL_madskills,FALSE);
5986 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
5987 if (PL_expect == XOPERATOR) {
5988 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5989 return deprecate_commaless_var_list();
5996 pl_yylval.ival = OP_CONST;
5997 TERM(sublex_start());
6000 s = scan_str(s,!!PL_madskills,FALSE);
6001 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6002 if (PL_expect == XOPERATOR) {
6003 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6004 return deprecate_commaless_var_list();
6011 pl_yylval.ival = OP_CONST;
6012 /* FIXME. I think that this can be const if char *d is replaced by
6013 more localised variables. */
6014 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6015 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6016 pl_yylval.ival = OP_STRINGIFY;
6020 TERM(sublex_start());
6023 s = scan_str(s,!!PL_madskills,FALSE);
6024 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6025 if (PL_expect == XOPERATOR)
6026 no_op("Backticks",s);
6029 readpipe_override();
6030 TERM(sublex_start());
6034 if (PL_lex_inwhat && isDIGIT(*s))
6035 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6037 if (PL_expect == XOPERATOR)
6038 no_op("Backslash",s);
6042 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6043 char *start = s + 2;
6044 while (isDIGIT(*start) || *start == '_')
6046 if (*start == '.' && isDIGIT(start[1])) {
6047 s = scan_num(s, &pl_yylval);
6050 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6051 else if (!isALPHA(*start) && (PL_expect == XTERM
6052 || PL_expect == XREF || PL_expect == XSTATE
6053 || PL_expect == XTERMORDORDOR)) {
6054 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6056 s = scan_num(s, &pl_yylval);
6063 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6106 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6108 /* Some keywords can be followed by any delimiter, including ':' */
6109 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
6110 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6111 (PL_tokenbuf[0] == 'q' &&
6112 strchr("qwxr", PL_tokenbuf[1])))));
6114 /* x::* is just a word, unless x is "CORE" */
6115 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6119 while (d < PL_bufend && isSPACE(*d))
6120 d++; /* no comments skipped here, or s### is misparsed */
6122 /* Is this a word before a => operator? */
6123 if (*d == '=' && d[1] == '>') {
6126 = (OP*)newSVOP(OP_CONST, 0,
6127 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6128 pl_yylval.opval->op_private = OPpCONST_BARE;
6132 /* Check for plugged-in keyword */
6136 char *saved_bufptr = PL_bufptr;
6138 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6140 if (result == KEYWORD_PLUGIN_DECLINE) {
6141 /* not a plugged-in keyword */
6142 PL_bufptr = saved_bufptr;
6143 } else if (result == KEYWORD_PLUGIN_STMT) {
6144 pl_yylval.opval = o;
6147 return REPORT(PLUGSTMT);
6148 } else if (result == KEYWORD_PLUGIN_EXPR) {
6149 pl_yylval.opval = o;
6151 PL_expect = XOPERATOR;
6152 return REPORT(PLUGEXPR);
6154 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6159 /* Check for built-in keyword */
6160 tmp = keyword(PL_tokenbuf, len, 0);
6162 /* Is this a label? */
6163 if (!anydelim && PL_expect == XSTATE
6164 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6166 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6171 if (tmp < 0) { /* second-class keyword? */
6172 GV *ogv = NULL; /* override (winner) */
6173 GV *hgv = NULL; /* hidden (loser) */
6174 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6176 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6179 if (GvIMPORTED_CV(gv))
6181 else if (! CvMETHOD(cv))
6185 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6186 (gv = *gvp) && isGV_with_GP(gv) &&
6187 GvCVu(gv) && GvIMPORTED_CV(gv))
6194 tmp = 0; /* overridden by import or by GLOBAL */
6197 && -tmp==KEY_lock /* XXX generalizable kludge */
6200 tmp = 0; /* any sub overrides "weak" keyword */
6202 else { /* no override */
6204 if (tmp == KEY_dump) {
6205 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6206 "dump() better written as CORE::dump()");
6210 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6211 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6212 "Ambiguous call resolved as CORE::%s(), "
6213 "qualify as such or use &",
6221 default: /* not a keyword */
6222 /* Trade off - by using this evil construction we can pull the
6223 variable gv into the block labelled keylookup. If not, then
6224 we have to give it function scope so that the goto from the
6225 earlier ':' case doesn't bypass the initialisation. */
6227 just_a_word_zero_gv:
6235 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6239 SV *nextPL_nextwhite = 0;
6243 /* Get the rest if it looks like a package qualifier */
6245 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6247 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6250 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6251 *s == '\'' ? "'" : "::");
6256 if (PL_expect == XOPERATOR) {
6257 if (PL_bufptr == PL_linestart) {
6258 CopLINE_dec(PL_curcop);
6259 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6260 CopLINE_inc(PL_curcop);
6263 no_op("Bareword",s);
6266 /* Look for a subroutine with this name in current package,
6267 unless name is "Foo::", in which case Foo is a bearword
6268 (and a package name). */
6270 if (len > 2 && !PL_madskills &&
6271 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6273 if (ckWARN(WARN_BAREWORD)
6274 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6275 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6276 "Bareword \"%s\" refers to nonexistent package",
6279 PL_tokenbuf[len] = '\0';
6285 /* Mustn't actually add anything to a symbol table.
6286 But also don't want to "initialise" any placeholder
6287 constants that might already be there into full
6288 blown PVGVs with attached PVCV. */
6289 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6290 GV_NOADD_NOINIT, SVt_PVCV);
6295 /* if we saw a global override before, get the right name */
6297 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6298 len ? len : strlen(PL_tokenbuf));
6300 SV * const tmp_sv = sv;
6301 sv = newSVpvs("CORE::GLOBAL::");
6302 sv_catsv(sv, tmp_sv);
6303 SvREFCNT_dec(tmp_sv);
6307 if (PL_madskills && !PL_thistoken) {
6308 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6309 PL_thistoken = newSVpvn(start,s - start);
6310 PL_realtokenstart = s - SvPVX(PL_linestr);
6314 /* Presume this is going to be a bareword of some sort. */
6316 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6317 pl_yylval.opval->op_private = OPpCONST_BARE;
6319 /* And if "Foo::", then that's what it certainly is. */
6325 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6326 const_op->op_private = OPpCONST_BARE;
6327 rv2cv_op = newCVREF(0, const_op);
6329 if (rv2cv_op->op_type == OP_RV2CV &&
6330 (rv2cv_op->op_flags & OPf_KIDS)) {
6331 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6332 switch (rv_op->op_type) {
6334 SV *sv = cSVOPx_sv(rv_op);
6335 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6339 GV *gv = cGVOPx_gv(rv_op);
6340 CV *maybe_cv = GvCVu(gv);
6341 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6347 /* See if it's the indirect object for a list operator. */
6349 if (PL_oldoldbufptr &&
6350 PL_oldoldbufptr < PL_bufptr &&
6351 (PL_oldoldbufptr == PL_last_lop
6352 || PL_oldoldbufptr == PL_last_uni) &&
6353 /* NO SKIPSPACE BEFORE HERE! */
6354 (PL_expect == XREF ||
6355 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6357 bool immediate_paren = *s == '(';
6359 /* (Now we can afford to cross potential line boundary.) */
6360 s = SKIPSPACE2(s,nextPL_nextwhite);
6362 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6365 /* Two barewords in a row may indicate method call. */
6367 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6368 (tmp = intuit_method(s, gv, cv))) {
6373 /* If not a declared subroutine, it's an indirect object. */
6374 /* (But it's an indir obj regardless for sort.) */
6375 /* Also, if "_" follows a filetest operator, it's a bareword */
6378 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6380 (PL_last_lop_op != OP_MAPSTART &&
6381 PL_last_lop_op != OP_GREPSTART))))
6382 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6383 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6386 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6391 PL_expect = XOPERATOR;
6394 s = SKIPSPACE2(s,nextPL_nextwhite);
6395 PL_nextwhite = nextPL_nextwhite;
6400 /* Is this a word before a => operator? */
6401 if (*s == '=' && s[1] == '>' && !pkgname) {
6404 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6405 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6406 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6410 /* If followed by a paren, it's certainly a subroutine. */
6415 while (SPACE_OR_TAB(*d))
6417 if (*d == ')' && (sv = cv_const_sv(cv))) {
6424 PL_nextwhite = PL_thiswhite;
6427 start_force(PL_curforce);
6429 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6430 PL_expect = XOPERATOR;
6433 PL_nextwhite = nextPL_nextwhite;
6434 curmad('X', PL_thistoken);
6435 PL_thistoken = newSVpvs("");
6444 /* If followed by var or block, call it a method (unless sub) */
6446 if ((*s == '$' || *s == '{') && !cv) {
6448 PL_last_lop = PL_oldbufptr;
6449 PL_last_lop_op = OP_METHOD;
6453 /* If followed by a bareword, see if it looks like indir obj. */
6456 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6457 && (tmp = intuit_method(s, gv, cv))) {
6462 /* Not a method, so call it a subroutine (if defined) */
6465 if (lastchar == '-')
6466 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6467 "Ambiguous use of -%s resolved as -&%s()",
6468 PL_tokenbuf, PL_tokenbuf);
6469 /* Check for a constant sub */
6470 if ((sv = cv_const_sv(cv))) {
6473 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6474 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6475 pl_yylval.opval->op_private = 0;
6479 op_free(pl_yylval.opval);
6480 pl_yylval.opval = rv2cv_op;
6481 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6482 PL_last_lop = PL_oldbufptr;
6483 PL_last_lop_op = OP_ENTERSUB;
6484 /* Is there a prototype? */
6492 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6495 while (*proto == ';')
6500 *proto == '$' || *proto == '_'
6506 *proto == '\\' && proto[1] && proto[2] == '\0'
6510 if (*proto == '\\' && proto[1] == '[') {
6511 const char *p = proto + 2;
6512 while(*p && *p != ']')
6514 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6516 if (*proto == '&' && *s == '{') {
6518 sv_setpvs(PL_subname, "__ANON__");
6520 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6527 PL_nextwhite = PL_thiswhite;
6530 start_force(PL_curforce);
6531 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6534 PL_nextwhite = nextPL_nextwhite;
6535 curmad('X', PL_thistoken);
6536 PL_thistoken = newSVpvs("");
6543 /* Guess harder when madskills require "best effort". */
6544 if (PL_madskills && (!gv || !GvCVu(gv))) {
6545 int probable_sub = 0;
6546 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6548 else if (isALPHA(*s)) {
6552 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6553 if (!keyword(tmpbuf, tmplen, 0))
6556 while (d < PL_bufend && isSPACE(*d))
6558 if (*d == '=' && d[1] == '>')
6563 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6564 op_free(pl_yylval.opval);
6565 pl_yylval.opval = rv2cv_op;
6566 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6567 PL_last_lop = PL_oldbufptr;
6568 PL_last_lop_op = OP_ENTERSUB;
6569 PL_nextwhite = PL_thiswhite;
6571 start_force(PL_curforce);
6572 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6574 PL_nextwhite = nextPL_nextwhite;
6575 curmad('X', PL_thistoken);
6576 PL_thistoken = newSVpvs("");
6581 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6588 /* Call it a bare word */
6590 if (PL_hints & HINT_STRICT_SUBS)
6591 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6594 /* after "print" and similar functions (corresponding to
6595 * "F? L" in opcode.pl), whatever wasn't already parsed as
6596 * a filehandle should be subject to "strict subs".
6597 * Likewise for the optional indirect-object argument to system
6598 * or exec, which can't be a bareword */
6599 if ((PL_last_lop_op == OP_PRINT
6600 || PL_last_lop_op == OP_PRTF
6601 || PL_last_lop_op == OP_SAY
6602 || PL_last_lop_op == OP_SYSTEM
6603 || PL_last_lop_op == OP_EXEC)
6604 && (PL_hints & HINT_STRICT_SUBS))
6605 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6606 if (lastchar != '-') {
6607 if (ckWARN(WARN_RESERVED)) {
6611 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6612 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6620 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6621 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6622 "Operator or semicolon missing before %c%s",
6623 lastchar, PL_tokenbuf);
6624 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6625 "Ambiguous use of %c resolved as operator %c",
6626 lastchar, lastchar);
6632 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6633 newSVpv(CopFILE(PL_curcop),0));
6637 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6638 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6641 case KEY___PACKAGE__:
6642 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6644 ? newSVhek(HvNAME_HEK(PL_curstash))
6651 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6652 const char *pname = "main";
6653 if (PL_tokenbuf[2] == 'D')
6654 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6655 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6659 GvIOp(gv) = newIO();
6660 IoIFP(GvIOp(gv)) = PL_rsfp;
6661 #if defined(HAS_FCNTL) && defined(F_SETFD)
6663 const int fd = PerlIO_fileno(PL_rsfp);
6664 fcntl(fd,F_SETFD,fd >= 3);
6667 /* Mark this internal pseudo-handle as clean */
6668 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6669 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6670 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6672 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6673 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6674 /* if the script was opened in binmode, we need to revert
6675 * it to text mode for compatibility; but only iff it has CRs
6676 * XXX this is a questionable hack at best. */
6677 if (PL_bufend-PL_bufptr > 2
6678 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6681 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6682 loc = PerlIO_tell(PL_rsfp);
6683 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6686 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6688 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6689 #endif /* NETWARE */
6690 #ifdef PERLIO_IS_STDIO /* really? */
6691 # if defined(__BORLANDC__)
6692 /* XXX see note in do_binmode() */
6693 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6697 PerlIO_seek(PL_rsfp, loc, 0);
6701 #ifdef PERLIO_LAYERS
6704 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6705 else if (PL_encoding) {
6712 XPUSHs(PL_encoding);
6714 call_method("name", G_SCALAR);
6718 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6719 Perl_form(aTHX_ ":encoding(%"SVf")",
6728 if (PL_realtokenstart >= 0) {
6729 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6731 PL_endwhite = newSVpvs("");
6732 sv_catsv(PL_endwhite, PL_thiswhite);
6734 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6735 PL_realtokenstart = -1;
6737 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6753 if (PL_expect == XSTATE) {
6760 if (*s == ':' && s[1] == ':') {
6763 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6764 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6765 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
6768 else if (tmp == KEY_require || tmp == KEY_do)
6769 /* that's a way to remember we saw "CORE::" */
6782 LOP(OP_ACCEPT,XTERM);
6788 LOP(OP_ATAN2,XTERM);
6794 LOP(OP_BINMODE,XTERM);
6797 LOP(OP_BLESS,XTERM);
6806 /* When 'use switch' is in effect, continue has a dual
6807 life as a control operator. */
6809 if (!FEATURE_IS_ENABLED("switch"))
6812 /* We have to disambiguate the two senses of
6813 "continue". If the next token is a '{' then
6814 treat it as the start of a continue block;
6815 otherwise treat it as a control operator.
6827 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
6844 if (!PL_cryptseen) {
6845 PL_cryptseen = TRUE;
6849 LOP(OP_CRYPT,XTERM);
6852 LOP(OP_CHMOD,XTERM);
6855 LOP(OP_CHOWN,XTERM);
6858 LOP(OP_CONNECT,XTERM);
6877 s = force_word(s,WORD,TRUE,TRUE,FALSE);
6878 if (orig_keyword == KEY_do) {
6887 PL_hints |= HINT_BLOCK_SCOPE;
6897 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
6898 LOP(OP_DBMOPEN,XTERM);
6904 s = force_word(s,WORD,TRUE,FALSE,FALSE);
6911 pl_yylval.ival = CopLINE(PL_curcop);
6927 if (*s == '{') { /* block eval */
6928 PL_expect = XTERMBLOCK;
6929 UNIBRACK(OP_ENTERTRY);
6931 else { /* string eval */
6933 UNIBRACK(OP_ENTEREVAL);
6948 case KEY_endhostent:
6954 case KEY_endservent:
6957 case KEY_endprotoent:
6968 pl_yylval.ival = CopLINE(PL_curcop);
6970 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
6973 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6976 if ((PL_bufend - p) >= 3 &&
6977 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6979 else if ((PL_bufend - p) >= 4 &&
6980 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6983 if (isIDFIRST_lazy_if(p,UTF)) {
6984 p = scan_ident(p, PL_bufend,
6985 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6989 Perl_croak(aTHX_ "Missing $ on loop variable");
6991 s = SvPVX(PL_linestr) + soff;
6997 LOP(OP_FORMLINE,XTERM);
7003 LOP(OP_FCNTL,XTERM);
7009 LOP(OP_FLOCK,XTERM);
7018 LOP(OP_GREPSTART, XREF);
7021 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7036 case KEY_getpriority:
7037 LOP(OP_GETPRIORITY,XTERM);
7039 case KEY_getprotobyname:
7042 case KEY_getprotobynumber:
7043 LOP(OP_GPBYNUMBER,XTERM);
7045 case KEY_getprotoent:
7057 case KEY_getpeername:
7058 UNI(OP_GETPEERNAME);
7060 case KEY_gethostbyname:
7063 case KEY_gethostbyaddr:
7064 LOP(OP_GHBYADDR,XTERM);
7066 case KEY_gethostent:
7069 case KEY_getnetbyname:
7072 case KEY_getnetbyaddr:
7073 LOP(OP_GNBYADDR,XTERM);
7078 case KEY_getservbyname:
7079 LOP(OP_GSBYNAME,XTERM);
7081 case KEY_getservbyport:
7082 LOP(OP_GSBYPORT,XTERM);
7084 case KEY_getservent:
7087 case KEY_getsockname:
7088 UNI(OP_GETSOCKNAME);
7090 case KEY_getsockopt:
7091 LOP(OP_GSOCKOPT,XTERM);
7106 pl_yylval.ival = CopLINE(PL_curcop);
7116 pl_yylval.ival = CopLINE(PL_curcop);
7120 LOP(OP_INDEX,XTERM);
7126 LOP(OP_IOCTL,XTERM);
7138 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7170 LOP(OP_LISTEN,XTERM);
7179 s = scan_pat(s,OP_MATCH);
7180 TERM(sublex_start());
7183 LOP(OP_MAPSTART, XREF);
7186 LOP(OP_MKDIR,XTERM);
7189 LOP(OP_MSGCTL,XTERM);
7192 LOP(OP_MSGGET,XTERM);
7195 LOP(OP_MSGRCV,XTERM);
7198 LOP(OP_MSGSND,XTERM);
7203 PL_in_my = (U16)tmp;
7205 if (isIDFIRST_lazy_if(s,UTF)) {
7209 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7210 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7212 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7213 if (!PL_in_my_stash) {
7216 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7220 if (PL_madskills) { /* just add type to declarator token */
7221 sv_catsv(PL_thistoken, PL_nextwhite);
7223 sv_catpvn(PL_thistoken, start, s - start);
7231 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7238 s = tokenize_use(0, s);
7242 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7249 if (isIDFIRST_lazy_if(s,UTF)) {
7251 for (d = s; isALNUM_lazy_if(d,UTF);)
7253 for (t=d; isSPACE(*t);)
7255 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7257 && !(t[0] == '=' && t[1] == '>')
7259 int parms_len = (int)(d-s);
7260 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7261 "Precedence problem: open %.*s should be open(%.*s)",
7262 parms_len, s, parms_len, s);
7268 pl_yylval.ival = OP_OR;
7278 LOP(OP_OPEN_DIR,XTERM);
7281 checkcomma(s,PL_tokenbuf,"filehandle");
7285 checkcomma(s,PL_tokenbuf,"filehandle");
7304 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7306 s = force_strict_version(s);
7307 PL_lex_expect = XBLOCK;
7311 LOP(OP_PIPE_OP,XTERM);
7314 s = scan_str(s,!!PL_madskills,FALSE);
7317 pl_yylval.ival = OP_CONST;
7318 TERM(sublex_start());
7325 s = scan_str(s,!!PL_madskills,FALSE);
7328 PL_expect = XOPERATOR;
7329 if (SvCUR(PL_lex_stuff)) {
7331 d = SvPV_force(PL_lex_stuff, len);
7333 for (; isSPACE(*d) && len; --len, ++d)
7338 if (!warned && ckWARN(WARN_QW)) {
7339 for (; !isSPACE(*d) && len; --len, ++d) {
7341 Perl_warner(aTHX_ packWARN(WARN_QW),
7342 "Possible attempt to separate words with commas");
7345 else if (*d == '#') {
7346 Perl_warner(aTHX_ packWARN(WARN_QW),
7347 "Possible attempt to put comments in qw() list");
7353 for (; !isSPACE(*d) && len; --len, ++d)
7356 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7357 words = append_elem(OP_LIST, words,
7358 newSVOP(OP_CONST, 0, tokeq(sv)));
7363 words = newNULLLIST();
7365 SvREFCNT_dec(PL_lex_stuff);
7366 PL_lex_stuff = NULL;
7368 PL_expect = XOPERATOR;
7369 pl_yylval.opval = sawparens(words);
7374 s = scan_str(s,!!PL_madskills,FALSE);
7377 pl_yylval.ival = OP_STRINGIFY;
7378 if (SvIVX(PL_lex_stuff) == '\'')
7379 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
7380 TERM(sublex_start());
7383 s = scan_pat(s,OP_QR);
7384 TERM(sublex_start());
7387 s = scan_str(s,!!PL_madskills,FALSE);
7390 readpipe_override();
7391 TERM(sublex_start());
7399 s = force_version(s, FALSE);
7401 else if (*s != 'v' || !isDIGIT(s[1])
7402 || (s = force_version(s, TRUE), *s == 'v'))
7404 *PL_tokenbuf = '\0';
7405 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7406 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7407 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7409 yyerror("<> should be quotes");
7411 if (orig_keyword == KEY_require) {
7419 PL_last_uni = PL_oldbufptr;
7420 PL_last_lop_op = OP_REQUIRE;
7422 return REPORT( (int)REQUIRE );
7428 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7432 LOP(OP_RENAME,XTERM);
7441 LOP(OP_RINDEX,XTERM);
7450 UNIDOR(OP_READLINE);
7453 UNIDOR(OP_BACKTICK);
7462 LOP(OP_REVERSE,XTERM);
7465 UNIDOR(OP_READLINK);
7472 if (pl_yylval.opval)
7473 TERM(sublex_start());
7475 TOKEN(1); /* force error */
7478 checkcomma(s,PL_tokenbuf,"filehandle");
7488 LOP(OP_SELECT,XTERM);
7494 LOP(OP_SEMCTL,XTERM);
7497 LOP(OP_SEMGET,XTERM);
7500 LOP(OP_SEMOP,XTERM);
7506 LOP(OP_SETPGRP,XTERM);
7508 case KEY_setpriority:
7509 LOP(OP_SETPRIORITY,XTERM);
7511 case KEY_sethostent:
7517 case KEY_setservent:
7520 case KEY_setprotoent:
7530 LOP(OP_SEEKDIR,XTERM);
7532 case KEY_setsockopt:
7533 LOP(OP_SSOCKOPT,XTERM);
7539 LOP(OP_SHMCTL,XTERM);
7542 LOP(OP_SHMGET,XTERM);
7545 LOP(OP_SHMREAD,XTERM);
7548 LOP(OP_SHMWRITE,XTERM);
7551 LOP(OP_SHUTDOWN,XTERM);
7560 LOP(OP_SOCKET,XTERM);
7562 case KEY_socketpair:
7563 LOP(OP_SOCKPAIR,XTERM);
7566 checkcomma(s,PL_tokenbuf,"subroutine name");
7568 if (*s == ';' || *s == ')') /* probably a close */
7569 Perl_croak(aTHX_ "sort is now a reserved word");
7571 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7575 LOP(OP_SPLIT,XTERM);
7578 LOP(OP_SPRINTF,XTERM);
7581 LOP(OP_SPLICE,XTERM);
7596 LOP(OP_SUBSTR,XTERM);
7602 char tmpbuf[sizeof PL_tokenbuf];
7603 SSize_t tboffset = 0;
7604 expectation attrful;
7605 bool have_name, have_proto;
7606 const int key = tmp;
7611 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7612 SV *subtoken = newSVpvn(tstart, s - tstart);
7616 s = SKIPSPACE2(s,tmpwhite);
7621 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7622 (*s == ':' && s[1] == ':'))
7625 SV *nametoke = NULL;
7629 attrful = XATTRBLOCK;
7630 /* remember buffer pos'n for later force_word */
7631 tboffset = s - PL_oldbufptr;
7632 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7635 nametoke = newSVpvn(s, d - s);
7637 if (memchr(tmpbuf, ':', len))
7638 sv_setpvn(PL_subname, tmpbuf, len);
7640 sv_setsv(PL_subname,PL_curstname);
7641 sv_catpvs(PL_subname,"::");
7642 sv_catpvn(PL_subname,tmpbuf,len);
7649 CURMAD('X', nametoke);
7650 CURMAD('_', tmpwhite);
7651 (void) force_word(PL_oldbufptr + tboffset, WORD,
7654 s = SKIPSPACE2(d,tmpwhite);
7661 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7662 PL_expect = XTERMBLOCK;
7663 attrful = XATTRTERM;
7664 sv_setpvs(PL_subname,"?");
7668 if (key == KEY_format) {
7670 PL_lex_formbrack = PL_lex_brackets + 1;
7672 PL_thistoken = subtoken;
7676 (void) force_word(PL_oldbufptr + tboffset, WORD,
7682 /* Look for a prototype */
7685 bool bad_proto = FALSE;
7686 bool in_brackets = FALSE;
7687 char greedy_proto = ' ';
7688 bool proto_after_greedy_proto = FALSE;
7689 bool must_be_last = FALSE;
7690 bool underscore = FALSE;
7691 bool seen_underscore = FALSE;
7692 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7694 s = scan_str(s,!!PL_madskills,FALSE);
7696 Perl_croak(aTHX_ "Prototype not terminated");
7697 /* strip spaces and check for bad characters */
7698 d = SvPVX(PL_lex_stuff);
7700 for (p = d; *p; ++p) {
7704 if (warnillegalproto) {
7706 proto_after_greedy_proto = TRUE;
7707 if (!strchr("$@%*;[]&\\_", *p)) {
7719 else if ( *p == ']' ) {
7720 in_brackets = FALSE;
7722 else if ( (*p == '@' || *p == '%') &&
7723 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7725 must_be_last = TRUE;
7728 else if ( *p == '_' ) {
7729 underscore = seen_underscore = TRUE;
7736 if (proto_after_greedy_proto)
7737 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7738 "Prototype after '%c' for %"SVf" : %s",
7739 greedy_proto, SVfARG(PL_subname), d);
7741 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
7742 "Illegal character %sin prototype for %"SVf" : %s",
7743 seen_underscore ? "after '_' " : "",
7744 SVfARG(PL_subname), d);
7745 SvCUR_set(PL_lex_stuff, tmp);
7750 CURMAD('q', PL_thisopen);
7751 CURMAD('_', tmpwhite);
7752 CURMAD('=', PL_thisstuff);
7753 CURMAD('Q', PL_thisclose);
7754 NEXTVAL_NEXTTOKE.opval =
7755 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7756 PL_lex_stuff = NULL;
7759 s = SKIPSPACE2(s,tmpwhite);
7767 if (*s == ':' && s[1] != ':')
7768 PL_expect = attrful;
7769 else if (*s != '{' && key == KEY_sub) {
7771 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
7772 else if (*s != ';' && *s != '}')
7773 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
7780 curmad('^', newSVpvs(""));
7781 CURMAD('_', tmpwhite);
7785 PL_thistoken = subtoken;
7788 NEXTVAL_NEXTTOKE.opval =
7789 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
7790 PL_lex_stuff = NULL;
7796 sv_setpvs(PL_subname, "__ANON__");
7798 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7802 (void) force_word(PL_oldbufptr + tboffset, WORD,
7811 LOP(OP_SYSTEM,XREF);
7814 LOP(OP_SYMLINK,XTERM);
7817 LOP(OP_SYSCALL,XTERM);
7820 LOP(OP_SYSOPEN,XTERM);
7823 LOP(OP_SYSSEEK,XTERM);
7826 LOP(OP_SYSREAD,XTERM);
7829 LOP(OP_SYSWRITE,XTERM);
7833 TERM(sublex_start());
7854 LOP(OP_TRUNCATE,XTERM);
7866 pl_yylval.ival = CopLINE(PL_curcop);
7870 pl_yylval.ival = CopLINE(PL_curcop);
7874 LOP(OP_UNLINK,XTERM);
7880 LOP(OP_UNPACK,XTERM);
7883 LOP(OP_UTIME,XTERM);
7889 LOP(OP_UNSHIFT,XTERM);
7892 s = tokenize_use(1, s);
7902 pl_yylval.ival = CopLINE(PL_curcop);
7906 pl_yylval.ival = CopLINE(PL_curcop);
7910 PL_hints |= HINT_BLOCK_SCOPE;
7917 LOP(OP_WAITPID,XTERM);
7926 ctl_l[0] = toCTRL('L');
7928 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
7931 /* Make sure $^L is defined */
7932 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
7937 if (PL_expect == XOPERATOR)
7943 pl_yylval.ival = OP_XOR;
7948 TERM(sublex_start());
7953 #pragma segment Main
7957 S_pending_ident(pTHX)
7962 /* pit holds the identifier we read and pending_ident is reset */
7963 char pit = PL_pending_ident;
7964 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7965 /* All routes through this function want to know if there is a colon. */
7966 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
7967 PL_pending_ident = 0;
7969 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
7970 DEBUG_T({ PerlIO_printf(Perl_debug_log,
7971 "### Pending identifier '%s'\n", PL_tokenbuf); });
7973 /* if we're in a my(), we can't allow dynamics here.
7974 $foo'bar has already been turned into $foo::bar, so
7975 just check for colons.
7977 if it's a legal name, the OP is a PADANY.
7980 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
7982 yyerror(Perl_form(aTHX_ "No package name allowed for "
7983 "variable %s in \"our\"",
7985 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7989 yyerror(Perl_form(aTHX_ PL_no_myglob,
7990 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
7992 pl_yylval.opval = newOP(OP_PADANY, 0);
7993 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
7999 build the ops for accesses to a my() variable.
8001 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8002 then used in a comparison. This catches most, but not
8003 all cases. For instance, it catches
8004 sort { my($a); $a <=> $b }
8006 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8007 (although why you'd do that is anyone's guess).
8012 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8013 if (tmp != NOT_IN_PAD) {
8014 /* might be an "our" variable" */
8015 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8016 /* build ops for a bareword */
8017 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8018 HEK * const stashname = HvNAME_HEK(stash);
8019 SV * const sym = newSVhek(stashname);
8020 sv_catpvs(sym, "::");
8021 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8022 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8023 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8026 ? (GV_ADDMULTI | GV_ADDINEVAL)
8029 ((PL_tokenbuf[0] == '$') ? SVt_PV
8030 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8035 /* if it's a sort block and they're naming $a or $b */
8036 if (PL_last_lop_op == OP_SORT &&
8037 PL_tokenbuf[0] == '$' &&
8038 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8041 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8042 d < PL_bufend && *d != '\n';
8045 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8046 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8052 pl_yylval.opval = newOP(OP_PADANY, 0);
8053 pl_yylval.opval->op_targ = tmp;
8059 Whine if they've said @foo in a doublequoted string,
8060 and @foo isn't a variable we can find in the symbol
8063 if (ckWARN(WARN_AMBIGUOUS) &&
8064 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8065 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8067 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8068 /* DO NOT warn for @- and @+ */
8069 && !( PL_tokenbuf[2] == '\0' &&
8070 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8073 /* Downgraded from fatal to warning 20000522 mjd */
8074 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8075 "Possible unintended interpolation of %s in string",
8080 /* build ops for a bareword */
8081 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8083 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8084 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8085 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8086 ((PL_tokenbuf[0] == '$') ? SVt_PV
8087 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8093 * The following code was generated by perl_keyword.pl.
8097 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8101 PERL_ARGS_ASSERT_KEYWORD;
8105 case 1: /* 5 tokens of length 1 */
8137 case 2: /* 18 tokens of length 2 */
8283 case 3: /* 29 tokens of length 3 */
8287 if (name[1] == 'N' &&
8350 if (name[1] == 'i' &&
8382 if (name[1] == 'o' &&
8391 if (name[1] == 'e' &&
8400 if (name[1] == 'n' &&
8409 if (name[1] == 'o' &&
8418 if (name[1] == 'a' &&
8427 if (name[1] == 'o' &&
8489 if (name[1] == 'e' &&
8503 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8529 if (name[1] == 'i' &&
8538 if (name[1] == 's' &&
8547 if (name[1] == 'e' &&
8556 if (name[1] == 'o' &&
8568 case 4: /* 41 tokens of length 4 */
8572 if (name[1] == 'O' &&
8582 if (name[1] == 'N' &&
8592 if (name[1] == 'i' &&
8602 if (name[1] == 'h' &&
8612 if (name[1] == 'u' &&
8625 if (name[2] == 'c' &&
8634 if (name[2] == 's' &&
8643 if (name[2] == 'a' &&
8679 if (name[1] == 'o' &&
8692 if (name[2] == 't' &&
8701 if (name[2] == 'o' &&
8710 if (name[2] == 't' &&
8719 if (name[2] == 'e' &&
8732 if (name[1] == 'o' &&
8745 if (name[2] == 'y' &&
8754 if (name[2] == 'l' &&
8770 if (name[2] == 's' &&
8779 if (name[2] == 'n' &&
8788 if (name[2] == 'c' &&
8801 if (name[1] == 'e' &&
8811 if (name[1] == 'p' &&
8824 if (name[2] == 'c' &&
8833 if (name[2] == 'p' &&
8842 if (name[2] == 's' &&
8858 if (name[2] == 'n' &&
8928 if (name[2] == 'r' &&
8937 if (name[2] == 'r' &&
8946 if (name[2] == 'a' &&
8962 if (name[2] == 'l' &&
9024 if (name[2] == 'e' &&
9027 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9040 case 5: /* 39 tokens of length 5 */
9044 if (name[1] == 'E' &&
9055 if (name[1] == 'H' &&
9069 if (name[2] == 'a' &&
9079 if (name[2] == 'a' &&
9096 if (name[2] == 'e' &&
9106 if (name[2] == 'e' &&
9110 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9126 if (name[3] == 'i' &&
9135 if (name[3] == 'o' &&
9171 if (name[2] == 'o' &&
9181 if (name[2] == 'y' &&
9195 if (name[1] == 'l' &&
9209 if (name[2] == 'n' &&
9219 if (name[2] == 'o' &&
9233 if (name[1] == 'i' &&
9238 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9247 if (name[2] == 'd' &&
9257 if (name[2] == 'c' &&
9274 if (name[2] == 'c' &&
9284 if (name[2] == 't' &&
9298 if (name[1] == 'k' &&
9309 if (name[1] == 'r' &&
9323 if (name[2] == 's' &&
9333 if (name[2] == 'd' &&
9350 if (name[2] == 'm' &&
9360 if (name[2] == 'i' &&
9370 if (name[2] == 'e' &&
9380 if (name[2] == 'l' &&
9390 if (name[2] == 'a' &&
9403 if (name[3] == 't' &&
9406 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9412 if (name[3] == 'd' &&
9429 if (name[1] == 'i' &&
9443 if (name[2] == 'a' &&
9456 if (name[3] == 'e' &&
9491 if (name[2] == 'i' &&
9508 if (name[2] == 'i' &&
9518 if (name[2] == 'i' &&
9535 case 6: /* 33 tokens of length 6 */
9539 if (name[1] == 'c' &&
9554 if (name[2] == 'l' &&
9565 if (name[2] == 'r' &&
9580 if (name[1] == 'e' &&
9595 if (name[2] == 's' &&
9600 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9606 if (name[2] == 'i' &&
9624 if (name[2] == 'l' &&
9635 if (name[2] == 'r' &&
9650 if (name[1] == 'm' &&
9665 if (name[2] == 'n' &&
9676 if (name[2] == 's' &&
9691 if (name[1] == 's' &&
9697 if (name[4] == 't' &&
9706 if (name[4] == 'e' &&
9715 if (name[4] == 'c' &&
9724 if (name[4] == 'n' &&
9740 if (name[1] == 'r' &&
9758 if (name[3] == 'a' &&
9768 if (name[3] == 'u' &&
9782 if (name[2] == 'n' &&
9800 if (name[2] == 'a' &&
9814 if (name[3] == 'e' &&
9827 if (name[4] == 't' &&
9836 if (name[4] == 'e' &&
9858 if (name[4] == 't' &&
9867 if (name[4] == 'e' &&
9883 if (name[2] == 'c' &&
9894 if (name[2] == 'l' &&
9905 if (name[2] == 'b' &&
9916 if (name[2] == 's' &&
9939 if (name[4] == 's' &&
9948 if (name[4] == 'n' &&
9961 if (name[3] == 'a' &&
9978 if (name[1] == 'a' &&
9993 case 7: /* 29 tokens of length 7 */
9997 if (name[1] == 'E' &&
10004 return KEY_DESTROY;
10010 if (name[1] == '_' &&
10017 return KEY___END__;
10023 if (name[1] == 'i' &&
10030 return -KEY_binmode;
10036 if (name[1] == 'o' &&
10043 return -KEY_connect;
10052 if (name[2] == 'm' &&
10058 return -KEY_dbmopen;
10064 if (name[2] == 'f')
10069 if (name[4] == 'u' &&
10073 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10079 if (name[4] == 'n' &&
10083 return KEY_defined;
10100 if (name[1] == 'o' &&
10107 return KEY_foreach;
10113 if (name[1] == 'e' &&
10120 if (name[5] == 'r' &&
10123 return -KEY_getpgrp;
10129 if (name[5] == 'i' &&
10132 return -KEY_getppid;
10145 if (name[1] == 'c' &&
10152 return -KEY_lcfirst;
10158 if (name[1] == 'p' &&
10165 return -KEY_opendir;
10171 if (name[1] == 'a' &&
10178 return KEY_package;
10184 if (name[1] == 'e')
10189 if (name[3] == 'd' &&
10194 return -KEY_readdir;
10200 if (name[3] == 'u' &&
10205 return KEY_require;
10211 if (name[3] == 'e' &&
10216 return -KEY_reverse;
10235 if (name[3] == 'k' &&
10240 return -KEY_seekdir;
10246 if (name[3] == 'p' &&
10251 return -KEY_setpgrp;
10261 if (name[2] == 'm' &&
10267 return -KEY_shmread;
10273 if (name[2] == 'r' &&
10279 return -KEY_sprintf;
10288 if (name[3] == 'l' &&
10293 return -KEY_symlink;
10302 if (name[4] == 'a' &&
10306 return -KEY_syscall;
10312 if (name[4] == 'p' &&
10316 return -KEY_sysopen;
10322 if (name[4] == 'e' &&
10326 return -KEY_sysread;
10332 if (name[4] == 'e' &&
10336 return -KEY_sysseek;
10354 if (name[1] == 'e' &&
10361 return -KEY_telldir;
10370 if (name[2] == 'f' &&
10376 return -KEY_ucfirst;
10382 if (name[2] == 's' &&
10388 return -KEY_unshift;
10398 if (name[1] == 'a' &&
10405 return -KEY_waitpid;
10414 case 8: /* 26 tokens of length 8 */
10418 if (name[1] == 'U' &&
10426 return KEY_AUTOLOAD;
10432 if (name[1] == '_')
10437 if (name[3] == 'A' &&
10443 return KEY___DATA__;
10449 if (name[3] == 'I' &&
10455 return -KEY___FILE__;
10461 if (name[3] == 'I' &&
10467 return -KEY___LINE__;
10483 if (name[2] == 'o' &&
10490 return -KEY_closedir;
10496 if (name[2] == 'n' &&
10503 return -KEY_continue;
10513 if (name[1] == 'b' &&
10521 return -KEY_dbmclose;
10527 if (name[1] == 'n' &&
10533 if (name[4] == 'r' &&
10538 return -KEY_endgrent;
10544 if (name[4] == 'w' &&
10549 return -KEY_endpwent;
10562 if (name[1] == 'o' &&
10570 return -KEY_formline;
10576 if (name[1] == 'e' &&
10582 if (name[4] == 'r')
10587 if (name[6] == 'n' &&
10590 return -KEY_getgrent;
10596 if (name[6] == 'i' &&
10599 return -KEY_getgrgid;
10605 if (name[6] == 'a' &&
10608 return -KEY_getgrnam;
10621 if (name[4] == 'o' &&
10626 return -KEY_getlogin;
10632 if (name[4] == 'w')
10637 if (name[6] == 'n' &&
10640 return -KEY_getpwent;
10646 if (name[6] == 'a' &&
10649 return -KEY_getpwnam;
10655 if (name[6] == 'i' &&
10658 return -KEY_getpwuid;
10678 if (name[1] == 'e' &&
10685 if (name[5] == 'i' &&
10692 return -KEY_readline;
10697 return -KEY_readlink;
10708 if (name[5] == 'i' &&
10712 return -KEY_readpipe;
10728 if (name[2] == 't')
10733 if (name[4] == 'r' &&
10738 return -KEY_setgrent;
10744 if (name[4] == 'w' &&
10749 return -KEY_setpwent;
10765 if (name[3] == 'w' &&
10771 return -KEY_shmwrite;
10777 if (name[3] == 't' &&
10783 return -KEY_shutdown;
10793 if (name[2] == 's' &&
10800 return -KEY_syswrite;
10810 if (name[1] == 'r' &&
10818 return -KEY_truncate;
10827 case 9: /* 9 tokens of length 9 */
10831 if (name[1] == 'N' &&
10840 return KEY_UNITCHECK;
10846 if (name[1] == 'n' &&
10855 return -KEY_endnetent;
10861 if (name[1] == 'e' &&
10870 return -KEY_getnetent;
10876 if (name[1] == 'o' &&
10885 return -KEY_localtime;
10891 if (name[1] == 'r' &&
10900 return KEY_prototype;
10906 if (name[1] == 'u' &&
10915 return -KEY_quotemeta;
10921 if (name[1] == 'e' &&
10930 return -KEY_rewinddir;
10936 if (name[1] == 'e' &&
10945 return -KEY_setnetent;
10951 if (name[1] == 'a' &&
10960 return -KEY_wantarray;
10969 case 10: /* 9 tokens of length 10 */
10973 if (name[1] == 'n' &&
10979 if (name[4] == 'o' &&
10986 return -KEY_endhostent;
10992 if (name[4] == 'e' &&
10999 return -KEY_endservent;
11012 if (name[1] == 'e' &&
11018 if (name[4] == 'o' &&
11025 return -KEY_gethostent;
11034 if (name[5] == 'r' &&
11040 return -KEY_getservent;
11046 if (name[5] == 'c' &&
11052 return -KEY_getsockopt;
11072 if (name[2] == 't')
11077 if (name[4] == 'o' &&
11084 return -KEY_sethostent;
11093 if (name[5] == 'r' &&
11099 return -KEY_setservent;
11105 if (name[5] == 'c' &&
11111 return -KEY_setsockopt;
11128 if (name[2] == 'c' &&
11137 return -KEY_socketpair;
11150 case 11: /* 8 tokens of length 11 */
11154 if (name[1] == '_' &&
11164 { /* __PACKAGE__ */
11165 return -KEY___PACKAGE__;
11171 if (name[1] == 'n' &&
11181 { /* endprotoent */
11182 return -KEY_endprotoent;
11188 if (name[1] == 'e' &&
11197 if (name[5] == 'e' &&
11203 { /* getpeername */
11204 return -KEY_getpeername;
11213 if (name[6] == 'o' &&
11218 { /* getpriority */
11219 return -KEY_getpriority;
11225 if (name[6] == 't' &&
11230 { /* getprotoent */
11231 return -KEY_getprotoent;
11245 if (name[4] == 'o' &&
11252 { /* getsockname */
11253 return -KEY_getsockname;
11266 if (name[1] == 'e' &&
11274 if (name[6] == 'o' &&
11279 { /* setpriority */
11280 return -KEY_setpriority;
11286 if (name[6] == 't' &&
11291 { /* setprotoent */
11292 return -KEY_setprotoent;
11308 case 12: /* 2 tokens of length 12 */
11309 if (name[0] == 'g' &&
11321 if (name[9] == 'd' &&
11324 { /* getnetbyaddr */
11325 return -KEY_getnetbyaddr;
11331 if (name[9] == 'a' &&
11334 { /* getnetbyname */
11335 return -KEY_getnetbyname;
11347 case 13: /* 4 tokens of length 13 */
11348 if (name[0] == 'g' &&
11355 if (name[4] == 'o' &&
11364 if (name[10] == 'd' &&
11367 { /* gethostbyaddr */
11368 return -KEY_gethostbyaddr;
11374 if (name[10] == 'a' &&
11377 { /* gethostbyname */
11378 return -KEY_gethostbyname;
11391 if (name[4] == 'e' &&
11400 if (name[10] == 'a' &&
11403 { /* getservbyname */
11404 return -KEY_getservbyname;
11410 if (name[10] == 'o' &&
11413 { /* getservbyport */
11414 return -KEY_getservbyport;
11433 case 14: /* 1 tokens of length 14 */
11434 if (name[0] == 'g' &&
11448 { /* getprotobyname */
11449 return -KEY_getprotobyname;
11454 case 16: /* 1 tokens of length 16 */
11455 if (name[0] == 'g' &&
11471 { /* getprotobynumber */
11472 return -KEY_getprotobynumber;
11486 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11490 PERL_ARGS_ASSERT_CHECKCOMMA;
11492 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
11493 if (ckWARN(WARN_SYNTAX)) {
11496 for (w = s+2; *w && level; w++) {
11499 else if (*w == ')')
11502 while (isSPACE(*w))
11504 /* the list of chars below is for end of statements or
11505 * block / parens, boolean operators (&&, ||, //) and branch
11506 * constructs (or, and, if, until, unless, while, err, for).
11507 * Not a very solid hack... */
11508 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11509 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11510 "%s (...) interpreted as function",name);
11513 while (s < PL_bufend && isSPACE(*s))
11517 while (s < PL_bufend && isSPACE(*s))
11519 if (isIDFIRST_lazy_if(s,UTF)) {
11520 const char * const w = s++;
11521 while (isALNUM_lazy_if(s,UTF))
11523 while (s < PL_bufend && isSPACE(*s))
11527 if (keyword(w, s - w, 0))
11530 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11531 if (gv && GvCVu(gv))
11533 Perl_croak(aTHX_ "No comma allowed after %s", what);
11538 /* Either returns sv, or mortalizes sv and returns a new SV*.
11539 Best used as sv=new_constant(..., sv, ...).
11540 If s, pv are NULL, calls subroutine with one argument,
11541 and type is used with error messages only. */
11544 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11545 SV *sv, SV *pv, const char *type, STRLEN typelen)
11548 HV * const table = GvHV(PL_hintgv); /* ^H */
11552 const char *why1 = "", *why2 = "", *why3 = "";
11554 PERL_ARGS_ASSERT_NEW_CONSTANT;
11556 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11559 why2 = (const char *)
11560 (strEQ(key,"charnames")
11561 ? "(possibly a missing \"use charnames ...\")"
11563 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11564 (type ? type: "undef"), why2);
11566 /* This is convoluted and evil ("goto considered harmful")
11567 * but I do not understand the intricacies of all the different
11568 * failure modes of %^H in here. The goal here is to make
11569 * the most probable error message user-friendly. --jhi */
11574 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11575 (type ? type: "undef"), why1, why2, why3);
11577 yyerror(SvPVX_const(msg));
11582 /* charnames doesn't work well if there have been errors found */
11583 if (PL_error_count > 0 && strEQ(key,"charnames"))
11584 return &PL_sv_undef;
11586 cvp = hv_fetch(table, key, keylen, FALSE);
11587 if (!cvp || !SvOK(*cvp)) {
11590 why3 = "} is not defined";
11593 sv_2mortal(sv); /* Parent created it permanently */
11596 pv = newSVpvn_flags(s, len, SVs_TEMP);
11598 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11600 typesv = &PL_sv_undef;
11602 PUSHSTACKi(PERLSI_OVERLOAD);
11614 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11618 /* Check the eval first */
11619 if (!PL_in_eval && SvTRUE(ERRSV)) {
11620 sv_catpvs(ERRSV, "Propagated");
11621 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11623 res = SvREFCNT_inc_simple(sv);
11627 SvREFCNT_inc_simple_void(res);
11636 why1 = "Call to &{$^H{";
11638 why3 = "}} did not return a defined value";
11646 /* Returns a NUL terminated string, with the length of the string written to
11650 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11653 register char *d = dest;
11654 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11656 PERL_ARGS_ASSERT_SCAN_WORD;
11660 Perl_croak(aTHX_ ident_too_long);
11661 if (isALNUM(*s)) /* UTF handled below */
11663 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11668 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11672 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11673 char *t = s + UTF8SKIP(s);
11675 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11679 Perl_croak(aTHX_ ident_too_long);
11680 Copy(s, d, len, char);
11693 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11696 char *bracket = NULL;
11698 register char *d = dest;
11699 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11701 PERL_ARGS_ASSERT_SCAN_IDENT;
11706 while (isDIGIT(*s)) {
11708 Perl_croak(aTHX_ ident_too_long);
11715 Perl_croak(aTHX_ ident_too_long);
11716 if (isALNUM(*s)) /* UTF handled below */
11718 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
11723 else if (*s == ':' && s[1] == ':') {
11727 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11728 char *t = s + UTF8SKIP(s);
11729 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11731 if (d + (t - s) > e)
11732 Perl_croak(aTHX_ ident_too_long);
11733 Copy(s, d, t - s, char);
11744 if (PL_lex_state != LEX_NORMAL)
11745 PL_lex_state = LEX_INTERPENDMAYBE;
11748 if (*s == '$' && s[1] &&
11749 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
11762 if (*d == '^' && *s && isCONTROLVAR(*s)) {
11767 if (isSPACE(s[-1])) {
11769 const char ch = *s++;
11770 if (!SPACE_OR_TAB(ch)) {
11776 if (isIDFIRST_lazy_if(d,UTF)) {
11780 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11781 end += UTF8SKIP(end);
11782 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11783 end += UTF8SKIP(end);
11785 Copy(s, d, end - s, char);
11790 while ((isALNUM(*s) || *s == ':') && d < e)
11793 Perl_croak(aTHX_ ident_too_long);
11796 while (s < send && SPACE_OR_TAB(*s))
11798 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
11799 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
11800 const char * const brack =
11802 ((*s == '[') ? "[...]" : "{...}");
11803 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11804 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
11805 funny, dest, brack, funny, dest, brack);
11808 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
11812 /* Handle extended ${^Foo} variables
11813 * 1999-02-27 mjd-perl-patch@plover.com */
11814 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11818 while (isALNUM(*s) && d < e) {
11822 Perl_croak(aTHX_ ident_too_long);
11827 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
11828 PL_lex_state = LEX_INTERPEND;
11831 if (PL_lex_state == LEX_NORMAL) {
11832 if (ckWARN(WARN_AMBIGUOUS) &&
11833 (keyword(dest, d - dest, 0)
11834 || get_cvn_flags(dest, d - dest, 0)))
11838 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
11839 "Ambiguous use of %c{%s} resolved to %c%s",
11840 funny, dest, funny, dest);
11845 s = bracket; /* let the parser handle it */
11849 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11850 PL_lex_state = LEX_INTERPEND;
11855 S_pmflag(U32 pmfl, const char ch) {
11857 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11858 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11859 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11860 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11861 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11862 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
11868 S_scan_pat(pTHX_ char *start, I32 type)
11872 char *s = scan_str(start,!!PL_madskills,FALSE);
11873 const char * const valid_flags =
11874 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
11879 PERL_ARGS_ASSERT_SCAN_PAT;
11882 const char * const delimiter = skipspace(start);
11886 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11887 : "Search pattern not terminated" ));
11890 pm = (PMOP*)newPMOP(type, 0);
11891 if (PL_multi_open == '?') {
11892 /* This is the only point in the code that sets PMf_ONCE: */
11893 pm->op_pmflags |= PMf_ONCE;
11895 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11896 allows us to restrict the list needed by reset to just the ??
11898 assert(type != OP_TRANS);
11900 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
11903 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
11906 elements = mg->mg_len / sizeof(PMOP**);
11907 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11908 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11909 mg->mg_len = elements * sizeof(PMOP**);
11910 PmopSTASH_set(pm,PL_curstash);
11916 while (*s && strchr(valid_flags, *s))
11917 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
11920 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11921 "Having no space between pattern and following word is deprecated");
11925 if (PL_madskills && modstart != s) {
11926 SV* tmptoken = newSVpvn(modstart, s - modstart);
11927 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11930 /* issue a warning if /c is specified,but /g is not */
11931 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
11933 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11934 "Use of /c modifier is meaningless without /g" );
11937 PL_lex_op = (OP*)pm;
11938 pl_yylval.ival = OP_MATCH;
11943 S_scan_subst(pTHX_ char *start)
11954 PERL_ARGS_ASSERT_SCAN_SUBST;
11956 pl_yylval.ival = OP_NULL;
11958 s = scan_str(start,!!PL_madskills,FALSE);
11961 Perl_croak(aTHX_ "Substitution pattern not terminated");
11963 if (s[-1] == PL_multi_open)
11966 if (PL_madskills) {
11967 CURMAD('q', PL_thisopen);
11968 CURMAD('_', PL_thiswhite);
11969 CURMAD('E', PL_thisstuff);
11970 CURMAD('Q', PL_thisclose);
11971 PL_realtokenstart = s - SvPVX(PL_linestr);
11975 first_start = PL_multi_start;
11976 s = scan_str(s,!!PL_madskills,FALSE);
11978 if (PL_lex_stuff) {
11979 SvREFCNT_dec(PL_lex_stuff);
11980 PL_lex_stuff = NULL;
11982 Perl_croak(aTHX_ "Substitution replacement not terminated");
11984 PL_multi_start = first_start; /* so whole substitution is taken together */
11986 pm = (PMOP*)newPMOP(OP_SUBST, 0);
11989 if (PL_madskills) {
11990 CURMAD('z', PL_thisopen);
11991 CURMAD('R', PL_thisstuff);
11992 CURMAD('Z', PL_thisclose);
11998 if (*s == EXEC_PAT_MOD) {
12002 else if (strchr(S_PAT_MODS, *s))
12003 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12006 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12007 "Having no space between pattern and following word is deprecated");
12015 if (PL_madskills) {
12017 curmad('m', newSVpvn(modstart, s - modstart));
12018 append_madprops(PL_thismad, (OP*)pm, 0);
12022 if ((pm->op_pmflags & PMf_CONTINUE)) {
12023 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12027 SV * const repl = newSVpvs("");
12029 PL_sublex_info.super_bufptr = s;
12030 PL_sublex_info.super_bufend = PL_bufend;
12032 pm->op_pmflags |= PMf_EVAL;
12035 sv_catpvs(repl, "eval ");
12037 sv_catpvs(repl, "do ");
12039 sv_catpvs(repl, "{");
12040 sv_catsv(repl, PL_lex_repl);
12041 if (strchr(SvPVX(PL_lex_repl), '#'))
12042 sv_catpvs(repl, "\n");
12043 sv_catpvs(repl, "}");
12045 SvREFCNT_dec(PL_lex_repl);
12046 PL_lex_repl = repl;
12049 PL_lex_op = (OP*)pm;
12050 pl_yylval.ival = OP_SUBST;
12055 S_scan_trans(pTHX_ char *start)
12068 PERL_ARGS_ASSERT_SCAN_TRANS;
12070 pl_yylval.ival = OP_NULL;
12072 s = scan_str(start,!!PL_madskills,FALSE);
12074 Perl_croak(aTHX_ "Transliteration pattern not terminated");
12076 if (s[-1] == PL_multi_open)
12079 if (PL_madskills) {
12080 CURMAD('q', PL_thisopen);
12081 CURMAD('_', PL_thiswhite);
12082 CURMAD('E', PL_thisstuff);
12083 CURMAD('Q', PL_thisclose);
12084 PL_realtokenstart = s - SvPVX(PL_linestr);
12088 s = scan_str(s,!!PL_madskills,FALSE);
12090 if (PL_lex_stuff) {
12091 SvREFCNT_dec(PL_lex_stuff);
12092 PL_lex_stuff = NULL;
12094 Perl_croak(aTHX_ "Transliteration replacement not terminated");
12096 if (PL_madskills) {
12097 CURMAD('z', PL_thisopen);
12098 CURMAD('R', PL_thisstuff);
12099 CURMAD('Z', PL_thisclose);
12102 complement = del = squash = 0;
12109 complement = OPpTRANS_COMPLEMENT;
12112 del = OPpTRANS_DELETE;
12115 squash = OPpTRANS_SQUASH;
12124 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12125 o = newPVOP(OP_TRANS, 0, (char*)tbl);
12126 o->op_private &= ~OPpTRANS_ALL;
12127 o->op_private |= del|squash|complement|
12128 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12129 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
12132 pl_yylval.ival = OP_TRANS;
12135 if (PL_madskills) {
12137 curmad('m', newSVpvn(modstart, s - modstart));
12138 append_madprops(PL_thismad, o, 0);
12147 S_scan_heredoc(pTHX_ register char *s)
12151 I32 op_type = OP_SCALAR;
12155 const char *found_newline;
12159 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12161 I32 stuffstart = s - SvPVX(PL_linestr);
12164 PL_realtokenstart = -1;
12167 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12171 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12175 while (SPACE_OR_TAB(*peek))
12177 if (*peek == '`' || *peek == '\'' || *peek =='"') {
12180 s = delimcpy(d, e, s, PL_bufend, term, &len);
12190 if (!isALNUM_lazy_if(s,UTF))
12191 deprecate("bare << to mean <<\"\"");
12192 for (; isALNUM_lazy_if(s,UTF); s++) {
12197 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12198 Perl_croak(aTHX_ "Delimiter for here document is too long");
12201 len = d - PL_tokenbuf;
12204 if (PL_madskills) {
12205 tstart = PL_tokenbuf + !outer;
12206 PL_thisclose = newSVpvn(tstart, len - !outer);
12207 tstart = SvPVX(PL_linestr) + stuffstart;
12208 PL_thisopen = newSVpvn(tstart, s - tstart);
12209 stuffstart = s - SvPVX(PL_linestr);
12212 #ifndef PERL_STRICT_CR
12213 d = strchr(s, '\r');
12215 char * const olds = s;
12217 while (s < PL_bufend) {
12223 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12232 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12239 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12240 herewas = newSVpvn(s,PL_bufend-s);
12244 herewas = newSVpvn(s-1,found_newline-s+1);
12247 herewas = newSVpvn(s,found_newline-s);
12251 if (PL_madskills) {
12252 tstart = SvPVX(PL_linestr) + stuffstart;
12254 sv_catpvn(PL_thisstuff, tstart, s - tstart);
12256 PL_thisstuff = newSVpvn(tstart, s - tstart);
12259 s += SvCUR(herewas);
12262 stuffstart = s - SvPVX(PL_linestr);
12268 tmpstr = newSV_type(SVt_PVIV);
12269 SvGROW(tmpstr, 80);
12270 if (term == '\'') {
12271 op_type = OP_CONST;
12272 SvIV_set(tmpstr, -1);
12274 else if (term == '`') {
12275 op_type = OP_BACKTICK;
12276 SvIV_set(tmpstr, '\\');
12280 PL_multi_start = CopLINE(PL_curcop);
12281 PL_multi_open = PL_multi_close = '<';
12282 term = *PL_tokenbuf;
12283 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12284 char * const bufptr = PL_sublex_info.super_bufptr;
12285 char * const bufend = PL_sublex_info.super_bufend;
12286 char * const olds = s - SvCUR(herewas);
12287 s = strchr(bufptr, '\n');
12291 while (s < bufend &&
12292 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12294 CopLINE_inc(PL_curcop);
12297 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12298 missingterm(PL_tokenbuf);
12300 sv_setpvn(herewas,bufptr,d-bufptr+1);
12301 sv_setpvn(tmpstr,d+1,s-d);
12303 sv_catpvn(herewas,s,bufend-s);
12304 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12311 while (s < PL_bufend &&
12312 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12314 CopLINE_inc(PL_curcop);
12316 if (s >= PL_bufend) {
12317 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12318 missingterm(PL_tokenbuf);
12320 sv_setpvn(tmpstr,d+1,s-d);
12322 if (PL_madskills) {
12324 sv_catpvn(PL_thisstuff, d + 1, s - d);
12326 PL_thisstuff = newSVpvn(d + 1, s - d);
12327 stuffstart = s - SvPVX(PL_linestr);
12331 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12333 sv_catpvn(herewas,s,PL_bufend-s);
12334 sv_setsv(PL_linestr,herewas);
12335 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12336 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12337 PL_last_lop = PL_last_uni = NULL;
12340 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
12341 while (s >= PL_bufend) { /* multiple line string? */
12343 if (PL_madskills) {
12344 tstart = SvPVX(PL_linestr) + stuffstart;
12346 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12348 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12352 CopLINE_inc(PL_curcop);
12353 if (!outer || !lex_next_chunk(0)) {
12354 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12355 missingterm(PL_tokenbuf);
12357 CopLINE_dec(PL_curcop);
12360 stuffstart = s - SvPVX(PL_linestr);
12362 CopLINE_inc(PL_curcop);
12363 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12364 PL_last_lop = PL_last_uni = NULL;
12365 #ifndef PERL_STRICT_CR
12366 if (PL_bufend - PL_linestart >= 2) {
12367 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12368 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12370 PL_bufend[-2] = '\n';
12372 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12374 else if (PL_bufend[-1] == '\r')
12375 PL_bufend[-1] = '\n';
12377 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12378 PL_bufend[-1] = '\n';
12380 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12381 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12382 *(SvPVX(PL_linestr) + off ) = ' ';
12383 sv_catsv(PL_linestr,herewas);
12384 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12385 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12389 sv_catsv(tmpstr,PL_linestr);
12394 PL_multi_end = CopLINE(PL_curcop);
12395 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12396 SvPV_shrink_to_cur(tmpstr);
12398 SvREFCNT_dec(herewas);
12400 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12402 else if (PL_encoding)
12403 sv_recode_to_utf8(tmpstr, PL_encoding);
12405 PL_lex_stuff = tmpstr;
12406 pl_yylval.ival = op_type;
12410 /* scan_inputsymbol
12411 takes: current position in input buffer
12412 returns: new position in input buffer
12413 side-effects: pl_yylval and lex_op are set.
12418 <FH> read from filehandle
12419 <pkg::FH> read from package qualified filehandle
12420 <pkg'FH> read from package qualified filehandle
12421 <$fh> read from filehandle in $fh
12422 <*.h> filename glob
12427 S_scan_inputsymbol(pTHX_ char *start)
12430 register char *s = start; /* current position in buffer */
12433 char *d = PL_tokenbuf; /* start of temp holding space */
12434 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12436 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12438 end = strchr(s, '\n');
12441 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
12443 /* die if we didn't have space for the contents of the <>,
12444 or if it didn't end, or if we see a newline
12447 if (len >= (I32)sizeof PL_tokenbuf)
12448 Perl_croak(aTHX_ "Excessively long <> operator");
12450 Perl_croak(aTHX_ "Unterminated <> operator");
12455 Remember, only scalar variables are interpreted as filehandles by
12456 this code. Anything more complex (e.g., <$fh{$num}>) will be
12457 treated as a glob() call.
12458 This code makes use of the fact that except for the $ at the front,
12459 a scalar variable and a filehandle look the same.
12461 if (*d == '$' && d[1]) d++;
12463 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12464 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12467 /* If we've tried to read what we allow filehandles to look like, and
12468 there's still text left, then it must be a glob() and not a getline.
12469 Use scan_str to pull out the stuff between the <> and treat it
12470 as nothing more than a string.
12473 if (d - PL_tokenbuf != len) {
12474 pl_yylval.ival = OP_GLOB;
12475 s = scan_str(start,!!PL_madskills,FALSE);
12477 Perl_croak(aTHX_ "Glob not terminated");
12481 bool readline_overriden = FALSE;
12484 /* we're in a filehandle read situation */
12487 /* turn <> into <ARGV> */
12489 Copy("ARGV",d,5,char);
12491 /* Check whether readline() is overriden */
12492 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12494 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12496 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12497 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12498 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12499 readline_overriden = TRUE;
12501 /* if <$fh>, create the ops to turn the variable into a
12505 /* try to find it in the pad for this block, otherwise find
12506 add symbol table ops
12508 const PADOFFSET tmp = pad_findmy(d, len, 0);
12509 if (tmp != NOT_IN_PAD) {
12510 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12511 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12512 HEK * const stashname = HvNAME_HEK(stash);
12513 SV * const sym = sv_2mortal(newSVhek(stashname));
12514 sv_catpvs(sym, "::");
12515 sv_catpv(sym, d+1);
12520 OP * const o = newOP(OP_PADSV, 0);
12522 PL_lex_op = readline_overriden
12523 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12524 append_elem(OP_LIST, o,
12525 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12526 : (OP*)newUNOP(OP_READLINE, 0, o);
12535 ? (GV_ADDMULTI | GV_ADDINEVAL)
12538 PL_lex_op = readline_overriden
12539 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12540 append_elem(OP_LIST,
12541 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12542 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12543 : (OP*)newUNOP(OP_READLINE, 0,
12544 newUNOP(OP_RV2SV, 0,
12545 newGVOP(OP_GV, 0, gv)));
12547 if (!readline_overriden)
12548 PL_lex_op->op_flags |= OPf_SPECIAL;
12549 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12550 pl_yylval.ival = OP_NULL;
12553 /* If it's none of the above, it must be a literal filehandle
12554 (<Foo::BAR> or <FOO>) so build a simple readline OP */
12556 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12557 PL_lex_op = readline_overriden
12558 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12559 append_elem(OP_LIST,
12560 newGVOP(OP_GV, 0, gv),
12561 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12562 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12563 pl_yylval.ival = OP_NULL;
12572 takes: start position in buffer
12573 keep_quoted preserve \ on the embedded delimiter(s)
12574 keep_delims preserve the delimiters around the string
12575 returns: position to continue reading from buffer
12576 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12577 updates the read buffer.
12579 This subroutine pulls a string out of the input. It is called for:
12580 q single quotes q(literal text)
12581 ' single quotes 'literal text'
12582 qq double quotes qq(interpolate $here please)
12583 " double quotes "interpolate $here please"
12584 qx backticks qx(/bin/ls -l)
12585 ` backticks `/bin/ls -l`
12586 qw quote words @EXPORT_OK = qw( func() $spam )
12587 m// regexp match m/this/
12588 s/// regexp substitute s/this/that/
12589 tr/// string transliterate tr/this/that/
12590 y/// string transliterate y/this/that/
12591 ($*@) sub prototypes sub foo ($)
12592 (stuff) sub attr parameters sub foo : attr(stuff)
12593 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12595 In most of these cases (all but <>, patterns and transliterate)
12596 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12597 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12598 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12601 It skips whitespace before the string starts, and treats the first
12602 character as the delimiter. If the delimiter is one of ([{< then
12603 the corresponding "close" character )]}> is used as the closing
12604 delimiter. It allows quoting of delimiters, and if the string has
12605 balanced delimiters ([{<>}]) it allows nesting.
12607 On success, the SV with the resulting string is put into lex_stuff or,
12608 if that is already non-NULL, into lex_repl. The second case occurs only
12609 when parsing the RHS of the special constructs s/// and tr/// (y///).
12610 For convenience, the terminating delimiter character is stuffed into
12615 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12618 SV *sv; /* scalar value: string */
12619 const char *tmps; /* temp string, used for delimiter matching */
12620 register char *s = start; /* current position in the buffer */
12621 register char term; /* terminating character */
12622 register char *to; /* current position in the sv's data */
12623 I32 brackets = 1; /* bracket nesting level */
12624 bool has_utf8 = FALSE; /* is there any utf8 content? */
12625 I32 termcode; /* terminating char. code */
12626 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
12627 STRLEN termlen; /* length of terminating string */
12628 int last_off = 0; /* last position for nesting bracket */
12634 PERL_ARGS_ASSERT_SCAN_STR;
12636 /* skip space before the delimiter */
12642 if (PL_realtokenstart >= 0) {
12643 stuffstart = PL_realtokenstart;
12644 PL_realtokenstart = -1;
12647 stuffstart = start - SvPVX(PL_linestr);
12649 /* mark where we are, in case we need to report errors */
12652 /* after skipping whitespace, the next character is the terminator */
12655 termcode = termstr[0] = term;
12659 termcode = utf8_to_uvchr((U8*)s, &termlen);
12660 Copy(s, termstr, termlen, U8);
12661 if (!UTF8_IS_INVARIANT(term))
12665 /* mark where we are */
12666 PL_multi_start = CopLINE(PL_curcop);
12667 PL_multi_open = term;
12669 /* find corresponding closing delimiter */
12670 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12671 termcode = termstr[0] = term = tmps[5];
12673 PL_multi_close = term;
12675 /* create a new SV to hold the contents. 79 is the SV's initial length.
12676 What a random number. */
12677 sv = newSV_type(SVt_PVIV);
12679 SvIV_set(sv, termcode);
12680 (void)SvPOK_only(sv); /* validate pointer */
12682 /* move past delimiter and try to read a complete string */
12684 sv_catpvn(sv, s, termlen);
12687 tstart = SvPVX(PL_linestr) + stuffstart;
12688 if (!PL_thisopen && !keep_delims) {
12689 PL_thisopen = newSVpvn(tstart, s - tstart);
12690 stuffstart = s - SvPVX(PL_linestr);
12694 if (PL_encoding && !UTF) {
12698 int offset = s - SvPVX_const(PL_linestr);
12699 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
12700 &offset, (char*)termstr, termlen);
12701 const char * const ns = SvPVX_const(PL_linestr) + offset;
12702 char * const svlast = SvEND(sv) - 1;
12704 for (; s < ns; s++) {
12705 if (*s == '\n' && !PL_rsfp)
12706 CopLINE_inc(PL_curcop);
12709 goto read_more_line;
12711 /* handle quoted delimiters */
12712 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
12714 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
12716 if ((svlast-1 - t) % 2) {
12717 if (!keep_quoted) {
12718 *(svlast-1) = term;
12720 SvCUR_set(sv, SvCUR(sv) - 1);
12725 if (PL_multi_open == PL_multi_close) {
12731 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
12732 /* At here, all closes are "was quoted" one,
12733 so we don't check PL_multi_close. */
12735 if (!keep_quoted && *(t+1) == PL_multi_open)
12740 else if (*t == PL_multi_open)
12748 SvCUR_set(sv, w - SvPVX_const(sv));
12750 last_off = w - SvPVX(sv);
12751 if (--brackets <= 0)
12756 if (!keep_delims) {
12757 SvCUR_set(sv, SvCUR(sv) - 1);
12763 /* extend sv if need be */
12764 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
12765 /* set 'to' to the next character in the sv's string */
12766 to = SvPVX(sv)+SvCUR(sv);
12768 /* if open delimiter is the close delimiter read unbridle */
12769 if (PL_multi_open == PL_multi_close) {
12770 for (; s < PL_bufend; s++,to++) {
12771 /* embedded newlines increment the current line number */
12772 if (*s == '\n' && !PL_rsfp)
12773 CopLINE_inc(PL_curcop);
12774 /* handle quoted delimiters */
12775 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
12776 if (!keep_quoted && s[1] == term)
12778 /* any other quotes are simply copied straight through */
12782 /* terminate when run out of buffer (the for() condition), or
12783 have found the terminator */
12784 else if (*s == term) {
12787 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
12790 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12796 /* if the terminator isn't the same as the start character (e.g.,
12797 matched brackets), we have to allow more in the quoting, and
12798 be prepared for nested brackets.
12801 /* read until we run out of string, or we find the terminator */
12802 for (; s < PL_bufend; s++,to++) {
12803 /* embedded newlines increment the line count */
12804 if (*s == '\n' && !PL_rsfp)
12805 CopLINE_inc(PL_curcop);
12806 /* backslashes can escape the open or closing characters */
12807 if (*s == '\\' && s+1 < PL_bufend) {
12808 if (!keep_quoted &&
12809 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
12814 /* allow nested opens and closes */
12815 else if (*s == PL_multi_close && --brackets <= 0)
12817 else if (*s == PL_multi_open)
12819 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
12824 /* terminate the copied string and update the sv's end-of-string */
12826 SvCUR_set(sv, to - SvPVX_const(sv));
12829 * this next chunk reads more into the buffer if we're not done yet
12833 break; /* handle case where we are done yet :-) */
12835 #ifndef PERL_STRICT_CR
12836 if (to - SvPVX_const(sv) >= 2) {
12837 if ((to[-2] == '\r' && to[-1] == '\n') ||
12838 (to[-2] == '\n' && to[-1] == '\r'))
12842 SvCUR_set(sv, to - SvPVX_const(sv));
12844 else if (to[-1] == '\r')
12847 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
12852 /* if we're out of file, or a read fails, bail and reset the current
12853 line marker so we can report where the unterminated string began
12856 if (PL_madskills) {
12857 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12859 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12861 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12864 CopLINE_inc(PL_curcop);
12865 PL_bufptr = PL_bufend;
12866 if (!lex_next_chunk(0)) {
12868 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12877 /* at this point, we have successfully read the delimited string */
12879 if (!PL_encoding || UTF) {
12881 if (PL_madskills) {
12882 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12883 const int len = s - tstart;
12885 sv_catpvn(PL_thisstuff, tstart, len);
12887 PL_thisstuff = newSVpvn(tstart, len);
12888 if (!PL_thisclose && !keep_delims)
12889 PL_thisclose = newSVpvn(s,termlen);
12894 sv_catpvn(sv, s, termlen);
12899 if (PL_madskills) {
12900 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12901 const int len = s - tstart - termlen;
12903 sv_catpvn(PL_thisstuff, tstart, len);
12905 PL_thisstuff = newSVpvn(tstart, len);
12906 if (!PL_thisclose && !keep_delims)
12907 PL_thisclose = newSVpvn(s - termlen,termlen);
12911 if (has_utf8 || PL_encoding)
12914 PL_multi_end = CopLINE(PL_curcop);
12916 /* if we allocated too much space, give some back */
12917 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12918 SvLEN_set(sv, SvCUR(sv) + 1);
12919 SvPV_renew(sv, SvLEN(sv));
12922 /* decide whether this is the first or second quoted string we've read
12935 takes: pointer to position in buffer
12936 returns: pointer to new position in buffer
12937 side-effects: builds ops for the constant in pl_yylval.op
12939 Read a number in any of the formats that Perl accepts:
12941 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12942 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
12945 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
12947 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12950 If it reads a number without a decimal point or an exponent, it will
12951 try converting the number to an integer and see if it can do so
12952 without loss of precision.
12956 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12959 register const char *s = start; /* current position in buffer */
12960 register char *d; /* destination in temp buffer */
12961 register char *e; /* end of temp buffer */
12962 NV nv; /* number read, as a double */
12963 SV *sv = NULL; /* place to put the converted number */
12964 bool floatit; /* boolean: int or float? */
12965 const char *lastub = NULL; /* position of last underbar */
12966 static char const number_too_long[] = "Number too long";
12968 PERL_ARGS_ASSERT_SCAN_NUM;
12970 /* We use the first character to decide what type of number this is */
12974 Perl_croak(aTHX_ "panic: scan_num");
12976 /* if it starts with a 0, it could be an octal number, a decimal in
12977 0.13 disguise, or a hexadecimal number, or a binary number. */
12981 u holds the "number so far"
12982 shift the power of 2 of the base
12983 (hex == 4, octal == 3, binary == 1)
12984 overflowed was the number more than we can hold?
12986 Shift is used when we add a digit. It also serves as an "are
12987 we in octal/hex/binary?" indicator to disallow hex characters
12988 when in octal mode.
12993 bool overflowed = FALSE;
12994 bool just_zero = TRUE; /* just plain 0 or binary number? */
12995 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12996 static const char* const bases[5] =
12997 { "", "binary", "", "octal", "hexadecimal" };
12998 static const char* const Bases[5] =
12999 { "", "Binary", "", "Octal", "Hexadecimal" };
13000 static const char* const maxima[5] =
13002 "0b11111111111111111111111111111111",
13006 const char *base, *Base, *max;
13008 /* check for hex */
13009 if (s[1] == 'x' || s[1] == 'X') {
13013 } else if (s[1] == 'b' || s[1] == 'B') {
13018 /* check for a decimal in disguise */
13019 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13021 /* so it must be octal */
13028 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13029 "Misplaced _ in number");
13033 base = bases[shift];
13034 Base = Bases[shift];
13035 max = maxima[shift];
13037 /* read the rest of the number */
13039 /* x is used in the overflow test,
13040 b is the digit we're adding on. */
13045 /* if we don't mention it, we're done */
13049 /* _ are ignored -- but warned about if consecutive */
13051 if (lastub && s == lastub + 1)
13052 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13053 "Misplaced _ in number");
13057 /* 8 and 9 are not octal */
13058 case '8': case '9':
13060 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13064 case '2': case '3': case '4':
13065 case '5': case '6': case '7':
13067 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13070 case '0': case '1':
13071 b = *s++ & 15; /* ASCII digit -> value of digit */
13075 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13076 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13077 /* make sure they said 0x */
13080 b = (*s++ & 7) + 9;
13082 /* Prepare to put the digit we have onto the end
13083 of the number so far. We check for overflows.
13089 x = u << shift; /* make room for the digit */
13091 if ((x >> shift) != u
13092 && !(PL_hints & HINT_NEW_BINARY)) {
13095 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13096 "Integer overflow in %s number",
13099 u = x | b; /* add the digit to the end */
13102 n *= nvshift[shift];
13103 /* If an NV has not enough bits in its
13104 * mantissa to represent an UV this summing of
13105 * small low-order numbers is a waste of time
13106 * (because the NV cannot preserve the
13107 * low-order bits anyway): we could just
13108 * remember when did we overflow and in the
13109 * end just multiply n by the right
13117 /* if we get here, we had success: make a scalar value from
13122 /* final misplaced underbar check */
13123 if (s[-1] == '_') {
13124 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13128 if (n > 4294967295.0)
13129 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13130 "%s number > %s non-portable",
13136 if (u > 0xffffffff)
13137 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13138 "%s number > %s non-portable",
13143 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13144 sv = new_constant(start, s - start, "integer",
13145 sv, NULL, NULL, 0);
13146 else if (PL_hints & HINT_NEW_BINARY)
13147 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13152 handle decimal numbers.
13153 we're also sent here when we read a 0 as the first digit
13155 case '1': case '2': case '3': case '4': case '5':
13156 case '6': case '7': case '8': case '9': case '.':
13159 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13162 /* read next group of digits and _ and copy into d */
13163 while (isDIGIT(*s) || *s == '_') {
13164 /* skip underscores, checking for misplaced ones
13168 if (lastub && s == lastub + 1)
13169 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13170 "Misplaced _ in number");
13174 /* check for end of fixed-length buffer */
13176 Perl_croak(aTHX_ number_too_long);
13177 /* if we're ok, copy the character */
13182 /* final misplaced underbar check */
13183 if (lastub && s == lastub + 1) {
13184 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13187 /* read a decimal portion if there is one. avoid
13188 3..5 being interpreted as the number 3. followed
13191 if (*s == '.' && s[1] != '.') {
13196 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13197 "Misplaced _ in number");
13201 /* copy, ignoring underbars, until we run out of digits.
13203 for (; isDIGIT(*s) || *s == '_'; s++) {
13204 /* fixed length buffer check */
13206 Perl_croak(aTHX_ number_too_long);
13208 if (lastub && s == lastub + 1)
13209 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13210 "Misplaced _ in number");
13216 /* fractional part ending in underbar? */
13217 if (s[-1] == '_') {
13218 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13219 "Misplaced _ in number");
13221 if (*s == '.' && isDIGIT(s[1])) {
13222 /* oops, it's really a v-string, but without the "v" */
13228 /* read exponent part, if present */
13229 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13233 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13234 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
13236 /* stray preinitial _ */
13238 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13239 "Misplaced _ in number");
13243 /* allow positive or negative exponent */
13244 if (*s == '+' || *s == '-')
13247 /* stray initial _ */
13249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13250 "Misplaced _ in number");
13254 /* read digits of exponent */
13255 while (isDIGIT(*s) || *s == '_') {
13258 Perl_croak(aTHX_ number_too_long);
13262 if (((lastub && s == lastub + 1) ||
13263 (!isDIGIT(s[1]) && s[1] != '_')))
13264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13265 "Misplaced _ in number");
13273 We try to do an integer conversion first if no characters
13274 indicating "float" have been found.
13279 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13281 if (flags == IS_NUMBER_IN_UV) {
13283 sv = newSViv(uv); /* Prefer IVs over UVs. */
13286 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13287 if (uv <= (UV) IV_MIN)
13288 sv = newSViv(-(IV)uv);
13295 /* terminate the string */
13297 nv = Atof(PL_tokenbuf);
13302 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13303 const char *const key = floatit ? "float" : "integer";
13304 const STRLEN keylen = floatit ? 5 : 7;
13305 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13306 key, keylen, sv, NULL, NULL, 0);
13310 /* if it starts with a v, it could be a v-string */
13313 sv = newSV(5); /* preallocate storage space */
13314 s = scan_vstring(s, PL_bufend, sv);
13318 /* make the op for the constant and return */
13321 lvalp->opval = newSVOP(OP_CONST, 0, sv);
13323 lvalp->opval = NULL;
13329 S_scan_formline(pTHX_ register char *s)
13332 register char *eol;
13334 SV * const stuff = newSVpvs("");
13335 bool needargs = FALSE;
13336 bool eofmt = FALSE;
13338 char *tokenstart = s;
13339 SV* savewhite = NULL;
13341 if (PL_madskills) {
13342 savewhite = PL_thiswhite;
13347 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13349 while (!needargs) {
13352 #ifdef PERL_STRICT_CR
13353 while (SPACE_OR_TAB(*t))
13356 while (SPACE_OR_TAB(*t) || *t == '\r')
13359 if (*t == '\n' || t == PL_bufend) {
13364 if (PL_in_eval && !PL_rsfp) {
13365 eol = (char *) memchr(s,'\n',PL_bufend-s);
13370 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13372 for (t = s; t < eol; t++) {
13373 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13375 goto enough; /* ~~ must be first line in formline */
13377 if (*t == '@' || *t == '^')
13381 sv_catpvn(stuff, s, eol-s);
13382 #ifndef PERL_STRICT_CR
13383 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13384 char *end = SvPVX(stuff) + SvCUR(stuff);
13387 SvCUR_set(stuff, SvCUR(stuff) - 1);
13398 if (PL_madskills) {
13400 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13402 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13405 PL_bufptr = PL_bufend;
13406 CopLINE_inc(PL_curcop);
13407 got_some = lex_next_chunk(0);
13408 CopLINE_dec(PL_curcop);
13411 tokenstart = PL_bufptr;
13419 if (SvCUR(stuff)) {
13422 PL_lex_state = LEX_NORMAL;
13423 start_force(PL_curforce);
13424 NEXTVAL_NEXTTOKE.ival = 0;
13428 PL_lex_state = LEX_FORMLINE;
13430 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13432 else if (PL_encoding)
13433 sv_recode_to_utf8(stuff, PL_encoding);
13435 start_force(PL_curforce);
13436 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13438 start_force(PL_curforce);
13439 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13443 SvREFCNT_dec(stuff);
13445 PL_lex_formbrack = 0;
13449 if (PL_madskills) {
13451 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13453 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13454 PL_thiswhite = savewhite;
13461 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13464 const I32 oldsavestack_ix = PL_savestack_ix;
13465 CV* const outsidecv = PL_compcv;
13468 assert(SvTYPE(PL_compcv) == SVt_PVCV);
13470 SAVEI32(PL_subline);
13471 save_item(PL_subname);
13472 SAVESPTR(PL_compcv);
13474 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13475 CvFLAGS(PL_compcv) |= flags;
13477 PL_subline = CopLINE(PL_curcop);
13478 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13479 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13480 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13482 return oldsavestack_ix;
13486 #pragma segment Perl_yylex
13489 S_yywarn(pTHX_ const char *const s)
13493 PERL_ARGS_ASSERT_YYWARN;
13495 PL_in_eval |= EVAL_WARNONLY;
13497 PL_in_eval &= ~EVAL_WARNONLY;
13502 Perl_yyerror(pTHX_ const char *const s)
13505 const char *where = NULL;
13506 const char *context = NULL;
13509 int yychar = PL_parser->yychar;
13511 PERL_ARGS_ASSERT_YYERROR;
13513 if (!yychar || (yychar == ';' && !PL_rsfp))
13515 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13516 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13517 PL_oldbufptr != PL_bufptr) {
13520 The code below is removed for NetWare because it abends/crashes on NetWare
13521 when the script has error such as not having the closing quotes like:
13522 if ($var eq "value)
13523 Checking of white spaces is anyway done in NetWare code.
13526 while (isSPACE(*PL_oldoldbufptr))
13529 context = PL_oldoldbufptr;
13530 contlen = PL_bufptr - PL_oldoldbufptr;
13532 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13533 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13536 The code below is removed for NetWare because it abends/crashes on NetWare
13537 when the script has error such as not having the closing quotes like:
13538 if ($var eq "value)
13539 Checking of white spaces is anyway done in NetWare code.
13542 while (isSPACE(*PL_oldbufptr))
13545 context = PL_oldbufptr;
13546 contlen = PL_bufptr - PL_oldbufptr;
13548 else if (yychar > 255)
13549 where = "next token ???";
13550 else if (yychar == -2) { /* YYEMPTY */
13551 if (PL_lex_state == LEX_NORMAL ||
13552 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13553 where = "at end of line";
13554 else if (PL_lex_inpat)
13555 where = "within pattern";
13557 where = "within string";
13560 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13562 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13563 else if (isPRINT_LC(yychar)) {
13564 const char string = yychar;
13565 sv_catpvn(where_sv, &string, 1);
13568 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13569 where = SvPVX_const(where_sv);
13571 msg = sv_2mortal(newSVpv(s, 0));
13572 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13573 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13575 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13577 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13578 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13579 Perl_sv_catpvf(aTHX_ msg,
13580 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13581 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13584 if (PL_in_eval & EVAL_WARNONLY) {
13585 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13589 if (PL_error_count >= 10) {
13590 if (PL_in_eval && SvCUR(ERRSV))
13591 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13592 SVfARG(ERRSV), OutCopFILE(PL_curcop));
13594 Perl_croak(aTHX_ "%s has too many errors.\n",
13595 OutCopFILE(PL_curcop));
13598 PL_in_my_stash = NULL;
13602 #pragma segment Main
13606 S_swallow_bom(pTHX_ U8 *s)
13609 const STRLEN slen = SvCUR(PL_linestr);
13611 PERL_ARGS_ASSERT_SWALLOW_BOM;
13615 if (s[1] == 0xFE) {
13616 /* UTF-16 little-endian? (or UTF-32LE?) */
13617 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13618 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13619 #ifndef PERL_NO_UTF16_FILTER
13620 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13622 if (PL_bufend > (char*)s) {
13623 s = add_utf16_textfilter(s, TRUE);
13626 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13631 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13632 #ifndef PERL_NO_UTF16_FILTER
13633 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13635 if (PL_bufend > (char *)s) {
13636 s = add_utf16_textfilter(s, FALSE);
13639 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13644 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13645 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13646 s += 3; /* UTF-8 */
13652 if (s[2] == 0xFE && s[3] == 0xFF) {
13653 /* UTF-32 big-endian */
13654 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13657 else if (s[2] == 0 && s[3] != 0) {
13660 * are a good indicator of UTF-16BE. */
13661 #ifndef PERL_NO_UTF16_FILTER
13662 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13663 s = add_utf16_textfilter(s, FALSE);
13665 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13671 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13672 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13673 s += 4; /* UTF-8 */
13679 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13682 * are a good indicator of UTF-16LE. */
13683 #ifndef PERL_NO_UTF16_FILTER
13684 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13685 s = add_utf16_textfilter(s, TRUE);
13687 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13695 #ifndef PERL_NO_UTF16_FILTER
13697 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13700 SV *const filter = FILTER_DATA(idx);
13701 /* We re-use this each time round, throwing the contents away before we
13703 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13704 SV *const utf8_buffer = filter;
13705 IV status = IoPAGE(filter);
13706 const bool reverse = cBOOL(IoLINES(filter));
13709 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13711 /* As we're automatically added, at the lowest level, and hence only called
13712 from this file, we can be sure that we're not called in block mode. Hence
13713 don't bother writing code to deal with block mode. */
13715 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13718 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13720 DEBUG_P(PerlIO_printf(Perl_debug_log,
13721 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13722 FPTR2DPTR(void *, S_utf16_textfilter),
13723 reverse ? 'l' : 'b', idx, maxlen, status,
13724 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13731 /* First, look in our buffer of existing UTF-8 data: */
13732 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13736 } else if (status == 0) {
13738 IoPAGE(filter) = 0;
13739 nl = SvEND(utf8_buffer);
13742 STRLEN got = nl - SvPVX(utf8_buffer);
13743 /* Did we have anything to append? */
13745 sv_catpvn(sv, SvPVX(utf8_buffer), got);
13746 /* Everything else in this code works just fine if SVp_POK isn't
13747 set. This, however, needs it, and we need it to work, else
13748 we loop infinitely because the buffer is never consumed. */
13749 sv_chop(utf8_buffer, nl);
13753 /* OK, not a complete line there, so need to read some more UTF-16.
13754 Read an extra octect if the buffer currently has an odd number. */
13758 if (SvCUR(utf16_buffer) >= 2) {
13759 /* Location of the high octet of the last complete code point.
13760 Gosh, UTF-16 is a pain. All the benefits of variable length,
13761 *coupled* with all the benefits of partial reads and
13763 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13764 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13766 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13770 /* We have the first half of a surrogate. Read more. */
13771 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13774 status = FILTER_READ(idx + 1, utf16_buffer,
13775 160 + (SvCUR(utf16_buffer) & 1));
13776 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
13777 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13780 IoPAGE(filter) = status;
13785 chars = SvCUR(utf16_buffer) >> 1;
13786 have = SvCUR(utf8_buffer);
13787 SvGROW(utf8_buffer, have + chars * 3 + 1);
13790 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13791 (U8*)SvPVX_const(utf8_buffer) + have,
13792 chars * 2, &newlen);
13794 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13795 (U8*)SvPVX_const(utf8_buffer) + have,
13796 chars * 2, &newlen);
13798 SvCUR_set(utf8_buffer, have + newlen);
13801 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13802 it's private to us, and utf16_to_utf8{,reversed} take a
13803 (pointer,length) pair, rather than a NUL-terminated string. */
13804 if(SvCUR(utf16_buffer) & 1) {
13805 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13806 SvCUR_set(utf16_buffer, 1);
13808 SvCUR_set(utf16_buffer, 0);
13811 DEBUG_P(PerlIO_printf(Perl_debug_log,
13812 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13814 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13815 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13820 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13822 SV *filter = filter_add(S_utf16_textfilter, NULL);
13824 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13826 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13827 sv_setpvs(filter, "");
13828 IoLINES(filter) = reversed;
13829 IoPAGE(filter) = 1; /* Not EOF */
13831 /* Sadly, we have to return a valid pointer, come what may, so we have to
13832 ignore any error return from this. */
13833 SvCUR_set(PL_linestr, 0);
13834 if (FILTER_READ(0, PL_linestr, 0)) {
13835 SvUTF8_on(PL_linestr);
13837 SvUTF8_on(PL_linestr);
13839 PL_bufend = SvEND(PL_linestr);
13840 return (U8*)SvPVX(PL_linestr);
13845 Returns a pointer to the next character after the parsed
13846 vstring, as well as updating the passed in sv.
13848 Function must be called like
13851 s = scan_vstring(s,e,sv);
13853 where s and e are the start and end of the string.
13854 The sv should already be large enough to store the vstring
13855 passed in, for performance reasons.
13860 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13863 const char *pos = s;
13864 const char *start = s;
13866 PERL_ARGS_ASSERT_SCAN_VSTRING;
13868 if (*pos == 'v') pos++; /* get past 'v' */
13869 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13871 if ( *pos != '.') {
13872 /* this may not be a v-string if followed by => */
13873 const char *next = pos;
13874 while (next < e && isSPACE(*next))
13876 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13877 /* return string not v-string */
13878 sv_setpvn(sv,(char *)s,pos-s);
13879 return (char *)pos;
13883 if (!isALPHA(*pos)) {
13884 U8 tmpbuf[UTF8_MAXBYTES+1];
13887 s++; /* get past 'v' */
13892 /* this is atoi() that tolerates underscores */
13895 const char *end = pos;
13897 while (--end >= s) {
13899 const UV orev = rev;
13900 rev += (*end - '0') * mult;
13903 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13904 "Integer overflow in decimal number");
13908 if (rev > 0x7FFFFFFF)
13909 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13911 /* Append native character for the rev point */
13912 tmpend = uvchr_to_utf8(tmpbuf, rev);
13913 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13914 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13916 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13922 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13926 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13933 Perl_keyword_plugin_standard(pTHX_
13934 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13936 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13937 PERL_UNUSED_CONTEXT;
13938 PERL_UNUSED_ARG(keyword_ptr);
13939 PERL_UNUSED_ARG(keyword_len);
13940 PERL_UNUSED_ARG(op_ptr);
13941 return KEYWORD_PLUGIN_DECLINE;
13945 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
13947 Parse a single complete Perl statement. This may be a normal imperative
13948 statement, including optional label, or a declaration that has
13949 compile-time effect. It is up to the caller to ensure that the dynamic
13950 parser state (L</PL_parser> et al) is correctly set to reflect the source
13951 of the code to be parsed and the lexical context for the statement.
13953 The op tree representing the statement is returned. This may be a
13954 null pointer if the statement is null, for example if it was actually
13955 a subroutine definition (which has compile-time side effects). If not
13956 null, it will be the result of a L</newSTATEOP> call, normally including
13957 a C<nextstate> or equivalent op.
13959 If an error occurs in parsing or compilation, in most cases a valid op
13960 tree (most likely null) is returned anyway. The error is reflected in
13961 the parser state, normally resulting in a single exception at the top
13962 level of parsing which covers all the compilation errors that occurred.
13963 Some compilation errors, however, will throw an exception immediately.
13965 The I<flags> parameter is reserved for future use, and must always
13972 Perl_parse_fullstmt(pTHX_ U32 flags)
13976 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13978 SAVEVPTR(PL_eval_root);
13979 PL_eval_root = NULL;
13980 if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
13981 qerror(Perl_mess(aTHX_ "Parse error"));
13982 fullstmtop = PL_eval_root;
13988 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
13990 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
13991 deprecate("qw(...) as parentheses");
13993 if (qwlist->op_type == OP_STUB) {
13997 start_force(PL_curforce);
13998 NEXTVAL_NEXTTOKE.opval = qwlist;
14006 * c-indentation-style: bsd
14007 * c-basic-offset: 4
14008 * indent-tabs-mode: t
14011 * ex: set ts=8 sts=4 sw=4 noet: