3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar (*PL_yycharp)
27 #define yylval (*PL_yylvalp)
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39 /* XXX these probably need to be made into PL vars */
40 static I32 realtokenstart;
41 static I32 faketokens = 0;
42 static MADPROP *thismad;
51 static I32 curforce = -1;
53 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
55 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
57 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
60 #define XFAKEBRACK 128
63 #ifdef USE_UTF8_SCRIPTS
64 # define UTF (!IN_BYTES)
66 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
69 /* In variables named $^X, these are the legal values for X.
70 * 1999-02-27 mjd-perl-patch@plover.com */
71 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
73 /* On MacOS, respect nonbreaking spaces */
74 #ifdef MACOS_TRADITIONAL
75 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
77 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
80 /* LEX_* are values for PL_lex_state, the state of the lexer.
81 * They are arranged oddly so that the guard on the switch statement
82 * can get by with a single comparison (if the compiler is smart enough).
85 /* #define LEX_NOTPARSING 11 is done in perl.h. */
87 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
88 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
89 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
90 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
91 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
93 /* at end of code, eg "$x" followed by: */
94 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
95 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
97 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
98 string or after \E, $foo, etc */
99 #define LEX_INTERPCONST 2 /* NOT USED */
100 #define LEX_FORMLINE 1 /* expecting a format line */
101 #define LEX_KNOWNEXT 0 /* next token known; just return it */
105 static const char* const lex_state_names[] = {
124 #include "keywords.h"
126 /* CLINE is a macro that ensures PL_copline has a sane value */
131 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
133 #if 0 && defined(PERL_MAD)
134 # define SKIPSPACE0(s) skipspace0(s)
135 # define SKIPSPACE1(s) skipspace1(s)
136 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
137 # define PEEKSPACE(s) skipspace2(s,0)
139 # define SKIPSPACE0(s) skipspace(s)
140 # define SKIPSPACE1(s) skipspace(s)
141 # define SKIPSPACE2(s,tsv) skipspace(s)
142 # define PEEKSPACE(s) skipspace(s)
146 * Convenience functions to return different tokens and prime the
147 * lexer for the next token. They all take an argument.
149 * TOKEN : generic token (used for '(', DOLSHARP, etc)
150 * OPERATOR : generic operator
151 * AOPERATOR : assignment operator
152 * PREBLOCK : beginning the block after an if, while, foreach, ...
153 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
154 * PREREF : *EXPR where EXPR is not a simple identifier
155 * TERM : expression term
156 * LOOPX : loop exiting command (goto, last, dump, etc)
157 * FTST : file test operator
158 * FUN0 : zero-argument function
159 * FUN1 : not used, except for not, which isn't a UNIOP
160 * BOop : bitwise or or xor
162 * SHop : shift operator
163 * PWop : power operator
164 * PMop : pattern-matching operator
165 * Aop : addition-level operator
166 * Mop : multiplication-level operator
167 * Eop : equality-testing operator
168 * Rop : relational operator <= != gt
170 * Also see LOP and lop() below.
173 #ifdef DEBUGGING /* Serve -DT. */
174 # define REPORT(retval) tokereport((I32)retval)
176 # define REPORT(retval) (retval)
179 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
180 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
181 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
182 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
183 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
184 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
185 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
186 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
187 #define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
188 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
189 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
190 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
191 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
192 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
193 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
194 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
195 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
196 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
197 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
198 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
200 /* This bit of chicanery makes a unary function followed by
201 * a parenthesis into a function with one argument, highest precedence.
202 * The UNIDOR macro is for unary functions that can be followed by the //
203 * operator (such as C<shift // 0>).
205 #define UNI2(f,x) { \
209 PL_last_uni = PL_oldbufptr; \
210 PL_last_lop_op = f; \
212 return REPORT( (int)FUNC1 ); \
214 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
216 #define UNI(f) UNI2(f,XTERM)
217 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
219 #define UNIBRACK(f) { \
222 PL_last_uni = PL_oldbufptr; \
224 return REPORT( (int)FUNC1 ); \
226 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
229 /* grandfather return to old style */
230 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
234 /* how to interpret the yylval associated with the token */
238 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
244 static struct debug_tokens {
246 enum token_type type;
248 } const debug_tokens[] =
250 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
251 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
252 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
253 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
254 { ARROW, TOKENTYPE_NONE, "ARROW" },
255 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
256 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
257 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
258 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
259 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
260 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
261 { DO, TOKENTYPE_NONE, "DO" },
262 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
263 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
264 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
265 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
266 { ELSE, TOKENTYPE_NONE, "ELSE" },
267 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
268 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
269 { FOR, TOKENTYPE_IVAL, "FOR" },
270 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
271 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
272 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
273 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
274 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
275 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
276 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
277 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
278 { IF, TOKENTYPE_IVAL, "IF" },
279 { LABEL, TOKENTYPE_PVAL, "LABEL" },
280 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
281 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
282 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
283 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
284 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
285 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
286 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
287 { MY, TOKENTYPE_IVAL, "MY" },
288 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
289 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
290 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
291 { OROP, TOKENTYPE_IVAL, "OROP" },
292 { OROR, TOKENTYPE_NONE, "OROR" },
293 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
294 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
295 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
296 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
297 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
298 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
299 { PREINC, TOKENTYPE_NONE, "PREINC" },
300 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
301 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
302 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
303 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
304 { SUB, TOKENTYPE_NONE, "SUB" },
305 { THING, TOKENTYPE_OPVAL, "THING" },
306 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
307 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
308 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
309 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
310 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
311 { USE, TOKENTYPE_IVAL, "USE" },
312 { WHEN, TOKENTYPE_IVAL, "WHEN" },
313 { WHILE, TOKENTYPE_IVAL, "WHILE" },
314 { WORD, TOKENTYPE_OPVAL, "WORD" },
315 { 0, TOKENTYPE_NONE, 0 }
318 /* dump the returned token in rv, plus any optional arg in yylval */
321 S_tokereport(pTHX_ I32 rv)
325 const char *name = NULL;
326 enum token_type type = TOKENTYPE_NONE;
327 const struct debug_tokens *p;
328 SV* const report = newSVpvs("<== ");
330 for (p = debug_tokens; p->token; p++) {
331 if (p->token == (int)rv) {
338 Perl_sv_catpv(aTHX_ report, name);
339 else if ((char)rv > ' ' && (char)rv < '~')
340 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
342 sv_catpvs(report, "EOF");
344 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
347 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
350 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
352 case TOKENTYPE_OPNUM:
353 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
354 PL_op_name[yylval.ival]);
357 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
359 case TOKENTYPE_OPVAL:
361 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
362 PL_op_name[yylval.opval->op_type]);
363 if (yylval.opval->op_type == OP_CONST) {
364 Perl_sv_catpvf(aTHX_ report, " %s",
365 SvPEEK(cSVOPx_sv(yylval.opval)));
370 sv_catpvs(report, "(opval=null)");
373 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
379 /* print the buffer with suitable escapes */
382 S_printbuf(pTHX_ const char* fmt, const char* s)
384 SV* const tmp = newSVpvs("");
385 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
394 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
395 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
399 S_ao(pTHX_ int toketype)
402 if (*PL_bufptr == '=') {
404 if (toketype == ANDAND)
405 yylval.ival = OP_ANDASSIGN;
406 else if (toketype == OROR)
407 yylval.ival = OP_ORASSIGN;
408 else if (toketype == DORDOR)
409 yylval.ival = OP_DORASSIGN;
417 * When Perl expects an operator and finds something else, no_op
418 * prints the warning. It always prints "<something> found where
419 * operator expected. It prints "Missing semicolon on previous line?"
420 * if the surprise occurs at the start of the line. "do you need to
421 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
422 * where the compiler doesn't know if foo is a method call or a function.
423 * It prints "Missing operator before end of line" if there's nothing
424 * after the missing operator, or "... before <...>" if there is something
425 * after the missing operator.
429 S_no_op(pTHX_ const char *what, char *s)
432 char * const oldbp = PL_bufptr;
433 const bool is_first = (PL_oldbufptr == PL_linestart);
439 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
440 if (ckWARN_d(WARN_SYNTAX)) {
442 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
443 "\t(Missing semicolon on previous line?)\n");
444 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
446 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
447 if (t < PL_bufptr && isSPACE(*t))
448 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
449 "\t(Do you need to predeclare %.*s?)\n",
450 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
454 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
455 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
463 * Complain about missing quote/regexp/heredoc terminator.
464 * If it's called with (char *)NULL then it cauterizes the line buffer.
465 * If we're in a delimited string and the delimiter is a control
466 * character, it's reformatted into a two-char sequence like ^C.
471 S_missingterm(pTHX_ char *s)
477 char * const nl = strrchr(s,'\n');
483 iscntrl(PL_multi_close)
485 PL_multi_close < 32 || PL_multi_close == 127
489 tmpbuf[1] = (char)toCTRL(PL_multi_close);
494 *tmpbuf = (char)PL_multi_close;
498 q = strchr(s,'"') ? '\'' : '"';
499 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
502 #define FEATURE_IS_ENABLED(name) \
503 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
504 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
506 * S_feature_is_enabled
507 * Check whether the named feature is enabled.
510 S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
513 HV * const hinthv = GvHV(PL_hintgv);
514 char he_name[32] = "feature_";
515 (void) strncpy(&he_name[8], name, 24);
517 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
525 Perl_deprecate(pTHX_ const char *s)
527 if (ckWARN(WARN_DEPRECATED))
528 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
532 Perl_deprecate_old(pTHX_ const char *s)
534 /* This function should NOT be called for any new deprecated warnings */
535 /* Use Perl_deprecate instead */
537 /* It is here to maintain backward compatibility with the pre-5.8 */
538 /* warnings category hierarchy. The "deprecated" category used to */
539 /* live under the "syntax" category. It is now a top-level category */
540 /* in its own right. */
542 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
543 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
544 "Use of %s is deprecated", s);
548 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
549 * utf16-to-utf8-reversed.
552 #ifdef PERL_CR_FILTER
556 register const char *s = SvPVX_const(sv);
557 register const char * const e = s + SvCUR(sv);
558 /* outer loop optimized to do nothing if there are no CR-LFs */
560 if (*s++ == '\r' && *s == '\n') {
561 /* hit a CR-LF, need to copy the rest */
562 register char *d = s - 1;
565 if (*s == '\r' && s[1] == '\n')
576 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
578 const I32 count = FILTER_READ(idx+1, sv, maxlen);
579 if (count > 0 && !maxlen)
587 * Initialize variables. Uses the Perl save_stack to save its state (for
588 * recursive calls to the parser).
592 Perl_lex_start(pTHX_ SV *line)
598 SAVEI32(PL_lex_dojoin);
599 SAVEI32(PL_lex_brackets);
600 SAVEI32(PL_lex_casemods);
601 SAVEI32(PL_lex_starts);
602 SAVEI32(PL_lex_state);
603 SAVEVPTR(PL_lex_inpat);
604 SAVEI32(PL_lex_inwhat);
605 if (PL_lex_state == LEX_KNOWNEXT) {
606 I32 toke = PL_nexttoke;
607 while (--toke >= 0) {
608 SAVEI32(PL_nexttype[toke]);
609 SAVEVPTR(PL_nextval[toke]);
611 SAVEI32(PL_nexttoke);
613 SAVECOPLINE(PL_curcop);
616 SAVEPPTR(PL_oldbufptr);
617 SAVEPPTR(PL_oldoldbufptr);
618 SAVEPPTR(PL_last_lop);
619 SAVEPPTR(PL_last_uni);
620 SAVEPPTR(PL_linestart);
621 SAVESPTR(PL_linestr);
622 SAVEGENERICPV(PL_lex_brackstack);
623 SAVEGENERICPV(PL_lex_casestack);
624 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
625 SAVESPTR(PL_lex_stuff);
626 SAVEI32(PL_lex_defer);
627 SAVEI32(PL_sublex_info.sub_inwhat);
628 SAVESPTR(PL_lex_repl);
630 SAVEINT(PL_lex_expect);
632 PL_lex_state = LEX_NORMAL;
636 Newx(PL_lex_brackstack, 120, char);
637 Newx(PL_lex_casestack, 12, char);
639 *PL_lex_casestack = '\0';
647 PL_sublex_info.sub_inwhat = 0;
649 if (SvREADONLY(PL_linestr))
650 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
651 s = SvPV_const(PL_linestr, len);
652 if (!len || s[len-1] != ';') {
653 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
654 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
655 sv_catpvs(PL_linestr, "\n;");
657 SvTEMP_off(PL_linestr);
658 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
659 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
660 PL_last_lop = PL_last_uni = NULL;
666 * Finalizer for lexing operations. Must be called when the parser is
667 * done with the lexer.
674 PL_doextract = FALSE;
679 * This subroutine has nothing to do with tilting, whether at windmills
680 * or pinball tables. Its name is short for "increment line". It
681 * increments the current line number in CopLINE(PL_curcop) and checks
682 * to see whether the line starts with a comment of the form
683 * # line 500 "foo.pm"
684 * If so, it sets the current line number and file to the values in the comment.
688 S_incline(pTHX_ char *s)
696 CopLINE_inc(PL_curcop);
699 while (SPACE_OR_TAB(*s)) s++;
700 if (strnEQ(s, "line", 4))
704 if (SPACE_OR_TAB(*s))
708 while (SPACE_OR_TAB(*s)) s++;
714 while (SPACE_OR_TAB(*s))
716 if (*s == '"' && (t = strchr(s+1, '"'))) {
721 for (t = s; !isSPACE(*t); t++) ;
724 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
726 if (*e != '\n' && *e != '\0')
727 return; /* false alarm */
733 const char * const cf = CopFILE(PL_curcop);
734 STRLEN tmplen = cf ? strlen(cf) : 0;
735 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
736 /* must copy *{"::_<(eval N)[oldfilename:L]"}
737 * to *{"::_<newfilename"} */
738 char smallbuf[256], smallbuf2[256];
739 char *tmpbuf, *tmpbuf2;
741 STRLEN tmplen2 = strlen(s);
742 if (tmplen + 3 < sizeof smallbuf)
745 Newx(tmpbuf, tmplen + 3, char);
746 if (tmplen2 + 3 < sizeof smallbuf2)
749 Newx(tmpbuf2, tmplen2 + 3, char);
750 tmpbuf[0] = tmpbuf2[0] = '_';
751 tmpbuf[1] = tmpbuf2[1] = '<';
752 memcpy(tmpbuf + 2, cf, ++tmplen);
753 memcpy(tmpbuf2 + 2, s, ++tmplen2);
755 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
757 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
759 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
760 /* adjust ${"::_<newfilename"} to store the new file name */
761 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
762 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
763 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
765 if (tmpbuf != smallbuf) Safefree(tmpbuf);
766 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
769 CopFILE_free(PL_curcop);
770 CopFILE_set(PL_curcop, s);
773 CopLINE_set(PL_curcop, atoi(n)-1);
777 /* skip space before thistoken */
780 S_skipspace0(pTHX_ register char *s)
787 thiswhite = newSVpvn("",0);
788 sv_catsv(thiswhite, skipwhite);
792 realtokenstart = s - SvPVX(PL_linestr);
796 /* skip space after thistoken */
799 S_skipspace1(pTHX_ register char *s)
802 I32 startoff = start - SvPVX(PL_linestr);
807 start = SvPVX(PL_linestr) + startoff;
808 if (!thistoken && realtokenstart >= 0) {
809 char *tstart = SvPVX(PL_linestr) + realtokenstart;
810 thistoken = newSVpvn(tstart, start - tstart);
815 nextwhite = newSVpvn("",0);
816 sv_catsv(nextwhite, skipwhite);
824 S_skipspace2(pTHX_ register char *s, SV **svp)
827 I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
828 I32 startoff = start - SvPVX(PL_linestr);
830 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
831 if (!PL_madskills || !svp)
833 start = SvPVX(PL_linestr) + startoff;
834 if (!thistoken && realtokenstart >= 0) {
835 char *tstart = SvPVX(PL_linestr) + realtokenstart;
836 thistoken = newSVpvn(tstart, start - tstart);
841 *svp = newSVpvn("",0);
842 sv_setsv(*svp, skipwhite);
853 * Called to gobble the appropriate amount and type of whitespace.
854 * Skips comments as well.
858 S_skipspace(pTHX_ register char *s)
861 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
862 while (s < PL_bufend && SPACE_OR_TAB(*s))
868 SSize_t oldprevlen, oldoldprevlen;
869 SSize_t oldloplen = 0, oldunilen = 0;
870 while (s < PL_bufend && isSPACE(*s)) {
871 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
876 if (s < PL_bufend && *s == '#') {
877 while (s < PL_bufend && *s != '\n')
881 if (PL_in_eval && !PL_rsfp) {
888 /* only continue to recharge the buffer if we're at the end
889 * of the buffer, we're not reading from a source filter, and
890 * we're in normal lexing mode
892 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
893 PL_lex_state == LEX_FORMLINE)
896 /* try to recharge the buffer */
897 if ((s = filter_gets(PL_linestr, PL_rsfp,
898 (prevlen = SvCUR(PL_linestr)))) == NULL)
900 /* end of file. Add on the -p or -n magic */
903 ";}continue{print or die qq(-p destination: $!\\n);}");
904 PL_minus_n = PL_minus_p = 0;
906 else if (PL_minus_n) {
907 sv_setpvn(PL_linestr, ";}", 2);
911 sv_setpvn(PL_linestr,";", 1);
913 /* reset variables for next time we lex */
914 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
916 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
917 PL_last_lop = PL_last_uni = NULL;
919 /* Close the filehandle. Could be from -P preprocessor,
920 * STDIN, or a regular file. If we were reading code from
921 * STDIN (because the commandline held no -e or filename)
922 * then we don't close it, we reset it so the code can
923 * read from STDIN too.
926 if (PL_preprocess && !PL_in_eval)
927 (void)PerlProc_pclose(PL_rsfp);
928 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
929 PerlIO_clearerr(PL_rsfp);
931 (void)PerlIO_close(PL_rsfp);
936 /* not at end of file, so we only read another line */
937 /* make corresponding updates to old pointers, for yyerror() */
938 oldprevlen = PL_oldbufptr - PL_bufend;
939 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
941 oldunilen = PL_last_uni - PL_bufend;
943 oldloplen = PL_last_lop - PL_bufend;
944 PL_linestart = PL_bufptr = s + prevlen;
945 PL_bufend = s + SvCUR(PL_linestr);
947 PL_oldbufptr = s + oldprevlen;
948 PL_oldoldbufptr = s + oldoldprevlen;
950 PL_last_uni = s + oldunilen;
952 PL_last_lop = s + oldloplen;
955 /* debugger active and we're not compiling the debugger code,
956 * so store the line into the debugger's array of lines
958 if (PERLDB_LINE && PL_curstash != PL_debstash) {
959 SV * const sv = newSV(0);
961 sv_upgrade(sv, SVt_PVMG);
962 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
965 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
972 * Check the unary operators to ensure there's no ambiguity in how they're
973 * used. An ambiguous piece of code would be:
975 * This doesn't mean rand() + 5. Because rand() is a unary operator,
976 * the +5 is its argument.
986 if (PL_oldoldbufptr != PL_last_uni)
988 while (isSPACE(*PL_last_uni))
990 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
991 if ((t = strchr(s, '(')) && t < PL_bufptr)
994 /* XXX Things like this are just so nasty. We shouldn't be modifying
995 source code, even if we realquick set it back. */
996 if (ckWARN_d(WARN_AMBIGUOUS)){
999 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1000 "Warning: Use of \"%s\" without parentheses is ambiguous",
1007 * LOP : macro to build a list operator. Its behaviour has been replaced
1008 * with a subroutine, S_lop() for which LOP is just another name.
1011 #define LOP(f,x) return lop(f,x,s)
1015 * Build a list operator (or something that might be one). The rules:
1016 * - if we have a next token, then it's a list operator [why?]
1017 * - if the next thing is an opening paren, then it's a function
1018 * - else it's a list operator
1022 S_lop(pTHX_ I32 f, int x, char *s)
1029 PL_last_lop = PL_oldbufptr;
1030 PL_last_lop_op = (OPCODE)f;
1032 return REPORT(LSTOP);
1034 return REPORT(FUNC);
1037 return REPORT(FUNC);
1039 return REPORT(LSTOP);
1044 * When the lexer realizes it knows the next token (for instance,
1045 * it is reordering tokens for the parser) then it can call S_force_next
1046 * to know what token to return the next time the lexer is called. Caller
1047 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
1048 * handles the token correctly.
1052 S_force_next(pTHX_ I32 type)
1055 PL_nexttype[PL_nexttoke] = type;
1057 if (PL_lex_state != LEX_KNOWNEXT) {
1058 PL_lex_defer = PL_lex_state;
1059 PL_lex_expect = PL_expect;
1060 PL_lex_state = LEX_KNOWNEXT;
1065 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1068 SV * const sv = newSVpvn(start,len);
1069 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
1076 * When the lexer knows the next thing is a word (for instance, it has
1077 * just seen -> and it knows that the next char is a word char, then
1078 * it calls S_force_word to stick the next word into the PL_next lookahead.
1081 * char *start : buffer position (must be within PL_linestr)
1082 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1083 * int check_keyword : if true, Perl checks to make sure the word isn't
1084 * a keyword (do this if the word is a label, e.g. goto FOO)
1085 * int allow_pack : if true, : characters will also be allowed (require,
1086 * use, etc. do this)
1087 * int allow_initial_tick : used by the "sub" lexer only.
1091 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
1097 start = SKIPSPACE1(start);
1099 if (isIDFIRST_lazy_if(s,UTF) ||
1100 (allow_pack && *s == ':') ||
1101 (allow_initial_tick && *s == '\'') )
1103 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1104 if (check_keyword && keyword(PL_tokenbuf, len))
1106 if (token == METHOD) {
1111 PL_expect = XOPERATOR;
1114 NEXTVAL_NEXTTOKE.opval
1115 = (OP*)newSVOP(OP_CONST,0,
1116 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1117 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1125 * Called when the lexer wants $foo *foo &foo etc, but the program
1126 * text only contains the "foo" portion. The first argument is a pointer
1127 * to the "foo", and the second argument is the type symbol to prefix.
1128 * Forces the next token to be a "WORD".
1129 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1133 S_force_ident(pTHX_ register const char *s, int kind)
1137 const STRLEN len = strlen(s);
1138 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1139 NEXTVAL_NEXTTOKE.opval = o;
1142 o->op_private = OPpCONST_ENTERED;
1143 /* XXX see note in pp_entereval() for why we forgo typo
1144 warnings if the symbol must be introduced in an eval.
1146 gv_fetchpvn_flags(s, len,
1147 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1149 kind == '$' ? SVt_PV :
1150 kind == '@' ? SVt_PVAV :
1151 kind == '%' ? SVt_PVHV :
1159 Perl_str_to_version(pTHX_ SV *sv)
1164 const char *start = SvPV_const(sv,len);
1165 const char * const end = start + len;
1166 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1167 while (start < end) {
1171 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1176 retval += ((NV)n)/nshift;
1185 * Forces the next token to be a version number.
1186 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1187 * and if "guessing" is TRUE, then no new token is created (and the caller
1188 * must use an alternative parsing method).
1192 S_force_version(pTHX_ char *s, int guessing)
1204 while (isDIGIT(*d) || *d == '_' || *d == '.')
1206 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1208 s = scan_num(s, &yylval);
1209 version = yylval.opval;
1210 ver = cSVOPx(version)->op_sv;
1211 if (SvPOK(ver) && !SvNIOK(ver)) {
1212 SvUPGRADE(ver, SVt_PVNV);
1213 SvNV_set(ver, str_to_version(ver));
1214 SvNOK_on(ver); /* hint that it is a version */
1221 /* NOTE: The parser sees the package name and the VERSION swapped */
1222 NEXTVAL_NEXTTOKE.opval = version;
1230 * Tokenize a quoted string passed in as an SV. It finds the next
1231 * chunk, up to end of string or a backslash. It may make a new
1232 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1237 S_tokeq(pTHX_ SV *sv)
1241 register char *send;
1249 s = SvPV_force(sv, len);
1250 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1253 while (s < send && *s != '\\')
1258 if ( PL_hints & HINT_NEW_STRING ) {
1259 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1265 if (s + 1 < send && (s[1] == '\\'))
1266 s++; /* all that, just for this */
1271 SvCUR_set(sv, d - SvPVX_const(sv));
1273 if ( PL_hints & HINT_NEW_STRING )
1274 return new_constant(NULL, 0, "q", sv, pv, "q");
1279 * Now come three functions related to double-quote context,
1280 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1281 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1282 * interact with PL_lex_state, and create fake ( ... ) argument lists
1283 * to handle functions and concatenation.
1284 * They assume that whoever calls them will be setting up a fake
1285 * join call, because each subthing puts a ',' after it. This lets
1288 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1290 * (I'm not sure whether the spurious commas at the end of lcfirst's
1291 * arguments and join's arguments are created or not).
1296 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1298 * Pattern matching will set PL_lex_op to the pattern-matching op to
1299 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1301 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1303 * Everything else becomes a FUNC.
1305 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1306 * had an OP_CONST or OP_READLINE). This just sets us up for a
1307 * call to S_sublex_push().
1311 S_sublex_start(pTHX)
1314 register const I32 op_type = yylval.ival;
1316 if (op_type == OP_NULL) {
1317 yylval.opval = PL_lex_op;
1321 if (op_type == OP_CONST || op_type == OP_READLINE) {
1322 SV *sv = tokeq(PL_lex_stuff);
1324 if (SvTYPE(sv) == SVt_PVIV) {
1325 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1327 const char * const p = SvPV_const(sv, len);
1328 SV * const nsv = newSVpvn(p, len);
1334 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1335 PL_lex_stuff = NULL;
1336 /* Allow <FH> // "foo" */
1337 if (op_type == OP_READLINE)
1338 PL_expect = XTERMORDORDOR;
1342 PL_sublex_info.super_state = PL_lex_state;
1343 PL_sublex_info.sub_inwhat = op_type;
1344 PL_sublex_info.sub_op = PL_lex_op;
1345 PL_lex_state = LEX_INTERPPUSH;
1349 yylval.opval = PL_lex_op;
1359 * Create a new scope to save the lexing state. The scope will be
1360 * ended in S_sublex_done. Returns a '(', starting the function arguments
1361 * to the uc, lc, etc. found before.
1362 * Sets PL_lex_state to LEX_INTERPCONCAT.
1371 PL_lex_state = PL_sublex_info.super_state;
1372 SAVEI32(PL_lex_dojoin);
1373 SAVEI32(PL_lex_brackets);
1374 SAVEI32(PL_lex_casemods);
1375 SAVEI32(PL_lex_starts);
1376 SAVEI32(PL_lex_state);
1377 SAVEVPTR(PL_lex_inpat);
1378 SAVEI32(PL_lex_inwhat);
1379 SAVECOPLINE(PL_curcop);
1380 SAVEPPTR(PL_bufptr);
1381 SAVEPPTR(PL_bufend);
1382 SAVEPPTR(PL_oldbufptr);
1383 SAVEPPTR(PL_oldoldbufptr);
1384 SAVEPPTR(PL_last_lop);
1385 SAVEPPTR(PL_last_uni);
1386 SAVEPPTR(PL_linestart);
1387 SAVESPTR(PL_linestr);
1388 SAVEGENERICPV(PL_lex_brackstack);
1389 SAVEGENERICPV(PL_lex_casestack);
1391 PL_linestr = PL_lex_stuff;
1392 PL_lex_stuff = NULL;
1394 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1395 = SvPVX(PL_linestr);
1396 PL_bufend += SvCUR(PL_linestr);
1397 PL_last_lop = PL_last_uni = NULL;
1398 SAVEFREESV(PL_linestr);
1400 PL_lex_dojoin = FALSE;
1401 PL_lex_brackets = 0;
1402 Newx(PL_lex_brackstack, 120, char);
1403 Newx(PL_lex_casestack, 12, char);
1404 PL_lex_casemods = 0;
1405 *PL_lex_casestack = '\0';
1407 PL_lex_state = LEX_INTERPCONCAT;
1408 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1410 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1411 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1412 PL_lex_inpat = PL_sublex_info.sub_op;
1414 PL_lex_inpat = NULL;
1421 * Restores lexer state after a S_sublex_push.
1428 if (!PL_lex_starts++) {
1429 SV * const sv = newSVpvs("");
1430 if (SvUTF8(PL_linestr))
1432 PL_expect = XOPERATOR;
1433 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1437 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1438 PL_lex_state = LEX_INTERPCASEMOD;
1442 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1443 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1444 PL_linestr = PL_lex_repl;
1446 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1447 PL_bufend += SvCUR(PL_linestr);
1448 PL_last_lop = PL_last_uni = NULL;
1449 SAVEFREESV(PL_linestr);
1450 PL_lex_dojoin = FALSE;
1451 PL_lex_brackets = 0;
1452 PL_lex_casemods = 0;
1453 *PL_lex_casestack = '\0';
1455 if (SvEVALED(PL_lex_repl)) {
1456 PL_lex_state = LEX_INTERPNORMAL;
1458 /* we don't clear PL_lex_repl here, so that we can check later
1459 whether this is an evalled subst; that means we rely on the
1460 logic to ensure sublex_done() is called again only via the
1461 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1464 PL_lex_state = LEX_INTERPCONCAT;
1471 PL_bufend = SvPVX(PL_linestr);
1472 PL_bufend += SvCUR(PL_linestr);
1473 PL_expect = XOPERATOR;
1474 PL_sublex_info.sub_inwhat = 0;
1482 Extracts a pattern, double-quoted string, or transliteration. This
1485 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1486 processing a pattern (PL_lex_inpat is true), a transliteration
1487 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1489 Returns a pointer to the character scanned up to. Iff this is
1490 advanced from the start pointer supplied (ie if anything was
1491 successfully parsed), will leave an OP for the substring scanned
1492 in yylval. Caller must intuit reason for not parsing further
1493 by looking at the next characters herself.
1497 double-quoted style: \r and \n
1498 regexp special ones: \D \s
1500 backrefs: \1 (deprecated in substitution replacements)
1501 case and quoting: \U \Q \E
1502 stops on @ and $, but not for $ as tail anchor
1504 In transliterations:
1505 characters are VERY literal, except for - not at the start or end
1506 of the string, which indicates a range. scan_const expands the
1507 range to the full set of intermediate characters.
1509 In double-quoted strings:
1511 double-quoted style: \r and \n
1513 backrefs: \1 (deprecated)
1514 case and quoting: \U \Q \E
1517 scan_const does *not* construct ops to handle interpolated strings.
1518 It stops processing as soon as it finds an embedded $ or @ variable
1519 and leaves it to the caller to work out what's going on.
1521 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
1523 $ in pattern could be $foo or could be tail anchor. Assumption:
1524 it's a tail anchor if $ is the last thing in the string, or if it's
1525 followed by one of ")| \n\t"
1527 \1 (backreferences) are turned into $1
1529 The structure of the code is
1530 while (there's a character to process) {
1531 handle transliteration ranges
1532 skip regexp comments
1533 skip # initiated comments in //x patterns
1534 check for embedded @foo
1535 check for embedded scalars
1537 leave intact backslashes from leave (below)
1538 deprecate \1 in strings and sub replacements
1539 handle string-changing backslashes \l \U \Q \E, etc.
1540 switch (what was escaped) {
1541 handle - in a transliteration (becomes a literal -)
1542 handle \132 octal characters
1543 handle 0x15 hex characters
1544 handle \cV (control V)
1545 handle printf backslashes (\f, \r, \n, etc)
1547 } (end if backslash)
1548 } (end while character to read)
1553 S_scan_const(pTHX_ char *start)
1556 register char *send = PL_bufend; /* end of the constant */
1557 SV *sv = newSV(send - start); /* sv for the constant */
1558 register char *s = start; /* start of the constant */
1559 register char *d = SvPVX(sv); /* destination for copies */
1560 bool dorange = FALSE; /* are we in a translit range? */
1561 bool didrange = FALSE; /* did we just finish a range? */
1562 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1563 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1566 UV literal_endpoint = 0;
1569 const char *leaveit = /* set of acceptably-backslashed characters */
1571 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
1574 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1575 /* If we are doing a trans and we know we want UTF8 set expectation */
1576 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1577 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1581 while (s < send || dorange) {
1582 /* get transliterations out of the way (they're most literal) */
1583 if (PL_lex_inwhat == OP_TRANS) {
1584 /* expand a range A-Z to the full set of characters. AIE! */
1586 I32 i; /* current expanded character */
1587 I32 min; /* first character in range */
1588 I32 max; /* last character in range */
1591 char * const c = (char*)utf8_hop((U8*)d, -1);
1595 *c = (char)UTF_TO_NATIVE(0xff);
1596 /* mark the range as done, and continue */
1602 i = d - SvPVX_const(sv); /* remember current offset */
1603 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1604 d = SvPVX(sv) + i; /* refresh d after realloc */
1605 d -= 2; /* eat the first char and the - */
1607 min = (U8)*d; /* first char in range */
1608 max = (U8)d[1]; /* last char in range */
1612 "Invalid range \"%c-%c\" in transliteration operator",
1613 (char)min, (char)max);
1617 if (literal_endpoint == 2 &&
1618 ((isLOWER(min) && isLOWER(max)) ||
1619 (isUPPER(min) && isUPPER(max)))) {
1621 for (i = min; i <= max; i++)
1623 *d++ = NATIVE_TO_NEED(has_utf8,i);
1625 for (i = min; i <= max; i++)
1627 *d++ = NATIVE_TO_NEED(has_utf8,i);
1632 for (i = min; i <= max; i++)
1635 /* mark the range as done, and continue */
1639 literal_endpoint = 0;
1644 /* range begins (ignore - as first or last char) */
1645 else if (*s == '-' && s+1 < send && s != start) {
1647 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1650 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1660 literal_endpoint = 0;
1665 /* if we get here, we're not doing a transliteration */
1667 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1668 except for the last char, which will be done separately. */
1669 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1671 while (s+1 < send && *s != ')')
1672 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1674 else if (s[2] == '{' /* This should match regcomp.c */
1675 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1678 char *regparse = s + (s[2] == '{' ? 3 : 4);
1681 while (count && (c = *regparse)) {
1682 if (c == '\\' && regparse[1])
1690 if (*regparse != ')')
1691 regparse--; /* Leave one char for continuation. */
1692 while (s < regparse)
1693 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1697 /* likewise skip #-initiated comments in //x patterns */
1698 else if (*s == '#' && PL_lex_inpat &&
1699 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1700 while (s+1 < send && *s != '\n')
1701 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1704 /* check for embedded arrays
1705 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1707 else if (*s == '@' && s[1]
1708 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
1711 /* check for embedded scalars. only stop if we're sure it's a
1714 else if (*s == '$') {
1715 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1717 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1718 break; /* in regexp, $ might be tail anchor */
1721 /* End of else if chain - OP_TRANS rejoin rest */
1724 if (*s == '\\' && s+1 < send) {
1727 /* some backslashes we leave behind */
1728 if (*leaveit && *s && strchr(leaveit, *s)) {
1729 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1730 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1734 /* deprecate \1 in strings and substitution replacements */
1735 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1736 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1738 if (ckWARN(WARN_SYNTAX))
1739 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1744 /* string-change backslash escapes */
1745 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1750 /* if we get here, it's either a quoted -, or a digit */
1753 /* quoted - in transliterations */
1755 if (PL_lex_inwhat == OP_TRANS) {
1765 Perl_warner(aTHX_ packWARN(WARN_MISC),
1766 "Unrecognized escape \\%c passed through",
1768 /* default action is to copy the quoted character */
1769 goto default_action;
1772 /* \132 indicates an octal constant */
1773 case '0': case '1': case '2': case '3':
1774 case '4': case '5': case '6': case '7':
1778 uv = grok_oct(s, &len, &flags, NULL);
1781 goto NUM_ESCAPE_INSERT;
1783 /* \x24 indicates a hex constant */
1787 char* const e = strchr(s, '}');
1788 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1789 PERL_SCAN_DISALLOW_PREFIX;
1794 yyerror("Missing right brace on \\x{}");
1798 uv = grok_hex(s, &len, &flags, NULL);
1804 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1805 uv = grok_hex(s, &len, &flags, NULL);
1811 /* Insert oct or hex escaped character.
1812 * There will always enough room in sv since such
1813 * escapes will be longer than any UTF-8 sequence
1814 * they can end up as. */
1816 /* We need to map to chars to ASCII before doing the tests
1819 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1820 if (!has_utf8 && uv > 255) {
1821 /* Might need to recode whatever we have
1822 * accumulated so far if it contains any
1825 * (Can't we keep track of that and avoid
1826 * this rescan? --jhi)
1830 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1831 if (!NATIVE_IS_INVARIANT(*c)) {
1836 const STRLEN offset = d - SvPVX_const(sv);
1838 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1842 while (src >= (const U8 *)SvPVX_const(sv)) {
1843 if (!NATIVE_IS_INVARIANT(*src)) {
1844 const U8 ch = NATIVE_TO_ASCII(*src);
1845 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1846 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1856 if (has_utf8 || uv > 255) {
1857 d = (char*)uvchr_to_utf8((U8*)d, uv);
1859 if (PL_lex_inwhat == OP_TRANS &&
1860 PL_sublex_info.sub_op) {
1861 PL_sublex_info.sub_op->op_private |=
1862 (PL_lex_repl ? OPpTRANS_FROM_UTF
1875 /* \N{LATIN SMALL LETTER A} is a named character */
1879 char* e = strchr(s, '}');
1885 yyerror("Missing right brace on \\N{}");
1889 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1891 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1892 PERL_SCAN_DISALLOW_PREFIX;
1895 uv = grok_hex(s, &len, &flags, NULL);
1897 goto NUM_ESCAPE_INSERT;
1899 res = newSVpvn(s + 1, e - s - 1);
1900 res = new_constant( NULL, 0, "charnames",
1901 res, NULL, "\\N{...}" );
1903 sv_utf8_upgrade(res);
1904 str = SvPV_const(res,len);
1905 #ifdef EBCDIC_NEVER_MIND
1906 /* charnames uses pack U and that has been
1907 * recently changed to do the below uni->native
1908 * mapping, so this would be redundant (and wrong,
1909 * the code point would be doubly converted).
1910 * But leave this in just in case the pack U change
1911 * gets revoked, but the semantics is still
1912 * desireable for charnames. --jhi */
1914 UV uv = utf8_to_uvchr((const U8*)str, 0);
1917 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1919 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1920 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1921 str = SvPV_const(res, len);
1925 if (!has_utf8 && SvUTF8(res)) {
1926 const char * const ostart = SvPVX_const(sv);
1927 SvCUR_set(sv, d - ostart);
1930 sv_utf8_upgrade(sv);
1931 /* this just broke our allocation above... */
1932 SvGROW(sv, (STRLEN)(send - start));
1933 d = SvPVX(sv) + SvCUR(sv);
1936 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1937 const char * const odest = SvPVX_const(sv);
1939 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1940 d = SvPVX(sv) + (d - odest);
1942 Copy(str, d, len, char);
1949 yyerror("Missing braces on \\N{}");
1952 /* \c is a control character */
1961 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1964 yyerror("Missing control char name in \\c");
1968 /* printf-style backslashes, formfeeds, newlines, etc */
1970 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1973 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1976 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1979 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1982 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1985 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1988 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1994 } /* end if (backslash) */
2001 /* If we started with encoded form, or already know we want it
2002 and then encode the next character */
2003 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2005 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2006 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2009 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
2010 const STRLEN off = d - SvPVX_const(sv);
2011 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2013 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2017 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2019 } /* while loop to process each character */
2021 /* terminate the string and set up the sv */
2023 SvCUR_set(sv, d - SvPVX_const(sv));
2024 if (SvCUR(sv) >= SvLEN(sv))
2025 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2028 if (PL_encoding && !has_utf8) {
2029 sv_recode_to_utf8(sv, PL_encoding);
2035 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2036 PL_sublex_info.sub_op->op_private |=
2037 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2041 /* shrink the sv if we allocated more than we used */
2042 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2043 SvPV_shrink_to_cur(sv);
2046 /* return the substring (via yylval) only if we parsed anything */
2047 if (s > PL_bufptr) {
2048 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2049 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
2051 ( PL_lex_inwhat == OP_TRANS
2053 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2056 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2063 * Returns TRUE if there's more to the expression (e.g., a subscript),
2066 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2068 * ->[ and ->{ return TRUE
2069 * { and [ outside a pattern are always subscripts, so return TRUE
2070 * if we're outside a pattern and it's not { or [, then return FALSE
2071 * if we're in a pattern and the first char is a {
2072 * {4,5} (any digits around the comma) returns FALSE
2073 * if we're in a pattern and the first char is a [
2075 * [SOMETHING] has a funky algorithm to decide whether it's a
2076 * character class or not. It has to deal with things like
2077 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2078 * anything else returns TRUE
2081 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2084 S_intuit_more(pTHX_ register char *s)
2087 if (PL_lex_brackets)
2089 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2091 if (*s != '{' && *s != '[')
2096 /* In a pattern, so maybe we have {n,m}. */
2113 /* On the other hand, maybe we have a character class */
2116 if (*s == ']' || *s == '^')
2119 /* this is terrifying, and it works */
2120 int weight = 2; /* let's weigh the evidence */
2122 unsigned char un_char = 255, last_un_char;
2123 const char * const send = strchr(s,']');
2124 char tmpbuf[sizeof PL_tokenbuf * 4];
2126 if (!send) /* has to be an expression */
2129 Zero(seen,256,char);
2132 else if (isDIGIT(*s)) {
2134 if (isDIGIT(s[1]) && s[2] == ']')
2140 for (; s < send; s++) {
2141 last_un_char = un_char;
2142 un_char = (unsigned char)*s;
2147 weight -= seen[un_char] * 10;
2148 if (isALNUM_lazy_if(s+1,UTF)) {
2150 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2151 len = (int)strlen(tmpbuf);
2152 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2157 else if (*s == '$' && s[1] &&
2158 strchr("[#!%*<>()-=",s[1])) {
2159 if (/*{*/ strchr("])} =",s[2]))
2168 if (strchr("wds]",s[1]))
2170 else if (seen['\''] || seen['"'])
2172 else if (strchr("rnftbxcav",s[1]))
2174 else if (isDIGIT(s[1])) {
2176 while (s[1] && isDIGIT(s[1]))
2186 if (strchr("aA01! ",last_un_char))
2188 if (strchr("zZ79~",s[1]))
2190 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2191 weight -= 5; /* cope with negative subscript */
2194 if (!isALNUM(last_un_char)
2195 && !(last_un_char == '$' || last_un_char == '@'
2196 || last_un_char == '&')
2197 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2202 if (keyword(tmpbuf, d - tmpbuf))
2205 if (un_char == last_un_char + 1)
2207 weight -= seen[un_char];
2212 if (weight >= 0) /* probably a character class */
2222 * Does all the checking to disambiguate
2224 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2225 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2227 * First argument is the stuff after the first token, e.g. "bar".
2229 * Not a method if bar is a filehandle.
2230 * Not a method if foo is a subroutine prototyped to take a filehandle.
2231 * Not a method if it's really "Foo $bar"
2232 * Method if it's "foo $bar"
2233 * Not a method if it's really "print foo $bar"
2234 * Method if it's really "foo package::" (interpreted as package->foo)
2235 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2236 * Not a method if bar is a filehandle or package, but is quoted with
2241 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2244 char *s = start + (*start == '$');
2245 char tmpbuf[sizeof PL_tokenbuf];
2250 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2254 const char *proto = SvPVX_const(cv);
2265 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2266 /* start is the beginning of the possible filehandle/object,
2267 * and s is the end of it
2268 * tmpbuf is a copy of it
2271 if (*start == '$') {
2272 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2277 return *s == '(' ? FUNCMETH : METHOD;
2279 if (!keyword(tmpbuf, len)) {
2280 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2285 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2286 if (indirgv && GvCVu(indirgv))
2288 /* filehandle or package name makes it a method */
2289 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2291 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2292 return 0; /* no assumptions -- "=>" quotes bearword */
2294 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2295 newSVpvn(tmpbuf,len));
2296 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2300 return *s == '(' ? FUNCMETH : METHOD;
2308 * Return a string of Perl code to load the debugger. If PERL5DB
2309 * is set, it will return the contents of that, otherwise a
2310 * compile-time require of perl5db.pl.
2318 const char * const pdb = PerlEnv_getenv("PERL5DB");
2322 SETERRNO(0,SS_NORMAL);
2323 return "BEGIN { require 'perl5db.pl' }";
2329 /* Encoded script support. filter_add() effectively inserts a
2330 * 'pre-processing' function into the current source input stream.
2331 * Note that the filter function only applies to the current source file
2332 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2334 * The datasv parameter (which may be NULL) can be used to pass
2335 * private data to this instance of the filter. The filter function
2336 * can recover the SV using the FILTER_DATA macro and use it to
2337 * store private buffers and state information.
2339 * The supplied datasv parameter is upgraded to a PVIO type
2340 * and the IoDIRP/IoANY field is used to store the function pointer,
2341 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2342 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2343 * private use must be set using malloc'd pointers.
2347 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2353 if (!PL_rsfp_filters)
2354 PL_rsfp_filters = newAV();
2357 SvUPGRADE(datasv, SVt_PVIO);
2358 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2359 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2360 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2361 IoANY(datasv), SvPV_nolen(datasv)));
2362 av_unshift(PL_rsfp_filters, 1);
2363 av_store(PL_rsfp_filters, 0, datasv) ;
2368 /* Delete most recently added instance of this filter function. */
2370 Perl_filter_del(pTHX_ filter_t funcp)
2376 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
2378 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2380 /* if filter is on top of stack (usual case) just pop it off */
2381 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2382 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2383 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2384 IoANY(datasv) = (void *)NULL;
2385 sv_free(av_pop(PL_rsfp_filters));
2389 /* we need to search for the correct entry and clear it */
2390 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2394 /* Invoke the idxth filter function for the current rsfp. */
2395 /* maxlen 0 = read one text line */
2397 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2403 if (!PL_rsfp_filters)
2405 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2406 /* Provide a default input filter to make life easy. */
2407 /* Note that we append to the line. This is handy. */
2408 DEBUG_P(PerlIO_printf(Perl_debug_log,
2409 "filter_read %d: from rsfp\n", idx));
2413 const int old_len = SvCUR(buf_sv);
2415 /* ensure buf_sv is large enough */
2416 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
2417 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2418 if (PerlIO_error(PL_rsfp))
2419 return -1; /* error */
2421 return 0 ; /* end of file */
2423 SvCUR_set(buf_sv, old_len + len) ;
2426 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2427 if (PerlIO_error(PL_rsfp))
2428 return -1; /* error */
2430 return 0 ; /* end of file */
2433 return SvCUR(buf_sv);
2435 /* Skip this filter slot if filter has been deleted */
2436 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2437 DEBUG_P(PerlIO_printf(Perl_debug_log,
2438 "filter_read %d: skipped (filter deleted)\n",
2440 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2442 /* Get function pointer hidden within datasv */
2443 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2444 DEBUG_P(PerlIO_printf(Perl_debug_log,
2445 "filter_read %d: via function %p (%s)\n",
2446 idx, datasv, SvPV_nolen_const(datasv)));
2447 /* Call function. The function is expected to */
2448 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2449 /* Return: <0:error, =0:eof, >0:not eof */
2450 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
2454 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2457 #ifdef PERL_CR_FILTER
2458 if (!PL_rsfp_filters) {
2459 filter_add(S_cr_textfilter,NULL);
2462 if (PL_rsfp_filters) {
2464 SvCUR_set(sv, 0); /* start with empty line */
2465 if (FILTER_READ(0, sv, 0) > 0)
2466 return ( SvPVX(sv) ) ;
2471 return (sv_gets(sv, fp, append));
2475 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2480 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2484 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2485 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2487 return GvHV(gv); /* Foo:: */
2490 /* use constant CLASS => 'MyClass' */
2491 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
2493 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2494 pkgname = SvPV_nolen_const(sv);
2498 return gv_stashpv(pkgname, FALSE);
2502 S_tokenize_use(pTHX_ int is_use, char *s) {
2504 if (PL_expect != XSTATE)
2505 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2506 is_use ? "use" : "no"));
2508 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2509 s = force_version(s, TRUE);
2510 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
2511 NEXTVAL_NEXTTOKE.opval = NULL;
2514 else if (*s == 'v') {
2515 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2516 s = force_version(s, FALSE);
2520 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2521 s = force_version(s, FALSE);
2523 yylval.ival = is_use;
2527 static const char* const exp_name[] =
2528 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2529 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
2536 Works out what to call the token just pulled out of the input
2537 stream. The yacc parser takes care of taking the ops we return and
2538 stitching them into a tree.
2544 if read an identifier
2545 if we're in a my declaration
2546 croak if they tried to say my($foo::bar)
2547 build the ops for a my() declaration
2548 if it's an access to a my() variable
2549 are we in a sort block?
2550 croak if my($a); $a <=> $b
2551 build ops for access to a my() variable
2552 if in a dq string, and they've said @foo and we can't find @foo
2554 build ops for a bareword
2555 if we already built the token before, use it.
2560 #pragma segment Perl_yylex
2566 register char *s = PL_bufptr;
2572 SV* tmp = newSVpvs("");
2573 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2574 (IV)CopLINE(PL_curcop),
2575 lex_state_names[PL_lex_state],
2576 exp_name[PL_expect],
2577 pv_display(tmp, s, strlen(s), 0, 60));
2580 /* check if there's an identifier for us to look at */
2581 if (PL_pending_ident)
2582 return REPORT(S_pending_ident(aTHX));
2584 /* no identifier pending identification */
2586 switch (PL_lex_state) {
2588 case LEX_NORMAL: /* Some compilers will produce faster */
2589 case LEX_INTERPNORMAL: /* code if we comment these out. */
2593 /* when we've already built the next token, just pull it out of the queue */
2596 yylval = NEXTVAL_NEXTTOKE;
2598 PL_lex_state = PL_lex_defer;
2599 PL_expect = PL_lex_expect;
2600 PL_lex_defer = LEX_NORMAL;
2602 return REPORT(PL_nexttype[PL_nexttoke]);
2604 /* interpolated case modifiers like \L \U, including \Q and \E.
2605 when we get here, PL_bufptr is at the \
2607 case LEX_INTERPCASEMOD:
2609 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2610 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2612 /* handle \E or end of string */
2613 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2615 if (PL_lex_casemods) {
2616 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2617 PL_lex_casestack[PL_lex_casemods] = '\0';
2619 if (PL_bufptr != PL_bufend
2620 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2622 PL_lex_state = LEX_INTERPCONCAT;
2626 if (PL_bufptr != PL_bufend)
2628 PL_lex_state = LEX_INTERPCONCAT;
2632 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2633 "### Saw case modifier\n"); });
2635 if (s[1] == '\\' && s[2] == 'E') {
2637 PL_lex_state = LEX_INTERPCONCAT;
2642 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2643 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2644 if ((*s == 'L' || *s == 'U') &&
2645 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2646 PL_lex_casestack[--PL_lex_casemods] = '\0';
2649 if (PL_lex_casemods > 10)
2650 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2651 PL_lex_casestack[PL_lex_casemods++] = *s;
2652 PL_lex_casestack[PL_lex_casemods] = '\0';
2653 PL_lex_state = LEX_INTERPCONCAT;
2654 NEXTVAL_NEXTTOKE.ival = 0;
2657 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
2659 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
2661 NEXTVAL_NEXTTOKE.ival = OP_LC;
2663 NEXTVAL_NEXTTOKE.ival = OP_UC;
2665 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
2667 Perl_croak(aTHX_ "panic: yylex");
2671 if (PL_lex_starts) {
2674 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2675 if (PL_lex_casemods == 1 && PL_lex_inpat)
2684 case LEX_INTERPPUSH:
2685 return REPORT(sublex_push());
2687 case LEX_INTERPSTART:
2688 if (PL_bufptr == PL_bufend)
2689 return REPORT(sublex_done());
2690 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2691 "### Interpolated variable\n"); });
2693 PL_lex_dojoin = (*PL_bufptr == '@');
2694 PL_lex_state = LEX_INTERPNORMAL;
2695 if (PL_lex_dojoin) {
2696 NEXTVAL_NEXTTOKE.ival = 0;
2698 force_ident("\"", '$');
2699 NEXTVAL_NEXTTOKE.ival = 0;
2701 NEXTVAL_NEXTTOKE.ival = 0;
2703 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
2706 if (PL_lex_starts++) {
2708 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2709 if (!PL_lex_casemods && PL_lex_inpat)
2716 case LEX_INTERPENDMAYBE:
2717 if (intuit_more(PL_bufptr)) {
2718 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2724 if (PL_lex_dojoin) {
2725 PL_lex_dojoin = FALSE;
2726 PL_lex_state = LEX_INTERPCONCAT;
2729 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2730 && SvEVALED(PL_lex_repl))
2732 if (PL_bufptr != PL_bufend)
2733 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2737 case LEX_INTERPCONCAT:
2739 if (PL_lex_brackets)
2740 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2742 if (PL_bufptr == PL_bufend)
2743 return REPORT(sublex_done());
2745 if (SvIVX(PL_linestr) == '\'') {
2746 SV *sv = newSVsv(PL_linestr);
2749 else if ( PL_hints & HINT_NEW_RE )
2750 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2751 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2755 s = scan_const(PL_bufptr);
2757 PL_lex_state = LEX_INTERPCASEMOD;
2759 PL_lex_state = LEX_INTERPSTART;
2762 if (s != PL_bufptr) {
2763 NEXTVAL_NEXTTOKE = yylval;
2766 if (PL_lex_starts++) {
2767 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2768 if (!PL_lex_casemods && PL_lex_inpat)
2781 PL_lex_state = LEX_NORMAL;
2782 s = scan_formline(PL_bufptr);
2783 if (!PL_lex_formbrack)
2789 PL_oldoldbufptr = PL_oldbufptr;
2795 if (isIDFIRST_lazy_if(s,UTF))
2797 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2800 goto fake_eof; /* emulate EOF on ^D or ^Z */
2805 if (PL_lex_brackets) {
2806 yyerror(PL_lex_formbrack
2807 ? "Format not terminated"
2808 : "Missing right curly or square bracket");
2810 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2811 "### Tokener got EOF\n");
2815 if (s++ < PL_bufend)
2816 goto retry; /* ignore stray nulls */
2819 if (!PL_in_eval && !PL_preambled) {
2820 PL_preambled = TRUE;
2821 sv_setpv(PL_linestr,incl_perldb());
2822 if (SvCUR(PL_linestr))
2823 sv_catpvs(PL_linestr,";");
2825 while(AvFILLp(PL_preambleav) >= 0) {
2826 SV *tmpsv = av_shift(PL_preambleav);
2827 sv_catsv(PL_linestr, tmpsv);
2828 sv_catpvs(PL_linestr, ";");
2831 sv_free((SV*)PL_preambleav);
2832 PL_preambleav = NULL;
2834 if (PL_minus_n || PL_minus_p) {
2835 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2837 sv_catpvs(PL_linestr,"chomp;");
2840 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2841 || *PL_splitstr == '"')
2842 && strchr(PL_splitstr + 1, *PL_splitstr))
2843 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2845 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2846 bytes can be used as quoting characters. :-) */
2847 const char *splits = PL_splitstr;
2848 sv_catpvs(PL_linestr, "our @F=split(q\0");
2851 if (*splits == '\\')
2852 sv_catpvn(PL_linestr, splits, 1);
2853 sv_catpvn(PL_linestr, splits, 1);
2854 } while (*splits++);
2855 /* This loop will embed the trailing NUL of
2856 PL_linestr as the last thing it does before
2858 sv_catpvs(PL_linestr, ");");
2862 sv_catpvs(PL_linestr,"our @F=split(' ');");
2866 sv_catpvs(PL_linestr,"use feature ':5.10';");
2867 sv_catpvs(PL_linestr, "\n");
2868 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2869 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2870 PL_last_lop = PL_last_uni = NULL;
2871 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2872 SV * const sv = newSV(0);
2874 sv_upgrade(sv, SVt_PVMG);
2875 sv_setsv(sv,PL_linestr);
2878 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2883 bof = PL_rsfp ? TRUE : FALSE;
2884 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
2887 if (PL_preprocess && !PL_in_eval)
2888 (void)PerlProc_pclose(PL_rsfp);
2889 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2890 PerlIO_clearerr(PL_rsfp);
2892 (void)PerlIO_close(PL_rsfp);
2894 PL_doextract = FALSE;
2896 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2897 sv_setpv(PL_linestr,PL_minus_p
2898 ? ";}continue{print;}" : ";}");
2899 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2900 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2901 PL_last_lop = PL_last_uni = NULL;
2902 PL_minus_n = PL_minus_p = 0;
2905 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2906 PL_last_lop = PL_last_uni = NULL;
2907 sv_setpvn(PL_linestr,"",0);
2908 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2910 /* If it looks like the start of a BOM or raw UTF-16,
2911 * check if it in fact is. */
2917 #ifdef PERLIO_IS_STDIO
2918 # ifdef __GNU_LIBRARY__
2919 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2920 # define FTELL_FOR_PIPE_IS_BROKEN
2924 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2925 # define FTELL_FOR_PIPE_IS_BROKEN
2930 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2931 /* This loses the possibility to detect the bof
2932 * situation on perl -P when the libc5 is being used.
2933 * Workaround? Maybe attach some extra state to PL_rsfp?
2936 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2938 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2941 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2942 s = swallow_bom((U8*)s);
2946 /* Incest with pod. */
2947 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2948 sv_setpvn(PL_linestr, "", 0);
2949 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2950 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2951 PL_last_lop = PL_last_uni = NULL;
2952 PL_doextract = FALSE;
2956 } while (PL_doextract);
2957 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2958 if (PERLDB_LINE && PL_curstash != PL_debstash) {
2959 SV * const sv = newSV(0);
2961 sv_upgrade(sv, SVt_PVMG);
2962 sv_setsv(sv,PL_linestr);
2965 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
2967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2968 PL_last_lop = PL_last_uni = NULL;
2969 if (CopLINE(PL_curcop) == 1) {
2970 while (s < PL_bufend && isSPACE(*s))
2972 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2976 if (*s == '#' && *(s+1) == '!')
2978 #ifdef ALTERNATE_SHEBANG
2980 static char const as[] = ALTERNATE_SHEBANG;
2981 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2982 d = s + (sizeof(as) - 1);
2984 #endif /* ALTERNATE_SHEBANG */
2993 while (*d && !isSPACE(*d))
2997 #ifdef ARG_ZERO_IS_SCRIPT
2998 if (ipathend > ipath) {
3000 * HP-UX (at least) sets argv[0] to the script name,
3001 * which makes $^X incorrect. And Digital UNIX and Linux,
3002 * at least, set argv[0] to the basename of the Perl
3003 * interpreter. So, having found "#!", we'll set it right.
3005 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
3007 assert(SvPOK(x) || SvGMAGICAL(x));
3008 if (sv_eq(x, CopFILESV(PL_curcop))) {
3009 sv_setpvn(x, ipath, ipathend - ipath);
3015 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3016 const char * const lstart = SvPV_const(x,llen);
3018 bstart += blen - llen;
3019 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3020 sv_setpvn(x, ipath, ipathend - ipath);
3025 TAINT_NOT; /* $^X is always tainted, but that's OK */
3027 #endif /* ARG_ZERO_IS_SCRIPT */
3032 d = instr(s,"perl -");
3034 d = instr(s,"perl");
3036 /* avoid getting into infinite loops when shebang
3037 * line contains "Perl" rather than "perl" */
3039 for (d = ipathend-4; d >= ipath; --d) {
3040 if ((*d == 'p' || *d == 'P')
3041 && !ibcmp(d, "perl", 4))
3051 #ifdef ALTERNATE_SHEBANG
3053 * If the ALTERNATE_SHEBANG on this system starts with a
3054 * character that can be part of a Perl expression, then if
3055 * we see it but not "perl", we're probably looking at the
3056 * start of Perl code, not a request to hand off to some
3057 * other interpreter. Similarly, if "perl" is there, but
3058 * not in the first 'word' of the line, we assume the line
3059 * contains the start of the Perl program.
3061 if (d && *s != '#') {
3062 const char *c = ipath;
3063 while (*c && !strchr("; \t\r\n\f\v#", *c))
3066 d = NULL; /* "perl" not in first word; ignore */
3068 *s = '#'; /* Don't try to parse shebang line */
3070 #endif /* ALTERNATE_SHEBANG */
3071 #ifndef MACOS_TRADITIONAL
3076 !instr(s,"indir") &&
3077 instr(PL_origargv[0],"perl"))
3084 while (s < PL_bufend && isSPACE(*s))
3086 if (s < PL_bufend) {
3087 Newxz(newargv,PL_origargc+3,char*);
3089 while (s < PL_bufend && !isSPACE(*s))
3092 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3095 newargv = PL_origargv;
3098 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3100 Perl_croak(aTHX_ "Can't exec %s", ipath);
3104 while (*d && !isSPACE(*d)) d++;
3105 while (SPACE_OR_TAB(*d)) d++;
3108 const bool switches_done = PL_doswitches;
3109 const U32 oldpdb = PL_perldb;
3110 const bool oldn = PL_minus_n;
3111 const bool oldp = PL_minus_p;
3114 if (*d == 'M' || *d == 'm' || *d == 'C') {
3115 const char * const m = d;
3116 while (*d && !isSPACE(*d)) d++;
3117 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3120 d = moreswitches(d);
3122 if (PL_doswitches && !switches_done) {
3123 int argc = PL_origargc;
3124 char **argv = PL_origargv;
3127 } while (argc && argv[0][0] == '-' && argv[0][1]);
3128 init_argv_symbols(argc,argv);
3130 if ((PERLDB_LINE && !oldpdb) ||
3131 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3132 /* if we have already added "LINE: while (<>) {",
3133 we must not do it again */
3135 sv_setpvn(PL_linestr, "", 0);
3136 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3137 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3138 PL_last_lop = PL_last_uni = NULL;
3139 PL_preambled = FALSE;
3141 (void)gv_fetchfile(PL_origfilename);
3148 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3150 PL_lex_state = LEX_FORMLINE;
3155 #ifdef PERL_STRICT_CR
3156 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3158 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3160 case ' ': case '\t': case '\f': case 013:
3161 #ifdef MACOS_TRADITIONAL
3168 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3169 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3170 /* handle eval qq[#line 1 "foo"\n ...] */
3171 CopLINE_dec(PL_curcop);
3175 while (d < PL_bufend && *d != '\n')
3179 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3180 Perl_croak(aTHX_ "panic: input overflow");
3183 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3185 PL_lex_state = LEX_FORMLINE;
3195 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3203 while (s < PL_bufend && SPACE_OR_TAB(*s))
3206 if (strnEQ(s,"=>",2)) {
3207 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3208 DEBUG_T( { S_printbuf(aTHX_
3209 "### Saw unary minus before =>, forcing word %s\n", s);
3211 OPERATOR('-'); /* unary minus */
3213 PL_last_uni = PL_oldbufptr;
3215 case 'r': ftst = OP_FTEREAD; break;
3216 case 'w': ftst = OP_FTEWRITE; break;
3217 case 'x': ftst = OP_FTEEXEC; break;
3218 case 'o': ftst = OP_FTEOWNED; break;
3219 case 'R': ftst = OP_FTRREAD; break;
3220 case 'W': ftst = OP_FTRWRITE; break;
3221 case 'X': ftst = OP_FTREXEC; break;
3222 case 'O': ftst = OP_FTROWNED; break;
3223 case 'e': ftst = OP_FTIS; break;
3224 case 'z': ftst = OP_FTZERO; break;
3225 case 's': ftst = OP_FTSIZE; break;
3226 case 'f': ftst = OP_FTFILE; break;
3227 case 'd': ftst = OP_FTDIR; break;
3228 case 'l': ftst = OP_FTLINK; break;
3229 case 'p': ftst = OP_FTPIPE; break;
3230 case 'S': ftst = OP_FTSOCK; break;
3231 case 'u': ftst = OP_FTSUID; break;
3232 case 'g': ftst = OP_FTSGID; break;
3233 case 'k': ftst = OP_FTSVTX; break;
3234 case 'b': ftst = OP_FTBLK; break;
3235 case 'c': ftst = OP_FTCHR; break;
3236 case 't': ftst = OP_FTTTY; break;
3237 case 'T': ftst = OP_FTTEXT; break;
3238 case 'B': ftst = OP_FTBINARY; break;
3239 case 'M': case 'A': case 'C':
3240 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
3242 case 'M': ftst = OP_FTMTIME; break;
3243 case 'A': ftst = OP_FTATIME; break;
3244 case 'C': ftst = OP_FTCTIME; break;
3252 PL_last_lop_op = (OPCODE)ftst;
3253 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3254 "### Saw file test %c\n", (int)tmp);
3259 /* Assume it was a minus followed by a one-letter named
3260 * subroutine call (or a -bareword), then. */
3261 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3262 "### '-%c' looked like a file test but was not\n",
3269 const char tmp = *s++;
3272 if (PL_expect == XOPERATOR)
3277 else if (*s == '>') {
3280 if (isIDFIRST_lazy_if(s,UTF)) {
3281 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3289 if (PL_expect == XOPERATOR)
3292 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3294 OPERATOR('-'); /* unary minus */
3300 const char tmp = *s++;
3303 if (PL_expect == XOPERATOR)
3308 if (PL_expect == XOPERATOR)
3311 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3318 if (PL_expect != XOPERATOR) {
3319 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3320 PL_expect = XOPERATOR;
3321 force_ident(PL_tokenbuf, '*');
3334 if (PL_expect == XOPERATOR) {
3338 PL_tokenbuf[0] = '%';
3339 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3340 if (!PL_tokenbuf[1]) {
3343 PL_pending_ident = '%';
3354 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
3355 && FEATURE_IS_ENABLED("~~"))
3362 const char tmp = *s++;
3368 goto just_a_word_zero_gv;
3371 switch (PL_expect) {
3374 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3376 PL_bufptr = s; /* update in case we back off */
3382 PL_expect = XTERMBLOCK;
3386 while (isIDFIRST_lazy_if(s,UTF)) {
3388 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3389 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3390 if (tmp < 0) tmp = -tmp;
3406 d = scan_str(d,TRUE,TRUE);
3408 /* MUST advance bufptr here to avoid bogus
3409 "at end of line" context messages from yyerror().
3411 PL_bufptr = s + len;
3412 yyerror("Unterminated attribute parameter in attribute list");
3415 return REPORT(0); /* EOF indicator */
3419 SV *sv = newSVpvn(s, len);
3420 sv_catsv(sv, PL_lex_stuff);
3421 attrs = append_elem(OP_LIST, attrs,
3422 newSVOP(OP_CONST, 0, sv));
3423 SvREFCNT_dec(PL_lex_stuff);
3424 PL_lex_stuff = NULL;
3427 if (len == 6 && strnEQ(s, "unique", len)) {
3428 if (PL_in_my == KEY_our)
3430 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3432 /*EMPTY*/; /* skip to avoid loading attributes.pm */
3435 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3438 /* NOTE: any CV attrs applied here need to be part of
3439 the CVf_BUILTIN_ATTRS define in cv.h! */
3440 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3441 CvLVALUE_on(PL_compcv);
3442 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3443 CvLOCKED_on(PL_compcv);
3444 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3445 CvMETHOD_on(PL_compcv);
3446 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3447 CvASSERTION_on(PL_compcv);
3448 /* After we've set the flags, it could be argued that
3449 we don't need to do the attributes.pm-based setting
3450 process, and shouldn't bother appending recognized
3451 flags. To experiment with that, uncomment the
3452 following "else". (Note that's already been
3453 uncommented. That keeps the above-applied built-in
3454 attributes from being intercepted (and possibly
3455 rejected) by a package's attribute routines, but is
3456 justified by the performance win for the common case
3457 of applying only built-in attributes.) */
3459 attrs = append_elem(OP_LIST, attrs,
3460 newSVOP(OP_CONST, 0,
3464 if (*s == ':' && s[1] != ':')
3467 break; /* require real whitespace or :'s */
3468 /* XXX losing whitespace on sequential attributes here */
3472 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3473 if (*s != ';' && *s != '}' && *s != tmp
3474 && (tmp != '=' || *s != ')')) {
3475 const char q = ((*s == '\'') ? '"' : '\'');
3476 /* If here for an expression, and parsed no attrs, back
3478 if (tmp == '=' && !attrs) {
3482 /* MUST advance bufptr here to avoid bogus "at end of line"
3483 context messages from yyerror().
3487 ? Perl_form(aTHX_ "Invalid separator character "
3488 "%c%c%c in attribute list", q, *s, q)
3489 : "Unterminated attribute list" );
3497 NEXTVAL_NEXTTOKE.opval = attrs;
3505 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3506 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3514 const char tmp = *s++;
3519 const char tmp = *s++;
3527 if (PL_lex_brackets <= 0)
3528 yyerror("Unmatched right square bracket");
3531 if (PL_lex_state == LEX_INTERPNORMAL) {
3532 if (PL_lex_brackets == 0) {
3533 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3534 PL_lex_state = LEX_INTERPEND;
3541 if (PL_lex_brackets > 100) {
3542 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3544 switch (PL_expect) {
3546 if (PL_lex_formbrack) {
3550 if (PL_oldoldbufptr == PL_last_lop)
3551 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3553 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3554 OPERATOR(HASHBRACK);
3556 while (s < PL_bufend && SPACE_OR_TAB(*s))
3559 PL_tokenbuf[0] = '\0';
3560 if (d < PL_bufend && *d == '-') {
3561 PL_tokenbuf[0] = '-';
3563 while (d < PL_bufend && SPACE_OR_TAB(*d))
3566 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3567 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3569 while (d < PL_bufend && SPACE_OR_TAB(*d))
3572 const char minus = (PL_tokenbuf[0] == '-');
3573 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3581 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3586 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3591 if (PL_oldoldbufptr == PL_last_lop)
3592 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3594 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3597 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3599 /* This hack is to get the ${} in the message. */
3601 yyerror("syntax error");
3604 OPERATOR(HASHBRACK);
3606 /* This hack serves to disambiguate a pair of curlies
3607 * as being a block or an anon hash. Normally, expectation
3608 * determines that, but in cases where we're not in a
3609 * position to expect anything in particular (like inside
3610 * eval"") we have to resolve the ambiguity. This code
3611 * covers the case where the first term in the curlies is a
3612 * quoted string. Most other cases need to be explicitly
3613 * disambiguated by prepending a "+" before the opening
3614 * curly in order to force resolution as an anon hash.
3616 * XXX should probably propagate the outer expectation
3617 * into eval"" to rely less on this hack, but that could
3618 * potentially break current behavior of eval"".
3622 if (*s == '\'' || *s == '"' || *s == '`') {
3623 /* common case: get past first string, handling escapes */
3624 for (t++; t < PL_bufend && *t != *s;)
3625 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3629 else if (*s == 'q') {
3632 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3635 /* skip q//-like construct */
3637 char open, close, term;
3640 while (t < PL_bufend && isSPACE(*t))
3642 /* check for q => */
3643 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3644 OPERATOR(HASHBRACK);
3648 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3652 for (t++; t < PL_bufend; t++) {
3653 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3655 else if (*t == open)
3659 for (t++; t < PL_bufend; t++) {
3660 if (*t == '\\' && t+1 < PL_bufend)
3662 else if (*t == close && --brackets <= 0)
3664 else if (*t == open)
3671 /* skip plain q word */
3672 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3675 else if (isALNUM_lazy_if(t,UTF)) {
3677 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3680 while (t < PL_bufend && isSPACE(*t))
3682 /* if comma follows first term, call it an anon hash */
3683 /* XXX it could be a comma expression with loop modifiers */
3684 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3685 || (*t == '=' && t[1] == '>')))
3686 OPERATOR(HASHBRACK);
3687 if (PL_expect == XREF)
3690 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3696 yylval.ival = CopLINE(PL_curcop);
3697 if (isSPACE(*s) || *s == '#')
3698 PL_copline = NOLINE; /* invalidate current command line number */
3703 if (PL_lex_brackets <= 0)
3704 yyerror("Unmatched right curly bracket");
3706 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3707 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3708 PL_lex_formbrack = 0;
3709 if (PL_lex_state == LEX_INTERPNORMAL) {
3710 if (PL_lex_brackets == 0) {
3711 if (PL_expect & XFAKEBRACK) {
3712 PL_expect &= XENUMMASK;
3713 PL_lex_state = LEX_INTERPEND;
3715 return yylex(); /* ignore fake brackets */
3717 if (*s == '-' && s[1] == '>')
3718 PL_lex_state = LEX_INTERPENDMAYBE;
3719 else if (*s != '[' && *s != '{')
3720 PL_lex_state = LEX_INTERPEND;
3723 if (PL_expect & XFAKEBRACK) {
3724 PL_expect &= XENUMMASK;
3726 return yylex(); /* ignore fake brackets */
3735 if (PL_expect == XOPERATOR) {
3736 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3737 && isIDFIRST_lazy_if(s,UTF))
3739 CopLINE_dec(PL_curcop);
3740 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3741 CopLINE_inc(PL_curcop);
3746 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3748 PL_expect = XOPERATOR;
3749 force_ident(PL_tokenbuf, '&');
3753 yylval.ival = (OPpENTERSUB_AMPER<<8);
3765 const char tmp = *s++;
3772 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3773 && strchr("+-*/%.^&|<",tmp))
3774 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3775 "Reversed %c= operator",(int)tmp);
3777 if (PL_expect == XSTATE && isALPHA(tmp) &&
3778 (s == PL_linestart+1 || s[-2] == '\n') )
3780 if (PL_in_eval && !PL_rsfp) {
3785 if (strnEQ(s,"=cut",4)) {
3799 PL_doextract = TRUE;
3803 if (PL_lex_brackets < PL_lex_formbrack) {
3805 #ifdef PERL_STRICT_CR
3806 for (t = s; SPACE_OR_TAB(*t); t++) ;
3808 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
3810 if (*t == '\n' || *t == '#') {
3821 const char tmp = *s++;
3823 /* was this !=~ where !~ was meant?
3824 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3826 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3827 const char *t = s+1;
3829 while (t < PL_bufend && isSPACE(*t))
3832 if (*t == '/' || *t == '?' ||
3833 ((*t == 'm' || *t == 's' || *t == 'y')
3834 && !isALNUM(t[1])) ||
3835 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
3836 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3837 "!=~ should be !~");
3847 if (PL_expect != XOPERATOR) {
3848 if (s[1] != '<' && !strchr(s,'>'))
3851 s = scan_heredoc(s);
3853 s = scan_inputsymbol(s);
3854 TERM(sublex_start());
3860 SHop(OP_LEFT_SHIFT);
3874 const char tmp = *s++;
3876 SHop(OP_RIGHT_SHIFT);
3886 if (PL_expect == XOPERATOR) {
3887 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3889 deprecate_old(commaless_variable_list);
3890 return REPORT(','); /* grandfather non-comma-format format */
3894 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3895 PL_tokenbuf[0] = '@';
3896 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3897 sizeof PL_tokenbuf - 1, FALSE);
3898 if (PL_expect == XOPERATOR)
3899 no_op("Array length", s);
3900 if (!PL_tokenbuf[1])
3902 PL_expect = XOPERATOR;
3903 PL_pending_ident = '#';
3907 PL_tokenbuf[0] = '$';
3908 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3909 sizeof PL_tokenbuf - 1, FALSE);
3910 if (PL_expect == XOPERATOR)
3912 if (!PL_tokenbuf[1]) {
3914 yyerror("Final $ should be \\$ or $name");
3918 /* This kludge not intended to be bulletproof. */
3919 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3920 yylval.opval = newSVOP(OP_CONST, 0,
3921 newSViv(PL_compiling.cop_arybase));
3922 yylval.opval->op_private = OPpCONST_ARYBASE;
3928 const char tmp = *s;
3929 if (PL_lex_state == LEX_NORMAL)
3932 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3933 && intuit_more(s)) {
3935 PL_tokenbuf[0] = '@';
3936 if (ckWARN(WARN_SYNTAX)) {
3939 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
3942 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
3943 while (t < PL_bufend && *t != ']')
3945 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3946 "Multidimensional syntax %.*s not supported",
3947 (int)((t - PL_bufptr) + 1), PL_bufptr);
3951 else if (*s == '{') {
3953 PL_tokenbuf[0] = '%';
3954 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3955 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3957 char tmpbuf[sizeof PL_tokenbuf];
3958 for (t++; isSPACE(*t); t++) ;
3959 if (isIDFIRST_lazy_if(t,UTF)) {
3961 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3963 for (; isSPACE(*t); t++) ;
3964 if (*t == ';' && get_cv(tmpbuf, FALSE))
3965 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3966 "You need to quote \"%s\"",
3973 PL_expect = XOPERATOR;
3974 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3975 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3976 if (!islop || PL_last_lop_op == OP_GREPSTART)
3977 PL_expect = XOPERATOR;
3978 else if (strchr("$@\"'`q", *s))
3979 PL_expect = XTERM; /* e.g. print $fh "foo" */
3980 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3981 PL_expect = XTERM; /* e.g. print $fh &sub */
3982 else if (isIDFIRST_lazy_if(s,UTF)) {
3983 char tmpbuf[sizeof PL_tokenbuf];
3985 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3986 if ((t2 = keyword(tmpbuf, len))) {
3987 /* binary operators exclude handle interpretations */
3999 PL_expect = XTERM; /* e.g. print $fh length() */
4004 PL_expect = XTERM; /* e.g. print $fh subr() */
4007 else if (isDIGIT(*s))
4008 PL_expect = XTERM; /* e.g. print $fh 3 */
4009 else if (*s == '.' && isDIGIT(s[1]))
4010 PL_expect = XTERM; /* e.g. print $fh .3 */
4011 else if ((*s == '?' || *s == '-' || *s == '+')
4012 && !isSPACE(s[1]) && s[1] != '=')
4013 PL_expect = XTERM; /* e.g. print $fh -1 */
4014 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
4016 PL_expect = XTERM; /* e.g. print $fh /.../
4017 XXX except DORDOR operator
4019 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
4021 PL_expect = XTERM; /* print $fh <<"EOF" */
4024 PL_pending_ident = '$';
4028 if (PL_expect == XOPERATOR)
4030 PL_tokenbuf[0] = '@';
4031 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4032 if (!PL_tokenbuf[1]) {
4035 if (PL_lex_state == LEX_NORMAL)
4037 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4039 PL_tokenbuf[0] = '%';
4041 /* Warn about @ where they meant $. */
4042 if (*s == '[' || *s == '{') {
4043 if (ckWARN(WARN_SYNTAX)) {
4044 const char *t = s + 1;
4045 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4047 if (*t == '}' || *t == ']') {
4049 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4050 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4051 "Scalar value %.*s better written as $%.*s",
4052 (int)(t-PL_bufptr), PL_bufptr,
4053 (int)(t-PL_bufptr-1), PL_bufptr+1);
4058 PL_pending_ident = '@';
4061 case '/': /* may be division, defined-or, or pattern */
4062 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
4066 case '?': /* may either be conditional or pattern */
4067 if(PL_expect == XOPERATOR) {
4075 /* A // operator. */
4085 /* Disable warning on "study /blah/" */
4086 if (PL_oldoldbufptr == PL_last_uni
4087 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4088 || memNE(PL_last_uni, "study", 5)
4089 || isALNUM_lazy_if(PL_last_uni+5,UTF)
4092 s = scan_pat(s,OP_MATCH);
4093 TERM(sublex_start());
4097 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4098 #ifdef PERL_STRICT_CR
4101 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4103 && (s == PL_linestart || s[-1] == '\n') )
4105 PL_lex_formbrack = 0;
4109 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4115 yylval.ival = OPf_SPECIAL;
4121 if (PL_expect != XOPERATOR)
4126 case '0': case '1': case '2': case '3': case '4':
4127 case '5': case '6': case '7': case '8': case '9':
4128 s = scan_num(s, &yylval);
4129 DEBUG_T( { S_printbuf(aTHX_ "### Saw number in %s\n", s); } );
4130 if (PL_expect == XOPERATOR)
4135 s = scan_str(s,FALSE,FALSE);
4136 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4137 if (PL_expect == XOPERATOR) {
4138 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4140 deprecate_old(commaless_variable_list);
4141 return REPORT(','); /* grandfather non-comma-format format */
4147 missingterm((char*)0);
4148 yylval.ival = OP_CONST;
4149 TERM(sublex_start());
4152 s = scan_str(s,FALSE,FALSE);
4153 DEBUG_T( { S_printbuf(aTHX_ "### Saw string before %s\n", s); } );
4154 if (PL_expect == XOPERATOR) {
4155 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4157 deprecate_old(commaless_variable_list);
4158 return REPORT(','); /* grandfather non-comma-format format */
4164 missingterm((char*)0);
4165 yylval.ival = OP_CONST;
4166 /* FIXME. I think that this can be const if char *d is replaced by
4167 more localised variables. */
4168 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4169 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4170 yylval.ival = OP_STRINGIFY;
4174 TERM(sublex_start());
4177 s = scan_str(s,FALSE,FALSE);
4178 DEBUG_T( { S_printbuf(aTHX_ "### Saw backtick string before %s\n", s); } );
4179 if (PL_expect == XOPERATOR)
4180 no_op("Backticks",s);
4182 missingterm((char*)0);
4183 yylval.ival = OP_BACKTICK;
4185 TERM(sublex_start());
4189 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4190 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4192 if (PL_expect == XOPERATOR)
4193 no_op("Backslash",s);
4197 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4198 char *start = s + 2;
4199 while (isDIGIT(*start) || *start == '_')
4201 if (*start == '.' && isDIGIT(start[1])) {
4202 s = scan_num(s, &yylval);
4205 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4206 else if (!isALPHA(*start) && (PL_expect == XTERM
4207 || PL_expect == XREF || PL_expect == XSTATE
4208 || PL_expect == XTERMORDORDOR)) {
4209 const char c = *start;
4212 gv = gv_fetchpv(s, 0, SVt_PVCV);
4215 s = scan_num(s, &yylval);
4222 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4258 I32 orig_keyword = 0;
4263 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4265 /* Some keywords can be followed by any delimiter, including ':' */
4266 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4267 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4268 (PL_tokenbuf[0] == 'q' &&
4269 strchr("qwxr", PL_tokenbuf[1])))));
4271 /* x::* is just a word, unless x is "CORE" */
4272 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4276 while (d < PL_bufend && isSPACE(*d))
4277 d++; /* no comments skipped here, or s### is misparsed */
4279 /* Is this a label? */
4280 if (!tmp && PL_expect == XSTATE
4281 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4283 yylval.pval = savepv(PL_tokenbuf);
4288 /* Check for keywords */
4289 tmp = keyword(PL_tokenbuf, len);
4291 /* Is this a word before a => operator? */
4292 if (*d == '=' && d[1] == '>') {
4295 = (OP*)newSVOP(OP_CONST, 0,
4296 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4297 yylval.opval->op_private = OPpCONST_BARE;