3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * "It all comes from here, the stench and the peril." --Frodo
16 * This file is the lexer for Perl. It's closely linked to the
19 * The main routine is yylex(), which returns the next token.
23 #define PERL_IN_TOKE_C
26 #define yychar PL_yychar
27 #define yylval PL_yylval
29 static const char ident_too_long[] = "Identifier too long";
30 static const char commaless_variable_list[] = "comma-less variable list";
32 static void restore_rsfp(pTHX_ void *f);
33 #ifndef PERL_NO_UTF16_FILTER
34 static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35 static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
38 #define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
40 #define XFAKEBRACK 128
43 #ifdef USE_UTF8_SCRIPTS
44 # define UTF (!IN_BYTES)
46 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
49 /* In variables named $^X, these are the legal values for X.
50 * 1999-02-27 mjd-perl-patch@plover.com */
51 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
53 /* On MacOS, respect nonbreaking spaces */
54 #ifdef MACOS_TRADITIONAL
55 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
57 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60 /* LEX_* are values for PL_lex_state, the state of the lexer.
61 * They are arranged oddly so that the guard on the switch statement
62 * can get by with a single comparison (if the compiler is smart enough).
65 /* #define LEX_NOTPARSING 11 is done in perl.h. */
67 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
68 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
69 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
70 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
71 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
73 /* at end of code, eg "$x" followed by: */
74 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
75 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
77 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
78 string or after \E, $foo, etc */
79 #define LEX_INTERPCONST 2 /* NOT USED */
80 #define LEX_FORMLINE 1 /* expecting a format line */
81 #define LEX_KNOWNEXT 0 /* next token known; just return it */
85 static const char* const lex_state_names[] = {
104 #ifdef USE_PURE_BISON
106 # define YYMAXLEVEL 100
108 YYSTYPE* yylval_pointer[YYMAXLEVEL];
109 int* yychar_pointer[YYMAXLEVEL];
113 # define yylval (*yylval_pointer[yyactlevel])
114 # define yychar (*yychar_pointer[yyactlevel])
115 # define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
117 # define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
120 #include "keywords.h"
122 /* CLINE is a macro that ensures PL_copline has a sane value */
127 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
129 #define SKIPSPACE0(s) skipspace(s)
130 #define SKIPSPACE1(s) skipspace(s)
131 #define SKIPSPACE2(s,tsv) skipspace(s)
132 #define PEEKSPACE(s) skipspace(s)
135 * Convenience functions to return different tokens and prime the
136 * lexer for the next token. They all take an argument.
138 * TOKEN : generic token (used for '(', DOLSHARP, etc)
139 * OPERATOR : generic operator
140 * AOPERATOR : assignment operator
141 * PREBLOCK : beginning the block after an if, while, foreach, ...
142 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
143 * PREREF : *EXPR where EXPR is not a simple identifier
144 * TERM : expression term
145 * LOOPX : loop exiting command (goto, last, dump, etc)
146 * FTST : file test operator
147 * FUN0 : zero-argument function
148 * FUN1 : not used, except for not, which isn't a UNIOP
149 * BOop : bitwise or or xor
151 * SHop : shift operator
152 * PWop : power operator
153 * PMop : pattern-matching operator
154 * Aop : addition-level operator
155 * Mop : multiplication-level operator
156 * Eop : equality-testing operator
157 * Rop : relational operator <= != gt
159 * Also see LOP and lop() below.
162 #ifdef DEBUGGING /* Serve -DT. */
163 # define REPORT(retval) tokereport((I32)retval)
165 # define REPORT(retval) (retval)
168 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
169 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
170 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
171 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
172 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
173 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
174 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
175 #define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
176 #define FTST(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)UNIOP))
177 #define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
178 #define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
179 #define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
180 #define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
181 #define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
182 #define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
183 #define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
184 #define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
185 #define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
186 #define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
187 #define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
189 /* This bit of chicanery makes a unary function followed by
190 * a parenthesis into a function with one argument, highest precedence.
196 PL_last_uni = PL_oldbufptr; \
197 PL_last_lop_op = f; \
199 return REPORT( (int)FUNC1 ); \
201 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
204 #define UNIBRACK(f) { \
207 PL_last_uni = PL_oldbufptr; \
209 return REPORT( (int)FUNC1 ); \
211 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
214 /* grandfather return to old style */
215 #define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
219 /* how to interpret the yylval associated with the token */
223 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
229 static struct debug_tokens {
231 enum token_type type;
233 } const debug_tokens[] =
235 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
236 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
237 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
238 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
239 { ARROW, TOKENTYPE_NONE, "ARROW" },
240 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
241 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
242 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
243 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
244 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
245 { DO, TOKENTYPE_NONE, "DO" },
246 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
247 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
248 { ELSE, TOKENTYPE_NONE, "ELSE" },
249 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
250 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
251 { FOR, TOKENTYPE_IVAL, "FOR" },
252 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
253 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
254 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
255 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
256 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
257 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
258 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
259 { IF, TOKENTYPE_IVAL, "IF" },
260 { LABEL, TOKENTYPE_PVAL, "LABEL" },
261 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
262 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
263 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
264 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
265 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
266 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
267 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
268 { MY, TOKENTYPE_IVAL, "MY" },
269 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
270 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
271 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
272 { OROP, TOKENTYPE_IVAL, "OROP" },
273 { OROR, TOKENTYPE_NONE, "OROR" },
274 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
275 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
276 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
277 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
278 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
279 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
280 { PREINC, TOKENTYPE_NONE, "PREINC" },
281 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
282 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
283 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
284 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
285 { SUB, TOKENTYPE_NONE, "SUB" },
286 { THING, TOKENTYPE_OPVAL, "THING" },
287 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
288 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
289 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
290 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
291 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
292 { USE, TOKENTYPE_IVAL, "USE" },
293 { WHILE, TOKENTYPE_IVAL, "WHILE" },
294 { WORD, TOKENTYPE_OPVAL, "WORD" },
295 { 0, TOKENTYPE_NONE, NULL }
298 /* dump the returned token in rv, plus any optional arg in yylval */
301 S_tokereport(pTHX_ I32 rv)
304 const char *name = NULL;
305 enum token_type type = TOKENTYPE_NONE;
306 const struct debug_tokens *p;
307 SV* const report = newSVpvs("<== ");
309 for (p = debug_tokens; p->token; p++) {
310 if (p->token == (int)rv) {
317 Perl_sv_catpvf(aTHX_ report, "%s", name);
318 else if ((char)rv > ' ' && (char)rv < '~')
319 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
321 sv_catpvs(report, "EOF");
323 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
326 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
329 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
331 case TOKENTYPE_OPNUM:
332 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
333 PL_op_name[yylval.ival]);
336 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
338 case TOKENTYPE_OPVAL:
340 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
341 PL_op_name[yylval.opval->op_type]);
342 if (yylval.opval->op_type == OP_CONST) {
343 Perl_sv_catpvf(aTHX_ report, " %s",
344 SvPEEK(cSVOPx_sv(yylval.opval)));
349 sv_catpvs(report, "(opval=null)");
352 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
358 /* print the buffer with suitable escapes */
361 S_printbuf(pTHX_ const char* fmt, const char* s)
363 SV* const tmp = newSVpvs("");
364 PerlIO_printf(Perl_debug_log, fmt,
365 pv_display(tmp, (char *)s, strlen(s), 0, 60));
374 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
375 * into an OP_ANDASSIGN or OP_ORASSIGN
379 S_ao(pTHX_ int toketype)
381 if (*PL_bufptr == '=') {
383 if (toketype == ANDAND)
384 yylval.ival = OP_ANDASSIGN;
385 else if (toketype == OROR)
386 yylval.ival = OP_ORASSIGN;
394 * When Perl expects an operator and finds something else, no_op
395 * prints the warning. It always prints "<something> found where
396 * operator expected. It prints "Missing semicolon on previous line?"
397 * if the surprise occurs at the start of the line. "do you need to
398 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
399 * where the compiler doesn't know if foo is a method call or a function.
400 * It prints "Missing operator before end of line" if there's nothing
401 * after the missing operator, or "... before <...>" if there is something
402 * after the missing operator.
406 S_no_op(pTHX_ const char *what, char *s)
408 char * const oldbp = PL_bufptr;
409 const bool is_first = (PL_oldbufptr == PL_linestart);
415 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
416 if (ckWARN_d(WARN_SYNTAX)) {
418 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
419 "\t(Missing semicolon on previous line?)\n");
420 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
422 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
424 if (t < PL_bufptr && isSPACE(*t))
425 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
426 "\t(Do you need to predeclare %.*s?)\n",
427 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
431 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
432 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
440 * Complain about missing quote/regexp/heredoc terminator.
441 * If it's called with NULL then it cauterizes the line buffer.
442 * If we're in a delimited string and the delimiter is a control
443 * character, it's reformatted into a two-char sequence like ^C.
448 S_missingterm(pTHX_ char *s)
453 char * const nl = strrchr(s,'\n');
459 iscntrl(PL_multi_close)
461 PL_multi_close < 32 || PL_multi_close == 127
465 tmpbuf[1] = (char)toCTRL(PL_multi_close);
470 *tmpbuf = (char)PL_multi_close;
474 q = strchr(s,'"') ? '\'' : '"';
475 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
483 Perl_deprecate(pTHX_ char *s)
485 if (ckWARN(WARN_DEPRECATED))
486 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
490 Perl_deprecate_old(pTHX_ char *s)
492 /* This function should NOT be called for any new deprecated warnings */
493 /* Use Perl_deprecate instead */
495 /* It is here to maintain backward compatibility with the pre-5.8 */
496 /* warnings category hierarchy. The "deprecated" category used to */
497 /* live under the "syntax" category. It is now a top-level category */
498 /* in its own right. */
500 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
501 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
502 "Use of %s is deprecated", s);
506 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
507 * utf16-to-utf8-reversed.
510 #ifdef PERL_CR_FILTER
514 register const char *s = SvPVX_const(sv);
515 register const char * const e = s + SvCUR(sv);
516 /* outer loop optimized to do nothing if there are no CR-LFs */
518 if (*s++ == '\r' && *s == '\n') {
519 /* hit a CR-LF, need to copy the rest */
520 register char *d = s - 1;
523 if (*s == '\r' && s[1] == '\n')
534 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
536 const I32 count = FILTER_READ(idx+1, sv, maxlen);
537 if (count > 0 && !maxlen)
545 * Initialize variables. Uses the Perl save_stack to save its state (for
546 * recursive calls to the parser).
550 Perl_lex_start(pTHX_ SV *line)
555 SAVEI32(PL_lex_dojoin);
556 SAVEI32(PL_lex_brackets);
557 SAVEI32(PL_lex_casemods);
558 SAVEI32(PL_lex_starts);
559 SAVEI32(PL_lex_state);
560 SAVEVPTR(PL_lex_inpat);
561 SAVEI32(PL_lex_inwhat);
562 if (PL_lex_state == LEX_KNOWNEXT) {
563 I32 toke = PL_nexttoke;
564 while (--toke >= 0) {
565 SAVEI32(PL_nexttype[toke]);
566 SAVEVPTR(PL_nextval[toke]);
568 SAVEI32(PL_nexttoke);
570 SAVECOPLINE(PL_curcop);
573 SAVEPPTR(PL_oldbufptr);
574 SAVEPPTR(PL_oldoldbufptr);
575 SAVEPPTR(PL_last_lop);
576 SAVEPPTR(PL_last_uni);
577 SAVEPPTR(PL_linestart);
578 SAVESPTR(PL_linestr);
579 SAVEGENERICPV(PL_lex_brackstack);
580 SAVEGENERICPV(PL_lex_casestack);
581 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
582 SAVESPTR(PL_lex_stuff);
583 SAVEI32(PL_lex_defer);
584 SAVEI32(PL_sublex_info.sub_inwhat);
585 SAVESPTR(PL_lex_repl);
587 SAVEINT(PL_lex_expect);
589 PL_lex_state = LEX_NORMAL;
593 Newx(PL_lex_brackstack, 120, char);
594 Newx(PL_lex_casestack, 12, char);
596 *PL_lex_casestack = '\0';
604 PL_sublex_info.sub_inwhat = 0;
606 s = SvPV_const(line, len);
611 PL_linestr = newSVpvs("\n;");
612 } else if (SvREADONLY(line) || s[len-1] != ';') {
613 PL_linestr = newSVsv(line);
615 sv_catpvs(PL_linestr, "\n;");
618 SvREFCNT_inc_simple_void_NN(line);
621 /* PL_linestr needs to survive until end of scope, not just the next
622 FREETMPS. See changes 17505 and 17546 which fixed the symptoms only. */
623 SAVEFREESV(PL_linestr);
624 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
625 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
626 PL_last_lop = PL_last_uni = NULL;
632 * Finalizer for lexing operations. Must be called when the parser is
633 * done with the lexer.
639 PL_doextract = FALSE;
644 * This subroutine has nothing to do with tilting, whether at windmills
645 * or pinball tables. Its name is short for "increment line". It
646 * increments the current line number in CopLINE(PL_curcop) and checks
647 * to see whether the line starts with a comment of the form
648 * # line 500 "foo.pm"
649 * If so, it sets the current line number and file to the values in the comment.
653 S_incline(pTHX_ char *s)
660 CopLINE_inc(PL_curcop);
663 while (SPACE_OR_TAB(*s))
665 if (strnEQ(s, "line", 4))
669 if (SPACE_OR_TAB(*s))
673 while (SPACE_OR_TAB(*s))
681 while (SPACE_OR_TAB(*s))
683 if (*s == '"' && (t = strchr(s+1, '"'))) {
693 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
695 if (*e != '\n' && *e != '\0')
696 return; /* false alarm */
702 const char * const cf = CopFILE(PL_curcop);
703 STRLEN tmplen = cf ? strlen(cf) : 0;
704 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
705 /* must copy *{"::_<(eval N)[oldfilename:L]"}
706 * to *{"::_<newfilename"} */
707 char smallbuf[256], smallbuf2[256];
708 char *tmpbuf, *tmpbuf2;
710 STRLEN tmplen2 = strlen(s);
711 if (tmplen + 3 < sizeof smallbuf)
714 Newx(tmpbuf, tmplen + 3, char);
715 if (tmplen2 + 3 < sizeof smallbuf2)
718 Newx(tmpbuf2, tmplen2 + 3, char);
719 tmpbuf[0] = tmpbuf2[0] = '_';
720 tmpbuf[1] = tmpbuf2[1] = '<';
721 memcpy(tmpbuf + 2, cf, ++tmplen);
722 memcpy(tmpbuf2 + 2, s, ++tmplen2);
724 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
726 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
728 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
729 /* adjust ${"::_<newfilename"} to store the new file name */
730 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
731 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
732 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
735 if (tmpbuf != smallbuf) Safefree(tmpbuf);
736 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
739 CopFILE_free(PL_curcop);
740 CopFILE_set(PL_curcop, s);
743 CopLINE_set(PL_curcop, atoi(n)-1);
748 S_update_debugger_info(pTHX_ SV *orig_sv, const char *buf, STRLEN len)
750 AV *av = CopFILEAVx(PL_curcop);
752 SV * const sv = newSV(0);
753 sv_upgrade(sv, SVt_PVMG);
755 sv_setsv(sv, orig_sv);
757 sv_setpvn(sv, buf, len);
760 av_store(av, (I32)CopLINE(PL_curcop), sv);
766 * Called to gobble the appropriate amount and type of whitespace.
767 * Skips comments as well.
771 S_skipspace(pTHX_ register char *s)
773 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
774 while (s < PL_bufend && SPACE_OR_TAB(*s))
780 SSize_t oldprevlen, oldoldprevlen;
781 SSize_t oldloplen = 0, oldunilen = 0;
782 while (s < PL_bufend && isSPACE(*s)) {
783 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
788 if (s < PL_bufend && *s == '#') {
789 while (s < PL_bufend && *s != '\n')
793 if (PL_in_eval && !PL_rsfp) {
800 /* only continue to recharge the buffer if we're at the end
801 * of the buffer, we're not reading from a source filter, and
802 * we're in normal lexing mode
804 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
805 PL_lex_state == LEX_FORMLINE)
808 /* try to recharge the buffer */
809 if ((s = filter_gets(PL_linestr, PL_rsfp,
810 (prevlen = SvCUR(PL_linestr)))) == NULL)
812 /* end of file. Add on the -p or -n magic */
815 ";}continue{print or die qq(-p destination: $!\\n);}");
816 PL_minus_n = PL_minus_p = 0;
818 else if (PL_minus_n) {
819 sv_setpvn(PL_linestr, ";}", 2);
823 sv_setpvn(PL_linestr,";", 1);
825 /* reset variables for next time we lex */
826 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
828 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
829 PL_last_lop = PL_last_uni = NULL;
831 /* Close the filehandle. Could be from -P preprocessor,
832 * STDIN, or a regular file. If we were reading code from
833 * STDIN (because the commandline held no -e or filename)
834 * then we don't close it, we reset it so the code can
835 * read from STDIN too.
838 if (PL_preprocess && !PL_in_eval)
839 (void)PerlProc_pclose(PL_rsfp);
840 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
841 PerlIO_clearerr(PL_rsfp);
843 (void)PerlIO_close(PL_rsfp);
848 /* not at end of file, so we only read another line */
849 /* make corresponding updates to old pointers, for yyerror() */
850 oldprevlen = PL_oldbufptr - PL_bufend;
851 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
853 oldunilen = PL_last_uni - PL_bufend;
855 oldloplen = PL_last_lop - PL_bufend;
856 PL_linestart = PL_bufptr = s + prevlen;
857 PL_bufend = s + SvCUR(PL_linestr);
859 PL_oldbufptr = s + oldprevlen;
860 PL_oldoldbufptr = s + oldoldprevlen;
862 PL_last_uni = s + oldunilen;
864 PL_last_lop = s + oldloplen;
867 /* debugger active and we're not compiling the debugger code,
868 * so store the line into the debugger's array of lines
870 if (PERLDB_LINE && PL_curstash != PL_debstash)
871 update_debugger_info(NULL, PL_bufptr, PL_bufend - PL_bufptr);
877 * Check the unary operators to ensure there's no ambiguity in how they're
878 * used. An ambiguous piece of code would be:
880 * This doesn't mean rand() + 5. Because rand() is a unary operator,
881 * the +5 is its argument.
890 if (PL_oldoldbufptr != PL_last_uni)
892 while (isSPACE(*PL_last_uni))
895 while (isALNUM_lazy_if(s,UTF) || *s == '-')
897 if ((t = strchr(s, '(')) && t < PL_bufptr)
900 if (ckWARN_d(WARN_AMBIGUOUS)){
901 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
902 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
903 (int)(s - PL_last_uni), PL_last_uni);
908 * LOP : macro to build a list operator. Its behaviour has been replaced
909 * with a subroutine, S_lop() for which LOP is just another name.
912 #define LOP(f,x) return lop(f,x,s)
916 * Build a list operator (or something that might be one). The rules:
917 * - if we have a next token, then it's a list operator [why?]
918 * - if the next thing is an opening paren, then it's a function
919 * - else it's a list operator
923 S_lop(pTHX_ I32 f, int x, char *s)
929 PL_last_lop = PL_oldbufptr;
930 PL_last_lop_op = (OPCODE)f;
932 return REPORT(LSTOP);
939 return REPORT(LSTOP);
944 * When the lexer realizes it knows the next token (for instance,
945 * it is reordering tokens for the parser) then it can call S_force_next
946 * to know what token to return the next time the lexer is called. Caller
947 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
948 * handles the token correctly.
952 S_force_next(pTHX_ I32 type)
954 PL_nexttype[PL_nexttoke] = type;
956 if (PL_lex_state != LEX_KNOWNEXT) {
957 PL_lex_defer = PL_lex_state;
958 PL_lex_expect = PL_expect;
959 PL_lex_state = LEX_KNOWNEXT;
964 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
966 SV *sv = newSVpvn(start,len);
967 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
974 * When the lexer knows the next thing is a word (for instance, it has
975 * just seen -> and it knows that the next char is a word char, then
976 * it calls S_force_word to stick the next word into the PL_next lookahead.
979 * char *start : buffer position (must be within PL_linestr)
980 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
981 * int check_keyword : if true, Perl checks to make sure the word isn't
982 * a keyword (do this if the word is a label, e.g. goto FOO)
983 * int allow_pack : if true, : characters will also be allowed (require,
985 * int allow_initial_tick : used by the "sub" lexer only.
989 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
994 start = SKIPSPACE1(start);
996 if (isIDFIRST_lazy_if(s,UTF) ||
997 (allow_pack && *s == ':') ||
998 (allow_initial_tick && *s == '\'') )
1000 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1001 if (check_keyword && keyword(PL_tokenbuf, len))
1003 if (token == METHOD) {
1008 PL_expect = XOPERATOR;
1011 NEXTVAL_NEXTTOKE.opval
1012 = (OP*)newSVOP(OP_CONST,0,
1013 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
1014 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
1022 * Called when the lexer wants $foo *foo &foo etc, but the program
1023 * text only contains the "foo" portion. The first argument is a pointer
1024 * to the "foo", and the second argument is the type symbol to prefix.
1025 * Forces the next token to be a "WORD".
1026 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
1030 S_force_ident(pTHX_ register const char *s, int kind)
1033 const STRLEN len = strlen(s);
1034 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
1035 NEXTVAL_NEXTTOKE.opval = o;
1038 o->op_private = OPpCONST_ENTERED;
1039 /* XXX see note in pp_entereval() for why we forgo typo
1040 warnings if the symbol must be introduced in an eval.
1042 gv_fetchpvn_flags(s, len,
1043 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1045 kind == '$' ? SVt_PV :
1046 kind == '@' ? SVt_PVAV :
1047 kind == '%' ? SVt_PVHV :
1055 Perl_str_to_version(pTHX_ SV *sv)
1060 const char *start = SvPV_const(sv,len);
1061 const char * const end = start + len;
1062 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1063 while (start < end) {
1067 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1072 retval += ((NV)n)/nshift;
1081 * Forces the next token to be a version number.
1082 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1083 * and if "guessing" is TRUE, then no new token is created (and the caller
1084 * must use an alternative parsing method).
1088 S_force_version(pTHX_ char *s, int guessing)
1099 while (isDIGIT(*d) || *d == '_' || *d == '.')
1101 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
1103 s = scan_num(s, &yylval);
1104 version = yylval.opval;
1105 ver = cSVOPx(version)->op_sv;
1106 if (SvPOK(ver) && !SvNIOK(ver)) {
1107 (void)SvUPGRADE(ver, SVt_PVNV);
1108 SvNV_set(ver, str_to_version(ver));
1109 SvNOK_on(ver); /* hint that it is a version */
1116 /* NOTE: The parser sees the package name and the VERSION swapped */
1117 NEXTVAL_NEXTTOKE.opval = version;
1125 * Tokenize a quoted string passed in as an SV. It finds the next
1126 * chunk, up to end of string or a backslash. It may make a new
1127 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1132 S_tokeq(pTHX_ SV *sv)
1135 register char *send;
1143 s = SvPV_force(sv, len);
1144 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
1147 while (s < send && *s != '\\')
1152 if ( PL_hints & HINT_NEW_STRING ) {
1153 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
1159 if (s + 1 < send && (s[1] == '\\'))
1160 s++; /* all that, just for this */
1165 SvCUR_set(sv, d - SvPVX_const(sv));
1167 if ( PL_hints & HINT_NEW_STRING )
1168 return new_constant(NULL, 0, "q", sv, pv, "q");
1173 * Now come three functions related to double-quote context,
1174 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1175 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1176 * interact with PL_lex_state, and create fake ( ... ) argument lists
1177 * to handle functions and concatenation.
1178 * They assume that whoever calls them will be setting up a fake
1179 * join call, because each subthing puts a ',' after it. This lets
1182 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1184 * (I'm not sure whether the spurious commas at the end of lcfirst's
1185 * arguments and join's arguments are created or not).
1190 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1192 * Pattern matching will set PL_lex_op to the pattern-matching op to
1193 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1195 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1197 * Everything else becomes a FUNC.
1199 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1200 * had an OP_CONST or OP_READLINE). This just sets us up for a
1201 * call to S_sublex_push().
1205 S_sublex_start(pTHX)
1207 register const I32 op_type = yylval.ival;
1209 if (op_type == OP_NULL) {
1210 yylval.opval = PL_lex_op;
1214 if (op_type == OP_CONST || op_type == OP_READLINE) {
1215 SV *sv = tokeq(PL_lex_stuff);
1217 if (SvTYPE(sv) == SVt_PVIV) {
1218 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1220 const char * const p = SvPV_const(sv, len);
1221 SV * const nsv = newSVpvn(p, len);
1227 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
1228 PL_lex_stuff = NULL;
1232 PL_sublex_info.super_state = PL_lex_state;
1233 PL_sublex_info.sub_inwhat = op_type;
1234 PL_sublex_info.sub_op = PL_lex_op;
1235 PL_lex_state = LEX_INTERPPUSH;
1239 yylval.opval = PL_lex_op;
1249 * Create a new scope to save the lexing state. The scope will be
1250 * ended in S_sublex_done. Returns a '(', starting the function arguments
1251 * to the uc, lc, etc. found before.
1252 * Sets PL_lex_state to LEX_INTERPCONCAT.
1260 PL_lex_state = PL_sublex_info.super_state;
1261 SAVEI32(PL_lex_dojoin);
1262 SAVEI32(PL_lex_brackets);
1263 SAVEI32(PL_lex_casemods);
1264 SAVEI32(PL_lex_starts);
1265 SAVEI32(PL_lex_state);
1266 SAVEVPTR(PL_lex_inpat);
1267 SAVEI32(PL_lex_inwhat);
1268 SAVECOPLINE(PL_curcop);
1269 SAVEPPTR(PL_bufptr);
1270 SAVEPPTR(PL_bufend);
1271 SAVEPPTR(PL_oldbufptr);
1272 SAVEPPTR(PL_oldoldbufptr);
1273 SAVEPPTR(PL_last_lop);
1274 SAVEPPTR(PL_last_uni);
1275 SAVEPPTR(PL_linestart);
1276 SAVESPTR(PL_linestr);
1277 SAVEGENERICPV(PL_lex_brackstack);
1278 SAVEGENERICPV(PL_lex_casestack);
1280 PL_linestr = PL_lex_stuff;
1281 PL_lex_stuff = NULL;
1283 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1284 = SvPVX(PL_linestr);
1285 PL_bufend += SvCUR(PL_linestr);
1286 PL_last_lop = PL_last_uni = NULL;
1287 SAVEFREESV(PL_linestr);
1289 PL_lex_dojoin = FALSE;
1290 PL_lex_brackets = 0;
1291 Newx(PL_lex_brackstack, 120, char);
1292 Newx(PL_lex_casestack, 12, char);
1293 PL_lex_casemods = 0;
1294 *PL_lex_casestack = '\0';
1296 PL_lex_state = LEX_INTERPCONCAT;
1297 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
1299 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1300 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1301 PL_lex_inpat = PL_sublex_info.sub_op;
1303 PL_lex_inpat = NULL;
1310 * Restores lexer state after a S_sublex_push.
1316 if (!PL_lex_starts++) {
1317 SV * const sv = newSVpvs("");
1318 if (SvUTF8(PL_linestr))
1320 PL_expect = XOPERATOR;
1321 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
1325 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1326 PL_lex_state = LEX_INTERPCASEMOD;
1330 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
1331 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1332 PL_linestr = PL_lex_repl;
1334 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1335 PL_bufend += SvCUR(PL_linestr);
1336 PL_last_lop = PL_last_uni = NULL;
1337 SAVEFREESV(PL_linestr);
1338 PL_lex_dojoin = FALSE;
1339 PL_lex_brackets = 0;
1340 PL_lex_casemods = 0;
1341 *PL_lex_casestack = '\0';
1343 if (SvEVALED(PL_lex_repl)) {
1344 PL_lex_state = LEX_INTERPNORMAL;
1346 /* we don't clear PL_lex_repl here, so that we can check later
1347 whether this is an evalled subst; that means we rely on the
1348 logic to ensure sublex_done() is called again only via the
1349 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
1352 PL_lex_state = LEX_INTERPCONCAT;
1359 PL_bufend = SvPVX(PL_linestr);
1360 PL_bufend += SvCUR(PL_linestr);
1361 PL_expect = XOPERATOR;
1362 PL_sublex_info.sub_inwhat = 0;
1370 Extracts a pattern, double-quoted string, or transliteration. This
1373 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
1374 processing a pattern (PL_lex_inpat is true), a transliteration
1375 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
1377 Returns a pointer to the character scanned up to. If this is
1378 advanced from the start pointer supplied (i.e. if anything was
1379 successfully parsed), will leave an OP for the substring scanned
1380 in yylval. Caller must intuit reason for not parsing further
1381 by looking at the next characters herself.
1385 double-quoted style: \r and \n
1386 regexp special ones: \D \s
1389 case and quoting: \U \Q \E
1390 stops on @ and $, but not for $ as tail anchor
1392 In transliterations:
1393 characters are VERY literal, except for - not at the start or end
1394 of the string, which indicates a range. If the range is in bytes,
1395 scan_const expands the range to the full set of intermediate
1396 characters. If the range is in utf8, the hyphen is replaced with
1397 a certain range mark which will be handled by pmtrans() in op.c.
1399 In double-quoted strings:
1401 double-quoted style: \r and \n
1403 deprecated backrefs: \1 (in substitution replacements)
1404 case and quoting: \U \Q \E
1407 scan_const does *not* construct ops to handle interpolated strings.
1408 It stops processing as soon as it finds an embedded $ or @ variable
1409 and leaves it to the caller to work out what's going on.
1411 embedded arrays (whether in pattern or not) could be:
1412 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1414 $ in double-quoted strings must be the symbol of an embedded scalar.
1416 $ in pattern could be $foo or could be tail anchor. Assumption:
1417 it's a tail anchor if $ is the last thing in the string, or if it's
1418 followed by one of "()| \r\n\t"
1420 \1 (backreferences) are turned into $1
1422 The structure of the code is
1423 while (there's a character to process) {
1424 handle transliteration ranges
1425 skip regexp comments /(?#comment)/ and codes /(?{code})/
1426 skip #-initiated comments in //x patterns
1427 check for embedded arrays
1428 check for embedded scalars
1430 leave intact backslashes from leaveit (below)
1431 deprecate \1 in substitution replacements
1432 handle string-changing backslashes \l \U \Q \E, etc.
1433 switch (what was escaped) {
1434 handle \- in a transliteration (becomes a literal -)
1435 handle \132 (octal characters)
1436 handle \x15 and \x{1234} (hex characters)
1437 handle \N{name} (named characters)
1438 handle \cV (control characters)
1439 handle printf-style backslashes (\f, \r, \n, etc)
1441 } (end if backslash)
1442 } (end while character to read)
1447 S_scan_const(pTHX_ char *start)
1449 register char *send = PL_bufend; /* end of the constant */
1450 SV *sv = newSV(send - start); /* sv for the constant */
1451 register char *s = start; /* start of the constant */
1452 register char *d = SvPVX(sv); /* destination for copies */
1453 bool dorange = FALSE; /* are we in a translit range? */
1454 bool didrange = FALSE; /* did we just finish a range? */
1455 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1456 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
1459 UV literal_endpoint = 0;
1460 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
1463 const char * const leaveit = /* set of acceptably-backslashed characters */
1466 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
1469 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1470 /* If we are doing a trans and we know we want UTF8 set expectation */
1471 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1472 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1476 while (s < send || dorange) {
1477 /* get transliterations out of the way (they're most literal) */
1478 if (PL_lex_inwhat == OP_TRANS) {
1479 /* expand a range A-Z to the full set of characters. AIE! */
1481 I32 i; /* current expanded character */
1482 I32 min; /* first character in range */
1483 I32 max; /* last character in range */
1494 char * const c = (char*)utf8_hop((U8*)d, -1);
1498 *c = (char)UTF_TO_NATIVE(0xff);
1499 /* mark the range as done, and continue */
1505 i = d - SvPVX_const(sv); /* remember current offset */
1508 SvLEN(sv) + (has_utf8 ?
1509 (512 - UTF_CONTINUATION_MARK +
1512 /* How many two-byte within 0..255: 128 in UTF-8,
1513 * 96 in UTF-8-mod. */
1515 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1517 d = SvPVX(sv) + i; /* refresh d after realloc */
1521 for (j = 0; j <= 1; j++) {
1522 char * const c = (char*)utf8_hop((U8*)d, -1);
1523 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1529 max = (U8)0xff; /* only to \xff */
1530 uvmax = uv; /* \x{100} to uvmax */
1532 d = c; /* eat endpoint chars */
1537 d -= 2; /* eat the first char and the - */
1538 min = (U8)*d; /* first char in range */
1539 max = (U8)d[1]; /* last char in range */
1546 "Invalid range \"%c-%c\" in transliteration operator",
1547 (char)min, (char)max);
1551 if (literal_endpoint == 2 &&
1552 ((isLOWER(min) && isLOWER(max)) ||
1553 (isUPPER(min) && isUPPER(max)))) {
1555 for (i = min; i <= max; i++)
1557 *d++ = NATIVE_TO_NEED(has_utf8,i);
1559 for (i = min; i <= max; i++)
1561 *d++ = NATIVE_TO_NEED(has_utf8,i);
1566 for (i = min; i <= max; i++)
1569 const U8 ch = (U8)NATIVE_TO_UTF(i);
1570 if (UNI_IS_INVARIANT(ch))
1573 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1574 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1583 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1585 *d++ = (char)UTF_TO_NATIVE(0xff);
1587 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1591 /* mark the range as done, and continue */
1595 literal_endpoint = 0;
1600 /* range begins (ignore - as first or last char) */
1601 else if (*s == '-' && s+1 < send && s != start) {
1603 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
1610 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
1620 literal_endpoint = 0;
1621 native_range = TRUE;
1626 /* if we get here, we're not doing a transliteration */
1628 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1629 except for the last char, which will be done separately. */
1630 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
1632 while (s+1 < send && *s != ')')
1633 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1635 else if (s[2] == '{' /* This should match regcomp.c */
1636 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1639 char *regparse = s + (s[2] == '{' ? 3 : 4);
1642 while (count && (c = *regparse)) {
1643 if (c == '\\' && regparse[1])
1651 if (*regparse != ')')
1652 regparse--; /* Leave one char for continuation. */
1653 while (s < regparse)
1654 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1658 /* likewise skip #-initiated comments in //x patterns */
1659 else if (*s == '#' && PL_lex_inpat &&
1660 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
1661 while (s+1 < send && *s != '\n')
1662 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1665 /* check for embedded arrays
1666 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
1668 else if (*s == '@' && s[1]) {
1669 if (isALNUM_lazy_if(s+1,UTF))
1671 if (strchr(":'{$", s[1]))
1673 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
1674 break; /* in regexp, neither @+ nor @- are interpolated */
1677 /* check for embedded scalars. only stop if we're sure it's a
1680 else if (*s == '$') {
1681 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
1683 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
1684 break; /* in regexp, $ might be tail anchor */
1687 /* End of else if chain - OP_TRANS rejoin rest */
1690 if (*s == '\\' && s+1 < send) {
1693 /* some backslashes we leave behind */
1694 if (*leaveit && *s && strchr(leaveit, *s)) {
1695 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1696 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1700 /* deprecate \1 in strings and substitution replacements */
1701 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
1702 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
1704 if (ckWARN(WARN_SYNTAX))
1705 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
1710 /* string-change backslash escapes */
1711 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
1716 /* if we get here, it's either a quoted -, or a digit */
1719 /* quoted - in transliterations */
1721 if (PL_lex_inwhat == OP_TRANS) {
1728 if ((isALPHA(*s) || isDIGIT(*s)) &&
1730 Perl_warner(aTHX_ packWARN(WARN_MISC),
1731 "Unrecognized escape \\%c passed through",
1733 /* default action is to copy the quoted character */
1734 goto default_action;
1737 /* \132 indicates an octal constant */
1738 case '0': case '1': case '2': case '3':
1739 case '4': case '5': case '6': case '7':
1743 uv = grok_oct(s, &len, &flags, NULL);
1746 goto NUM_ESCAPE_INSERT;
1748 /* \x24 indicates a hex constant */
1752 char* const e = strchr(s, '}');
1753 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1754 PERL_SCAN_DISALLOW_PREFIX;
1759 yyerror("Missing right brace on \\x{}");
1763 uv = grok_hex(s, &len, &flags, NULL);
1769 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
1770 uv = grok_hex(s, &len, &flags, NULL);
1776 /* Insert oct or hex escaped character.
1777 * There will always enough room in sv since such
1778 * escapes will be longer than any UTF-8 sequence
1779 * they can end up as. */
1781 /* We need to map to chars to ASCII before doing the tests
1784 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
1785 if (!has_utf8 && uv > 255) {
1786 /* Might need to recode whatever we have
1787 * accumulated so far if it contains any
1790 * (Can't we keep track of that and avoid
1791 * this rescan? --jhi)
1795 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1796 if (!NATIVE_IS_INVARIANT(*c)) {
1801 const STRLEN offset = d - SvPVX_const(sv);
1803 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1807 while (src >= (const U8 *)SvPVX_const(sv)) {
1808 if (!NATIVE_IS_INVARIANT(*src)) {
1809 const U8 ch = NATIVE_TO_ASCII(*src);
1810 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1811 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
1821 if (has_utf8 || uv > 255) {
1822 d = (char*)uvchr_to_utf8((U8*)d, uv);
1824 if (PL_lex_inwhat == OP_TRANS &&
1825 PL_sublex_info.sub_op) {
1826 PL_sublex_info.sub_op->op_private |=
1827 (PL_lex_repl ? OPpTRANS_FROM_UTF
1831 if (uv > 255 && !dorange)
1832 native_range = FALSE;
1844 /* \N{LATIN SMALL LETTER A} is a named character */
1848 char* e = strchr(s, '}');
1854 yyerror("Missing right brace on \\N{}");
1858 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1860 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1861 PERL_SCAN_DISALLOW_PREFIX;
1864 uv = grok_hex(s, &len, &flags, NULL);
1866 goto NUM_ESCAPE_INSERT;
1868 res = newSVpvn(s + 1, e - s - 1);
1869 res = new_constant( NULL, 0, "charnames",
1870 res, NULL, "\\N{...}" );
1872 sv_utf8_upgrade(res);
1873 str = SvPV_const(res,len);
1874 #ifdef EBCDIC_NEVER_MIND
1875 /* charnames uses pack U and that has been
1876 * recently changed to do the below uni->native
1877 * mapping, so this would be redundant (and wrong,
1878 * the code point would be doubly converted).
1879 * But leave this in just in case the pack U change
1880 * gets revoked, but the semantics is still
1881 * desireable for charnames. --jhi */
1883 UV uv = utf8_to_uvchr((const U8*)str, 0);
1886 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
1888 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1889 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1890 str = SvPV_const(res, len);
1894 if (!has_utf8 && SvUTF8(res)) {
1895 const char * const ostart = SvPVX_const(sv);
1896 SvCUR_set(sv, d - ostart);
1899 sv_utf8_upgrade(sv);
1900 /* this just broke our allocation above... */
1901 SvGROW(sv, (STRLEN)(send - start));
1902 d = SvPVX(sv) + SvCUR(sv);
1905 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
1906 const char * const odest = SvPVX_const(sv);
1908 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
1909 d = SvPVX(sv) + (d - odest);
1913 native_range = FALSE; /* \N{} is guessed to be Unicode */
1915 Copy(str, d, len, char);
1922 yyerror("Missing braces on \\N{}");
1925 /* \c is a control character */
1934 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
1937 yyerror("Missing control char name in \\c");
1941 /* printf-style backslashes, formfeeds, newlines, etc */
1943 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
1946 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
1949 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
1952 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
1955 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
1958 *d++ = ASCII_TO_NEED(has_utf8,'\033');
1961 *d++ = ASCII_TO_NEED(has_utf8,'\007');
1967 } /* end if (backslash) */
1974 /* If we started with encoded form, or already know we want it
1975 and then encode the next character */
1976 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1978 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1979 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
1982 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1983 const STRLEN off = d - SvPVX_const(sv);
1984 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1986 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
1989 if (uv > 255 && !dorange)
1990 native_range = FALSE;
1994 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1996 } /* while loop to process each character */
1998 /* terminate the string and set up the sv */
2000 SvCUR_set(sv, d - SvPVX_const(sv));
2001 if (SvCUR(sv) >= SvLEN(sv))
2002 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2005 if (PL_encoding && !has_utf8) {
2006 sv_recode_to_utf8(sv, PL_encoding);
2012 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2013 PL_sublex_info.sub_op->op_private |=
2014 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2018 /* shrink the sv if we allocated more than we used */
2019 if (SvCUR(sv) + 5 < SvLEN(sv)) {
2020 SvPV_shrink_to_cur(sv);
2023 /* return the substring (via yylval) only if we parsed anything */
2024 if (s > PL_bufptr) {
2025 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
2026 sv = new_constant(start, s - start,
2027 (const char *)(PL_lex_inpat ? "qr" : "q"),
2030 (( PL_lex_inwhat == OP_TRANS
2032 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2035 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2042 * Returns TRUE if there's more to the expression (e.g., a subscript),
2045 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2047 * ->[ and ->{ return TRUE
2048 * { and [ outside a pattern are always subscripts, so return TRUE
2049 * if we're outside a pattern and it's not { or [, then return FALSE
2050 * if we're in a pattern and the first char is a {
2051 * {4,5} (any digits around the comma) returns FALSE
2052 * if we're in a pattern and the first char is a [
2054 * [SOMETHING] has a funky algorithm to decide whether it's a
2055 * character class or not. It has to deal with things like
2056 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2057 * anything else returns TRUE
2060 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
2063 S_intuit_more(pTHX_ register char *s)
2065 if (PL_lex_brackets)
2067 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2069 if (*s != '{' && *s != '[')
2074 /* In a pattern, so maybe we have {n,m}. */
2091 /* On the other hand, maybe we have a character class */
2094 if (*s == ']' || *s == '^')
2097 /* this is terrifying, and it works */
2098 int weight = 2; /* let's weigh the evidence */
2100 unsigned char un_char = 255, last_un_char;
2101 const char * const send = strchr(s,']');
2102 char tmpbuf[sizeof PL_tokenbuf * 4];
2104 if (!send) /* has to be an expression */
2107 Zero(seen,256,char);
2110 else if (isDIGIT(*s)) {
2112 if (isDIGIT(s[1]) && s[2] == ']')
2118 for (; s < send; s++) {
2119 last_un_char = un_char;
2120 un_char = (unsigned char)*s;
2125 weight -= seen[un_char] * 10;
2126 if (isALNUM_lazy_if(s+1,UTF)) {
2128 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
2129 len = (int)strlen(tmpbuf);
2130 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
2135 else if (*s == '$' && s[1] &&
2136 strchr("[#!%*<>()-=",s[1])) {
2137 if (/*{*/ strchr("])} =",s[2]))
2146 if (strchr("wds]",s[1]))
2148 else if (seen[(U8)'\''] || seen[(U8)'"'])
2150 else if (strchr("rnftbxcav",s[1]))
2152 else if (isDIGIT(s[1])) {
2154 while (s[1] && isDIGIT(s[1]))
2164 if (strchr("aA01! ",last_un_char))
2166 if (strchr("zZ79~",s[1]))
2168 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2169 weight -= 5; /* cope with negative subscript */
2172 if (!isALNUM(last_un_char)
2173 && !(last_un_char == '$' || last_un_char == '@'
2174 || last_un_char == '&')
2175 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
2180 if (keyword(tmpbuf, d - tmpbuf))
2183 if (un_char == last_un_char + 1)
2185 weight -= seen[un_char];
2190 if (weight >= 0) /* probably a character class */
2200 * Does all the checking to disambiguate
2202 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2203 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2205 * First argument is the stuff after the first token, e.g. "bar".
2207 * Not a method if bar is a filehandle.
2208 * Not a method if foo is a subroutine prototyped to take a filehandle.
2209 * Not a method if it's really "Foo $bar"
2210 * Method if it's "foo $bar"
2211 * Not a method if it's really "print foo $bar"
2212 * Method if it's really "foo package::" (interpreted as package->foo)
2213 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
2214 * Not a method if bar is a filehandle or package, but is quoted with
2219 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
2221 char *s = start + (*start == '$');
2222 char tmpbuf[sizeof PL_tokenbuf];
2227 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
2231 const char *proto = SvPVX_const(cv);
2242 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
2243 /* start is the beginning of the possible filehandle/object,
2244 * and s is the end of it
2245 * tmpbuf is a copy of it
2248 if (*start == '$') {
2249 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
2254 return *s == '(' ? FUNCMETH : METHOD;
2256 if (!keyword(tmpbuf, len)) {
2257 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2262 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
2263 if (indirgv && GvCVu(indirgv))
2265 /* filehandle or package name makes it a method */
2266 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
2268 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
2269 return 0; /* no assumptions -- "=>" quotes bearword */
2271 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
2272 newSVpvn(tmpbuf,len));
2273 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
2277 return *s == '(' ? FUNCMETH : METHOD;
2285 * Return a string of Perl code to load the debugger. If PERL5DB
2286 * is set, it will return the contents of that, otherwise a
2287 * compile-time require of perl5db.pl.
2294 const char * const pdb = PerlEnv_getenv("PERL5DB");
2298 SETERRNO(0,SS_NORMAL);
2299 return "BEGIN { require 'perl5db.pl' }";
2305 /* Encoded script support. filter_add() effectively inserts a
2306 * 'pre-processing' function into the current source input stream.
2307 * Note that the filter function only applies to the current source file
2308 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2310 * The datasv parameter (which may be NULL) can be used to pass
2311 * private data to this instance of the filter. The filter function
2312 * can recover the SV using the FILTER_DATA macro and use it to
2313 * store private buffers and state information.
2315 * The supplied datasv parameter is upgraded to a PVIO type
2316 * and the IoDIRP/IoANY field is used to store the function pointer,
2317 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
2318 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2319 * private use must be set using malloc'd pointers.
2323 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
2328 if (!PL_rsfp_filters)
2329 PL_rsfp_filters = newAV();
2332 (void)SvUPGRADE(datasv, SVt_PVIO);
2333 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
2334 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
2335 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
2336 FPTR2DPTR(void *, IoANY(datasv)),
2337 SvPV_nolen(datasv)));
2338 av_unshift(PL_rsfp_filters, 1);
2339 av_store(PL_rsfp_filters, 0, datasv) ;
2344 /* Delete most recently added instance of this filter function. */
2346 Perl_filter_del(pTHX_ filter_t funcp)
2351 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2352 FPTR2DPTR(void*, funcp)));
2354 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
2356 /* if filter is on top of stack (usual case) just pop it off */
2357 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
2358 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
2359 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
2360 IoANY(datasv) = (void *)NULL;
2361 sv_free(av_pop(PL_rsfp_filters));
2365 /* we need to search for the correct entry and clear it */
2366 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
2370 /* Invoke the idxth filter function for the current rsfp. */
2371 /* maxlen 0 = read one text line */
2373 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
2377 /* This API is bad. It should have been using unsigned int for maxlen.
2378 Not sure if we want to change the API, but if not we should sanity
2379 check the value here. */
2380 const unsigned int correct_length
2389 if (!PL_rsfp_filters)
2391 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
2392 /* Provide a default input filter to make life easy. */
2393 /* Note that we append to the line. This is handy. */
2394 DEBUG_P(PerlIO_printf(Perl_debug_log,
2395 "filter_read %d: from rsfp\n", idx));
2396 if (correct_length) {
2399 const int old_len = SvCUR(buf_sv);
2401 /* ensure buf_sv is large enough */
2402 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2403 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2404 correct_length)) <= 0) {
2405 if (PerlIO_error(PL_rsfp))
2406 return -1; /* error */
2408 return 0 ; /* end of file */
2410 SvCUR_set(buf_sv, old_len + len) ;
2413 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2414 if (PerlIO_error(PL_rsfp))
2415 return -1; /* error */
2417 return 0 ; /* end of file */
2420 return SvCUR(buf_sv);
2422 /* Skip this filter slot if filter has been deleted */
2423 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
2424 DEBUG_P(PerlIO_printf(Perl_debug_log,
2425 "filter_read %d: skipped (filter deleted)\n",
2427 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
2429 /* Get function pointer hidden within datasv */
2430 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
2431 DEBUG_P(PerlIO_printf(Perl_debug_log,
2432 "filter_read %d: via function %p (%s)\n",
2433 idx, (void*)datasv, SvPV_nolen_const(datasv)));
2434 /* Call function. The function is expected to */
2435 /* call "FILTER_READ(idx+1, buf_sv)" first. */
2436 /* Return: <0:error, =0:eof, >0:not eof */
2437 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
2441 S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
2443 #ifdef PERL_CR_FILTER
2444 if (!PL_rsfp_filters) {
2445 filter_add(S_cr_textfilter,NULL);
2448 if (PL_rsfp_filters) {
2450 SvCUR_set(sv, 0); /* start with empty line */
2451 if (FILTER_READ(0, sv, 0) > 0)
2452 return ( SvPVX(sv) ) ;
2457 return (sv_gets(sv, fp, append));
2461 S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
2465 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
2469 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
2470 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
2472 return GvHV(gv); /* Foo:: */
2475 /* use constant CLASS => 'MyClass' */
2476 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2477 if (gv && GvCV(gv)) {
2478 SV * const sv = cv_const_sv(GvCV(gv));
2480 pkgname = SvPV_nolen_const(sv);
2483 return gv_stashpv(pkgname, FALSE);
2487 S_tokenize_use(pTHX_ int is_use, char *s) {
2488 if (PL_expect != XSTATE)
2489 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2490 is_use ? "use" : "no"));
2492 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2493 s = force_version(s, TRUE);
2494 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
2495 NEXTVAL_NEXTTOKE.opval = NULL;
2498 else if (*s == 'v') {
2499 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2500 s = force_version(s, FALSE);
2504 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2505 s = force_version(s, FALSE);
2507 yylval.ival = is_use;
2511 static const char* const exp_name[] =
2512 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2513 "ATTRTERM", "TERMBLOCK"
2520 Works out what to call the token just pulled out of the input
2521 stream. The yacc parser takes care of taking the ops we return and
2522 stitching them into a tree.
2528 if read an identifier
2529 if we're in a my declaration
2530 croak if they tried to say my($foo::bar)
2531 build the ops for a my() declaration
2532 if it's an access to a my() variable
2533 are we in a sort block?
2534 croak if my($a); $a <=> $b
2535 build ops for access to a my() variable
2536 if in a dq string, and they've said @foo and we can't find @foo
2538 build ops for a bareword
2539 if we already built the token before, use it.
2542 #ifdef USE_PURE_BISON
2544 Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
2549 yylval_pointer[yyactlevel] = lvalp;
2550 yychar_pointer[yyactlevel] = lcharp;
2551 if (yyactlevel >= YYMAXLEVEL)
2552 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
2554 r = Perl_yylex(aTHX);
2564 #pragma segment Perl_yylex
2569 register char *s = PL_bufptr;
2574 /* orig_keyword, gvp, and gv are initialized here because
2575 * jump to the label just_a_word_zero can bypass their
2576 * initialization later. */
2577 I32 orig_keyword = 0;
2582 SV* tmp = newSVpvs("");
2583 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2584 (IV)CopLINE(PL_curcop),
2585 lex_state_names[PL_lex_state],
2586 exp_name[PL_expect],
2587 pv_display(tmp, s, strlen(s), 0, 60));
2590 /* check if there's an identifier for us to look at */
2591 if (PL_pending_ident)
2592 return REPORT(S_pending_ident(aTHX));
2594 /* no identifier pending identification */
2596 switch (PL_lex_state) {
2598 case LEX_NORMAL: /* Some compilers will produce faster */
2599 case LEX_INTERPNORMAL: /* code if we comment these out. */
2603 /* when we've already built the next token, just pull it out of the queue */
2606 yylval = NEXTVAL_NEXTTOKE;
2608 PL_lex_state = PL_lex_defer;
2609 PL_expect = PL_lex_expect;
2610 PL_lex_defer = LEX_NORMAL;
2612 return REPORT(PL_nexttype[PL_nexttoke]);
2614 /* interpolated case modifiers like \L \U, including \Q and \E.
2615 when we get here, PL_bufptr is at the \
2617 case LEX_INTERPCASEMOD:
2619 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
2620 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
2622 /* handle \E or end of string */
2623 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
2625 if (PL_lex_casemods) {
2626 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
2627 PL_lex_casestack[PL_lex_casemods] = '\0';
2629 if (PL_bufptr != PL_bufend
2630 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
2632 PL_lex_state = LEX_INTERPCONCAT;
2636 if (PL_bufptr != PL_bufend)
2638 PL_lex_state = LEX_INTERPCONCAT;
2642 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2643 "### Saw case modifier\n"); });
2645 if (s[1] == '\\' && s[2] == 'E') {
2647 PL_lex_state = LEX_INTERPCONCAT;
2652 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2653 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2654 if ((*s == 'L' || *s == 'U') &&
2655 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2656 PL_lex_casestack[--PL_lex_casemods] = '\0';
2659 if (PL_lex_casemods > 10)
2660 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2661 PL_lex_casestack[PL_lex_casemods++] = *s;
2662 PL_lex_casestack[PL_lex_casemods] = '\0';
2663 PL_lex_state = LEX_INTERPCONCAT;
2664 NEXTVAL_NEXTTOKE.ival = 0;
2667 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
2669 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
2671 NEXTVAL_NEXTTOKE.ival = OP_LC;
2673 NEXTVAL_NEXTTOKE.ival = OP_UC;
2675 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
2677 Perl_croak(aTHX_ "panic: yylex");
2681 if (PL_lex_starts) {
2690 case LEX_INTERPPUSH:
2691 return REPORT(sublex_push());
2693 case LEX_INTERPSTART:
2694 if (PL_bufptr == PL_bufend)
2695 return REPORT(sublex_done());
2696 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2697 "### Interpolated variable\n"); });
2699 PL_lex_dojoin = (*PL_bufptr == '@');
2700 PL_lex_state = LEX_INTERPNORMAL;
2701 if (PL_lex_dojoin) {
2702 NEXTVAL_NEXTTOKE.ival = 0;
2704 #ifdef USE_5005THREADS
2705 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2706 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
2707 force_next(PRIVATEREF);
2709 force_ident("\"", '$');
2710 #endif /* USE_5005THREADS */
2711 NEXTVAL_NEXTTOKE.ival = 0;
2713 NEXTVAL_NEXTTOKE.ival = 0;
2715 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
2718 if (PL_lex_starts++) {
2724 case LEX_INTERPENDMAYBE:
2725 if (intuit_more(PL_bufptr)) {
2726 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
2732 if (PL_lex_dojoin) {
2733 PL_lex_dojoin = FALSE;
2734 PL_lex_state = LEX_INTERPCONCAT;
2737 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
2738 && SvEVALED(PL_lex_repl))
2740 if (PL_bufptr != PL_bufend)
2741 Perl_croak(aTHX_ "Bad evalled substitution pattern");
2745 case LEX_INTERPCONCAT:
2747 if (PL_lex_brackets)
2748 Perl_croak(aTHX_ "panic: INTERPCONCAT");
2750 if (PL_bufptr == PL_bufend)
2751 return REPORT(sublex_done());
2753 if (SvIVX(PL_linestr) == '\'') {
2754 SV *sv = newSVsv(PL_linestr);
2757 else if ( PL_hints & HINT_NEW_RE )
2758 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
2759 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2763 s = scan_const(PL_bufptr);
2765 PL_lex_state = LEX_INTERPCASEMOD;
2767 PL_lex_state = LEX_INTERPSTART;
2770 if (s != PL_bufptr) {
2771 NEXTVAL_NEXTTOKE = yylval;
2774 if (PL_lex_starts++)
2784 PL_lex_state = LEX_NORMAL;
2785 s = scan_formline(PL_bufptr);
2786 if (!PL_lex_formbrack)
2792 PL_oldoldbufptr = PL_oldbufptr;
2798 if (isIDFIRST_lazy_if(s,UTF))
2800 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
2803 goto fake_eof; /* emulate EOF on ^D or ^Z */
2808 if (PL_lex_brackets) {
2809 yyerror((PL_lex_formbrack
2810 ? "Format not terminated"
2811 : "Missing right curly or square bracket"));
2813 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2814 "### Tokener got EOF\n");
2818 if (s++ < PL_bufend)
2819 goto retry; /* ignore stray nulls */
2822 if (!PL_in_eval && !PL_preambled) {
2823 PL_preambled = TRUE;
2824 sv_setpv(PL_linestr,incl_perldb());
2825 if (SvCUR(PL_linestr))
2826 sv_catpvs(PL_linestr,";");
2828 while(AvFILLp(PL_preambleav) >= 0) {
2829 SV *tmpsv = av_shift(PL_preambleav);
2830 sv_catsv(PL_linestr, tmpsv);
2831 sv_catpvs(PL_linestr, ";");
2834 sv_free((SV*)PL_preambleav);
2835 PL_preambleav = NULL;
2837 if (PL_minus_n || PL_minus_p) {
2838 sv_catpvs(PL_linestr, "LINE: while (<>) {");
2840 sv_catpvs(PL_linestr,"chomp;");
2843 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2844 || *PL_splitstr == '"')
2845 && strchr(PL_splitstr + 1, *PL_splitstr))
2846 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
2848 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2849 bytes can be used as quoting characters. :-) */
2850 const char *splits = PL_splitstr;
2851 sv_catpvs(PL_linestr, "our @F=split(q\0");
2854 if (*splits == '\\')
2855 sv_catpvn(PL_linestr, splits, 1);
2856 sv_catpvn(PL_linestr, splits, 1);
2857 } while (*splits++);
2858 /* This loop will embed the trailing NUL of
2859 PL_linestr as the last thing it does before
2861 sv_catpvs(PL_linestr, ");");
2865 sv_catpvs(PL_linestr,"our @F=split(' ');");
2868 sv_catpvs(PL_linestr, "\n");
2869 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2870 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2871 PL_last_lop = PL_last_uni = NULL;
2872 if (PERLDB_LINE && PL_curstash != PL_debstash)
2873 update_debugger_info(PL_linestr, NULL, 0);
2877 bof = PL_rsfp ? TRUE : FALSE;
2878 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
2881 if (PL_preprocess && !PL_in_eval)
2882 (void)PerlProc_pclose(PL_rsfp);
2883 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2884 PerlIO_clearerr(PL_rsfp);
2886 (void)PerlIO_close(PL_rsfp);
2888 PL_doextract = FALSE;
2890 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2891 sv_setpv(PL_linestr,
2894 ? ";}continue{print;}" : ";}"));
2895 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2896 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2897 PL_last_lop = PL_last_uni = NULL;
2898 PL_minus_n = PL_minus_p = 0;
2901 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2902 PL_last_lop = PL_last_uni = NULL;
2903 sv_setpvn(PL_linestr,"",0);
2904 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2906 /* If it looks like the start of a BOM or raw UTF-16,
2907 * check if it in fact is. */
2913 #ifdef PERLIO_IS_STDIO
2914 # ifdef __GNU_LIBRARY__
2915 # if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
2916 # define FTELL_FOR_PIPE_IS_BROKEN
2920 # if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2921 # define FTELL_FOR_PIPE_IS_BROKEN
2926 #ifdef FTELL_FOR_PIPE_IS_BROKEN
2927 /* This loses the possibility to detect the bof
2928 * situation on perl -P when the libc5 is being used.
2929 * Workaround? Maybe attach some extra state to PL_rsfp?
2932 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
2934 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
2937 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2938 s = swallow_bom((U8*)s);
2942 /* Incest with pod. */
2943 if (*s == '=' && strnEQ(s, "=cut", 4)) {
2944 sv_setpvn(PL_linestr, "", 0);
2945 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2946 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2947 PL_last_lop = PL_last_uni = NULL;
2948 PL_doextract = FALSE;
2952 } while (PL_doextract);
2953 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2954 if (PERLDB_LINE && PL_curstash != PL_debstash)
2955 update_debugger_info(PL_linestr, NULL, 0);
2956 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2957 PL_last_lop = PL_last_uni = NULL;
2958 if (CopLINE(PL_curcop) == 1) {
2959 while (s < PL_bufend && isSPACE(*s))
2961 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
2965 if (*s == '#' && *(s+1) == '!')
2967 #ifdef ALTERNATE_SHEBANG
2969 static char const as[] = ALTERNATE_SHEBANG;
2970 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2971 d = s + (sizeof(as) - 1);
2973 #endif /* ALTERNATE_SHEBANG */
2982 while (*d && !isSPACE(*d))
2986 #ifdef ARG_ZERO_IS_SCRIPT
2987 if (ipathend > ipath) {
2989 * HP-UX (at least) sets argv[0] to the script name,
2990 * which makes $^X incorrect. And Digital UNIX and Linux,
2991 * at least, set argv[0] to the basename of the Perl
2992 * interpreter. So, having found "#!", we'll set it right.
2994 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
2996 assert(SvPOK(x) || SvGMAGICAL(x));
2997 if (sv_eq(x, CopFILESV(PL_curcop))) {
2998 sv_setpvn(x, ipath, ipathend - ipath);
3004 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
3005 const char * const lstart = SvPV_const(x,llen);
3007 bstart += blen - llen;
3008 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
3009 sv_setpvn(x, ipath, ipathend - ipath);
3014 TAINT_NOT; /* $^X is always tainted, but that's OK */
3016 #endif /* ARG_ZERO_IS_SCRIPT */
3021 d = instr(s,"perl -");
3023 d = instr(s,"perl");
3025 /* avoid getting into infinite loops when shebang
3026 * line contains "Perl" rather than "perl" */
3028 for (d = ipathend-4; d >= ipath; --d) {
3029 if ((*d == 'p' || *d == 'P')
3030 && !ibcmp(d, "perl", 4))
3040 #ifdef ALTERNATE_SHEBANG
3042 * If the ALTERNATE_SHEBANG on this system starts with a
3043 * character that can be part of a Perl expression, then if
3044 * we see it but not "perl", we're probably looking at the
3045 * start of Perl code, not a request to hand off to some
3046 * other interpreter. Similarly, if "perl" is there, but
3047 * not in the first 'word' of the line, we assume the line
3048 * contains the start of the Perl program.
3050 if (d && *s != '#') {
3051 const char *c = ipath;
3052 while (*c && !strchr("; \t\r\n\f\v#", *c))
3055 d = NULL; /* "perl" not in first word; ignore */
3057 *s = '#'; /* Don't try to parse shebang line */
3059 #endif /* ALTERNATE_SHEBANG */
3060 #ifndef MACOS_TRADITIONAL
3065 !instr(s,"indir") &&
3066 instr(PL_origargv[0],"perl"))
3072 while (s < PL_bufend && isSPACE(*s))
3074 if (s < PL_bufend) {
3075 Newxz(newargv,PL_origargc+3,char*);
3077 while (s < PL_bufend && !isSPACE(*s))
3080 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
3083 newargv = PL_origargv;
3086 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
3088 Perl_croak(aTHX_ "Can't exec %s", ipath);
3092 while (*d && !isSPACE(*d))
3094 while (SPACE_OR_TAB(*d))
3098 const bool switches_done = PL_doswitches;
3099 const U32 oldpdb = PL_perldb;
3100 const bool oldn = PL_minus_n;
3101 const bool oldp = PL_minus_p;
3104 if (*d == 'M' || *d == 'm') {
3105 const char * const m = d;
3106 while (*d && !isSPACE(*d))
3108 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
3111 d = moreswitches(d);
3113 if (PL_doswitches && !switches_done) {
3114 int argc = PL_origargc;
3115 char **argv = PL_origargv;
3118 } while (argc && argv[0][0] == '-' && argv[0][1]);
3119 init_argv_symbols(argc,argv);
3121 if ((PERLDB_LINE && !oldpdb) ||
3122 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
3123 /* if we have already added "LINE: while (<>) {",
3124 we must not do it again */
3126 sv_setpvn(PL_linestr, "", 0);
3127 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3128 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3129 PL_last_lop = PL_last_uni = NULL;
3130 PL_preambled = FALSE;
3132 (void)gv_fetchfile(PL_origfilename);
3139 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3141 PL_lex_state = LEX_FORMLINE;
3146 #ifdef PERL_STRICT_CR
3147 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
3149 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
3151 case ' ': case '\t': case '\f': case 013:
3152 #ifdef MACOS_TRADITIONAL
3159 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
3160 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3161 /* handle eval qq[#line 1 "foo"\n ...] */
3162 CopLINE_dec(PL_curcop);
3166 while (d < PL_bufend && *d != '\n')
3170 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
3171 Perl_croak(aTHX_ "panic: input overflow");
3174 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3176 PL_lex_state = LEX_FORMLINE;
3186 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
3194 while (s < PL_bufend && SPACE_OR_TAB(*s))
3197 if (strnEQ(s,"=>",2)) {
3198 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
3199 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
3200 OPERATOR('-'); /* unary minus */
3202 PL_last_uni = PL_oldbufptr;
3204 case 'r': ftst = OP_FTEREAD; break;
3205 case 'w': ftst = OP_FTEWRITE; break;
3206 case 'x': ftst = OP_FTEEXEC; break;
3207 case 'o': ftst = OP_FTEOWNED; break;
3208 case 'R': ftst = OP_FTRREAD; break;
3209 case 'W': ftst = OP_FTRWRITE; break;
3210 case 'X': ftst = OP_FTREXEC; break;
3211 case 'O': ftst = OP_FTROWNED; break;
3212 case 'e': ftst = OP_FTIS; break;
3213 case 'z': ftst = OP_FTZERO; break;
3214 case 's': ftst = OP_FTSIZE; break;
3215 case 'f': ftst = OP_FTFILE; break;
3216 case 'd': ftst = OP_FTDIR; break;
3217 case 'l': ftst = OP_FTLINK; break;
3218 case 'p': ftst = OP_FTPIPE; break;
3219 case 'S': ftst = OP_FTSOCK; break;
3220 case 'u': ftst = OP_FTSUID; break;
3221 case 'g': ftst = OP_FTSGID; break;
3222 case 'k': ftst = OP_FTSVTX; break;
3223 case 'b': ftst = OP_FTBLK; break;
3224 case 'c': ftst = OP_FTCHR; break;
3225 case 't': ftst = OP_FTTTY; break;
3226 case 'T': ftst = OP_FTTEXT; break;
3227 case 'B': ftst = OP_FTBINARY; break;
3228 case 'M': case 'A': case 'C':
3229 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
3231 case 'M': ftst = OP_FTMTIME; break;
3232 case 'A': ftst = OP_FTATIME; break;
3233 case 'C': ftst = OP_FTCTIME; break;
3241 PL_last_lop_op = (OPCODE)ftst;
3242 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3243 "### Saw file test %c\n", (int)tmp);
3248 /* Assume it was a minus followed by a one-letter named
3249 * subroutine call (or a -bareword), then. */
3250 DEBUG_T( { PerlIO_printf(Perl_debug_log,
3251 "### '-%c' looked like a file test but was not\n",
3258 const char tmp = *s++;
3261 if (PL_expect == XOPERATOR)
3266 else if (*s == '>') {
3269 if (isIDFIRST_lazy_if(s,UTF)) {
3270 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3278 if (PL_expect == XOPERATOR)
3281 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3283 OPERATOR('-'); /* unary minus */
3289 const char tmp = *s++;
3292 if (PL_expect == XOPERATOR)
3297 if (PL_expect == XOPERATOR)
3300 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3307 if (PL_expect != XOPERATOR) {
3308 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3309 PL_expect = XOPERATOR;
3310 force_ident(PL_tokenbuf, '*');
3323 if (PL_expect == XOPERATOR) {
3327 PL_tokenbuf[0] = '%';
3328 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3329 if (!PL_tokenbuf[1]) {
3332 PL_pending_ident = '%';
3344 const char tmp = *s++;
3350 goto just_a_word_zero_gv;
3353 switch (PL_expect) {
3356 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3358 PL_bufptr = s; /* update in case we back off */
3364 PL_expect = XTERMBLOCK;
3368 while (isIDFIRST_lazy_if(s,UTF)) {
3371 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
3372 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3373 if (tmp < 0) tmp = -tmp;
3387 sv = newSVpvn(s, len);
3389 d = scan_str(d,TRUE,TRUE);
3391 /* MUST advance bufptr here to avoid bogus
3392 "at end of line" context messages from yyerror().
3394 PL_bufptr = s + len;
3395 yyerror("Unterminated attribute parameter in attribute list");
3399 return REPORT(0); /* EOF indicator */
3403 sv_catsv(sv, PL_lex_stuff);
3404 attrs = append_elem(OP_LIST, attrs,
3405 newSVOP(OP_CONST, 0, sv));
3406 SvREFCNT_dec(PL_lex_stuff);
3407 PL_lex_stuff = NULL;
3410 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
3412 if (PL_in_my == KEY_our)
3414 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3416 /*EMPTY*/; /* skip to avoid loading attributes.pm */
3419 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3422 /* NOTE: any CV attrs applied here need to be part of
3423 the CVf_BUILTIN_ATTRS define in cv.h! */
3424 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
3426 CvLVALUE_on(PL_compcv);
3428 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
3430 CvLOCKED_on(PL_compcv);
3432 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
3434 CvMETHOD_on(PL_compcv);
3436 /* After we've set the flags, it could be argued that
3437 we don't need to do the attributes.pm-based setting
3438 process, and shouldn't bother appending recognized
3439 flags. To experiment with that, uncomment the
3440 following "else". (Note that's already been
3441 uncommented. That keeps the above-applied built-in
3442 attributes from being intercepted (and possibly
3443 rejected) by a package's attribute routines, but is
3444 justified by the performance win for the common case
3445 of applying only built-in attributes.) */
3447 attrs = append_elem(OP_LIST, attrs,
3448 newSVOP(OP_CONST, 0,
3452 if (*s == ':' && s[1] != ':')
3455 break; /* require real whitespace or :'s */
3456 /* XXX losing whitespace on sequential attributes here */
3460 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3461 if (*s != ';' && *s != '}' && *s != tmp
3462 && (tmp != '=' || *s != ')')) {
3463 const char q = ((*s == '\'') ? '"' : '\'');
3464 /* If here for an expression, and parsed no attrs, back
3466 if (tmp == '=' && !attrs) {
3470 /* MUST advance bufptr here to avoid bogus "at end of line"
3471 context messages from yyerror().
3475 ? Perl_form(aTHX_ "Invalid separator character "
3476 "%c%c%c in attribute list", q, *s, q)
3477 : "Unterminated attribute list" ) );
3485 NEXTVAL_NEXTTOKE.opval = attrs;
3493 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3494 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
3502 const char tmp = *s++;
3507 const char tmp = *s++;
3515 if (PL_lex_brackets <= 0)
3516 yyerror("Unmatched right square bracket");
3519 if (PL_lex_state == LEX_INTERPNORMAL) {
3520 if (PL_lex_brackets == 0) {
3521 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3522 PL_lex_state = LEX_INTERPEND;
3529 if (PL_lex_brackets > 100) {
3530 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
3532 switch (PL_expect) {
3534 if (PL_lex_formbrack) {
3538 if (PL_oldoldbufptr == PL_last_lop)
3539 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3541 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3542 OPERATOR(HASHBRACK);
3544 while (s < PL_bufend && SPACE_OR_TAB(*s))
3547 PL_tokenbuf[0] = '\0';
3548 if (d < PL_bufend && *d == '-') {
3549 PL_tokenbuf[0] = '-';
3551 while (d < PL_bufend && SPACE_OR_TAB(*d))
3554 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3555 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
3557 while (d < PL_bufend && SPACE_OR_TAB(*d))
3560 const char minus = (PL_tokenbuf[0] == '-');
3561 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3569 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3574 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3579 if (PL_oldoldbufptr == PL_last_lop)
3580 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
3582 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3585 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3587 /* This hack is to get the ${} in the message. */
3589 yyerror("syntax error");
3592 OPERATOR(HASHBRACK);
3594 /* This hack serves to disambiguate a pair of curlies
3595 * as being a block or an anon hash. Normally, expectation
3596 * determines that, but in cases where we're not in a
3597 * position to expect anything in particular (like inside
3598 * eval"") we have to resolve the ambiguity. This code
3599 * covers the case where the first term in the curlies is a
3600 * quoted string. Most other cases need to be explicitly
3601 * disambiguated by prepending a "+" before the opening
3602 * curly in order to force resolution as an anon hash.
3604 * XXX should probably propagate the outer expectation
3605 * into eval"" to rely less on this hack, but that could
3606 * potentially break current behavior of eval"".
3610 if (*s == '\'' || *s == '"' || *s == '`') {
3611 /* common case: get past first string, handling escapes */
3612 for (t++; t < PL_bufend && *t != *s;)
3613 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3617 else if (*s == 'q') {
3620 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
3623 /* skip q//-like construct */
3625 char open, close, term;
3628 while (t < PL_bufend && isSPACE(*t))
3630 /* check for q => */
3631 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3632 OPERATOR(HASHBRACK);
3636 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3640 for (t++; t < PL_bufend; t++) {
3641 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
3643 else if (*t == open)
3647 for (t++; t < PL_bufend; t++) {
3648 if (*t == '\\' && t+1 < PL_bufend)
3650 else if (*t == close && --brackets <= 0)
3652 else if (*t == open)
3659 /* skip plain q word */
3660 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3663 else if (isALNUM_lazy_if(t,UTF)) {
3665 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3668 while (t < PL_bufend && isSPACE(*t))
3670 /* if comma follows first term, call it an anon hash */
3671 /* XXX it could be a comma expression with loop modifiers */
3672 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
3673 || (*t == '=' && t[1] == '>')))
3674 OPERATOR(HASHBRACK);
3675 if (PL_expect == XREF)
3678 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3684 yylval.ival = CopLINE(PL_curcop);
3685 if (isSPACE(*s) || *s == '#')
3686 PL_copline = NOLINE; /* invalidate current command line number */
3691 if (PL_lex_brackets <= 0)
3692 yyerror("Unmatched right curly bracket");
3694 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3695 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3696 PL_lex_formbrack = 0;
3697 if (PL_lex_state == LEX_INTERPNORMAL) {
3698 if (PL_lex_brackets == 0) {
3699 if (PL_expect & XFAKEBRACK) {
3700 PL_expect &= XENUMMASK;
3701 PL_lex_state = LEX_INTERPEND;
3703 return yylex(); /* ignore fake brackets */
3705 if (*s == '-' && s[1] == '>')
3706 PL_lex_state = LEX_INTERPENDMAYBE;
3707 else if (*s != '[' && *s != '{')
3708 PL_lex_state = LEX_INTERPEND;
3711 if (PL_expect & XFAKEBRACK) {
3712 PL_expect &= XENUMMASK;
3714 return yylex(); /* ignore fake brackets */
3723 if (PL_expect == XOPERATOR) {
3724 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
3725 && isIDFIRST_lazy_if(s,UTF))
3727 CopLINE_dec(PL_curcop);
3728 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
3729 CopLINE_inc(PL_curcop);
3734 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3736 PL_expect = XOPERATOR;
3737 force_ident(PL_tokenbuf, '&');
3741 yylval.ival = (OPpENTERSUB_AMPER<<8);
3753 const char tmp = *s++;
3760 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
3761 && strchr("+-*/%.^&|<",tmp))
3762 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3763 "Reversed %c= operator",(int)tmp);
3765 if (PL_expect == XSTATE && isALPHA(tmp) &&
3766 (s == PL_linestart+1 || s[-2] == '\n') )
3768 if (PL_in_eval && !PL_rsfp) {
3773 if (strnEQ(s,"=cut",4)) {
3787 PL_doextract = TRUE;
3791 if (PL_lex_brackets < PL_lex_formbrack) {
3793 #ifdef PERL_STRICT_CR
3794 while (SPACE_OR_TAB(*t))
3796 while (SPACE_OR_TAB(*t) || *t == '\r')
3799 if (*t == '\n' || *t == '#') {
3810 const char tmp = *s++;
3819 if (PL_expect != XOPERATOR) {
3820 if (s[1] != '<' && !strchr(s,'>'))
3823 s = scan_heredoc(s);
3825 s = scan_inputsymbol(s);
3826 TERM(sublex_start());
3832 SHop(OP_LEFT_SHIFT);
3846 const char tmp = *s++;
3848 SHop(OP_RIGHT_SHIFT);
3849 else if (tmp == '=')
3858 if (PL_expect == XOPERATOR) {
3859 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3861 deprecate_old((char *)commaless_variable_list);
3862 return REPORT(','); /* grandfather non-comma-format format */
3866 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3867 PL_tokenbuf[0] = '@';
3868 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3869 sizeof PL_tokenbuf - 1, FALSE);
3870 if (PL_expect == XOPERATOR)
3871 no_op("Array length", s);
3872 if (!PL_tokenbuf[1])
3874 PL_expect = XOPERATOR;
3875 PL_pending_ident = '#';
3879 PL_tokenbuf[0] = '$';
3880 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3881 sizeof PL_tokenbuf - 1, FALSE);
3882 if (PL_expect == XOPERATOR)
3884 if (!PL_tokenbuf[1]) {
3886 yyerror("Final $ should be \\$ or $name");
3890 /* This kludge not intended to be bulletproof. */
3891 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
3892 yylval.opval = newSVOP(OP_CONST, 0,
3893 newSViv(CopARYBASE_get(&PL_compiling)));
3894 yylval.opval->op_private = OPpCONST_ARYBASE;
3900 const char tmp = *s;
3901 if (PL_lex_state == LEX_NORMAL)
3904 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
3905 && intuit_more(s)) {
3907 PL_tokenbuf[0] = '@';
3908 if (ckWARN(WARN_SYNTAX)) {
3911 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
3914 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
3915 while (t < PL_bufend && *t != ']')
3917 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3918 "Multidimensional syntax %.*s not supported",
3919 (int)((t - PL_bufptr) + 1), PL_bufptr);
3923 else if (*s == '{') {
3925 PL_tokenbuf[0] = '%';
3926 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
3927 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
3929 char tmpbuf[sizeof PL_tokenbuf];
3932 } while (isSPACE(*t));
3933 if (isIDFIRST_lazy_if(t,UTF)) {
3935 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
3939 if (*t == ';' && get_cv(tmpbuf, FALSE))
3940 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3941 "You need to quote \"%s\"",
3948 PL_expect = XOPERATOR;
3949 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3950 const bool islop = (PL_last_lop == PL_oldoldbufptr);
3951 if (!islop || PL_last_lop_op == OP_GREPSTART)
3952 PL_expect = XOPERATOR;
3953 else if (strchr("$@\"'`q", *s))
3954 PL_expect = XTERM; /* e.g. print $fh "foo" */
3955 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3956 PL_expect = XTERM; /* e.g. print $fh &sub */
3957 else if (isIDFIRST_lazy_if(s,UTF)) {
3958 char tmpbuf[sizeof PL_tokenbuf];
3960 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3961 if ((t2 = keyword(tmpbuf, len))) {
3962 /* binary operators exclude handle interpretations */
3974 PL_expect = XTERM; /* e.g. print $fh length() */
3979 PL_expect = XTERM; /* e.g. print $fh subr() */
3982 else if (isDIGIT(*s))
3983 PL_expect = XTERM; /* e.g. print $fh 3 */
3984 else if (*s == '.' && isDIGIT(s[1]))
3985 PL_expect = XTERM; /* e.g. print $fh .3 */
3986 else if ((*s == '?' || *s == '-' || *s == '+')
3987 && !isSPACE(s[1]) && s[1] != '=')
3988 PL_expect = XTERM; /* e.g. print $fh -1 */
3989 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
3991 PL_expect = XTERM; /* print $fh <<"EOF" */
3994 PL_pending_ident = '$';
3998 if (PL_expect == XOPERATOR)
4000 PL_tokenbuf[0] = '@';
4001 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4002 if (!PL_tokenbuf[1]) {
4005 if (PL_lex_state == LEX_NORMAL)
4007 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
4009 PL_tokenbuf[0] = '%';
4011 /* Warn about @ where they meant $. */
4012 if (*s == '[' || *s == '{') {
4013 if (ckWARN(WARN_SYNTAX)) {
4014 const char *t = s + 1;
4015 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
4017 if (*t == '}' || *t == ']') {
4019 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
4020 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4021 "Scalar value %.*s better written as $%.*s",
4022 (int)(t-PL_bufptr), PL_bufptr,
4023 (int)(t-PL_bufptr-1), PL_bufptr+1);
4028 PL_pending_ident = '@';
4031 case '/': /* may either be division or pattern */
4032 case '?': /* may either be conditional or pattern */
4033 if (PL_expect != XOPERATOR) {
4034 /* Disable warning on "study /blah/" */
4035 if (PL_oldoldbufptr == PL_last_uni
4036 && (*PL_last_uni != 's' || s - PL_last_uni < 5
4037 || memNE(PL_last_uni, "study", 5)
4038 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
4040 s = scan_pat(s,OP_MATCH);
4041 TERM(sublex_start());
4051 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
4052 #ifdef PERL_STRICT_CR
4055 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
4057 && (s == PL_linestart || s[-1] == '\n') )
4059 PL_lex_formbrack = 0;
4063 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
4069 yylval.ival = OPf_SPECIAL;
4075 if (PL_expect != XOPERATOR)
4080 case '0': case '1': case '2': case '3': case '4':
4081 case '5': case '6': case '7': case '8': case '9':
4082 s = scan_num(s, &yylval);
4083 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
4084 if (PL_expect == XOPERATOR)
4089 s = scan_str(s,FALSE,FALSE);
4090 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4091 if (PL_expect == XOPERATOR) {
4092 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4094 deprecate_old((char *)commaless_variable_list);
4095 return REPORT(','); /* grandfather non-comma-format format */
4102 yylval.ival = OP_CONST;
4103 TERM(sublex_start());
4106 s = scan_str(s,FALSE,FALSE);
4107 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
4108 if (PL_expect == XOPERATOR) {
4109 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
4111 deprecate_old((char *)commaless_variable_list);
4112 return REPORT(','); /* grandfather non-comma-format format */
4119 yylval.ival = OP_CONST;
4120 /* FIXME. I think that this can be const if char *d is replaced by
4121 more localised variables. */
4122 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
4123 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4124 yylval.ival = OP_STRINGIFY;
4128 TERM(sublex_start());
4131 s = scan_str(s,FALSE,FALSE);
4132 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
4133 if (PL_expect == XOPERATOR)
4134 no_op("Backticks",s);
4137 yylval.ival = OP_BACKTICK;
4139 TERM(sublex_start());
4143 if (PL_lex_inwhat && isDIGIT(*s) && ckWARN(WARN_SYNTAX))
4144 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
4146 if (PL_expect == XOPERATOR)
4147 no_op("Backslash",s);
4151 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
4152 char *start = s + 2;
4153 while (isDIGIT(*start) || *start == '_')
4155 if (*start == '.' && isDIGIT(start[1])) {
4156 s = scan_num(s, &yylval);
4159 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
4160 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) {
4161 /* XXX Use gv_fetchpvn rather than stomping on a const string */
4162 const char c = *start;
4165 gv = gv_fetchpv(s, 0, SVt_PVCV);
4168 s = scan_num(s, &yylval);
4175 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
4217 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4219 /* Some keywords can be followed by any delimiter, including ':' */
4220 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
4221 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
4222 (PL_tokenbuf[0] == 'q' &&
4223 strchr("qwxr", PL_tokenbuf[1])))));
4225 /* x::* is just a word, unless x is "CORE" */
4226 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4230 while (d < PL_bufend && isSPACE(*d))
4231 d++; /* no comments skipped here, or s### is misparsed */
4233 /* Is this a label? */
4234 if (!tmp && PL_expect == XSTATE
4235 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
4237 yylval.pval = savepv(PL_tokenbuf);
4242 /* Check for keywords */
4243 tmp = keyword(PL_tokenbuf, len);
4245 /* Is this a word before a => operator? */
4246 if (*d == '=' && d[1] == '>') {
4249 = (OP*)newSVOP(OP_CONST, 0,
4250 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
4251 yylval.opval->op_private = OPpCONST_BARE;
4255 if (tmp < 0) { /* second-class keyword? */
4256 GV *ogv = NULL; /* override (winner) */
4257 GV *hgv = NULL; /* hidden (loser) */
4258 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
4260 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
4263 if (GvIMPORTED_CV(gv))
4265 else if (! CvMETHOD(cv))
4269 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4270 (gv = *gvp) != (GV*)&PL_sv_undef &&
4271 GvCVu(gv) && GvIMPORTED_CV(gv))
4278 tmp = 0; /* overridden by import or by GLOBAL */
4281 && -tmp==KEY_lock /* XXX generalizable kludge */
4283 && !hv_fetchs(GvHVn(PL_incgv), "Thread.pm", FALSE))
4285 tmp = 0; /* any sub overrides "weak" keyword */
4287 else { /* no override */
4289 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
4290 Perl_warner(aTHX_ packWARN(WARN_MISC),
4291 "dump() better written as CORE::dump()");
4295 if (hgv && tmp != KEY_x && tmp != KEY_CORE
4296 && ckWARN(WARN_AMBIGUOUS)) /* never ambiguous */
4297 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4298 "Ambiguous call resolved as CORE::%s(), %s",
4299 GvENAME(hgv), "qualify as such or use &");
4306 default: /* not a keyword */
4307 /* Trade off - by using this evil construction we can pull the
4308 variable gv into the block labelled keylookup. If not, then
4309 we have to give it function scope so that the goto from the
4310 earlier ':' case doesn't bypass the initialisation. */
4312 just_a_word_zero_gv:
4320 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
4323 /* Get the rest if it looks like a package qualifier */
4325 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
4327 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
4330 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
4331 *s == '\'' ? "'" : "::");
4336 if (PL_expect == XOPERATOR) {
4337 if (PL_bufptr == PL_linestart) {
4338 CopLINE_dec(PL_curcop);
4339 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
4340 CopLINE_inc(PL_curcop);
4343 no_op("Bareword",s);
4346 /* Look for a subroutine with this name in current package,
4347 unless name is "Foo::", in which case Foo is a bearword
4348 (and a package name). */
4351 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
4353 if (ckWARN(WARN_BAREWORD)
4354 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
4355 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
4356 "Bareword \"%s\" refers to nonexistent package",
4359 PL_tokenbuf[len] = '\0';
4365 /* Mustn't actually add anything to a symbol table.
4366 But also don't want to "initialise" any placeholder
4367 constants that might already be there into full
4368 blown PVGVs with attached PVCV. */
4369 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
4370 GV_NOADD_NOINIT, SVt_PVCV);
4375 /* if we saw a global override before, get the right name */
4378 sv = newSVpvs("CORE::GLOBAL::");
4379 sv_catpv(sv,PL_tokenbuf);
4382 /* If len is 0, newSVpv does strlen(), which is correct.
4383 If len is non-zero, then it will be the true length,
4384 and so the scalar will be created correctly. */
4385 sv = newSVpv(PL_tokenbuf,len);
4388 /* Presume this is going to be a bareword of some sort. */
4391 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4392 yylval.opval->op_private = OPpCONST_BARE;
4393 /* UTF-8 package name? */
4394 if (UTF && !IN_BYTES &&
4395 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
4398 /* And if "Foo::", then that's what it certainly is. */
4403 /* Do the explicit type check so that we don't need to force
4404 the initialisation of the symbol table to have a real GV.
4405 Beware - gv may not really be a PVGV, cv may not really be
4406 a PVCV, (because of the space optimisations that gv_init
4407 understands) But they're true if for this symbol there is
4408 respectively a typeglob and a subroutine.
4410 cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
4411 /* Real typeglob, so get the real subroutine: */
4413 /* A proxy for a subroutine in this package? */
4414 : SvOK(gv) ? (CV *) gv : NULL)
4417 /* See if it's the indirect object for a list operator. */
4419 if (PL_oldoldbufptr &&
4420 PL_oldoldbufptr < PL_bufptr &&
4421 (PL_oldoldbufptr == PL_last_lop
4422 || PL_oldoldbufptr == PL_last_uni) &&
4423 /* NO SKIPSPACE BEFORE HERE! */
4424 (PL_expect == XREF ||
4425 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
4427 bool immediate_paren = *s == '(';
4429 /* (Now we can afford to cross potential line boundary.) */
4430 s = SKIPSPACE2(s,nextnextwhite);
4432 /* Two barewords in a row may indicate method call. */
4434 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
4435 (tmp = intuit_method(s, gv, cv)))
4438 /* If not a declared subroutine, it's an indirect object. */
4439 /* (But it's an indir obj regardless for sort.) */
4441 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
4443 (PL_last_lop_op != OP_MAPSTART &&
4444 PL_last_lop_op != OP_GREPSTART))))
4446 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
4451 PL_expect = XOPERATOR;
4454 /* Is this a word before a => operator? */
4455 if (*s == '=' && s[1] == '>' && !pkgname) {
4457 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
4458 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
4459 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
4463 /* If followed by a paren, it's certainly a subroutine. */
4468 while (SPACE_OR_TAB(*d))
4470 if (*d == ')' && (sv = gv_const_sv(gv))) {
4475 NEXTVAL_NEXTTOKE.opval = yylval.opval;
4476 PL_expect = XOPERATOR;
4482 /* If followed by var or block, call it a method (unless sub) */
4484 if ((*s == '$' || *s == '{') && (!gv || !cv)) {
4485 PL_last_lop = PL_oldbufptr;
4486 PL_last_lop_op = OP_METHOD;
4490 /* If followed by a bareword, see if it looks like indir obj. */
4493 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4494 && (tmp = intuit_method(s, gv, cv)))
4497 /* Not a method, so call it a subroutine (if defined) */
4500 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4501 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4502 "Ambiguous use of -%s resolved as -&%s()",
4503 PL_tokenbuf, PL_tokenbuf);
4504 /* Check for a constant sub */
4505 if ((sv = gv_const_sv(gv))) {
4507 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4508 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
4509 yylval.opval->op_private = 0;
4513 /* Resolve to GV now. */
4514 if (SvTYPE(gv) != SVt_PVGV) {
4515 gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
4516 assert (SvTYPE(gv) == SVt_PVGV);
4517 /* cv must have been some sort of placeholder, so
4518 now needs replacing with a real code reference. */
4522 op_free(yylval.opval);
4523 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
4524 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
4525 PL_last_lop = PL_oldbufptr;
4526 PL_last_lop_op = OP_ENTERSUB;
4527 /* Is there a prototype? */
4531 const char *proto = SvPV_const((SV*)cv, protolen);
4534 if (*proto == '$' && proto[1] == '\0')
4536 while (*proto == ';')
4538 if (*proto == '&' && *s == '{') {
4539 sv_setpv(PL_subname,
4542 "__ANON__" : "__ANON__::__ANON__"));
4546 NEXTVAL_NEXTTOKE.opval = yylval.opval;
4552 /* Call it a bare word */
4554 if (PL_hints & HINT_STRICT_SUBS)
4555 yylval.opval->op_private |= OPpCONST_STRICT;
4558 if (lastchar != '-') {
4559 if (ckWARN(WARN_RESERVED)) {
4563 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
4564 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
4571 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
4572 && ckWARN_d(WARN_AMBIGUOUS)) {
4573 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4574 "Operator or semicolon missing before %c%s",
4575 lastchar, PL_tokenbuf);
4576 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4577 "Ambiguous use of %c resolved as operator %c",
4578 lastchar, lastchar);
4584 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4585 newSVpv(CopFILE(PL_curcop),0));
4589 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4590 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
4593 case KEY___PACKAGE__:
4594 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
4596 ? newSVpv(HvNAME_get(PL_curstash), 0)
4603 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
4604 const char *pname = "main";
4605 if (PL_tokenbuf[2] == 'D')
4606 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
4607 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
4611 GvIOp(gv) = newIO();
4612 IoIFP(GvIOp(gv)) = PL_rsfp;
4613 #if defined(HAS_FCNTL) && defined(F_SETFD)
4615 const int fd = PerlIO_fileno(PL_rsfp);
4616 fcntl(fd,F_SETFD,fd >= 3);
4619 /* Mark this internal pseudo-handle as clean */
4620 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4622 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
4623 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
4624 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
4626 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
4627 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4628 /* if the script was opened in binmode, we need to revert
4629 * it to text mode for compatibility; but only iff it has CRs
4630 * XXX this is a questionable hack at best. */
4631 if (PL_bufend-PL_bufptr > 2
4632 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
4635 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
4636 loc = PerlIO_tell(PL_rsfp);
4637 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4640 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4642 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
4643 #endif /* NETWARE */
4644 #ifdef PERLIO_IS_STDIO /* really? */
4645 # if defined(__BORLANDC__)
4646 /* XXX see note in do_binmode() */
4647 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
4651 PerlIO_seek(PL_rsfp, loc, 0);
4655 #ifdef PERLIO_LAYERS
4658 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4659 else if (PL_encoding) {
4666 XPUSHs(PL_encoding);
4668 call_method("name", G_SCALAR);
4672 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4673 Perl_form(aTHX_ ":encoding(%"SVf")",
4691 if (PL_expect == XSTATE) {
4698 if (*s == ':' && s[1] == ':') {
4701 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
4702 if (!(tmp = keyword(PL_tokenbuf, len)))
4703 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
4706 else if (tmp == KEY_require || tmp == KEY_do)
4707 /* that's a way to remember we saw "CORE::" */
4720 LOP(OP_ACCEPT,XTERM);
4726 LOP(OP_ATAN2,XTERM);
4732 LOP(OP_BINMODE,XTERM);
4735 LOP(OP_BLESS,XTERM);
4745 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
4762 if (!PL_cryptseen) {
4763 PL_cryptseen = TRUE;
4767 LOP(OP_CRYPT,XTERM);
4770 LOP(OP_CHMOD,XTERM);
4773 LOP(OP_CHOWN,XTERM);
4776 LOP(OP_CONNECT,XTERM);
4792 s = force_word(s,WORD,TRUE,TRUE,FALSE);
4793 if (orig_keyword == KEY_do) {
4802 PL_hints |= HINT_BLOCK_SCOPE;
4812 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4813 LOP(OP_DBMOPEN,XTERM);
4819 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4826 yylval.ival = CopLINE(PL_curcop);
4840 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
4841 UNIBRACK(OP_ENTEREVAL);
4856 case KEY_endhostent:
4862 case KEY_endservent:
4865 case KEY_endprotoent:
4876 yylval.ival = CopLINE(PL_curcop);
4878 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
4880 if ((PL_bufend - p) >= 3 &&
4881 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4883 else if ((PL_bufend - p) >= 4 &&
4884 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4887 if (isIDFIRST_lazy_if(p,UTF)) {
4888 p = scan_ident(p, PL_bufend,
4889 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4893 Perl_croak(aTHX_ "Missing $ on loop variable");
4898 LOP(OP_FORMLINE,XTERM);
4904 LOP(OP_FCNTL,XTERM);
4910 LOP(OP_FLOCK,XTERM);
4919 LOP(OP_GREPSTART, XREF);
4922 s = force_word(s,WORD,TRUE,FALSE,FALSE);
4937 case KEY_getpriority:
4938 LOP(OP_GETPRIORITY,XTERM);
4940 case KEY_getprotobyname:
4943 case KEY_getprotobynumber:
4944 LOP(OP_GPBYNUMBER,XTERM);
4946 case KEY_getprotoent:
4958 case KEY_getpeername:
4959 UNI(OP_GETPEERNAME);
4961 case KEY_gethostbyname:
4964 case KEY_gethostbyaddr:
4965 LOP(OP_GHBYADDR,XTERM);
4967 case KEY_gethostent:
4970 case KEY_getnetbyname:
4973 case KEY_getnetbyaddr:
4974 LOP(OP_GNBYADDR,XTERM);
4979 case KEY_getservbyname:
4980 LOP(OP_GSBYNAME,XTERM);
4982 case KEY_getservbyport:
4983 LOP(OP_GSBYPORT,XTERM);
4985 case KEY_getservent:
4988 case KEY_getsockname:
4989 UNI(OP_GETSOCKNAME);
4991 case KEY_getsockopt:
4992 LOP(OP_GSOCKOPT,XTERM);
5014 yylval.ival = CopLINE(PL_curcop);
5018 LOP(OP_INDEX,XTERM);
5024 LOP(OP_IOCTL,XTERM);
5036 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5068 LOP(OP_LISTEN,XTERM);
5077 s = scan_pat(s,OP_MATCH);
5078 TERM(sublex_start());
5081 LOP(OP_MAPSTART, XREF);
5084 LOP(OP_MKDIR,XTERM);
5087 LOP(OP_MSGCTL,XTERM);
5090 LOP(OP_MSGGET,XTERM);
5093 LOP(OP_MSGRCV,XTERM);
5096 LOP(OP_MSGSND,XTERM);
5102 if (isIDFIRST_lazy_if(s,UTF)) {
5103 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
5104 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
5106 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
5107 if (!PL_in_my_stash) {
5110 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
5118 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5125 s = tokenize_use(0, s);
5129 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
5136 if (isIDFIRST_lazy_if(s,UTF)) {
5138 for (d = s; isALNUM_lazy_if(d,UTF);)
5140 for (t=d; isSPACE(*t);)
5142 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
5144 && !(t[0] == '=' && t[1] == '>')
5146 int parms_len = (int)(d-s);
5147 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
5148 "Precedence problem: open %.*s should be open(%.*s)",
5149 parms_len, s, parms_len, s);
5155 yylval.ival = OP_OR;
5165 LOP(OP_OPEN_DIR,XTERM);
5168 checkcomma(s,PL_tokenbuf,"filehandle");
5172 checkcomma(s,PL_tokenbuf,"filehandle");
5191 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5195 LOP(OP_PIPE_OP,XTERM);
5198 s = scan_str(s,FALSE,FALSE);
5201 yylval.ival = OP_CONST;
5202 TERM(sublex_start());
5208 s = scan_str(s,FALSE,FALSE);
5211 PL_expect = XOPERATOR;
5213 if (SvCUR(PL_lex_stuff)) {
5216 d = SvPV_force(PL_lex_stuff, len);
5218 for (; isSPACE(*d) && len; --len, ++d)
5223 if (!warned && ckWARN(WARN_QW)) {
5224 for (; !isSPACE(*d) && len; --len, ++d) {
5226 Perl_warner(aTHX_ packWARN(WARN_QW),
5227 "Possible attempt to separate words with commas");
5230 else if (*d == '#') {
5231 Perl_warner(aTHX_ packWARN(WARN_QW),
5232 "Possible attempt to put comments in qw() list");
5238 for (; !isSPACE(*d) && len; --len, ++d)
5241 sv = newSVpvn(b, d-b);
5242 if (DO_UTF8(PL_lex_stuff))
5244 words = append_elem(OP_LIST, words,
5245 newSVOP(OP_CONST, 0, tokeq(sv)));
5249 NEXTVAL_NEXTTOKE.opval = words;
5254 SvREFCNT_dec(PL_lex_stuff);
5255 PL_lex_stuff = NULL;
5261 s = scan_str(s,FALSE,FALSE);
5264 yylval.ival = OP_STRINGIFY;
5265 if (SvIVX(PL_lex_stuff) == '\'')
5266 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
5267 TERM(sublex_start());
5270 s = scan_pat(s,OP_QR);
5271 TERM(sublex_start());
5274 s = scan_str(s,FALSE,FALSE);
5277 yylval.ival = OP_BACKTICK;
5279 TERM(sublex_start());
5287 s = force_version(s, FALSE);
5289 else if (*s != 'v' || !isDIGIT(s[1])
5290 || (s = force_version(s, TRUE), *s == 'v'))
5292 *PL_tokenbuf = '\0';
5293 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5294 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
5295 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5297 yyerror("<> should be quotes");
5299 if (orig_keyword == KEY_require) {
5307 PL_last_uni = PL_oldbufptr;
5308 PL_last_lop_op = OP_REQUIRE;
5310 return REPORT( (int)REQUIRE );
5316 s = force_word(s,WORD,TRUE,FALSE,FALSE);
5320 LOP(OP_RENAME,XTERM);
5329 LOP(OP_RINDEX,XTERM);
5352 LOP(OP_REVERSE,XTERM);
5363 TERM(sublex_start());
5365 TOKEN(1); /* force error */
5374 LOP(OP_SELECT,XTERM);
5380 LOP(OP_SEMCTL,XTERM);
5383 LOP(OP_SEMGET,XTERM);
5386 LOP(OP_SEMOP,XTERM);
5392 LOP(OP_SETPGRP,XTERM);
5394 case KEY_setpriority:
5395 LOP(OP_SETPRIORITY,XTERM);
5397 case KEY_sethostent:
5403 case KEY_setservent:
5406 case KEY_setprotoent:
5416 LOP(OP_SEEKDIR,XTERM);
5418 case KEY_setsockopt:
5419 LOP(OP_SSOCKOPT,XTERM);
5425 LOP(OP_SHMCTL,XTERM);
5428 LOP(OP_SHMGET,XTERM);
5431 LOP(OP_SHMREAD,XTERM);
5434 LOP(OP_SHMWRITE,XTERM);
5437 LOP(OP_SHUTDOWN,XTERM);
5446 LOP(OP_SOCKET,XTERM);
5448 case KEY_socketpair:
5449 LOP(OP_SOCKPAIR,XTERM);
5452 checkcomma(s,PL_tokenbuf,"subroutine name");
5454 if (*s == ';' || *s == ')') /* probably a close */
5455 Perl_croak(aTHX_ "sort is now a reserved word");
5457 s = force_word(s,WORD,TRUE,TRUE,FALSE);
5461 LOP(OP_SPLIT,XTERM);
5464 LOP(OP_SPRINTF,XTERM);
5467 LOP(OP_SPLICE,XTERM);
5482 LOP(OP_SUBSTR,XTERM);
5488 char tmpbuf[sizeof PL_tokenbuf];
5489 SSize_t tboffset = 0;
5490 expectation attrful;
5491 bool have_name, have_proto;
5492 const int key = tmp;
5496 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
5497 (*s == ':' && s[1] == ':'))
5500 attrful = XATTRBLOCK;
5501 /* remember buffer pos'n for later force_word */
5502 tboffset = s - PL_oldbufptr;
5503 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5504 if (strchr(tmpbuf, ':'))
5505 sv_setpv(PL_subname, tmpbuf);
5507 sv_setsv(PL_subname,PL_curstname);
5508 sv_catpvs(PL_subname,"::");
5509 sv_catpvn(PL_subname,tmpbuf,len);
5516 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5517 PL_expect = XTERMBLOCK;
5518 attrful = XATTRTERM;
5519 sv_setpvn(PL_subname,"?",1);
5523 if (key == KEY_format) {
5525 PL_lex_formbrack = PL_lex_brackets + 1;
5527 (void) force_word(PL_oldbufptr + tboffset, WORD,
5532 /* Look for a prototype */
5535 bool bad_proto = FALSE;
5536 const bool warnsyntax = ckWARN(WARN_SYNTAX);
5538 s = scan_str(s,FALSE,FALSE);
5540 Perl_croak(aTHX_ "Prototype not terminated");
5541 /* strip spaces and check for bad characters */
5542 d = SvPVX(PL_lex_stuff);
5544 for (p = d; *p; ++p) {
5547 if (warnsyntax && !strchr("$@%*;[]&\\", *p))
5553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5554 "Illegal character in prototype for %"SVf" : %s",
5555 (void*)PL_subname, d);
5556 SvCUR_set(PL_lex_stuff, tmp);
5564 if (*s == ':' && s[1] != ':')
5565 PL_expect = attrful;
5566 else if (*s != '{' && key == KEY_sub) {
5568 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5570 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, (void*)PL_subname);
5574 NEXTVAL_NEXTTOKE.opval =
5575 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
5576 PL_lex_stuff = NULL;
5580 sv_setpv(PL_subname,
5582 (PL_curstash ? "__ANON__" : "__ANON__::__ANON__"));
5585 (void) force_word(PL_oldbufptr + tboffset, WORD,
5594 LOP(OP_SYSTEM,XREF);
5597 LOP(OP_SYMLINK,XTERM);
5600 LOP(OP_SYSCALL,XTERM);
5603 LOP(OP_SYSOPEN,XTERM);
5606 LOP(OP_SYSSEEK,XTERM);
5609 LOP(OP_SYSREAD,XTERM);
5612 LOP(OP_SYSWRITE,XTERM);
5616 TERM(sublex_start());
5637 LOP(OP_TRUNCATE,XTERM);
5649 yylval.ival = CopLINE(PL_curcop);
5653 yylval.ival = CopLINE(PL_curcop);
5657 LOP(OP_UNLINK,XTERM);
5663 LOP(OP_UNPACK,XTERM);
5666 LOP(OP_UTIME,XTERM);
5672 LOP(OP_UNSHIFT,XTERM);
5675 s = tokenize_use(1, s);
5685 yylval.ival = CopLINE(PL_curcop);
5689 PL_hints |= HINT_BLOCK_SCOPE;
5696 LOP(OP_WAITPID,XTERM);
5705 ctl_l[0] = toCTRL('L');
5707 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
5710 /* Make sure $^L is defined */
5711 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
5716 if (PL_expect == XOPERATOR)
5722 yylval.ival = OP_XOR;
5727 TERM(sublex_start());
5732 #pragma segment Main
5736 S_pending_ident(pTHX)
5740 /* pit holds the identifier we read and pending_ident is reset */
5741 char pit = PL_pending_ident;
5742 PL_pending_ident = 0;
5744 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5745 "### Pending identifier '%s'\n", PL_tokenbuf); });
5747 /* if we're in a my(), we can't allow dynamics here.
5748 $foo'bar has already been turned into $foo::bar, so
5749 just check for colons.
5751 if it's a legal name, the OP is a PADANY.
5754 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5755 if (strchr(PL_tokenbuf,':'))
5756 yyerror(Perl_form(aTHX_ "No package name allowed for "
5757 "variable %s in \"our\"",
5759 tmp = allocmy(PL_tokenbuf);
5762 if (strchr(PL_tokenbuf,':'))
5763 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5765 yylval.opval = newOP(OP_PADANY, 0);
5766 yylval.opval->op_targ = allocmy(PL_tokenbuf);
5772 build the ops for accesses to a my() variable.
5774 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5775 then used in a comparison. This catches most, but not
5776 all cases. For instance, it catches
5777 sort { my($a); $a <=> $b }
5779 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5780 (although why you'd do that is anyone's guess).
5783 if (!strchr(PL_tokenbuf,':')) {
5784 #ifdef USE_5005THREADS
5785 /* Check for single character per-thread SVs */
5786 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
5787 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
5788 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
5790 yylval.opval = newOP(OP_THREADSV, 0);
5791 yylval.opval->op_targ = tmp;
5794 #endif /* USE_5005THREADS */
5796 tmp = pad_findmy(PL_tokenbuf);
5797 if (tmp != NOT_IN_PAD) {
5798 /* might be an "our" variable" */
5799 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
5800 /* build ops for a bareword */
5802 = newSVpv(HvNAME_get(PAD_COMPNAME_OURSTASH(tmp)), 0);
5803 sv_catpvs(sym, "::");
5804 sv_catpv(sym, PL_tokenbuf+1);
5805 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5806 yylval.opval->op_private = OPpCONST_ENTERED;
5809 ? (GV_ADDMULTI | GV_ADDINEVAL)
5812 ((PL_tokenbuf[0] == '$') ? SVt_PV
5813 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5818 /* if it's a sort block and they're naming $a or $b */
5819 if (PL_last_lop_op == OP_SORT &&
5820 PL_tokenbuf[0] == '$' &&
5821 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5824 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5825 d < PL_bufend && *d != '\n';
5828 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5829 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5835 yylval.opval = newOP(OP_PADANY, 0);
5836 yylval.opval->op_targ = tmp;
5842 Whine if they've said @foo in a doublequoted string,
5843 and @foo isn't a variable we can find in the symbol
5846 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5847 GV *gv = gv_fetchpv(PL_tokenbuf+1, 0, SVt_PVAV);
5848 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5849 && ckWARN(WARN_AMBIGUOUS))
5851 /* Downgraded from fatal to warning 20000522 mjd */
5852 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5853 "Possible unintended interpolation of %s in string",
5858 /* build ops for a bareword */
5859 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5860 yylval.opval->op_private = OPpCONST_ENTERED;
5861 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5862 ((PL_tokenbuf[0] == '$') ? SVt_PV
5863 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5869 * The following code was generated by perl_keyword.pl.
5873 Perl_keyword (pTHX_ char *name, I32 len)
5877 case 1: /* 5 tokens of length 1 */
5909 case 2: /* 18 tokens of length 2 */
6055 case 3: /* 27 tokens of length 3 */
6059 if (name[1] == 'N' &&
6122 if (name[1] == 'i' &&
6154 if (name[1] == 'o' &&
6163 if (name[1] == 'e' &&
6172 if (name[1] == 'n' &&
6181 if (name[1] == 'o' &&
6190 if (name[1] == 'a' &&
6199 if (name[1] == 'o' &&
6261 if (name[1] == 'e' &&
6293 if (name[1] == 'i' &&
6302 if (name[1] == 's' &&
6311 if (name[1] == 'e' &&
6320 if (name[1] == 'o' &&
6332 case 4: /* 40 tokens of length 4 */
6336 if (name[1] == 'O' &&
6346 if (name[1] == 'N' &&
6356 if (name[1] == 'i' &&
6366 if (name[1] == 'h' &&
6376 if (name[1] == 'u' &&
6389 if (name[2] == 'c' &&
6398 if (name[2] == 's' &&
6407 if (name[2] == 'a' &&
6443 if (name[1] == 'o' &&
6456 if (name[2] == 't' &&
6465 if (name[2] == 'o' &&
6474 if (name[2] == 't' &&
6483 if (name[2] == 'e' &&
6496 if (name[1] == 'o' &&
6509 if (name[2] == 'y' &&
6518 if (name[2] == 'l' &&
6534 if (name[2] == 's' &&
6543 if (name[2] == 'n' &&
6552 if (name[2] == 'c' &&
6565 if (name[1] == 'e' &&
6575 if (name[1] == 'p' &&
6588 if (name[2] == 'c' &&
6597 if (name[2] == 'p' &&
6606 if (name[2] == 's' &&
6622 if (name[2] == 'n' &&
6692 if (name[2] == 'r' &&
6701 if (name[2] == 'r' &&
6710 if (name[2] == 'a' &&
6726 if (name[2] == 'l' &&
6793 case 5: /* 36 tokens of length 5 */
6797 if (name[1] == 'E' &&
6808 if (name[1] == 'H' &&
6822 if (name[2] == 'a' &&
6832 if (name[2] == 'a' &&
6846 if (name[1] == 'l' &&
6863 if (name[3] == 'i' &&
6872 if (name[3] == 'o' &&
6908 if (name[2] == 'o' &&
6918 if (name[2] == 'y' &&
6932 if (name[1] == 'l' &&
6946 if (name[2] == 'n' &&
6956 if (name[2] == 'o' &&
6973 if (name[2] == 'd' &&
6983 if (name[2] == 'c' &&
7000 if (name[2] == 'c' &&
7010 if (name[2] == 't' &&
7024 if (name[1] == 'k' &&
7035 if (name[1] == 'r' &&
7049 if (name[2] == 's' &&
7059 if (name[2] == 'd' &&
7076 if (name[2] == 'm' &&
7086 if (name[2] == 'i' &&
7096 if (name[2] == 'e' &&
7106 if (name[2] == 'l' &&
7116 if (name[2] == 'a' &&
7126 if (name[2] == 'u' &&
7140 if (name[1] == 'i' &&
7154 if (name[2] == 'a' &&
7167 if (name[3] == 'e' &&
7202 if (name[2] == 'i' &&
7219 if (name[2] == 'i' &&
7229 if (name[2] == 'i' &&
7246 case 6: /* 33 tokens of length 6 */
7250 if (name[1] == 'c' &&
7265 if (name[2] == 'l' &&
7276 if (name[2] == 'r' &&
7291 if (name[1] == 'e' &&
7306 if (name[2] == 's' &&
7311 if(ckWARN_d(WARN_SYNTAX))
7312 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
7318 if (name[2] == 'i' &&
7336 if (name[2] == 'l' &&
7347 if (name[2] == 'r' &&
7362 if (name[1] == 'm' &&
7377 if (name[2] == 'n' &&
7388 if (name[2] == 's' &&
7403 if (name[1] == 's' &&
7409 if (name[4] == 't' &&
7418 if (name[4] == 'e' &&
7427 if (name[4] == 'c' &&
7436 if (name[4] == 'n' &&
7452 if (name[1] == 'r' &&
7470 if (name[3] == 'a' &&
7480 if (name[3] == 'u' &&
7494 if (name[2] == 'n' &&
7512 if (name[2] == 'a' &&
7526 if (name[3] == 'e' &&
7539 if (name[4] == 't' &&
7548 if (name[4] == 'e' &&
7570 if (name[4] == 't' &&
7579 if (name[4] == 'e' &&
7595 if (name[2] == 'c' &&
7606 if (name[2] == 'l' &&
7617 if (name[2] == 'b' &&
7628 if (name[2] == 's' &&
7651 if (name[4] == 's' &&
7660 if (name[4] == 'n' &&
7673 if (name[3] == 'a' &&
7690 if (name[1] == 'a' &&
7705 case 7: /* 28 tokens of length 7 */
7709 if (name[1] == 'E' &&
7722 if (name[1] == '_' &&
7735 if (name[1] == 'i' &&
7742 return -KEY_binmode;
7748 if (name[1] == 'o' &&
7755 return -KEY_connect;
7764 if (name[2] == 'm' &&
7770 return -KEY_dbmopen;
7776 if (name[2] == 'f' &&
7792 if (name[1] == 'o' &&
7805 if (name[1] == 'e' &&
7812 if (name[5] == 'r' &&
7815 return -KEY_getpgrp;
7821 if (name[5] == 'i' &&
7824 return -KEY_getppid;
7837 if (name[1] == 'c' &&
7844 return -KEY_lcfirst;
7850 if (name[1] == 'p' &&
7857 return -KEY_opendir;
7863 if (name[1] == 'a' &&
7881 if (name[3] == 'd' &&
7886 return -KEY_readdir;
7892 if (name[3] == 'u' &&
7903 if (name[3] == 'e' &&
7908 return -KEY_reverse;
7927 if (name[3] == 'k' &&
7932 return -KEY_seekdir;
7938 if (name[3] == 'p' &&
7943 return -KEY_setpgrp;
7953 if (name[2] == 'm' &&
7959 return -KEY_shmread;
7965 if (name[2] == 'r' &&
7971 return -KEY_sprintf;
7980 if (name[3] == 'l' &&
7985 return -KEY_symlink;
7994 if (name[4] == 'a' &&
7998 return -KEY_syscall;
8004 if (name[4] == 'p' &&
8008 return -KEY_sysopen;
8014 if (name[4] == 'e' &&
8018 return -KEY_sysread;
8024 if (name[4] == 'e' &&
8028 return -KEY_sysseek;
8046 if (name[1] == 'e' &&
8053 return -KEY_telldir;
8062 if (name[2] == 'f' &&
8068 return -KEY_ucfirst;
8074 if (name[2] == 's' &&
8080 return -KEY_unshift;
8090 if (name[1] == 'a' &&
8097 return -KEY_waitpid;
8106 case 8: /* 26 tokens of length 8 */
8110 if (name[1] == 'U' &&
8118 return KEY_AUTOLOAD;
8129 if (name[3] == 'A' &&
8135 return KEY___DATA__;
8141 if (name[3] == 'I' &&
8147 return -KEY___FILE__;
8153 if (name[3] == 'I' &&
8159 return -KEY___LINE__;
8175 if (name[2] == 'o' &&
8182 return -KEY_closedir;
8188 if (name[2] == 'n' &&
8195 return -KEY_continue;
8205 if (name[1] == 'b' &&
8213 return -KEY_dbmclose;
8219 if (name[1] == 'n' &&
8225 if (name[4] == 'r' &&
8230 return -KEY_endgrent;
8236 if (name[4] == 'w' &&
8241 return -KEY_endpwent;
8254 if (name[1] == 'o' &&
8262 return -KEY_formline;
8268 if (name[1] == 'e' &&
8279 if (name[6] == 'n' &&
8282 return -KEY_getgrent;
8288 if (name[6] == 'i' &&
8291 return -KEY_getgrgid;
8297 if (name[6] == 'a' &&
8300 return -KEY_getgrnam;
8313 if (name[4] == 'o' &&
8318 return -KEY_getlogin;
8329 if (name[6] == 'n' &&
8332 return -KEY_getpwent;
8338 if (name[6] == 'a' &&
8341 return -KEY_getpwnam;
8347 if (name[6] == 'i' &&
8350 return -KEY_getpwuid;
8370 if (name[1] == 'e' &&
8377 if (name[5] == 'i' &&
8384 return -KEY_readline;
8389 return -KEY_readlink;
8400 if (name[5] == 'i' &&
8404 return -KEY_readpipe;
8425 if (name[4] == 'r' &&
8430 return -KEY_setgrent;
8436 if (name[4] == 'w' &&
8441 return -KEY_setpwent;
8457 if (name[3] == 'w' &&
8463 return -KEY_shmwrite;
8469 if (name[3] == 't' &&
8475 return -KEY_shutdown;
8485 if (name[2] == 's' &&
8492 return -KEY_syswrite;
8502 if (name[1] == 'r' &&
8510 return -KEY_truncate;
8519 case 9: /* 8 tokens of length 9 */
8523 if (name[1] == 'n' &&
8532 return -KEY_endnetent;
8538 if (name[1] == 'e' &&
8547 return -KEY_getnetent;
8553 if (name[1] == 'o' &&
8562 return -KEY_localtime;
8568 if (name[1] == 'r' &&
8577 return KEY_prototype;
8583 if (name[1] == 'u' &&
8592 return -KEY_quotemeta;
8598 if (name[1] == 'e' &&
8607 return -KEY_rewinddir;
8613 if (name[1] == 'e' &&
8622 return -KEY_setnetent;
8628 if (name[1] == 'a' &&
8637 return -KEY_wantarray;
8646 case 10: /* 9 tokens of length 10 */
8650 if (name[1] == 'n' &&
8656 if (name[4] == 'o' &&
8663 return -KEY_endhostent;
8669 if (name[4] == 'e' &&
8676 return -KEY_endservent;
8689 if (name[1] == 'e' &&
8695 if (name[4] == 'o' &&
8702 return -KEY_gethostent;
8711 if (name[5] == 'r' &&
8717 return -KEY_getservent;
8723 if (name[5] == 'c' &&
8729 return -KEY_getsockopt;
8754 if (name[4] == 'o' &&
8761 return -KEY_sethostent;
8770 if (name[5] == 'r' &&
8776 return -KEY_setservent;
8782 if (name[5] == 'c' &&
8788 return -KEY_setsockopt;
8805 if (name[2] == 'c' &&
8814 return -KEY_socketpair;
8827 case 11: /* 8 tokens of length 11 */
8831 if (name[1] == '_' &&
8842 return -KEY___PACKAGE__;
8848 if (name[1] == 'n' &&
8859 return -KEY_endprotoent;
8865 if (name[1] == 'e' &&
8874 if (name[5] == 'e' &&
8881 return -KEY_getpeername;
8890 if (name[6] == 'o' &&
8896 return -KEY_getpriority;
8902 if (name[6] == 't' &&
8908 return -KEY_getprotoent;
8922 if (name[4] == 'o' &&
8930 return -KEY_getsockname;
8943 if (name[1] == 'e' &&
8951 if (name[6] == 'o' &&
8957 return -KEY_setpriority;
8963 if (name[6] == 't' &&
8969 return -KEY_setprotoent;
8985 case 12: /* 2 tokens of length 12 */
8986 if (name[0] == 'g' &&
8998 if (name[9] == 'd' &&
9001 { /* getnetbyaddr */
9002 return -KEY_getnetbyaddr;
9008 if (name[9] == 'a' &&
9011 { /* getnetbyname */
9012 return -KEY_getnetbyname;
9024 case 13: /* 4 tokens of length 13 */
9025 if (name[0] == 'g' &&
9032 if (name[4] == 'o' &&
9041 if (name[10] == 'd' &&
9044 { /* gethostbyaddr */
9045 return -KEY_gethostbyaddr;
9051 if (name[10] == 'a' &&
9054 { /* gethostbyname */
9055 return -KEY_gethostbyname;
9068 if (name[4] == 'e' &&
9077 if (name[10] == 'a' &&
9080 { /* getservbyname */
9081 return -KEY_getservbyname;
9087 if (name[10] == 'o' &&
9090 { /* getservbyport */
9091 return -KEY_getservbyport;
9110 case 14: /* 1 tokens of length 14 */
9111 if (name[0] == 'g' &&
9125 { /* getprotobyname */
9126 return -KEY_getprotobyname;
9131 case 16: /* 1 tokens of length 16 */
9132 if (name[0] == 'g' &&
9148 { /* getprotobynumber */
9149 return -KEY_getprotobynumber;
9163 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9166 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9167 if (ckWARN(WARN_SYNTAX)) {
9170 for (w = s+2; *w && level; w++) {
9178 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9179 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9180 "%s (...) interpreted as function",name);
9183 while (s < PL_bufend && isSPACE(*s))
9187 while (s < PL_bufend && isSPACE(*s))
9189 if (isIDFIRST_lazy_if(s,UTF)) {
9190 const char * const w = s++;
9191 while (isALNUM_lazy_if(s,UTF))
9193 while (s < PL_bufend && isSPACE(*s))
9197 if (keyword((char *)w, s - w))
9200 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
9201 if (gv && GvCVu(gv))
9203 Perl_croak(aTHX_ "No comma allowed after %s", what);
9208 /* Either returns sv, or mortalizes sv and returns a new SV*.
9209 Best used as sv=new_constant(..., sv, ...).
9210 If s, pv are NULL, calls subroutine with one argument,
9211 and type is used with error messages only. */
9214 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9218 HV * const table = GvHV(PL_hintgv); /* ^H */
9222 const char *why1 = "", *why2 = "", *why3 = "";
9224 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9227 why2 = (const char *)
9228 (strEQ(key,"charnames")
9229 ? "(possibly a missing \"use charnames ...\")"
9231 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
9232 (type ? type: "undef"), why2);
9234 /* This is convoluted and evil ("goto considered harmful")
9235 * but I do not understand the intricacies of all the different
9236 * failure modes of %^H in here. The goal here is to make
9237 * the most probable error message user-friendly. --jhi */
9242 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9243 (type ? type: "undef"), why1, why2, why3);
9245 yyerror((char *)SvPVX_const(msg));
9249 cvp = hv_fetch(table, key, strlen(key), FALSE);
9250 if (!cvp || !SvOK(*cvp)) {
9253 why3 = "} is not defined";
9256 sv_2mortal(sv); /* Parent created it permanently */
9259 pv = sv_2mortal(newSVpvn(s, len));
9261 typesv = sv_2mortal(newSVpv(type, 0));
9263 typesv = &PL_sv_undef;
9265 PUSHSTACKi(PERLSI_OVERLOAD);
9277 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9281 /* Check the eval first */
9282 if (!PL_in_eval && SvTRUE(ERRSV)) {
9283 sv_catpvs(ERRSV, "Propagated");
9284 yyerror((char *)SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
9286 res = SvREFCNT_inc_simple(sv);
9290 SvREFCNT_inc_simple_void(res);
9299 why1 = "Call to &{$^H{";
9301 why3 = "}} did not return a defined value";
9309 /* Returns a NUL terminated string, with the length of the string written to
9313 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9315 register char *d = dest;
9316 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
9319 Perl_croak(aTHX_ ident_too_long);
9320 if (isALNUM(*s)) /* UTF handled below */
9322 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
9327 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
9331 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9332 char *t = s + UTF8SKIP(s);
9334 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9338 Perl_croak(aTHX_ ident_too_long);
9339 Copy(s, d, len, char);
9352 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9354 char *bracket = NULL;
9356 register char *d = dest;
9357 register char * const e = d + destlen + 3; /* two-character token, ending NUL */
9362 while (isDIGIT(*s)) {
9364 Perl_croak(aTHX_ ident_too_long);
9371 Perl_croak(aTHX_ ident_too_long);
9372 if (isALNUM(*s)) /* UTF handled below */
9374 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9379 else if (*s == ':' && s[1] == ':') {
9383 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9384 char *t = s + UTF8SKIP(s);
9385 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9387 if (d + (t - s) > e)
9388 Perl_croak(aTHX_ ident_too_long);
9389 Copy(s, d, t - s, char);
9400 if (PL_lex_state != LEX_NORMAL)
9401 PL_lex_state = LEX_INTERPENDMAYBE;
9404 if (*s == '$' && s[1] &&
9405 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9418 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9423 if (isSPACE(s[-1])) {
9425 const char ch = *s++;
9426 if (!SPACE_OR_TAB(ch)) {
9432 if (isIDFIRST_lazy_if(d,UTF)) {
9436 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9437 end += UTF8SKIP(end);
9438 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9439 end += UTF8SKIP(end);
9441 Copy(s, d, end - s, char);
9446 while ((isALNUM(*s) || *s == ':') && d < e)
9449 Perl_croak(aTHX_ ident_too_long);
9452 while (s < send && SPACE_OR_TAB(*s))
9454 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9455 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
9456 const char * const brack =
9458 ((*s == '[') ? "[...]" : "{...}");
9459 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9460 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9461 funny, dest, brack, funny, dest, brack);
9464 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9468 /* Handle extended ${^Foo} variables
9469 * 1999-02-27 mjd-perl-patch@plover.com */
9470 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9474 while (isALNUM(*s) && d < e) {
9478 Perl_croak(aTHX_ ident_too_long);
9483 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9484 PL_lex_state = LEX_INTERPEND;
9487 if (PL_lex_state == LEX_NORMAL) {
9488 if (ckWARN(WARN_AMBIGUOUS) &&
9489 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
9493 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9494 "Ambiguous use of %c{%s} resolved to %c%s",
9495 funny, dest, funny, dest);
9500 s = bracket; /* let the parser handle it */
9504 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9505 PL_lex_state = LEX_INTERPEND;
9510 Perl_pmflag(pTHX_ U32* pmfl, int ch)
9512 PERL_UNUSED_CONTEXT;
9516 *pmfl |= PMf_GLOBAL;
9518 *pmfl |= PMf_CONTINUE;
9522 *pmfl |= PMf_MULTILINE;
9524 *pmfl |= PMf_SINGLELINE;
9526 *pmfl |= PMf_EXTENDED;
9530 S_scan_pat(pTHX_ char *start, I32 type)
9533 char *s = scan_str(start,FALSE,FALSE);
9534 const char * const valid_flags =
9535 (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
9538 const char * const delimiter = skipspace(start);
9542 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9543 : "Search pattern not terminated" ));
9546 pm = (PMOP*)newPMOP(type, 0);
9547 if (PL_multi_open == '?')
9548 pm->op_pmflags |= PMf_ONCE;
9549 while (*s && strchr(valid_flags, *s))
9550 pmflag(&pm->op_pmflags,*s++);
9551 /* issue a warning if /c is specified,but /g is not */
9552 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL)
9553 && ckWARN(WARN_REGEXP))
9555 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless without /g" );
9558 pm->op_pmpermflags = pm->op_pmflags;
9560 PL_lex_op = (OP*)pm;
9561 yylval.ival = OP_MATCH;
9566 S_scan_subst(pTHX_ char *start)
9573 yylval.ival = OP_NULL;
9575 s = scan_str(start,FALSE,FALSE);
9578 Perl_croak(aTHX_ "Substitution pattern not terminated");
9580 if (s[-1] == PL_multi_open)
9583 first_start = PL_multi_start;
9584 s = scan_str(s,FALSE,FALSE);
9587 SvREFCNT_dec(PL_lex_stuff);
9588 PL_lex_stuff = NULL;
9590 Perl_croak(aTHX_ "Substitution replacement not terminated");
9592 PL_multi_start = first_start; /* so whole substitution is taken together */
9594 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9600 else if (strchr("iogcmsx", *s))
9601 pmflag(&pm->op_pmflags,*s++);
9606 if ((pm->op_pmflags & PMf_CONTINUE) && ckWARN(WARN_REGEXP)) {
9607 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9611 SV * const repl = newSVpvs("");
9613 PL_sublex_info.super_bufptr = s;
9614 PL_sublex_info.super_bufend = PL_bufend;
9616 pm->op_pmflags |= PMf_EVAL;
9618 sv_catpv(repl, (const char *)(es ? "eval " : "do "));
9619 sv_catpvs(repl, "{");
9620 sv_catsv(repl, PL_lex_repl);
9621 if (strchr(SvPVX(PL_lex_repl), '#'))
9622 sv_catpvs(repl, "\n");
9623 sv_catpvs(repl, "}");
9625 SvREFCNT_dec(PL_lex_repl);
9629 pm->op_pmpermflags = pm->op_pmflags;
9630 PL_lex_op = (OP*)pm;
9631 yylval.ival = OP_SUBST;
9636 S_scan_trans(pTHX_ char *start)
9645 yylval.ival = OP_NULL;
9647 s = scan_str(start,FALSE,FALSE);
9649 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9650 if (s[-1] == PL_multi_open)
9653 s = scan_str(s,FALSE,FALSE);
9656 SvREFCNT_dec(PL_lex_stuff);
9657 PL_lex_stuff = NULL;
9659 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9662 complement = del = squash = 0;
9666 complement = OPpTRANS_COMPLEMENT;
9669 del = OPpTRANS_DELETE;
9672 squash = OPpTRANS_SQUASH;
9681 Newx(tbl, complement&&!del?258:256, short);
9682 o = newPVOP(OP_TRANS, 0, (char*)tbl);
9683 o->op_private = del|squash|complement|
9684 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9685 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9688 yylval.ival = OP_TRANS;
9693 S_scan_heredoc(pTHX_ register char *s)
9696 I32 op_type = OP_SCALAR;
9700 const char *found_newline;
9704 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
9708 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9712 while (SPACE_OR_TAB(*peek))
9714 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9717 s = delimcpy(d, e, s, PL_bufend, term, &len);
9727 if (!isALNUM_lazy_if(s,UTF))
9728 deprecate_old("bare << to mean <<\"\"");
9729 for (; isALNUM_lazy_if(s,UTF); s++) {
9734 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9735 Perl_croak(aTHX_ "Delimiter for here document is too long");
9738 len = d - PL_tokenbuf;
9739 #ifndef PERL_STRICT_CR
9740 d = strchr(s, '\r');
9742 char * const olds = s;
9744 while (s < PL_bufend) {
9750 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9759 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9763 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9764 herewas = newSVpvn(s,PL_bufend-s);
9768 herewas = newSVpvn(s,found_newline-s);
9770 s += SvCUR(herewas);
9773 sv_upgrade(tmpstr, SVt_PVIV);
9776 SvIV_set(tmpstr, -1);
9778 else if (term == '`') {
9779 op_type = OP_BACKTICK;
9780 SvIV_set(tmpstr, '\\');
9784 PL_multi_start = CopLINE(PL_curcop);
9785 PL_multi_open = PL_multi_close = '<';
9786 term = *PL_tokenbuf;
9787 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
9788 char * const bufptr = PL_sublex_info.super_bufptr;
9789 char * const bufend = PL_sublex_info.super_bufend;
9790 char * const olds = s - SvCUR(herewas);
9791 s = strchr(bufptr, '\n');
9795 while (s < bufend &&
9796 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9798 CopLINE_inc(PL_curcop);
9801 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9802 missingterm(PL_tokenbuf);
9804 sv_setpvn(herewas,bufptr,d-bufptr+1);
9805 sv_setpvn(tmpstr,d+1,s-d);
9807 sv_catpvn(herewas,s,bufend-s);
9808 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9815 while (s < PL_bufend &&
9816 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9818 CopLINE_inc(PL_curcop);
9820 if (s >= PL_bufend) {
9821 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9822 missingterm(PL_tokenbuf);
9824 sv_setpvn(tmpstr,d+1,s-d);
9826 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9828 sv_catpvn(herewas,s,PL_bufend-s);
9829 sv_setsv(PL_linestr,herewas);
9830 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9831 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9832 PL_last_lop = PL_last_uni = NULL;
9835 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
9836 while (s >= PL_bufend) { /* multiple line string? */
9838 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
9839 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9840 missingterm(PL_tokenbuf);
9842 CopLINE_inc(PL_curcop);
9843 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9844 PL_last_lop = PL_last_uni = NULL;
9845 #ifndef PERL_STRICT_CR
9846 if (PL_bufend - PL_linestart >= 2) {
9847 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9848 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9850 PL_bufend[-2] = '\n';
9852 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9854 else if (PL_bufend[-1] == '\r')
9855 PL_bufend[-1] = '\n';
9857 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9858 PL_bufend[-1] = '\n';
9860 if (PERLDB_LINE && PL_curstash != PL_debstash)
9861 update_debugger_info(PL_linestr, NULL, 0);
9862 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9863 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9864 *(SvPVX(PL_linestr) + off ) = ' ';
9865 sv_catsv(PL_linestr,herewas);
9866 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9867 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9871 sv_catsv(tmpstr,PL_linestr);
9876 PL_multi_end = CopLINE(PL_curcop);
9877 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9878 SvPV_shrink_to_cur(tmpstr);
9880 SvREFCNT_dec(herewas);
9882 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9884 else if (PL_encoding)
9885 sv_recode_to_utf8(tmpstr, PL_encoding);
9887 PL_lex_stuff = tmpstr;
9888 yylval.ival = op_type;
9893 takes: current position in input buffer
9894 returns: new position in input buffer
9895 side-effects: yylval and lex_op are set.
9900 <FH> read from filehandle
9901 <pkg::FH> read from package qualified filehandle
9902 <pkg'FH> read from package qualified filehandle
9903 <$fh> read from filehandle in $fh
9909 S_scan_inputsymbol(pTHX_ char *start)
9911 register char *s = start; /* current position in buffer */
9915 char *d = PL_tokenbuf; /* start of temp holding space */
9916 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9918 end = strchr(s, '\n');
9921 s = delimcpy(d, (char *)e, s + 1, end, '>', &len); /* extract until > */
9923 /* die if we didn't have space for the contents of the <>,
9924 or if it didn't end, or if we see a newline
9927 if (len >= (I32)sizeof PL_tokenbuf)
9928 Perl_croak(aTHX_ "Excessively long <> operator");
9930 Perl_croak(aTHX_ "Unterminated <> operator");
9935 Remember, only scalar variables are interpreted as filehandles by
9936 this code. Anything more complex (e.g., <$fh{$num}>) will be
9937 treated as a glob() call.
9938 This code makes use of the fact that except for the $ at the front,
9939 a scalar variable and a filehandle look the same.
9941 if (*d == '$' && d[1]) d++;
9943 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9944 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9947 /* If we've tried to read what we allow filehandles to look like, and
9948 there's still text left, then it must be a glob() and not a getline.
9949 Use scan_str to pull out the stuff between the <> and treat it
9950 as nothing more than a string.
9953 if (d - PL_tokenbuf != len) {
9954 yylval.ival = OP_GLOB;
9956 s = scan_str(start,FALSE,FALSE);
9958 Perl_croak(aTHX_ "Glob not terminated");
9962 bool readline_overriden = FALSE;
9965 /* we're in a filehandle read situation */
9968 /* turn <> into <ARGV> */
9970 Copy("ARGV",d,5,char);
9972 /* Check whether readline() is overriden */
9973 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9975 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9977 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9978 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
9979 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9980 readline_overriden = TRUE;
9982 /* if <$fh>, create the ops to turn the variable into a
9986 /* try to find it in the pad for this block, otherwise find
9987 add symbol table ops
9989 const PADOFFSET tmp = pad_findmy(d);
9990 if (tmp != NOT_IN_PAD) {
9991 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9992 SV * const sym = sv_2mortal(
9993 newSVpv(HvNAME_get(PAD_COMPNAME_OURSTASH(tmp)),0));
9994 sv_catpvs(sym, "::");
10000 OP * const o = newOP(OP_PADSV, 0);
10002 PL_lex_op = readline_overriden
10003 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10004 append_elem(OP_LIST, o,
10005 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10006 : (OP*)newUNOP(OP_READLINE, 0, o);
10015 ? (GV_ADDMULTI | GV_ADDINEVAL)
10018 PL_lex_op = readline_overriden
10019 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10020 append_elem(OP_LIST,
10021 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10022 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10023 : (OP*)newUNOP(OP_READLINE, 0,
10024 newUNOP(OP_RV2SV, 0,
10025 newGVOP(OP_GV, 0, gv)));
10027 if (!readline_overriden)
10028 PL_lex_op->op_flags |= OPf_SPECIAL;
10029 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
10030 yylval.ival = OP_NULL;
10033 /* If it's none of the above, it must be a literal filehandle
10034 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10036 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
10037 PL_lex_op = readline_overriden
10038 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10039 append_elem(OP_LIST,
10040 newGVOP(OP_GV, 0, gv),
10041 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10042 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10043 yylval.ival = OP_NULL;
10052 takes: start position in buffer
10053 keep_quoted preserve \ on the embedded delimiter(s)
10054 keep_delims preserve the delimiters around the string
10055 returns: position to continue reading from buffer
10056 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10057 updates the read buffer.
10059 This subroutine pulls a string out of the input. It is called for:
10060 q single quotes q(literal text)
10061 ' single quotes 'literal text'
10062 qq double quotes qq(interpolate $here please)
10063 " double quotes "interpolate $here please"
10064 qx backticks qx(/bin/ls -l)
10065 ` backticks `/bin/ls -l`
10066 qw quote words @EXPORT_OK = qw( func() $spam )
10067 m// regexp match m/this/
10068 s/// regexp substitute s/this/that/
10069 tr/// string transliterate tr/this/that/
10070 y/// string transliterate y/this/that/
10071 ($*@) sub prototypes sub foo ($)
10072 (stuff) sub attr parameters sub foo : attr(stuff)
10073 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10075 In most of these cases (all but <>, patterns and transliterate)
10076 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10077 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10078 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10081 It skips whitespace before the string starts, and treats the first
10082 character as the delimiter. If the delimiter is one of ([{< then
10083 the corresponding "close" character )]}> is used as the closing
10084 delimiter. It allows quoting of delimiters, and if the string has
10085 balanced delimiters ([{<>}]) it allows nesting.
10087 On success, the SV with the resulting string is put into lex_stuff or,
10088 if that is already non-NULL, into lex_repl. The second case occurs only
10089 when parsing the RHS of the special constructs s/// and tr/// (y///).
10090 For convenience, the terminating delimiter character is stuffed into
10095 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
10097 SV *sv; /* scalar value: string */
10098 const char *tmps; /* temp string, used for delimiter matching */
10099 register char *s = start; /* current position in the buffer */
10100 register char term; /* terminating character */
10101 register char *to; /* current position in the sv's data */
10102 I32 brackets = 1; /* bracket nesting level */
10103 bool has_utf8 = FALSE; /* is there any utf8 content? */
10104 I32 termcode; /* terminating char. code */
10105 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10106 STRLEN termlen; /* length of terminating string */
10107 int last_off = 0; /* last position for nesting bracket */
10109 /* skip space before the delimiter */
10114 /* mark where we are, in case we need to report errors */
10117 /* after skipping whitespace, the next character is the terminator */
10120 termcode = termstr[0] = term;
10124 termcode = utf8_to_uvchr((U8*)s, &termlen);
10125 Copy(s, termstr, termlen, U8);
10126 if (!UTF8_IS_INVARIANT(term))
10130 /* mark where we are */
10131 PL_multi_start = CopLINE(PL_curcop);
10132 PL_multi_open = term;
10134 /* find corresponding closing delimiter */
10135 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10136 termcode = termstr[0] = term = tmps[5];
10138 PL_multi_close = term;
10140 /* create a new SV to hold the contents. 79 is the SV's initial length.
10141 What a random number. */
10143 sv_upgrade(sv, SVt_PVIV);
10144 SvIV_set(sv, termcode);
10145 (void)SvPOK_only(sv); /* validate pointer */
10147 /* move past delimiter and try to read a complete string */
10149 sv_catpvn(sv, s, termlen);
10152 if (PL_encoding && !UTF) {
10156 int offset = s - SvPVX_const(PL_linestr);
10157 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10158 &offset, (char*)termstr, termlen);
10159 const char * const ns = SvPVX_const(PL_linestr) + offset;
10160 char * const svlast = SvEND(sv) - 1;
10162 for (; s < ns; s++) {
10163 if (*s == '\n' && !PL_rsfp)
10164 CopLINE_inc(PL_curcop);
10167 goto read_more_line;
10169 /* handle quoted delimiters */
10170 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10172 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10174 if ((svlast-1 - t) % 2) {
10175 if (!keep_quoted) {
10176 *(svlast-1) = term;
10178 SvCUR_set(sv, SvCUR(sv) - 1);
10183 if (PL_multi_open == PL_multi_close) {
10189 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10190 /* At here, all closes are "was quoted" one,
10191 so we don't check PL_multi_close. */
10193 if (!keep_quoted && *(t+1) == PL_multi_open)
10198 else if (*t == PL_multi_open)
10206 SvCUR_set(sv, w - SvPVX_const(sv));
10208 last_off = w - SvPVX(sv);
10209 if (--brackets <= 0)
10214 if (!keep_delims) {
10215 SvCUR_set(sv, SvCUR(sv) - 1);
10221 /* extend sv if need be */
10222 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10223 /* set 'to' to the next character in the sv's string */
10224 to = SvPVX(sv)+SvCUR(sv);
10226 /* if open delimiter is the close delimiter read unbridle */
10227 if (PL_multi_open == PL_multi_close) {
10228 for (; s < PL_bufend; s++,to++) {
10229 /* embedded newlines increment the current line number */
10230 if (*s == '\n' && !PL_rsfp)
10231 CopLINE_inc(PL_curcop);
10232 /* handle quoted delimiters */
10233 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10234 if (!keep_quoted && s[1] == term)
10236 /* any other quotes are simply copied straight through */
10240 /* terminate when run out of buffer (the for() condition), or
10241 have found the terminator */
10242 else if (*s == term) {
10245 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10248 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10254 /* if the terminator isn't the same as the start character (e.g.,
10255 matched brackets), we have to allow more in the quoting, and
10256 be prepared for nested brackets.
10259 /* read until we run out of string, or we find the terminator */
10260 for (; s < PL_bufend; s++,to++) {
10261 /* embedded newlines increment the line count */
10262 if (*s == '\n' && !PL_rsfp)
10263 CopLINE_inc(PL_curcop);
10264 /* backslashes can escape the open or closing characters */
10265 if (*s == '\\' && s+1 < PL_bufend) {
10266 if (!keep_quoted &&
10267 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10272 /* allow nested opens and closes */
10273 else if (*s == PL_multi_close && --brackets <= 0)
10275 else if (*s == PL_multi_open)
10277 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10282 /* terminate the copied string and update the sv's end-of-string */
10284 SvCUR_set(sv, to - SvPVX_const(sv));
10287 * this next chunk reads more into the buffer if we're not done yet
10291 break; /* handle case where we are done yet :-) */
10293 #ifndef PERL_STRICT_CR
10294 if (to - SvPVX_const(sv) >= 2) {
10295 if ((to[-2] == '\r' && to[-1] == '\n') ||
10296 (to[-2] == '\n' && to[-1] == '\r'))
10300 SvCUR_set(sv, to - SvPVX_const(sv));
10302 else if (to[-1] == '\r')
10305 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10310 /* if we're out of file, or a read fails, bail and reset the current
10311 line marker so we can report where the unterminated string began
10314 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
10316 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10319 /* we read a line, so increment our line counter */
10320 CopLINE_inc(PL_curcop);
10322 /* update debugger info */
10323 if (PERLDB_LINE && PL_curstash != PL_debstash)
10324 update_debugger_info(PL_linestr, NULL, 0);
10326 /* having changed the buffer, we must update PL_bufend */
10327 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10328 PL_last_lop = PL_last_uni = NULL;
10331 /* at this point, we have successfully read the delimited string */
10333 if (!PL_encoding || UTF) {
10335 sv_catpvn(sv, s, termlen);
10338 if (has_utf8 || PL_encoding)
10341 PL_multi_end = CopLINE(PL_curcop);
10343 /* if we allocated too much space, give some back */
10344 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10345 SvLEN_set(sv, SvCUR(sv) + 1);
10346 SvPV_renew(sv, SvLEN(sv));
10349 /* decide whether this is the first or second quoted string we've read
10362 takes: pointer to position in buffer
10363 returns: pointer to new position in buffer
10364 side-effects: builds ops for the constant in yylval.op
10366 Read a number in any of the formats that Perl accepts:
10368 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10369 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10372 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10374 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10377 If it reads a number without a decimal point or an exponent, it will
10378 try converting the number to an integer and see if it can do so
10379 without loss of precision.
10383 Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
10385 register const char *s = start; /* current position in buffer */
10386 register char *d; /* destination in temp buffer */
10387 register char *e; /* end of temp buffer */
10388 NV nv; /* number read, as a double */
10389 SV *sv = NULL; /* place to put the converted number */
10390 bool floatit; /* boolean: int or float? */
10391 const char *lastub = NULL; /* position of last underbar */
10392 static char const number_too_long[] = "Number too long";
10394 /* We use the first character to decide what type of number this is */
10398 Perl_croak(aTHX_ "panic: scan_num");
10400 /* if it starts with a 0, it could be an octal number, a decimal in
10401 0.13 disguise, or a hexadecimal number, or a binary number. */
10405 u holds the "number so far"
10406 shift the power of 2 of the base
10407 (hex == 4, octal == 3, binary == 1)
10408 overflowed was the number more than we can hold?
10410 Shift is used when we add a digit. It also serves as an "are
10411 we in octal/hex/binary?" indicator to disallow hex characters
10412 when in octal mode.
10417 bool overflowed = FALSE;
10418 bool just_zero = TRUE; /* just plain 0 or binary number? */
10419 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10420 static const char* const bases[5] =
10421 { "", "binary", "", "octal", "hexadecimal" };
10422 static const char* const Bases[5] =
10423 { "", "Binary", "", "Octal", "Hexadecimal" };
10424 static const char* const maxima[5] =
10426 "0b11111111111111111111111111111111",
10430 const char *base, *Base, *max;
10432 /* check for hex */
10437 } else if (s[1] == 'b') {
10442 /* check for a decimal in disguise */
10443 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10445 /* so it must be octal */
10452 if (ckWARN(WARN_SYNTAX))
10453 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10454 "Misplaced _ in number");
10458 base = bases[shift];
10459 Base = Bases[shift];
10460 max = maxima[shift];
10462 /* read the rest of the number */
10464 /* x is used in the overflow test,
10465 b is the digit we're adding on. */
10470 /* if we don't mention it, we're done */
10474 /* _ are ignored -- but warned about if consecutive */
10476 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10477 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10478 "Misplaced _ in number");
10482 /* 8 and 9 are not octal */
10483 case '8': case '9':
10485 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10489 case '2': case '3': case '4':
10490 case '5': case '6': case '7':
10492 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10495 case '0': case '1':
10496 b = *s++ & 15; /* ASCII digit -> value of digit */
10500 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10501 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10502 /* make sure they said 0x */
10505 b = (*s++ & 7) + 9;
10507 /* Prepare to put the digit we have onto the end
10508 of the number so far. We check for overflows.
10514 x = u << shift; /* make room for the digit */
10516 if ((x >> shift) != u
10517 && !(PL_hints & HINT_NEW_BINARY)) {
10520 if (ckWARN_d(WARN_OVERFLOW))
10521 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
10522 "Integer overflow in %s number",
10525 u = x | b; /* add the digit to the end */
10528 n *= nvshift[shift];
10529 /* If an NV has not enough bits in its
10530 * mantissa to represent an UV this summing of
10531 * small low-order numbers is a waste of time
10532 * (because the NV cannot preserve the
10533 * low-order bits anyway): we could just
10534 * remember when did we overflow and in the
10535 * end just multiply n by the right
10543 /* if we get here, we had success: make a scalar value from
10548 /* final misplaced underbar check */
10549 if (s[-1] == '_') {
10550 if (ckWARN(WARN_SYNTAX))
10551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10556 if (n > 4294967295.0 && ckWARN(WARN_PORTABLE))
10557 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10558 "%s number > %s non-portable",
10564 if (u > 0xffffffff && ckWARN(WARN_PORTABLE))
10565 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
10566 "%s number > %s non-portable",
10571 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10572 sv = new_constant(start, s - start, "integer",
10574 else if (PL_hints & HINT_NEW_BINARY)
10575 sv = new_constant(start, s - start, "binary", sv, NULL, NULL);
10580 handle decimal numbers.
10581 we're also sent here when we read a 0 as the first digit
10583 case '1': case '2': case '3': case '4': case '5':
10584 case '6': case '7': case '8': case '9': case '.':
10587 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10590 /* read next group of digits and _ and copy into d */
10591 while (isDIGIT(*s) || *s == '_') {
10592 /* skip underscores, checking for misplaced ones
10596 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10597 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10598 "Misplaced _ in number");
10602 /* check for end of fixed-length buffer */
10604 Perl_croak(aTHX_ number_too_long);
10605 /* if we're ok, copy the character */
10610 /* final misplaced underbar check */
10611 if (lastub && s == lastub + 1) {
10612 if (ckWARN(WARN_SYNTAX))
10613 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10616 /* read a decimal portion if there is one. avoid
10617 3..5 being interpreted as the number 3. followed
10620 if (*s == '.' && s[1] != '.') {
10625 if (ckWARN(WARN_SYNTAX))
10626 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10627 "Misplaced _ in number");
10631 /* copy, ignoring underbars, until we run out of digits.
10633 for (; isDIGIT(*s) || *s == '_'; s++) {
10634 /* fixed length buffer check */
10636 Perl_croak(aTHX_ number_too_long);
10638 if (lastub && s == lastub + 1 && ckWARN(WARN_SYNTAX))
10639 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10640 "Misplaced _ in number");
10646 /* fractional part ending in underbar? */
10647 if (s[-1] == '_') {
10648 if (ckWARN(WARN_SYNTAX))
10649 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10650 "Misplaced _ in number");
10652 if (*s == '.' && isDIGIT(s[1])) {
10653 /* oops, it's really a v-string, but without the "v" */
10659 /* read exponent part, if present */
10660 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10664 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10665 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10667 /* stray preinitial _ */
10669 if (ckWARN(WARN_SYNTAX))
10670 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10671 "Misplaced _ in number");
10675 /* allow positive or negative exponent */
10676 if (*s == '+' || *s == '-')
10679 /* stray initial _ */
10681 if (ckWARN(WARN_SYNTAX))
10682 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10683 "Misplaced _ in number");
10687 /* read digits of exponent */
10688 while (isDIGIT(*s) || *s == '_') {
10691 Perl_croak(aTHX_ number_too_long);
10695 if (((lastub && s == lastub + 1) ||
10696 (!isDIGIT(s[1]) && s[1] != '_'))
10697 && ckWARN(WARN_SYNTAX))
10698 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10699 "Misplaced _ in number");
10706 /* make an sv from the string */
10710 We try to do an integer conversion first if no characters
10711 indicating "float" have been found.
10716 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10718 if (flags == IS_NUMBER_IN_UV) {
10720 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
10723 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10724 if (uv <= (UV) IV_MIN)
10725 sv_setiv(sv, -(IV)uv);
10732 /* terminate the string */
10734 nv = Atof(PL_tokenbuf);
10738 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
10739 (PL_hints & HINT_NEW_INTEGER) )
10740 sv = new_constant(PL_tokenbuf,
10743 (floatit ? "float" : "integer"),
10747 /* if it starts with a v, it could be a v-string */
10750 sv = newSV(5); /* preallocate storage space */
10751 s = scan_vstring((char *)s,sv);
10752 DEBUG_T( { PerlIO_printf(Perl_debug_log,
10753 "### Saw v-string before '%s'\n", s);
10758 /* make the op for the constant and return */
10761 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10763 lvalp->opval = NULL;
10769 S_scan_formline(pTHX_ register char *s)
10771 register char *eol;
10773 SV * const stuff = newSVpvs("");
10774 bool needargs = FALSE;
10775 bool eofmt = FALSE;
10777 while (!needargs) {
10780 #ifdef PERL_STRICT_CR
10781 while (SPACE_OR_TAB(*t))
10784 while (SPACE_OR_TAB(*t) || *t == '\r')
10787 if (*t == '\n' || t == PL_bufend) {
10792 if (PL_in_eval && !PL_rsfp) {
10793 eol = (char *) memchr(s,'\n',PL_bufend-s);
10798 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10800 for (t = s; t < eol; t++) {
10801 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10803 goto enough; /* ~~ must be first line in formline */
10805 if (*t == '@' || *t == '^')
10809 sv_catpvn(stuff, s, eol-s);
10810 #ifndef PERL_STRICT_CR
10811 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10812 char *end = SvPVX(stuff) + SvCUR(stuff);
10815 SvCUR_set(stuff, SvCUR(stuff) - 1);
10824 s = filter_gets(PL_linestr, PL_rsfp, 0);
10825 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
10826 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
10827 PL_last_lop = PL_last_uni = NULL;
10836 if (SvCUR(stuff)) {
10839 PL_lex_state = LEX_NORMAL;
10840 NEXTVAL_NEXTTOKE.ival = 0;
10844 PL_lex_state = LEX_FORMLINE;
10846 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10848 else if (PL_encoding)
10849 sv_recode_to_utf8(stuff, PL_encoding);
10851 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10853 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10857 SvREFCNT_dec(stuff);
10859 PL_lex_formbrack = 0;
10870 PL_cshlen = strlen(PL_cshname);
10872 #if defined(USE_ITHREADS)
10873 PERL_UNUSED_CONTEXT;
10879 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10881 const I32 oldsavestack_ix = PL_savestack_ix;
10882 CV* const outsidecv = PL_compcv;
10885 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10887 SAVEI32(PL_subline);
10888 save_item(PL_subname);
10889 SAVESPTR(PL_compcv);
10891 PL_compcv = (CV*)newSV(0);
10892 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
10893 CvFLAGS(PL_compcv) |= flags;
10895 PL_subline = CopLINE(PL_curcop);
10896 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10897 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc_simple(outsidecv);
10898 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10899 #ifdef USE_5005THREADS
10900 CvOWNER(PL_compcv) = 0;
10901 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex);
10902 MUTEX_INIT(CvMUTEXP(PL_compcv));
10903 #endif /* USE_5005THREADS */
10905 return oldsavestack_ix;
10909 #pragma segment Perl_yylex
10912 Perl_yywarn(pTHX_ char *s)
10914 PL_in_eval |= EVAL_WARNONLY;
10916 PL_in_eval &= ~EVAL_WARNONLY;
10921 Perl_yyerror(pTHX_ char *s)
10923 const char *where = NULL;
10924 const char *context = NULL;
10928 if (!yychar || (yychar == ';' && !PL_rsfp))
10930 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10931 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10932 PL_oldbufptr != PL_bufptr) {
10935 The code below is removed for NetWare because it abends/crashes on NetWare
10936 when the script has error such as not having the closing quotes like:
10937 if ($var eq "value)
10938 Checking of white spaces is anyway done in NetWare code.
10941 while (isSPACE(*PL_oldoldbufptr))
10944 context = PL_oldoldbufptr;
10945 contlen = PL_bufptr - PL_oldoldbufptr;
10947 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10948 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10951 The code below is removed for NetWare because it abends/crashes on NetWare
10952 when the script has error such as not having the closing quotes like:
10953 if ($var eq "value)
10954 Checking of white spaces is anyway done in NetWare code.
10957 while (isSPACE(*PL_oldbufptr))
10960 context = PL_oldbufptr;
10961 contlen = PL_bufptr - PL_oldbufptr;
10963 else if (yychar > 255)
10964 where = "next token ???";
10965 #ifdef USE_PURE_BISON
10966 /* GNU Bison sets the value -2 */
10967 else if (yychar == -2) {
10969 else if ((yychar & 127) == 127) {
10971 if (PL_lex_state == LEX_NORMAL ||
10972 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10973 where = "at end of line";
10974 else if (PL_lex_inpat)
10975 where = "within pattern";
10977 where = "within string";
10980 SV * const where_sv = sv_2mortal(newSVpvs("next char "));
10982 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10983 else if (isPRINT_LC(yychar))
10984 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
10986 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10987 where = SvPVX_const(where_sv);
10989 msg = sv_2mortal(newSVpv(s, 0));
10990 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10991 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10993 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10995 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10996 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10997 Perl_sv_catpvf(aTHX_ msg,
10998 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10999 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11002 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
11003 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, (void*)msg);
11006 if (PL_error_count >= 10) {
11007 if (PL_in_eval && SvCUR(ERRSV))
11008 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11009 (void*)ERRSV, OutCopFILE(PL_curcop));
11011 Perl_croak(aTHX_ "%s has too many errors.\n",
11012 OutCopFILE(PL_curcop));
11015 PL_in_my_stash = NULL;
11019 #pragma segment Main
11023 S_swallow_bom(pTHX_ U8 *s)
11025 const STRLEN slen = SvCUR(PL_linestr);
11028 if (s[1] == 0xFE) {
11029 /* UTF-16 little-endian? (or UTF32-LE?) */
11030 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11031 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
11032 #ifndef PERL_NO_UTF16_FILTER
11033 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
11036 if (PL_bufend > (char*)s) {
11040 filter_add(utf16rev_textfilter, NULL);
11041 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11042 utf16_to_utf8_reversed(s, news,
11043 PL_bufend - (char*)s - 1,
11045 sv_setpvn(PL_linestr, (const char*)news, newlen);
11047 SvUTF8_on(PL_linestr);
11048 s = (U8*)SvPVX(PL_linestr);
11049 PL_bufend = SvPVX(PL_linestr) + newlen;
11052 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
11057 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11058 #ifndef PERL_NO_UTF16_FILTER
11059 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11062 if (PL_bufend > (char *)s) {
11066 filter_add(utf16_textfilter, NULL);
11067 Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
11068 utf16_to_utf8(s, news,
11069 PL_bufend - (char*)s,
11071 sv_setpvn(PL_linestr, (const char*)news, newlen);
11073 SvUTF8_on(PL_linestr);
11074 s = (U8*)SvPVX(PL_linestr);
11075 PL_bufend = SvPVX(PL_linestr) + newlen;
11078 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
11083 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11084 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11085 s += 3; /* UTF-8 */
11091 if (s[2] == 0xFE && s[3] == 0xFF) {
11092 /* UTF-32 big-endian */
11093 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
11096 else if (s[2] == 0 && s[3] != 0) {
11099 * are a good indicator of UTF-16BE. */
11100 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11106 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11107 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11108 s += 4; /* UTF-8 */
11114 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11117 * are a good indicator of UTF-16LE. */
11118 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11127 * Restore a source filter.
11131 restore_rsfp(pTHX_ void *f)
11133 PerlIO * const fp = (PerlIO*)f;
11135 if (PL_rsfp == PerlIO_stdin())
11136 PerlIO_clearerr(PL_rsfp);
11137 else if (PL_rsfp && (PL_rsfp != fp))
11138 PerlIO_close(PL_rsfp);
11142 #ifndef PERL_NO_UTF16_FILTER
11144 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11146 const STRLEN old = SvCUR(sv);
11147 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11148 DEBUG_P(PerlIO_printf(Perl_debug_log,
11149 "utf16_textfilter(%p): %d %d (%d)\n",
11150 FPTR2DPTR(void *, utf16_textfilter),
11151 idx, maxlen, (int) count));
11155 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11156 Copy(SvPVX_const(sv), tmps, old, char);
11157 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11158 SvCUR(sv) - old, &newlen);
11159 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11161 DEBUG_P({sv_dump(sv);});
11166 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11168 const STRLEN old = SvCUR(sv);
11169 const I32 count = FILTER_READ(idx+1, sv, maxlen);
11170 DEBUG_P(PerlIO_printf(Perl_debug_log,
11171 "utf16rev_textfilter(%p): %d %d (%d)\n",
11172 FPTR2DPTR(void *, utf16rev_textfilter),
11173 idx, maxlen, (int) count));
11177 Newx(tmps, SvCUR(sv) * 3 / 2 + 1, U8);
11178 Copy(SvPVX_const(sv), tmps, old, char);
11179 utf16_to_utf8((U8*)SvPVX_const(sv) + old, tmps + old,
11180 SvCUR(sv) - old, &newlen);
11181 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
11183 DEBUG_P({ sv_dump(sv); });
11189 Returns a pointer to the next character after the parsed
11190 vstring, as well as updating the passed in sv.
11192 Function must be called like
11195 s = scan_vstring(s,sv);
11197 The sv should already be large enough to store the vstring
11198 passed in, for performance reasons.
11203 Perl_scan_vstring(pTHX_ char *s, SV *sv)
11205 const char *pos = s;
11206 const char *start = s;
11207 if (*pos == 'v') pos++; /* get past 'v' */
11208 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11210 if ( *pos != '.') {
11211 /* this may not be a v-string if followed by => */
11212 const char *next = pos;
11213 while (next < PL_bufend && isSPACE(*next))
11215 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
11216 /* return string not v-string */
11217 sv_setpvn(sv,(char *)s,pos-s);
11218 return (char *)pos;
11222 if (!isALPHA(*pos)) {
11223 U8 tmpbuf[UTF8_MAXBYTES+1];
11226 s++; /* get past 'v' */
11228 sv_setpvn(sv, "", 0);
11231 /* this is atoi() that tolerates underscores */
11234 const char *end = pos;
11236 while (--end >= s) {
11238 const UV orev = rev;
11239 rev += (*end - '0') * mult;
11241 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
11242 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
11243 "Integer overflow in decimal number");
11247 if (rev > 0x7FFFFFFF)
11248 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11250 /* Append native character for the rev point */
11251 tmpend = uvchr_to_utf8(tmpbuf, rev);
11252 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11253 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11255 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
11261 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
11265 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11273 * c-indentation-style: bsd
11274 * c-basic-offset: 4
11275 * indent-tabs-mode: t
11278 * ex: set ts=8 sts=4 sw=4 noet: