This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
integrate cfgperl changes into mainline
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
ffb4593c
NT
14/* toke.c
15 *
16 * This file is the tokenizer for Perl. It's closely linked to the
17 * parser, perly.y.
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
d3b6f988
GS
26#define yychar PL_yychar
27#define yylval PL_yylval
28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
8903cb82 30
51371543
GS
31static void restore_rsfp(pTHXo_ void *f);
32static void restore_expect(pTHXo_ void *e);
33static void restore_lex_expect(pTHXo_ void *e);
34
a0ed51b3 35#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
36/*
37 * Note: we try to be careful never to call the isXXX_utf8() functions
38 * unless we're pretty sure we've seen the beginning of a UTF-8 character
39 * (that is, the two high bits are set). Otherwise we risk loading in the
40 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
41 */
42#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
43 ? isIDFIRST(*(p)) \
44 : isIDFIRST_utf8((U8*)p))
45#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
46 ? isALNUM(*(p)) \
47 : isALNUM_utf8((U8*)p))
a0ed51b3 48
2b92dfce
GS
49/* In variables name $^X, these are the legal values for X.
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
395c3793
LW
72#ifdef I_FCNTL
73#include <fcntl.h>
74#endif
fe14fcc3
LW
75#ifdef I_SYS_FILE
76#include <sys/file.h>
77#endif
395c3793 78
a790bc05 79/* XXX If this causes problems, set i_unistd=undef in the hint file. */
80#ifdef I_UNISTD
81# include <unistd.h> /* Needed for execv() */
82#endif
83
84
79072805
LW
85#ifdef ff_next
86#undef ff_next
d48672a2
LW
87#endif
88
a1a0e61e
TD
89#ifdef USE_PURE_BISON
90YYSTYPE* yylval_pointer = NULL;
91int* yychar_pointer = NULL;
22c35a8c
GS
92# undef yylval
93# undef yychar
e4bfbdd4
JH
94# define yylval (*yylval_pointer)
95# define yychar (*yychar_pointer)
96# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
97# undef yylex
98# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
3280af22
NIS
108#define CLINE (PL_copline = (PL_curcop->cop_line < PL_copline ? PL_curcop->cop_line : PL_copline))
109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
112 * tokenizer for the next token. They all take an argument.
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
124 * FUN1 : not used
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
129 * PMop : matching operator
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
134 *
135 * Also see LOP and lop() below.
136 */
137
3280af22
NIS
138#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
139#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
140#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
141#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
142#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
143#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
144#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
145#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
146#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
147#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
148#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
149#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
150#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
151#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
152#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
153#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
154#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
155#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
156#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
157#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 158
a687059c
LW
159/* This bit of chicanery makes a unary function followed by
160 * a parenthesis into a function with one argument, highest precedence.
161 */
2f3197b3 162#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
79072805
LW
172 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
173
9f68db38 174/* grandfather return to old style */
3280af22 175#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 176
ffb4593c
NT
177/*
178 * S_ao
179 *
180 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
181 * into an OP_ANDASSIGN or OP_ORASSIGN
182 */
183
76e3520e 184STATIC int
cea2e8a9 185S_ao(pTHX_ int toketype)
a0d0e21e 186{
3280af22
NIS
187 if (*PL_bufptr == '=') {
188 PL_bufptr++;
a0d0e21e
LW
189 if (toketype == ANDAND)
190 yylval.ival = OP_ANDASSIGN;
191 else if (toketype == OROR)
192 yylval.ival = OP_ORASSIGN;
193 toketype = ASSIGNOP;
194 }
195 return toketype;
196}
197
ffb4593c
NT
198/*
199 * S_no_op
200 * When Perl expects an operator and finds something else, no_op
201 * prints the warning. It always prints "<something> found where
202 * operator expected. It prints "Missing semicolon on previous line?"
203 * if the surprise occurs at the start of the line. "do you need to
204 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
205 * where the compiler doesn't know if foo is a method call or a function.
206 * It prints "Missing operator before end of line" if there's nothing
207 * after the missing operator, or "... before <...>" if there is something
208 * after the missing operator.
209 */
210
76e3520e 211STATIC void
cea2e8a9 212S_no_op(pTHX_ char *what, char *s)
463ee0b2 213{
3280af22
NIS
214 char *oldbp = PL_bufptr;
215 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 216
376b8730 217 assert(s >= oldbp);
3280af22 218 PL_bufptr = s;
cea2e8a9 219 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 220 if (is_first)
cea2e8a9 221 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 222 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 223 char *t;
834a4ddd 224 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 225 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 226 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 227 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
228 }
229 else
cea2e8a9 230 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 231 PL_bufptr = oldbp;
8990e307
LW
232}
233
ffb4593c
NT
234/*
235 * S_missingterm
236 * Complain about missing quote/regexp/heredoc terminator.
237 * If it's called with (char *)NULL then it cauterizes the line buffer.
238 * If we're in a delimited string and the delimiter is a control
239 * character, it's reformatted into a two-char sequence like ^C.
240 * This is fatal.
241 */
242
76e3520e 243STATIC void
cea2e8a9 244S_missingterm(pTHX_ char *s)
8990e307
LW
245{
246 char tmpbuf[3];
247 char q;
248 if (s) {
249 char *nl = strrchr(s,'\n');
d2719217 250 if (nl)
8990e307
LW
251 *nl = '\0';
252 }
9d116dd7
JH
253 else if (
254#ifdef EBCDIC
255 iscntrl(PL_multi_close)
256#else
257 PL_multi_close < 32 || PL_multi_close == 127
258#endif
259 ) {
8990e307 260 *tmpbuf = '^';
3280af22 261 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
262 s = "\\n";
263 tmpbuf[2] = '\0';
264 s = tmpbuf;
265 }
266 else {
3280af22 267 *tmpbuf = PL_multi_close;
8990e307
LW
268 tmpbuf[1] = '\0';
269 s = tmpbuf;
270 }
271 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 272 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 273}
79072805 274
ffb4593c
NT
275/*
276 * Perl_deprecate
277 * Warns that something is deprecated. Duh.
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
290 * Deprecate a comma-less variable list. Called from three places
291 * in the tokenizer.
292 */
293
76e3520e 294STATIC void
cea2e8a9 295S_depcom(pTHX)
a0d0e21e
LW
296{
297 deprecate("comma-less variable list");
298}
299
ffb4593c
NT
300/*
301 * text filters for win32 carriage-returns, utf16-to-utf8 and
302 * utf16-to-utf8-reversed, whatever that is.
303 */
304
a868473f
NIS
305#ifdef WIN32
306
76e3520e 307STATIC I32
cea2e8a9 308S_win32_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f
NIS
309{
310 I32 count = FILTER_READ(idx+1, sv, maxlen);
311 if (count > 0 && !maxlen)
312 win32_strip_return(sv);
313 return count;
314}
315#endif
316
a0ed51b3 317STATIC I32
cea2e8a9 318S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
319{
320 I32 count = FILTER_READ(idx+1, sv, maxlen);
321 if (count) {
dfe13c55
GS
322 U8* tmps;
323 U8* tend;
324 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 325 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 326 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
327
328 }
329 return count;
330}
331
332STATIC I32
cea2e8a9 333S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
334{
335 I32 count = FILTER_READ(idx+1, sv, maxlen);
336 if (count) {
dfe13c55
GS
337 U8* tmps;
338 U8* tend;
339 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 340 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 341 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
342
343 }
344 return count;
345}
a868473f 346
ffb4593c
NT
347/*
348 * Perl_lex_start
349 * Initialize variables. Called by perl.c. It uses the Perl stack
350 * to save its state (for recursive calls to the parser).
351 */
352
a0d0e21e 353void
864dbfa3 354Perl_lex_start(pTHX_ SV *line)
79072805 355{
0f15f207 356 dTHR;
8990e307
LW
357 char *s;
358 STRLEN len;
359
3280af22
NIS
360 SAVEI32(PL_lex_dojoin);
361 SAVEI32(PL_lex_brackets);
362 SAVEI32(PL_lex_fakebrack);
363 SAVEI32(PL_lex_casemods);
364 SAVEI32(PL_lex_starts);
365 SAVEI32(PL_lex_state);
366 SAVESPTR(PL_lex_inpat);
367 SAVEI32(PL_lex_inwhat);
368 SAVEI16(PL_curcop->cop_line);
369 SAVEPPTR(PL_bufptr);
370 SAVEPPTR(PL_bufend);
371 SAVEPPTR(PL_oldbufptr);
372 SAVEPPTR(PL_oldoldbufptr);
373 SAVEPPTR(PL_linestart);
374 SAVESPTR(PL_linestr);
375 SAVEPPTR(PL_lex_brackstack);
376 SAVEPPTR(PL_lex_casestack);
51371543 377 SAVEDESTRUCTOR(restore_rsfp, PL_rsfp);
3280af22
NIS
378 SAVESPTR(PL_lex_stuff);
379 SAVEI32(PL_lex_defer);
380 SAVESPTR(PL_lex_repl);
51371543
GS
381 SAVEDESTRUCTOR(restore_expect, PL_tokenbuf + PL_expect); /* encode as pointer */
382 SAVEDESTRUCTOR(restore_lex_expect, PL_tokenbuf + PL_expect);
3280af22
NIS
383
384 PL_lex_state = LEX_NORMAL;
385 PL_lex_defer = 0;
386 PL_expect = XSTATE;
387 PL_lex_brackets = 0;
388 PL_lex_fakebrack = 0;
389 New(899, PL_lex_brackstack, 120, char);
390 New(899, PL_lex_casestack, 12, char);
391 SAVEFREEPV(PL_lex_brackstack);
392 SAVEFREEPV(PL_lex_casestack);
393 PL_lex_casemods = 0;
394 *PL_lex_casestack = '\0';
395 PL_lex_dojoin = 0;
396 PL_lex_starts = 0;
397 PL_lex_stuff = Nullsv;
398 PL_lex_repl = Nullsv;
399 PL_lex_inpat = 0;
400 PL_lex_inwhat = 0;
401 PL_linestr = line;
402 if (SvREADONLY(PL_linestr))
403 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
404 s = SvPV(PL_linestr, len);
8990e307 405 if (len && s[len-1] != ';') {
3280af22
NIS
406 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
407 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
408 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 409 }
3280af22
NIS
410 SvTEMP_off(PL_linestr);
411 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
412 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
413 SvREFCNT_dec(PL_rs);
79cb57f6 414 PL_rs = newSVpvn("\n", 1);
3280af22 415 PL_rsfp = 0;
79072805 416}
a687059c 417
ffb4593c
NT
418/*
419 * Perl_lex_end
420 * Tidy up. Called from pp_ctl.c in the sv_compile_2op(), doeval(),
421 * and pp_leaveeval() subroutines.
422 */
423
463ee0b2 424void
864dbfa3 425Perl_lex_end(pTHX)
463ee0b2 426{
3280af22 427 PL_doextract = FALSE;
463ee0b2
LW
428}
429
ffb4593c
NT
430/*
431 * S_incline
432 * This subroutine has nothing to do with tilting, whether at windmills
433 * or pinball tables. Its name is short for "increment line". It
434 * increments the current line number in PL_curcop->cop_line and checks
435 * to see whether the line starts with a comment of the form
436 * # line 500
437 * If so, it sets the current line number to the number in the comment.
438 */
439
76e3520e 440STATIC void
cea2e8a9 441S_incline(pTHX_ char *s)
463ee0b2 442{
0f15f207 443 dTHR;
463ee0b2
LW
444 char *t;
445 char *n;
446 char ch;
447 int sawline = 0;
448
3280af22 449 PL_curcop->cop_line++;
463ee0b2
LW
450 if (*s++ != '#')
451 return;
452 while (*s == ' ' || *s == '\t') s++;
453 if (strnEQ(s, "line ", 5)) {
454 s += 5;
455 sawline = 1;
456 }
457 if (!isDIGIT(*s))
458 return;
459 n = s;
460 while (isDIGIT(*s))
461 s++;
462 while (*s == ' ' || *s == '\t')
463 s++;
464 if (*s == '"' && (t = strchr(s+1, '"')))
465 s++;
466 else {
467 if (!sawline)
468 return; /* false alarm */
469 for (t = s; !isSPACE(*t); t++) ;
470 }
471 ch = *t;
472 *t = '\0';
473 if (t - s > 0)
3280af22 474 PL_curcop->cop_filegv = gv_fetchfile(s);
463ee0b2 475 else
3280af22 476 PL_curcop->cop_filegv = gv_fetchfile(PL_origfilename);
463ee0b2 477 *t = ch;
3280af22 478 PL_curcop->cop_line = atoi(n)-1;
463ee0b2
LW
479}
480
ffb4593c
NT
481/*
482 * S_skipspace
483 * Called to gobble the appropriate amount and type of whitespace.
484 * Skips comments as well.
485 */
486
76e3520e 487STATIC char *
cea2e8a9 488S_skipspace(pTHX_ register char *s)
a687059c 489{
11343788 490 dTHR;
3280af22
NIS
491 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
492 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
493 s++;
494 return s;
495 }
496 for (;;) {
fd049845 497 STRLEN prevlen;
60e6418e
GS
498 while (s < PL_bufend && isSPACE(*s)) {
499 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
500 incline(s);
501 }
ffb4593c
NT
502
503 /* comment */
3280af22
NIS
504 if (s < PL_bufend && *s == '#') {
505 while (s < PL_bufend && *s != '\n')
463ee0b2 506 s++;
60e6418e 507 if (s < PL_bufend) {
463ee0b2 508 s++;
60e6418e
GS
509 if (PL_in_eval && !PL_rsfp) {
510 incline(s);
511 continue;
512 }
513 }
463ee0b2 514 }
ffb4593c
NT
515
516 /* only continue to recharge the buffer if we're at the end
517 * of the buffer, we're not reading from a source filter, and
518 * we're in normal lexing mode
519 */
3280af22 520 if (s < PL_bufend || !PL_rsfp || PL_lex_state != LEX_NORMAL)
463ee0b2 521 return s;
ffb4593c
NT
522
523 /* try to recharge the buffer */
3280af22 524 if ((s = filter_gets(PL_linestr, PL_rsfp, (prevlen = SvCUR(PL_linestr)))) == Nullch) {
ffb4593c 525 /* end of file. Add on the -p or -n magic */
3280af22
NIS
526 if (PL_minus_n || PL_minus_p) {
527 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
528 ";}continue{print or die qq(-p destination: $!\\n)" :
529 "");
3280af22
NIS
530 sv_catpv(PL_linestr,";}");
531 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
532 }
533 else
3280af22 534 sv_setpv(PL_linestr,";");
ffb4593c
NT
535
536 /* reset variables for next time we lex */
3280af22
NIS
537 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
538 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
539
540 /* Close the filehandle. Could be from -P preprocessor,
541 * STDIN, or a regular file. If we were reading code from
542 * STDIN (because the commandline held no -e or filename)
543 * then we don't close it, we reset it so the code can
544 * read from STDIN too.
545 */
546
3280af22
NIS
547 if (PL_preprocess && !PL_in_eval)
548 (void)PerlProc_pclose(PL_rsfp);
549 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
550 PerlIO_clearerr(PL_rsfp);
8990e307 551 else
3280af22
NIS
552 (void)PerlIO_close(PL_rsfp);
553 PL_rsfp = Nullfp;
463ee0b2
LW
554 return s;
555 }
ffb4593c
NT
556
557 /* not at end of file, so we only read another line */
3280af22
NIS
558 PL_linestart = PL_bufptr = s + prevlen;
559 PL_bufend = s + SvCUR(PL_linestr);
560 s = PL_bufptr;
a0d0e21e 561 incline(s);
ffb4593c
NT
562
563 /* debugger active and we're not compiling the debugger code,
564 * so store the line into the debugger's array of lines
565 */
3280af22 566 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
567 SV *sv = NEWSV(85,0);
568
569 sv_upgrade(sv, SVt_PVMG);
3280af22
NIS
570 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
571 av_store(GvAV(PL_curcop->cop_filegv),(I32)PL_curcop->cop_line,sv);
8990e307 572 }
463ee0b2 573 }
a687059c 574}
378cc40b 575
ffb4593c
NT
576/*
577 * S_check_uni
578 * Check the unary operators to ensure there's no ambiguity in how they're
579 * used. An ambiguous piece of code would be:
580 * rand + 5
581 * This doesn't mean rand() + 5. Because rand() is a unary operator,
582 * the +5 is its argument.
583 */
584
76e3520e 585STATIC void
cea2e8a9 586S_check_uni(pTHX)
ba106d47 587{
2f3197b3 588 char *s;
a0d0e21e 589 char *t;
0453d815 590 dTHR;
2f3197b3 591
3280af22 592 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 593 return;
3280af22
NIS
594 while (isSPACE(*PL_last_uni))
595 PL_last_uni++;
834a4ddd 596 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 597 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 598 return;
0453d815 599 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 600 char ch = *s;
0453d815
PM
601 *s = '\0';
602 Perl_warner(aTHX_ WARN_AMBIGUOUS,
603 "Warning: Use of \"%s\" without parens is ambiguous",
604 PL_last_uni);
605 *s = ch;
606 }
2f3197b3
LW
607}
608
ffb4593c
NT
609/* workaround to replace the UNI() macro with a function. Only the
610 * hints/uts.sh file mentions this. Other comments elsewhere in the
611 * source indicate Microport Unix might need it too.
612 */
613
ffed7fef
LW
614#ifdef CRIPPLED_CC
615
616#undef UNI
ffed7fef 617#define UNI(f) return uni(f,s)
ffed7fef 618
76e3520e 619STATIC int
cea2e8a9 620S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
621{
622 yylval.ival = f;
3280af22
NIS
623 PL_expect = XTERM;
624 PL_bufptr = s;
8f872242
NIS
625 PL_last_uni = PL_oldbufptr;
626 PL_last_lop_op = f;
ffed7fef
LW
627 if (*s == '(')
628 return FUNC1;
629 s = skipspace(s);
630 if (*s == '(')
631 return FUNC1;
632 else
633 return UNIOP;
634}
635
a0d0e21e
LW
636#endif /* CRIPPLED_CC */
637
ffb4593c
NT
638/*
639 * LOP : macro to build a list operator. Its behaviour has been replaced
640 * with a subroutine, S_lop() for which LOP is just another name.
641 */
642
a0d0e21e
LW
643#define LOP(f,x) return lop(f,x,s)
644
ffb4593c
NT
645/*
646 * S_lop
647 * Build a list operator (or something that might be one). The rules:
648 * - if we have a next token, then it's a list operator [why?]
649 * - if the next thing is an opening paren, then it's a function
650 * - else it's a list operator
651 */
652
76e3520e 653STATIC I32
cea2e8a9 654S_lop(pTHX_ I32 f, expectation x, char *s)
ffed7fef 655{
0f15f207 656 dTHR;
79072805 657 yylval.ival = f;
35c8bce7 658 CLINE;
3280af22
NIS
659 PL_expect = x;
660 PL_bufptr = s;
661 PL_last_lop = PL_oldbufptr;
662 PL_last_lop_op = f;
663 if (PL_nexttoke)
a0d0e21e 664 return LSTOP;
79072805
LW
665 if (*s == '(')
666 return FUNC;
667 s = skipspace(s);
668 if (*s == '(')
669 return FUNC;
670 else
671 return LSTOP;
672}
673
ffb4593c
NT
674/*
675 * S_force_next
676 * When the tokenizer realizes it knows the next token (for instance,
677 * it is reordering tokens for the parser) then it can call S_force_next
678 * to make the current token be the next one. It will also set
679 * PL_nextval, and possibly PL_expect to ensure the lexer handles the
680 * token correctly.
681 */
682
76e3520e 683STATIC void
cea2e8a9 684S_force_next(pTHX_ I32 type)
79072805 685{
3280af22
NIS
686 PL_nexttype[PL_nexttoke] = type;
687 PL_nexttoke++;
688 if (PL_lex_state != LEX_KNOWNEXT) {
689 PL_lex_defer = PL_lex_state;
690 PL_lex_expect = PL_expect;
691 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
692 }
693}
694
ffb4593c
NT
695/*
696 * S_force_word
697 * When the lexer knows the next thing is a word (for instance, it has
698 * just seen -> and it knows that the next char is a word char, then
699 * it calls S_force_word to stick the next word into the PL_next lookahead.
700 *
701 * Arguments:
702 * char *start : start of the buffer
703 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
704 * int check_keyword : if true, Perl checks to make sure the word isn't
705 * a keyword (do this if the word is a label, e.g. goto FOO)
706 * int allow_pack : if true, : characters will also be allowed (require,
707 * use, etc. do this)
708 * int allow_initial_tick : used by the "sub" tokenizer only.
709 */
710
76e3520e 711STATIC char *
cea2e8a9 712S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 713{
463ee0b2
LW
714 register char *s;
715 STRLEN len;
716
717 start = skipspace(start);
718 s = start;
834a4ddd 719 if (isIDFIRST_lazy(s) ||
a0d0e21e 720 (allow_pack && *s == ':') ||
15f0808c 721 (allow_initial_tick && *s == '\'') )
a0d0e21e 722 {
3280af22
NIS
723 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
724 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
725 return start;
726 if (token == METHOD) {
727 s = skipspace(s);
728 if (*s == '(')
3280af22 729 PL_expect = XTERM;
463ee0b2 730 else {
3280af22 731 PL_expect = XOPERATOR;
463ee0b2 732 }
79072805 733 }
3280af22
NIS
734 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
735 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
736 force_next(token);
737 }
738 return s;
739}
740
ffb4593c
NT
741/*
742 * S_force_ident
743 * Called when the tokenizer wants $foo *foo &foo etc, but the program
744 * text only contains the "foo" portion. The first argument is a pointer
745 * to the "foo", and the second argument is the type symbol to prefix.
746 * Forces the next token to be a "WORD".
747 * Creates the symbol if it didn't already exist (through the gv_fetchpv
748 * call).
749 */
750
76e3520e 751STATIC void
cea2e8a9 752S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
753{
754 if (s && *s) {
11343788 755 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 756 PL_nextval[PL_nexttoke].opval = o;
79072805 757 force_next(WORD);
748a9306 758 if (kind) {
e858de61 759 dTHR; /* just for in_eval */
11343788 760 o->op_private = OPpCONST_ENTERED;
55497cff 761 /* XXX see note in pp_entereval() for why we forgo typo
762 warnings if the symbol must be introduced in an eval.
763 GSAR 96-10-12 */
3280af22 764 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
765 kind == '$' ? SVt_PV :
766 kind == '@' ? SVt_PVAV :
767 kind == '%' ? SVt_PVHV :
768 SVt_PVGV
769 );
748a9306 770 }
79072805
LW
771 }
772}
773
ffb4593c
NT
774/*
775 * S_force_version
776 * Forces the next token to be a version number.
777 */
778
76e3520e 779STATIC char *
cea2e8a9 780S_force_version(pTHX_ char *s)
89bfa8cd 781{
782 OP *version = Nullop;
783
784 s = skipspace(s);
785
786 /* default VERSION number -- GBARR */
787
788 if(isDIGIT(*s)) {
789 char *d;
790 int c;
55497cff 791 for( d=s, c = 1; isDIGIT(*d) || *d == '_' || (*d == '.' && c--); d++);
89bfa8cd 792 if((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
793 s = scan_num(s);
794 /* real VERSION number -- GBARR */
795 version = yylval.opval;
796 }
797 }
798
799 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 800 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 801 force_next(WORD);
802
803 return (s);
804}
805
ffb4593c
NT
806/*
807 * S_tokeq
808 * Tokenize a quoted string passed in as an SV. It finds the next
809 * chunk, up to end of string or a backslash. It may make a new
810 * SV containing that chunk (if HINT_NEW_STRING is on). It also
811 * turns \\ into \.
812 */
813
76e3520e 814STATIC SV *
cea2e8a9 815S_tokeq(pTHX_ SV *sv)
79072805
LW
816{
817 register char *s;
818 register char *send;
819 register char *d;
b3ac6de7
IZ
820 STRLEN len = 0;
821 SV *pv = sv;
79072805
LW
822
823 if (!SvLEN(sv))
b3ac6de7 824 goto finish;
79072805 825
a0d0e21e 826 s = SvPV_force(sv, len);
748a9306 827 if (SvIVX(sv) == -1)
b3ac6de7 828 goto finish;
463ee0b2 829 send = s + len;
79072805
LW
830 while (s < send && *s != '\\')
831 s++;
832 if (s == send)
b3ac6de7 833 goto finish;
79072805 834 d = s;
3280af22 835 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 836 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
837 while (s < send) {
838 if (*s == '\\') {
a0d0e21e 839 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
840 s++; /* all that, just for this */
841 }
842 *d++ = *s++;
843 }
844 *d = '\0';
463ee0b2 845 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 846 finish:
3280af22 847 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 848 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
849 return sv;
850}
851
ffb4593c
NT
852/*
853 * Now come three functions related to double-quote context,
854 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
855 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
856 * interact with PL_lex_state, and create fake ( ... ) argument lists
857 * to handle functions and concatenation.
858 * They assume that whoever calls them will be setting up a fake
859 * join call, because each subthing puts a ',' after it. This lets
860 * "lower \luPpEr"
861 * become
862 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
863 *
864 * (I'm not sure whether the spurious commas at the end of lcfirst's
865 * arguments and join's arguments are created or not).
866 */
867
868/*
869 * S_sublex_start
870 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
871 *
872 * Pattern matching will set PL_lex_op to the pattern-matching op to
873 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
874 *
875 * OP_CONST and OP_READLINE are easy--just make the new op and return.
876 *
877 * Everything else becomes a FUNC.
878 *
879 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
880 * had an OP_CONST or OP_READLINE). This just sets us up for a
881 * call to S_sublex_push().
882 */
883
76e3520e 884STATIC I32
cea2e8a9 885S_sublex_start(pTHX)
79072805
LW
886{
887 register I32 op_type = yylval.ival;
79072805
LW
888
889 if (op_type == OP_NULL) {
3280af22
NIS
890 yylval.opval = PL_lex_op;
891 PL_lex_op = Nullop;
79072805
LW
892 return THING;
893 }
894 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 895 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
896
897 if (SvTYPE(sv) == SVt_PVIV) {
898 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
899 STRLEN len;
900 char *p;
901 SV *nsv;
902
903 p = SvPV(sv, len);
79cb57f6 904 nsv = newSVpvn(p, len);
b3ac6de7
IZ
905 SvREFCNT_dec(sv);
906 sv = nsv;
907 }
908 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 909 PL_lex_stuff = Nullsv;
79072805
LW
910 return THING;
911 }
912
3280af22
NIS
913 PL_sublex_info.super_state = PL_lex_state;
914 PL_sublex_info.sub_inwhat = op_type;
915 PL_sublex_info.sub_op = PL_lex_op;
916 PL_lex_state = LEX_INTERPPUSH;
55497cff 917
3280af22
NIS
918 PL_expect = XTERM;
919 if (PL_lex_op) {
920 yylval.opval = PL_lex_op;
921 PL_lex_op = Nullop;
55497cff 922 return PMFUNC;
923 }
924 else
925 return FUNC;
926}
927
ffb4593c
NT
928/*
929 * S_sublex_push
930 * Create a new scope to save the lexing state. The scope will be
931 * ended in S_sublex_done. Returns a '(', starting the function arguments
932 * to the uc, lc, etc. found before.
933 * Sets PL_lex_state to LEX_INTERPCONCAT.
934 */
935
76e3520e 936STATIC I32
cea2e8a9 937S_sublex_push(pTHX)
55497cff 938{
0f15f207 939 dTHR;
f46d017c 940 ENTER;
55497cff 941
3280af22
NIS
942 PL_lex_state = PL_sublex_info.super_state;
943 SAVEI32(PL_lex_dojoin);
944 SAVEI32(PL_lex_brackets);
945 SAVEI32(PL_lex_fakebrack);
946 SAVEI32(PL_lex_casemods);
947 SAVEI32(PL_lex_starts);
948 SAVEI32(PL_lex_state);
949 SAVESPTR(PL_lex_inpat);
950 SAVEI32(PL_lex_inwhat);
951 SAVEI16(PL_curcop->cop_line);
952 SAVEPPTR(PL_bufptr);
953 SAVEPPTR(PL_oldbufptr);
954 SAVEPPTR(PL_oldoldbufptr);
955 SAVEPPTR(PL_linestart);
956 SAVESPTR(PL_linestr);
957 SAVEPPTR(PL_lex_brackstack);
958 SAVEPPTR(PL_lex_casestack);
959
960 PL_linestr = PL_lex_stuff;
961 PL_lex_stuff = Nullsv;
962
963 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
964 PL_bufend += SvCUR(PL_linestr);
965 SAVEFREESV(PL_linestr);
966
967 PL_lex_dojoin = FALSE;
968 PL_lex_brackets = 0;
969 PL_lex_fakebrack = 0;
970 New(899, PL_lex_brackstack, 120, char);
971 New(899, PL_lex_casestack, 12, char);
972 SAVEFREEPV(PL_lex_brackstack);
973 SAVEFREEPV(PL_lex_casestack);
974 PL_lex_casemods = 0;
975 *PL_lex_casestack = '\0';
976 PL_lex_starts = 0;
977 PL_lex_state = LEX_INTERPCONCAT;
978 PL_curcop->cop_line = PL_multi_start;
979
980 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
981 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
982 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 983 else
3280af22 984 PL_lex_inpat = Nullop;
79072805 985
55497cff 986 return '(';
79072805
LW
987}
988
ffb4593c
NT
989/*
990 * S_sublex_done
991 * Restores lexer state after a S_sublex_push.
992 */
993
76e3520e 994STATIC I32
cea2e8a9 995S_sublex_done(pTHX)
79072805 996{
3280af22
NIS
997 if (!PL_lex_starts++) {
998 PL_expect = XOPERATOR;
79cb57f6 999 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1000 return THING;
1001 }
1002
3280af22
NIS
1003 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1004 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1005 return yylex();
79072805
LW
1006 }
1007
ffb4593c 1008 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1009 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1010 PL_linestr = PL_lex_repl;
1011 PL_lex_inpat = 0;
1012 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1013 PL_bufend += SvCUR(PL_linestr);
1014 SAVEFREESV(PL_linestr);
1015 PL_lex_dojoin = FALSE;
1016 PL_lex_brackets = 0;
1017 PL_lex_fakebrack = 0;
1018 PL_lex_casemods = 0;
1019 *PL_lex_casestack = '\0';
1020 PL_lex_starts = 0;
25da4f38 1021 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1022 PL_lex_state = LEX_INTERPNORMAL;
1023 PL_lex_starts++;
e9fa98b2
HS
1024 /* we don't clear PL_lex_repl here, so that we can check later
1025 whether this is an evalled subst; that means we rely on the
1026 logic to ensure sublex_done() is called again only via the
1027 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1028 }
e9fa98b2 1029 else {
3280af22 1030 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1031 PL_lex_repl = Nullsv;
1032 }
79072805 1033 return ',';
ffed7fef
LW
1034 }
1035 else {
f46d017c 1036 LEAVE;
3280af22
NIS
1037 PL_bufend = SvPVX(PL_linestr);
1038 PL_bufend += SvCUR(PL_linestr);
1039 PL_expect = XOPERATOR;
79072805 1040 return ')';
ffed7fef
LW
1041 }
1042}
1043
02aa26ce
NT
1044/*
1045 scan_const
1046
1047 Extracts a pattern, double-quoted string, or transliteration. This
1048 is terrifying code.
1049
3280af22
NIS
1050 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1051 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1052 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1053
9b599b2a
GS
1054 Returns a pointer to the character scanned up to. Iff this is
1055 advanced from the start pointer supplied (ie if anything was
1056 successfully parsed), will leave an OP for the substring scanned
1057 in yylval. Caller must intuit reason for not parsing further
1058 by looking at the next characters herself.
1059
02aa26ce
NT
1060 In patterns:
1061 backslashes:
1062 double-quoted style: \r and \n
1063 regexp special ones: \D \s
1064 constants: \x3
1065 backrefs: \1 (deprecated in substitution replacements)
1066 case and quoting: \U \Q \E
1067 stops on @ and $, but not for $ as tail anchor
1068
1069 In transliterations:
1070 characters are VERY literal, except for - not at the start or end
1071 of the string, which indicates a range. scan_const expands the
1072 range to the full set of intermediate characters.
1073
1074 In double-quoted strings:
1075 backslashes:
1076 double-quoted style: \r and \n
1077 constants: \x3
1078 backrefs: \1 (deprecated)
1079 case and quoting: \U \Q \E
1080 stops on @ and $
1081
1082 scan_const does *not* construct ops to handle interpolated strings.
1083 It stops processing as soon as it finds an embedded $ or @ variable
1084 and leaves it to the caller to work out what's going on.
1085
1086 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1087
1088 $ in pattern could be $foo or could be tail anchor. Assumption:
1089 it's a tail anchor if $ is the last thing in the string, or if it's
1090 followed by one of ")| \n\t"
1091
1092 \1 (backreferences) are turned into $1
1093
1094 The structure of the code is
1095 while (there's a character to process) {
1096 handle transliteration ranges
1097 skip regexp comments
1098 skip # initiated comments in //x patterns
1099 check for embedded @foo
1100 check for embedded scalars
1101 if (backslash) {
1102 leave intact backslashes from leave (below)
1103 deprecate \1 in strings and sub replacements
1104 handle string-changing backslashes \l \U \Q \E, etc.
1105 switch (what was escaped) {
1106 handle - in a transliteration (becomes a literal -)
1107 handle \132 octal characters
1108 handle 0x15 hex characters
1109 handle \cV (control V)
1110 handle printf backslashes (\f, \r, \n, etc)
1111 } (end switch)
1112 } (end if backslash)
1113 } (end while character to read)
1114
1115*/
1116
76e3520e 1117STATIC char *
cea2e8a9 1118S_scan_const(pTHX_ char *start)
79072805 1119{
3280af22 1120 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1121 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1122 register char *s = start; /* start of the constant */
1123 register char *d = SvPVX(sv); /* destination for copies */
1124 bool dorange = FALSE; /* are we in a translit range? */
1125 I32 len; /* ? */
ac2262e3 1126 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1127 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1128 : UTF;
ac2262e3 1129 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1130 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
1131 : UTF;
9b599b2a 1132 /* leaveit is the set of acceptably-backslashed characters */
72aaf631 1133 char *leaveit =
3280af22 1134 PL_lex_inpat
a0ed51b3 1135 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1136 : "";
79072805
LW
1137
1138 while (s < send || dorange) {
02aa26ce 1139 /* get transliterations out of the way (they're most literal) */
3280af22 1140 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1141 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1142 if (dorange) {
02aa26ce 1143 I32 i; /* current expanded character */
8ada0baa 1144 I32 min; /* first character in range */
02aa26ce
NT
1145 I32 max; /* last character in range */
1146
1147 i = d - SvPVX(sv); /* remember current offset */
1148 SvGROW(sv, SvLEN(sv) + 256); /* expand the sv -- there'll never be more'n 256 chars in a range for it to grow by */
1149 d = SvPVX(sv) + i; /* restore d after the grow potentially has changed the ptr */
1150 d -= 2; /* eat the first char and the - */
1151
8ada0baa
JH
1152 min = (U8)*d; /* first char in range */
1153 max = (U8)d[1]; /* last char in range */
1154
1155#ifndef ASCIIish
1156 if ((isLOWER(min) && isLOWER(max)) ||
1157 (isUPPER(min) && isUPPER(max))) {
1158 if (isLOWER(min)) {
1159 for (i = min; i <= max; i++)
1160 if (isLOWER(i))
1161 *d++ = i;
1162 } else {
1163 for (i = min; i <= max; i++)
1164 if (isUPPER(i))
1165 *d++ = i;
1166 }
1167 }
1168 else
1169#endif
1170 for (i = min; i <= max; i++)
1171 *d++ = i;
02aa26ce
NT
1172
1173 /* mark the range as done, and continue */
79072805
LW
1174 dorange = FALSE;
1175 continue;
1176 }
02aa26ce
NT
1177
1178 /* range begins (ignore - as first or last char) */
79072805 1179 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1180 if (utf) {
a176fa2a 1181 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1182 s++;
1183 continue;
1184 }
79072805
LW
1185 dorange = TRUE;
1186 s++;
1187 }
1188 }
02aa26ce
NT
1189
1190 /* if we get here, we're not doing a transliteration */
1191
0f5d15d6
IZ
1192 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1193 except for the last char, which will be done separately. */
3280af22 1194 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1195 if (s[2] == '#') {
1196 while (s < send && *s != ')')
1197 *d++ = *s++;
0f5d15d6
IZ
1198 } else if (s[2] == '{'
1199 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1200 I32 count = 1;
0f5d15d6 1201 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1202 char c;
1203
d9f97599
GS
1204 while (count && (c = *regparse)) {
1205 if (c == '\\' && regparse[1])
1206 regparse++;
cc6b7395
IZ
1207 else if (c == '{')
1208 count++;
1209 else if (c == '}')
1210 count--;
d9f97599 1211 regparse++;
cc6b7395 1212 }
5bdf89e7
IZ
1213 if (*regparse != ')') {
1214 regparse--; /* Leave one char for continuation. */
cc6b7395 1215 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1216 }
0f5d15d6 1217 while (s < regparse)
cc6b7395
IZ
1218 *d++ = *s++;
1219 }
748a9306 1220 }
02aa26ce
NT
1221
1222 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1223 else if (*s == '#' && PL_lex_inpat &&
1224 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1225 while (s+1 < send && *s != '\n')
1226 *d++ = *s++;
1227 }
02aa26ce
NT
1228
1229 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1230 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1231 break;
02aa26ce
NT
1232
1233 /* check for embedded scalars. only stop if we're sure it's a
1234 variable.
1235 */
79072805 1236 else if (*s == '$') {
3280af22 1237 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1238 break;
c277df42 1239 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1240 break; /* in regexp, $ might be tail anchor */
1241 }
02aa26ce 1242
a0ed51b3
LW
1243 /* (now in tr/// code again) */
1244
d008e5eb
GS
1245 if (*s & 0x80 && thisutf) {
1246 dTHR; /* only for ckWARN */
1247 if (ckWARN(WARN_UTF8)) {
dfe13c55 1248 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1249 if (len) {
1250 while (len--)
1251 *d++ = *s++;
1252 continue;
1253 }
a0ed51b3
LW
1254 }
1255 }
1256
02aa26ce 1257 /* backslashes */
79072805
LW
1258 if (*s == '\\' && s+1 < send) {
1259 s++;
02aa26ce
NT
1260
1261 /* some backslashes we leave behind */
c9f97d15 1262 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1263 *d++ = '\\';
1264 *d++ = *s++;
1265 continue;
1266 }
02aa26ce
NT
1267
1268 /* deprecate \1 in strings and substitution replacements */
3280af22 1269 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1270 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1271 {
d008e5eb 1272 dTHR; /* only for ckWARN */
599cee73 1273 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1274 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1275 *--s = '$';
1276 break;
1277 }
02aa26ce
NT
1278
1279 /* string-change backslash escapes */
3280af22 1280 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1281 --s;
1282 break;
1283 }
02aa26ce
NT
1284
1285 /* if we get here, it's either a quoted -, or a digit */
79072805 1286 switch (*s) {
02aa26ce
NT
1287
1288 /* quoted - in transliterations */
79072805 1289 case '-':
3280af22 1290 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1291 *d++ = *s++;
1292 continue;
1293 }
1294 /* FALL THROUGH */
1295 default:
11b8faa4
JH
1296 {
1297 dTHR;
1298 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1299 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1300 "Unrecognized escape \\%c passed through",
1301 *s);
1302 /* default action is to copy the quoted character */
1303 *d++ = *s++;
1304 continue;
1305 }
02aa26ce
NT
1306
1307 /* \132 indicates an octal constant */
79072805
LW
1308 case '0': case '1': case '2': case '3':
1309 case '4': case '5': case '6': case '7':
1310 *d++ = scan_oct(s, 3, &len);
1311 s += len;
1312 continue;
02aa26ce
NT
1313
1314 /* \x24 indicates a hex constant */
79072805 1315 case 'x':
a0ed51b3
LW
1316 ++s;
1317 if (*s == '{') {
1318 char* e = strchr(s, '}');
1319
adaeee49 1320 if (!e) {
a0ed51b3 1321 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1322 e = s;
1323 }
d008e5eb
GS
1324 if (!utf) {
1325 dTHR;
1326 if (ckWARN(WARN_UTF8))
cea2e8a9 1327 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1328 "Use of \\x{} without utf8 declaration");
1329 }
a0ed51b3 1330 /* note: utf always shorter than hex */
dfe13c55
GS
1331 d = (char*)uv_to_utf8((U8*)d,
1332 scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1333 s = e + 1;
a0ed51b3
LW
1334 }
1335 else {
1336 UV uv = (UV)scan_hex(s, 2, &len);
1337 if (utf && PL_lex_inwhat == OP_TRANS &&
1338 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1339 {
dfe13c55 1340 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1341 }
1342 else {
d008e5eb
GS
1343 if (uv >= 127 && UTF) {
1344 dTHR;
1345 if (ckWARN(WARN_UTF8))
cea2e8a9 1346 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1347 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1348 len,s,len,s);
1349 }
a0ed51b3
LW
1350 *d++ = (char)uv;
1351 }
1352 s += len;
1353 }
79072805 1354 continue;
02aa26ce
NT
1355
1356 /* \c is a control character */
79072805
LW
1357 case 'c':
1358 s++;
9d116dd7
JH
1359#ifdef EBCDIC
1360 *d = *s++;
1361 if (isLOWER(*d))
1362 *d = toUPPER(*d);
1363 *d++ = toCTRL(*d);
1364#else
bbce6d69 1365 len = *s++;
1366 *d++ = toCTRL(len);
9d116dd7 1367#endif
79072805 1368 continue;
02aa26ce
NT
1369
1370 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1371 case 'b':
1372 *d++ = '\b';
1373 break;
1374 case 'n':
1375 *d++ = '\n';
1376 break;
1377 case 'r':
1378 *d++ = '\r';
1379 break;
1380 case 'f':
1381 *d++ = '\f';
1382 break;
1383 case 't':
1384 *d++ = '\t';
1385 break;
34a3fe2a
PP
1386#ifdef EBCDIC
1387 case 'e':
1388 *d++ = '\047'; /* CP 1047 */
1389 break;
1390 case 'a':
1391 *d++ = '\057'; /* CP 1047 */
1392 break;
1393#else
79072805
LW
1394 case 'e':
1395 *d++ = '\033';
1396 break;
1397 case 'a':
1398 *d++ = '\007';
1399 break;
34a3fe2a 1400#endif
02aa26ce
NT
1401 } /* end switch */
1402
79072805
LW
1403 s++;
1404 continue;
02aa26ce
NT
1405 } /* end if (backslash) */
1406
79072805 1407 *d++ = *s++;
02aa26ce
NT
1408 } /* while loop to process each character */
1409
1410 /* terminate the string and set up the sv */
79072805 1411 *d = '\0';
463ee0b2 1412 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1413 SvPOK_on(sv);
1414
02aa26ce 1415 /* shrink the sv if we allocated more than we used */
79072805
LW
1416 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1417 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1418 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1419 }
02aa26ce 1420
9b599b2a 1421 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1422 if (s > PL_bufptr) {
1423 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1424 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1425 sv, Nullsv,
3280af22 1426 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1427 ? "tr"
3280af22 1428 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1429 ? "s"
1430 : "qq")));
79072805 1431 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1432 } else
8990e307 1433 SvREFCNT_dec(sv);
79072805
LW
1434 return s;
1435}
1436
ffb4593c
NT
1437/* S_intuit_more
1438 * Returns TRUE if there's more to the expression (e.g., a subscript),
1439 * FALSE otherwise.
1440 * This is the one truly awful dwimmer necessary to conflate C and sed.
1441 *
1442 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1443 *
1444 * ->[ and ->{ return TRUE
1445 * { and [ outside a pattern are always subscripts, so return TRUE
1446 * if we're outside a pattern and it's not { or [, then return FALSE
1447 * if we're in a pattern and the first char is a {
1448 * {4,5} (any digits around the comma) returns FALSE
1449 * if we're in a pattern and the first char is a [
1450 * [] returns FALSE
1451 * [SOMETHING] has a funky algorithm to decide whether it's a
1452 * character class or not. It has to deal with things like
1453 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1454 * anything else returns TRUE
1455 */
1456
76e3520e 1457STATIC int
cea2e8a9 1458S_intuit_more(pTHX_ register char *s)
79072805 1459{
3280af22 1460 if (PL_lex_brackets)
79072805
LW
1461 return TRUE;
1462 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1463 return TRUE;
1464 if (*s != '{' && *s != '[')
1465 return FALSE;
3280af22 1466 if (!PL_lex_inpat)
79072805
LW
1467 return TRUE;
1468
1469 /* In a pattern, so maybe we have {n,m}. */
1470 if (*s == '{') {
1471 s++;
1472 if (!isDIGIT(*s))
1473 return TRUE;
1474 while (isDIGIT(*s))
1475 s++;
1476 if (*s == ',')
1477 s++;
1478 while (isDIGIT(*s))
1479 s++;
1480 if (*s == '}')
1481 return FALSE;
1482 return TRUE;
1483
1484 }
1485
1486 /* On the other hand, maybe we have a character class */
1487
1488 s++;
1489 if (*s == ']' || *s == '^')
1490 return FALSE;
1491 else {
ffb4593c 1492 /* this is terrifying, and it works */
79072805
LW
1493 int weight = 2; /* let's weigh the evidence */
1494 char seen[256];
f27ffc4a 1495 unsigned char un_char = 255, last_un_char;
93a17b20 1496 char *send = strchr(s,']');
3280af22 1497 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1498
1499 if (!send) /* has to be an expression */
1500 return TRUE;
1501
1502 Zero(seen,256,char);
1503 if (*s == '$')
1504 weight -= 3;
1505 else if (isDIGIT(*s)) {
1506 if (s[1] != ']') {
1507 if (isDIGIT(s[1]) && s[2] == ']')
1508 weight -= 10;
1509 }
1510 else
1511 weight -= 100;
1512 }
1513 for (; s < send; s++) {
1514 last_un_char = un_char;
1515 un_char = (unsigned char)*s;
1516 switch (*s) {
1517 case '@':
1518 case '&':
1519 case '$':
1520 weight -= seen[un_char] * 10;
834a4ddd 1521 if (isALNUM_lazy(s+1)) {
8903cb82 1522 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1523 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1524 weight -= 100;
1525 else
1526 weight -= 10;
1527 }
1528 else if (*s == '$' && s[1] &&
93a17b20
LW
1529 strchr("[#!%*<>()-=",s[1])) {
1530 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1531 weight -= 10;
1532 else
1533 weight -= 1;
1534 }
1535 break;
1536 case '\\':
1537 un_char = 254;
1538 if (s[1]) {
93a17b20 1539 if (strchr("wds]",s[1]))
79072805
LW
1540 weight += 100;
1541 else if (seen['\''] || seen['"'])
1542 weight += 1;
93a17b20 1543 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1544 weight += 40;
1545 else if (isDIGIT(s[1])) {
1546 weight += 40;
1547 while (s[1] && isDIGIT(s[1]))
1548 s++;
1549 }
1550 }
1551 else
1552 weight += 100;
1553 break;
1554 case '-':
1555 if (s[1] == '\\')
1556 weight += 50;
93a17b20 1557 if (strchr("aA01! ",last_un_char))
79072805 1558 weight += 30;
93a17b20 1559 if (strchr("zZ79~",s[1]))
79072805 1560 weight += 30;
f27ffc4a
GS
1561 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1562 weight -= 5; /* cope with negative subscript */
79072805
LW
1563 break;
1564 default:
93a17b20 1565 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1566 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1567 char *d = tmpbuf;
1568 while (isALPHA(*s))
1569 *d++ = *s++;
1570 *d = '\0';
1571 if (keyword(tmpbuf, d - tmpbuf))
1572 weight -= 150;
1573 }
1574 if (un_char == last_un_char + 1)
1575 weight += 5;
1576 weight -= seen[un_char];
1577 break;
1578 }
1579 seen[un_char]++;
1580 }
1581 if (weight >= 0) /* probably a character class */
1582 return FALSE;
1583 }
1584
1585 return TRUE;
1586}
ffed7fef 1587
ffb4593c
NT
1588/*
1589 * S_intuit_method
1590 *
1591 * Does all the checking to disambiguate
1592 * foo bar
1593 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1594 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1595 *
1596 * First argument is the stuff after the first token, e.g. "bar".
1597 *
1598 * Not a method if bar is a filehandle.
1599 * Not a method if foo is a subroutine prototyped to take a filehandle.
1600 * Not a method if it's really "Foo $bar"
1601 * Method if it's "foo $bar"
1602 * Not a method if it's really "print foo $bar"
1603 * Method if it's really "foo package::" (interpreted as package->foo)
1604 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1605 * Not a method if bar is a filehandle or package, but is quotd with
1606 * =>
1607 */
1608
76e3520e 1609STATIC int
cea2e8a9 1610S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1611{
1612 char *s = start + (*start == '$');
3280af22 1613 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1614 STRLEN len;
1615 GV* indirgv;
1616
1617 if (gv) {
b6c543e3 1618 CV *cv;
a0d0e21e
LW
1619 if (GvIO(gv))
1620 return 0;
b6c543e3
IZ
1621 if ((cv = GvCVu(gv))) {
1622 char *proto = SvPVX(cv);
1623 if (proto) {
1624 if (*proto == ';')
1625 proto++;
1626 if (*proto == '*')
1627 return 0;
1628 }
1629 } else
a0d0e21e
LW
1630 gv = 0;
1631 }
8903cb82 1632 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1633 /* start is the beginning of the possible filehandle/object,
1634 * and s is the end of it
1635 * tmpbuf is a copy of it
1636 */
1637
a0d0e21e 1638 if (*start == '$') {
3280af22 1639 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1640 return 0;
1641 s = skipspace(s);
3280af22
NIS
1642 PL_bufptr = start;
1643 PL_expect = XREF;
a0d0e21e
LW
1644 return *s == '(' ? FUNCMETH : METHOD;
1645 }
1646 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1647 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1648 len -= 2;
1649 tmpbuf[len] = '\0';
1650 goto bare_package;
1651 }
1652 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1653 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1654 return 0;
1655 /* filehandle or package name makes it a method */
89bfa8cd 1656 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1657 s = skipspace(s);
3280af22 1658 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1659 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1660 bare_package:
3280af22 1661 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1662 newSVpvn(tmpbuf,len));
3280af22
NIS
1663 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1664 PL_expect = XTERM;
a0d0e21e 1665 force_next(WORD);
3280af22 1666 PL_bufptr = s;
a0d0e21e
LW
1667 return *s == '(' ? FUNCMETH : METHOD;
1668 }
1669 }
1670 return 0;
1671}
1672
ffb4593c
NT
1673/*
1674 * S_incl_perldb
1675 * Return a string of Perl code to load the debugger. If PERL5DB
1676 * is set, it will return the contents of that, otherwise a
1677 * compile-time require of perl5db.pl.
1678 */
1679
76e3520e 1680STATIC char*
cea2e8a9 1681S_incl_perldb(pTHX)
a0d0e21e 1682{
3280af22 1683 if (PL_perldb) {
76e3520e 1684 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1685
1686 if (pdb)
1687 return pdb;
61bb5906 1688 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1689 return "BEGIN { require 'perl5db.pl' }";
1690 }
1691 return "";
1692}
1693
1694
16d20bd9
AD
1695/* Encoded script support. filter_add() effectively inserts a
1696 * 'pre-processing' function into the current source input stream.
1697 * Note that the filter function only applies to the current source file
1698 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1699 *
1700 * The datasv parameter (which may be NULL) can be used to pass
1701 * private data to this instance of the filter. The filter function
1702 * can recover the SV using the FILTER_DATA macro and use it to
1703 * store private buffers and state information.
1704 *
1705 * The supplied datasv parameter is upgraded to a PVIO type
1706 * and the IoDIRP field is used to store the function pointer.
1707 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1708 * private use must be set using malloc'd pointers.
1709 */
16d20bd9
AD
1710
1711SV *
864dbfa3 1712Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9
AD
1713{
1714 if (!funcp){ /* temporary handy debugging hack to be deleted */
80252599 1715 PL_filter_debug = atoi((char*)datasv);
16d20bd9
AD
1716 return NULL;
1717 }
3280af22
NIS
1718 if (!PL_rsfp_filters)
1719 PL_rsfp_filters = newAV();
16d20bd9 1720 if (!datasv)
8c52afec 1721 datasv = NEWSV(255,0);
16d20bd9 1722 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1723 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1724 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
0453d815 1725#ifdef DEBUGGING
80252599 1726 if (PL_filter_debug) {
2d8e6c8d 1727 STRLEN n_a;
cea2e8a9 1728 Perl_warn(aTHX_ "filter_add func %p (%s)", funcp, SvPV(datasv, n_a));
2d8e6c8d 1729 }
0453d815 1730#endif /* DEBUGGING */
3280af22
NIS
1731 av_unshift(PL_rsfp_filters, 1);
1732 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1733 return(datasv);
1734}
1735
1736
1737/* Delete most recently added instance of this filter function. */
a0d0e21e 1738void
864dbfa3 1739Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1740{
0453d815 1741#ifdef DEBUGGING
80252599 1742 if (PL_filter_debug)
cea2e8a9 1743 Perl_warn(aTHX_ "filter_del func %p", funcp);
0453d815 1744#endif /* DEBUGGING */
3280af22 1745 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1746 return;
1747 /* if filter is on top of stack (usual case) just pop it off */
677ca527 1748 if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){
a6c40364 1749 IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) = NULL;
3280af22 1750 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1751
16d20bd9
AD
1752 return;
1753 }
1754 /* we need to search for the correct entry and clear it */
cea2e8a9 1755 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1756}
1757
1758
1759/* Invoke the n'th filter function for the current rsfp. */
1760I32
864dbfa3 1761Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1762
1763
1764 /* 0 = read one text line */
a0d0e21e 1765{
16d20bd9
AD
1766 filter_t funcp;
1767 SV *datasv = NULL;
e50aee73 1768
3280af22 1769 if (!PL_rsfp_filters)
16d20bd9 1770 return -1;
3280af22 1771 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1772 /* Provide a default input filter to make life easy. */
1773 /* Note that we append to the line. This is handy. */
0453d815 1774#ifdef DEBUGGING
80252599 1775 if (PL_filter_debug)
cea2e8a9 1776 Perl_warn(aTHX_ "filter_read %d: from rsfp\n", idx);
0453d815 1777#endif /* DEBUGGING */
16d20bd9
AD
1778 if (maxlen) {
1779 /* Want a block */
1780 int len ;
1781 int old_len = SvCUR(buf_sv) ;
1782
1783 /* ensure buf_sv is large enough */
1784 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1785 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1786 if (PerlIO_error(PL_rsfp))
37120919
AD
1787 return -1; /* error */
1788 else
1789 return 0 ; /* end of file */
1790 }
16d20bd9
AD
1791 SvCUR_set(buf_sv, old_len + len) ;
1792 } else {
1793 /* Want a line */
3280af22
NIS
1794 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1795 if (PerlIO_error(PL_rsfp))
37120919
AD
1796 return -1; /* error */
1797 else
1798 return 0 ; /* end of file */
1799 }
16d20bd9
AD
1800 }
1801 return SvCUR(buf_sv);
1802 }
1803 /* Skip this filter slot if filter has been deleted */
3280af22 1804 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
0453d815 1805#ifdef DEBUGGING
80252599 1806 if (PL_filter_debug)
cea2e8a9 1807 Perl_warn(aTHX_ "filter_read %d: skipped (filter deleted)\n", idx);
0453d815 1808#endif /* DEBUGGING */
16d20bd9
AD
1809 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1810 }
1811 /* Get function pointer hidden within datasv */
1812 funcp = (filter_t)IoDIRP(datasv);
0453d815 1813#ifdef DEBUGGING
80252599 1814 if (PL_filter_debug) {
2d8e6c8d 1815 STRLEN n_a;
cea2e8a9 1816 Perl_warn(aTHX_ "filter_read %d: via function %p (%s)\n",
2d8e6c8d
GS
1817 idx, funcp, SvPV(datasv,n_a));
1818 }
0453d815 1819#endif /* DEBUGGING */
16d20bd9
AD
1820 /* Call function. The function is expected to */
1821 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1822 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1823 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1824}
1825
76e3520e 1826STATIC char *
cea2e8a9 1827S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1828{
a868473f 1829#ifdef WIN32FILTER
3280af22 1830 if (!PL_rsfp_filters) {
a868473f
NIS
1831 filter_add(win32_textfilter,NULL);
1832 }
1833#endif
3280af22 1834 if (PL_rsfp_filters) {
16d20bd9 1835
55497cff 1836 if (!append)
1837 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1838 if (FILTER_READ(0, sv, 0) > 0)
1839 return ( SvPVX(sv) ) ;
1840 else
1841 return Nullch ;
1842 }
9d116dd7 1843 else
fd049845 1844 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1845}
1846
1847
748a9306
LW
1848#ifdef DEBUGGING
1849 static char* exp_name[] =
a0d0e21e 1850 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "TERMBLOCK" };
748a9306 1851#endif
463ee0b2 1852
02aa26ce
NT
1853/*
1854 yylex
1855
1856 Works out what to call the token just pulled out of the input
1857 stream. The yacc parser takes care of taking the ops we return and
1858 stitching them into a tree.
1859
1860 Returns:
1861 PRIVATEREF
1862
1863 Structure:
1864 if read an identifier
1865 if we're in a my declaration
1866 croak if they tried to say my($foo::bar)
1867 build the ops for a my() declaration
1868 if it's an access to a my() variable
1869 are we in a sort block?
1870 croak if my($a); $a <=> $b
1871 build ops for access to a my() variable
1872 if in a dq string, and they've said @foo and we can't find @foo
1873 croak
1874 build ops for a bareword
1875 if we already built the token before, use it.
1876*/
1877
864dbfa3
GS
1878int
1879#ifdef USE_PURE_BISON
cea2e8a9 1880Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1881#else
cea2e8a9 1882Perl_yylex(pTHX)
864dbfa3 1883#endif
378cc40b 1884{
11343788 1885 dTHR;
79072805 1886 register char *s;
378cc40b 1887 register char *d;
79072805 1888 register I32 tmp;
463ee0b2 1889 STRLEN len;
161b471a
NIS
1890 GV *gv = Nullgv;
1891 GV **gvp = 0;
a687059c 1892
a1a0e61e
TD
1893#ifdef USE_PURE_BISON
1894 yylval_pointer = lvalp;
1895 yychar_pointer = lcharp;
1896#endif
1897
02aa26ce 1898 /* check if there's an identifier for us to look at */
3280af22 1899 if (PL_pending_ident) {
02aa26ce 1900 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1901 char pit = PL_pending_ident;
1902 PL_pending_ident = 0;
bbce6d69 1903
02aa26ce
NT
1904 /* if we're in a my(), we can't allow dynamics here.
1905 $foo'bar has already been turned into $foo::bar, so
1906 just check for colons.
1907
1908 if it's a legal name, the OP is a PADANY.
1909 */
3280af22
NIS
1910 if (PL_in_my) {
1911 if (strchr(PL_tokenbuf,':'))
cea2e8a9 1912 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1913
bbce6d69 1914 yylval.opval = newOP(OP_PADANY, 0);
3280af22 1915 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
bbce6d69 1916 return PRIVATEREF;
1917 }
1918
02aa26ce
NT
1919 /*
1920 build the ops for accesses to a my() variable.
1921
1922 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1923 then used in a comparison. This catches most, but not
1924 all cases. For instance, it catches
1925 sort { my($a); $a <=> $b }
1926 but not
1927 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1928 (although why you'd do that is anyone's guess).
1929 */
1930
3280af22 1931 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 1932#ifdef USE_THREADS
54b9620d 1933 /* Check for single character per-thread SVs */
3280af22
NIS
1934 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
1935 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
1936 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 1937 {
2faa37cc 1938 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
1939 yylval.opval->op_targ = tmp;
1940 return PRIVATEREF;
1941 }
1942#endif /* USE_THREADS */
3280af22 1943 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
02aa26ce 1944 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
1945 if (PL_last_lop_op == OP_SORT &&
1946 PL_tokenbuf[0] == '$' &&
1947 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
1948 && !PL_tokenbuf[2])
bbce6d69 1949 {
3280af22
NIS
1950 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
1951 d < PL_bufend && *d != '\n';
a863c7d1
MB
1952 d++)
1953 {
1954 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 1955 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 1956 PL_tokenbuf);
a863c7d1 1957 }
bbce6d69 1958 }
1959 }
bbce6d69 1960
a863c7d1
MB
1961 yylval.opval = newOP(OP_PADANY, 0);
1962 yylval.opval->op_targ = tmp;
1963 return PRIVATEREF;
1964 }
bbce6d69 1965 }
1966
02aa26ce
NT
1967 /*
1968 Whine if they've said @foo in a doublequoted string,
1969 and @foo isn't a variable we can find in the symbol
1970 table.
1971 */
3280af22
NIS
1972 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
1973 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
1974 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 1975 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 1976 PL_tokenbuf, PL_tokenbuf));
bbce6d69 1977 }
1978
02aa26ce 1979 /* build ops for a bareword */
3280af22 1980 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 1981 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
1982 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
1983 ((PL_tokenbuf[0] == '$') ? SVt_PV
1984 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 1985 : SVt_PVHV));
1986 return WORD;
1987 }
1988
02aa26ce
NT
1989 /* no identifier pending identification */
1990
3280af22 1991 switch (PL_lex_state) {
79072805
LW
1992#ifdef COMMENTARY
1993 case LEX_NORMAL: /* Some compilers will produce faster */
1994 case LEX_INTERPNORMAL: /* code if we comment these out. */
1995 break;
1996#endif
1997
02aa26ce 1998 /* when we're already built the next token, just pull it out the queue */
79072805 1999 case LEX_KNOWNEXT:
3280af22
NIS
2000 PL_nexttoke--;
2001 yylval = PL_nextval[PL_nexttoke];
2002 if (!PL_nexttoke) {
2003 PL_lex_state = PL_lex_defer;
2004 PL_expect = PL_lex_expect;
2005 PL_lex_defer = LEX_NORMAL;
463ee0b2 2006 }
3280af22 2007 return(PL_nexttype[PL_nexttoke]);
79072805 2008
02aa26ce 2009 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2010 when we get here, PL_bufptr is at the \
02aa26ce 2011 */
79072805
LW
2012 case LEX_INTERPCASEMOD:
2013#ifdef DEBUGGING
3280af22 2014 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2015 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2016#endif
02aa26ce 2017 /* handle \E or end of string */
3280af22 2018 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2019 char oldmod;
02aa26ce
NT
2020
2021 /* if at a \E */
3280af22
NIS
2022 if (PL_lex_casemods) {
2023 oldmod = PL_lex_casestack[--PL_lex_casemods];
2024 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2025
3280af22
NIS
2026 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2027 PL_bufptr += 2;
2028 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2029 }
79072805
LW
2030 return ')';
2031 }
3280af22
NIS
2032 if (PL_bufptr != PL_bufend)
2033 PL_bufptr += 2;
2034 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2035 return yylex();
79072805
LW
2036 }
2037 else {
3280af22 2038 s = PL_bufptr + 1;
79072805
LW
2039 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2040 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2041 if (strchr("LU", *s) &&
3280af22 2042 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2043 {
3280af22 2044 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2045 return ')';
2046 }
3280af22
NIS
2047 if (PL_lex_casemods > 10) {
2048 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2049 if (newlb != PL_lex_casestack) {
a0d0e21e 2050 SAVEFREEPV(newlb);
3280af22 2051 PL_lex_casestack = newlb;
a0d0e21e
LW
2052 }
2053 }
3280af22
NIS
2054 PL_lex_casestack[PL_lex_casemods++] = *s;
2055 PL_lex_casestack[PL_lex_casemods] = '\0';
2056 PL_lex_state = LEX_INTERPCONCAT;
2057 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2058 force_next('(');
2059 if (*s == 'l')
3280af22 2060 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2061 else if (*s == 'u')
3280af22 2062 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2063 else if (*s == 'L')
3280af22 2064 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2065 else if (*s == 'U')
3280af22 2066 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2067 else if (*s == 'Q')
3280af22 2068 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2069 else
cea2e8a9 2070 Perl_croak(aTHX_ "panic: yylex");
3280af22 2071 PL_bufptr = s + 1;
79072805 2072 force_next(FUNC);
3280af22
NIS
2073 if (PL_lex_starts) {
2074 s = PL_bufptr;
2075 PL_lex_starts = 0;
79072805
LW
2076 Aop(OP_CONCAT);
2077 }
2078 else
cea2e8a9 2079 return yylex();
79072805
LW
2080 }
2081
55497cff 2082 case LEX_INTERPPUSH:
2083 return sublex_push();
2084
79072805 2085 case LEX_INTERPSTART:
3280af22 2086 if (PL_bufptr == PL_bufend)
79072805 2087 return sublex_done();
3280af22
NIS
2088 PL_expect = XTERM;
2089 PL_lex_dojoin = (*PL_bufptr == '@');
2090 PL_lex_state = LEX_INTERPNORMAL;
2091 if (PL_lex_dojoin) {
2092 PL_nextval[PL_nexttoke].ival = 0;
79072805 2093 force_next(',');
554b3eca 2094#ifdef USE_THREADS
533c011a
NIS
2095 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2096 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2097 force_next(PRIVATEREF);
2098#else
a0d0e21e 2099 force_ident("\"", '$');
554b3eca 2100#endif /* USE_THREADS */
3280af22 2101 PL_nextval[PL_nexttoke].ival = 0;
79072805 2102 force_next('$');
3280af22 2103 PL_nextval[PL_nexttoke].ival = 0;
79072805 2104 force_next('(');
3280af22 2105 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2106 force_next(FUNC);
2107 }
3280af22
NIS
2108 if (PL_lex_starts++) {
2109 s = PL_bufptr;
79072805
LW
2110 Aop(OP_CONCAT);
2111 }
cea2e8a9 2112 return yylex();
79072805
LW
2113
2114 case LEX_INTERPENDMAYBE:
3280af22
NIS
2115 if (intuit_more(PL_bufptr)) {
2116 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2117 break;
2118 }
2119 /* FALL THROUGH */
2120
2121 case LEX_INTERPEND:
3280af22
NIS
2122 if (PL_lex_dojoin) {
2123 PL_lex_dojoin = FALSE;
2124 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2125 return ')';
2126 }
43a16006 2127 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2128 && SvEVALED(PL_lex_repl))
43a16006 2129 {
e9fa98b2 2130 if (PL_bufptr != PL_bufend)
cea2e8a9 2131 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2132 PL_lex_repl = Nullsv;
2133 }
79072805
LW
2134 /* FALLTHROUGH */
2135 case LEX_INTERPCONCAT:
2136#ifdef DEBUGGING
3280af22 2137 if (PL_lex_brackets)
cea2e8a9 2138 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2139#endif
3280af22 2140 if (PL_bufptr == PL_bufend)
79072805
LW
2141 return sublex_done();
2142
3280af22
NIS
2143 if (SvIVX(PL_linestr) == '\'') {
2144 SV *sv = newSVsv(PL_linestr);
2145 if (!PL_lex_inpat)
76e3520e 2146 sv = tokeq(sv);
3280af22 2147 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2148 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2149 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2150 s = PL_bufend;
79072805
LW
2151 }
2152 else {
3280af22 2153 s = scan_const(PL_bufptr);
79072805 2154 if (*s == '\\')
3280af22 2155 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2156 else
3280af22 2157 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2158 }
2159
3280af22
NIS
2160 if (s != PL_bufptr) {
2161 PL_nextval[PL_nexttoke] = yylval;
2162 PL_expect = XTERM;
79072805 2163 force_next(THING);
3280af22 2164 if (PL_lex_starts++)
79072805
LW
2165 Aop(OP_CONCAT);
2166 else {
3280af22 2167 PL_bufptr = s;
cea2e8a9 2168 return yylex();
79072805
LW
2169 }
2170 }
2171
cea2e8a9 2172 return yylex();
a0d0e21e 2173 case LEX_FORMLINE:
3280af22
NIS
2174 PL_lex_state = LEX_NORMAL;
2175 s = scan_formline(PL_bufptr);
2176 if (!PL_lex_formbrack)
a0d0e21e
LW
2177 goto rightbracket;
2178 OPERATOR(';');
79072805
LW
2179 }
2180
3280af22
NIS
2181 s = PL_bufptr;
2182 PL_oldoldbufptr = PL_oldbufptr;
2183 PL_oldbufptr = s;
79072805 2184 DEBUG_p( {
3280af22 2185 PerlIO_printf(PerlIO_stderr(), "### Tokener expecting %s at %s\n", exp_name[PL_expect], s);
79072805 2186 } )
463ee0b2
LW
2187
2188 retry:
378cc40b
LW
2189 switch (*s) {
2190 default:
834a4ddd
LW
2191 if (isIDFIRST_lazy(s))
2192 goto keylookup;
cea2e8a9 2193 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2194 case 4:
2195 case 26:
2196 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2197 case 0:
3280af22
NIS
2198 if (!PL_rsfp) {
2199 PL_last_uni = 0;
2200 PL_last_lop = 0;
2201 if (PL_lex_brackets)
d98d5fff 2202 yyerror("Missing right curly or square bracket");
79072805 2203 TOKEN(0);
463ee0b2 2204 }
3280af22 2205 if (s++ < PL_bufend)
a687059c 2206 goto retry; /* ignore stray nulls */
3280af22
NIS
2207 PL_last_uni = 0;
2208 PL_last_lop = 0;
2209 if (!PL_in_eval && !PL_preambled) {
2210 PL_preambled = TRUE;
2211 sv_setpv(PL_linestr,incl_perldb());
2212 if (SvCUR(PL_linestr))
2213 sv_catpv(PL_linestr,";");
2214 if (PL_preambleav){
2215 while(AvFILLp(PL_preambleav) >= 0) {
2216 SV *tmpsv = av_shift(PL_preambleav);
2217 sv_catsv(PL_linestr, tmpsv);
2218 sv_catpv(PL_linestr, ";");
91b7def8 2219 sv_free(tmpsv);
2220 }
3280af22
NIS
2221 sv_free((SV*)PL_preambleav);
2222 PL_preambleav = NULL;
91b7def8 2223 }
3280af22
NIS
2224 if (PL_minus_n || PL_minus_p) {
2225 sv_catpv(PL_linestr, "LINE: while (<>) {");
2226 if (PL_minus_l)
2227 sv_catpv(PL_linestr,"chomp;");
2228 if (PL_minus_a) {
8fd239a7
CS
2229 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2230 if (gv)
2231 GvIMPORTED_AV_on(gv);
3280af22
NIS
2232 if (PL_minus_F) {
2233 if (strchr("/'\"", *PL_splitstr)
2234 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2235 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2236 else {
2237 char delim;
2238 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2239 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2240 delim = *s;
cea2e8a9 2241 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2242 "q" + (delim == '\''), delim);
3280af22 2243 for (s = PL_splitstr; *s; s++) {
54310121 2244 if (*s == '\\')
3280af22
NIS
2245 sv_catpvn(PL_linestr, "\\", 1);
2246 sv_catpvn(PL_linestr, s, 1);
54310121 2247 }
cea2e8a9 2248 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2249 }
2304df62
AD
2250 }
2251 else
3280af22 2252 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2253 }
79072805 2254 }
3280af22
NIS
2255 sv_catpv(PL_linestr, "\n");
2256 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2257 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2258 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2259 SV *sv = NEWSV(85,0);
2260
2261 sv_upgrade(sv, SVt_PVMG);
3280af22
NIS
2262 sv_setsv(sv,PL_linestr);
2263 av_store(GvAV(PL_curcop->cop_filegv),(I32)PL_curcop->cop_line,sv);
a0d0e21e 2264 }
79072805 2265 goto retry;
a687059c 2266 }
e929a76b 2267 do {
3280af22 2268 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2269 fake_eof:
3280af22
NIS
2270 if (PL_rsfp) {
2271 if (PL_preprocess && !PL_in_eval)
2272 (void)PerlProc_pclose(PL_rsfp);
2273 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2274 PerlIO_clearerr(PL_rsfp);
395c3793 2275 else
3280af22
NIS
2276 (void)PerlIO_close(PL_rsfp);
2277 PL_rsfp = Nullfp;
4a9ae47a 2278 PL_doextract = FALSE;
395c3793 2279 }
3280af22
NIS
2280 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2281 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2282 sv_catpv(PL_linestr,";}");
2283 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2284 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2285 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2286 goto retry;
2287 }
3280af22
NIS
2288 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2289 sv_setpv(PL_linestr,"");
79072805 2290 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2291 }
3280af22 2292 if (PL_doextract) {
a0d0e21e 2293 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2294 PL_doextract = FALSE;
a0d0e21e
LW
2295
2296 /* Incest with pod. */
2297 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2298 sv_setpv(PL_linestr, "");
2299 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2300 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2301 PL_doextract = FALSE;
a0d0e21e
LW
2302 }
2303 }
463ee0b2 2304 incline(s);
3280af22
NIS
2305 } while (PL_doextract);
2306 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2307 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2308 SV *sv = NEWSV(85,0);
a687059c 2309
93a17b20 2310 sv_upgrade(sv, SVt_PVMG);
3280af22
NIS
2311 sv_setsv(sv,PL_linestr);
2312 av_store(GvAV(PL_curcop->cop_filegv),(I32)PL_curcop->cop_line,sv);
a687059c 2313 }
3280af22
NIS
2314 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2315 if (PL_curcop->cop_line == 1) {
2316 while (s < PL_bufend && isSPACE(*s))
79072805 2317 s++;
a0d0e21e 2318 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2319 s++;
44a8e56a 2320 d = Nullch;
3280af22 2321 if (!PL_in_eval) {
44a8e56a 2322 if (*s == '#' && *(s+1) == '!')
2323 d = s + 2;
2324#ifdef ALTERNATE_SHEBANG
2325 else {
2326 static char as[] = ALTERNATE_SHEBANG;
2327 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2328 d = s + (sizeof(as) - 1);
2329 }
2330#endif /* ALTERNATE_SHEBANG */
2331 }
2332 if (d) {
b8378b72 2333 char *ipath;
774d564b 2334 char *ipathend;
b8378b72 2335
774d564b 2336 while (isSPACE(*d))
b8378b72
CS
2337 d++;
2338 ipath = d;
774d564b 2339 while (*d && !isSPACE(*d))
2340 d++;
2341 ipathend = d;
2342
2343#ifdef ARG_ZERO_IS_SCRIPT
2344 if (ipathend > ipath) {
2345 /*
2346 * HP-UX (at least) sets argv[0] to the script name,
2347 * which makes $^X incorrect. And Digital UNIX and Linux,
2348 * at least, set argv[0] to the basename of the Perl
2349 * interpreter. So, having found "#!", we'll set it right.
2350 */
2351 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2352 assert(SvPOK(x) || SvGMAGICAL(x));
6b88bc9c 2353 if (sv_eq(x, GvSV(PL_curcop->cop_filegv))) {
774d564b 2354 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2355 SvSETMAGIC(x);
2356 }
774d564b 2357 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2358 }
774d564b 2359#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2360
2361 /*
2362 * Look for options.
2363 */
748a9306
LW
2364 d = instr(s,"perl -");
2365 if (!d)
2366 d = instr(s,"perl");
44a8e56a 2367#ifdef ALTERNATE_SHEBANG
2368 /*
2369 * If the ALTERNATE_SHEBANG on this system starts with a
2370 * character that can be part of a Perl expression, then if
2371 * we see it but not "perl", we're probably looking at the
2372 * start of Perl code, not a request to hand off to some
2373 * other interpreter. Similarly, if "perl" is there, but
2374 * not in the first 'word' of the line, we assume the line
2375 * contains the start of the Perl program.
44a8e56a 2376 */
2377 if (d && *s != '#') {
774d564b 2378 char *c = ipath;
44a8e56a 2379 while (*c && !strchr("; \t\r\n\f\v#", *c))
2380 c++;
2381 if (c < d)
2382 d = Nullch; /* "perl" not in first word; ignore */
2383 else
2384 *s = '#'; /* Don't try to parse shebang line */
2385 }
774d564b 2386#endif /* ALTERNATE_SHEBANG */
748a9306 2387 if (!d &&
44a8e56a 2388 *s == '#' &&
774d564b 2389 ipathend > ipath &&
3280af22 2390 !PL_minus_c &&
748a9306 2391 !instr(s,"indir") &&
3280af22 2392 instr(PL_origargv[0],"perl"))
748a9306 2393 {
9f68db38 2394 char **newargv;
9f68db38 2395
774d564b 2396 *ipathend = '\0';
2397 s = ipathend + 1;
3280af22 2398 while (s < PL_bufend && isSPACE(*s))
9f68db38 2399 s++;
3280af22
NIS
2400 if (s < PL_bufend) {
2401 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2402 newargv[1] = s;
3280af22 2403 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2404 s++;
2405 *s = '\0';
3280af22 2406 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2407 }
2408 else
3280af22 2409 newargv = PL_origargv;
774d564b 2410 newargv[0] = ipath;
80252599 2411 PerlProc_execv(ipath, newargv);
cea2e8a9 2412 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2413 }
748a9306 2414 if (d) {
3280af22
NIS
2415 U32 oldpdb = PL_perldb;
2416 bool oldn = PL_minus_n;
2417 bool oldp = PL_minus_p;
748a9306
LW
2418
2419 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2420 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2421
2422 if (*d++ == '-') {
8cc95fdb 2423 do {
2424 if (*d == 'M' || *d == 'm') {
2425 char *m = d;
2426 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2427 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2428 (int)(d - m), m);
2429 }
2430 d = moreswitches(d);
2431 } while (d);
84902520 2432 if (PERLDB_LINE && !oldpdb ||
3280af22 2433 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2434 /* if we have already added "LINE: while (<>) {",
2435 we must not do it again */
748a9306 2436 {
3280af22
NIS
2437 sv_setpv(PL_linestr, "");
2438 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2439 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2440 PL_preambled = FALSE;
84902520 2441 if (PERLDB_LINE)
3280af22 2442 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2443 goto retry;
2444 }
a0d0e21e 2445 }
79072805 2446 }
9f68db38 2447 }
79072805 2448 }
3280af22
NIS
2449 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2450 PL_bufptr = s;
2451 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2452 return yylex();
ae986130 2453 }
378cc40b 2454 goto retry;
4fdae800 2455 case '\r':
6a27c188 2456#ifdef PERL_STRICT_CR
cea2e8a9
GS
2457 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2458 Perl_croak(aTHX_
54310121 2459 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2460#endif
4fdae800 2461 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2462 s++;
2463 goto retry;
378cc40b 2464 case '#':
e929a76b 2465 case '\n':
3280af22
NIS
2466 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2467 d = PL_bufend;
a687059c 2468 while (s < d && *s != '\n')
378cc40b 2469 s++;
0f85fab0 2470 if (s < d)
378cc40b 2471 s++;
463ee0b2 2472 incline(s);
3280af22
NIS
2473 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2474 PL_bufptr = s;
2475 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2476 return yylex();
a687059c 2477 }
378cc40b 2478 }
a687059c 2479 else {
378cc40b 2480 *s = '\0';
3280af22 2481 PL_bufend = s;
a687059c 2482 }
378cc40b
LW
2483 goto retry;
2484 case '-':
79072805 2485 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2486 s++;
3280af22 2487 PL_bufptr = s;
748a9306
LW
2488 tmp = *s++;
2489
3280af22 2490 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2491 s++;
2492
2493 if (strnEQ(s,"=>",2)) {
3280af22 2494 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2495 OPERATOR('-'); /* unary minus */
2496 }
3280af22
NIS
2497 PL_last_uni = PL_oldbufptr;
2498 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2499 switch (tmp) {
79072805
LW
2500 case 'r': FTST(OP_FTEREAD);
2501 case 'w': FTST(OP_FTEWRITE);
2502 case 'x': FTST(OP_FTEEXEC);
2503 case 'o': FTST(OP_FTEOWNED);
2504 case 'R': FTST(OP_FTRREAD);
2505 case 'W': FTST(OP_FTRWRITE);
2506 case 'X': FTST(OP_FTREXEC);
2507 case 'O': FTST(OP_FTROWNED);
2508 case 'e': FTST(OP_FTIS);
2509 case 'z': FTST(OP_FTZERO);
2510 case 's': FTST(OP_FTSIZE);
2511 case 'f': FTST(OP_FTFILE);
2512 case 'd': FTST(OP_FTDIR);
2513 case 'l': FTST(OP_FTLINK);
2514 case 'p': FTST(OP_FTPIPE);
2515 case 'S': FTST(OP_FTSOCK);
2516 case 'u': FTST(OP_FTSUID);
2517 case 'g': FTST(OP_FTSGID);
2518 case 'k': FTST(OP_FTSVTX);
2519 case 'b': FTST(OP_FTBLK);
2520 case 'c': FTST(OP_FTCHR);
2521 case 't': FTST(OP_FTTTY);
2522 case 'T': FTST(OP_FTTEXT);
2523 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2524 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2525 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2526 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2527 default:
cea2e8a9 2528 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2529 break;
2530 }
2531 }
a687059c
LW
2532 tmp = *s++;
2533 if (*s == tmp) {
2534 s++;
3280af22 2535 if (PL_expect == XOPERATOR)
79072805
LW
2536 TERM(POSTDEC);
2537 else
2538 OPERATOR(PREDEC);
2539 }
2540 else if (*s == '>') {
2541 s++;
2542 s = skipspace(s);
834a4ddd 2543 if (isIDFIRST_lazy(s)) {
a0d0e21e 2544 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2545 TOKEN(ARROW);
79072805 2546 }
748a9306
LW
2547 else if (*s == '$')
2548 OPERATOR(ARROW);
463ee0b2 2549 else
748a9306 2550 TERM(ARROW);
a687059c 2551 }
3280af22 2552 if (PL_expect == XOPERATOR)
79072805
LW
2553 Aop(OP_SUBTRACT);
2554 else {
3280af22 2555 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2556 check_uni();
79072805 2557 OPERATOR('-'); /* unary minus */
2f3197b3 2558 }
79072805 2559
378cc40b 2560 case '+':
a687059c
LW
2561 tmp = *s++;
2562 if (*s == tmp) {
378cc40b 2563 s++;
3280af22 2564 if (PL_expect == XOPERATOR)
79072805
LW
2565 TERM(POSTINC);
2566 else
2567 OPERATOR(PREINC);
378cc40b 2568 }
3280af22 2569 if (PL_expect == XOPERATOR)
79072805
LW
2570 Aop(OP_ADD);
2571 else {
3280af22 2572 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2573 check_uni();
a687059c 2574 OPERATOR('+');
2f3197b3 2575 }
a687059c 2576
378cc40b 2577 case '*':
3280af22
NIS
2578 if (PL_expect != XOPERATOR) {
2579 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2580 PL_expect = XOPERATOR;
2581 force_ident(PL_tokenbuf, '*');
2582 if (!*PL_tokenbuf)
a0d0e21e 2583 PREREF('*');
79072805 2584 TERM('*');
a687059c 2585 }
79072805
LW
2586 s++;
2587 if (*s == '*') {
a687059c 2588 s++;
79072805 2589 PWop(OP_POW);
a687059c 2590 }
79072805
LW
2591 Mop(OP_MULTIPLY);
2592
378cc40b 2593 case '%':
3280af22 2594 if (PL_expect == XOPERATOR) {
bbce6d69 2595 ++s;
2596 Mop(OP_MODULO);
a687059c 2597 }
3280af22
NIS
2598 PL_tokenbuf[0] = '%';
2599 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2600 if (!PL_tokenbuf[1]) {
2601 if (s == PL_bufend)
bbce6d69 2602 yyerror("Final % should be \\% or %name");
2603 PREREF('%');
a687059c 2604 }
3280af22 2605 PL_pending_ident = '%';
bbce6d69 2606 TERM('%');
a687059c 2607
378cc40b 2608 case '^':
79072805 2609 s++;
a0d0e21e 2610 BOop(OP_BIT_XOR);
79072805 2611 case '[':
3280af22 2612 PL_lex_brackets++;
79072805 2613 /* FALL THROUGH */
378cc40b 2614 case '~':
378cc40b 2615 case ',':
378cc40b
LW
2616 tmp = *s++;
2617 OPERATOR(tmp);
a0d0e21e
LW
2618 case ':':
2619 if (s[1] == ':') {
2620 len = 0;
2621 goto just_a_word;
2622 }
2623 s++;
2624 OPERATOR(':');
8990e307
LW
2625 case '(':
2626 s++;
3280af22
NIS
2627 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2628 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2629 else
3280af22 2630 PL_expect = XTERM;
a0d0e21e 2631 TOKEN('(');
378cc40b 2632 case ';':
3280af22
NIS
2633 if (PL_curcop->cop_line < PL_copline)
2634 PL_copline = PL_curcop->cop_line;
378cc40b
LW
2635 tmp = *s++;
2636 OPERATOR(tmp);
2637 case ')':
378cc40b 2638 tmp = *s++;
16d20bd9
AD
2639 s = skipspace(s);
2640 if (*s == '{')
2641 PREBLOCK(tmp);
378cc40b 2642 TERM(tmp);
79072805
LW
2643 case ']':
2644 s++;
3280af22 2645 if (PL_lex_brackets <= 0)
d98d5fff 2646 yyerror("Unmatched right square bracket");
463ee0b2 2647 else
3280af22
NIS
2648 --PL_lex_brackets;
2649 if (PL_lex_state == LEX_INTERPNORMAL) {
2650 if (PL_lex_brackets == 0) {
a0d0e21e 2651 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2652 PL_lex_state = LEX_INTERPEND;
79072805
LW
2653 }
2654 }
4633a7c4 2655 TERM(']');
79072805
LW
2656 case '{':
2657 leftbracket:
79072805 2658 s++;
3280af22
NIS
2659 if (PL_lex_brackets > 100) {
2660 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2661 if (newlb != PL_lex_brackstack) {
8990e307 2662 SAVEFREEPV(newlb);
3280af22 2663 PL_lex_brackstack = newlb;
8990e307
LW
2664 }
2665 }
3280af22 2666 switch (PL_expect) {
a0d0e21e 2667 case XTERM:
3280af22 2668 if (PL_lex_formbrack) {
a0d0e21e
LW
2669 s--;
2670 PRETERMBLOCK(DO);
2671 }
3280af22
NIS
2672 if (PL_oldoldbufptr == PL_last_lop)
2673 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2674 else
3280af22 2675 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2676 OPERATOR(HASHBRACK);
a0d0e21e 2677 case XOPERATOR:
3280af22 2678 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2679 s++;
44a8e56a 2680 d = s;
3280af22
NIS
2681 PL_tokenbuf[0] = '\0';
2682 if (d < PL_bufend && *d == '-') {
2683 PL_tokenbuf[0] = '-';
44a8e56a 2684 d++;
3280af22 2685 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2686 d++;
2687 }
834a4ddd 2688 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2689 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2690 FALSE, &len);
3280af22 2691 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2692 d++;
2693 if (*d == '}') {
3280af22 2694 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2695 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2696 if (minus)
2697 force_next('-');
748a9306
LW
2698 }
2699 }
2700 /* FALL THROUGH */
2701 case XBLOCK:
3280af22
NIS
2702 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2703 PL_expect = XSTATE;
a0d0e21e
LW
2704 break;
2705 case XTERMBLOCK:
3280af22
NIS
2706 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2707 PL_expect = XSTATE;
a0d0e21e
LW
2708 break;
2709 default: {
2710 char *t;
3280af22
NIS
2711 if (PL_oldoldbufptr == PL_last_lop)
2712 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2713 else
3280af22 2714 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2715 s = skipspace(s);
09ecc4b6 2716 if (*s == '}')
a0d0e21e 2717 OPERATOR(HASHBRACK);
b8a4b1be
GS
2718 /* This hack serves to disambiguate a pair of curlies
2719 * as being a block or an anon hash. Normally, expectation
2720 * determines that, but in cases where we're not in a
2721 * position to expect anything in particular (like inside
2722 * eval"") we have to resolve the ambiguity. This code
2723 * covers the case where the first term in the curlies is a
2724 * quoted string. Most other cases need to be explicitly
2725 * disambiguated by prepending a `+' before the opening
2726 * curly in order to force resolution as an anon hash.
2727 *
2728 * XXX should probably propagate the outer expectation
2729 * into eval"" to rely less on this hack, but that could
2730 * potentially break current behavior of eval"".
2731 * GSAR 97-07-21
2732 */
2733 t = s;
2734 if (*s == '\'' || *s == '"' || *s == '`') {
2735 /* common case: get past first string, handling escapes */
3280af22 2736 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2737 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2738 t++;
2739 t++;
a0d0e21e 2740 }
b8a4b1be 2741 else if (*s == 'q') {
3280af22 2742 if (++t < PL_bufend
b8a4b1be 2743 && (!isALNUM(*t)
3280af22 2744 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
b8a4b1be
GS
2745 && !isALNUM(*t)))) {
2746 char *tmps;
2747 char open, close, term;
2748 I32 brackets = 1;
2749
3280af22 2750 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2751 t++;
2752 term = *t;
2753 open = term;
2754 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2755 term = tmps[5];
2756 close = term;
2757 if (open == close)
3280af22
NIS
2758 for (t++; t < PL_bufend; t++) {
2759 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2760 t++;
6d07e5e9 2761 else if (*t == open)
b8a4b1be
GS
2762 break;
2763 }
2764 else
3280af22
NIS
2765 for (t++; t < PL_bufend; t++) {
2766 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2767 t++;
6d07e5e9 2768 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2769 break;
2770 else if (*t == open)
2771 brackets++;
2772 }
2773 }
2774 t++;
a0d0e21e 2775 }
834a4ddd
LW
2776 else if (isIDFIRST_lazy(s)) {
2777 for (t++; t < PL_bufend && isALNUM_lazy(t); t++) ;
a0d0e21e 2778 }
3280af22 2779 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2780 t++;
b8a4b1be
GS
2781 /* if comma follows first term, call it an anon hash */
2782 /* XXX it could be a comma expression with loop modifiers */
3280af22 2783 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2784 || (*t == '=' && t[1] == '>')))
a0d0e21e 2785 OPERATOR(HASHBRACK);
3280af22 2786 if (PL_expect == XREF)
4e4e412b 2787 PL_expect = XTERM;
a0d0e21e 2788 else {
3280af22
NIS
2789 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2790 PL_expect = XSTATE;
a0d0e21e 2791 }
8990e307 2792 }
a0d0e21e 2793 break;
463ee0b2 2794 }
3280af22 2795 yylval.ival = PL_curcop->cop_line;
79072805 2796 if (isSPACE(*s) || *s == '#')
3280af22 2797 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2798 TOKEN('{');
378cc40b 2799 case '}':
79072805
LW
2800 rightbracket:
2801 s++;
3280af22 2802 if (PL_lex_brackets <= 0)
d98d5fff 2803 yyerror("Unmatched right curly bracket");
463ee0b2 2804 else
3280af22
NIS
2805 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2806 if (PL_lex_brackets < PL_lex_formbrack)
2807 PL_lex_formbrack = 0;
2808 if (PL_lex_state == LEX_INTERPNORMAL) {
2809 if (PL_lex_brackets == 0) {
2810 if (PL_lex_fakebrack) {
2811 PL_lex_state = LEX_INTERPEND;
2812 PL_bufptr = s;
cea2e8a9 2813 return yylex(); /* ignore fake brackets */
79072805 2814 }
fa83b5b6 2815 if (*s == '-' && s[1] == '>')
3280af22 2816 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 2817 else if (*s != '[' && *s != '{')
3280af22 2818 PL_lex_state = LEX_INTERPEND;
79072805
LW
2819 }
2820 }
3280af22
NIS
2821 if (PL_lex_brackets < PL_lex_fakebrack) {
2822 PL_bufptr = s;
2823 PL_lex_fakebrack = 0;
cea2e8a9 2824 return yylex(); /* ignore fake brackets */
748a9306 2825 }
79072805
LW
2826 force_next('}');
2827 TOKEN(';');
378cc40b
LW
2828 case '&':
2829 s++;
2830 tmp = *s++;
2831 if (tmp == '&')
a0d0e21e 2832 AOPERATOR(ANDAND);
378cc40b 2833 s--;
3280af22 2834 if (PL_expect == XOPERATOR) {
834a4ddd 2835 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
3280af22 2836 PL_curcop->cop_line--;
cea2e8a9 2837 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3280af22 2838 PL_curcop->cop_line++;
463ee0b2 2839 }
79072805 2840 BAop(OP_BIT_AND);
463ee0b2 2841 }
79072805 2842
3280af22
NIS
2843 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2844 if (*PL_tokenbuf) {
2845 PL_expect = XOPERATOR;
2846 force_ident(PL_tokenbuf, '&');
463ee0b2 2847 }
79072805
LW
2848 else
2849 PREREF('&');
c07a80fd 2850 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
2851 TERM('&');
2852
378cc40b
LW
2853 case '|':
2854 s++;
2855 tmp = *s++;
2856 if (tmp == '|')
a0d0e21e 2857 AOPERATOR(OROR);
378cc40b 2858 s--;
79072805 2859 BOop(OP_BIT_OR);
378cc40b
LW
2860 case '=':
2861 s++;
2862 tmp = *s++;
2863 if (tmp == '=')
79072805
LW
2864 Eop(OP_EQ);
2865 if (tmp == '>')
2866 OPERATOR(',');
378cc40b 2867 if (tmp == '~')
79072805 2868 PMop(OP_MATCH);
599cee73 2869 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 2870 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 2871 s--;
3280af22
NIS
2872 if (PL_expect == XSTATE && isALPHA(tmp) &&
2873 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 2874 {
3280af22
NIS
2875 if (PL_in_eval && !PL_rsfp) {
2876 d = PL_bufend;
a5f75d66
AD
2877 while (s < d) {
2878 if (*s++ == '\n') {
2879 incline(s);
2880 if (strnEQ(s,"=cut",4)) {
2881 s = strchr(s,'\n');
2882 if (s)
2883 s++;
2884 else
2885 s = d;
2886 incline(s);
2887 goto retry;
2888 }
2889 }
2890 }
2891 goto retry;
2892 }
3280af22
NIS
2893 s = PL_bufend;
2894 PL_doextract = TRUE;
a0d0e21e
LW
2895 goto retry;
2896 }
3280af22 2897 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 2898 char *t;
51882d45 2899#ifdef PERL_STRICT_CR
a0d0e21e 2900 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
2901#else
2902 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
2903#endif
a0d0e21e
LW
2904 if (*t == '\n' || *t == '#') {
2905 s--;
3280af22 2906 PL_expect = XBLOCK;
a0d0e21e
LW
2907 goto leftbracket;
2908 }
79072805 2909 }
a0d0e21e
LW
2910 yylval.ival = 0;
2911 OPERATOR(ASSIGNOP);
378cc40b
LW
2912 case '!':
2913 s++;
2914 tmp = *s++;
2915 if (tmp == '=')
79072805 2916 Eop(OP_NE);
378cc40b 2917 if (tmp == '~')
79072805 2918 PMop(OP_NOT);
378cc40b
LW
2919 s--;
2920 OPERATOR('!');
2921 case '<':
3280af22 2922 if (PL_expect != XOPERATOR) {
93a17b20 2923 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 2924 check_uni();
79072805
LW
2925 if (s[1] == '<')
2926 s = scan_heredoc(s);
2927 else
2928 s = scan_inputsymbol(s);
2929 TERM(sublex_start());
378cc40b
LW
2930 }
2931 s++;
2932 tmp = *s++;
2933 if (tmp == '<')
79072805 2934 SHop(OP_LEFT_SHIFT);
395c3793
LW
2935 if (tmp == '=') {
2936 tmp = *s++;
2937 if (tmp == '>')
79072805 2938 Eop(OP_NCMP);
395c3793 2939 s--;
79072805 2940 Rop(OP_LE);
395c3793 2941 }
378cc40b 2942 s--;
79072805 2943 Rop(OP_LT);
378cc40b
LW
2944 case '>':
2945 s++;
2946 tmp = *s++;
2947 if (tmp == '>')
79072805 2948 SHop(OP_RIGHT_SHIFT);
378cc40b 2949 if (tmp == '=')
79072805 2950 Rop(OP_GE);
378cc40b 2951 s--;
79072805 2952 Rop(OP_GT);
378cc40b
LW
2953
2954 case '$':
bbce6d69 2955 CLINE;
2956
3280af22
NIS
2957 if (PL_expect == XOPERATOR) {
2958 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
2959 PL_expect = XTERM;
a0d0e21e 2960 depcom();
bbce6d69 2961 return ','; /* grandfather non-comma-format format */
a0d0e21e 2962 }
8990e307 2963 }
a0d0e21e 2964
834a4ddd 2965 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 2966 PL_tokenbuf[0] = '@';
376b8730
SM
2967 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
2968 sizeof PL_tokenbuf - 1, FALSE);
2969 if (PL_expect == XOPERATOR)
2970 no_op("Array length", s);
3280af22 2971 if (!PL_tokenbuf[1])
a0d0e21e 2972 PREREF(DOLSHARP);
3280af22
NIS
2973 PL_expect = XOPERATOR;
2974 PL_pending_ident = '#';
463ee0b2 2975 TOKEN(DOLSHARP);
79072805 2976 }
bbce6d69 2977
3280af22 2978 PL_tokenbuf[0] = '$';
376b8730
SM
2979 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
2980 sizeof PL_tokenbuf - 1, FALSE);
2981 if (PL_expect == XOPERATOR)
2982 no_op("Scalar", s);
3280af22
NIS
2983 if (!PL_tokenbuf[1]) {
2984 if (s == PL_bufend)
bbce6d69 2985 yyerror("Final $ should be \\$ or $name");
2986 PREREF('$');
8990e307 2987 }
a0d0e21e 2988
bbce6d69 2989 /* This kludge not intended to be bulletproof. */
3280af22 2990 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 2991 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 2992 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 2993 yylval.opval->op_private = OPpCONST_ARYBASE;
2994 TERM(THING);
2995 }
2996
ff68c719 2997 d = s;
69d2bceb 2998 tmp = (I32)*s;
3280af22 2999 if (PL_lex_state == LEX_NORMAL)
ff68c719 3000 s = skipspace(s);
3001
3280af22 3002 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3003 char *t;
3004 if (*s == '[') {
3280af22 3005 PL_tokenbuf[0] = '@';
599cee73 3006 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3007 for(t = s + 1;
834a4ddd 3008 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3009 t++) ;
a0d0e21e 3010 if (*t++ == ',') {
3280af22
NIS
3011 PL_bufptr = skipspace(PL_bufptr);
3012 while (t < PL_bufend && *t != ']')
bbce6d69 3013 t++;
cea2e8a9 3014 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3015 "Multidimensional syntax %.*s not supported",
3016 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3017 }
3018 }
bbce6d69 3019 }
3020 else if (*s == '{') {
3280af22 3021 PL_tokenbuf[0] = '%';
599cee73 3022 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3023 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3024 {
3280af22 3025 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3026 STRLEN len;
3027 for (t++; isSPACE(*t); t++) ;
834a4ddd 3028 if (isIDFIRST_lazy(t)) {
8903cb82 3029 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3030 for (; isSPACE(*t); t++) ;
864dbfa3 3031 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3032 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3033 "You need to quote \"%s\"", tmpbuf);
748a9306 3034 }
93a17b20
LW
3035 }
3036 }
2f3197b3 3037 }
bbce6d69 3038
3280af22 3039 PL_expect = XOPERATOR;
69d2bceb 3040 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3041 bool islop = (PL_last_lop == PL_oldoldbufptr);
3042 if (!islop || PL_last_lop_op == OP_GREPSTART)
3043 PL_expect = XOPERATOR;
bbce6d69 3044 else if (strchr("$@\"'`q", *s))
3280af22 3045 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3046 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3047 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3048 else if (isIDFIRST_lazy(s)) {
3280af22 3049 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3050 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3051 if (tmp = keyword(tmpbuf, len)) {
3052 /* binary operators exclude handle interpretations */
3053 switch (tmp) {
3054 case -KEY_x:
3055 case -KEY_eq:
3056 case -KEY_ne:
3057 case -KEY_gt:
3058 case -KEY_lt:
3059 case -KEY_ge:
3060 case -KEY_le:
3061 case -KEY_cmp:
3062 break;
3063 default:
3280af22 3064 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3065 break;
3066 }
3067 }
68dc0745 3068 else {
3069 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3070 if (gv && GvCVu(gv))
3280af22 3071 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3072 }
93a17b20 3073 }
bbce6d69 3074 else if (isDIGIT(*s))
3280af22 3075 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3076 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3077 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3078 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3079 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3080 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3081 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3082 }
3280af22 3083 PL_pending_ident = '$';
79072805 3084 TOKEN('$');
378cc40b
LW
3085
3086 case '@':
3280af22 3087 if (PL_expect == XOPERATOR)
bbce6d69 3088 no_op("Array", s);
3280af22
NIS
3089 PL_tokenbuf[0] = '@';
3090 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3091 if (!PL_tokenbuf[1]) {
3092 if (s == PL_bufend)
bbce6d69 3093 yyerror("Final @ should be \\@ or @name");
3094 PREREF('@');
3095 }
3280af22 3096 if (PL_lex_state == LEX_NORMAL)
ff68c719 3097 s = skipspace(s);
3280af22 3098 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3099 if (*s == '{')
3280af22 3100 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3101
3102 /* Warn about @ where they meant $. */
599cee73 3103 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3104 if (*s == '[' || *s == '{') {
3105 char *t = s + 1;
834a4ddd 3106 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3107 t++;
3108 if (*t == '}' || *t == ']') {
3109 t++;
3280af22 3110 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3111 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3112 "Scalar value %.*s better written as $%.*s",
3280af22 3113 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3114 }
93a17b20
LW
3115 }
3116 }
463ee0b2 3117 }
3280af22 3118 PL_pending_ident = '@';
79072805 3119 TERM('@');
378cc40b
LW
3120
3121 case '/': /* may either be division or pattern */
3122 case '?': /* may either be conditional or pattern */
3280af22 3123 if (PL_expect != XOPERATOR) {
c277df42 3124 /* Disable warning on "study /blah/" */
3280af22
NIS
3125 if (PL_oldoldbufptr == PL_last_uni
3126 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3127 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3128 check_uni();
8782bef2 3129 s = scan_pat(s,OP_MATCH);
79072805 3130 TERM(sublex_start());
378cc40b
LW
3131 }
3132 tmp = *s++;
a687059c 3133 if (tmp == '/')
79072805 3134 Mop(OP_DIVIDE);
378cc40b
LW
3135 OPERATOR(tmp);
3136
3137 case '.':
51882d45
GS
3138 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3139#ifdef PERL_STRICT_CR
3140 && s[1] == '\n'
3141#else
3142 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3143#endif
3144 && (s == PL_linestart || s[-1] == '\n') )
3145 {
3280af22
NIS
3146 PL_lex_formbrack = 0;
3147 PL_expect = XSTATE;
79072805
LW
3148 goto rightbracket;
3149 }
3280af22 3150 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3151 tmp = *s++;
a687059c
LW
3152 if (*s == tmp) {
3153 s++;
2f3197b3
LW
3154 if (*s == tmp) {
3155 s++;
79072805 3156 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3157 }
3158 else
79072805 3159 yylval.ival = 0;
378cc40b 3160 OPERATOR(DOTDOT);
a687059c 3161 }
3280af22 3162 if (PL_expect != XOPERATOR)
2f3197b3 3163 check_uni();
79072805 3164 Aop(OP_CONCAT);
378cc40b
LW
3165 }
3166 /* FALL THROUGH */
3167 case '0': case '1': case '2': case '3': case '4':
3168 case '5': case '6': case '7': case '8': case '9':
79072805 3169 s = scan_num(s);
3280af22 3170 if (PL_expect == XOPERATOR)
8990e307 3171 no_op("Number",s);
79072805
LW
3172 TERM(THING);
3173
3174 case '\'':
8990e307 3175 s = scan_str(s);
3280af22
NIS
3176 if (PL_expect == XOPERATOR) {
3177 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3178 PL_expect = XTERM;
a0d0e21e
LW
3179 depcom();
3180 return ','; /* grandfather non-comma-format format */
3181 }
463ee0b2 3182 else
8990e307 3183 no_op("String",s);
463ee0b2 3184 }
79072805 3185 if (!s)
85e6fe83 3186 missingterm((char*)0);
79072805
LW
3187 yylval.ival = OP_CONST;
3188 TERM(sublex_start());
3189
3190 case '"':
8990e307 3191 s = scan_str(s);
3280af22
NIS
3192 if (PL_expect == XOPERATOR) {
3193 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3194 PL_expect = XTERM;
a0d0e21e
LW
3195 depcom();
3196 return ','; /* grandfather non-comma-format format */
3197 }
463ee0b2 3198 else
8990e307 3199 no_op("String",s);
463ee0b2 3200 }
79072805 3201 if (!s)
85e6fe83 3202 missingterm((char*)0);
4633a7c4 3203 yylval.ival = OP_CONST;
3280af22 3204 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3205 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3206 yylval.ival = OP_STRINGIFY;
3207 break;
3208 }
3209 }
79072805
LW
3210 TERM(sublex_start());
3211
3212 case '`':
3213 s = scan_str(s);
3280af22 3214 if (PL_expect == XOPERATOR)
8990e307 3215 no_op("Backticks",s);
79072805 3216 if (!s)
85e6fe83 3217 missingterm((char*)0);
79072805
LW
3218 yylval.ival = OP_BACKTICK;
3219 set_csh();
3220 TERM(sublex_start());
3221
3222 case '\\':
3223 s++;
599cee73 3224 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3225 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3226 *s, *s);
3280af22 3227 if (PL_expect == XOPERATOR)
8990e307 3228 no_op("Backslash",s);
79072805
LW
3229 OPERATOR(REFGEN);
3230
3231 case 'x':
3280af22 3232 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3233 s++;
3234 Mop(OP_REPEAT);
2f3197b3 3235 }
79072805
LW
3236 goto keylookup;
3237
378cc40b 3238 case '_':
79072805
LW
3239 case 'a': case 'A':
3240 case 'b': case 'B':
3241 case 'c': case 'C':
3242 case 'd': case 'D':
3243 case 'e': case 'E':
3244 case 'f': case 'F':
3245 case 'g': case 'G':
3246 case 'h': case 'H':
3247 case 'i': case 'I':
3248 case 'j': case 'J':
3249 case 'k': case 'K':
3250 case 'l': case 'L':
3251 case 'm': case 'M':
3252 case 'n': case 'N':
3253 case 'o': case 'O':
3254 case 'p': case 'P':
3255 case 'q': case 'Q':
3256 case 'r': case 'R':
3257 case 's': case 'S':
3258 case 't': case 'T':
3259 case 'u': case 'U':
3260 case 'v': case 'V':
3261 case 'w': case 'W':
3262 case 'X':
3263 case 'y': case 'Y':
3264 case 'z': case 'Z':
3265
49dc05e3 3266 keylookup: {
2d8e6c8d 3267 STRLEN n_a;
161b471a
NIS
3268 gv = Nullgv;
3269 gvp = 0;
49dc05e3 3270
3280af22
NIS
3271 PL_bufptr = s;
3272 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3273
3274 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3275 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3276 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3277 (PL_tokenbuf[0] == 'q' &&
3278 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3279
3280 /* x::* is just a word, unless x is "CORE" */
3280af22 3281 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3282 goto just_a_word;
3283
3643fb5f 3284 d = s;
3280af22 3285 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3286 d++; /* no comments skipped here, or s### is misparsed */
3287
3288 /* Is this a label? */
3280af22
NIS
3289 if (!tmp && PL_expect == XSTATE
3290 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3291 s = d + 1;
3280af22 3292 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3293 CLINE;
3294 TOKEN(LABEL);
3643fb5f
CS
3295 }
3296
3297 /* Check for keywords */
3280af22 3298 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3299
3300 /* Is this a word before a => operator? */
748a9306
LW
3301 if (strnEQ(d,"=>",2)) {
3302 CLINE;
3280af22 3303 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3304 yylval.opval->op_private = OPpCONST_BARE;
3305 TERM(WORD);
3306 }
3307
a0d0e21e 3308 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3309 GV *ogv = Nullgv; /* override (winner) */
3310 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3311 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3312 CV *cv;
3280af22 3313 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3314 (cv = GvCVu(gv)))
3315 {
3316 if (GvIMPORTED_CV(gv))
3317 ogv = gv;
3318 else if (! CvMETHOD(cv))
3319 hgv = gv;
3320 }
3321 if (!ogv &&
3280af22
NIS
3322 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3323 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3324 GvCVu(gv) && GvIMPORTED_CV(gv))
3325 {
3326 ogv = gv;
3327 }
3328 }
3329 if (ogv) {
3330 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3331 }
3332 else if (gv && !gvp
3333 && -tmp==KEY_lock /* XXX generalizable kludge */
3280af22 3334 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3335 {
3336 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3337 }
56f7f34b
CS
3338 else { /* no override */
3339 tmp = -tmp;
3340 gv = Nullgv;
3341 gvp = 0;
4944e2f7
GS
3342 if (ckWARN(WARN_AMBIGUOUS) && hgv
3343 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3344 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3345 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3346 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3347 }
a0d0e21e
LW
3348 }
3349
3350 reserved_word:
3351 switch (tmp) {
79072805
LW
3352
3353 default: /* not a keyword */
93a17b20 3354 just_a_word: {
96e4d5b1 3355 SV *sv;
3280af22 3356 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3357
3358 /* Get the rest if it looks like a package qualifier */
3359
a0d0e21e 3360 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3361 STRLEN morelen;
3280af22 3362 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3363 TRUE, &morelen);
3364 if (!morelen)
cea2e8a9 3365 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3366 *s == '\'' ? "'" : "::");
c3e0f903 3367 len += morelen;
a0d0e21e 3368 }
8990e307 3369
3280af22
NIS
3370 if (PL_expect == XOPERATOR) {
3371 if (PL_bufptr == PL_linestart) {
3372 PL_curcop->cop_line--;
cea2e8a9 3373 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3280af22 3374 PL_curcop->cop_line++;
463ee0b2
LW
3375 }
3376 else
54310121 3377 no_op("Bareword",s);
463ee0b2 3378 }
8990e307 3379
c3e0f903
GS
3380 /* Look for a subroutine with this name in current package,
3381 unless name is "Foo::", in which case Foo is a bearword
3382 (and a package name). */
3383
3384 if (len > 2 &&
3280af22 3385 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3386 {
599cee73 3387 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3388 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3389 "Bareword \"%s\" refers to nonexistent package",
3280af22 3390 PL_tokenbuf);
c3e0f903 3391 len -= 2;
3280af22 3392 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3393 gv = Nullgv;
3394 gvp = 0;
3395 }
3396 else {
3397 len = 0;
3398 if (!gv)
3280af22 3399 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3400 }
3401
3402 /* if we saw a global override before, get the right name */
8990e307 3403
49dc05e3 3404 if (gvp) {
79cb57f6 3405 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3406 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3407 }
3408 else
3280af22 3409 sv = newSVpv(PL_tokenbuf,0);
8990e307 3410
a0d0e21e
LW
3411 /* Presume this is going to be a bareword of some sort. */
3412
3413 CLINE;
49dc05e3 3414 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3415 yylval.opval->op_private = OPpCONST_BARE;
3416
c3e0f903
GS
3417 /* And if "Foo::", then that's what it certainly is. */
3418
3419 if (len)
3420 goto safe_bareword;
3421
8990e307
LW
3422 /* See if it's the indirect object for a list operator. */
3423
3280af22
NIS
3424 if (PL_oldoldbufptr &&
3425 PL_oldoldbufptr < PL_bufptr &&
3426 (PL_oldoldbufptr == PL_last_lop || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3427 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3428 (PL_expect == XREF ||
3429 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3430 {
748a9306
LW
3431 bool immediate_paren = *s == '(';
3432
a0d0e21e
LW
3433 /* (Now we can afford to cross potential line boundary.) */
3434 s = skipspace(s);
3435
3436 /* Two barewords in a row may indicate method call. */
3437
834a4ddd 3438 if ((isIDFIRST_lazy(s) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3439 return tmp;
3440
3441 /* If not a declared subroutine, it's an indirect object. */
3442 /* (But it's an indir obj regardless for sort.) */
3443
3280af22 3444 if ((PL_last_lop_op == OP_SORT ||
a9ef352a
GS
3445 (!immediate_paren && (!gv || !GvCVu(gv)))) &&
3446 (PL_last_lop_op != OP_MAPSTART &&
3447 PL_last_lop_op != OP_GREPSTART))
3448 {
3280af22 3449 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 3450 goto bareword;
93a17b20
LW
3451 }
3452 }
8990e307
LW
3453
3454 /* If followed by a paren, it's certainly a subroutine. */
3455
3280af22 3456 PL_expect = XOPERATOR;
8990e307 3457 s = skipspace(s);
93a17b20 3458 if (*s == '(') {
79072805 3459 CLINE;
96e4d5b1 3460 if (gv && GvCVu(gv)) {
3461 for (d = s + 1; *d == ' ' || *d == '\t'; d++) ;
7a52d87a 3462 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 3463 s = d + 1;
3464 goto its_constant;
3465 }
3466 }
3280af22
NIS
3467 PL_nextval[PL_nexttoke].opval = yylval.opval;
3468 PL_expect = XOPERATOR;
93a17b20 3469 force_next(WORD);
c07a80fd 3470 yylval.ival = 0;
463ee0b2 3471 TOKEN('&');
79072805 3472 }
93a17b20 3473
a0d0e21e 3474 /* If followed by var or block, call it a method (unless sub) */
8990e307 3475
8ebc5c01 3476 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
3477 PL_last_lop = PL_oldbufptr;
3478 PL_last_lop_op = OP_METHOD;
93a17b20 3479 PREBLOCK(METHOD);
463ee0b2
LW
3480 }
3481
8990e307
LW
3482 /* If followed by a bareword, see if it looks like indir obj. */
3483
834a4ddd 3484 if ((isIDFIRST_lazy(s) || *s == '$') && (tmp = intuit_method(s,gv)))
a0d0e21e 3485 return tmp;
93a17b20 3486
8990e307
LW
3487 /* Not a method, so call it a subroutine (if defined) */
3488
8ebc5c01 3489 if (gv && GvCVu(gv)) {
46fc3d4c 3490 CV* cv;
0453d815
PM
3491 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
3492 Perl_warner(aTHX_ WARN_AMBIGUOUS,
3493 "Ambiguous use of -%s resolved as -&%s()",
3280af22 3494 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 3495 /* Check for a constant sub */
46fc3d4c 3496 cv = GvCV(gv);
96e4d5b1 3497 if ((sv = cv_const_sv(cv))) {
3498 its_constant:
3499 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
3500 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
3501 yylval.opval->op_private = 0;
3502 TOKEN(WORD);
89bfa8cd 3503 }
3504
a5f75d66
AD
3505 /* Resolve to GV now. */
3506 op_free(yylval.opval);
3507 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 3508 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 3509 PL_last_lop = PL_oldbufptr;
bf848113 3510 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
3511 /* Is there a prototype? */
3512 if (SvPOK(cv)) {
3513 STRLEN len;
7a52d87a 3514 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
3515 if (!len)
3516 TERM(FUNC0SUB);
7a52d87a 3517 if (strEQ(proto, "$"))
4633a7c4 3518 OPERATOR(UNIOPSUB);
7a52d87a 3519 if (*proto == '&' && *s == '{') {
3280af22 3520 sv_setpv(PL_subname,"__ANON__");
4633a7c4
LW
3521 PREBLOCK(LSTOPSUB);
3522 }
a9ef352a 3523 }
3280af22
NIS
3524 PL_nextval[PL_nexttoke].opval = yylval.opval;
3525 PL_expect = XTERM;
8990e307
LW
3526 force_next(WORD);
3527 TOKEN(NOAMP);
3528 }
748a9306 3529
8990e307
LW
3530 /* Call it a bare word */
3531
5603f27d
GS
3532 if (PL_hints & HINT_STRICT_SUBS)
3533 yylval.opval->op_private |= OPpCONST_STRICT;
3534 else {
3535 bareword:
3536 if (ckWARN(WARN_RESERVED)) {
3537 if (lastchar != '-') {
3538 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
3539 if (!*d)
cea2e8a9 3540 Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved,
5603f27d
GS
3541 PL_tokenbuf);
3542 }
748a9306
LW
3543 }
3544 }
c3e0f903
GS
3545
3546 safe_bareword:
f248d071 3547 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
0453d815
PM
3548 Perl_warner(aTHX_ WARN_AMBIGUOUS,
3549 "Operator or semicolon missing before %c%s",
3280af22 3550 lastchar, PL_tokenbuf);
0453d815
PM
3551 Perl_warner(aTHX_ WARN_AMBIGUOUS,
3552 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
3553 lastchar, lastchar);
3554 }
93a17b20 3555 TOKEN(WORD);
79072805 3556 }
79072805 3557
68dc0745 3558 case KEY___FILE__:
46fc3d4c 3559 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 3560 newSVsv(GvSV(PL_curcop->cop_filegv)));
46fc3d4c 3561 TERM(THING);
3562
79072805 3563 case KEY___LINE__:
46fc3d4c 3564 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
cea2e8a9 3565 Perl_newSVpvf(aTHX_ "%ld", (long)PL_curcop->cop_line));
79072805 3566 TERM(THING);
68dc0745 3567
3568 case KEY___PACKAGE__:
3569 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
3570 (PL_curstash
3571 ? newSVsv(PL_curstname)
3572 : &PL_sv_undef));
79072805 3573 TERM(THING);
79072805 3574
e50aee73 3575 case KEY___DATA__:
79072805
LW
3576 case KEY___END__: {
3577 GV *gv;
79072805
LW
3578
3579 /*SUPPRESS 560*/
3280af22 3580 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 3581 char *pname = "main";
3280af22
NIS
3582 if (PL_tokenbuf[2] == 'D')
3583 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 3584 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 3585 GvMULTI_on(gv);