This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Forgot from #8545. Oops.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
3280af22
NIS
129#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
130#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
131#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
132#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
133#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
134#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
135#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
136#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
137#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
138#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
139#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
140#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
141#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
142#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
143#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
144#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
145#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
146#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
147#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
148#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 149
a687059c
LW
150/* This bit of chicanery makes a unary function followed by
151 * a parenthesis into a function with one argument, highest precedence.
152 */
2f3197b3 153#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
154 PL_expect = XTERM, \
155 PL_bufptr = s, \
156 PL_last_uni = PL_oldbufptr, \
157 PL_last_lop_op = f, \
a687059c
LW
158 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
159
79072805 160#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
79072805
LW
163 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
164
9f68db38 165/* grandfather return to old style */
3280af22 166#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 167
ffb4593c
NT
168/*
169 * S_ao
170 *
171 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
172 * into an OP_ANDASSIGN or OP_ORASSIGN
173 */
174
76e3520e 175STATIC int
cea2e8a9 176S_ao(pTHX_ int toketype)
a0d0e21e 177{
3280af22
NIS
178 if (*PL_bufptr == '=') {
179 PL_bufptr++;
a0d0e21e
LW
180 if (toketype == ANDAND)
181 yylval.ival = OP_ANDASSIGN;
182 else if (toketype == OROR)
183 yylval.ival = OP_ORASSIGN;
184 toketype = ASSIGNOP;
185 }
186 return toketype;
187}
188
ffb4593c
NT
189/*
190 * S_no_op
191 * When Perl expects an operator and finds something else, no_op
192 * prints the warning. It always prints "<something> found where
193 * operator expected. It prints "Missing semicolon on previous line?"
194 * if the surprise occurs at the start of the line. "do you need to
195 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
196 * where the compiler doesn't know if foo is a method call or a function.
197 * It prints "Missing operator before end of line" if there's nothing
198 * after the missing operator, or "... before <...>" if there is something
199 * after the missing operator.
200 */
201
76e3520e 202STATIC void
cea2e8a9 203S_no_op(pTHX_ char *what, char *s)
463ee0b2 204{
3280af22
NIS
205 char *oldbp = PL_bufptr;
206 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 207
1189a94a
GS
208 if (!s)
209 s = oldbp;
07c798fb 210 else
1189a94a 211 PL_bufptr = s;
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 221 }
07c798fb
HS
222 else {
223 assert(s >= oldbp);
cea2e8a9 224 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 225 }
3280af22 226 PL_bufptr = oldbp;
8990e307
LW
227}
228
ffb4593c
NT
229/*
230 * S_missingterm
231 * Complain about missing quote/regexp/heredoc terminator.
232 * If it's called with (char *)NULL then it cauterizes the line buffer.
233 * If we're in a delimited string and the delimiter is a control
234 * character, it's reformatted into a two-char sequence like ^C.
235 * This is fatal.
236 */
237
76e3520e 238STATIC void
cea2e8a9 239S_missingterm(pTHX_ char *s)
8990e307
LW
240{
241 char tmpbuf[3];
242 char q;
243 if (s) {
244 char *nl = strrchr(s,'\n');
d2719217 245 if (nl)
8990e307
LW
246 *nl = '\0';
247 }
9d116dd7
JH
248 else if (
249#ifdef EBCDIC
250 iscntrl(PL_multi_close)
251#else
252 PL_multi_close < 32 || PL_multi_close == 127
253#endif
254 ) {
8990e307 255 *tmpbuf = '^';
3280af22 256 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
257 s = "\\n";
258 tmpbuf[2] = '\0';
259 s = tmpbuf;
260 }
261 else {
3280af22 262 *tmpbuf = PL_multi_close;
8990e307
LW
263 tmpbuf[1] = '\0';
264 s = tmpbuf;
265 }
266 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 267 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 268}
79072805 269
ffb4593c
NT
270/*
271 * Perl_deprecate
ffb4593c
NT
272 */
273
79072805 274void
864dbfa3 275Perl_deprecate(pTHX_ char *s)
a0d0e21e 276{
599cee73 277 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 278 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
279}
280
ffb4593c
NT
281/*
282 * depcom
9cbb5ea2 283 * Deprecate a comma-less variable list.
ffb4593c
NT
284 */
285
76e3520e 286STATIC void
cea2e8a9 287S_depcom(pTHX)
a0d0e21e
LW
288{
289 deprecate("comma-less variable list");
290}
291
ffb4593c 292/*
9cbb5ea2
GS
293 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
294 * utf16-to-utf8-reversed.
ffb4593c
NT
295 */
296
c39cd008
GS
297#ifdef PERL_CR_FILTER
298static void
299strip_return(SV *sv)
300{
301 register char *s = SvPVX(sv);
302 register char *e = s + SvCUR(sv);
303 /* outer loop optimized to do nothing if there are no CR-LFs */
304 while (s < e) {
305 if (*s++ == '\r' && *s == '\n') {
306 /* hit a CR-LF, need to copy the rest */
307 register char *d = s - 1;
308 *d++ = *s++;
309 while (s < e) {
310 if (*s == '\r' && s[1] == '\n')
311 s++;
312 *d++ = *s++;
313 }
314 SvCUR(sv) -= s - d;
315 return;
316 }
317 }
318}
a868473f 319
76e3520e 320STATIC I32
c39cd008 321S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 322{
c39cd008
GS
323 I32 count = FILTER_READ(idx+1, sv, maxlen);
324 if (count > 0 && !maxlen)
325 strip_return(sv);
326 return count;
a868473f
NIS
327}
328#endif
329
ffb4593c
NT
330/*
331 * Perl_lex_start
9cbb5ea2
GS
332 * Initialize variables. Uses the Perl save_stack to save its state (for
333 * recursive calls to the parser).
ffb4593c
NT
334 */
335
a0d0e21e 336void
864dbfa3 337Perl_lex_start(pTHX_ SV *line)
79072805 338{
8990e307
LW
339 char *s;
340 STRLEN len;
341
3280af22
NIS
342 SAVEI32(PL_lex_dojoin);
343 SAVEI32(PL_lex_brackets);
3280af22
NIS
344 SAVEI32(PL_lex_casemods);
345 SAVEI32(PL_lex_starts);
346 SAVEI32(PL_lex_state);
7766f137 347 SAVEVPTR(PL_lex_inpat);
3280af22 348 SAVEI32(PL_lex_inwhat);
18b09519
GS
349 if (PL_lex_state == LEX_KNOWNEXT) {
350 I32 toke = PL_nexttoke;
351 while (--toke >= 0) {
352 SAVEI32(PL_nexttype[toke]);
353 SAVEVPTR(PL_nextval[toke]);
354 }
355 SAVEI32(PL_nexttoke);
18b09519 356 }
57843af0 357 SAVECOPLINE(PL_curcop);
3280af22
NIS
358 SAVEPPTR(PL_bufptr);
359 SAVEPPTR(PL_bufend);
360 SAVEPPTR(PL_oldbufptr);
361 SAVEPPTR(PL_oldoldbufptr);
362 SAVEPPTR(PL_linestart);
363 SAVESPTR(PL_linestr);
364 SAVEPPTR(PL_lex_brackstack);
365 SAVEPPTR(PL_lex_casestack);
c76ac1ee 366 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
367 SAVESPTR(PL_lex_stuff);
368 SAVEI32(PL_lex_defer);
09bef843 369 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 370 SAVESPTR(PL_lex_repl);
bebdddfc
GS
371 SAVEINT(PL_expect);
372 SAVEINT(PL_lex_expect);
3280af22
NIS
373
374 PL_lex_state = LEX_NORMAL;
375 PL_lex_defer = 0;
376 PL_expect = XSTATE;
377 PL_lex_brackets = 0;
3280af22
NIS
378 New(899, PL_lex_brackstack, 120, char);
379 New(899, PL_lex_casestack, 12, char);
380 SAVEFREEPV(PL_lex_brackstack);
381 SAVEFREEPV(PL_lex_casestack);
382 PL_lex_casemods = 0;
383 *PL_lex_casestack = '\0';
384 PL_lex_dojoin = 0;
385 PL_lex_starts = 0;
386 PL_lex_stuff = Nullsv;
387 PL_lex_repl = Nullsv;
388 PL_lex_inpat = 0;
76be56bc 389 PL_nexttoke = 0;
3280af22 390 PL_lex_inwhat = 0;
09bef843 391 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
392 PL_linestr = line;
393 if (SvREADONLY(PL_linestr))
394 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
395 s = SvPV(PL_linestr, len);
8990e307 396 if (len && s[len-1] != ';') {
3280af22
NIS
397 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
398 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
399 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 400 }
3280af22
NIS
401 SvTEMP_off(PL_linestr);
402 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
403 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
404 SvREFCNT_dec(PL_rs);
79cb57f6 405 PL_rs = newSVpvn("\n", 1);
3280af22 406 PL_rsfp = 0;
79072805 407}
a687059c 408
ffb4593c
NT
409/*
410 * Perl_lex_end
9cbb5ea2
GS
411 * Finalizer for lexing operations. Must be called when the parser is
412 * done with the lexer.
ffb4593c
NT
413 */
414
463ee0b2 415void
864dbfa3 416Perl_lex_end(pTHX)
463ee0b2 417{
3280af22 418 PL_doextract = FALSE;
463ee0b2
LW
419}
420
ffb4593c
NT
421/*
422 * S_incline
423 * This subroutine has nothing to do with tilting, whether at windmills
424 * or pinball tables. Its name is short for "increment line". It
57843af0 425 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 426 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
427 * # line 500 "foo.pm"
428 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
429 */
430
76e3520e 431STATIC void
cea2e8a9 432S_incline(pTHX_ char *s)
463ee0b2
LW
433{
434 char *t;
435 char *n;
73659bf1 436 char *e;
463ee0b2 437 char ch;
463ee0b2 438
57843af0 439 CopLINE_inc(PL_curcop);
463ee0b2
LW
440 if (*s++ != '#')
441 return;
bf4acbe4 442 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
443 if (strnEQ(s, "line", 4))
444 s += 4;
445 else
446 return;
447 if (*s == ' ' || *s == '\t')
448 s++;
4e553d73 449 else
73659bf1 450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
452 if (!isDIGIT(*s))
453 return;
454 n = s;
455 while (isDIGIT(*s))
456 s++;
bf4acbe4 457 while (SPACE_OR_TAB(*s))
463ee0b2 458 s++;
73659bf1 459 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 460 s++;
73659bf1
GS
461 e = t + 1;
462 }
463ee0b2 463 else {
463ee0b2 464 for (t = s; !isSPACE(*t); t++) ;
73659bf1 465 e = t;
463ee0b2 466 }
bf4acbe4 467 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
468 e++;
469 if (*e != '\n' && *e != '\0')
470 return; /* false alarm */
471
463ee0b2
LW
472 ch = *t;
473 *t = '\0';
f4dd75d9
GS
474 if (t - s > 0) {
475#ifdef USE_ITHREADS
476 Safefree(CopFILE(PL_curcop));
477#else
478 SvREFCNT_dec(CopFILEGV(PL_curcop));
479#endif
57843af0 480 CopFILE_set(PL_curcop, s);
f4dd75d9 481 }
463ee0b2 482 *t = ch;
57843af0 483 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
484}
485
ffb4593c
NT
486/*
487 * S_skipspace
488 * Called to gobble the appropriate amount and type of whitespace.
489 * Skips comments as well.
490 */
491
76e3520e 492STATIC char *
cea2e8a9 493S_skipspace(pTHX_ register char *s)
a687059c 494{
3280af22 495 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 496 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
497 s++;
498 return s;
499 }
500 for (;;) {
fd049845 501 STRLEN prevlen;
09bef843
SB
502 SSize_t oldprevlen, oldoldprevlen;
503 SSize_t oldloplen, oldunilen;
60e6418e
GS
504 while (s < PL_bufend && isSPACE(*s)) {
505 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
506 incline(s);
507 }
ffb4593c
NT
508
509 /* comment */
3280af22
NIS
510 if (s < PL_bufend && *s == '#') {
511 while (s < PL_bufend && *s != '\n')
463ee0b2 512 s++;
60e6418e 513 if (s < PL_bufend) {
463ee0b2 514 s++;
60e6418e
GS
515 if (PL_in_eval && !PL_rsfp) {
516 incline(s);
517 continue;
518 }
519 }
463ee0b2 520 }
ffb4593c
NT
521
522 /* only continue to recharge the buffer if we're at the end
523 * of the buffer, we're not reading from a source filter, and
524 * we're in normal lexing mode
525 */
09bef843
SB
526 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
527 PL_lex_state == LEX_FORMLINE)
463ee0b2 528 return s;
ffb4593c
NT
529
530 /* try to recharge the buffer */
9cbb5ea2
GS
531 if ((s = filter_gets(PL_linestr, PL_rsfp,
532 (prevlen = SvCUR(PL_linestr)))) == Nullch)
533 {
534 /* end of file. Add on the -p or -n magic */
3280af22
NIS
535 if (PL_minus_n || PL_minus_p) {
536 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
537 ";}continue{print or die qq(-p destination: $!\\n)" :
538 "");
3280af22
NIS
539 sv_catpv(PL_linestr,";}");
540 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
541 }
542 else
3280af22 543 sv_setpv(PL_linestr,";");
ffb4593c
NT
544
545 /* reset variables for next time we lex */
9cbb5ea2
GS
546 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
547 = SvPVX(PL_linestr);
3280af22 548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
549
550 /* Close the filehandle. Could be from -P preprocessor,
551 * STDIN, or a regular file. If we were reading code from
552 * STDIN (because the commandline held no -e or filename)
553 * then we don't close it, we reset it so the code can
554 * read from STDIN too.
555 */
556
3280af22
NIS
557 if (PL_preprocess && !PL_in_eval)
558 (void)PerlProc_pclose(PL_rsfp);
559 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
560 PerlIO_clearerr(PL_rsfp);
8990e307 561 else
3280af22
NIS
562 (void)PerlIO_close(PL_rsfp);
563 PL_rsfp = Nullfp;
463ee0b2
LW
564 return s;
565 }
ffb4593c
NT
566
567 /* not at end of file, so we only read another line */
09bef843
SB
568 /* make corresponding updates to old pointers, for yyerror() */
569 oldprevlen = PL_oldbufptr - PL_bufend;
570 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
571 if (PL_last_uni)
572 oldunilen = PL_last_uni - PL_bufend;
573 if (PL_last_lop)
574 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
575 PL_linestart = PL_bufptr = s + prevlen;
576 PL_bufend = s + SvCUR(PL_linestr);
577 s = PL_bufptr;
09bef843
SB
578 PL_oldbufptr = s + oldprevlen;
579 PL_oldoldbufptr = s + oldoldprevlen;
580 if (PL_last_uni)
581 PL_last_uni = s + oldunilen;
582 if (PL_last_lop)
583 PL_last_lop = s + oldloplen;
a0d0e21e 584 incline(s);
ffb4593c
NT
585
586 /* debugger active and we're not compiling the debugger code,
587 * so store the line into the debugger's array of lines
588 */
3280af22 589 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
590 SV *sv = NEWSV(85,0);
591
592 sv_upgrade(sv, SVt_PVMG);
3280af22 593 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 594 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 595 }
463ee0b2 596 }
a687059c 597}
378cc40b 598
ffb4593c
NT
599/*
600 * S_check_uni
601 * Check the unary operators to ensure there's no ambiguity in how they're
602 * used. An ambiguous piece of code would be:
603 * rand + 5
604 * This doesn't mean rand() + 5. Because rand() is a unary operator,
605 * the +5 is its argument.
606 */
607
76e3520e 608STATIC void
cea2e8a9 609S_check_uni(pTHX)
ba106d47 610{
2f3197b3 611 char *s;
a0d0e21e 612 char *t;
2f3197b3 613
3280af22 614 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 615 return;
3280af22
NIS
616 while (isSPACE(*PL_last_uni))
617 PL_last_uni++;
7e2040f0 618 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 619 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 620 return;
0453d815 621 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 622 char ch = *s;
0453d815 623 *s = '\0';
4e553d73
NIS
624 Perl_warner(aTHX_ WARN_AMBIGUOUS,
625 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
626 PL_last_uni);
627 *s = ch;
628 }
2f3197b3
LW
629}
630
ffb4593c
NT
631/* workaround to replace the UNI() macro with a function. Only the
632 * hints/uts.sh file mentions this. Other comments elsewhere in the
633 * source indicate Microport Unix might need it too.
634 */
635
ffed7fef
LW
636#ifdef CRIPPLED_CC
637
638#undef UNI
ffed7fef 639#define UNI(f) return uni(f,s)
ffed7fef 640
76e3520e 641STATIC int
cea2e8a9 642S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
643{
644 yylval.ival = f;
3280af22
NIS
645 PL_expect = XTERM;
646 PL_bufptr = s;
8f872242
NIS
647 PL_last_uni = PL_oldbufptr;
648 PL_last_lop_op = f;
ffed7fef
LW
649 if (*s == '(')
650 return FUNC1;
651 s = skipspace(s);
652 if (*s == '(')
653 return FUNC1;
654 else
655 return UNIOP;
656}
657
a0d0e21e
LW
658#endif /* CRIPPLED_CC */
659
ffb4593c
NT
660/*
661 * LOP : macro to build a list operator. Its behaviour has been replaced
662 * with a subroutine, S_lop() for which LOP is just another name.
663 */
664
a0d0e21e
LW
665#define LOP(f,x) return lop(f,x,s)
666
ffb4593c
NT
667/*
668 * S_lop
669 * Build a list operator (or something that might be one). The rules:
670 * - if we have a next token, then it's a list operator [why?]
671 * - if the next thing is an opening paren, then it's a function
672 * - else it's a list operator
673 */
674
76e3520e 675STATIC I32
a0be28da 676S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 677{
79072805 678 yylval.ival = f;
35c8bce7 679 CLINE;
3280af22
NIS
680 PL_expect = x;
681 PL_bufptr = s;
682 PL_last_lop = PL_oldbufptr;
683 PL_last_lop_op = f;
684 if (PL_nexttoke)
a0d0e21e 685 return LSTOP;
79072805
LW
686 if (*s == '(')
687 return FUNC;
688 s = skipspace(s);
689 if (*s == '(')
690 return FUNC;
691 else
692 return LSTOP;
693}
694
ffb4593c
NT
695/*
696 * S_force_next
9cbb5ea2 697 * When the lexer realizes it knows the next token (for instance,
ffb4593c 698 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
699 * to know what token to return the next time the lexer is called. Caller
700 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
701 * handles the token correctly.
ffb4593c
NT
702 */
703
4e553d73 704STATIC void
cea2e8a9 705S_force_next(pTHX_ I32 type)
79072805 706{
3280af22
NIS
707 PL_nexttype[PL_nexttoke] = type;
708 PL_nexttoke++;
709 if (PL_lex_state != LEX_KNOWNEXT) {
710 PL_lex_defer = PL_lex_state;
711 PL_lex_expect = PL_expect;
712 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
713 }
714}
715
ffb4593c
NT
716/*
717 * S_force_word
718 * When the lexer knows the next thing is a word (for instance, it has
719 * just seen -> and it knows that the next char is a word char, then
720 * it calls S_force_word to stick the next word into the PL_next lookahead.
721 *
722 * Arguments:
b1b65b59 723 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
724 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
725 * int check_keyword : if true, Perl checks to make sure the word isn't
726 * a keyword (do this if the word is a label, e.g. goto FOO)
727 * int allow_pack : if true, : characters will also be allowed (require,
728 * use, etc. do this)
9cbb5ea2 729 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
730 */
731
76e3520e 732STATIC char *
cea2e8a9 733S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 734{
463ee0b2
LW
735 register char *s;
736 STRLEN len;
4e553d73 737
463ee0b2
LW
738 start = skipspace(start);
739 s = start;
7e2040f0 740 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 741 (allow_pack && *s == ':') ||
15f0808c 742 (allow_initial_tick && *s == '\'') )
a0d0e21e 743 {
3280af22
NIS
744 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
745 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
746 return start;
747 if (token == METHOD) {
748 s = skipspace(s);
749 if (*s == '(')
3280af22 750 PL_expect = XTERM;
463ee0b2 751 else {
3280af22 752 PL_expect = XOPERATOR;
463ee0b2 753 }
79072805 754 }
3280af22
NIS
755 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
756 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
757 force_next(token);
758 }
759 return s;
760}
761
ffb4593c
NT
762/*
763 * S_force_ident
9cbb5ea2 764 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
765 * text only contains the "foo" portion. The first argument is a pointer
766 * to the "foo", and the second argument is the type symbol to prefix.
767 * Forces the next token to be a "WORD".
9cbb5ea2 768 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
769 */
770
76e3520e 771STATIC void
cea2e8a9 772S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
773{
774 if (s && *s) {
11343788 775 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 776 PL_nextval[PL_nexttoke].opval = o;
79072805 777 force_next(WORD);
748a9306 778 if (kind) {
11343788 779 o->op_private = OPpCONST_ENTERED;
55497cff
PP
780 /* XXX see note in pp_entereval() for why we forgo typo
781 warnings if the symbol must be introduced in an eval.
782 GSAR 96-10-12 */
3280af22 783 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
784 kind == '$' ? SVt_PV :
785 kind == '@' ? SVt_PVAV :
786 kind == '%' ? SVt_PVHV :
787 SVt_PVGV
788 );
748a9306 789 }
79072805
LW
790 }
791}
792
1571675a
GS
793NV
794Perl_str_to_version(pTHX_ SV *sv)
795{
796 NV retval = 0.0;
797 NV nshift = 1.0;
798 STRLEN len;
799 char *start = SvPVx(sv,len);
3aa33fe5 800 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
801 char *end = start + len;
802 while (start < end) {
ba210ebe 803 STRLEN skip;
1571675a
GS
804 UV n;
805 if (utf)
dcad2880 806 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
807 else {
808 n = *(U8*)start;
809 skip = 1;
810 }
811 retval += ((NV)n)/nshift;
812 start += skip;
813 nshift *= 1000;
814 }
815 return retval;
816}
817
4e553d73 818/*
ffb4593c
NT
819 * S_force_version
820 * Forces the next token to be a version number.
821 */
822
76e3520e 823STATIC char *
cea2e8a9 824S_force_version(pTHX_ char *s)
89bfa8cd
PP
825{
826 OP *version = Nullop;
44dcb63b 827 char *d;
89bfa8cd
PP
828
829 s = skipspace(s);
830
44dcb63b 831 d = s;
dd629d5b 832 if (*d == 'v')
44dcb63b 833 d++;
44dcb63b 834 if (isDIGIT(*d)) {
a7cb1f99 835 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 836 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 837 SV *ver;
b73d6f50 838 s = scan_num(s, &yylval);
89bfa8cd 839 version = yylval.opval;
dd629d5b
GS
840 ver = cSVOPx(version)->op_sv;
841 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 842 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
843 SvNVX(ver) = str_to_version(ver);
844 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 845 }
89bfa8cd
PP
846 }
847 }
848
849 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 850 PL_nextval[PL_nexttoke].opval = version;
4e553d73 851 force_next(WORD);
89bfa8cd
PP
852
853 return (s);
854}
855
ffb4593c
NT
856/*
857 * S_tokeq
858 * Tokenize a quoted string passed in as an SV. It finds the next
859 * chunk, up to end of string or a backslash. It may make a new
860 * SV containing that chunk (if HINT_NEW_STRING is on). It also
861 * turns \\ into \.
862 */
863
76e3520e 864STATIC SV *
cea2e8a9 865S_tokeq(pTHX_ SV *sv)
79072805
LW
866{
867 register char *s;
868 register char *send;
869 register char *d;
b3ac6de7
IZ
870 STRLEN len = 0;
871 SV *pv = sv;
79072805
LW
872
873 if (!SvLEN(sv))
b3ac6de7 874 goto finish;
79072805 875
a0d0e21e 876 s = SvPV_force(sv, len);
21a311ee 877 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 878 goto finish;
463ee0b2 879 send = s + len;
79072805
LW
880 while (s < send && *s != '\\')
881 s++;
882 if (s == send)
b3ac6de7 883 goto finish;
79072805 884 d = s;
3280af22 885 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 886 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
887 while (s < send) {
888 if (*s == '\\') {
a0d0e21e 889 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
890 s++; /* all that, just for this */
891 }
892 *d++ = *s++;
893 }
894 *d = '\0';
463ee0b2 895 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 896 finish:
3280af22 897 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 898 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
899 return sv;
900}
901
ffb4593c
NT
902/*
903 * Now come three functions related to double-quote context,
904 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
905 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
906 * interact with PL_lex_state, and create fake ( ... ) argument lists
907 * to handle functions and concatenation.
908 * They assume that whoever calls them will be setting up a fake
909 * join call, because each subthing puts a ',' after it. This lets
910 * "lower \luPpEr"
911 * become
912 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
913 *
914 * (I'm not sure whether the spurious commas at the end of lcfirst's
915 * arguments and join's arguments are created or not).
916 */
917
918/*
919 * S_sublex_start
920 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
921 *
922 * Pattern matching will set PL_lex_op to the pattern-matching op to
923 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
924 *
925 * OP_CONST and OP_READLINE are easy--just make the new op and return.
926 *
927 * Everything else becomes a FUNC.
928 *
929 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
930 * had an OP_CONST or OP_READLINE). This just sets us up for a
931 * call to S_sublex_push().
932 */
933
76e3520e 934STATIC I32
cea2e8a9 935S_sublex_start(pTHX)
79072805
LW
936{
937 register I32 op_type = yylval.ival;
79072805
LW
938
939 if (op_type == OP_NULL) {
3280af22
NIS
940 yylval.opval = PL_lex_op;
941 PL_lex_op = Nullop;
79072805
LW
942 return THING;
943 }
944 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 945 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
946
947 if (SvTYPE(sv) == SVt_PVIV) {
948 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
949 STRLEN len;
950 char *p;
951 SV *nsv;
952
953 p = SvPV(sv, len);
79cb57f6 954 nsv = newSVpvn(p, len);
01ec43d0
GS
955 if (SvUTF8(sv))
956 SvUTF8_on(nsv);
b3ac6de7
IZ
957 SvREFCNT_dec(sv);
958 sv = nsv;
4e553d73 959 }
b3ac6de7 960 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 961 PL_lex_stuff = Nullsv;
79072805
LW
962 return THING;
963 }
964
3280af22
NIS
965 PL_sublex_info.super_state = PL_lex_state;
966 PL_sublex_info.sub_inwhat = op_type;
967 PL_sublex_info.sub_op = PL_lex_op;
968 PL_lex_state = LEX_INTERPPUSH;
55497cff 969
3280af22
NIS
970 PL_expect = XTERM;
971 if (PL_lex_op) {
972 yylval.opval = PL_lex_op;
973 PL_lex_op = Nullop;
55497cff
PP
974 return PMFUNC;
975 }
976 else
977 return FUNC;
978}
979
ffb4593c
NT
980/*
981 * S_sublex_push
982 * Create a new scope to save the lexing state. The scope will be
983 * ended in S_sublex_done. Returns a '(', starting the function arguments
984 * to the uc, lc, etc. found before.
985 * Sets PL_lex_state to LEX_INTERPCONCAT.
986 */
987
76e3520e 988STATIC I32
cea2e8a9 989S_sublex_push(pTHX)
55497cff 990{
f46d017c 991 ENTER;
55497cff 992
3280af22
NIS
993 PL_lex_state = PL_sublex_info.super_state;
994 SAVEI32(PL_lex_dojoin);
995 SAVEI32(PL_lex_brackets);
3280af22
NIS
996 SAVEI32(PL_lex_casemods);
997 SAVEI32(PL_lex_starts);
998 SAVEI32(PL_lex_state);
7766f137 999 SAVEVPTR(PL_lex_inpat);
3280af22 1000 SAVEI32(PL_lex_inwhat);
57843af0 1001 SAVECOPLINE(PL_curcop);
3280af22
NIS
1002 SAVEPPTR(PL_bufptr);
1003 SAVEPPTR(PL_oldbufptr);
1004 SAVEPPTR(PL_oldoldbufptr);
1005 SAVEPPTR(PL_linestart);
1006 SAVESPTR(PL_linestr);
1007 SAVEPPTR(PL_lex_brackstack);
1008 SAVEPPTR(PL_lex_casestack);
1009
1010 PL_linestr = PL_lex_stuff;
1011 PL_lex_stuff = Nullsv;
1012
9cbb5ea2
GS
1013 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1014 = SvPVX(PL_linestr);
3280af22
NIS
1015 PL_bufend += SvCUR(PL_linestr);
1016 SAVEFREESV(PL_linestr);
1017
1018 PL_lex_dojoin = FALSE;
1019 PL_lex_brackets = 0;
3280af22
NIS
1020 New(899, PL_lex_brackstack, 120, char);
1021 New(899, PL_lex_casestack, 12, char);
1022 SAVEFREEPV(PL_lex_brackstack);
1023 SAVEFREEPV(PL_lex_casestack);
1024 PL_lex_casemods = 0;
1025 *PL_lex_casestack = '\0';
1026 PL_lex_starts = 0;
1027 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1028 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1029
1030 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1031 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1032 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1033 else
3280af22 1034 PL_lex_inpat = Nullop;
79072805 1035
55497cff 1036 return '(';
79072805
LW
1037}
1038
ffb4593c
NT
1039/*
1040 * S_sublex_done
1041 * Restores lexer state after a S_sublex_push.
1042 */
1043
76e3520e 1044STATIC I32
cea2e8a9 1045S_sublex_done(pTHX)
79072805 1046{
3280af22 1047 if (!PL_lex_starts++) {
9aa983d2
JH
1048 SV *sv = newSVpvn("",0);
1049 if (SvUTF8(PL_linestr))
1050 SvUTF8_on(sv);
3280af22 1051 PL_expect = XOPERATOR;
9aa983d2 1052 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1053 return THING;
1054 }
1055
3280af22
NIS
1056 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1057 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1058 return yylex();
79072805
LW
1059 }
1060
ffb4593c 1061 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1062 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1063 PL_linestr = PL_lex_repl;
1064 PL_lex_inpat = 0;
1065 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1066 PL_bufend += SvCUR(PL_linestr);
1067 SAVEFREESV(PL_linestr);
1068 PL_lex_dojoin = FALSE;
1069 PL_lex_brackets = 0;
3280af22
NIS
1070 PL_lex_casemods = 0;
1071 *PL_lex_casestack = '\0';
1072 PL_lex_starts = 0;
25da4f38 1073 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1074 PL_lex_state = LEX_INTERPNORMAL;
1075 PL_lex_starts++;
e9fa98b2
HS
1076 /* we don't clear PL_lex_repl here, so that we can check later
1077 whether this is an evalled subst; that means we rely on the
1078 logic to ensure sublex_done() is called again only via the
1079 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1080 }
e9fa98b2 1081 else {
3280af22 1082 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1083 PL_lex_repl = Nullsv;
1084 }
79072805 1085 return ',';
ffed7fef
LW
1086 }
1087 else {
f46d017c 1088 LEAVE;
3280af22
NIS
1089 PL_bufend = SvPVX(PL_linestr);
1090 PL_bufend += SvCUR(PL_linestr);
1091 PL_expect = XOPERATOR;
09bef843 1092 PL_sublex_info.sub_inwhat = 0;
79072805 1093 return ')';
ffed7fef
LW
1094 }
1095}
1096
02aa26ce
NT
1097/*
1098 scan_const
1099
1100 Extracts a pattern, double-quoted string, or transliteration. This
1101 is terrifying code.
1102
3280af22
NIS
1103 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1104 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1105 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1106
9b599b2a
GS
1107 Returns a pointer to the character scanned up to. Iff this is
1108 advanced from the start pointer supplied (ie if anything was
1109 successfully parsed), will leave an OP for the substring scanned
1110 in yylval. Caller must intuit reason for not parsing further
1111 by looking at the next characters herself.
1112
02aa26ce
NT
1113 In patterns:
1114 backslashes:
1115 double-quoted style: \r and \n
1116 regexp special ones: \D \s
1117 constants: \x3
1118 backrefs: \1 (deprecated in substitution replacements)
1119 case and quoting: \U \Q \E
1120 stops on @ and $, but not for $ as tail anchor
1121
1122 In transliterations:
1123 characters are VERY literal, except for - not at the start or end
1124 of the string, which indicates a range. scan_const expands the
1125 range to the full set of intermediate characters.
1126
1127 In double-quoted strings:
1128 backslashes:
1129 double-quoted style: \r and \n
1130 constants: \x3
1131 backrefs: \1 (deprecated)
1132 case and quoting: \U \Q \E
1133 stops on @ and $
1134
1135 scan_const does *not* construct ops to handle interpolated strings.
1136 It stops processing as soon as it finds an embedded $ or @ variable
1137 and leaves it to the caller to work out what's going on.
1138
1139 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1140
1141 $ in pattern could be $foo or could be tail anchor. Assumption:
1142 it's a tail anchor if $ is the last thing in the string, or if it's
1143 followed by one of ")| \n\t"
1144
1145 \1 (backreferences) are turned into $1
1146
1147 The structure of the code is
1148 while (there's a character to process) {
1149 handle transliteration ranges
1150 skip regexp comments
1151 skip # initiated comments in //x patterns
1152 check for embedded @foo
1153 check for embedded scalars
1154 if (backslash) {
1155 leave intact backslashes from leave (below)
1156 deprecate \1 in strings and sub replacements
1157 handle string-changing backslashes \l \U \Q \E, etc.
1158 switch (what was escaped) {
1159 handle - in a transliteration (becomes a literal -)
1160 handle \132 octal characters
1161 handle 0x15 hex characters
1162 handle \cV (control V)
1163 handle printf backslashes (\f, \r, \n, etc)
1164 } (end switch)
1165 } (end if backslash)
1166 } (end while character to read)
4e553d73 1167
02aa26ce
NT
1168*/
1169
76e3520e 1170STATIC char *
cea2e8a9 1171S_scan_const(pTHX_ char *start)
79072805 1172{
3280af22 1173 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1174 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1175 register char *s = start; /* start of the constant */
1176 register char *d = SvPVX(sv); /* destination for copies */
1177 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1178 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1179 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1180 /* the constant is UTF8 */
012bcf8d
GS
1181 UV uv;
1182
ac2262e3 1183 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1184 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1185 : UTF;
89491803 1186 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1187 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1188 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1189 : UTF;
dff6d3cd 1190 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1191 PL_lex_inpat
4a2d328f 1192 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1193 : "";
79072805
LW
1194
1195 while (s < send || dorange) {
02aa26ce 1196 /* get transliterations out of the way (they're most literal) */
3280af22 1197 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1198 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1199 if (dorange) {
02aa26ce 1200 I32 i; /* current expanded character */
8ada0baa 1201 I32 min; /* first character in range */
02aa26ce
NT
1202 I32 max; /* last character in range */
1203
1204 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1205 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1206 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1207 d -= 2; /* eat the first char and the - */
1208
8ada0baa
JH
1209 min = (U8)*d; /* first char in range */
1210 max = (U8)d[1]; /* last char in range */
1211
c2e66d9e 1212 if (min > max) {
01ec43d0
GS
1213 Perl_croak(aTHX_
1214 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1215 (char)min, (char)max);
c2e66d9e
GS
1216 }
1217
8ada0baa
JH
1218#ifndef ASCIIish
1219 if ((isLOWER(min) && isLOWER(max)) ||
1220 (isUPPER(min) && isUPPER(max))) {
1221 if (isLOWER(min)) {
1222 for (i = min; i <= max; i++)
1223 if (isLOWER(i))
1224 *d++ = i;
1225 } else {
1226 for (i = min; i <= max; i++)
1227 if (isUPPER(i))
1228 *d++ = i;
1229 }
1230 }
1231 else
1232#endif
1233 for (i = min; i <= max; i++)
1234 *d++ = i;
02aa26ce
NT
1235
1236 /* mark the range as done, and continue */
79072805 1237 dorange = FALSE;
01ec43d0 1238 didrange = TRUE;
79072805 1239 continue;
4e553d73 1240 }
02aa26ce
NT
1241
1242 /* range begins (ignore - as first or last char) */
79072805 1243 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1244 if (didrange) {
1fafa243 1245 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1246 }
a0ed51b3 1247 if (utf) {
a176fa2a 1248 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1249 s++;
1250 continue;
1251 }
79072805
LW
1252 dorange = TRUE;
1253 s++;
01ec43d0
GS
1254 }
1255 else {
1256 didrange = FALSE;
1257 }
79072805 1258 }
02aa26ce
NT
1259
1260 /* if we get here, we're not doing a transliteration */
1261
0f5d15d6
IZ
1262 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1263 except for the last char, which will be done separately. */
3280af22 1264 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1265 if (s[2] == '#') {
1266 while (s < send && *s != ')')
1267 *d++ = *s++;
155aba94
GS
1268 }
1269 else if (s[2] == '{' /* This should match regcomp.c */
1270 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1271 {
cc6b7395 1272 I32 count = 1;
0f5d15d6 1273 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1274 char c;
1275
d9f97599
GS
1276 while (count && (c = *regparse)) {
1277 if (c == '\\' && regparse[1])
1278 regparse++;
4e553d73 1279 else if (c == '{')
cc6b7395 1280 count++;
4e553d73 1281 else if (c == '}')
cc6b7395 1282 count--;
d9f97599 1283 regparse++;
cc6b7395 1284 }
5bdf89e7
IZ
1285 if (*regparse != ')') {
1286 regparse--; /* Leave one char for continuation. */
cc6b7395 1287 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1288 }
0f5d15d6 1289 while (s < regparse)
cc6b7395
IZ
1290 *d++ = *s++;
1291 }
748a9306 1292 }
02aa26ce
NT
1293
1294 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1295 else if (*s == '#' && PL_lex_inpat &&
1296 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1297 while (s+1 < send && *s != '\n')
1298 *d++ = *s++;
1299 }
02aa26ce 1300
5d1d4326
JH
1301 /* check for embedded arrays
1302 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1303 */
7e2040f0 1304 else if (*s == '@' && s[1]
5d1d4326 1305 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1306 break;
02aa26ce
NT
1307
1308 /* check for embedded scalars. only stop if we're sure it's a
1309 variable.
1310 */
79072805 1311 else if (*s == '$') {
3280af22 1312 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1313 break;
c277df42 1314 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1315 break; /* in regexp, $ might be tail anchor */
1316 }
02aa26ce
NT
1317
1318 /* backslashes */
79072805
LW
1319 if (*s == '\\' && s+1 < send) {
1320 s++;
02aa26ce
NT
1321
1322 /* some backslashes we leave behind */
c9f97d15 1323 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1324 *d++ = '\\';
1325 *d++ = *s++;
1326 continue;
1327 }
02aa26ce
NT
1328
1329 /* deprecate \1 in strings and substitution replacements */
3280af22 1330 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1331 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1332 {
599cee73 1333 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1334 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1335 *--s = '$';
1336 break;
1337 }
02aa26ce
NT
1338
1339 /* string-change backslash escapes */
3280af22 1340 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1341 --s;
1342 break;
1343 }
02aa26ce
NT
1344
1345 /* if we get here, it's either a quoted -, or a digit */
79072805 1346 switch (*s) {
02aa26ce
NT
1347
1348 /* quoted - in transliterations */
79072805 1349 case '-':
3280af22 1350 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1351 *d++ = *s++;
1352 continue;
1353 }
1354 /* FALL THROUGH */
1355 default:
11b8faa4 1356 {
7e84c16c 1357 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1358 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1359 "Unrecognized escape \\%c passed through",
1360 *s);
1361 /* default action is to copy the quoted character */
1362 *d++ = *s++;
1363 continue;
1364 }
02aa26ce
NT
1365
1366 /* \132 indicates an octal constant */
79072805
LW
1367 case '0': case '1': case '2': case '3':
1368 case '4': case '5': case '6': case '7':
ba210ebe
JH
1369 {
1370 STRLEN len = 0; /* disallow underscores */
1371 uv = (UV)scan_oct(s, 3, &len);
1372 s += len;
1373 }
012bcf8d 1374 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1375
1376 /* \x24 indicates a hex constant */
79072805 1377 case 'x':
a0ed51b3
LW
1378 ++s;
1379 if (*s == '{') {
1380 char* e = strchr(s, '}');
adaeee49 1381 if (!e) {
a0ed51b3 1382 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1383 e = s;
1384 }
89491803 1385 else {
ba210ebe
JH
1386 STRLEN len = 1; /* allow underscores */
1387 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1388 }
1389 s = e + 1;
a0ed51b3
LW
1390 }
1391 else {
ba210ebe
JH
1392 {
1393 STRLEN len = 0; /* disallow underscores */
1394 uv = (UV)scan_hex(s, 2, &len);
1395 s += len;
1396 }
012bcf8d
GS
1397 }
1398
1399 NUM_ESCAPE_INSERT:
1400 /* Insert oct or hex escaped character.
301d3d20
JH
1401 * There will always enough room in sv since such
1402 * escapes will be longer than any UT-F8 sequence
1403 * they can end up as. */
ba7cea30
JH
1404
1405 /* This spot is wrong for EBCDIC. Characters like
1406 * the lowercase letters and digits are >127 in EBCDIC,
1407 * so here they would need to be mapped to the Unicode
1408 * repertoire. --jhi */
1409
7948272d 1410 if (uv > 127) {
9aa983d2 1411 if (!has_utf8 && uv > 255) {
301d3d20
JH
1412 /* Might need to recode whatever we have
1413 * accumulated so far if it contains any
1414 * hibit chars.
1415 *
1416 * (Can't we keep track of that and avoid
1417 * this rescan? --jhi)
012bcf8d
GS
1418 */
1419 int hicount = 0;
1420 char *c;
301d3d20 1421
012bcf8d 1422 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1423 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1424 hicount++;
1425 }
1426 if (hicount) {
1427 char *old_pvx = SvPVX(sv);
1428 char *src, *dst;
301d3d20
JH
1429
1430 d = SvGROW(sv,
1431 SvCUR(sv) + hicount + 1) +
1432 (d - old_pvx);
012bcf8d
GS
1433
1434 src = d - 1;
1435 d += hicount;
1436 dst = d - 1;
1437
1438 while (src < dst) {
fd400ab9 1439 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1440 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1441 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1442 }
1443 else {
1444 *dst-- = *src--;
1445 }
1446 }
1447 }
1448 }
1449
9aa983d2 1450 if (has_utf8 || uv > 255) {
012bcf8d 1451 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1452 has_utf8 = TRUE;
012bcf8d 1453 }
a0ed51b3 1454 else {
012bcf8d 1455 *d++ = (char)uv;
a0ed51b3 1456 }
012bcf8d
GS
1457 }
1458 else {
1459 *d++ = (char)uv;
a0ed51b3 1460 }
79072805 1461 continue;
02aa26ce 1462
4a2d328f
IZ
1463 /* \N{latin small letter a} is a named character */
1464 case 'N':
423cee85
JH
1465 ++s;
1466 if (*s == '{') {
1467 char* e = strchr(s, '}');
155aba94 1468 SV *res;
423cee85
JH
1469 STRLEN len;
1470 char *str;
4e553d73 1471
423cee85 1472 if (!e) {
5777a3f7 1473 yyerror("Missing right brace on \\N{}");
423cee85
JH
1474 e = s - 1;
1475 goto cont_scan;
1476 }
1477 res = newSVpvn(s + 1, e - s - 1);
4e553d73 1478 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1479 res, Nullsv, "\\N{...}" );
423cee85 1480 str = SvPV(res,len);
89491803 1481 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1482 char *ostart = SvPVX(sv);
1483 SvCUR_set(sv, d - ostart);
1484 SvPOK_on(sv);
e4f3eed8 1485 *d = '\0';
f08d6ad9 1486 sv_utf8_upgrade(sv);
d2f449dd
SB
1487 /* this just broke our allocation above... */
1488 SvGROW(sv, send - start);
f08d6ad9 1489 d = SvPVX(sv) + SvCUR(sv);
89491803 1490 has_utf8 = TRUE;
f08d6ad9 1491 }
423cee85
JH
1492 if (len > e - s + 4) {
1493 char *odest = SvPVX(sv);
1494
1495 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1496 d = SvPVX(sv) + (d - odest);
1497 }
1498 Copy(str, d, len, char);
1499 d += len;
1500 SvREFCNT_dec(res);
1501 cont_scan:
1502 s = e + 1;
1503 }
1504 else
5777a3f7 1505 yyerror("Missing braces on \\N{}");
423cee85
JH
1506 continue;
1507
02aa26ce 1508 /* \c is a control character */
79072805
LW
1509 case 'c':
1510 s++;
9d116dd7
JH
1511#ifdef EBCDIC
1512 *d = *s++;
1513 if (isLOWER(*d))
1514 *d = toUPPER(*d);
4e553d73 1515 *d = toCTRL(*d);
774a9426 1516 d++;
9d116dd7 1517#else
ba210ebe
JH
1518 {
1519 U8 c = *s++;
1520 *d++ = toCTRL(c);
1521 }
9d116dd7 1522#endif
79072805 1523 continue;
02aa26ce
NT
1524
1525 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1526 case 'b':
1527 *d++ = '\b';
1528 break;
1529 case 'n':
1530 *d++ = '\n';
1531 break;
1532 case 'r':
1533 *d++ = '\r';
1534 break;
1535 case 'f':
1536 *d++ = '\f';
1537 break;
1538 case 't':
1539 *d++ = '\t';
1540 break;
34a3fe2a
PP
1541#ifdef EBCDIC
1542 case 'e':
1543 *d++ = '\047'; /* CP 1047 */
1544 break;
1545 case 'a':
1546 *d++ = '\057'; /* CP 1047 */
1547 break;
1548#else
79072805
LW
1549 case 'e':
1550 *d++ = '\033';
1551 break;
1552 case 'a':
1553 *d++ = '\007';
1554 break;
34a3fe2a 1555#endif
02aa26ce
NT
1556 } /* end switch */
1557
79072805
LW
1558 s++;
1559 continue;
02aa26ce
NT
1560 } /* end if (backslash) */
1561
a5a960be
RI
1562 /* (now in tr/// code again) */
1563
fd400ab9 1564 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
RI
1565 STRLEN len = (STRLEN) -1;
1566 UV uv;
1567 if (this_utf8) {
a0dbb045 1568 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
RI
1569 }
1570 if (len == (STRLEN)-1) {
1571 /* Illegal UTF8 (a high-bit byte), make it valid. */
1572 char *old_pvx = SvPVX(sv);
1573 /* need space for one extra char (NOTE: SvCUR() not set here) */
1574 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1575 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1576 }
1577 else {
1578 while (len--)
1579 *d++ = *s++;
1580 }
1581 has_utf8 = TRUE;
1582 continue;
1583 }
1584
79072805 1585 *d++ = *s++;
02aa26ce
NT
1586 } /* while loop to process each character */
1587
1588 /* terminate the string and set up the sv */
79072805 1589 *d = '\0';
463ee0b2 1590 SvCUR_set(sv, d - SvPVX(sv));
79072805 1591 SvPOK_on(sv);
89491803 1592 if (has_utf8)
7e2040f0 1593 SvUTF8_on(sv);
79072805 1594
02aa26ce 1595 /* shrink the sv if we allocated more than we used */
79072805
LW
1596 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1597 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1598 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1599 }
02aa26ce 1600
9b599b2a 1601 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1602 if (s > PL_bufptr) {
1603 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1604 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1605 sv, Nullsv,
4e553d73 1606 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1607 ? "tr"
3280af22 1608 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1609 ? "s"
1610 : "qq")));
79072805 1611 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1612 } else
8990e307 1613 SvREFCNT_dec(sv);
79072805
LW
1614 return s;
1615}
1616
ffb4593c
NT
1617/* S_intuit_more
1618 * Returns TRUE if there's more to the expression (e.g., a subscript),
1619 * FALSE otherwise.
ffb4593c
NT
1620 *
1621 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1622 *
1623 * ->[ and ->{ return TRUE
1624 * { and [ outside a pattern are always subscripts, so return TRUE
1625 * if we're outside a pattern and it's not { or [, then return FALSE
1626 * if we're in a pattern and the first char is a {
1627 * {4,5} (any digits around the comma) returns FALSE
1628 * if we're in a pattern and the first char is a [
1629 * [] returns FALSE
1630 * [SOMETHING] has a funky algorithm to decide whether it's a
1631 * character class or not. It has to deal with things like
1632 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1633 * anything else returns TRUE
1634 */
1635
9cbb5ea2
GS
1636/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1637
76e3520e 1638STATIC int
cea2e8a9 1639S_intuit_more(pTHX_ register char *s)
79072805 1640{
3280af22 1641 if (PL_lex_brackets)
79072805
LW
1642 return TRUE;
1643 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1644 return TRUE;
1645 if (*s != '{' && *s != '[')
1646 return FALSE;
3280af22 1647 if (!PL_lex_inpat)
79072805
LW
1648 return TRUE;
1649
1650 /* In a pattern, so maybe we have {n,m}. */
1651 if (*s == '{') {
1652 s++;
1653 if (!isDIGIT(*s))
1654 return TRUE;
1655 while (isDIGIT(*s))
1656 s++;
1657 if (*s == ',')
1658 s++;
1659 while (isDIGIT(*s))
1660 s++;
1661 if (*s == '}')
1662 return FALSE;
1663 return TRUE;
1664
1665 }
1666
1667 /* On the other hand, maybe we have a character class */
1668
1669 s++;
1670 if (*s == ']' || *s == '^')
1671 return FALSE;
1672 else {
ffb4593c 1673 /* this is terrifying, and it works */
79072805
LW
1674 int weight = 2; /* let's weigh the evidence */
1675 char seen[256];
f27ffc4a 1676 unsigned char un_char = 255, last_un_char;
93a17b20 1677 char *send = strchr(s,']');
3280af22 1678 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1679
1680 if (!send) /* has to be an expression */
1681 return TRUE;
1682
1683 Zero(seen,256,char);
1684 if (*s == '$')
1685 weight -= 3;
1686 else if (isDIGIT(*s)) {
1687 if (s[1] != ']') {
1688 if (isDIGIT(s[1]) && s[2] == ']')
1689 weight -= 10;
1690 }
1691 else
1692 weight -= 100;
1693 }
1694 for (; s < send; s++) {
1695 last_un_char = un_char;
1696 un_char = (unsigned char)*s;
1697 switch (*s) {
1698 case '@':
1699 case '&':
1700 case '$':
1701 weight -= seen[un_char] * 10;
7e2040f0 1702 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1703 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1704 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1705 weight -= 100;
1706 else
1707 weight -= 10;
1708 }
1709 else if (*s == '$' && s[1] &&
93a17b20
LW
1710 strchr("[#!%*<>()-=",s[1])) {
1711 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1712 weight -= 10;
1713 else
1714 weight -= 1;
1715 }
1716 break;
1717 case '\\':
1718 un_char = 254;
1719 if (s[1]) {
93a17b20 1720 if (strchr("wds]",s[1]))
79072805
LW
1721 weight += 100;
1722 else if (seen['\''] || seen['"'])
1723 weight += 1;
93a17b20 1724 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1725 weight += 40;
1726 else if (isDIGIT(s[1])) {
1727 weight += 40;
1728 while (s[1] && isDIGIT(s[1]))
1729 s++;
1730 }
1731 }
1732 else
1733 weight += 100;
1734 break;
1735 case '-':
1736 if (s[1] == '\\')
1737 weight += 50;
93a17b20 1738 if (strchr("aA01! ",last_un_char))
79072805 1739 weight += 30;
93a17b20 1740 if (strchr("zZ79~",s[1]))
79072805 1741 weight += 30;
f27ffc4a
GS
1742 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1743 weight -= 5; /* cope with negative subscript */
79072805
LW
1744 break;
1745 default:
93a17b20 1746 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1747 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1748 char *d = tmpbuf;
1749 while (isALPHA(*s))
1750 *d++ = *s++;
1751 *d = '\0';
1752 if (keyword(tmpbuf, d - tmpbuf))
1753 weight -= 150;
1754 }
1755 if (un_char == last_un_char + 1)
1756 weight += 5;
1757 weight -= seen[un_char];
1758 break;
1759 }
1760 seen[un_char]++;
1761 }
1762 if (weight >= 0) /* probably a character class */
1763 return FALSE;
1764 }
1765
1766 return TRUE;
1767}
ffed7fef 1768
ffb4593c
NT
1769/*
1770 * S_intuit_method
1771 *
1772 * Does all the checking to disambiguate
1773 * foo bar
1774 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1775 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1776 *
1777 * First argument is the stuff after the first token, e.g. "bar".
1778 *
1779 * Not a method if bar is a filehandle.
1780 * Not a method if foo is a subroutine prototyped to take a filehandle.
1781 * Not a method if it's really "Foo $bar"
1782 * Method if it's "foo $bar"
1783 * Not a method if it's really "print foo $bar"
1784 * Method if it's really "foo package::" (interpreted as package->foo)
1785 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1786 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1787 * =>
1788 */
1789
76e3520e 1790STATIC int
cea2e8a9 1791S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1792{
1793 char *s = start + (*start == '$');
3280af22 1794 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1795 STRLEN len;
1796 GV* indirgv;
1797
1798 if (gv) {
b6c543e3 1799 CV *cv;
a0d0e21e
LW
1800 if (GvIO(gv))
1801 return 0;
b6c543e3
IZ
1802 if ((cv = GvCVu(gv))) {
1803 char *proto = SvPVX(cv);
1804 if (proto) {
1805 if (*proto == ';')
1806 proto++;
1807 if (*proto == '*')
1808 return 0;
1809 }
1810 } else
a0d0e21e
LW
1811 gv = 0;
1812 }
8903cb82 1813 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1814 /* start is the beginning of the possible filehandle/object,
1815 * and s is the end of it
1816 * tmpbuf is a copy of it
1817 */
1818
a0d0e21e 1819 if (*start == '$') {
3280af22 1820 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1821 return 0;
1822 s = skipspace(s);
3280af22
NIS
1823 PL_bufptr = start;
1824 PL_expect = XREF;
a0d0e21e
LW
1825 return *s == '(' ? FUNCMETH : METHOD;
1826 }
1827 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1828 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1829 len -= 2;
1830 tmpbuf[len] = '\0';
1831 goto bare_package;
1832 }
1833 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1834 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1835 return 0;
1836 /* filehandle or package name makes it a method */
89bfa8cd 1837 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1838 s = skipspace(s);
3280af22 1839 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1840 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1841 bare_package:
3280af22 1842 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1843 newSVpvn(tmpbuf,len));
3280af22
NIS
1844 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1845 PL_expect = XTERM;
a0d0e21e 1846 force_next(WORD);
3280af22 1847 PL_bufptr = s;
a0d0e21e
LW
1848 return *s == '(' ? FUNCMETH : METHOD;
1849 }
1850 }
1851 return 0;
1852}
1853
ffb4593c
NT
1854/*
1855 * S_incl_perldb
1856 * Return a string of Perl code to load the debugger. If PERL5DB
1857 * is set, it will return the contents of that, otherwise a
1858 * compile-time require of perl5db.pl.
1859 */
1860
76e3520e 1861STATIC char*
cea2e8a9 1862S_incl_perldb(pTHX)
a0d0e21e 1863{
3280af22 1864 if (PL_perldb) {
76e3520e 1865 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1866
1867 if (pdb)
1868 return pdb;
61bb5906 1869 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1870 return "BEGIN { require 'perl5db.pl' }";
1871 }
1872 return "";
1873}
1874
1875
16d20bd9 1876/* Encoded script support. filter_add() effectively inserts a
4e553d73 1877 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1878 * Note that the filter function only applies to the current source file
1879 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1880 *
1881 * The datasv parameter (which may be NULL) can be used to pass
1882 * private data to this instance of the filter. The filter function
1883 * can recover the SV using the FILTER_DATA macro and use it to
1884 * store private buffers and state information.
1885 *
1886 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1887 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1888 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1889 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1890 * private use must be set using malloc'd pointers.
1891 */
16d20bd9
AD
1892
1893SV *
864dbfa3 1894Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1895{
f4c556ac
GS
1896 if (!funcp)
1897 return Nullsv;
1898
3280af22
NIS
1899 if (!PL_rsfp_filters)
1900 PL_rsfp_filters = newAV();
16d20bd9 1901 if (!datasv)
8c52afec 1902 datasv = NEWSV(255,0);
16d20bd9 1903 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1904 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1905 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1906 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1907 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1908 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1909 av_unshift(PL_rsfp_filters, 1);
1910 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1911 return(datasv);
1912}
4e553d73 1913
16d20bd9
AD
1914
1915/* Delete most recently added instance of this filter function. */
a0d0e21e 1916void
864dbfa3 1917Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1918{
e0c19803 1919 SV *datasv;
f4c556ac 1920 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1921 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1922 return;
1923 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1924 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1925 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1926 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1927 IoANY(datasv) = (void *)NULL;
3280af22 1928 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1929
16d20bd9
AD
1930 return;
1931 }
1932 /* we need to search for the correct entry and clear it */
cea2e8a9 1933 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1934}
1935
1936
1937/* Invoke the n'th filter function for the current rsfp. */
1938I32
864dbfa3 1939Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1940
1941
8ac85365 1942 /* 0 = read one text line */
a0d0e21e 1943{
16d20bd9
AD
1944 filter_t funcp;
1945 SV *datasv = NULL;
e50aee73 1946
3280af22 1947 if (!PL_rsfp_filters)
16d20bd9 1948 return -1;
3280af22 1949 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1950 /* Provide a default input filter to make life easy. */
1951 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1952 DEBUG_P(PerlIO_printf(Perl_debug_log,
1953 "filter_read %d: from rsfp\n", idx));
4e553d73 1954 if (maxlen) {
16d20bd9
AD
1955 /* Want a block */
1956 int len ;
1957 int old_len = SvCUR(buf_sv) ;
1958
1959 /* ensure buf_sv is large enough */
1960 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1961 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1962 if (PerlIO_error(PL_rsfp))
37120919
AD
1963 return -1; /* error */
1964 else
1965 return 0 ; /* end of file */
1966 }
16d20bd9
AD
1967 SvCUR_set(buf_sv, old_len + len) ;
1968 } else {
1969 /* Want a line */
3280af22
NIS
1970 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1971 if (PerlIO_error(PL_rsfp))
37120919
AD
1972 return -1; /* error */
1973 else
1974 return 0 ; /* end of file */
1975 }
16d20bd9
AD
1976 }
1977 return SvCUR(buf_sv);
1978 }
1979 /* Skip this filter slot if filter has been deleted */
3280af22 1980 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1981 DEBUG_P(PerlIO_printf(Perl_debug_log,
1982 "filter_read %d: skipped (filter deleted)\n",
1983 idx));
16d20bd9
AD
1984 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1985 }
1986 /* Get function pointer hidden within datasv */
4755096e 1987 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1988 DEBUG_P(PerlIO_printf(Perl_debug_log,
1989 "filter_read %d: via function %p (%s)\n",
1990 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1991 /* Call function. The function is expected to */
1992 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1993 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1994 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1995}
1996
76e3520e 1997STATIC char *
cea2e8a9 1998S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1999{
c39cd008 2000#ifdef PERL_CR_FILTER
3280af22 2001 if (!PL_rsfp_filters) {
c39cd008 2002 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2003 }
2004#endif
3280af22 2005 if (PL_rsfp_filters) {
16d20bd9 2006
55497cff
PP
2007 if (!append)
2008 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2009 if (FILTER_READ(0, sv, 0) > 0)
2010 return ( SvPVX(sv) ) ;
2011 else
2012 return Nullch ;
2013 }
9d116dd7 2014 else
fd049845 2015 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2016}
2017
01ec43d0
GS
2018STATIC HV *
2019S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2020{
2021 GV *gv;
2022
01ec43d0 2023 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2024 return PL_curstash;
2025
2026 if (len > 2 &&
2027 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2028 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2029 {
2030 return GvHV(gv); /* Foo:: */
def3634b
GS
2031 }
2032
2033 /* use constant CLASS => 'MyClass' */
2034 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2035 SV *sv;
2036 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2037 pkgname = SvPV_nolen(sv);
2038 }
2039 }
2040
2041 return gv_stashpv(pkgname, FALSE);
2042}
a0d0e21e 2043
748a9306
LW
2044#ifdef DEBUGGING
2045 static char* exp_name[] =
09bef843
SB
2046 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2047 "ATTRTERM", "TERMBLOCK"
2048 };
748a9306 2049#endif
463ee0b2 2050
02aa26ce
NT
2051/*
2052 yylex
2053
2054 Works out what to call the token just pulled out of the input
2055 stream. The yacc parser takes care of taking the ops we return and
2056 stitching them into a tree.
2057
2058 Returns:
2059 PRIVATEREF
2060
2061 Structure:
2062 if read an identifier
2063 if we're in a my declaration
2064 croak if they tried to say my($foo::bar)
2065 build the ops for a my() declaration
2066 if it's an access to a my() variable
2067 are we in a sort block?
2068 croak if my($a); $a <=> $b
2069 build ops for access to a my() variable
2070 if in a dq string, and they've said @foo and we can't find @foo
2071 croak
2072 build ops for a bareword
2073 if we already built the token before, use it.
2074*/
2075
dba4d153 2076#ifdef USE_PURE_BISON
bf4acbe4 2077#ifdef __SC__
dba4d153 2078#pragma segment Perl_yylex_r
bf4acbe4 2079#endif
864dbfa3 2080int
dba4d153 2081Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2082{
20141f0e
RI
2083 int r;
2084
6f202aea 2085 yyactlevel++;
20141f0e
RI
2086 yylval_pointer[yyactlevel] = lvalp;
2087 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2088 if (yyactlevel >= YYMAXLEVEL)
2089 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2090
dba4d153 2091 r = Perl_yylex(aTHX);
20141f0e 2092
d8ae6756
RI
2093 if (yyactlevel > 0)
2094 yyactlevel--;
20141f0e
RI
2095
2096 return r;
2097}
dba4d153 2098#endif
20141f0e 2099
dba4d153
JH
2100#ifdef __SC__
2101#pragma segment Perl_yylex
2102#endif
dba4d153 2103int
dba4d153 2104Perl_yylex(pTHX)
20141f0e 2105{
79072805 2106 register char *s;
378cc40b 2107 register char *d;
79072805 2108 register I32 tmp;
463ee0b2 2109 STRLEN len;
161b471a
NIS
2110 GV *gv = Nullgv;
2111 GV **gvp = 0;
aa7440fb 2112 bool bof = FALSE;
a687059c 2113
02aa26ce 2114 /* check if there's an identifier for us to look at */
3280af22 2115 if (PL_pending_ident) {
02aa26ce 2116 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2117 char pit = PL_pending_ident;
2118 PL_pending_ident = 0;
bbce6d69 2119
607df283
SC
2120 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2121 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2122
02aa26ce
NT
2123 /* if we're in a my(), we can't allow dynamics here.
2124 $foo'bar has already been turned into $foo::bar, so
2125 just check for colons.
2126
2127 if it's a legal name, the OP is a PADANY.
2128 */
3280af22 2129 if (PL_in_my) {
77ca0c92 2130 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2131 if (strchr(PL_tokenbuf,':'))
2132 yyerror(Perl_form(aTHX_ "No package name allowed for "
2133 "variable %s in \"our\"",
2134 PL_tokenbuf));
77ca0c92
LW
2135 tmp = pad_allocmy(PL_tokenbuf);
2136 }
2137 else {
2138 if (strchr(PL_tokenbuf,':'))
2139 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2140
77ca0c92
LW
2141 yylval.opval = newOP(OP_PADANY, 0);
2142 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2143 return PRIVATEREF;
2144 }
bbce6d69
PP
2145 }
2146
4e553d73 2147 /*
02aa26ce
NT
2148 build the ops for accesses to a my() variable.
2149
2150 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2151 then used in a comparison. This catches most, but not
2152 all cases. For instance, it catches
2153 sort { my($a); $a <=> $b }
2154 but not
2155 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2156 (although why you'd do that is anyone's guess).
2157 */
2158
3280af22 2159 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2160#ifdef USE_THREADS
54b9620d 2161 /* Check for single character per-thread SVs */
3280af22
NIS
2162 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2163 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2164 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2165 {
2faa37cc 2166 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2167 yylval.opval->op_targ = tmp;
2168 return PRIVATEREF;
2169 }
2170#endif /* USE_THREADS */
3280af22 2171 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2172 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2173 /* might be an "our" variable" */
f472eb5c 2174 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2175 /* build ops for a bareword */
f472eb5c
GS
2176 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2177 sv_catpvn(sym, "::", 2);
2178 sv_catpv(sym, PL_tokenbuf+1);
2179 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2180 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2181 gv_fetchpv(SvPVX(sym),
77ca0c92 2182 (PL_in_eval
f472eb5c
GS
2183 ? (GV_ADDMULTI | GV_ADDINEVAL)
2184 : TRUE
77ca0c92
LW
2185 ),
2186 ((PL_tokenbuf[0] == '$') ? SVt_PV
2187 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2188 : SVt_PVHV));
2189 return WORD;
2190 }
2191
02aa26ce 2192 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2193 if (PL_last_lop_op == OP_SORT &&
2194 PL_tokenbuf[0] == '$' &&
2195 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2196 && !PL_tokenbuf[2])
bbce6d69 2197 {
3280af22
NIS
2198 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2199 d < PL_bufend && *d != '\n';
a863c7d1
MB
2200 d++)
2201 {
2202 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2203 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2204 PL_tokenbuf);
a863c7d1 2205 }
bbce6d69
PP
2206 }
2207 }
bbce6d69 2208
a863c7d1
MB
2209 yylval.opval = newOP(OP_PADANY, 0);
2210 yylval.opval->op_targ = tmp;
2211 return PRIVATEREF;
2212 }
bbce6d69
PP
2213 }
2214
02aa26ce
NT
2215 /*
2216 Whine if they've said @foo in a doublequoted string,
2217 and @foo isn't a variable we can find in the symbol
2218 table.
2219 */
3280af22
NIS
2220 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2221 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2222 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2223 && ckWARN(WARN_AMBIGUOUS))
2224 {
2225 /* Downgraded from fatal to warning 20000522 mjd */
2226 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2227 "Possible unintended interpolation of %s in string",
2228 PL_tokenbuf);
2229 }
bbce6d69
PP
2230 }
2231
02aa26ce 2232 /* build ops for a bareword */
3280af22 2233 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2234 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2235 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2236 ((PL_tokenbuf[0] == '$') ? SVt_PV
2237 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2238 : SVt_PVHV));
2239 return WORD;
2240 }
2241
02aa26ce
NT
2242 /* no identifier pending identification */
2243
3280af22 2244 switch (PL_lex_state) {
79072805
LW
2245#ifdef COMMENTARY
2246 case LEX_NORMAL: /* Some compilers will produce faster */
2247 case LEX_INTERPNORMAL: /* code if we comment these out. */
2248 break;
2249#endif
2250
09bef843 2251 /* when we've already built the next token, just pull it out of the queue */
79072805 2252 case LEX_KNOWNEXT:
3280af22
NIS
2253 PL_nexttoke--;
2254 yylval = PL_nextval[PL_nexttoke];
2255 if (!PL_nexttoke) {
2256 PL_lex_state = PL_lex_defer;
2257 PL_expect = PL_lex_expect;
2258 PL_lex_defer = LEX_NORMAL;
463ee0b2 2259 }
607df283 2260 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2261 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2262 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2263
3280af22 2264 return(PL_nexttype[PL_nexttoke]);
79072805 2265
02aa26ce 2266 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2267 when we get here, PL_bufptr is at the \
02aa26ce 2268 */
79072805
LW
2269 case LEX_INTERPCASEMOD:
2270#ifdef DEBUGGING
3280af22 2271 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2272 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2273#endif
02aa26ce 2274 /* handle \E or end of string */
3280af22 2275 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2276 char oldmod;
02aa26ce
NT
2277
2278 /* if at a \E */
3280af22
NIS
2279 if (PL_lex_casemods) {
2280 oldmod = PL_lex_casestack[--PL_lex_casemods];
2281 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2282
3280af22
NIS
2283 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2284 PL_bufptr += 2;
2285 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2286 }
79072805
LW
2287 return ')';
2288 }
3280af22
NIS
2289 if (PL_bufptr != PL_bufend)
2290 PL_bufptr += 2;
2291 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2292 return yylex();
79072805
LW
2293 }
2294 else {
607df283
SC
2295 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2296 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2297 s = PL_bufptr + 1;
79072805
LW
2298 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2299 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2300 if (strchr("LU", *s) &&
3280af22 2301 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2302 {
3280af22 2303 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2304 return ')';
2305 }
3280af22
NIS
2306 if (PL_lex_casemods > 10) {
2307 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2308 if (newlb != PL_lex_casestack) {
a0d0e21e 2309 SAVEFREEPV(newlb);
3280af22 2310 PL_lex_casestack = newlb;
a0d0e21e
LW
2311 }
2312 }
3280af22
NIS
2313 PL_lex_casestack[PL_lex_casemods++] = *s;
2314 PL_lex_casestack[PL_lex_casemods] = '\0';
2315 PL_lex_state = LEX_INTERPCONCAT;
2316 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2317 force_next('(');
2318 if (*s == 'l')
3280af22 2319 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2320 else if (*s == 'u')
3280af22 2321 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2322 else if (*s == 'L')
3280af22 2323 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2324 else if (*s == 'U')
3280af22 2325 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2326 else if (*s == 'Q')
3280af22 2327 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2328 else
cea2e8a9 2329 Perl_croak(aTHX_ "panic: yylex");
3280af22 2330 PL_bufptr = s + 1;
79072805 2331 force_next(FUNC);
3280af22
NIS
2332 if (PL_lex_starts) {
2333 s = PL_bufptr;
2334 PL_lex_starts = 0;
79072805
LW
2335 Aop(OP_CONCAT);
2336 }
2337 else
cea2e8a9 2338 return yylex();
79072805
LW
2339 }
2340
55497cff
PP
2341 case LEX_INTERPPUSH:
2342 return sublex_push();
2343
79072805 2344 case LEX_INTERPSTART:
3280af22 2345 if (PL_bufptr == PL_bufend)
79072805 2346 return sublex_done();
607df283
SC
2347 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2348 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2349 PL_expect = XTERM;
2350 PL_lex_dojoin = (*PL_bufptr == '@');
2351 PL_lex_state = LEX_INTERPNORMAL;
2352 if (PL_lex_dojoin) {
2353 PL_nextval[PL_nexttoke].ival = 0;
79072805 2354 force_next(',');
554b3eca 2355#ifdef USE_THREADS
533c011a
NIS
2356 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2357 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2358 force_next(PRIVATEREF);
2359#else
a0d0e21e 2360 force_ident("\"", '$');
554b3eca 2361#endif /* USE_THREADS */
3280af22 2362 PL_nextval[PL_nexttoke].ival = 0;
79072805 2363 force_next('$');
3280af22 2364 PL_nextval[PL_nexttoke].ival = 0;
79072805 2365 force_next('(');
3280af22 2366 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2367 force_next(FUNC);
2368 }
3280af22
NIS
2369 if (PL_lex_starts++) {
2370 s = PL_bufptr;
79072805
LW
2371 Aop(OP_CONCAT);
2372 }
cea2e8a9 2373 return yylex();
79072805
LW
2374
2375 case LEX_INTERPENDMAYBE:
3280af22
NIS
2376 if (intuit_more(PL_bufptr)) {
2377 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2378 break;
2379 }
2380 /* FALL THROUGH */
2381
2382 case LEX_INTERPEND:
3280af22
NIS
2383 if (PL_lex_dojoin) {
2384 PL_lex_dojoin = FALSE;
2385 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2386 return ')';
2387 }
43a16006 2388 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2389 && SvEVALED(PL_lex_repl))
43a16006 2390 {
e9fa98b2 2391 if (PL_bufptr != PL_bufend)
cea2e8a9 2392 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2393 PL_lex_repl = Nullsv;
2394 }
79072805
LW
2395 /* FALLTHROUGH */
2396 case LEX_INTERPCONCAT:
2397#ifdef DEBUGGING
3280af22 2398 if (PL_lex_brackets)
cea2e8a9 2399 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2400#endif
3280af22 2401 if (PL_bufptr == PL_bufend)
79072805
LW
2402 return sublex_done();
2403
3280af22
NIS
2404 if (SvIVX(PL_linestr) == '\'') {
2405 SV *sv = newSVsv(PL_linestr);
2406 if (!PL_lex_inpat)
76e3520e 2407 sv = tokeq(sv);
3280af22 2408 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2409 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2410 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2411 s = PL_bufend;
79072805
LW
2412 }
2413 else {
3280af22 2414 s = scan_const(PL_bufptr);
79072805 2415 if (*s == '\\')
3280af22 2416 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2417 else
3280af22 2418 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2419 }
2420
3280af22
NIS
2421 if (s != PL_bufptr) {
2422 PL_nextval[PL_nexttoke] = yylval;
2423 PL_expect = XTERM;
79072805 2424 force_next(THING);
3280af22 2425 if (PL_lex_starts++)
79072805
LW
2426 Aop(OP_CONCAT);
2427 else {
3280af22 2428 PL_bufptr = s;
cea2e8a9 2429 return yylex();
79072805
LW
2430 }
2431 }
2432
cea2e8a9 2433 return yylex();
a0d0e21e 2434 case LEX_FORMLINE:
3280af22
NIS
2435 PL_lex_state = LEX_NORMAL;
2436 s = scan_formline(PL_bufptr);
2437 if (!PL_lex_formbrack)
a0d0e21e
LW
2438 goto rightbracket;
2439 OPERATOR(';');
79072805
LW
2440 }
2441
3280af22
NIS
2442 s = PL_bufptr;
2443 PL_oldoldbufptr = PL_oldbufptr;
2444 PL_oldbufptr = s;
607df283 2445 DEBUG_T( {
bf49b057
GS
2446 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2447 exp_name[PL_expect], s);
79072805 2448 } )
463ee0b2
LW
2449
2450 retry:
378cc40b
LW
2451 switch (*s) {
2452 default:
7e2040f0 2453 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2454 goto keylookup;
cea2e8a9 2455 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2456 case 4:
2457 case 26:
2458 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2459 case 0:
3280af22
NIS
2460 if (!PL_rsfp) {
2461 PL_last_uni = 0;
2462 PL_last_lop = 0;
2463 if (PL_lex_brackets)
d98d5fff 2464 yyerror("Missing right curly or square bracket");
4e553d73 2465 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2466 "### Tokener got EOF\n");
2467 } )
79072805 2468 TOKEN(0);
463ee0b2 2469 }
3280af22 2470 if (s++ < PL_bufend)
a687059c 2471 goto retry; /* ignore stray nulls */
3280af22
NIS
2472 PL_last_uni = 0;
2473 PL_last_lop = 0;
2474 if (!PL_in_eval && !PL_preambled) {
2475 PL_preambled = TRUE;
2476 sv_setpv(PL_linestr,incl_perldb());
2477 if (SvCUR(PL_linestr))
2478 sv_catpv(PL_linestr,";");
2479 if (PL_preambleav){
2480 while(AvFILLp(PL_preambleav) >= 0) {
2481 SV *tmpsv = av_shift(PL_preambleav);
2482 sv_catsv(PL_linestr, tmpsv);
2483 sv_catpv(PL_linestr, ";");
91b7def8
PP
2484 sv_free(tmpsv);
2485 }
3280af22
NIS
2486 sv_free((SV*)PL_preambleav);
2487 PL_preambleav = NULL;
91b7def8 2488 }
3280af22
NIS
2489 if (PL_minus_n || PL_minus_p) {
2490 sv_catpv(PL_linestr, "LINE: while (<>) {");
2491 if (PL_minus_l)
2492 sv_catpv(PL_linestr,"chomp;");
2493 if (PL_minus_a) {
8fd239a7
CS
2494 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2495 if (gv)
2496 GvIMPORTED_AV_on(gv);
3280af22
NIS
2497 if (PL_minus_F) {
2498 if (strchr("/'\"", *PL_splitstr)
2499 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2500 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2501 else {
2502 char delim;
2503 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2504 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2505 delim = *s;
cea2e8a9 2506 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2507 "q" + (delim == '\''), delim);
3280af22 2508 for (s = PL_splitstr; *s; s++) {
54310121 2509 if (*s == '\\')
3280af22
NIS
2510 sv_catpvn(PL_linestr, "\\", 1);
2511 sv_catpvn(PL_linestr, s, 1);
54310121 2512 }
cea2e8a9 2513 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2514 }
2304df62
AD
2515 }
2516 else
3280af22 2517 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2518 }
79072805 2519 }
3280af22
NIS
2520 sv_catpv(PL_linestr, "\n");
2521 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2522 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2523 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2524 SV *sv = NEWSV(85,0);
2525
2526 sv_upgrade(sv, SVt_PVMG);
3280af22 2527 sv_setsv(sv,PL_linestr);
57843af0 2528 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2529 }
79072805 2530 goto retry;
a687059c 2531 }
e929a76b 2532 do {
aa7440fb 2533 bof = PL_rsfp ? TRUE : FALSE;
226017aa
DD
2534 if (bof) {
2535#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2536# ifdef __GNU_LIBRARY__
2537# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2538# define FTELL_FOR_PIPE_IS_BROKEN
2539# endif
e3f494f1
JH
2540# else
2541# ifdef __GLIBC__
2542# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2543# define FTELL_FOR_PIPE_IS_BROKEN
2544# endif
2545# endif
226017aa
DD
2546# endif
2547#endif
2548#ifdef FTELL_FOR_PIPE_IS_BROKEN
2549 /* This loses the possibility to detect the bof
2550 * situation on perl -P when the libc5 is being used.
2551 * Workaround? Maybe attach some extra state to PL_rsfp?
2552 */
2553 if (!PL_preprocess)
2554 bof = PerlIO_tell(PL_rsfp) == 0;
2555#else
2556 bof = PerlIO_tell(PL_rsfp) == 0;
2557#endif
2558 }
dea0fc0b
JH
2559 s = filter_gets(PL_linestr, PL_rsfp, 0);
2560 if (s == Nullch) {
e929a76b 2561 fake_eof:
3280af22
NIS
2562 if (PL_rsfp) {
2563 if (PL_preprocess && !PL_in_eval)
2564 (void)PerlProc_pclose(PL_rsfp);
2565 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2566 PerlIO_clearerr(PL_rsfp);
395c3793 2567 else
3280af22
NIS
2568 (void)PerlIO_close(PL_rsfp);
2569 PL_rsfp = Nullfp;
4a9ae47a 2570 PL_doextract = FALSE;
395c3793 2571 }
3280af22
NIS
2572 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2573 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2574 sv_catpv(PL_linestr,";}");
2575 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2577 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2578 goto retry;
2579 }
3280af22
NIS
2580 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2581 sv_setpv(PL_linestr,"");
79072805 2582 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2583 } else if (bof) {
2584 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2585 s = swallow_bom((U8*)s);
378cc40b 2586 }
3280af22 2587 if (PL_doextract) {
a0d0e21e 2588 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2589 PL_doextract = FALSE;
a0d0e21e
LW
2590
2591 /* Incest with pod. */
2592 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2593 sv_setpv(PL_linestr, "");
2594 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2595 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2596 PL_doextract = FALSE;
a0d0e21e 2597 }
4e553d73 2598 }
463ee0b2 2599 incline(s);
3280af22
NIS
2600 } while (PL_doextract);
2601 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2602 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2603 SV *sv = NEWSV(85,0);
a687059c 2604
93a17b20 2605 sv_upgrade(sv, SVt_PVMG);
3280af22 2606 sv_setsv(sv,PL_linestr);
57843af0 2607 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2608 }
3280af22 2609 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2610 if (CopLINE(PL_curcop) == 1) {
3280af22 2611 while (s < PL_bufend && isSPACE(*s))
79072805 2612 s++;
a0d0e21e 2613 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2614 s++;
44a8e56a 2615 d = Nullch;
3280af22 2616 if (!PL_in_eval) {
44a8e56a
PP
2617 if (*s == '#' && *(s+1) == '!')
2618 d = s + 2;
2619#ifdef ALTERNATE_SHEBANG
2620 else {
2621 static char as[] = ALTERNATE_SHEBANG;
2622 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2623 d = s + (sizeof(as) - 1);
2624 }
2625#endif /* ALTERNATE_SHEBANG */
2626 }
2627 if (d) {
b8378b72 2628 char *ipath;
774d564b 2629 char *ipathend;
b8378b72 2630
774d564b 2631 while (isSPACE(*d))
b8378b72
CS
2632 d++;
2633 ipath = d;
774d564b
PP
2634 while (*d && !isSPACE(*d))
2635 d++;
2636 ipathend = d;
2637
2638#ifdef ARG_ZERO_IS_SCRIPT
2639 if (ipathend > ipath) {
2640 /*
2641 * HP-UX (at least) sets argv[0] to the script name,
2642 * which makes $^X incorrect. And Digital UNIX and Linux,
2643 * at least, set argv[0] to the basename of the Perl
2644 * interpreter. So, having found "#!", we'll set it right.
2645 */
2646 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2647 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2648 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2649 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2650 SvSETMAGIC(x);
2651 }
774d564b 2652 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2653 }
774d564b 2654#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2655
2656 /*
2657 * Look for options.
2658 */
748a9306 2659 d = instr(s,"perl -");
84e30d1a 2660 if (!d) {
748a9306 2661 d = instr(s,"perl");
84e30d1a
GS
2662#if defined(DOSISH)
2663 /* avoid getting into infinite loops when shebang
2664 * line contains "Perl" rather than "perl" */
2665 if (!d) {
2666 for (d = ipathend-4; d >= ipath; --d) {
2667 if ((*d == 'p' || *d == 'P')
2668 && !ibcmp(d, "perl", 4))
2669 {
2670 break;
2671 }
2672 }
2673 if (d < ipath)
2674 d = Nullch;
2675 }
2676#endif
2677 }
44a8e56a
PP
2678#ifdef ALTERNATE_SHEBANG
2679 /*
2680 * If the ALTERNATE_SHEBANG on this system starts with a
2681 * character that can be part of a Perl expression, then if
2682 * we see it but not "perl", we're probably looking at the
2683 * start of Perl code, not a request to hand off to some
2684 * other interpreter. Similarly, if "perl" is there, but
2685 * not in the first 'word' of the line, we assume the line
2686 * contains the start of the Perl program.
44a8e56a
PP
2687 */
2688 if (d && *s != '#') {
774d564b 2689 char *c = ipath;
44a8e56a
PP
2690 while (*c && !strchr("; \t\r\n\f\v#", *c))
2691 c++;
2692 if (c < d)
2693 d = Nullch; /* "perl" not in first word; ignore */
2694 else
2695 *s = '#'; /* Don't try to parse shebang line */
2696 }
774d564b 2697#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2698#ifndef MACOS_TRADITIONAL
748a9306 2699 if (!d &&
44a8e56a 2700 *s == '#' &&
774d564b 2701 ipathend > ipath &&
3280af22 2702 !PL_minus_c &&
748a9306 2703 !instr(s,"indir") &&
3280af22 2704 instr(PL_origargv[0],"perl"))
748a9306 2705 {
9f68db38 2706 char **newargv;
9f68db38 2707
774d564b
PP
2708 *ipathend = '\0';
2709 s = ipathend + 1;
3280af22 2710 while (s < PL_bufend && isSPACE(*s))
9f68db38 2711 s++;
3280af22
NIS
2712 if (s < PL_bufend) {
2713 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2714 newargv[1] = s;
3280af22 2715 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2716 s++;
2717 *s = '\0';
3280af22 2718 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2719 }
2720 else
3280af22 2721 newargv = PL_origargv;
774d564b 2722 newargv[0] = ipath;
b4748376 2723 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2724 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2725 }
bf4acbe4 2726#endif
748a9306 2727 if (d) {
3280af22
NIS
2728 U32 oldpdb = PL_perldb;
2729 bool oldn = PL_minus_n;
2730 bool oldp = PL_minus_p;
748a9306
LW
2731
2732 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2733 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2734
2735 if (*d++ == '-') {
8cc95fdb
PP
2736 do {
2737 if (*d == 'M' || *d == 'm') {
2738 char *m = d;
2739 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2740 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2741 (int)(d - m), m);
2742 }
2743 d = moreswitches(d);
2744 } while (d);
155aba94
GS
2745 if ((PERLDB_LINE && !oldpdb) ||
2746 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2747 /* if we have already added "LINE: while (<>) {",
2748 we must not do it again */
748a9306 2749 {
3280af22
NIS
2750 sv_setpv(PL_linestr, "");
2751 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2752 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2753 PL_preambled = FALSE;
84902520 2754 if (PERLDB_LINE)
3280af22 2755 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2756 goto retry;
2757 }
a0d0e21e 2758 }
79072805 2759 }
9f68db38 2760 }
79072805 2761 }
3280af22
NIS
2762 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2763 PL_bufptr = s;
2764 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2765 return yylex();
ae986130 2766 }
378cc40b 2767 goto retry;
4fdae800 2768 case '\r':
6a27c188 2769#ifdef PERL_STRICT_CR
cea2e8a9 2770 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2771 Perl_croak(aTHX_
cc507455 2772 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2773#endif
4fdae800 2774 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2775#ifdef MACOS_TRADITIONAL
2776 case '\312':
2777#endif
378cc40b
LW
2778 s++;
2779 goto retry;
378cc40b 2780 case '#':
e929a76b 2781 case '\n':
3280af22 2782 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2783 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2784 /* handle eval qq[#line 1 "foo"\n ...] */
2785 CopLINE_dec(PL_curcop);
2786 incline(s);
2787 }
3280af22 2788 d = PL_bufend;
a687059c 2789 while (s < d && *s != '\n')
378cc40b 2790 s++;
0f85fab0 2791 if (s < d)
378cc40b 2792 s++;
463ee0b2 2793 incline(s);
3280af22
NIS
2794 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2795 PL_bufptr = s;
2796 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2797 return yylex();
a687059c 2798 }
378cc40b 2799 }
a687059c 2800 else {
378cc40b 2801 *s = '\0';
3280af22 2802 PL_bufend = s;
a687059c 2803 }
378cc40b
LW
2804 goto retry;
2805 case '-':
79072805 2806 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2807 I32 ftst = 0;
2808
378cc40b 2809 s++;
3280af22 2810 PL_bufptr = s;
748a9306
LW
2811 tmp = *s++;
2812
bf4acbe4 2813 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2814 s++;
2815
2816 if (strnEQ(s,"=>",2)) {
3280af22 2817 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2818 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2819 "### Saw unary minus before =>, forcing word '%s'\n", s);
2820 } )
748a9306
LW
2821 OPERATOR('-'); /* unary minus */
2822 }
3280af22 2823 PL_last_uni = PL_oldbufptr;
748a9306 2824 switch (tmp) {
e5edeb50
JH
2825 case 'r': ftst = OP_FTEREAD; break;
2826 case 'w': ftst = OP_FTEWRITE; break;
2827 case 'x': ftst = OP_FTEEXEC; break;
2828 case 'o': ftst = OP_FTEOWNED; break;
2829 case 'R': ftst = OP_FTRREAD; break;
2830 case 'W': ftst = OP_FTRWRITE; break;
2831 case 'X': ftst = OP_FTREXEC; break;
2832 case 'O': ftst = OP_FTROWNED; break;
2833 case 'e': ftst = OP_FTIS; break;
2834 case 'z': ftst = OP_FTZERO; break;
2835 case 's': ftst = OP_FTSIZE; break;
2836 case 'f': ftst = OP_FTFILE; break;
2837 case 'd': ftst = OP_FTDIR; break;
2838 case 'l': ftst = OP_FTLINK; break;
2839 case 'p': ftst = OP_FTPIPE; break;
2840 case 'S': ftst = OP_FTSOCK; break;
2841 case 'u': ftst = OP_FTSUID; break;
2842 case 'g': ftst = OP_FTSGID; break;
2843 case 'k': ftst = OP_FTSVTX; break;
2844 case 'b': ftst = OP_FTBLK; break;
2845 case 'c': ftst = OP_FTCHR; break;
2846 case 't': ftst = OP_FTTTY; break;
2847 case 'T': ftst = OP_FTTEXT; break;
2848 case 'B': ftst = OP_FTBINARY; break;
2849 case 'M': case 'A': case 'C':
2850 gv_fetchpv("\024",TRUE, SVt_PV);
2851 switch (tmp) {
2852 case 'M': ftst = OP_FTMTIME; break;
2853 case 'A': ftst = OP_FTATIME; break;
2854 case 'C': ftst = OP_FTCTIME; break;
2855 default: break;
2856 }
2857 break;
378cc40b 2858 default:
378cc40b
LW
2859 break;
2860 }
e5edeb50
JH
2861 if (ftst) {
2862 PL_last_lop_op = ftst;
4e553d73 2863 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2864 "### Saw file test %c\n", (int)ftst);
e5edeb50 2865 } )
e5edeb50
JH
2866 FTST(ftst);
2867 }
2868 else {
2869 /* Assume it was a minus followed by a one-letter named
2870 * subroutine call (or a -bareword), then. */
95c31fe3 2871 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2872 "### %c looked like a file test but was not\n",
2873 (int)ftst);
95c31fe3 2874 } )
e5edeb50
JH
2875 s -= 2;
2876 }
378cc40b 2877 }
a687059c
LW
2878 tmp = *s++;
2879 if (*s == tmp) {
2880 s++;
3280af22 2881 if (PL_expect == XOPERATOR)
79072805
LW
2882 TERM(POSTDEC);
2883 else
2884 OPERATOR(PREDEC);
2885 }
2886 else if (*s == '>') {
2887 s++;
2888 s = skipspace(s);
7e2040f0 2889 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2890 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2891 TOKEN(ARROW);
79072805 2892 }
748a9306
LW
2893 else if (*s == '$')
2894 OPERATOR(ARROW);
463ee0b2 2895 else
748a9306 2896 TERM(ARROW);
a687059c 2897 }
3280af22 2898 if (PL_expect == XOPERATOR)
79072805
LW
2899 Aop(OP_SUBTRACT);
2900 else {
3280af22 2901 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2902 check_uni();
79072805 2903 OPERATOR('-'); /* unary minus */
2f3197b3 2904 }
79072805 2905
378cc40b 2906 case '+':
a687059c
LW
2907 tmp = *s++;
2908 if (*s == tmp) {
378cc40b 2909 s++;
3280af22 2910 if (PL_expect == XOPERATOR)
79072805
LW
2911 TERM(POSTINC);
2912 else
2913 OPERATOR(PREINC);
378cc40b 2914 }
3280af22 2915 if (PL_expect == XOPERATOR)
79072805
LW
2916 Aop(OP_ADD);
2917 else {
3280af22 2918 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2919 check_uni();
a687059c 2920 OPERATOR('+');
2f3197b3 2921 }
a687059c 2922
378cc40b 2923 case '*':
3280af22
NIS
2924 if (PL_expect != XOPERATOR) {
2925 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2926 PL_expect = XOPERATOR;
2927 force_ident(PL_tokenbuf, '*');
2928 if (!*PL_tokenbuf)
a0d0e21e 2929 PREREF('*');
79072805 2930 TERM('*');
a687059c 2931 }
79072805
LW
2932 s++;
2933 if (*s == '*') {
a687059c 2934 s++;
79072805 2935 PWop(OP_POW);
a687059c 2936 }
79072805
LW
2937 Mop(OP_MULTIPLY);
2938
378cc40b 2939 case '%':
3280af22 2940 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2941 ++s;
2942 Mop(OP_MODULO);
a687059c 2943 }
3280af22
NIS
2944 PL_tokenbuf[0] = '%';
2945 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2946 if (!PL_tokenbuf[1]) {
2947 if (s == PL_bufend)
bbce6d69
PP
2948 yyerror("Final % should be \\% or %name");
2949 PREREF('%');
a687059c 2950 }
3280af22 2951 PL_pending_ident = '%';
bbce6d69 2952 TERM('%');
a687059c 2953
378cc40b 2954 case '^':
79072805 2955 s++;
a0d0e21e 2956 BOop(OP_BIT_XOR);
79072805 2957 case '[':
3280af22 2958 PL_lex_brackets++;
79072805 2959 /* FALL THROUGH */
378cc40b 2960 case '~':
378cc40b 2961 case ',':
378cc40b
LW
2962 tmp = *s++;
2963 OPERATOR(tmp);
a0d0e21e
LW
2964 case ':':
2965 if (s[1] == ':') {
2966 len = 0;
2967 goto just_a_word;
2968 }
2969 s++;
09bef843
SB
2970 switch (PL_expect) {
2971 OP *attrs;
2972 case XOPERATOR:
2973 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2974 break;
2975 PL_bufptr = s; /* update in case we back off */
2976 goto grabattrs;
2977 case XATTRBLOCK:
2978 PL_expect = XBLOCK;
2979 goto grabattrs;
2980 case XATTRTERM:
2981 PL_expect = XTERMBLOCK;
2982 grabattrs:
2983 s = skipspace(s);
2984 attrs = Nullop;
7e2040f0 2985 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2986 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2987 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2988 if (tmp < 0) tmp = -tmp;
2989 switch (tmp) {
2990 case KEY_or:
2991 case KEY_and:
2992 case KEY_for:
2993 case KEY_unless:
2994 case KEY_if:
2995 case KEY_while:
2996 case KEY_until:
2997 goto got_attrs;
2998 default:
2999 break;
3000 }
3001 }
09bef843
SB
3002 if (*d == '(') {
3003 d = scan_str(d,TRUE,TRUE);
3004 if (!d) {
3005 if (PL_lex_stuff) {
3006 SvREFCNT_dec(PL_lex_stuff);
3007 PL_lex_stuff = Nullsv;
3008 }
3009 /* MUST advance bufptr here to avoid bogus
3010 "at end of line" context messages from yyerror().
3011 */
3012 PL_bufptr = s + len;
3013 yyerror("Unterminated attribute parameter in attribute list");
3014 if (attrs)
3015 op_free(attrs);
3016 return 0; /* EOF indicator */
3017 }
3018 }
3019 if (PL_lex_stuff) {
3020 SV *sv = newSVpvn(s, len);
3021 sv_catsv(sv, PL_lex_stuff);
3022 attrs = append_elem(OP_LIST, attrs,
3023 newSVOP(OP_CONST, 0, sv));
3024 SvREFCNT_dec(PL_lex_stuff);
3025 PL_lex_stuff = Nullsv;
3026 }
3027 else {
78f9721b
SM
3028 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3029 CvLVALUE_on(PL_compcv);
3030 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3031 CvLOCKED_on(PL_compcv);
3032 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3033 CvMETHOD_on(PL_compcv);
3034 /* After we've set the flags, it could be argued that
3035 we don't need to do the attributes.pm-based setting
3036 process, and shouldn't bother appending recognized
3037 flags. To experiment with that, uncomment the
3038 following "else": */
3039 /* else */
3040 attrs = append_elem(OP_LIST, attrs,
3041 newSVOP(OP_CONST, 0,
3042 newSVpvn(s, len)));
09bef843
SB
3043 }
3044 s = skipspace(d);
0120eecf 3045 if (*s == ':' && s[1] != ':')
09bef843 3046 s = skipspace(s+1);
0120eecf
GS
3047 else if (s == d)
3048 break; /* require real whitespace or :'s */
09bef843 3049 }
f9829d6b
GS
3050 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3051 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3052 char q = ((*s == '\'') ? '"' : '\'');
3053 /* If here for an expression, and parsed no attrs, back off. */
3054 if (tmp == '=' && !attrs) {
3055 s = PL_bufptr;
3056 break;
3057 }
3058 /* MUST advance bufptr here to avoid bogus "at end of line"
3059 context messages from yyerror().
3060 */
3061 PL_bufptr = s;
3062 if (!*s)
3063 yyerror("Unterminated attribute list");
3064 else
3065 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3066 q, *s, q));
3067 if (attrs)
3068 op_free(attrs);
3069 OPERATOR(':');
3070 }
f9829d6b 3071 got_attrs:
09bef843
SB
3072 if (attrs) {
3073 PL_nextval[PL_nexttoke].opval = attrs;
3074 force_next(THING);
3075 }
3076 TOKEN(COLONATTR);
3077 }
a0d0e21e 3078 OPERATOR(':');
8990e307
LW
3079 case '(':
3080 s++;
3280af22
NIS
3081 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3082 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3083 else
3280af22 3084 PL_expect = XTERM;
a0d0e21e 3085 TOKEN('(');
378cc40b 3086 case ';':
f4dd75d9 3087 CLINE;
378cc40b
LW
3088 tmp = *s++;
3089 OPERATOR(tmp);
3090 case ')':
378cc40b 3091 tmp = *s++;
16d20bd9
AD
3092 s = skipspace(s);
3093 if (*s == '{')
3094 PREBLOCK(tmp);
378cc40b 3095 TERM(tmp);
79072805
LW
3096 case ']':
3097 s++;
3280af22 3098 if (PL_lex_brackets <= 0)
d98d5fff 3099 yyerror("Unmatched right square bracket");
463ee0b2 3100 else
3280af22
NIS
3101 --PL_lex_brackets;
3102 if (PL_lex_state == LEX_INTERPNORMAL) {
3103 if (PL_lex_brackets == 0) {
a0d0e21e 3104 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3105 PL_lex_state = LEX_INTERPEND;
79072805
LW
3106 }
3107 }
4633a7c4 3108 TERM(']');
79072805
LW
3109 case '{':
3110 leftbracket:
79072805 3111 s++;
3280af22
NIS
3112 if (PL_lex_brackets > 100) {
3113 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3114 if (newlb != PL_lex_brackstack) {
8990e307 3115 SAVEFREEPV(newlb);
3280af22 3116 PL_lex_brackstack = newlb;
8990e307
LW
3117 }
3118 }
3280af22 3119 switch (PL_expect) {
a0d0e21e 3120 case XTERM:
3280af22 3121 if (PL_lex_formbrack) {
a0d0e21e
LW
3122 s--;
3123 PRETERMBLOCK(DO);
3124 }
3280af22
NIS
3125 if (PL_oldoldbufptr == PL_last_lop)
3126 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3127 else
3280af22 3128 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3129 OPERATOR(HASHBRACK);
a0d0e21e 3130 case XOPERATOR:
bf4acbe4 3131 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3132 s++;
44a8e56a 3133 d = s;
3280af22
NIS
3134 PL_tokenbuf[0] = '\0';
3135 if (d < PL_bufend && *d == '-') {
3136 PL_tokenbuf[0] = '-';
44a8e56a 3137 d++;
bf4acbe4 3138 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3139 d++;
3140 }
7e2040f0 3141 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3142 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3143 FALSE, &len);
bf4acbe4 3144 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3145 d++;
3146 if (*d == '}') {
3280af22 3147 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3148 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
7948272d
NIS
3149 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
3150 PL_nextval[PL_nexttoke-1].opval)
3151 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
44a8e56a
PP
3152 if (minus)
3153 force_next('-');
748a9306
LW
3154 }
3155 }
3156 /* FALL THROUGH */
09bef843 3157 case XATTRBLOCK:
748a9306 3158 case XBLOCK:
3280af22
NIS
3159 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3160 PL_expect = XSTATE;
a0d0e21e 3161 break;
09bef843 3162 case XATTRTERM:
a0d0e21e 3163 case XTERMBLOCK:
3280af22
NIS
3164 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3165 PL_expect = XSTATE;
a0d0e21e
LW
3166 break;
3167 default: {
3168 char *t;
3280af22
NIS
3169 if (PL_oldoldbufptr == PL_last_lop)
3170 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3171 else
3280af22 3172 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3173 s = skipspace(s);
09ecc4b6 3174 if (*s == '}')
a0d0e21e 3175 OPERATOR(HASHBRACK);
b8a4b1be
GS
3176 /* This hack serves to disambiguate a pair of curlies
3177 * as being a block or an anon hash. Normally, expectation
3178 * determines that, but in cases where we're not in a
3179 * position to expect anything in particular (like inside
3180 * eval"") we have to resolve the ambiguity. This code
3181 * covers the case where the first term in the curlies is a
3182 * quoted string. Most other cases need to be explicitly
3183 * disambiguated by prepending a `+' before the opening
3184 * curly in order to force resolution as an anon hash.
3185 *
3186 * XXX should probably propagate the outer expectation
3187 * into eval"" to rely less on this hack, but that could
3188 * potentially break current behavior of eval"".
3189 * GSAR 97-07-21
3190 */
3191 t = s;
3192 if (*s == '\'' || *s == '"' || *s == '`') {
3193 /* common case: get past first string, handling escapes */
3280af22 3194 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3195 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3196 t++;
3197 t++;
a0d0e21e 3198 }
b8a4b1be 3199 else if (*s == 'q') {
3280af22 3200 if (++t < PL_bufend
b8a4b1be 3201 && (!isALNUM(*t)
3280af22 3202 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3203 && !isALNUM(*t))))
3204 {
b8a4b1be
GS
3205 char *tmps;
3206 char open, close, term;
3207 I32 brackets = 1;
3208
3280af22 3209 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3210 t++;
3211 term = *t;
3212 open = term;
3213 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3214 term = tmps[5];
3215 close = term;
3216 if (open == close)
3280af22
NIS
3217 for (t++; t < PL_bufend; t++) {
3218 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3219 t++;
6d07e5e9 3220 else if (*t == open)
b8a4b1be
GS
3221 break;
3222 }
3223 else
3280af22
NIS
3224 for (t++; t < PL_bufend; t++) {
3225 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3226 t++;
6d07e5e9 3227 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3228 break;
3229 else if (*t == open)
3230 brackets++;
3231 }
3232 }
3233 t++;
a0d0e21e 3234 }
7e2040f0 3235 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3236 t += UTF8SKIP(t);
7e2040f0 3237 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3238 t += UTF8SKIP(t);
a0d0e21e 3239 }
3280af22 3240 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3241 t++;
b8a4b1be
GS
3242 /* if comma follows first term, call it an anon hash */
3243 /* XXX it could be a comma expression with loop modifiers */
3280af22 3244 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3245 || (*t == '=' && t[1] == '>')))
a0d0e21e 3246 OPERATOR(HASHBRACK);
3280af22 3247 if (PL_expect == XREF)
4e4e412b 3248 PL_expect = XTERM;
a0d0e21e 3249 else {
3280af22
NIS
3250 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3251 PL_expect = XSTATE;
a0d0e21e 3252 }
8990e307 3253 }
a0d0e21e 3254 break;
463ee0b2 3255 }
57843af0 3256 yylval.ival = CopLINE(PL_curcop);
79072805 3257 if (isSPACE(*s) || *s == '#')
3280af22 3258 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3259 TOKEN('{');
378cc40b 3260 case '}':
79072805
LW
3261 rightbracket:
3262 s++;
3280af22 3263 if (PL_lex_brackets <= 0)
d98d5fff 3264 yyerror("Unmatched right curly bracket");
463ee0b2 3265 else
3280af22 3266 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3267 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3268 PL_lex_formbrack = 0;
3269 if (PL_lex_state == LEX_INTERPNORMAL) {
3270 if (PL_lex_brackets == 0) {
9059aa12
LW
3271 if (PL_expect & XFAKEBRACK) {
3272 PL_expect &= XENUMMASK;
3280af22
NIS
3273 PL_lex_state = LEX_INTERPEND;
3274 PL_bufptr = s;
cea2e8a9 3275 return yylex(); /* ignore fake brackets */
79072805 3276 }
fa83b5b6 3277 if (*s == '-' && s[1] == '>')
3280af22 3278 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3279 else if (*s != '[' && *s != '{')
3280af22 3280 PL_lex_state = LEX_INTERPEND;
79072805
LW
3281 }
3282 }
9059aa12
LW
3283 if (PL_expect & XFAKEBRACK) {
3284 PL_expect &= XENUMMASK;
3280af22 3285 PL_bufptr = s;
cea2e8a9 3286 return yylex(); /* ignore fake brackets */
748a9306 3287 }
79072805
LW
3288 force_next('}');
3289 TOKEN(';');
378cc40b
LW
3290 case '&':
3291 s++;
3292 tmp = *s++;
3293 if (tmp == '&')
a0d0e21e 3294 AOPERATOR(ANDAND);
378cc40b 3295 s--;
3280af22 3296 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3297 if (ckWARN(WARN_SEMICOLON)
3298 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3299 {
57843af0 3300 CopLINE_dec(PL_curcop);
cea2e8a9 3301 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3302 CopLINE_inc(PL_curcop);
463ee0b2 3303 }
79072805 3304 BAop(OP_BIT_AND);
463ee0b2 3305 }
79072805 3306
3280af22
NIS
3307 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3308 if (*PL_tokenbuf) {
3309 PL_expect = XOPERATOR;
3310 force_ident(PL_tokenbuf, '&');
463ee0b2 3311 }
79072805
LW
3312 else
3313 PREREF('&');
c07a80fd 3314 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3315 TERM('&');
3316
378cc40b
LW
3317 case '|':
3318 s++;
3319 tmp = *s++;
3320 if (tmp == '|')
a0d0e21e 3321 AOPERATOR(OROR);
378cc40b 3322 s--;
79072805 3323 BOop(OP_BIT_OR);
378cc40b
LW
3324 case '=':
3325 s++;
3326 tmp = *s++;
3327 if (tmp == '=')
79072805
LW
3328 Eop(OP_EQ);
3329 if (tmp == '>')
3330 OPERATOR(',');
378cc40b 3331 if (tmp == '~')
79072805 3332 PMop(OP_MATCH);
599cee73 3333 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3334 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3335 s--;
3280af22
NIS
3336 if (PL_expect == XSTATE && isALPHA(tmp) &&
3337 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3338 {
3280af22
NIS
3339 if (PL_in_eval && !PL_rsfp) {
3340 d = PL_bufend;
a5f75d66
AD
3341 while (s < d) {
3342 if (*s++ == '\n') {
3343 incline(s);
3344 if (strnEQ(s,"=cut",4)) {
3345 s = strchr(s,'\n');
3346 if (s)
3347 s++;
3348 else
3349 s = d;
3350 incline(s);
3351 goto retry;
3352 }
3353 }
3354 }
3355 goto retry;
3356 }
3280af22
NIS
3357 s = PL_bufend;
3358 PL_doextract = TRUE;
a0d0e21e
LW
3359 goto retry;
3360 }
3280af22 3361 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3362 char *t;
51882d45 3363#ifdef PERL_STRICT_CR
bf4acbe4 3364 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3365#else
bf4acbe4 3366 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3367#endif
a0d0e21e
LW
3368 if (*t == '\n' || *t == '#') {
3369 s--;
3280af22 3370 PL_expect = XBLOCK;
a0d0e21e
LW
3371 goto leftbracket;
3372 }
79072805 3373 }
a0d0e21e
LW
3374 yylval.ival = 0;
3375 OPERATOR(ASSIGNOP);
378cc40b
LW
3376 case '!':
3377 s++;
3378 tmp = *s++;
3379 if (tmp == '=')
79072805 3380 Eop(OP_NE);
378cc40b 3381 if (tmp == '~')
79072805 3382 PMop(OP_NOT);
378cc40b
LW
3383 s--;
3384 OPERATOR('!');
3385 case '<':
3280af22 3386 if (PL_expect != XOPERATOR) {
93a17b20 3387 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3388 check_uni();
79072805
LW
3389 if (s[1] == '<')
3390 s = scan_heredoc(s);
3391 else
3392 s = scan_inputsymbol(s);
3393 TERM(sublex_start());
378cc40b
LW
3394 }
3395 s++;
3396 tmp = *s++;
3397 if (tmp == '<')
79072805 3398 SHop(OP_LEFT_SHIFT);
395c3793
LW
3399 if (tmp == '=') {
3400 tmp = *s++;
3401 if (tmp == '>')
79072805 3402 Eop(OP_NCMP);
395c3793 3403 s--;
79072805 3404 Rop(OP_LE);
395c3793 3405 }
378cc40b 3406 s--;
79072805 3407 Rop(OP_LT);
378cc40b
LW
3408 case '>':
3409 s++;
3410 tmp = *s++;
3411 if (tmp == '>')
79072805 3412 SHop(OP_RIGHT_SHIFT);
378cc40b 3413 if (tmp == '=')
79072805 3414 Rop(OP_GE);
378cc40b 3415 s--;
79072805 3416 Rop(OP_GT);
378cc40b
LW
3417
3418 case '$':
bbce6d69
PP
3419 CLINE;
3420
3280af22
NIS
3421 if (PL_expect == XOPERATOR) {
3422 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3423 PL_expect = XTERM;
a0d0e21e 3424 depcom();
bbce6d69 3425 return ','; /* grandfather non-comma-format format */
a0d0e21e 3426 }
8990e307 3427 }
a0d0e21e 3428
7e2040f0 3429 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3430 PL_tokenbuf[0] = '@';
376b8730
SM
3431 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3432 sizeof PL_tokenbuf - 1, FALSE);
3433 if (PL_expect == XOPERATOR)
3434 no_op("Array length", s);
3280af22 3435 if (!PL_tokenbuf[1])
a0d0e21e 3436 PREREF(DOLSHARP);
3280af22
NIS
3437 PL_expect = XOPERATOR;
3438 PL_pending_ident = '#';
463ee0b2 3439 TOKEN(DOLSHARP);
79072805 3440 }
bbce6d69 3441
3280af22 3442 PL_tokenbuf[0] = '$';
376b8730
SM
3443 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3444 sizeof PL_tokenbuf - 1, FALSE);
3445 if (PL_expect == XOPERATOR)
3446 no_op("Scalar", s);
3280af22
NIS
3447 if (!PL_tokenbuf[1]) {
3448 if (s == PL_bufend)
bbce6d69
PP
3449 yyerror("Final $ should be \\$ or $name");
3450 PREREF('$');
8990e307 3451 }
a0d0e21e 3452
bbce6d69 3453 /* This kludge not intended to be bulletproof. */
3280af22 3454 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3455 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3456 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3457 yylval.opval->op_private = OPpCONST_ARYBASE;
3458 TERM(THING);
3459 }
3460
ff68c719 3461 d = s;
69d2bceb 3462 tmp = (I32)*s;
3280af22 3463 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3464 s = skipspace(s);
3465
3280af22 3466 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3467 char *t;
3468 if (*s == '[') {
3280af22 3469 PL_tokenbuf[0] = '@';
599cee73 3470 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3471 for(t = s + 1;
7e2040f0 3472 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3473 t++) ;
a0d0e21e 3474 if (*t++ == ',') {
3280af22
NIS
3475 PL_bufptr = skipspace(PL_bufptr);
3476 while (t < PL_bufend && *t != ']')
bbce6d69 3477 t++;
cea2e8a9 3478 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3479 "Multidimensional syntax %.*s not supported",
3480 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3481 }
3482 }
bbce6d69
PP
3483 }
3484 else if (*s == '{') {
3280af22 3485 PL_tokenbuf[0] = '%';
599cee73 3486 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3487 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3488 {
3280af22 3489 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3490 STRLEN len;
3491 for (t++; isSPACE(*t); t++) ;
7e2040f0 3492 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3493 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3494 for (; isSPACE(*t); t++) ;
864dbfa3 3495 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3496 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3497 "You need to quote \"%s\"", tmpbuf);
748a9306 3498 }
93a17b20
LW
3499 }
3500 }
2f3197b3 3501 }
bbce6d69 3502
3280af22 3503 PL_expect = XOPERATOR;
69d2bceb 3504 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3505 bool islop = (PL_last_lop == PL_oldoldbufptr);
3506 if (!islop || PL_last_lop_op == OP_GREPSTART)
3507 PL_expect = XOPERATOR;
bbce6d69 3508 else if (strchr("$@\"'`q", *s))
3280af22 3509 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3510 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3511 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3512 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3513 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3514 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3515 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3516 /* binary operators exclude handle interpretations */
3517 switch (tmp) {
3518 case -KEY_x:
3519 case -KEY_eq:
3520 case -KEY_ne:
3521 case -KEY_gt:
3522 case -KEY_lt:
3523 case -KEY_ge:
3524 case -KEY_le:
3525 case -KEY_cmp:
3526 break;
3527 default:
3280af22 3528 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3529 break;
3530 }
3531 }
68dc0745
PP
3532 else {
3533 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3534 if (gv && GvCVu(gv))
3280af22 3535 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3536 }
93a17b20 3537 }
bbce6d69 3538 else if (isDIGIT(*s))
3280af22 3539 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3540 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3541 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3542 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3543 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3544 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3545 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3546 }
3280af22 3547 PL_pending_ident = '$';
79072805 3548 TOKEN('$');
378cc40b
LW
3549
3550 case '@':
3280af22 3551 if (PL_expect == XOPERATOR)
bbce6d69 3552 no_op("Array", s);
3280af22
NIS
3553 PL_tokenbuf[0] = '@';
3554 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3555 if (!PL_tokenbuf[1]) {
3556 if (s == PL_bufend)
bbce6d69
PP
3557 yyerror("Final @ should be \\@ or @name");
3558 PREREF('@');
3559 }
3280af22 3560 if (PL_lex_state == LEX_NORMAL)
ff68c719 3561 s = skipspace(s);
3280af22 3562 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3563 if (*s == '{')
3280af22 3564 PL_tokenbuf[0] = '%';