This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
RE: [PATCH] [ID 20001223.002] lvalues in list context
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
3280af22
NIS
129#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
130#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
131#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
132#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
133#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
134#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
135#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
136#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
137#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
138#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
139#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
140#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
141#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
142#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
143#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
144#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
145#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
146#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
147#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
148#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 149
a687059c
LW
150/* This bit of chicanery makes a unary function followed by
151 * a parenthesis into a function with one argument, highest precedence.
152 */
2f3197b3 153#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
154 PL_expect = XTERM, \
155 PL_bufptr = s, \
156 PL_last_uni = PL_oldbufptr, \
157 PL_last_lop_op = f, \
a687059c
LW
158 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
159
79072805 160#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
79072805
LW
163 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
164
9f68db38 165/* grandfather return to old style */
3280af22 166#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 167
ffb4593c
NT
168/*
169 * S_ao
170 *
171 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
172 * into an OP_ANDASSIGN or OP_ORASSIGN
173 */
174
76e3520e 175STATIC int
cea2e8a9 176S_ao(pTHX_ int toketype)
a0d0e21e 177{
3280af22
NIS
178 if (*PL_bufptr == '=') {
179 PL_bufptr++;
a0d0e21e
LW
180 if (toketype == ANDAND)
181 yylval.ival = OP_ANDASSIGN;
182 else if (toketype == OROR)
183 yylval.ival = OP_ORASSIGN;
184 toketype = ASSIGNOP;
185 }
186 return toketype;
187}
188
ffb4593c
NT
189/*
190 * S_no_op
191 * When Perl expects an operator and finds something else, no_op
192 * prints the warning. It always prints "<something> found where
193 * operator expected. It prints "Missing semicolon on previous line?"
194 * if the surprise occurs at the start of the line. "do you need to
195 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
196 * where the compiler doesn't know if foo is a method call or a function.
197 * It prints "Missing operator before end of line" if there's nothing
198 * after the missing operator, or "... before <...>" if there is something
199 * after the missing operator.
200 */
201
76e3520e 202STATIC void
cea2e8a9 203S_no_op(pTHX_ char *what, char *s)
463ee0b2 204{
3280af22
NIS
205 char *oldbp = PL_bufptr;
206 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 207
1189a94a
GS
208 if (!s)
209 s = oldbp;
07c798fb 210 else
1189a94a 211 PL_bufptr = s;
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 221 }
07c798fb
HS
222 else {
223 assert(s >= oldbp);
cea2e8a9 224 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 225 }
3280af22 226 PL_bufptr = oldbp;
8990e307
LW
227}
228
ffb4593c
NT
229/*
230 * S_missingterm
231 * Complain about missing quote/regexp/heredoc terminator.
232 * If it's called with (char *)NULL then it cauterizes the line buffer.
233 * If we're in a delimited string and the delimiter is a control
234 * character, it's reformatted into a two-char sequence like ^C.
235 * This is fatal.
236 */
237
76e3520e 238STATIC void
cea2e8a9 239S_missingterm(pTHX_ char *s)
8990e307
LW
240{
241 char tmpbuf[3];
242 char q;
243 if (s) {
244 char *nl = strrchr(s,'\n');
d2719217 245 if (nl)
8990e307
LW
246 *nl = '\0';
247 }
9d116dd7
JH
248 else if (
249#ifdef EBCDIC
250 iscntrl(PL_multi_close)
251#else
252 PL_multi_close < 32 || PL_multi_close == 127
253#endif
254 ) {
8990e307 255 *tmpbuf = '^';
3280af22 256 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
257 s = "\\n";
258 tmpbuf[2] = '\0';
259 s = tmpbuf;
260 }
261 else {
3280af22 262 *tmpbuf = PL_multi_close;
8990e307
LW
263 tmpbuf[1] = '\0';
264 s = tmpbuf;
265 }
266 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 267 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 268}
79072805 269
ffb4593c
NT
270/*
271 * Perl_deprecate
ffb4593c
NT
272 */
273
79072805 274void
864dbfa3 275Perl_deprecate(pTHX_ char *s)
a0d0e21e 276{
599cee73 277 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 278 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
279}
280
ffb4593c
NT
281/*
282 * depcom
9cbb5ea2 283 * Deprecate a comma-less variable list.
ffb4593c
NT
284 */
285
76e3520e 286STATIC void
cea2e8a9 287S_depcom(pTHX)
a0d0e21e
LW
288{
289 deprecate("comma-less variable list");
290}
291
ffb4593c 292/*
9cbb5ea2
GS
293 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
294 * utf16-to-utf8-reversed.
ffb4593c
NT
295 */
296
c39cd008
GS
297#ifdef PERL_CR_FILTER
298static void
299strip_return(SV *sv)
300{
301 register char *s = SvPVX(sv);
302 register char *e = s + SvCUR(sv);
303 /* outer loop optimized to do nothing if there are no CR-LFs */
304 while (s < e) {
305 if (*s++ == '\r' && *s == '\n') {
306 /* hit a CR-LF, need to copy the rest */
307 register char *d = s - 1;
308 *d++ = *s++;
309 while (s < e) {
310 if (*s == '\r' && s[1] == '\n')
311 s++;
312 *d++ = *s++;
313 }
314 SvCUR(sv) -= s - d;
315 return;
316 }
317 }
318}
a868473f 319
76e3520e 320STATIC I32
c39cd008 321S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 322{
c39cd008
GS
323 I32 count = FILTER_READ(idx+1, sv, maxlen);
324 if (count > 0 && !maxlen)
325 strip_return(sv);
326 return count;
a868473f
NIS
327}
328#endif
329
ffb4593c
NT
330/*
331 * Perl_lex_start
9cbb5ea2
GS
332 * Initialize variables. Uses the Perl save_stack to save its state (for
333 * recursive calls to the parser).
ffb4593c
NT
334 */
335
a0d0e21e 336void
864dbfa3 337Perl_lex_start(pTHX_ SV *line)
79072805 338{
8990e307
LW
339 char *s;
340 STRLEN len;
341
3280af22
NIS
342 SAVEI32(PL_lex_dojoin);
343 SAVEI32(PL_lex_brackets);
3280af22
NIS
344 SAVEI32(PL_lex_casemods);
345 SAVEI32(PL_lex_starts);
346 SAVEI32(PL_lex_state);
7766f137 347 SAVEVPTR(PL_lex_inpat);
3280af22 348 SAVEI32(PL_lex_inwhat);
18b09519
GS
349 if (PL_lex_state == LEX_KNOWNEXT) {
350 I32 toke = PL_nexttoke;
351 while (--toke >= 0) {
352 SAVEI32(PL_nexttype[toke]);
353 SAVEVPTR(PL_nextval[toke]);
354 }
355 SAVEI32(PL_nexttoke);
18b09519 356 }
57843af0 357 SAVECOPLINE(PL_curcop);
3280af22
NIS
358 SAVEPPTR(PL_bufptr);
359 SAVEPPTR(PL_bufend);
360 SAVEPPTR(PL_oldbufptr);
361 SAVEPPTR(PL_oldoldbufptr);
362 SAVEPPTR(PL_linestart);
363 SAVESPTR(PL_linestr);
364 SAVEPPTR(PL_lex_brackstack);
365 SAVEPPTR(PL_lex_casestack);
c76ac1ee 366 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
367 SAVESPTR(PL_lex_stuff);
368 SAVEI32(PL_lex_defer);
09bef843 369 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 370 SAVESPTR(PL_lex_repl);
bebdddfc
GS
371 SAVEINT(PL_expect);
372 SAVEINT(PL_lex_expect);
3280af22
NIS
373
374 PL_lex_state = LEX_NORMAL;
375 PL_lex_defer = 0;
376 PL_expect = XSTATE;
377 PL_lex_brackets = 0;
3280af22
NIS
378 New(899, PL_lex_brackstack, 120, char);
379 New(899, PL_lex_casestack, 12, char);
380 SAVEFREEPV(PL_lex_brackstack);
381 SAVEFREEPV(PL_lex_casestack);
382 PL_lex_casemods = 0;
383 *PL_lex_casestack = '\0';
384 PL_lex_dojoin = 0;
385 PL_lex_starts = 0;
386 PL_lex_stuff = Nullsv;
387 PL_lex_repl = Nullsv;
388 PL_lex_inpat = 0;
76be56bc 389 PL_nexttoke = 0;
3280af22 390 PL_lex_inwhat = 0;
09bef843 391 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
392 PL_linestr = line;
393 if (SvREADONLY(PL_linestr))
394 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
395 s = SvPV(PL_linestr, len);
8990e307 396 if (len && s[len-1] != ';') {
3280af22
NIS
397 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
398 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
399 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 400 }
3280af22
NIS
401 SvTEMP_off(PL_linestr);
402 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
403 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
404 SvREFCNT_dec(PL_rs);
79cb57f6 405 PL_rs = newSVpvn("\n", 1);
3280af22 406 PL_rsfp = 0;
79072805 407}
a687059c 408
ffb4593c
NT
409/*
410 * Perl_lex_end
9cbb5ea2
GS
411 * Finalizer for lexing operations. Must be called when the parser is
412 * done with the lexer.
ffb4593c
NT
413 */
414
463ee0b2 415void
864dbfa3 416Perl_lex_end(pTHX)
463ee0b2 417{
3280af22 418 PL_doextract = FALSE;
463ee0b2
LW
419}
420
ffb4593c
NT
421/*
422 * S_incline
423 * This subroutine has nothing to do with tilting, whether at windmills
424 * or pinball tables. Its name is short for "increment line". It
57843af0 425 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 426 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
427 * # line 500 "foo.pm"
428 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
429 */
430
76e3520e 431STATIC void
cea2e8a9 432S_incline(pTHX_ char *s)
463ee0b2
LW
433{
434 char *t;
435 char *n;
73659bf1 436 char *e;
463ee0b2 437 char ch;
463ee0b2 438
57843af0 439 CopLINE_inc(PL_curcop);
463ee0b2
LW
440 if (*s++ != '#')
441 return;
bf4acbe4 442 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
443 if (strnEQ(s, "line", 4))
444 s += 4;
445 else
446 return;
447 if (*s == ' ' || *s == '\t')
448 s++;
4e553d73 449 else
73659bf1 450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
452 if (!isDIGIT(*s))
453 return;
454 n = s;
455 while (isDIGIT(*s))
456 s++;
bf4acbe4 457 while (SPACE_OR_TAB(*s))
463ee0b2 458 s++;
73659bf1 459 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 460 s++;
73659bf1
GS
461 e = t + 1;
462 }
463ee0b2 463 else {
463ee0b2 464 for (t = s; !isSPACE(*t); t++) ;
73659bf1 465 e = t;
463ee0b2 466 }
bf4acbe4 467 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
468 e++;
469 if (*e != '\n' && *e != '\0')
470 return; /* false alarm */
471
463ee0b2
LW
472 ch = *t;
473 *t = '\0';
f4dd75d9
GS
474 if (t - s > 0) {
475#ifdef USE_ITHREADS
476 Safefree(CopFILE(PL_curcop));
477#else
478 SvREFCNT_dec(CopFILEGV(PL_curcop));
479#endif
57843af0 480 CopFILE_set(PL_curcop, s);
f4dd75d9 481 }
463ee0b2 482 *t = ch;
57843af0 483 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
484}
485
ffb4593c
NT
486/*
487 * S_skipspace
488 * Called to gobble the appropriate amount and type of whitespace.
489 * Skips comments as well.
490 */
491
76e3520e 492STATIC char *
cea2e8a9 493S_skipspace(pTHX_ register char *s)
a687059c 494{
3280af22 495 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 496 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
497 s++;
498 return s;
499 }
500 for (;;) {
fd049845 501 STRLEN prevlen;
09bef843
SB
502 SSize_t oldprevlen, oldoldprevlen;
503 SSize_t oldloplen, oldunilen;
60e6418e
GS
504 while (s < PL_bufend && isSPACE(*s)) {
505 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
506 incline(s);
507 }
ffb4593c
NT
508
509 /* comment */
3280af22
NIS
510 if (s < PL_bufend && *s == '#') {
511 while (s < PL_bufend && *s != '\n')
463ee0b2 512 s++;
60e6418e 513 if (s < PL_bufend) {
463ee0b2 514 s++;
60e6418e
GS
515 if (PL_in_eval && !PL_rsfp) {
516 incline(s);
517 continue;
518 }
519 }
463ee0b2 520 }
ffb4593c
NT
521
522 /* only continue to recharge the buffer if we're at the end
523 * of the buffer, we're not reading from a source filter, and
524 * we're in normal lexing mode
525 */
09bef843
SB
526 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
527 PL_lex_state == LEX_FORMLINE)
463ee0b2 528 return s;
ffb4593c
NT
529
530 /* try to recharge the buffer */
9cbb5ea2
GS
531 if ((s = filter_gets(PL_linestr, PL_rsfp,
532 (prevlen = SvCUR(PL_linestr)))) == Nullch)
533 {
534 /* end of file. Add on the -p or -n magic */
3280af22
NIS
535 if (PL_minus_n || PL_minus_p) {
536 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
537 ";}continue{print or die qq(-p destination: $!\\n)" :
538 "");
3280af22
NIS
539 sv_catpv(PL_linestr,";}");
540 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
541 }
542 else
3280af22 543 sv_setpv(PL_linestr,";");
ffb4593c
NT
544
545 /* reset variables for next time we lex */
9cbb5ea2
GS
546 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
547 = SvPVX(PL_linestr);
3280af22 548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
549
550 /* Close the filehandle. Could be from -P preprocessor,
551 * STDIN, or a regular file. If we were reading code from
552 * STDIN (because the commandline held no -e or filename)
553 * then we don't close it, we reset it so the code can
554 * read from STDIN too.
555 */
556
3280af22
NIS
557 if (PL_preprocess && !PL_in_eval)
558 (void)PerlProc_pclose(PL_rsfp);
559 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
560 PerlIO_clearerr(PL_rsfp);
8990e307 561 else
3280af22
NIS
562 (void)PerlIO_close(PL_rsfp);
563 PL_rsfp = Nullfp;
463ee0b2
LW
564 return s;
565 }
ffb4593c
NT
566
567 /* not at end of file, so we only read another line */
09bef843
SB
568 /* make corresponding updates to old pointers, for yyerror() */
569 oldprevlen = PL_oldbufptr - PL_bufend;
570 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
571 if (PL_last_uni)
572 oldunilen = PL_last_uni - PL_bufend;
573 if (PL_last_lop)
574 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
575 PL_linestart = PL_bufptr = s + prevlen;
576 PL_bufend = s + SvCUR(PL_linestr);
577 s = PL_bufptr;
09bef843
SB
578 PL_oldbufptr = s + oldprevlen;
579 PL_oldoldbufptr = s + oldoldprevlen;
580 if (PL_last_uni)
581 PL_last_uni = s + oldunilen;
582 if (PL_last_lop)
583 PL_last_lop = s + oldloplen;
a0d0e21e 584 incline(s);
ffb4593c
NT
585
586 /* debugger active and we're not compiling the debugger code,
587 * so store the line into the debugger's array of lines
588 */
3280af22 589 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
590 SV *sv = NEWSV(85,0);
591
592 sv_upgrade(sv, SVt_PVMG);
3280af22 593 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 594 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 595 }
463ee0b2 596 }
a687059c 597}
378cc40b 598
ffb4593c
NT
599/*
600 * S_check_uni
601 * Check the unary operators to ensure there's no ambiguity in how they're
602 * used. An ambiguous piece of code would be:
603 * rand + 5
604 * This doesn't mean rand() + 5. Because rand() is a unary operator,
605 * the +5 is its argument.
606 */
607
76e3520e 608STATIC void
cea2e8a9 609S_check_uni(pTHX)
ba106d47 610{
2f3197b3 611 char *s;
a0d0e21e 612 char *t;
2f3197b3 613
3280af22 614 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 615 return;
3280af22
NIS
616 while (isSPACE(*PL_last_uni))
617 PL_last_uni++;
7e2040f0 618 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 619 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 620 return;
0453d815 621 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 622 char ch = *s;
0453d815 623 *s = '\0';
4e553d73
NIS
624 Perl_warner(aTHX_ WARN_AMBIGUOUS,
625 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
626 PL_last_uni);
627 *s = ch;
628 }
2f3197b3
LW
629}
630
ffb4593c
NT
631/* workaround to replace the UNI() macro with a function. Only the
632 * hints/uts.sh file mentions this. Other comments elsewhere in the
633 * source indicate Microport Unix might need it too.
634 */
635
ffed7fef
LW
636#ifdef CRIPPLED_CC
637
638#undef UNI
ffed7fef 639#define UNI(f) return uni(f,s)
ffed7fef 640
76e3520e 641STATIC int
cea2e8a9 642S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
643{
644 yylval.ival = f;
3280af22
NIS
645 PL_expect = XTERM;
646 PL_bufptr = s;
8f872242
NIS
647 PL_last_uni = PL_oldbufptr;
648 PL_last_lop_op = f;
ffed7fef
LW
649 if (*s == '(')
650 return FUNC1;
651 s = skipspace(s);
652 if (*s == '(')
653 return FUNC1;
654 else
655 return UNIOP;
656}
657
a0d0e21e
LW
658#endif /* CRIPPLED_CC */
659
ffb4593c
NT
660/*
661 * LOP : macro to build a list operator. Its behaviour has been replaced
662 * with a subroutine, S_lop() for which LOP is just another name.
663 */
664
a0d0e21e
LW
665#define LOP(f,x) return lop(f,x,s)
666
ffb4593c
NT
667/*
668 * S_lop
669 * Build a list operator (or something that might be one). The rules:
670 * - if we have a next token, then it's a list operator [why?]
671 * - if the next thing is an opening paren, then it's a function
672 * - else it's a list operator
673 */
674
76e3520e 675STATIC I32
a0be28da 676S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 677{
79072805 678 yylval.ival = f;
35c8bce7 679 CLINE;
3280af22
NIS
680 PL_expect = x;
681 PL_bufptr = s;
682 PL_last_lop = PL_oldbufptr;
683 PL_last_lop_op = f;
684 if (PL_nexttoke)
a0d0e21e 685 return LSTOP;
79072805
LW
686 if (*s == '(')
687 return FUNC;
688 s = skipspace(s);
689 if (*s == '(')
690 return FUNC;
691 else
692 return LSTOP;
693}
694
ffb4593c
NT
695/*
696 * S_force_next
9cbb5ea2 697 * When the lexer realizes it knows the next token (for instance,
ffb4593c 698 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
699 * to know what token to return the next time the lexer is called. Caller
700 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
701 * handles the token correctly.
ffb4593c
NT
702 */
703
4e553d73 704STATIC void
cea2e8a9 705S_force_next(pTHX_ I32 type)
79072805 706{
3280af22
NIS
707 PL_nexttype[PL_nexttoke] = type;
708 PL_nexttoke++;
709 if (PL_lex_state != LEX_KNOWNEXT) {
710 PL_lex_defer = PL_lex_state;
711 PL_lex_expect = PL_expect;
712 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
713 }
714}
715
ffb4593c
NT
716/*
717 * S_force_word
718 * When the lexer knows the next thing is a word (for instance, it has
719 * just seen -> and it knows that the next char is a word char, then
720 * it calls S_force_word to stick the next word into the PL_next lookahead.
721 *
722 * Arguments:
b1b65b59 723 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
724 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
725 * int check_keyword : if true, Perl checks to make sure the word isn't
726 * a keyword (do this if the word is a label, e.g. goto FOO)
727 * int allow_pack : if true, : characters will also be allowed (require,
728 * use, etc. do this)
9cbb5ea2 729 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
730 */
731
76e3520e 732STATIC char *
cea2e8a9 733S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 734{
463ee0b2
LW
735 register char *s;
736 STRLEN len;
4e553d73 737
463ee0b2
LW
738 start = skipspace(start);
739 s = start;
7e2040f0 740 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 741 (allow_pack && *s == ':') ||
15f0808c 742 (allow_initial_tick && *s == '\'') )
a0d0e21e 743 {
3280af22
NIS
744 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
745 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
746 return start;
747 if (token == METHOD) {
748 s = skipspace(s);
749 if (*s == '(')
3280af22 750 PL_expect = XTERM;
463ee0b2 751 else {
3280af22 752 PL_expect = XOPERATOR;
463ee0b2 753 }
79072805 754 }
3280af22
NIS
755 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
756 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
757 force_next(token);
758 }
759 return s;
760}
761
ffb4593c
NT
762/*
763 * S_force_ident
9cbb5ea2 764 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
765 * text only contains the "foo" portion. The first argument is a pointer
766 * to the "foo", and the second argument is the type symbol to prefix.
767 * Forces the next token to be a "WORD".
9cbb5ea2 768 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
769 */
770
76e3520e 771STATIC void
cea2e8a9 772S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
773{
774 if (s && *s) {
11343788 775 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 776 PL_nextval[PL_nexttoke].opval = o;
79072805 777 force_next(WORD);
748a9306 778 if (kind) {
11343788 779 o->op_private = OPpCONST_ENTERED;
55497cff
PP
780 /* XXX see note in pp_entereval() for why we forgo typo
781 warnings if the symbol must be introduced in an eval.
782 GSAR 96-10-12 */
3280af22 783 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
784 kind == '$' ? SVt_PV :
785 kind == '@' ? SVt_PVAV :
786 kind == '%' ? SVt_PVHV :
787 SVt_PVGV
788 );
748a9306 789 }
79072805
LW
790 }
791}
792
1571675a
GS
793NV
794Perl_str_to_version(pTHX_ SV *sv)
795{
796 NV retval = 0.0;
797 NV nshift = 1.0;
798 STRLEN len;
799 char *start = SvPVx(sv,len);
3aa33fe5 800 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
801 char *end = start + len;
802 while (start < end) {
ba210ebe 803 STRLEN skip;
1571675a
GS
804 UV n;
805 if (utf)
dcad2880 806 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
807 else {
808 n = *(U8*)start;
809 skip = 1;
810 }
811 retval += ((NV)n)/nshift;
812 start += skip;
813 nshift *= 1000;
814 }
815 return retval;
816}
817
4e553d73 818/*
ffb4593c
NT
819 * S_force_version
820 * Forces the next token to be a version number.
821 */
822
76e3520e 823STATIC char *
cea2e8a9 824S_force_version(pTHX_ char *s)
89bfa8cd
PP
825{
826 OP *version = Nullop;
44dcb63b 827 char *d;
89bfa8cd
PP
828
829 s = skipspace(s);
830
44dcb63b 831 d = s;
dd629d5b 832 if (*d == 'v')
44dcb63b 833 d++;
44dcb63b 834 if (isDIGIT(*d)) {
a7cb1f99 835 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 836 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 837 SV *ver;
b73d6f50 838 s = scan_num(s, &yylval);
89bfa8cd 839 version = yylval.opval;
dd629d5b
GS
840 ver = cSVOPx(version)->op_sv;
841 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 842 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
843 SvNVX(ver) = str_to_version(ver);
844 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 845 }
89bfa8cd
PP
846 }
847 }
848
849 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 850 PL_nextval[PL_nexttoke].opval = version;
4e553d73 851 force_next(WORD);
89bfa8cd
PP
852
853 return (s);
854}
855
ffb4593c
NT
856/*
857 * S_tokeq
858 * Tokenize a quoted string passed in as an SV. It finds the next
859 * chunk, up to end of string or a backslash. It may make a new
860 * SV containing that chunk (if HINT_NEW_STRING is on). It also
861 * turns \\ into \.
862 */
863
76e3520e 864STATIC SV *
cea2e8a9 865S_tokeq(pTHX_ SV *sv)
79072805
LW
866{
867 register char *s;
868 register char *send;
869 register char *d;
b3ac6de7
IZ
870 STRLEN len = 0;
871 SV *pv = sv;
79072805
LW
872
873 if (!SvLEN(sv))
b3ac6de7 874 goto finish;
79072805 875
a0d0e21e 876 s = SvPV_force(sv, len);
21a311ee 877 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 878 goto finish;
463ee0b2 879 send = s + len;
79072805
LW
880 while (s < send && *s != '\\')
881 s++;
882 if (s == send)
b3ac6de7 883 goto finish;
79072805 884 d = s;
3280af22 885 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 886 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
887 while (s < send) {
888 if (*s == '\\') {
a0d0e21e 889 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
890 s++; /* all that, just for this */
891 }
892 *d++ = *s++;
893 }
894 *d = '\0';
463ee0b2 895 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 896 finish:
3280af22 897 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 898 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
899 return sv;
900}
901
ffb4593c
NT
902/*
903 * Now come three functions related to double-quote context,
904 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
905 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
906 * interact with PL_lex_state, and create fake ( ... ) argument lists
907 * to handle functions and concatenation.
908 * They assume that whoever calls them will be setting up a fake
909 * join call, because each subthing puts a ',' after it. This lets
910 * "lower \luPpEr"
911 * become
912 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
913 *
914 * (I'm not sure whether the spurious commas at the end of lcfirst's
915 * arguments and join's arguments are created or not).
916 */
917
918/*
919 * S_sublex_start
920 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
921 *
922 * Pattern matching will set PL_lex_op to the pattern-matching op to
923 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
924 *
925 * OP_CONST and OP_READLINE are easy--just make the new op and return.
926 *
927 * Everything else becomes a FUNC.
928 *
929 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
930 * had an OP_CONST or OP_READLINE). This just sets us up for a
931 * call to S_sublex_push().
932 */
933
76e3520e 934STATIC I32
cea2e8a9 935S_sublex_start(pTHX)
79072805
LW
936{
937 register I32 op_type = yylval.ival;
79072805
LW
938
939 if (op_type == OP_NULL) {
3280af22
NIS
940 yylval.opval = PL_lex_op;
941 PL_lex_op = Nullop;
79072805
LW
942 return THING;
943 }
944 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 945 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
946
947 if (SvTYPE(sv) == SVt_PVIV) {
948 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
949 STRLEN len;
950 char *p;
951 SV *nsv;
952
953 p = SvPV(sv, len);
79cb57f6 954 nsv = newSVpvn(p, len);
01ec43d0
GS
955 if (SvUTF8(sv))
956 SvUTF8_on(nsv);
b3ac6de7
IZ
957 SvREFCNT_dec(sv);
958 sv = nsv;
4e553d73 959 }
b3ac6de7 960 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 961 PL_lex_stuff = Nullsv;
79072805
LW
962 return THING;
963 }
964
3280af22
NIS
965 PL_sublex_info.super_state = PL_lex_state;
966 PL_sublex_info.sub_inwhat = op_type;
967 PL_sublex_info.sub_op = PL_lex_op;
968 PL_lex_state = LEX_INTERPPUSH;
55497cff 969
3280af22
NIS
970 PL_expect = XTERM;
971 if (PL_lex_op) {
972 yylval.opval = PL_lex_op;
973 PL_lex_op = Nullop;
55497cff
PP
974 return PMFUNC;
975 }
976 else
977 return FUNC;
978}
979
ffb4593c
NT
980/*
981 * S_sublex_push
982 * Create a new scope to save the lexing state. The scope will be
983 * ended in S_sublex_done. Returns a '(', starting the function arguments
984 * to the uc, lc, etc. found before.
985 * Sets PL_lex_state to LEX_INTERPCONCAT.
986 */
987
76e3520e 988STATIC I32
cea2e8a9 989S_sublex_push(pTHX)
55497cff 990{
f46d017c 991 ENTER;
55497cff 992
3280af22
NIS
993 PL_lex_state = PL_sublex_info.super_state;
994 SAVEI32(PL_lex_dojoin);
995 SAVEI32(PL_lex_brackets);
3280af22
NIS
996 SAVEI32(PL_lex_casemods);
997 SAVEI32(PL_lex_starts);
998 SAVEI32(PL_lex_state);
7766f137 999 SAVEVPTR(PL_lex_inpat);
3280af22 1000 SAVEI32(PL_lex_inwhat);
57843af0 1001 SAVECOPLINE(PL_curcop);
3280af22
NIS
1002 SAVEPPTR(PL_bufptr);
1003 SAVEPPTR(PL_oldbufptr);
1004 SAVEPPTR(PL_oldoldbufptr);
1005 SAVEPPTR(PL_linestart);
1006 SAVESPTR(PL_linestr);
1007 SAVEPPTR(PL_lex_brackstack);
1008 SAVEPPTR(PL_lex_casestack);
1009
1010 PL_linestr = PL_lex_stuff;
1011 PL_lex_stuff = Nullsv;
1012
9cbb5ea2
GS
1013 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1014 = SvPVX(PL_linestr);
3280af22
NIS
1015 PL_bufend += SvCUR(PL_linestr);
1016 SAVEFREESV(PL_linestr);
1017
1018 PL_lex_dojoin = FALSE;
1019 PL_lex_brackets = 0;
3280af22
NIS
1020 New(899, PL_lex_brackstack, 120, char);
1021 New(899, PL_lex_casestack, 12, char);
1022 SAVEFREEPV(PL_lex_brackstack);
1023 SAVEFREEPV(PL_lex_casestack);
1024 PL_lex_casemods = 0;
1025 *PL_lex_casestack = '\0';
1026 PL_lex_starts = 0;
1027 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1028 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1029
1030 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1031 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1032 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1033 else
3280af22 1034 PL_lex_inpat = Nullop;
79072805 1035
55497cff 1036 return '(';
79072805
LW
1037}
1038
ffb4593c
NT
1039/*
1040 * S_sublex_done
1041 * Restores lexer state after a S_sublex_push.
1042 */
1043
76e3520e 1044STATIC I32
cea2e8a9 1045S_sublex_done(pTHX)
79072805 1046{
3280af22
NIS
1047 if (!PL_lex_starts++) {
1048 PL_expect = XOPERATOR;
79cb57f6 1049 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1050 return THING;
1051 }
1052
3280af22
NIS
1053 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1054 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1055 return yylex();
79072805
LW
1056 }
1057
ffb4593c 1058 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1059 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1060 PL_linestr = PL_lex_repl;
1061 PL_lex_inpat = 0;
1062 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1063 PL_bufend += SvCUR(PL_linestr);
1064 SAVEFREESV(PL_linestr);
1065 PL_lex_dojoin = FALSE;
1066 PL_lex_brackets = 0;
3280af22
NIS
1067 PL_lex_casemods = 0;
1068 *PL_lex_casestack = '\0';
1069 PL_lex_starts = 0;
25da4f38 1070 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1071 PL_lex_state = LEX_INTERPNORMAL;
1072 PL_lex_starts++;
e9fa98b2
HS
1073 /* we don't clear PL_lex_repl here, so that we can check later
1074 whether this is an evalled subst; that means we rely on the
1075 logic to ensure sublex_done() is called again only via the
1076 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1077 }
e9fa98b2 1078 else {
3280af22 1079 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1080 PL_lex_repl = Nullsv;
1081 }
79072805 1082 return ',';
ffed7fef
LW
1083 }
1084 else {
f46d017c 1085 LEAVE;
3280af22
NIS
1086 PL_bufend = SvPVX(PL_linestr);
1087 PL_bufend += SvCUR(PL_linestr);
1088 PL_expect = XOPERATOR;
09bef843 1089 PL_sublex_info.sub_inwhat = 0;
79072805 1090 return ')';
ffed7fef
LW
1091 }
1092}
1093
02aa26ce
NT
1094/*
1095 scan_const
1096
1097 Extracts a pattern, double-quoted string, or transliteration. This
1098 is terrifying code.
1099
3280af22
NIS
1100 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1101 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1102 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1103
9b599b2a
GS
1104 Returns a pointer to the character scanned up to. Iff this is
1105 advanced from the start pointer supplied (ie if anything was
1106 successfully parsed), will leave an OP for the substring scanned
1107 in yylval. Caller must intuit reason for not parsing further
1108 by looking at the next characters herself.
1109
02aa26ce
NT
1110 In patterns:
1111 backslashes:
1112 double-quoted style: \r and \n
1113 regexp special ones: \D \s
1114 constants: \x3
1115 backrefs: \1 (deprecated in substitution replacements)
1116 case and quoting: \U \Q \E
1117 stops on @ and $, but not for $ as tail anchor
1118
1119 In transliterations:
1120 characters are VERY literal, except for - not at the start or end
1121 of the string, which indicates a range. scan_const expands the
1122 range to the full set of intermediate characters.
1123
1124 In double-quoted strings:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 constants: \x3
1128 backrefs: \1 (deprecated)
1129 case and quoting: \U \Q \E
1130 stops on @ and $
1131
1132 scan_const does *not* construct ops to handle interpolated strings.
1133 It stops processing as soon as it finds an embedded $ or @ variable
1134 and leaves it to the caller to work out what's going on.
1135
1136 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1137
1138 $ in pattern could be $foo or could be tail anchor. Assumption:
1139 it's a tail anchor if $ is the last thing in the string, or if it's
1140 followed by one of ")| \n\t"
1141
1142 \1 (backreferences) are turned into $1
1143
1144 The structure of the code is
1145 while (there's a character to process) {
1146 handle transliteration ranges
1147 skip regexp comments
1148 skip # initiated comments in //x patterns
1149 check for embedded @foo
1150 check for embedded scalars
1151 if (backslash) {
1152 leave intact backslashes from leave (below)
1153 deprecate \1 in strings and sub replacements
1154 handle string-changing backslashes \l \U \Q \E, etc.
1155 switch (what was escaped) {
1156 handle - in a transliteration (becomes a literal -)
1157 handle \132 octal characters
1158 handle 0x15 hex characters
1159 handle \cV (control V)
1160 handle printf backslashes (\f, \r, \n, etc)
1161 } (end switch)
1162 } (end if backslash)
1163 } (end while character to read)
4e553d73 1164
02aa26ce
NT
1165*/
1166
76e3520e 1167STATIC char *
cea2e8a9 1168S_scan_const(pTHX_ char *start)
79072805 1169{
3280af22 1170 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1171 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1172 register char *s = start; /* start of the constant */
1173 register char *d = SvPVX(sv); /* destination for copies */
1174 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1175 bool didrange = FALSE; /* did we just finish a range? */
89491803 1176 bool has_utf8 = FALSE; /* embedded \x{} */
012bcf8d
GS
1177 UV uv;
1178
ac2262e3 1179 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1180 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1181 : UTF;
89491803 1182 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1183 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1184 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1185 : UTF;
dff6d3cd 1186 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1187 PL_lex_inpat
4a2d328f 1188 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1189 : "";
79072805
LW
1190
1191 while (s < send || dorange) {
02aa26ce 1192 /* get transliterations out of the way (they're most literal) */
3280af22 1193 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1194 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1195 if (dorange) {
02aa26ce 1196 I32 i; /* current expanded character */
8ada0baa 1197 I32 min; /* first character in range */
02aa26ce
NT
1198 I32 max; /* last character in range */
1199
1200 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1201 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1202 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1203 d -= 2; /* eat the first char and the - */
1204
8ada0baa
JH
1205 min = (U8)*d; /* first char in range */
1206 max = (U8)d[1]; /* last char in range */
1207
c2e66d9e 1208 if (min > max) {
01ec43d0
GS
1209 Perl_croak(aTHX_
1210 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1211 (char)min, (char)max);
c2e66d9e
GS
1212 }
1213
8ada0baa
JH
1214#ifndef ASCIIish
1215 if ((isLOWER(min) && isLOWER(max)) ||
1216 (isUPPER(min) && isUPPER(max))) {
1217 if (isLOWER(min)) {
1218 for (i = min; i <= max; i++)
1219 if (isLOWER(i))
1220 *d++ = i;
1221 } else {
1222 for (i = min; i <= max; i++)
1223 if (isUPPER(i))
1224 *d++ = i;
1225 }
1226 }
1227 else
1228#endif
1229 for (i = min; i <= max; i++)
1230 *d++ = i;
02aa26ce
NT
1231
1232 /* mark the range as done, and continue */
79072805 1233 dorange = FALSE;
01ec43d0 1234 didrange = TRUE;
79072805 1235 continue;
4e553d73 1236 }
02aa26ce
NT
1237
1238 /* range begins (ignore - as first or last char) */
79072805 1239 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1240 if (didrange) {
1fafa243 1241 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1242 }
a0ed51b3 1243 if (utf) {
a176fa2a 1244 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1245 s++;
1246 continue;
1247 }
79072805
LW
1248 dorange = TRUE;
1249 s++;
01ec43d0
GS
1250 }
1251 else {
1252 didrange = FALSE;
1253 }
79072805 1254 }
02aa26ce
NT
1255
1256 /* if we get here, we're not doing a transliteration */
1257
0f5d15d6
IZ
1258 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1259 except for the last char, which will be done separately. */
3280af22 1260 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1261 if (s[2] == '#') {
1262 while (s < send && *s != ')')
1263 *d++ = *s++;
155aba94
GS
1264 }
1265 else if (s[2] == '{' /* This should match regcomp.c */
1266 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1267 {
cc6b7395 1268 I32 count = 1;
0f5d15d6 1269 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1270 char c;
1271
d9f97599
GS
1272 while (count && (c = *regparse)) {
1273 if (c == '\\' && regparse[1])
1274 regparse++;
4e553d73 1275 else if (c == '{')
cc6b7395 1276 count++;
4e553d73 1277 else if (c == '}')
cc6b7395 1278 count--;
d9f97599 1279 regparse++;
cc6b7395 1280 }
5bdf89e7
IZ
1281 if (*regparse != ')') {
1282 regparse--; /* Leave one char for continuation. */
cc6b7395 1283 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1284 }
0f5d15d6 1285 while (s < regparse)
cc6b7395
IZ
1286 *d++ = *s++;
1287 }
748a9306 1288 }
02aa26ce
NT
1289
1290 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1291 else if (*s == '#' && PL_lex_inpat &&
1292 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1293 while (s+1 < send && *s != '\n')
1294 *d++ = *s++;
1295 }
02aa26ce 1296
5d1d4326
JH
1297 /* check for embedded arrays
1298 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1299 */
7e2040f0 1300 else if (*s == '@' && s[1]
5d1d4326 1301 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1302 break;
02aa26ce
NT
1303
1304 /* check for embedded scalars. only stop if we're sure it's a
1305 variable.
1306 */
79072805 1307 else if (*s == '$') {
3280af22 1308 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1309 break;
c277df42 1310 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1311 break; /* in regexp, $ might be tail anchor */
1312 }
02aa26ce
NT
1313
1314 /* backslashes */
79072805 1315 if (*s == '\\' && s+1 < send) {
7948272d
NIS
1316 bool to_be_utf8 = FALSE;
1317
79072805 1318 s++;
02aa26ce
NT
1319
1320 /* some backslashes we leave behind */
c9f97d15 1321 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1322 *d++ = '\\';
1323 *d++ = *s++;
1324 continue;
1325 }
02aa26ce
NT
1326
1327 /* deprecate \1 in strings and substitution replacements */
3280af22 1328 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1329 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1330 {
599cee73 1331 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1332 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1333 *--s = '$';
1334 break;
1335 }
02aa26ce
NT
1336
1337 /* string-change backslash escapes */
3280af22 1338 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1339 --s;
1340 break;
1341 }
02aa26ce
NT
1342
1343 /* if we get here, it's either a quoted -, or a digit */
79072805 1344 switch (*s) {
02aa26ce
NT
1345
1346 /* quoted - in transliterations */
79072805 1347 case '-':
3280af22 1348 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1349 *d++ = *s++;
1350 continue;
1351 }
1352 /* FALL THROUGH */
1353 default:
11b8faa4 1354 {
7e84c16c 1355 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1356 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1357 "Unrecognized escape \\%c passed through",
1358 *s);
1359 /* default action is to copy the quoted character */
1360 *d++ = *s++;
1361 continue;
1362 }
02aa26ce
NT
1363
1364 /* \132 indicates an octal constant */
79072805
LW
1365 case '0': case '1': case '2': case '3':
1366 case '4': case '5': case '6': case '7':
ba210ebe
JH
1367 {
1368 STRLEN len = 0; /* disallow underscores */
1369 uv = (UV)scan_oct(s, 3, &len);
1370 s += len;
1371 }
012bcf8d 1372 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1373
1374 /* \x24 indicates a hex constant */
79072805 1375 case 'x':
a0ed51b3
LW
1376 ++s;
1377 if (*s == '{') {
1378 char* e = strchr(s, '}');
adaeee49 1379 if (!e) {
a0ed51b3 1380 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1381 e = s;
1382 }
89491803 1383 else {
ba210ebe
JH
1384 STRLEN len = 1; /* allow underscores */
1385 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
9b877dbb
IH
1386 if (PL_hints & HINT_UTF8)
1387 to_be_utf8 = TRUE;
ba210ebe
JH
1388 }
1389 s = e + 1;
a0ed51b3
LW
1390 }
1391 else {
ba210ebe
JH
1392 {
1393 STRLEN len = 0; /* disallow underscores */
1394 uv = (UV)scan_hex(s, 2, &len);
1395 s += len;
1396 }
012bcf8d
GS
1397 }
1398
1399 NUM_ESCAPE_INSERT:
1400 /* Insert oct or hex escaped character.
301d3d20
JH
1401 * There will always enough room in sv since such
1402 * escapes will be longer than any UT-F8 sequence
1403 * they can end up as. */
ba7cea30
JH
1404
1405 /* This spot is wrong for EBCDIC. Characters like
1406 * the lowercase letters and digits are >127 in EBCDIC,
1407 * so here they would need to be mapped to the Unicode
1408 * repertoire. --jhi */
1409
7948272d
NIS
1410 if (uv > 127) {
1411 if (!has_utf8 && (to_be_utf8 || uv > 255)) {
301d3d20
JH
1412 /* Might need to recode whatever we have
1413 * accumulated so far if it contains any
1414 * hibit chars.
1415 *
1416 * (Can't we keep track of that and avoid
1417 * this rescan? --jhi)
012bcf8d
GS
1418 */
1419 int hicount = 0;
1420 char *c;
301d3d20 1421
012bcf8d 1422 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1423 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1424 hicount++;
1425 }
1426 if (hicount) {
1427 char *old_pvx = SvPVX(sv);
1428 char *src, *dst;
301d3d20
JH
1429
1430 d = SvGROW(sv,
1431 SvCUR(sv) + hicount + 1) +
1432 (d - old_pvx);
012bcf8d
GS
1433
1434 src = d - 1;
1435 d += hicount;
1436 dst = d - 1;
1437
1438 while (src < dst) {
fd400ab9 1439 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1440 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1441 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1442 }
1443 else {
1444 *dst-- = *src--;
1445 }
1446 }
1447 }
1448 }
1449
9b877dbb 1450 if (to_be_utf8 || has_utf8 || uv > 255) {
012bcf8d 1451 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1452 has_utf8 = TRUE;
012bcf8d 1453 }
a0ed51b3 1454 else {
012bcf8d 1455 *d++ = (char)uv;
a0ed51b3 1456 }
012bcf8d
GS
1457 }
1458 else {
1459 *d++ = (char)uv;
a0ed51b3 1460 }
79072805 1461 continue;
02aa26ce 1462
4a2d328f
IZ
1463 /* \N{latin small letter a} is a named character */
1464 case 'N':
423cee85
JH
1465 ++s;
1466 if (*s == '{') {
1467 char* e = strchr(s, '}');
155aba94 1468 SV *res;
423cee85
JH
1469 STRLEN len;
1470 char *str;
4e553d73 1471
423cee85 1472 if (!e) {
5777a3f7 1473 yyerror("Missing right brace on \\N{}");
423cee85
JH
1474 e = s - 1;
1475 goto cont_scan;
1476 }
1477 res = newSVpvn(s + 1, e - s - 1);
4e553d73 1478 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1479 res, Nullsv, "\\N{...}" );
423cee85 1480 str = SvPV(res,len);
89491803 1481 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1482 char *ostart = SvPVX(sv);
1483 SvCUR_set(sv, d - ostart);
1484 SvPOK_on(sv);
e4f3eed8 1485 *d = '\0';
f08d6ad9 1486 sv_utf8_upgrade(sv);
d2f449dd
SB
1487 /* this just broke our allocation above... */
1488 SvGROW(sv, send - start);
f08d6ad9 1489 d = SvPVX(sv) + SvCUR(sv);
89491803 1490 has_utf8 = TRUE;
f08d6ad9 1491 }
423cee85
JH
1492 if (len > e - s + 4) {
1493 char *odest = SvPVX(sv);
1494
1495 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1496 d = SvPVX(sv) + (d - odest);
1497 }
1498 Copy(str, d, len, char);
1499 d += len;
1500 SvREFCNT_dec(res);
1501 cont_scan:
1502 s = e + 1;
1503 }
1504 else
5777a3f7 1505 yyerror("Missing braces on \\N{}");
423cee85
JH
1506 continue;
1507
02aa26ce 1508 /* \c is a control character */
79072805
LW
1509 case 'c':
1510 s++;
9d116dd7
JH
1511#ifdef EBCDIC
1512 *d = *s++;
1513 if (isLOWER(*d))
1514 *d = toUPPER(*d);
4e553d73 1515 *d = toCTRL(*d);
774a9426 1516 d++;
9d116dd7 1517#else
ba210ebe
JH
1518 {
1519 U8 c = *s++;
1520 *d++ = toCTRL(c);
1521 }
9d116dd7 1522#endif
79072805 1523 continue;
02aa26ce
NT
1524
1525 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1526 case 'b':
1527 *d++ = '\b';
1528 break;
1529 case 'n':
1530 *d++ = '\n';
1531 break;
1532 case 'r':
1533 *d++ = '\r';
1534 break;
1535 case 'f':
1536 *d++ = '\f';
1537 break;
1538 case 't':
1539 *d++ = '\t';
1540 break;
34a3fe2a
PP
1541#ifdef EBCDIC
1542 case 'e':
1543 *d++ = '\047'; /* CP 1047 */
1544 break;
1545 case 'a':
1546 *d++ = '\057'; /* CP 1047 */
1547 break;
1548#else
79072805
LW
1549 case 'e':
1550 *d++ = '\033';
1551 break;
1552 case 'a':
1553 *d++ = '\007';
1554 break;
34a3fe2a 1555#endif
02aa26ce
NT
1556 } /* end switch */
1557
79072805
LW
1558 s++;
1559 continue;
02aa26ce
NT
1560 } /* end if (backslash) */
1561
a5a960be
RI
1562 /* (now in tr/// code again) */
1563
fd400ab9 1564 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
RI
1565 STRLEN len = (STRLEN) -1;
1566 UV uv;
1567 if (this_utf8) {
a0dbb045 1568 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
RI
1569 }
1570 if (len == (STRLEN)-1) {
1571 /* Illegal UTF8 (a high-bit byte), make it valid. */
1572 char *old_pvx = SvPVX(sv);
1573 /* need space for one extra char (NOTE: SvCUR() not set here) */
1574 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1575 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1576 }
1577 else {
1578 while (len--)
1579 *d++ = *s++;
1580 }
1581 has_utf8 = TRUE;
1582 continue;
1583 }
1584
79072805 1585 *d++ = *s++;
02aa26ce
NT
1586 } /* while loop to process each character */
1587
1588 /* terminate the string and set up the sv */
79072805 1589 *d = '\0';
463ee0b2 1590 SvCUR_set(sv, d - SvPVX(sv));
79072805 1591 SvPOK_on(sv);
89491803 1592 if (has_utf8)
7e2040f0 1593 SvUTF8_on(sv);
79072805 1594
02aa26ce 1595 /* shrink the sv if we allocated more than we used */
79072805
LW
1596 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1597 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1598 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1599 }
02aa26ce 1600
9b599b2a 1601 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1602 if (s > PL_bufptr) {
1603 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1604 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1605 sv, Nullsv,
4e553d73 1606 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1607 ? "tr"
3280af22 1608 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1609 ? "s"
1610 : "qq")));
79072805 1611 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1612 } else
8990e307 1613 SvREFCNT_dec(sv);
79072805
LW
1614 return s;
1615}
1616
ffb4593c
NT
1617/* S_intuit_more
1618 * Returns TRUE if there's more to the expression (e.g., a subscript),
1619 * FALSE otherwise.
ffb4593c
NT
1620 *
1621 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1622 *
1623 * ->[ and ->{ return TRUE
1624 * { and [ outside a pattern are always subscripts, so return TRUE
1625 * if we're outside a pattern and it's not { or [, then return FALSE
1626 * if we're in a pattern and the first char is a {
1627 * {4,5} (any digits around the comma) returns FALSE
1628 * if we're in a pattern and the first char is a [
1629 * [] returns FALSE
1630 * [SOMETHING] has a funky algorithm to decide whether it's a
1631 * character class or not. It has to deal with things like
1632 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1633 * anything else returns TRUE
1634 */
1635
9cbb5ea2
GS
1636/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1637
76e3520e 1638STATIC int
cea2e8a9 1639S_intuit_more(pTHX_ register char *s)
79072805 1640{
3280af22 1641 if (PL_lex_brackets)
79072805
LW
1642 return TRUE;
1643 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1644 return TRUE;
1645 if (*s != '{' && *s != '[')
1646 return FALSE;
3280af22 1647 if (!PL_lex_inpat)
79072805
LW
1648 return TRUE;
1649
1650 /* In a pattern, so maybe we have {n,m}. */
1651 if (*s == '{') {
1652 s++;
1653 if (!isDIGIT(*s))
1654 return TRUE;
1655 while (isDIGIT(*s))
1656 s++;
1657 if (*s == ',')
1658 s++;
1659 while (isDIGIT(*s))
1660 s++;
1661 if (*s == '}')
1662 return FALSE;
1663 return TRUE;
1664
1665 }
1666
1667 /* On the other hand, maybe we have a character class */
1668
1669 s++;
1670 if (*s == ']' || *s == '^')
1671 return FALSE;
1672 else {
ffb4593c 1673 /* this is terrifying, and it works */
79072805
LW
1674 int weight = 2; /* let's weigh the evidence */
1675 char seen[256];
f27ffc4a 1676 unsigned char un_char = 255, last_un_char;
93a17b20 1677 char *send = strchr(s,']');
3280af22 1678 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1679
1680 if (!send) /* has to be an expression */
1681 return TRUE;
1682
1683 Zero(seen,256,char);
1684 if (*s == '$')
1685 weight -= 3;
1686 else if (isDIGIT(*s)) {
1687 if (s[1] != ']') {
1688 if (isDIGIT(s[1]) && s[2] == ']')
1689 weight -= 10;
1690 }
1691 else
1692 weight -= 100;
1693 }
1694 for (; s < send; s++) {
1695 last_un_char = un_char;
1696 un_char = (unsigned char)*s;
1697 switch (*s) {
1698 case '@':
1699 case '&':
1700 case '$':
1701 weight -= seen[un_char] * 10;
7e2040f0 1702 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1703 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1704 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1705 weight -= 100;
1706 else
1707 weight -= 10;
1708 }
1709 else if (*s == '$' && s[1] &&
93a17b20
LW
1710 strchr("[#!%*<>()-=",s[1])) {
1711 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1712 weight -= 10;
1713 else
1714 weight -= 1;
1715 }
1716 break;
1717 case '\\':
1718 un_char = 254;
1719 if (s[1]) {
93a17b20 1720 if (strchr("wds]",s[1]))
79072805
LW
1721 weight += 100;
1722 else if (seen['\''] || seen['"'])
1723 weight += 1;
93a17b20 1724 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1725 weight += 40;
1726 else if (isDIGIT(s[1])) {
1727 weight += 40;
1728 while (s[1] && isDIGIT(s[1]))
1729 s++;
1730 }
1731 }
1732 else
1733 weight += 100;
1734 break;
1735 case '-':
1736 if (s[1] == '\\')
1737 weight += 50;
93a17b20 1738 if (strchr("aA01! ",last_un_char))
79072805 1739 weight += 30;
93a17b20 1740 if (strchr("zZ79~",s[1]))
79072805 1741 weight += 30;
f27ffc4a
GS
1742 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1743 weight -= 5; /* cope with negative subscript */
79072805
LW
1744 break;
1745 default:
93a17b20 1746 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1747 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1748 char *d = tmpbuf;
1749 while (isALPHA(*s))
1750 *d++ = *s++;
1751 *d = '\0';
1752 if (keyword(tmpbuf, d - tmpbuf))
1753 weight -= 150;
1754 }
1755 if (un_char == last_un_char + 1)
1756 weight += 5;
1757 weight -= seen[un_char];
1758 break;
1759 }
1760 seen[un_char]++;
1761 }
1762 if (weight >= 0) /* probably a character class */
1763 return FALSE;
1764 }
1765
1766 return TRUE;
1767}
ffed7fef 1768
ffb4593c
NT
1769/*
1770 * S_intuit_method
1771 *
1772 * Does all the checking to disambiguate
1773 * foo bar
1774 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1775 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1776 *
1777 * First argument is the stuff after the first token, e.g. "bar".
1778 *
1779 * Not a method if bar is a filehandle.
1780 * Not a method if foo is a subroutine prototyped to take a filehandle.
1781 * Not a method if it's really "Foo $bar"
1782 * Method if it's "foo $bar"
1783 * Not a method if it's really "print foo $bar"
1784 * Method if it's really "foo package::" (interpreted as package->foo)
1785 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1786 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1787 * =>
1788 */
1789
76e3520e 1790STATIC int
cea2e8a9 1791S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1792{
1793 char *s = start + (*start == '$');
3280af22 1794 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1795 STRLEN len;
1796 GV* indirgv;
1797
1798 if (gv) {
b6c543e3 1799 CV *cv;
a0d0e21e
LW
1800 if (GvIO(gv))
1801 return 0;
b6c543e3
IZ
1802 if ((cv = GvCVu(gv))) {
1803 char *proto = SvPVX(cv);
1804 if (proto) {
1805 if (*proto == ';')
1806 proto++;
1807 if (*proto == '*')
1808 return 0;
1809 }
1810 } else
a0d0e21e
LW
1811 gv = 0;
1812 }
8903cb82 1813 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1814 /* start is the beginning of the possible filehandle/object,
1815 * and s is the end of it
1816 * tmpbuf is a copy of it
1817 */
1818
a0d0e21e 1819 if (*start == '$') {
3280af22 1820 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1821 return 0;
1822 s = skipspace(s);
3280af22
NIS
1823 PL_bufptr = start;
1824 PL_expect = XREF;
a0d0e21e
LW
1825 return *s == '(' ? FUNCMETH : METHOD;
1826 }
1827 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1828 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1829 len -= 2;
1830 tmpbuf[len] = '\0';
1831 goto bare_package;
1832 }
1833 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1834 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1835 return 0;
1836 /* filehandle or package name makes it a method */
89bfa8cd 1837 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1838 s = skipspace(s);
3280af22 1839 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1840 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1841 bare_package:
3280af22 1842 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1843 newSVpvn(tmpbuf,len));
3280af22
NIS
1844 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1845 PL_expect = XTERM;
a0d0e21e 1846 force_next(WORD);
3280af22 1847 PL_bufptr = s;
a0d0e21e
LW
1848 return *s == '(' ? FUNCMETH : METHOD;
1849 }
1850 }
1851 return 0;
1852}
1853
ffb4593c
NT
1854/*
1855 * S_incl_perldb
1856 * Return a string of Perl code to load the debugger. If PERL5DB
1857 * is set, it will return the contents of that, otherwise a
1858 * compile-time require of perl5db.pl.
1859 */
1860
76e3520e 1861STATIC char*
cea2e8a9 1862S_incl_perldb(pTHX)
a0d0e21e 1863{
3280af22 1864 if (PL_perldb) {
76e3520e 1865 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1866
1867 if (pdb)
1868 return pdb;
61bb5906 1869 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1870 return "BEGIN { require 'perl5db.pl' }";
1871 }
1872 return "";
1873}
1874
1875
16d20bd9 1876/* Encoded script support. filter_add() effectively inserts a
4e553d73 1877 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1878 * Note that the filter function only applies to the current source file
1879 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1880 *
1881 * The datasv parameter (which may be NULL) can be used to pass
1882 * private data to this instance of the filter. The filter function
1883 * can recover the SV using the FILTER_DATA macro and use it to
1884 * store private buffers and state information.
1885 *
1886 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1887 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1888 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1889 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1890 * private use must be set using malloc'd pointers.
1891 */
16d20bd9
AD
1892
1893SV *
864dbfa3 1894Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1895{
f4c556ac
GS
1896 if (!funcp)
1897 return Nullsv;
1898
3280af22
NIS
1899 if (!PL_rsfp_filters)
1900 PL_rsfp_filters = newAV();
16d20bd9 1901 if (!datasv)
8c52afec 1902 datasv = NEWSV(255,0);
16d20bd9 1903 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1904 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1905 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1906 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1907 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1908 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1909 av_unshift(PL_rsfp_filters, 1);
1910 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1911 return(datasv);
1912}
4e553d73 1913
16d20bd9
AD
1914
1915/* Delete most recently added instance of this filter function. */
a0d0e21e 1916void
864dbfa3 1917Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1918{
e0c19803 1919 SV *datasv;
f4c556ac 1920 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1921 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1922 return;
1923 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1924 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1925 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1926 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1927 IoANY(datasv) = (void *)NULL;
3280af22 1928 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1929
16d20bd9
AD
1930 return;
1931 }
1932 /* we need to search for the correct entry and clear it */
cea2e8a9 1933 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1934}
1935
1936
1937/* Invoke the n'th filter function for the current rsfp. */
1938I32
864dbfa3 1939Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1940
1941
8ac85365 1942 /* 0 = read one text line */
a0d0e21e 1943{
16d20bd9
AD
1944 filter_t funcp;
1945 SV *datasv = NULL;
e50aee73 1946
3280af22 1947 if (!PL_rsfp_filters)
16d20bd9 1948 return -1;
3280af22 1949 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1950 /* Provide a default input filter to make life easy. */
1951 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1952 DEBUG_P(PerlIO_printf(Perl_debug_log,
1953 "filter_read %d: from rsfp\n", idx));
4e553d73 1954 if (maxlen) {
16d20bd9
AD
1955 /* Want a block */
1956 int len ;
1957 int old_len = SvCUR(buf_sv) ;
1958
1959 /* ensure buf_sv is large enough */
1960 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1961 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1962 if (PerlIO_error(PL_rsfp))
37120919
AD
1963 return -1; /* error */
1964 else
1965 return 0 ; /* end of file */
1966 }
16d20bd9
AD
1967 SvCUR_set(buf_sv, old_len + len) ;
1968 } else {
1969 /* Want a line */
3280af22
NIS
1970 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1971 if (PerlIO_error(PL_rsfp))
37120919
AD
1972 return -1; /* error */
1973 else
1974 return 0 ; /* end of file */
1975 }
16d20bd9
AD
1976 }
1977 return SvCUR(buf_sv);
1978 }
1979 /* Skip this filter slot if filter has been deleted */
3280af22 1980 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1981 DEBUG_P(PerlIO_printf(Perl_debug_log,
1982 "filter_read %d: skipped (filter deleted)\n",
1983 idx));
16d20bd9
AD
1984 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1985 }
1986 /* Get function pointer hidden within datasv */
4755096e 1987 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1988 DEBUG_P(PerlIO_printf(Perl_debug_log,
1989 "filter_read %d: via function %p (%s)\n",
1990 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1991 /* Call function. The function is expected to */
1992 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1993 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1994 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1995}
1996
76e3520e 1997STATIC char *
cea2e8a9 1998S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1999{
c39cd008 2000#ifdef PERL_CR_FILTER
3280af22 2001 if (!PL_rsfp_filters) {
c39cd008 2002 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2003 }
2004#endif
3280af22 2005 if (PL_rsfp_filters) {
16d20bd9 2006
55497cff
PP
2007 if (!append)
2008 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2009 if (FILTER_READ(0, sv, 0) > 0)
2010 return ( SvPVX(sv) ) ;
2011 else
2012 return Nullch ;
2013 }
9d116dd7 2014 else
fd049845 2015 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2016}
2017
01ec43d0
GS
2018STATIC HV *
2019S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2020{
2021 GV *gv;
2022
01ec43d0 2023 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2024 return PL_curstash;
2025
2026 if (len > 2 &&
2027 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2028 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2029 {
2030 return GvHV(gv); /* Foo:: */
def3634b
GS
2031 }
2032
2033 /* use constant CLASS => 'MyClass' */
2034 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2035 SV *sv;
2036 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2037 pkgname = SvPV_nolen(sv);
2038 }
2039 }
2040
2041 return gv_stashpv(pkgname, FALSE);
2042}
a0d0e21e 2043
748a9306
LW
2044#ifdef DEBUGGING
2045 static char* exp_name[] =
09bef843
SB
2046 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2047 "ATTRTERM", "TERMBLOCK"
2048 };
748a9306 2049#endif
463ee0b2 2050
02aa26ce
NT
2051/*
2052 yylex
2053
2054 Works out what to call the token just pulled out of the input
2055 stream. The yacc parser takes care of taking the ops we return and
2056 stitching them into a tree.
2057
2058 Returns:
2059 PRIVATEREF
2060
2061 Structure:
2062 if read an identifier
2063 if we're in a my declaration
2064 croak if they tried to say my($foo::bar)
2065 build the ops for a my() declaration
2066 if it's an access to a my() variable
2067 are we in a sort block?
2068 croak if my($a); $a <=> $b
2069 build ops for access to a my() variable
2070 if in a dq string, and they've said @foo and we can't find @foo
2071 croak
2072 build ops for a bareword
2073 if we already built the token before, use it.
2074*/
2075
dba4d153 2076#ifdef USE_PURE_BISON
bf4acbe4 2077#ifdef __SC__
dba4d153 2078#pragma segment Perl_yylex_r
bf4acbe4 2079#endif
864dbfa3 2080int
dba4d153 2081Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2082{
20141f0e
RI
2083 int r;
2084
6f202aea 2085 yyactlevel++;
20141f0e
RI
2086 yylval_pointer[yyactlevel] = lvalp;
2087 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2088 if (yyactlevel >= YYMAXLEVEL)
2089 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2090
dba4d153 2091 r = Perl_yylex(aTHX);
20141f0e 2092
d8ae6756
RI
2093 if (yyactlevel > 0)
2094 yyactlevel--;
20141f0e
RI
2095
2096 return r;
2097}
dba4d153 2098#endif
20141f0e 2099
dba4d153
JH
2100#ifdef __SC__
2101#pragma segment Perl_yylex
2102#endif
dba4d153 2103int
dba4d153 2104Perl_yylex(pTHX)
20141f0e 2105{
79072805 2106 register char *s;
378cc40b 2107 register char *d;
79072805 2108 register I32 tmp;
463ee0b2 2109 STRLEN len;
161b471a
NIS
2110 GV *gv = Nullgv;
2111 GV **gvp = 0;
aa7440fb 2112 bool bof = FALSE;
a687059c 2113
02aa26ce 2114 /* check if there's an identifier for us to look at */
3280af22 2115 if (PL_pending_ident) {
02aa26ce 2116 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2117 char pit = PL_pending_ident;
2118 PL_pending_ident = 0;
bbce6d69 2119
607df283
SC
2120 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2121 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2122
02aa26ce
NT
2123 /* if we're in a my(), we can't allow dynamics here.
2124 $foo'bar has already been turned into $foo::bar, so
2125 just check for colons.
2126
2127 if it's a legal name, the OP is a PADANY.
2128 */
3280af22 2129 if (PL_in_my) {
77ca0c92 2130 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2131 if (strchr(PL_tokenbuf,':'))
2132 yyerror(Perl_form(aTHX_ "No package name allowed for "
2133 "variable %s in \"our\"",
2134 PL_tokenbuf));
77ca0c92
LW
2135 tmp = pad_allocmy(PL_tokenbuf);
2136 }
2137 else {
2138 if (strchr(PL_tokenbuf,':'))
2139 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2140
77ca0c92
LW
2141 yylval.opval = newOP(OP_PADANY, 0);
2142 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2143 return PRIVATEREF;
2144 }
bbce6d69
PP
2145 }
2146
4e553d73 2147 /*
02aa26ce
NT
2148 build the ops for accesses to a my() variable.
2149
2150 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2151 then used in a comparison. This catches most, but not
2152 all cases. For instance, it catches
2153 sort { my($a); $a <=> $b }
2154 but not
2155 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2156 (although why you'd do that is anyone's guess).
2157 */
2158
3280af22 2159 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2160#ifdef USE_THREADS
54b9620d 2161 /* Check for single character per-thread SVs */
3280af22
NIS
2162 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2163 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2164 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2165 {
2faa37cc 2166 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2167 yylval.opval->op_targ = tmp;
2168 return PRIVATEREF;
2169 }
2170#endif /* USE_THREADS */
3280af22 2171 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2172 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2173 /* might be an "our" variable" */
f472eb5c 2174 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2175 /* build ops for a bareword */
f472eb5c
GS
2176 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2177 sv_catpvn(sym, "::", 2);
2178 sv_catpv(sym, PL_tokenbuf+1);
2179 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2180 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2181 gv_fetchpv(SvPVX(sym),
77ca0c92 2182 (PL_in_eval
f472eb5c
GS
2183 ? (GV_ADDMULTI | GV_ADDINEVAL)
2184 : TRUE
77ca0c92
LW
2185 ),
2186 ((PL_tokenbuf[0] == '$') ? SVt_PV
2187 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2188 : SVt_PVHV));
2189 return WORD;
2190 }
2191
02aa26ce 2192 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2193 if (PL_last_lop_op == OP_SORT &&
2194 PL_tokenbuf[0] == '$' &&
2195 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2196 && !PL_tokenbuf[2])
bbce6d69 2197 {
3280af22
NIS
2198 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2199 d < PL_bufend && *d != '\n';
a863c7d1
MB
2200 d++)
2201 {
2202 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2203 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2204 PL_tokenbuf);
a863c7d1 2205 }
bbce6d69
PP
2206 }
2207 }
bbce6d69 2208
a863c7d1
MB
2209 yylval.opval = newOP(OP_PADANY, 0);
2210 yylval.opval->op_targ = tmp;
2211 return PRIVATEREF;
2212 }
bbce6d69
PP
2213 }
2214
02aa26ce
NT
2215 /*
2216 Whine if they've said @foo in a doublequoted string,
2217 and @foo isn't a variable we can find in the symbol
2218 table.
2219 */
3280af22
NIS
2220 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2221 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2222 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2223 && ckWARN(WARN_AMBIGUOUS))
2224 {
2225 /* Downgraded from fatal to warning 20000522 mjd */
2226 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2227 "Possible unintended interpolation of %s in string",
2228 PL_tokenbuf);
2229 }
bbce6d69
PP
2230 }
2231
02aa26ce 2232 /* build ops for a bareword */
3280af22 2233 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2234 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2235 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2236 ((PL_tokenbuf[0] == '$') ? SVt_PV
2237 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2238 : SVt_PVHV));
2239 return WORD;
2240 }
2241
02aa26ce
NT
2242 /* no identifier pending identification */
2243
3280af22 2244 switch (PL_lex_state) {
79072805
LW
2245#ifdef COMMENTARY
2246 case LEX_NORMAL: /* Some compilers will produce faster */
2247 case LEX_INTERPNORMAL: /* code if we comment these out. */
2248 break;
2249#endif
2250
09bef843 2251 /* when we've already built the next token, just pull it out of the queue */
79072805 2252 case LEX_KNOWNEXT:
3280af22
NIS
2253 PL_nexttoke--;
2254 yylval = PL_nextval[PL_nexttoke];
2255 if (!PL_nexttoke) {
2256 PL_lex_state = PL_lex_defer;
2257 PL_expect = PL_lex_expect;
2258 PL_lex_defer = LEX_NORMAL;
463ee0b2 2259 }
607df283 2260 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2261 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2262 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2263
3280af22 2264 return(PL_nexttype[PL_nexttoke]);
79072805 2265
02aa26ce 2266 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2267 when we get here, PL_bufptr is at the \
02aa26ce 2268 */
79072805
LW
2269 case LEX_INTERPCASEMOD:
2270#ifdef DEBUGGING
3280af22 2271 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2272 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2273#endif
02aa26ce 2274 /* handle \E or end of string */
3280af22 2275 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2276 char oldmod;
02aa26ce
NT
2277
2278 /* if at a \E */
3280af22
NIS
2279 if (PL_lex_casemods) {
2280 oldmod = PL_lex_casestack[--PL_lex_casemods];
2281 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2282
3280af22
NIS
2283 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2284 PL_bufptr += 2;
2285 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2286 }
79072805
LW
2287 return ')';
2288 }
3280af22
NIS
2289 if (PL_bufptr != PL_bufend)
2290 PL_bufptr += 2;
2291 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2292 return yylex();
79072805
LW
2293 }
2294 else {
607df283
SC
2295 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2296 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2297 s = PL_bufptr + 1;
79072805
LW
2298 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2299 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2300 if (strchr("LU", *s) &&
3280af22 2301 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2302 {
3280af22 2303 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2304 return ')';
2305 }
3280af22
NIS
2306 if (PL_lex_casemods > 10) {
2307 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2308 if (newlb != PL_lex_casestack) {
a0d0e21e 2309 SAVEFREEPV(newlb);
3280af22 2310 PL_lex_casestack = newlb;
a0d0e21e
LW
2311 }
2312 }
3280af22
NIS
2313 PL_lex_casestack[PL_lex_casemods++] = *s;
2314 PL_lex_casestack[PL_lex_casemods] = '\0';
2315 PL_lex_state = LEX_INTERPCONCAT;
2316 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2317 force_next('(');
2318 if (*s == 'l')
3280af22 2319 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2320 else if (*s == 'u')
3280af22 2321 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2322 else if (*s == 'L')
3280af22 2323 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2324 else if (*s == 'U')
3280af22 2325 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2326 else if (*s == 'Q')
3280af22 2327 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2328 else
cea2e8a9 2329 Perl_croak(aTHX_ "panic: yylex");
3280af22 2330 PL_bufptr = s + 1;
79072805 2331 force_next(FUNC);
3280af22
NIS
2332 if (PL_lex_starts) {
2333 s = PL_bufptr;
2334 PL_lex_starts = 0;
79072805
LW
2335 Aop(OP_CONCAT);
2336 }
2337 else
cea2e8a9 2338 return yylex();
79072805
LW
2339 }
2340
55497cff
PP
2341 case LEX_INTERPPUSH:
2342 return sublex_push();
2343
79072805 2344 case LEX_INTERPSTART:
3280af22 2345 if (PL_bufptr == PL_bufend)
79072805 2346 return sublex_done();
607df283
SC
2347 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2348 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2349 PL_expect = XTERM;
2350 PL_lex_dojoin = (*PL_bufptr == '@');
2351 PL_lex_state = LEX_INTERPNORMAL;
2352 if (PL_lex_dojoin) {
2353 PL_nextval[PL_nexttoke].ival = 0;
79072805 2354 force_next(',');
554b3eca 2355#ifdef USE_THREADS
533c011a
NIS
2356 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2357 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2358 force_next(PRIVATEREF);
2359#else
a0d0e21e 2360 force_ident("\"", '$');
554b3eca 2361#endif /* USE_THREADS */
3280af22 2362 PL_nextval[PL_nexttoke].ival = 0;
79072805 2363 force_next('$');
3280af22 2364 PL_nextval[PL_nexttoke].ival = 0;
79072805 2365 force_next('(');
3280af22 2366 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2367 force_next(FUNC);
2368 }
3280af22
NIS
2369 if (PL_lex_starts++) {
2370 s = PL_bufptr;
79072805
LW
2371 Aop(OP_CONCAT);
2372 }
cea2e8a9 2373 return yylex();
79072805
LW
2374
2375 case LEX_INTERPENDMAYBE:
3280af22
NIS
2376 if (intuit_more(PL_bufptr)) {
2377 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2378 break;
2379 }
2380 /* FALL THROUGH */
2381
2382 case LEX_INTERPEND:
3280af22
NIS
2383 if (PL_lex_dojoin) {
2384 PL_lex_dojoin = FALSE;
2385 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2386 return ')';
2387 }
43a16006 2388 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2389 && SvEVALED(PL_lex_repl))
43a16006 2390 {
e9fa98b2 2391 if (PL_bufptr != PL_bufend)
cea2e8a9 2392 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2393 PL_lex_repl = Nullsv;
2394 }
79072805
LW
2395 /* FALLTHROUGH */
2396 case LEX_INTERPCONCAT:
2397#ifdef DEBUGGING
3280af22 2398 if (PL_lex_brackets)
cea2e8a9 2399 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2400#endif
3280af22 2401 if (PL_bufptr == PL_bufend)
79072805
LW
2402 return sublex_done();
2403
3280af22
NIS
2404 if (SvIVX(PL_linestr) == '\'') {
2405 SV *sv = newSVsv(PL_linestr);
2406 if (!PL_lex_inpat)
76e3520e 2407 sv = tokeq(sv);
3280af22 2408 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2409 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2410 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2411 s = PL_bufend;
79072805
LW
2412 }
2413 else {
3280af22 2414 s = scan_const(PL_bufptr);
79072805 2415 if (*s == '\\')
3280af22 2416 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2417 else
3280af22 2418 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2419 }
2420
3280af22
NIS
2421 if (s != PL_bufptr) {
2422 PL_nextval[PL_nexttoke] = yylval;
2423 PL_expect = XTERM;
79072805 2424 force_next(THING);
3280af22 2425 if (PL_lex_starts++)
79072805
LW
2426 Aop(OP_CONCAT);
2427 else {
3280af22 2428 PL_bufptr = s;
cea2e8a9 2429 return yylex();
79072805
LW
2430 }
2431 }
2432
cea2e8a9 2433 return yylex();
a0d0e21e 2434 case LEX_FORMLINE:
3280af22
NIS
2435 PL_lex_state = LEX_NORMAL;
2436 s = scan_formline(PL_bufptr);
2437 if (!PL_lex_formbrack)
a0d0e21e
LW
2438 goto rightbracket;
2439 OPERATOR(';');
79072805
LW
2440 }
2441
3280af22
NIS
2442 s = PL_bufptr;
2443 PL_oldoldbufptr = PL_oldbufptr;
2444 PL_oldbufptr = s;
607df283 2445 DEBUG_T( {
bf49b057
GS
2446 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2447 exp_name[PL_expect], s);
79072805 2448 } )
463ee0b2
LW
2449
2450 retry:
378cc40b
LW
2451 switch (*s) {
2452 default:
7e2040f0 2453 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2454 goto keylookup;
cea2e8a9 2455 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2456 case 4:
2457 case 26:
2458 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2459 case 0:
3280af22
NIS
2460 if (!PL_rsfp) {
2461 PL_last_uni = 0;
2462 PL_last_lop = 0;
2463 if (PL_lex_brackets)
d98d5fff 2464 yyerror("Missing right curly or square bracket");
4e553d73 2465 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2466 "### Tokener got EOF\n");
2467 } )
79072805 2468 TOKEN(0);
463ee0b2 2469 }
3280af22 2470 if (s++ < PL_bufend)
a687059c 2471 goto retry; /* ignore stray nulls */
3280af22
NIS
2472 PL_last_uni = 0;
2473 PL_last_lop = 0;
2474 if (!PL_in_eval && !PL_preambled) {
2475 PL_preambled = TRUE;
2476 sv_setpv(PL_linestr,incl_perldb());
2477 if (SvCUR(PL_linestr))
2478 sv_catpv(PL_linestr,";");
2479 if (PL_preambleav){
2480 while(AvFILLp(PL_preambleav) >= 0) {
2481 SV *tmpsv = av_shift(PL_preambleav);
2482 sv_catsv(PL_linestr, tmpsv);
2483 sv_catpv(PL_linestr, ";");
91b7def8
PP
2484 sv_free(tmpsv);
2485 }
3280af22
NIS
2486 sv_free((SV*)PL_preambleav);
2487 PL_preambleav = NULL;
91b7def8 2488 }
3280af22
NIS
2489 if (PL_minus_n || PL_minus_p) {
2490 sv_catpv(PL_linestr, "LINE: while (<>) {");
2491 if (PL_minus_l)
2492 sv_catpv(PL_linestr,"chomp;");
2493 if (PL_minus_a) {
8fd239a7
CS
2494 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2495 if (gv)
2496 GvIMPORTED_AV_on(gv);
3280af22
NIS
2497 if (PL_minus_F) {
2498 if (strchr("/'\"", *PL_splitstr)
2499 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2500 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2501 else {
2502 char delim;
2503 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2504 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2505 delim = *s;
cea2e8a9 2506 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2507 "q" + (delim == '\''), delim);
3280af22 2508 for (s = PL_splitstr; *s; s++) {
54310121 2509 if (*s == '\\')
3280af22
NIS
2510 sv_catpvn(PL_linestr, "\\", 1);
2511 sv_catpvn(PL_linestr, s, 1);
54310121 2512 }
cea2e8a9 2513 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2514 }
2304df62
AD
2515 }
2516 else
3280af22 2517 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2518 }
79072805 2519 }
3280af22
NIS
2520 sv_catpv(PL_linestr, "\n");
2521 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2522 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2523 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2524 SV *sv = NEWSV(85,0);
2525
2526 sv_upgrade(sv, SVt_PVMG);
3280af22 2527 sv_setsv(sv,PL_linestr);
57843af0 2528 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2529 }
79072805 2530 goto retry;
a687059c 2531 }
e929a76b 2532 do {
aa7440fb 2533 bof = PL_rsfp ? TRUE : FALSE;
226017aa
DD
2534 if (bof) {
2535#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2536# ifdef __GNU_LIBRARY__
2537# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2538# define FTELL_FOR_PIPE_IS_BROKEN
2539# endif
e3f494f1
JH
2540# else
2541# ifdef __GLIBC__
2542# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2543# define FTELL_FOR_PIPE_IS_BROKEN
2544# endif
2545# endif
226017aa
DD
2546# endif
2547#endif
2548#ifdef FTELL_FOR_PIPE_IS_BROKEN
2549 /* This loses the possibility to detect the bof
2550 * situation on perl -P when the libc5 is being used.
2551 * Workaround? Maybe attach some extra state to PL_rsfp?
2552 */
2553 if (!PL_preprocess)
2554 bof = PerlIO_tell(PL_rsfp) == 0;
2555#else
2556 bof = PerlIO_tell(PL_rsfp) == 0;
2557#endif
2558 }
dea0fc0b
JH
2559 s = filter_gets(PL_linestr, PL_rsfp, 0);
2560 if (s == Nullch) {
e929a76b 2561 fake_eof:
3280af22
NIS
2562 if (PL_rsfp) {
2563 if (PL_preprocess && !PL_in_eval)
2564 (void)PerlProc_pclose(PL_rsfp);
2565 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2566 PerlIO_clearerr(PL_rsfp);
395c3793 2567 else
3280af22
NIS
2568 (void)PerlIO_close(PL_rsfp);
2569 PL_rsfp = Nullfp;
4a9ae47a 2570 PL_doextract = FALSE;
395c3793 2571 }
3280af22
NIS
2572 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2573 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2574 sv_catpv(PL_linestr,";}");
2575 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2577 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2578 goto retry;
2579 }
3280af22
NIS
2580 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2581 sv_setpv(PL_linestr,"");
79072805 2582 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2583 } else if (bof) {
2584 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2585 s = swallow_bom((U8*)s);
378cc40b 2586 }
3280af22 2587 if (PL_doextract) {
a0d0e21e 2588 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2589 PL_doextract = FALSE;
a0d0e21e
LW
2590
2591 /* Incest with pod. */
2592 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2593 sv_setpv(PL_linestr, "");
2594 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2595 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2596 PL_doextract = FALSE;
a0d0e21e 2597 }
4e553d73 2598 }
463ee0b2 2599 incline(s);
3280af22
NIS
2600 } while (PL_doextract);
2601 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2602 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2603 SV *sv = NEWSV(85,0);
a687059c 2604
93a17b20 2605 sv_upgrade(sv, SVt_PVMG);
3280af22 2606 sv_setsv(sv,PL_linestr);
57843af0 2607 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2608 }
3280af22 2609 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2610 if (CopLINE(PL_curcop) == 1) {
3280af22 2611 while (s < PL_bufend && isSPACE(*s))
79072805 2612 s++;
a0d0e21e 2613 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2614 s++;
44a8e56a 2615 d = Nullch;
3280af22 2616 if (!PL_in_eval) {
44a8e56a
PP
2617 if (*s == '#' && *(s+1) == '!')
2618 d = s + 2;
2619#ifdef ALTERNATE_SHEBANG
2620 else {
2621 static char as[] = ALTERNATE_SHEBANG;
2622 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2623 d = s + (sizeof(as) - 1);
2624 }
2625#endif /* ALTERNATE_SHEBANG */
2626 }
2627 if (d) {
b8378b72 2628 char *ipath;
774d564b 2629 char *ipathend;
b8378b72 2630
774d564b 2631 while (isSPACE(*d))
b8378b72
CS
2632 d++;
2633 ipath = d;
774d564b
PP
2634 while (*d && !isSPACE(*d))
2635 d++;
2636 ipathend = d;
2637
2638#ifdef ARG_ZERO_IS_SCRIPT
2639 if (ipathend > ipath) {
2640 /*
2641 * HP-UX (at least) sets argv[0] to the script name,
2642 * which makes $^X incorrect. And Digital UNIX and Linux,
2643 * at least, set argv[0] to the basename of the Perl
2644 * interpreter. So, having found "#!", we'll set it right.
2645 */
2646 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2647 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2648 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2649 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2650 SvSETMAGIC(x);
2651 }
774d564b 2652 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2653 }
774d564b 2654#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2655
2656 /*
2657 * Look for options.
2658 */
748a9306 2659 d = instr(s,"perl -");
84e30d1a 2660 if (!d) {
748a9306 2661 d = instr(s,"perl");
84e30d1a
GS
2662#if defined(DOSISH)
2663 /* avoid getting into infinite loops when shebang
2664 * line contains "Perl" rather than "perl" */
2665 if (!d) {
2666 for (d = ipathend-4; d >= ipath; --d) {
2667 if ((*d == 'p' || *d == 'P')
2668 && !ibcmp(d, "perl", 4))
2669 {
2670 break;
2671 }
2672 }
2673 if (d < ipath)
2674 d = Nullch;
2675 }
2676#endif
2677 }
44a8e56a
PP
2678#ifdef ALTERNATE_SHEBANG
2679 /*
2680 * If the ALTERNATE_SHEBANG on this system starts with a
2681 * character that can be part of a Perl expression, then if
2682 * we see it but not "perl", we're probably looking at the
2683 * start of Perl code, not a request to hand off to some
2684 * other interpreter. Similarly, if "perl" is there, but
2685 * not in the first 'word' of the line, we assume the line
2686 * contains the start of the Perl program.
44a8e56a
PP
2687 */
2688 if (d && *s != '#') {
774d564b 2689 char *c = ipath;
44a8e56a
PP
2690 while (*c && !strchr("; \t\r\n\f\v#", *c))
2691 c++;
2692 if (c < d)
2693 d = Nullch; /* "perl" not in first word; ignore */
2694 else
2695 *s = '#'; /* Don't try to parse shebang line */
2696 }
774d564b 2697#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2698#ifndef MACOS_TRADITIONAL
748a9306 2699 if (!d &&
44a8e56a 2700 *s == '#' &&
774d564b 2701 ipathend > ipath &&
3280af22 2702 !PL_minus_c &&
748a9306 2703 !instr(s,"indir") &&
3280af22 2704 instr(PL_origargv[0],"perl"))
748a9306 2705 {
9f68db38 2706 char **newargv;
9f68db38 2707
774d564b
PP
2708 *ipathend = '\0';
2709 s = ipathend + 1;
3280af22 2710 while (s < PL_bufend && isSPACE(*s))
9f68db38 2711 s++;
3280af22
NIS
2712 if (s < PL_bufend) {
2713 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2714 newargv[1] = s;
3280af22 2715 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2716 s++;
2717 *s = '\0';
3280af22 2718 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2719 }
2720 else
3280af22 2721 newargv = PL_origargv;
774d564b 2722 newargv[0] = ipath;
b4748376 2723 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2724 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2725 }
bf4acbe4 2726#endif
748a9306 2727 if (d) {
3280af22
NIS
2728 U32 oldpdb = PL_perldb;
2729 bool oldn = PL_minus_n;
2730 bool oldp = PL_minus_p;
748a9306
LW
2731
2732 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2733 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2734
2735 if (*d++ == '-') {
8cc95fdb
PP
2736 do {
2737 if (*d == 'M' || *d == 'm') {
2738 char *m = d;
2739 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2740 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2741 (int)(d - m), m);
2742 }
2743 d = moreswitches(d);
2744 } while (d);
155aba94
GS
2745 if ((PERLDB_LINE && !oldpdb) ||
2746 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2747 /* if we have already added "LINE: while (<>) {",
2748 we must not do it again */
748a9306 2749 {
3280af22
NIS
2750 sv_setpv(PL_linestr, "");
2751 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2752 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2753 PL_preambled = FALSE;
84902520 2754 if (PERLDB_LINE)
3280af22 2755 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2756 goto retry;
2757 }
a0d0e21e 2758 }
79072805 2759 }
9f68db38 2760 }
79072805 2761 }
3280af22
NIS
2762 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2763 PL_bufptr = s;
2764 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2765 return yylex();
ae986130 2766 }
378cc40b 2767 goto retry;
4fdae800 2768 case '\r':
6a27c188 2769#ifdef PERL_STRICT_CR
cea2e8a9 2770 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2771 Perl_croak(aTHX_
cc507455 2772 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2773#endif
4fdae800 2774 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2775#ifdef MACOS_TRADITIONAL
2776 case '\312':
2777#endif
378cc40b
LW
2778 s++;
2779 goto retry;
378cc40b 2780 case '#':
e929a76b 2781 case '\n':
3280af22 2782 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2783 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2784 /* handle eval qq[#line 1 "foo"\n ...] */
2785 CopLINE_dec(PL_curcop);
2786 incline(s);
2787 }
3280af22 2788 d = PL_bufend;
a687059c 2789 while (s < d && *s != '\n')
378cc40b 2790 s++;
0f85fab0 2791 if (s < d)
378cc40b 2792 s++;
463ee0b2 2793 incline(s);
3280af22
NIS
2794 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2795 PL_bufptr = s;
2796 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2797 return yylex();
a687059c 2798 }
378cc40b 2799 }
a687059c 2800 else {
378cc40b 2801 *s = '\0';
3280af22 2802 PL_bufend = s;
a687059c 2803 }
378cc40b
LW
2804 goto retry;
2805 case '-':
79072805 2806 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2807 I32 ftst = 0;
2808
378cc40b 2809 s++;
3280af22 2810 PL_bufptr = s;
748a9306
LW
2811 tmp = *s++;
2812
bf4acbe4 2813 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2814 s++;
2815
2816 if (strnEQ(s,"=>",2)) {
3280af22 2817 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2818 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2819 "### Saw unary minus before =>, forcing word '%s'\n", s);
2820 } )
748a9306
LW
2821 OPERATOR('-'); /* unary minus */
2822 }
3280af22 2823 PL_last_uni = PL_oldbufptr;
748a9306 2824 switch (tmp) {
e5edeb50
JH
2825 case 'r': ftst = OP_FTEREAD; break;
2826 case 'w': ftst = OP_FTEWRITE; break;
2827 case 'x': ftst = OP_FTEEXEC; break;
2828 case 'o': ftst = OP_FTEOWNED; break;
2829 case 'R': ftst = OP_FTRREAD; break;
2830 case 'W': ftst = OP_FTRWRITE; break;
2831 case 'X': ftst = OP_FTREXEC; break;
2832 case 'O': ftst = OP_FTROWNED; break;
2833 case 'e': ftst = OP_FTIS; break;
2834 case 'z': ftst = OP_FTZERO; break;
2835 case 's': ftst = OP_FTSIZE; break;
2836 case 'f': ftst = OP_FTFILE; break;
2837 case 'd': ftst = OP_FTDIR; break;
2838 case 'l': ftst = OP_FTLINK; break;
2839 case 'p': ftst = OP_FTPIPE; break;
2840 case 'S': ftst = OP_FTSOCK; break;
2841 case 'u': ftst = OP_FTSUID; break;
2842 case 'g': ftst = OP_FTSGID; break;
2843 case 'k': ftst = OP_FTSVTX; break;
2844 case 'b': ftst = OP_FTBLK; break;
2845 case 'c': ftst = OP_FTCHR; break;
2846 case 't': ftst = OP_FTTTY; break;
2847 case 'T': ftst = OP_FTTEXT; break;
2848 case 'B': ftst = OP_FTBINARY; break;
2849 case 'M': case 'A': case 'C':
2850 gv_fetchpv("\024",TRUE, SVt_PV);
2851 switch (tmp) {
2852 case 'M': ftst = OP_FTMTIME; break;
2853 case 'A': ftst = OP_FTATIME; break;
2854 case 'C': ftst = OP_FTCTIME; break;
2855 default: break;
2856 }
2857 break;
378cc40b 2858 default:
378cc40b
LW
2859 break;
2860 }
e5edeb50
JH
2861 if (ftst) {
2862 PL_last_lop_op = ftst;
4e553d73 2863 DEBUG_T( { PerlIO_printf(Perl_debug_log,
e5edeb50
JH
2864 "### Saw file test %c\n", ftst);
2865 } )
e5edeb50
JH
2866 FTST(ftst);
2867 }
2868 else {
2869 /* Assume it was a minus followed by a one-letter named
2870 * subroutine call (or a -bareword), then. */
95c31fe3
JH
2871 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2872 "### %c looked like a file test but was not\n", ftst);
2873 } )
e5edeb50
JH
2874 s -= 2;
2875 }
378cc40b 2876 }
a687059c
LW
2877 tmp = *s++;
2878 if (*s == tmp) {
2879 s++;
3280af22 2880 if (PL_expect == XOPERATOR)
79072805
LW
2881 TERM(POSTDEC);
2882 else
2883 OPERATOR(PREDEC);
2884 }
2885 else if (*s == '>') {
2886 s++;
2887 s = skipspace(s);
7e2040f0 2888 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2889 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2890 TOKEN(ARROW);
79072805 2891 }
748a9306
LW
2892 else if (*s == '$')
2893 OPERATOR(ARROW);
463ee0b2 2894 else
748a9306 2895 TERM(ARROW);
a687059c 2896 }
3280af22 2897 if (PL_expect == XOPERATOR)
79072805
LW
2898 Aop(OP_SUBTRACT);
2899 else {
3280af22 2900 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2901 check_uni();
79072805 2902 OPERATOR('-'); /* unary minus */
2f3197b3 2903 }
79072805 2904
378cc40b 2905 case '+':
a687059c
LW
2906 tmp = *s++;
2907 if (*s == tmp) {
378cc40b 2908 s++;
3280af22 2909 if (PL_expect == XOPERATOR)
79072805
LW
2910 TERM(POSTINC);
2911 else
2912 OPERATOR(PREINC);
378cc40b 2913 }
3280af22 2914 if (PL_expect == XOPERATOR)
79072805
LW
2915 Aop(OP_ADD);
2916 else {
3280af22 2917 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2918 check_uni();
a687059c 2919 OPERATOR('+');
2f3197b3 2920 }
a687059c 2921
378cc40b 2922 case '*':
3280af22
NIS
2923 if (PL_expect != XOPERATOR) {
2924 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2925 PL_expect = XOPERATOR;
2926 force_ident(PL_tokenbuf, '*');
2927 if (!*PL_tokenbuf)
a0d0e21e 2928 PREREF('*');
79072805 2929 TERM('*');
a687059c 2930 }
79072805
LW
2931 s++;
2932 if (*s == '*') {
a687059c 2933 s++;
79072805 2934 PWop(OP_POW);
a687059c 2935 }
79072805
LW
2936 Mop(OP_MULTIPLY);
2937
378cc40b 2938 case '%':
3280af22 2939 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2940 ++s;
2941 Mop(OP_MODULO);
a687059c 2942 }
3280af22
NIS
2943 PL_tokenbuf[0] = '%';
2944 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2945 if (!PL_tokenbuf[1]) {
2946 if (s == PL_bufend)
bbce6d69
PP
2947 yyerror("Final % should be \\% or %name");
2948 PREREF('%');
a687059c 2949 }
3280af22 2950 PL_pending_ident = '%';
bbce6d69 2951 TERM('%');
a687059c 2952
378cc40b 2953 case '^':
79072805 2954 s++;
a0d0e21e 2955 BOop(OP_BIT_XOR);
79072805 2956 case '[':
3280af22 2957 PL_lex_brackets++;
79072805 2958 /* FALL THROUGH */
378cc40b 2959 case '~':
378cc40b 2960 case ',':
378cc40b
LW
2961 tmp = *s++;
2962 OPERATOR(tmp);
a0d0e21e
LW
2963 case ':':
2964 if (s[1] == ':') {
2965 len = 0;
2966 goto just_a_word;
2967 }
2968 s++;
09bef843
SB
2969 switch (PL_expect) {
2970 OP *attrs;
2971 case XOPERATOR:
2972 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2973 break;
2974 PL_bufptr = s; /* update in case we back off */
2975 goto grabattrs;
2976 case XATTRBLOCK:
2977 PL_expect = XBLOCK;
2978 goto grabattrs;
2979 case XATTRTERM:
2980 PL_expect = XTERMBLOCK;
2981 grabattrs:
2982 s = skipspace(s);
2983 attrs = Nullop;
7e2040f0 2984 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2985 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2986 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2987 if (tmp < 0) tmp = -tmp;
2988 switch (tmp) {
2989 case KEY_or:
2990 case KEY_and:
2991 case KEY_for:
2992 case KEY_unless:
2993 case KEY_if:
2994 case KEY_while:
2995 case KEY_until:
2996 goto got_attrs;
2997 default:
2998 break;
2999 }
3000 }
09bef843
SB
3001 if (*d == '(') {
3002 d = scan_str(d,TRUE,TRUE);
3003 if (!d) {
3004 if (PL_lex_stuff) {
3005 SvREFCNT_dec(PL_lex_stuff);
3006 PL_lex_stuff = Nullsv;
3007 }
3008 /* MUST advance bufptr here to avoid bogus
3009 "at end of line" context messages from yyerror().
3010 */
3011 PL_bufptr = s + len;
3012 yyerror("Unterminated attribute parameter in attribute list");
3013 if (attrs)
3014 op_free(attrs);
3015 return 0; /* EOF indicator */
3016 }
3017 }
3018 if (PL_lex_stuff) {
3019 SV *sv = newSVpvn(s, len);
3020 sv_catsv(sv, PL_lex_stuff);
3021 attrs = append_elem(OP_LIST, attrs,
3022 newSVOP(OP_CONST, 0, sv));
3023 SvREFCNT_dec(PL_lex_stuff);
3024 PL_lex_stuff = Nullsv;
3025 }
3026 else {
3027 attrs = append_elem(OP_LIST, attrs,
3028 newSVOP(OP_CONST, 0,
3029 newSVpvn(s, len)));
3030 }
3031 s = skipspace(d);
0120eecf 3032 if (*s == ':' && s[1] != ':')
09bef843 3033 s = skipspace(s+1);
0120eecf
GS
3034 else if (s == d)
3035 break; /* require real whitespace or :'s */
09bef843 3036 }
f9829d6b
GS
3037 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3038 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3039 char q = ((*s == '\'') ? '"' : '\'');
3040 /* If here for an expression, and parsed no attrs, back off. */
3041 if (tmp == '=' && !attrs) {
3042 s = PL_bufptr;
3043 break;
3044 }
3045 /* MUST advance bufptr here to avoid bogus "at end of line"
3046 context messages from yyerror().
3047 */
3048 PL_bufptr = s;
3049 if (!*s)
3050 yyerror("Unterminated attribute list");
3051 else
3052 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3053 q, *s, q));
3054 if (attrs)
3055 op_free(attrs);
3056 OPERATOR(':');
3057 }
f9829d6b 3058 got_attrs:
09bef843
SB
3059 if (attrs) {
3060 PL_nextval[PL_nexttoke].opval = attrs;
3061 force_next(THING);
3062 }
3063 TOKEN(COLONATTR);
3064 }
a0d0e21e 3065 OPERATOR(':');
8990e307
LW
3066 case '(':
3067 s++;
3280af22
NIS
3068 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3069 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3070 else
3280af22 3071 PL_expect = XTERM;
a0d0e21e 3072 TOKEN('(');
378cc40b 3073 case ';':
f4dd75d9 3074 CLINE;
378cc40b
LW
3075 tmp = *s++;
3076 OPERATOR(tmp);
3077 case ')':
378cc40b 3078 tmp = *s++;
16d20bd9
AD
3079 s = skipspace(s);
3080 if (*s == '{')
3081 PREBLOCK(tmp);
378cc40b 3082 TERM(tmp);
79072805
LW
3083 case ']':
3084 s++;
3280af22 3085 if (PL_lex_brackets <= 0)
d98d5fff 3086 yyerror("Unmatched right square bracket");
463ee0b2 3087 else
3280af22
NIS
3088 --PL_lex_brackets;
3089 if (PL_lex_state == LEX_INTERPNORMAL) {
3090 if (PL_lex_brackets == 0) {
a0d0e21e 3091 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3092 PL_lex_state = LEX_INTERPEND;
79072805
LW
3093 }
3094 }
4633a7c4 3095 TERM(']');
79072805
LW
3096 case '{':
3097 leftbracket:
79072805 3098 s++;
3280af22
NIS
3099 if (PL_lex_brackets > 100) {
3100 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3101 if (newlb != PL_lex_brackstack) {
8990e307 3102 SAVEFREEPV(newlb);
3280af22 3103 PL_lex_brackstack = newlb;
8990e307
LW
3104 }
3105 }
3280af22 3106 switch (PL_expect) {
a0d0e21e 3107 case XTERM:
3280af22 3108 if (PL_lex_formbrack) {
a0d0e21e
LW
3109 s--;
3110 PRETERMBLOCK(DO);
3111 }
3280af22
NIS
3112 if (PL_oldoldbufptr == PL_last_lop)
3113 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3114 else
3280af22 3115 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3116 OPERATOR(HASHBRACK);
a0d0e21e 3117 case XOPERATOR:
bf4acbe4 3118 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3119 s++;
44a8e56a 3120 d = s;
3280af22
NIS
3121 PL_tokenbuf[0] = '\0';
3122 if (d < PL_bufend && *d == '-') {
3123 PL_tokenbuf[0] = '-';
44a8e56a 3124 d++;
bf4acbe4 3125 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3126 d++;
3127 }
7e2040f0 3128 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3129 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3130 FALSE, &len);
bf4acbe4 3131 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3132 d++;
3133 if (*d == '}') {
3280af22 3134 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3135 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
7948272d
NIS
3136 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
3137 PL_nextval[PL_nexttoke-1].opval)
3138 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
44a8e56a
PP
3139 if (minus)
3140 force_next('-');
748a9306
LW
3141 }
3142 }
3143 /* FALL THROUGH */
09bef843 3144 case XATTRBLOCK:
748a9306 3145 case XBLOCK:
3280af22
NIS
3146 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3147 PL_expect = XSTATE;
a0d0e21e 3148 break;
09bef843 3149 case XATTRTERM:
a0d0e21e 3150 case XTERMBLOCK:
3280af22
NIS
3151 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3152 PL_expect = XSTATE;
a0d0e21e
LW
3153 break;
3154 default: {
3155 char *t;
3280af22
NIS
3156 if (PL_oldoldbufptr == PL_last_lop)
3157 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3158 else
3280af22 3159 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3160 s = skipspace(s);
09ecc4b6 3161 if (*s == '}')
a0d0e21e 3162 OPERATOR(HASHBRACK);
b8a4b1be
GS
3163 /* This hack serves to disambiguate a pair of curlies
3164 * as being a block or an anon hash. Normally, expectation
3165 * determines that, but in cases where we're not in a
3166 * position to expect anything in particular (like inside
3167 * eval"") we have to resolve the ambiguity. This code
3168 * covers the case where the first term in the curlies is a
3169 * quoted string. Most other cases need to be explicitly
3170 * disambiguated by prepending a `+' before the opening
3171 * curly in order to force resolution as an anon hash.
3172 *
3173 * XXX should probably propagate the outer expectation
3174 * into eval"" to rely less on this hack, but that could
3175 * potentially break current behavior of eval"".
3176 * GSAR 97-07-21
3177 */
3178 t = s;
3179 if (*s == '\'' || *s == '"' || *s == '`') {
3180 /* common case: get past first string, handling escapes */
3280af22 3181 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3182 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3183 t++;
3184 t++;
a0d0e21e 3185 }
b8a4b1be 3186 else if (*s == 'q') {
3280af22 3187 if (++t < PL_bufend
b8a4b1be 3188 && (!isALNUM(*t)
3280af22 3189 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3190 && !isALNUM(*t))))
3191 {
b8a4b1be
GS
3192 char *tmps;
3193 char open, close, term;
3194 I32 brackets = 1;
3195
3280af22 3196 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3197 t++;
3198 term = *t;
3199 open = term;
3200 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3201 term = tmps[5];
3202 close = term;
3203 if (open == close)
3280af22
NIS
3204 for (t++; t < PL_bufend; t++) {
3205 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3206 t++;
6d07e5e9 3207 else if (*t == open)
b8a4b1be
GS
3208 break;
3209 }
3210 else
3280af22
NIS
3211 for (t++; t < PL_bufend; t++) {
3212 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3213 t++;
6d07e5e9 3214 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3215 break;
3216 else if (*t == open)
3217 brackets++;
3218 }
3219 }
3220 t++;
a0d0e21e 3221 }
7e2040f0 3222 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3223 t += UTF8SKIP(t);
7e2040f0 3224 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3225 t += UTF8SKIP(t);
a0d0e21e 3226 }
3280af22 3227 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3228 t++;
b8a4b1be
GS
3229 /* if comma follows first term, call it an anon hash */
3230 /* XXX it could be a comma expression with loop modifiers */
3280af22 3231 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3232 || (*t == '=' && t[1] == '>')))
a0d0e21e 3233 OPERATOR(HASHBRACK);
3280af22 3234 if (PL_expect == XREF)
4e4e412b 3235 PL_expect = XTERM;
a0d0e21e 3236 else {
3280af22
NIS
3237 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3238 PL_expect = XSTATE;
a0d0e21e 3239 }
8990e307 3240 }
a0d0e21e 3241 break;
463ee0b2 3242 }
57843af0 3243 yylval.ival = CopLINE(PL_curcop);
79072805 3244 if (isSPACE(*s) || *s == '#')
3280af22 3245 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3246 TOKEN('{');
378cc40b 3247 case '}':
79072805
LW
3248 rightbracket:
3249 s++;
3280af22 3250 if (PL_lex_brackets <= 0)
d98d5fff 3251 yyerror("Unmatched right curly bracket");
463ee0b2 3252 else
3280af22 3253 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3254 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3255 PL_lex_formbrack = 0;
3256 if (PL_lex_state == LEX_INTERPNORMAL) {
3257 if (PL_lex_brackets == 0) {
9059aa12
LW
3258 if (PL_expect & XFAKEBRACK) {
3259 PL_expect &= XENUMMASK;
3280af22
NIS
3260 PL_lex_state = LEX_INTERPEND;
3261 PL_bufptr = s;
cea2e8a9 3262 return yylex(); /* ignore fake brackets */
79072805 3263 }
fa83b5b6 3264 if (*s == '-' && s[1] == '>')
3280af22 3265 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3266 else if (*s != '[' && *s != '{')
3280af22 3267 PL_lex_state = LEX_INTERPEND;
79072805
LW
3268 }
3269 }
9059aa12
LW
3270 if (PL_expect & XFAKEBRACK) {
3271 PL_expect &= XENUMMASK;
3280af22 3272 PL_bufptr = s;
cea2e8a9 3273 return yylex(); /* ignore fake brackets */
748a9306 3274 }
79072805
LW
3275 force_next('}');
3276 TOKEN(';');
378cc40b
LW
3277 case '&':
3278 s++;
3279 tmp = *s++;
3280 if (tmp == '&')
a0d0e21e 3281 AOPERATOR(ANDAND);
378cc40b 3282 s--;
3280af22 3283 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3284 if (ckWARN(WARN_SEMICOLON)
3285 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3286 {
57843af0 3287 CopLINE_dec(PL_curcop);
cea2e8a9 3288 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3289 CopLINE_inc(PL_curcop);
463ee0b2 3290 }
79072805 3291 BAop(OP_BIT_AND);
463ee0b2 3292 }
79072805 3293
3280af22
NIS
3294 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3295 if (*PL_tokenbuf) {
3296 PL_expect = XOPERATOR;
3297 force_ident(PL_tokenbuf, '&');
463ee0b2 3298 }
79072805
LW
3299 else
3300 PREREF('&');
c07a80fd 3301 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3302 TERM('&');
3303
378cc40b
LW
3304 case '|':
3305 s++;
3306 tmp = *s++;
3307 if (tmp == '|')
a0d0e21e 3308 AOPERATOR(OROR);
378cc40b 3309 s--;
79072805 3310 BOop(OP_BIT_OR);
378cc40b
LW
3311 case '=':
3312 s++;
3313 tmp = *s++;
3314 if (tmp == '=')
79072805
LW
3315 Eop(OP_EQ);
3316 if (tmp == '>')
3317 OPERATOR(',');
378cc40b 3318 if (tmp == '~')
79072805 3319 PMop(OP_MATCH);
599cee73 3320 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3321 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3322 s--;
3280af22
NIS
3323 if (PL_expect == XSTATE && isALPHA(tmp) &&
3324 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3325 {
3280af22
NIS
3326 if (PL_in_eval && !PL_rsfp) {
3327 d = PL_bufend;
a5f75d66
AD
3328 while (s < d) {
3329 if (*s++ == '\n') {
3330 incline(s);
3331 if (strnEQ(s,"=cut",4)) {
3332 s = strchr(s,'\n');
3333 if (s)
3334 s++;
3335 else
3336 s = d;
3337 incline(s);
3338 goto retry;
3339 }
3340 }
3341 }
3342 goto retry;
3343 }
3280af22
NIS
3344 s = PL_bufend;
3345 PL_doextract = TRUE;
a0d0e21e
LW
3346 goto retry;
3347 }
3280af22 3348 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3349 char *t;
51882d45 3350#ifdef PERL_STRICT_CR
bf4acbe4 3351 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3352#else
bf4acbe4 3353 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3354#endif
a0d0e21e
LW
3355 if (*t == '\n' || *t == '#') {
3356 s--;
3280af22 3357 PL_expect = XBLOCK;
a0d0e21e
LW
3358 goto leftbracket;
3359 }
79072805 3360 }
a0d0e21e
LW
3361 yylval.ival = 0;
3362 OPERATOR(ASSIGNOP);
378cc40b
LW
3363 case '!':
3364 s++;
3365 tmp = *s++;
3366 if (tmp == '=')
79072805 3367 Eop(OP_NE);
378cc40b 3368 if (tmp == '~')
79072805 3369 PMop(OP_NOT);
378cc40b
LW
3370 s--;
3371 OPERATOR('!');
3372 case '<':
3280af22 3373 if (PL_expect != XOPERATOR) {
93a17b20 3374 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3375 check_uni();
79072805
LW
3376 if (s[1] == '<')
3377 s = scan_heredoc(s);
3378 else
3379 s = scan_inputsymbol(s);
3380 TERM(sublex_start());
378cc40b
LW
3381 }
3382 s++;
3383 tmp = *s++;
3384 if (tmp == '<')
79072805 3385 SHop(OP_LEFT_SHIFT);
395c3793
LW
3386 if (tmp == '=') {
3387 tmp = *s++;
3388 if (tmp == '>')
79072805 3389 Eop(OP_NCMP);
395c3793 3390 s--;
79072805 3391 Rop(OP_LE);
395c3793 3392 }
378cc40b 3393 s--;
79072805 3394 Rop(OP_LT);
378cc40b
LW
3395 case '>':
3396 s++;
3397 tmp = *s++;
3398 if (tmp == '>')
79072805 3399 SHop(OP_RIGHT_SHIFT);
378cc40b 3400 if (tmp == '=')
79072805 3401 Rop(OP_GE);
378cc40b 3402 s--;
79072805 3403 Rop(OP_GT);
378cc40b
LW
3404
3405 case '$':
bbce6d69
PP
3406 CLINE;
3407
3280af22
NIS
3408 if (PL_expect == XOPERATOR) {
3409 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3410 PL_expect = XTERM;
a0d0e21e 3411 depcom();
bbce6d69 3412 return ','; /* grandfather non-comma-format format */
a0d0e21e 3413 }
8990e307 3414 }
a0d0e21e 3415
7e2040f0 3416 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3417 PL_tokenbuf[0] = '@';
376b8730
SM
3418 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3419 sizeof PL_tokenbuf - 1, FALSE);
3420 if (PL_expect == XOPERATOR)
3421 no_op("Array length", s);
3280af22 3422 if (!PL_tokenbuf[1])
a0d0e21e 3423 PREREF(DOLSHARP);
3280af22
NIS
3424 PL_expect = XOPERATOR;
3425 PL_pending_ident = '#';
463ee0b2 3426 TOKEN(DOLSHARP);
79072805 3427 }
bbce6d69 3428
3280af22 3429 PL_tokenbuf[0] = '$';
376b8730
SM
3430 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3431 sizeof PL_tokenbuf - 1, FALSE);
3432 if (PL_expect == XOPERATOR)
3433 no_op("Scalar", s);
3280af22
NIS
3434 if (!PL_tokenbuf[1]) {
3435 if (s == PL_bufend)
bbce6d69
PP
3436 yyerror("Final $ should be \\$ or $name");
3437 PREREF('$');
8990e307 3438 }
a0d0e21e 3439
bbce6d69 3440 /* This kludge not intended to be bulletproof. */
3280af22 3441 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3442 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3443 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3444 yylval.opval->op_private = OPpCONST_ARYBASE;
3445 TERM(THING);
3446 }
3447
ff68c719 3448 d = s;
69d2bceb 3449 tmp = (I32)*s;
3280af22 3450 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3451 s = skipspace(s);
3452
3280af22 3453 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3454 char *t;
3455 if (*s == '[') {
3280af22 3456 PL_tokenbuf[0] = '@';
599cee73 3457 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3458 for(t = s + 1;
7e2040f0 3459 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3460 t++) ;
a0d0e21e 3461 if (*t++ == ',') {
3280af22
NIS
3462 PL_bufptr = skipspace(PL_bufptr);
3463 while (t < PL_bufend && *t != ']')
bbce6d69 3464 t++;
cea2e8a9 3465 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3466 "Multidimensional syntax %.*s not supported",
3467 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3468 }
3469 }
bbce6d69
PP
3470 }
3471 else if (*s == '{') {
3280af22 3472 PL_tokenbuf[0] = '%';
599cee73 3473 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3474 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3475 {
3280af22 3476 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3477 STRLEN len;
3478 for (t++; isSPACE(*t); t++) ;
7e2040f0 3479 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3480 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3481 for (; isSPACE(*t); t++) ;
864dbfa3 3482 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3483 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3484 "You need to quote \"%s\"", tmpbuf);
748a9306 3485 }
93a17b20
LW
3486 }
3487 }
2f3197b3 3488 }
bbce6d69 3489
3280af22 3490 PL_expect = XOPERATOR;
69d2bceb 3491 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3492 bool islop = (PL_last_lop == PL_oldoldbufptr);
3493 if (!islop || PL_last_lop_op == OP_GREPSTART)
3494 PL_expect = XOPERATOR;
bbce6d69 3495 else if (strchr("$@\"'`q", *s))
3280af22 3496 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3497 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3498 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3499 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3500 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3501 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3502 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3503 /* binary operators exclude handle interpretations */
3504 switch (tmp) {
3505 case -KEY_x:
3506 case -KEY_eq:
3507 case -KEY_ne:
3508 case -KEY_gt:
3509 case -KEY_lt:
3510 case -KEY_ge:
3511 case -KEY_le:
3512 case -KEY_cmp:
3513 break;
3514 default:
3280af22 3515 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3516 break;
3517 }
3518 }
68dc0745
PP
3519 else {
3520 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3521 if (gv && GvCVu(gv))
3280af22 3522 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3523 }
93a17b20 3524 }
bbce6d69 3525 else if (isDIGIT(*s))
3280af22 3526 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3527 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3528 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3529 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3530 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3531 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3532 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3533 }
3280af22 3534 PL_pending_ident = '$';
79072805 3535 TOKEN('$');
378cc40b
LW
3536
3537 case '@':
3280af22 3538 if (PL_expect == XOPERATOR)
bbce6d69 3539 no_op("Array", s);
3280af22
NIS
3540 PL_tokenbuf[0] = '@';
3541 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3542 if (!PL_tokenbuf[1]) {
3543 if (s == PL_bufend)
bbce6d69
PP
3544 yyerror("Final @ should be \\@ or @name");
3545 PREREF('@');
3546 }
3280af22 3547 if (PL_lex_state == LEX_NORMAL)
ff68c719 3548 s = skipspace(s);
3280af22 3549 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3550 if (*s == '{')
3280af22 3551 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3552
3553 /* Warn about @ where they meant $. */
599cee73 3554 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3555 if (*s == '[' || *s == '{') {
3556 char *t = s + 1;
7e2040f0 3557 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3558 t++;
3559 if (*t == '}' || *t == ']') {
3560 t++;
3280af22 3561 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3562 Perl_warner(aTHX_ WARN_SYNTAX,