This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
longstanding bug in parsing "require VERSION", could reallocate
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a
PP
47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff
PP
49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
395c3793
LW
61#ifdef I_FCNTL
62#include <fcntl.h>
63#endif
fe14fcc3
LW
64#ifdef I_SYS_FILE
65#include <sys/file.h>
66#endif
395c3793 67
a790bc05
PP
68/* XXX If this causes problems, set i_unistd=undef in the hint file. */
69#ifdef I_UNISTD
70# include <unistd.h> /* Needed for execv() */
71#endif
72
73
79072805
LW
74#ifdef ff_next
75#undef ff_next
d48672a2
LW
76#endif
77
a1a0e61e
TD
78#ifdef USE_PURE_BISON
79YYSTYPE* yylval_pointer = NULL;
80int* yychar_pointer = NULL;
22c35a8c
GS
81# undef yylval
82# undef yychar
e4bfbdd4
JH
83# define yylval (*yylval_pointer)
84# define yychar (*yychar_pointer)
85# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
86# undef yylex
87# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
88#endif
89
79072805 90#include "keywords.h"
fe14fcc3 91
ffb4593c
NT
92/* CLINE is a macro that ensures PL_copline has a sane value */
93
ae986130
LW
94#ifdef CLINE
95#undef CLINE
96#endif
57843af0 97#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 98
ffb4593c
NT
99/*
100 * Convenience functions to return different tokens and prime the
9cbb5ea2 101 * lexer for the next token. They all take an argument.
ffb4593c
NT
102 *
103 * TOKEN : generic token (used for '(', DOLSHARP, etc)
104 * OPERATOR : generic operator
105 * AOPERATOR : assignment operator
106 * PREBLOCK : beginning the block after an if, while, foreach, ...
107 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
108 * PREREF : *EXPR where EXPR is not a simple identifier
109 * TERM : expression term
110 * LOOPX : loop exiting command (goto, last, dump, etc)
111 * FTST : file test operator
112 * FUN0 : zero-argument function
2d2e263d 113 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
114 * BOop : bitwise or or xor
115 * BAop : bitwise and
116 * SHop : shift operator
117 * PWop : power operator
9cbb5ea2 118 * PMop : pattern-matching operator
ffb4593c
NT
119 * Aop : addition-level operator
120 * Mop : multiplication-level operator
121 * Eop : equality-testing operator
122 * Rop : relational operator <= != gt
123 *
124 * Also see LOP and lop() below.
125 */
126
3280af22
NIS
127#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
128#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
129#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
130#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
131#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
132#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
133#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
134#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
135#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
136#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
137#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
138#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
139#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
140#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
141#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
142#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
143#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
144#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
145#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
146#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 147
a687059c
LW
148/* This bit of chicanery makes a unary function followed by
149 * a parenthesis into a function with one argument, highest precedence.
150 */
2f3197b3 151#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_expect = XTERM, \
153 PL_bufptr = s, \
154 PL_last_uni = PL_oldbufptr, \
155 PL_last_lop_op = f, \
a687059c
LW
156 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
157
79072805 158#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
159 PL_bufptr = s, \
160 PL_last_uni = PL_oldbufptr, \
79072805
LW
161 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
162
9f68db38 163/* grandfather return to old style */
3280af22 164#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 165
ffb4593c
NT
166/*
167 * S_ao
168 *
169 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
170 * into an OP_ANDASSIGN or OP_ORASSIGN
171 */
172
76e3520e 173STATIC int
cea2e8a9 174S_ao(pTHX_ int toketype)
a0d0e21e 175{
3280af22
NIS
176 if (*PL_bufptr == '=') {
177 PL_bufptr++;
a0d0e21e
LW
178 if (toketype == ANDAND)
179 yylval.ival = OP_ANDASSIGN;
180 else if (toketype == OROR)
181 yylval.ival = OP_ORASSIGN;
182 toketype = ASSIGNOP;
183 }
184 return toketype;
185}
186
ffb4593c
NT
187/*
188 * S_no_op
189 * When Perl expects an operator and finds something else, no_op
190 * prints the warning. It always prints "<something> found where
191 * operator expected. It prints "Missing semicolon on previous line?"
192 * if the surprise occurs at the start of the line. "do you need to
193 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
194 * where the compiler doesn't know if foo is a method call or a function.
195 * It prints "Missing operator before end of line" if there's nothing
196 * after the missing operator, or "... before <...>" if there is something
197 * after the missing operator.
198 */
199
76e3520e 200STATIC void
cea2e8a9 201S_no_op(pTHX_ char *what, char *s)
463ee0b2 202{
3280af22
NIS
203 char *oldbp = PL_bufptr;
204 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 205
1189a94a
GS
206 if (!s)
207 s = oldbp;
208 else {
209 assert(s >= oldbp);
210 PL_bufptr = s;
211 }
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
221 }
222 else
cea2e8a9 223 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 224 PL_bufptr = oldbp;
8990e307
LW
225}
226
ffb4593c
NT
227/*
228 * S_missingterm
229 * Complain about missing quote/regexp/heredoc terminator.
230 * If it's called with (char *)NULL then it cauterizes the line buffer.
231 * If we're in a delimited string and the delimiter is a control
232 * character, it's reformatted into a two-char sequence like ^C.
233 * This is fatal.
234 */
235
76e3520e 236STATIC void
cea2e8a9 237S_missingterm(pTHX_ char *s)
8990e307
LW
238{
239 char tmpbuf[3];
240 char q;
241 if (s) {
242 char *nl = strrchr(s,'\n');
d2719217 243 if (nl)
8990e307
LW
244 *nl = '\0';
245 }
9d116dd7
JH
246 else if (
247#ifdef EBCDIC
248 iscntrl(PL_multi_close)
249#else
250 PL_multi_close < 32 || PL_multi_close == 127
251#endif
252 ) {
8990e307 253 *tmpbuf = '^';
3280af22 254 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
255 s = "\\n";
256 tmpbuf[2] = '\0';
257 s = tmpbuf;
258 }
259 else {
3280af22 260 *tmpbuf = PL_multi_close;
8990e307
LW
261 tmpbuf[1] = '\0';
262 s = tmpbuf;
263 }
264 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 265 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 266}
79072805 267
ffb4593c
NT
268/*
269 * Perl_deprecate
ffb4593c
NT
270 */
271
79072805 272void
864dbfa3 273Perl_deprecate(pTHX_ char *s)
a0d0e21e 274{
d008e5eb 275 dTHR;
599cee73 276 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 277 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
278}
279
ffb4593c
NT
280/*
281 * depcom
9cbb5ea2 282 * Deprecate a comma-less variable list.
ffb4593c
NT
283 */
284
76e3520e 285STATIC void
cea2e8a9 286S_depcom(pTHX)
a0d0e21e
LW
287{
288 deprecate("comma-less variable list");
289}
290
ffb4593c 291/*
9cbb5ea2
GS
292 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
293 * utf16-to-utf8-reversed.
ffb4593c
NT
294 */
295
c39cd008
GS
296#ifdef PERL_CR_FILTER
297static void
298strip_return(SV *sv)
299{
300 register char *s = SvPVX(sv);
301 register char *e = s + SvCUR(sv);
302 /* outer loop optimized to do nothing if there are no CR-LFs */
303 while (s < e) {
304 if (*s++ == '\r' && *s == '\n') {
305 /* hit a CR-LF, need to copy the rest */
306 register char *d = s - 1;
307 *d++ = *s++;
308 while (s < e) {
309 if (*s == '\r' && s[1] == '\n')
310 s++;
311 *d++ = *s++;
312 }
313 SvCUR(sv) -= s - d;
314 return;
315 }
316 }
317}
a868473f 318
76e3520e 319STATIC I32
c39cd008 320S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 321{
c39cd008
GS
322 I32 count = FILTER_READ(idx+1, sv, maxlen);
323 if (count > 0 && !maxlen)
324 strip_return(sv);
325 return count;
a868473f
NIS
326}
327#endif
328
a0ed51b3 329STATIC I32
cea2e8a9 330S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
331{
332 I32 count = FILTER_READ(idx+1, sv, maxlen);
333 if (count) {
dfe13c55
GS
334 U8* tmps;
335 U8* tend;
336 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 337 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 338 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
339
340 }
341 return count;
342}
343
344STATIC I32
cea2e8a9 345S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
346{
347 I32 count = FILTER_READ(idx+1, sv, maxlen);
348 if (count) {
dfe13c55
GS
349 U8* tmps;
350 U8* tend;
351 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 352 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 353 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
354
355 }
356 return count;
357}
a868473f 358
ffb4593c
NT
359/*
360 * Perl_lex_start
9cbb5ea2
GS
361 * Initialize variables. Uses the Perl save_stack to save its state (for
362 * recursive calls to the parser).
ffb4593c
NT
363 */
364
a0d0e21e 365void
864dbfa3 366Perl_lex_start(pTHX_ SV *line)
79072805 367{
0f15f207 368 dTHR;
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
386 PL_nexttoke = 0;
387 }
57843af0 388 SAVECOPLINE(PL_curcop);
3280af22
NIS
389 SAVEPPTR(PL_bufptr);
390 SAVEPPTR(PL_bufend);
391 SAVEPPTR(PL_oldbufptr);
392 SAVEPPTR(PL_oldoldbufptr);
393 SAVEPPTR(PL_linestart);
394 SAVESPTR(PL_linestr);
395 SAVEPPTR(PL_lex_brackstack);
396 SAVEPPTR(PL_lex_casestack);
c76ac1ee 397 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
398 SAVESPTR(PL_lex_stuff);
399 SAVEI32(PL_lex_defer);
09bef843 400 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 401 SAVESPTR(PL_lex_repl);
bebdddfc
GS
402 SAVEINT(PL_expect);
403 SAVEINT(PL_lex_expect);
3280af22
NIS
404
405 PL_lex_state = LEX_NORMAL;
406 PL_lex_defer = 0;
407 PL_expect = XSTATE;
408 PL_lex_brackets = 0;
3280af22
NIS
409 New(899, PL_lex_brackstack, 120, char);
410 New(899, PL_lex_casestack, 12, char);
411 SAVEFREEPV(PL_lex_brackstack);
412 SAVEFREEPV(PL_lex_casestack);
413 PL_lex_casemods = 0;
414 *PL_lex_casestack = '\0';
415 PL_lex_dojoin = 0;
416 PL_lex_starts = 0;
417 PL_lex_stuff = Nullsv;
418 PL_lex_repl = Nullsv;
419 PL_lex_inpat = 0;
420 PL_lex_inwhat = 0;
09bef843 421 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
422 PL_linestr = line;
423 if (SvREADONLY(PL_linestr))
424 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
425 s = SvPV(PL_linestr, len);
8990e307 426 if (len && s[len-1] != ';') {
3280af22
NIS
427 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
428 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
429 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 430 }
3280af22
NIS
431 SvTEMP_off(PL_linestr);
432 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
433 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
434 SvREFCNT_dec(PL_rs);
79cb57f6 435 PL_rs = newSVpvn("\n", 1);
3280af22 436 PL_rsfp = 0;
79072805 437}
a687059c 438
ffb4593c
NT
439/*
440 * Perl_lex_end
9cbb5ea2
GS
441 * Finalizer for lexing operations. Must be called when the parser is
442 * done with the lexer.
ffb4593c
NT
443 */
444
463ee0b2 445void
864dbfa3 446Perl_lex_end(pTHX)
463ee0b2 447{
3280af22 448 PL_doextract = FALSE;
463ee0b2
LW
449}
450
ffb4593c
NT
451/*
452 * S_incline
453 * This subroutine has nothing to do with tilting, whether at windmills
454 * or pinball tables. Its name is short for "increment line". It
57843af0 455 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 456 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
457 * # line 500 "foo.pm"
458 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
459 */
460
76e3520e 461STATIC void
cea2e8a9 462S_incline(pTHX_ char *s)
463ee0b2 463{
0f15f207 464 dTHR;
463ee0b2
LW
465 char *t;
466 char *n;
467 char ch;
468 int sawline = 0;
469
57843af0 470 CopLINE_inc(PL_curcop);
463ee0b2
LW
471 if (*s++ != '#')
472 return;
473 while (*s == ' ' || *s == '\t') s++;
474 if (strnEQ(s, "line ", 5)) {
475 s += 5;
476 sawline = 1;
477 }
478 if (!isDIGIT(*s))
479 return;
480 n = s;
481 while (isDIGIT(*s))
482 s++;
483 while (*s == ' ' || *s == '\t')
484 s++;
485 if (*s == '"' && (t = strchr(s+1, '"')))
486 s++;
487 else {
488 if (!sawline)
489 return; /* false alarm */
490 for (t = s; !isSPACE(*t); t++) ;
491 }
492 ch = *t;
493 *t = '\0';
494 if (t - s > 0)
57843af0 495 CopFILE_set(PL_curcop, s);
463ee0b2 496 *t = ch;
57843af0 497 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
498}
499
ffb4593c
NT
500/*
501 * S_skipspace
502 * Called to gobble the appropriate amount and type of whitespace.
503 * Skips comments as well.
504 */
505
76e3520e 506STATIC char *
cea2e8a9 507S_skipspace(pTHX_ register char *s)
a687059c 508{
11343788 509 dTHR;
3280af22
NIS
510 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
511 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
512 s++;
513 return s;
514 }
515 for (;;) {
fd049845 516 STRLEN prevlen;
09bef843
SB
517 SSize_t oldprevlen, oldoldprevlen;
518 SSize_t oldloplen, oldunilen;
60e6418e
GS
519 while (s < PL_bufend && isSPACE(*s)) {
520 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
521 incline(s);
522 }
ffb4593c
NT
523
524 /* comment */
3280af22
NIS
525 if (s < PL_bufend && *s == '#') {
526 while (s < PL_bufend && *s != '\n')
463ee0b2 527 s++;
60e6418e 528 if (s < PL_bufend) {
463ee0b2 529 s++;
60e6418e
GS
530 if (PL_in_eval && !PL_rsfp) {
531 incline(s);
532 continue;
533 }
534 }
463ee0b2 535 }
ffb4593c
NT
536
537 /* only continue to recharge the buffer if we're at the end
538 * of the buffer, we're not reading from a source filter, and
539 * we're in normal lexing mode
540 */
09bef843
SB
541 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
542 PL_lex_state == LEX_FORMLINE)
463ee0b2 543 return s;
ffb4593c
NT
544
545 /* try to recharge the buffer */
9cbb5ea2
GS
546 if ((s = filter_gets(PL_linestr, PL_rsfp,
547 (prevlen = SvCUR(PL_linestr)))) == Nullch)
548 {
549 /* end of file. Add on the -p or -n magic */
3280af22
NIS
550 if (PL_minus_n || PL_minus_p) {
551 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
552 ";}continue{print or die qq(-p destination: $!\\n)" :
553 "");
3280af22
NIS
554 sv_catpv(PL_linestr,";}");
555 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
556 }
557 else
3280af22 558 sv_setpv(PL_linestr,";");
ffb4593c
NT
559
560 /* reset variables for next time we lex */
9cbb5ea2
GS
561 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
562 = SvPVX(PL_linestr);
3280af22 563 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
564
565 /* Close the filehandle. Could be from -P preprocessor,
566 * STDIN, or a regular file. If we were reading code from
567 * STDIN (because the commandline held no -e or filename)
568 * then we don't close it, we reset it so the code can
569 * read from STDIN too.
570 */
571
3280af22
NIS
572 if (PL_preprocess && !PL_in_eval)
573 (void)PerlProc_pclose(PL_rsfp);
574 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
575 PerlIO_clearerr(PL_rsfp);
8990e307 576 else
3280af22
NIS
577 (void)PerlIO_close(PL_rsfp);
578 PL_rsfp = Nullfp;
463ee0b2
LW
579 return s;
580 }
ffb4593c
NT
581
582 /* not at end of file, so we only read another line */
09bef843
SB
583 /* make corresponding updates to old pointers, for yyerror() */
584 oldprevlen = PL_oldbufptr - PL_bufend;
585 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
586 if (PL_last_uni)
587 oldunilen = PL_last_uni - PL_bufend;
588 if (PL_last_lop)
589 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
590 PL_linestart = PL_bufptr = s + prevlen;
591 PL_bufend = s + SvCUR(PL_linestr);
592 s = PL_bufptr;
09bef843
SB
593 PL_oldbufptr = s + oldprevlen;
594 PL_oldoldbufptr = s + oldoldprevlen;
595 if (PL_last_uni)
596 PL_last_uni = s + oldunilen;
597 if (PL_last_lop)
598 PL_last_lop = s + oldloplen;
a0d0e21e 599 incline(s);
ffb4593c
NT
600
601 /* debugger active and we're not compiling the debugger code,
602 * so store the line into the debugger's array of lines
603 */
3280af22 604 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
605 SV *sv = NEWSV(85,0);
606
607 sv_upgrade(sv, SVt_PVMG);
3280af22 608 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 609 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 610 }
463ee0b2 611 }
a687059c 612}
378cc40b 613
ffb4593c
NT
614/*
615 * S_check_uni
616 * Check the unary operators to ensure there's no ambiguity in how they're
617 * used. An ambiguous piece of code would be:
618 * rand + 5
619 * This doesn't mean rand() + 5. Because rand() is a unary operator,
620 * the +5 is its argument.
621 */
622
76e3520e 623STATIC void
cea2e8a9 624S_check_uni(pTHX)
ba106d47 625{
2f3197b3 626 char *s;
a0d0e21e 627 char *t;
0453d815 628 dTHR;
2f3197b3 629
3280af22 630 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 631 return;
3280af22
NIS
632 while (isSPACE(*PL_last_uni))
633 PL_last_uni++;
7e2040f0 634 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 635 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 636 return;
0453d815 637 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 638 char ch = *s;
0453d815
PM
639 *s = '\0';
640 Perl_warner(aTHX_ WARN_AMBIGUOUS,
641 "Warning: Use of \"%s\" without parens is ambiguous",
642 PL_last_uni);
643 *s = ch;
644 }
2f3197b3
LW
645}
646
ffb4593c
NT
647/* workaround to replace the UNI() macro with a function. Only the
648 * hints/uts.sh file mentions this. Other comments elsewhere in the
649 * source indicate Microport Unix might need it too.
650 */
651
ffed7fef
LW
652#ifdef CRIPPLED_CC
653
654#undef UNI
ffed7fef 655#define UNI(f) return uni(f,s)
ffed7fef 656
76e3520e 657STATIC int
cea2e8a9 658S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
659{
660 yylval.ival = f;
3280af22
NIS
661 PL_expect = XTERM;
662 PL_bufptr = s;
8f872242
NIS
663 PL_last_uni = PL_oldbufptr;
664 PL_last_lop_op = f;
ffed7fef
LW
665 if (*s == '(')
666 return FUNC1;
667 s = skipspace(s);
668 if (*s == '(')
669 return FUNC1;
670 else
671 return UNIOP;
672}
673
a0d0e21e
LW
674#endif /* CRIPPLED_CC */
675
ffb4593c
NT
676/*
677 * LOP : macro to build a list operator. Its behaviour has been replaced
678 * with a subroutine, S_lop() for which LOP is just another name.
679 */
680
a0d0e21e
LW
681#define LOP(f,x) return lop(f,x,s)
682
ffb4593c
NT
683/*
684 * S_lop
685 * Build a list operator (or something that might be one). The rules:
686 * - if we have a next token, then it's a list operator [why?]
687 * - if the next thing is an opening paren, then it's a function
688 * - else it's a list operator
689 */
690
76e3520e 691STATIC I32
a0be28da 692S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 693{
0f15f207 694 dTHR;
79072805 695 yylval.ival = f;
35c8bce7 696 CLINE;
3280af22
NIS
697 PL_expect = x;
698 PL_bufptr = s;
699 PL_last_lop = PL_oldbufptr;
700 PL_last_lop_op = f;
701 if (PL_nexttoke)
a0d0e21e 702 return LSTOP;
79072805
LW
703 if (*s == '(')
704 return FUNC;
705 s = skipspace(s);
706 if (*s == '(')
707 return FUNC;
708 else
709 return LSTOP;
710}
711
ffb4593c
NT
712/*
713 * S_force_next
9cbb5ea2 714 * When the lexer realizes it knows the next token (for instance,
ffb4593c 715 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
716 * to know what token to return the next time the lexer is called. Caller
717 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
718 * handles the token correctly.
ffb4593c
NT
719 */
720
76e3520e 721STATIC void
cea2e8a9 722S_force_next(pTHX_ I32 type)
79072805 723{
3280af22
NIS
724 PL_nexttype[PL_nexttoke] = type;
725 PL_nexttoke++;
726 if (PL_lex_state != LEX_KNOWNEXT) {
727 PL_lex_defer = PL_lex_state;
728 PL_lex_expect = PL_expect;
729 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
730 }
731}
732
ffb4593c
NT
733/*
734 * S_force_word
735 * When the lexer knows the next thing is a word (for instance, it has
736 * just seen -> and it knows that the next char is a word char, then
737 * it calls S_force_word to stick the next word into the PL_next lookahead.
738 *
739 * Arguments:
b1b65b59 740 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
741 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
742 * int check_keyword : if true, Perl checks to make sure the word isn't
743 * a keyword (do this if the word is a label, e.g. goto FOO)
744 * int allow_pack : if true, : characters will also be allowed (require,
745 * use, etc. do this)
9cbb5ea2 746 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
747 */
748
76e3520e 749STATIC char *
cea2e8a9 750S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 751{
463ee0b2
LW
752 register char *s;
753 STRLEN len;
754
755 start = skipspace(start);
756 s = start;
7e2040f0 757 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 758 (allow_pack && *s == ':') ||
15f0808c 759 (allow_initial_tick && *s == '\'') )
a0d0e21e 760 {
3280af22
NIS
761 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
762 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
763 return start;
764 if (token == METHOD) {
765 s = skipspace(s);
766 if (*s == '(')
3280af22 767 PL_expect = XTERM;
463ee0b2 768 else {
3280af22 769 PL_expect = XOPERATOR;
463ee0b2 770 }
79072805 771 }
3280af22
NIS
772 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
773 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
774 force_next(token);
775 }
776 return s;
777}
778
ffb4593c
NT
779/*
780 * S_force_ident
9cbb5ea2 781 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
782 * text only contains the "foo" portion. The first argument is a pointer
783 * to the "foo", and the second argument is the type symbol to prefix.
784 * Forces the next token to be a "WORD".
9cbb5ea2 785 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
786 */
787
76e3520e 788STATIC void
cea2e8a9 789S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
790{
791 if (s && *s) {
11343788 792 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 793 PL_nextval[PL_nexttoke].opval = o;
79072805 794 force_next(WORD);
748a9306 795 if (kind) {
e858de61 796 dTHR; /* just for in_eval */
11343788 797 o->op_private = OPpCONST_ENTERED;
55497cff
PP
798 /* XXX see note in pp_entereval() for why we forgo typo
799 warnings if the symbol must be introduced in an eval.
800 GSAR 96-10-12 */
3280af22 801 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
802 kind == '$' ? SVt_PV :
803 kind == '@' ? SVt_PVAV :
804 kind == '%' ? SVt_PVHV :
805 SVt_PVGV
806 );
748a9306 807 }
79072805
LW
808 }
809}
810
ffb4593c
NT
811/*
812 * S_force_version
813 * Forces the next token to be a version number.
814 */
815
76e3520e 816STATIC char *
cea2e8a9 817S_force_version(pTHX_ char *s)
89bfa8cd
PP
818{
819 OP *version = Nullop;
820
821 s = skipspace(s);
822
a7cb1f99
GS
823 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
824 char *d = s;
825 if (*d == 'v')
826 d++;
827 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 828 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
89bfa8cd
PP
829 s = scan_num(s);
830 /* real VERSION number -- GBARR */
831 version = yylval.opval;
832 }
833 }
834
835 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 836 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
837 force_next(WORD);
838
839 return (s);
840}
841
ffb4593c
NT
842/*
843 * S_tokeq
844 * Tokenize a quoted string passed in as an SV. It finds the next
845 * chunk, up to end of string or a backslash. It may make a new
846 * SV containing that chunk (if HINT_NEW_STRING is on). It also
847 * turns \\ into \.
848 */
849
76e3520e 850STATIC SV *
cea2e8a9 851S_tokeq(pTHX_ SV *sv)
79072805
LW
852{
853 register char *s;
854 register char *send;
855 register char *d;
b3ac6de7
IZ
856 STRLEN len = 0;
857 SV *pv = sv;
79072805
LW
858
859 if (!SvLEN(sv))
b3ac6de7 860 goto finish;
79072805 861
a0d0e21e 862 s = SvPV_force(sv, len);
748a9306 863 if (SvIVX(sv) == -1)
b3ac6de7 864 goto finish;
463ee0b2 865 send = s + len;
79072805
LW
866 while (s < send && *s != '\\')
867 s++;
868 if (s == send)
b3ac6de7 869 goto finish;
79072805 870 d = s;
3280af22 871 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 872 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
873 while (s < send) {
874 if (*s == '\\') {
a0d0e21e 875 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
876 s++; /* all that, just for this */
877 }
878 *d++ = *s++;
879 }
880 *d = '\0';
463ee0b2 881 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 882 finish:
3280af22 883 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 884 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
885 return sv;
886}
887
ffb4593c
NT
888/*
889 * Now come three functions related to double-quote context,
890 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
891 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
892 * interact with PL_lex_state, and create fake ( ... ) argument lists
893 * to handle functions and concatenation.
894 * They assume that whoever calls them will be setting up a fake
895 * join call, because each subthing puts a ',' after it. This lets
896 * "lower \luPpEr"
897 * become
898 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
899 *
900 * (I'm not sure whether the spurious commas at the end of lcfirst's
901 * arguments and join's arguments are created or not).
902 */
903
904/*
905 * S_sublex_start
906 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
907 *
908 * Pattern matching will set PL_lex_op to the pattern-matching op to
909 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
910 *
911 * OP_CONST and OP_READLINE are easy--just make the new op and return.
912 *
913 * Everything else becomes a FUNC.
914 *
915 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
916 * had an OP_CONST or OP_READLINE). This just sets us up for a
917 * call to S_sublex_push().
918 */
919
76e3520e 920STATIC I32
cea2e8a9 921S_sublex_start(pTHX)
79072805
LW
922{
923 register I32 op_type = yylval.ival;
79072805
LW
924
925 if (op_type == OP_NULL) {
3280af22
NIS
926 yylval.opval = PL_lex_op;
927 PL_lex_op = Nullop;
79072805
LW
928 return THING;
929 }
930 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 931 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
932
933 if (SvTYPE(sv) == SVt_PVIV) {
934 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
935 STRLEN len;
936 char *p;
937 SV *nsv;
938
939 p = SvPV(sv, len);
79cb57f6 940 nsv = newSVpvn(p, len);
b3ac6de7
IZ
941 SvREFCNT_dec(sv);
942 sv = nsv;
943 }
944 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 945 PL_lex_stuff = Nullsv;
79072805
LW
946 return THING;
947 }
948
3280af22
NIS
949 PL_sublex_info.super_state = PL_lex_state;
950 PL_sublex_info.sub_inwhat = op_type;
951 PL_sublex_info.sub_op = PL_lex_op;
952 PL_lex_state = LEX_INTERPPUSH;
55497cff 953
3280af22
NIS
954 PL_expect = XTERM;
955 if (PL_lex_op) {
956 yylval.opval = PL_lex_op;
957 PL_lex_op = Nullop;
55497cff
PP
958 return PMFUNC;
959 }
960 else
961 return FUNC;
962}
963
ffb4593c
NT
964/*
965 * S_sublex_push
966 * Create a new scope to save the lexing state. The scope will be
967 * ended in S_sublex_done. Returns a '(', starting the function arguments
968 * to the uc, lc, etc. found before.
969 * Sets PL_lex_state to LEX_INTERPCONCAT.
970 */
971
76e3520e 972STATIC I32
cea2e8a9 973S_sublex_push(pTHX)
55497cff 974{
0f15f207 975 dTHR;
f46d017c 976 ENTER;
55497cff 977
3280af22
NIS
978 PL_lex_state = PL_sublex_info.super_state;
979 SAVEI32(PL_lex_dojoin);
980 SAVEI32(PL_lex_brackets);
3280af22
NIS
981 SAVEI32(PL_lex_casemods);
982 SAVEI32(PL_lex_starts);
983 SAVEI32(PL_lex_state);
7766f137 984 SAVEVPTR(PL_lex_inpat);
3280af22 985 SAVEI32(PL_lex_inwhat);
57843af0 986 SAVECOPLINE(PL_curcop);
3280af22
NIS
987 SAVEPPTR(PL_bufptr);
988 SAVEPPTR(PL_oldbufptr);
989 SAVEPPTR(PL_oldoldbufptr);
990 SAVEPPTR(PL_linestart);
991 SAVESPTR(PL_linestr);
992 SAVEPPTR(PL_lex_brackstack);
993 SAVEPPTR(PL_lex_casestack);
994
995 PL_linestr = PL_lex_stuff;
996 PL_lex_stuff = Nullsv;
997
9cbb5ea2
GS
998 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
999 = SvPVX(PL_linestr);
3280af22
NIS
1000 PL_bufend += SvCUR(PL_linestr);
1001 SAVEFREESV(PL_linestr);
1002
1003 PL_lex_dojoin = FALSE;
1004 PL_lex_brackets = 0;
3280af22
NIS
1005 New(899, PL_lex_brackstack, 120, char);
1006 New(899, PL_lex_casestack, 12, char);
1007 SAVEFREEPV(PL_lex_brackstack);
1008 SAVEFREEPV(PL_lex_casestack);
1009 PL_lex_casemods = 0;
1010 *PL_lex_casestack = '\0';
1011 PL_lex_starts = 0;
1012 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1013 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1014
1015 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1016 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1017 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1018 else
3280af22 1019 PL_lex_inpat = Nullop;
79072805 1020
55497cff 1021 return '(';
79072805
LW
1022}
1023
ffb4593c
NT
1024/*
1025 * S_sublex_done
1026 * Restores lexer state after a S_sublex_push.
1027 */
1028
76e3520e 1029STATIC I32
cea2e8a9 1030S_sublex_done(pTHX)
79072805 1031{
3280af22
NIS
1032 if (!PL_lex_starts++) {
1033 PL_expect = XOPERATOR;
79cb57f6 1034 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1035 return THING;
1036 }
1037
3280af22
NIS
1038 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1039 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1040 return yylex();
79072805
LW
1041 }
1042
ffb4593c 1043 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1044 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1045 PL_linestr = PL_lex_repl;
1046 PL_lex_inpat = 0;
1047 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1048 PL_bufend += SvCUR(PL_linestr);
1049 SAVEFREESV(PL_linestr);
1050 PL_lex_dojoin = FALSE;
1051 PL_lex_brackets = 0;
3280af22
NIS
1052 PL_lex_casemods = 0;
1053 *PL_lex_casestack = '\0';
1054 PL_lex_starts = 0;
25da4f38 1055 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1056 PL_lex_state = LEX_INTERPNORMAL;
1057 PL_lex_starts++;
e9fa98b2
HS
1058 /* we don't clear PL_lex_repl here, so that we can check later
1059 whether this is an evalled subst; that means we rely on the
1060 logic to ensure sublex_done() is called again only via the
1061 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1062 }
e9fa98b2 1063 else {
3280af22 1064 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1065 PL_lex_repl = Nullsv;
1066 }
79072805 1067 return ',';
ffed7fef
LW
1068 }
1069 else {
f46d017c 1070 LEAVE;
3280af22
NIS
1071 PL_bufend = SvPVX(PL_linestr);
1072 PL_bufend += SvCUR(PL_linestr);
1073 PL_expect = XOPERATOR;
09bef843 1074 PL_sublex_info.sub_inwhat = 0;
79072805 1075 return ')';
ffed7fef
LW
1076 }
1077}
1078
02aa26ce
NT
1079/*
1080 scan_const
1081
1082 Extracts a pattern, double-quoted string, or transliteration. This
1083 is terrifying code.
1084
3280af22
NIS
1085 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1086 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1087 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1088
9b599b2a
GS
1089 Returns a pointer to the character scanned up to. Iff this is
1090 advanced from the start pointer supplied (ie if anything was
1091 successfully parsed), will leave an OP for the substring scanned
1092 in yylval. Caller must intuit reason for not parsing further
1093 by looking at the next characters herself.
1094
02aa26ce
NT
1095 In patterns:
1096 backslashes:
1097 double-quoted style: \r and \n
1098 regexp special ones: \D \s
1099 constants: \x3
1100 backrefs: \1 (deprecated in substitution replacements)
1101 case and quoting: \U \Q \E
1102 stops on @ and $, but not for $ as tail anchor
1103
1104 In transliterations:
1105 characters are VERY literal, except for - not at the start or end
1106 of the string, which indicates a range. scan_const expands the
1107 range to the full set of intermediate characters.
1108
1109 In double-quoted strings:
1110 backslashes:
1111 double-quoted style: \r and \n
1112 constants: \x3
1113 backrefs: \1 (deprecated)
1114 case and quoting: \U \Q \E
1115 stops on @ and $
1116
1117 scan_const does *not* construct ops to handle interpolated strings.
1118 It stops processing as soon as it finds an embedded $ or @ variable
1119 and leaves it to the caller to work out what's going on.
1120
1121 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1122
1123 $ in pattern could be $foo or could be tail anchor. Assumption:
1124 it's a tail anchor if $ is the last thing in the string, or if it's
1125 followed by one of ")| \n\t"
1126
1127 \1 (backreferences) are turned into $1
1128
1129 The structure of the code is
1130 while (there's a character to process) {
1131 handle transliteration ranges
1132 skip regexp comments
1133 skip # initiated comments in //x patterns
1134 check for embedded @foo
1135 check for embedded scalars
1136 if (backslash) {
1137 leave intact backslashes from leave (below)
1138 deprecate \1 in strings and sub replacements
1139 handle string-changing backslashes \l \U \Q \E, etc.
1140 switch (what was escaped) {
1141 handle - in a transliteration (becomes a literal -)
1142 handle \132 octal characters
1143 handle 0x15 hex characters
1144 handle \cV (control V)
1145 handle printf backslashes (\f, \r, \n, etc)
1146 } (end switch)
1147 } (end if backslash)
1148 } (end while character to read)
1149
1150*/
1151
76e3520e 1152STATIC char *
cea2e8a9 1153S_scan_const(pTHX_ char *start)
79072805 1154{
3280af22 1155 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1156 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1157 register char *s = start; /* start of the constant */
1158 register char *d = SvPVX(sv); /* destination for copies */
1159 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1160 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1161 I32 len; /* ? */
ac2262e3 1162 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1163 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1164 : UTF;
ac2262e3 1165 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1166 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1167 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1168 : UTF;
dff6d3cd 1169 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1170 PL_lex_inpat
4a2d328f 1171 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1172 : "";
79072805
LW
1173
1174 while (s < send || dorange) {
02aa26ce 1175 /* get transliterations out of the way (they're most literal) */
3280af22 1176 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1177 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1178 if (dorange) {
02aa26ce 1179 I32 i; /* current expanded character */
8ada0baa 1180 I32 min; /* first character in range */
02aa26ce
NT
1181 I32 max; /* last character in range */
1182
1183 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1184 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1185 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1186 d -= 2; /* eat the first char and the - */
1187
8ada0baa
JH
1188 min = (U8)*d; /* first char in range */
1189 max = (U8)d[1]; /* last char in range */
1190
1191#ifndef ASCIIish
1192 if ((isLOWER(min) && isLOWER(max)) ||
1193 (isUPPER(min) && isUPPER(max))) {
1194 if (isLOWER(min)) {
1195 for (i = min; i <= max; i++)
1196 if (isLOWER(i))
1197 *d++ = i;
1198 } else {
1199 for (i = min; i <= max; i++)
1200 if (isUPPER(i))
1201 *d++ = i;
1202 }
1203 }
1204 else
1205#endif
1206 for (i = min; i <= max; i++)
1207 *d++ = i;
02aa26ce
NT
1208
1209 /* mark the range as done, and continue */
79072805
LW
1210 dorange = FALSE;
1211 continue;
1212 }
02aa26ce
NT
1213
1214 /* range begins (ignore - as first or last char) */
79072805 1215 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1216 if (utf) {
a176fa2a 1217 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1218 s++;
1219 continue;
1220 }
79072805
LW
1221 dorange = TRUE;
1222 s++;
1223 }
1224 }
02aa26ce
NT
1225
1226 /* if we get here, we're not doing a transliteration */
1227
0f5d15d6
IZ
1228 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1229 except for the last char, which will be done separately. */
3280af22 1230 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1231 if (s[2] == '#') {
1232 while (s < send && *s != ')')
1233 *d++ = *s++;
0f5d15d6
IZ
1234 } else if (s[2] == '{'
1235 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1236 I32 count = 1;
0f5d15d6 1237 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1238 char c;
1239
d9f97599
GS
1240 while (count && (c = *regparse)) {
1241 if (c == '\\' && regparse[1])
1242 regparse++;
cc6b7395
IZ
1243 else if (c == '{')
1244 count++;
1245 else if (c == '}')
1246 count--;
d9f97599 1247 regparse++;
cc6b7395 1248 }
5bdf89e7
IZ
1249 if (*regparse != ')') {
1250 regparse--; /* Leave one char for continuation. */
cc6b7395 1251 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1252 }
0f5d15d6 1253 while (s < regparse)
cc6b7395
IZ
1254 *d++ = *s++;
1255 }
748a9306 1256 }
02aa26ce
NT
1257
1258 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1259 else if (*s == '#' && PL_lex_inpat &&
1260 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1261 while (s+1 < send && *s != '\n')
1262 *d++ = *s++;
1263 }
02aa26ce
NT
1264
1265 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1266 else if (*s == '@' && s[1]
1267 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1268 break;
02aa26ce
NT
1269
1270 /* check for embedded scalars. only stop if we're sure it's a
1271 variable.
1272 */
79072805 1273 else if (*s == '$') {
3280af22 1274 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1275 break;
c277df42 1276 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1277 break; /* in regexp, $ might be tail anchor */
1278 }
02aa26ce 1279
a0ed51b3
LW
1280 /* (now in tr/// code again) */
1281
d008e5eb
GS
1282 if (*s & 0x80 && thisutf) {
1283 dTHR; /* only for ckWARN */
1284 if (ckWARN(WARN_UTF8)) {
dfe13c55 1285 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb 1286 if (len) {
b1c7b182 1287 has_utf = TRUE;
d008e5eb
GS
1288 while (len--)
1289 *d++ = *s++;
1290 continue;
1291 }
a0ed51b3 1292 }
b1c7b182
GS
1293 else
1294 has_utf = TRUE; /* assume valid utf8 */
a0ed51b3
LW
1295 }
1296
02aa26ce 1297 /* backslashes */
79072805
LW
1298 if (*s == '\\' && s+1 < send) {
1299 s++;
02aa26ce
NT
1300
1301 /* some backslashes we leave behind */
c9f97d15 1302 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1303 *d++ = '\\';
1304 *d++ = *s++;
1305 continue;
1306 }
02aa26ce
NT
1307
1308 /* deprecate \1 in strings and substitution replacements */
3280af22 1309 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1310 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1311 {
d008e5eb 1312 dTHR; /* only for ckWARN */
599cee73 1313 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1314 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1315 *--s = '$';
1316 break;
1317 }
02aa26ce
NT
1318
1319 /* string-change backslash escapes */
3280af22 1320 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1321 --s;
1322 break;
1323 }
02aa26ce
NT
1324
1325 /* if we get here, it's either a quoted -, or a digit */
79072805 1326 switch (*s) {
02aa26ce
NT
1327
1328 /* quoted - in transliterations */
79072805 1329 case '-':
3280af22 1330 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1331 *d++ = *s++;
1332 continue;
1333 }
1334 /* FALL THROUGH */
1335 default:
11b8faa4
JH
1336 {
1337 dTHR;
1338 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1339 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1340 "Unrecognized escape \\%c passed through",
1341 *s);
1342 /* default action is to copy the quoted character */
1343 *d++ = *s++;
1344 continue;
1345 }
02aa26ce
NT
1346
1347 /* \132 indicates an octal constant */
79072805
LW
1348 case '0': case '1': case '2': case '3':
1349 case '4': case '5': case '6': case '7':
dff6d3cd 1350 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1351 s += len;
1352 continue;
02aa26ce
NT
1353
1354 /* \x24 indicates a hex constant */
79072805 1355 case 'x':
a0ed51b3
LW
1356 ++s;
1357 if (*s == '{') {
1358 char* e = strchr(s, '}');
234a4bc6 1359 UV uv;
a0ed51b3 1360
adaeee49 1361 if (!e) {
a0ed51b3 1362 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1363 e = s;
1364 }
a0ed51b3 1365 /* note: utf always shorter than hex */
234a4bc6
GS
1366 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1367 if (uv > 127) {
1368 d = (char*)uv_to_utf8((U8*)d, uv);
1369 has_utf = TRUE;
1370 }
1371 else
1372 *d++ = (char)uv;
a0ed51b3 1373 s = e + 1;
a0ed51b3
LW
1374 }
1375 else {
234a4bc6 1376 /* XXX collapse this branch into the one above */
a0ed51b3
LW
1377 UV uv = (UV)scan_hex(s, 2, &len);
1378 if (utf && PL_lex_inwhat == OP_TRANS &&
1379 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1380 {
dfe13c55 1381 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
7e2040f0 1382 has_utf = TRUE;
a0ed51b3
LW
1383 }
1384 else {
d008e5eb
GS
1385 if (uv >= 127 && UTF) {
1386 dTHR;
1387 if (ckWARN(WARN_UTF8))
cea2e8a9 1388 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb 1389 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
894356b3 1390 (int)len,s,(int)len,s);
d008e5eb 1391 }
a0ed51b3
LW
1392 *d++ = (char)uv;
1393 }
1394 s += len;
1395 }
79072805 1396 continue;
02aa26ce 1397
4a2d328f
IZ
1398 /* \N{latin small letter a} is a named character */
1399 case 'N':
423cee85
JH
1400 ++s;
1401 if (*s == '{') {
1402 char* e = strchr(s, '}');
1403 HV *hv;
1404 SV **svp;
1405 SV *res, *cv;
1406 STRLEN len;
1407 char *str;
1408 char *why = Nullch;
1409
1410 if (!e) {
5777a3f7 1411 yyerror("Missing right brace on \\N{}");
423cee85
JH
1412 e = s - 1;
1413 goto cont_scan;
1414 }
1415 res = newSVpvn(s + 1, e - s - 1);
1416 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1417 res, Nullsv, "\\N{...}" );
423cee85
JH
1418 str = SvPV(res,len);
1419 if (len > e - s + 4) {
1420 char *odest = SvPVX(sv);
1421
1422 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1423 d = SvPVX(sv) + (d - odest);
1424 }
1425 Copy(str, d, len, char);
1426 d += len;
1427 SvREFCNT_dec(res);
1428 cont_scan:
1429 s = e + 1;
1430 }
1431 else
5777a3f7 1432 yyerror("Missing braces on \\N{}");
423cee85
JH
1433 continue;
1434
02aa26ce 1435 /* \c is a control character */
79072805
LW
1436 case 'c':
1437 s++;
9d116dd7
JH
1438#ifdef EBCDIC
1439 *d = *s++;
1440 if (isLOWER(*d))
1441 *d = toUPPER(*d);
1442 *d++ = toCTRL(*d);
1443#else
bbce6d69
PP
1444 len = *s++;
1445 *d++ = toCTRL(len);
9d116dd7 1446#endif
79072805 1447 continue;
02aa26ce
NT
1448
1449 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1450 case 'b':
1451 *d++ = '\b';
1452 break;
1453 case 'n':
1454 *d++ = '\n';
1455 break;
1456 case 'r':
1457 *d++ = '\r';
1458 break;
1459 case 'f':
1460 *d++ = '\f';
1461 break;
1462 case 't':
1463 *d++ = '\t';
1464 break;
34a3fe2a
PP
1465#ifdef EBCDIC
1466 case 'e':
1467 *d++ = '\047'; /* CP 1047 */
1468 break;
1469 case 'a':
1470 *d++ = '\057'; /* CP 1047 */
1471 break;
1472#else
79072805
LW
1473 case 'e':
1474 *d++ = '\033';
1475 break;
1476 case 'a':
1477 *d++ = '\007';
1478 break;
34a3fe2a 1479#endif
02aa26ce
NT
1480 } /* end switch */
1481
79072805
LW
1482 s++;
1483 continue;
02aa26ce
NT
1484 } /* end if (backslash) */
1485
79072805 1486 *d++ = *s++;
02aa26ce
NT
1487 } /* while loop to process each character */
1488
1489 /* terminate the string and set up the sv */
79072805 1490 *d = '\0';
463ee0b2 1491 SvCUR_set(sv, d - SvPVX(sv));
79072805 1492 SvPOK_on(sv);
7e2040f0
GS
1493 if (has_utf)
1494 SvUTF8_on(sv);
79072805 1495
02aa26ce 1496 /* shrink the sv if we allocated more than we used */
79072805
LW
1497 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1498 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1499 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1500 }
02aa26ce 1501
9b599b2a 1502 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1503 if (s > PL_bufptr) {
1504 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1505 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1506 sv, Nullsv,
3280af22 1507 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1508 ? "tr"
3280af22 1509 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1510 ? "s"
1511 : "qq")));
79072805 1512 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1513 } else
8990e307 1514 SvREFCNT_dec(sv);
79072805
LW
1515 return s;
1516}
1517
ffb4593c
NT
1518/* S_intuit_more
1519 * Returns TRUE if there's more to the expression (e.g., a subscript),
1520 * FALSE otherwise.
ffb4593c
NT
1521 *
1522 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1523 *
1524 * ->[ and ->{ return TRUE
1525 * { and [ outside a pattern are always subscripts, so return TRUE
1526 * if we're outside a pattern and it's not { or [, then return FALSE
1527 * if we're in a pattern and the first char is a {
1528 * {4,5} (any digits around the comma) returns FALSE
1529 * if we're in a pattern and the first char is a [
1530 * [] returns FALSE
1531 * [SOMETHING] has a funky algorithm to decide whether it's a
1532 * character class or not. It has to deal with things like
1533 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1534 * anything else returns TRUE
1535 */
1536
9cbb5ea2
GS
1537/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1538
76e3520e 1539STATIC int
cea2e8a9 1540S_intuit_more(pTHX_ register char *s)
79072805 1541{
3280af22 1542 if (PL_lex_brackets)
79072805
LW
1543 return TRUE;
1544 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1545 return TRUE;
1546 if (*s != '{' && *s != '[')
1547 return FALSE;
3280af22 1548 if (!PL_lex_inpat)
79072805
LW
1549 return TRUE;
1550
1551 /* In a pattern, so maybe we have {n,m}. */
1552 if (*s == '{') {
1553 s++;
1554 if (!isDIGIT(*s))
1555 return TRUE;
1556 while (isDIGIT(*s))
1557 s++;
1558 if (*s == ',')
1559 s++;
1560 while (isDIGIT(*s))
1561 s++;
1562 if (*s == '}')
1563 return FALSE;
1564 return TRUE;
1565
1566 }
1567
1568 /* On the other hand, maybe we have a character class */
1569
1570 s++;
1571 if (*s == ']' || *s == '^')
1572 return FALSE;
1573 else {
ffb4593c 1574 /* this is terrifying, and it works */
79072805
LW
1575 int weight = 2; /* let's weigh the evidence */
1576 char seen[256];
f27ffc4a 1577 unsigned char un_char = 255, last_un_char;
93a17b20 1578 char *send = strchr(s,']');
3280af22 1579 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1580
1581 if (!send) /* has to be an expression */
1582 return TRUE;
1583
1584 Zero(seen,256,char);
1585 if (*s == '$')
1586 weight -= 3;
1587 else if (isDIGIT(*s)) {
1588 if (s[1] != ']') {
1589 if (isDIGIT(s[1]) && s[2] == ']')
1590 weight -= 10;
1591 }
1592 else
1593 weight -= 100;
1594 }
1595 for (; s < send; s++) {
1596 last_un_char = un_char;
1597 un_char = (unsigned char)*s;
1598 switch (*s) {
1599 case '@':
1600 case '&':
1601 case '$':
1602 weight -= seen[un_char] * 10;
7e2040f0 1603 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1604 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1605 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1606 weight -= 100;
1607 else
1608 weight -= 10;
1609 }
1610 else if (*s == '$' && s[1] &&
93a17b20
LW
1611 strchr("[#!%*<>()-=",s[1])) {
1612 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1613 weight -= 10;
1614 else
1615 weight -= 1;
1616 }
1617 break;
1618 case '\\':
1619 un_char = 254;
1620 if (s[1]) {
93a17b20 1621 if (strchr("wds]",s[1]))
79072805
LW
1622 weight += 100;
1623 else if (seen['\''] || seen['"'])
1624 weight += 1;
93a17b20 1625 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1626 weight += 40;
1627 else if (isDIGIT(s[1])) {
1628 weight += 40;
1629 while (s[1] && isDIGIT(s[1]))
1630 s++;
1631 }
1632 }
1633 else
1634 weight += 100;
1635 break;
1636 case '-':
1637 if (s[1] == '\\')
1638 weight += 50;
93a17b20 1639 if (strchr("aA01! ",last_un_char))
79072805 1640 weight += 30;
93a17b20 1641 if (strchr("zZ79~",s[1]))
79072805 1642 weight += 30;
f27ffc4a
GS
1643 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1644 weight -= 5; /* cope with negative subscript */
79072805
LW
1645 break;
1646 default:
93a17b20 1647 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1648 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1649 char *d = tmpbuf;
1650 while (isALPHA(*s))
1651 *d++ = *s++;
1652 *d = '\0';
1653 if (keyword(tmpbuf, d - tmpbuf))
1654 weight -= 150;
1655 }
1656 if (un_char == last_un_char + 1)
1657 weight += 5;
1658 weight -= seen[un_char];
1659 break;
1660 }
1661 seen[un_char]++;
1662 }
1663 if (weight >= 0) /* probably a character class */
1664 return FALSE;
1665 }
1666
1667 return TRUE;
1668}
ffed7fef 1669
ffb4593c
NT
1670/*
1671 * S_intuit_method
1672 *
1673 * Does all the checking to disambiguate
1674 * foo bar
1675 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1676 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1677 *
1678 * First argument is the stuff after the first token, e.g. "bar".
1679 *
1680 * Not a method if bar is a filehandle.
1681 * Not a method if foo is a subroutine prototyped to take a filehandle.
1682 * Not a method if it's really "Foo $bar"
1683 * Method if it's "foo $bar"
1684 * Not a method if it's really "print foo $bar"
1685 * Method if it's really "foo package::" (interpreted as package->foo)
1686 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1687 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1688 * =>
1689 */
1690
76e3520e 1691STATIC int
cea2e8a9 1692S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1693{
1694 char *s = start + (*start == '$');
3280af22 1695 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1696 STRLEN len;
1697 GV* indirgv;
1698
1699 if (gv) {
b6c543e3 1700 CV *cv;
a0d0e21e
LW
1701 if (GvIO(gv))
1702 return 0;
b6c543e3
IZ
1703 if ((cv = GvCVu(gv))) {
1704 char *proto = SvPVX(cv);
1705 if (proto) {
1706 if (*proto == ';')
1707 proto++;
1708 if (*proto == '*')
1709 return 0;
1710 }
1711 } else
a0d0e21e
LW
1712 gv = 0;
1713 }
8903cb82 1714 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1715 /* start is the beginning of the possible filehandle/object,
1716 * and s is the end of it
1717 * tmpbuf is a copy of it
1718 */
1719
a0d0e21e 1720 if (*start == '$') {
3280af22 1721 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1722 return 0;
1723 s = skipspace(s);
3280af22
NIS
1724 PL_bufptr = start;
1725 PL_expect = XREF;
a0d0e21e
LW
1726 return *s == '(' ? FUNCMETH : METHOD;
1727 }
1728 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1729 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1730 len -= 2;
1731 tmpbuf[len] = '\0';
1732 goto bare_package;
1733 }
1734 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1735 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1736 return 0;
1737 /* filehandle or package name makes it a method */
89bfa8cd 1738 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1739 s = skipspace(s);
3280af22 1740 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1741 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1742 bare_package:
3280af22 1743 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1744 newSVpvn(tmpbuf,len));
3280af22
NIS
1745 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1746 PL_expect = XTERM;
a0d0e21e 1747 force_next(WORD);
3280af22 1748 PL_bufptr = s;
a0d0e21e
LW
1749 return *s == '(' ? FUNCMETH : METHOD;
1750 }
1751 }
1752 return 0;
1753}
1754
ffb4593c
NT
1755/*
1756 * S_incl_perldb
1757 * Return a string of Perl code to load the debugger. If PERL5DB
1758 * is set, it will return the contents of that, otherwise a
1759 * compile-time require of perl5db.pl.
1760 */
1761
76e3520e 1762STATIC char*
cea2e8a9 1763S_incl_perldb(pTHX)
a0d0e21e 1764{
3280af22 1765 if (PL_perldb) {
76e3520e 1766 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1767
1768 if (pdb)
1769 return pdb;
61bb5906 1770 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1771 return "BEGIN { require 'perl5db.pl' }";
1772 }
1773 return "";
1774}
1775
1776
16d20bd9
AD
1777/* Encoded script support. filter_add() effectively inserts a
1778 * 'pre-processing' function into the current source input stream.
1779 * Note that the filter function only applies to the current source file
1780 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1781 *
1782 * The datasv parameter (which may be NULL) can be used to pass
1783 * private data to this instance of the filter. The filter function
1784 * can recover the SV using the FILTER_DATA macro and use it to
1785 * store private buffers and state information.
1786 *
1787 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1788 * and the IoDIRP field is used to store the function pointer,
1789 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1790 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1791 * private use must be set using malloc'd pointers.
1792 */
16d20bd9
AD
1793
1794SV *
864dbfa3 1795Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1796{
f4c556ac
GS
1797 if (!funcp)
1798 return Nullsv;
1799
3280af22
NIS
1800 if (!PL_rsfp_filters)
1801 PL_rsfp_filters = newAV();
16d20bd9 1802 if (!datasv)
8c52afec 1803 datasv = NEWSV(255,0);
16d20bd9 1804 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1805 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1806 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1807 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1808 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1809 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1810 av_unshift(PL_rsfp_filters, 1);
1811 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1812 return(datasv);
1813}
1814
1815
1816/* Delete most recently added instance of this filter function. */
a0d0e21e 1817void
864dbfa3 1818Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1819{
e0c19803 1820 SV *datasv;
f4c556ac 1821 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1822 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1823 return;
1824 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1825 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1826 if (IoDIRP(datasv) == (DIR*)funcp) {
1827 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1828 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1829 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1830
16d20bd9
AD
1831 return;
1832 }
1833 /* we need to search for the correct entry and clear it */
cea2e8a9 1834 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1835}
1836
1837
1838/* Invoke the n'th filter function for the current rsfp. */
1839I32
864dbfa3 1840Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1841
1842
1843 /* 0 = read one text line */
a0d0e21e 1844{
16d20bd9
AD
1845 filter_t funcp;
1846 SV *datasv = NULL;
e50aee73 1847
3280af22 1848 if (!PL_rsfp_filters)
16d20bd9 1849 return -1;
3280af22 1850 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1851 /* Provide a default input filter to make life easy. */
1852 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1853 DEBUG_P(PerlIO_printf(Perl_debug_log,
1854 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1855 if (maxlen) {
1856 /* Want a block */
1857 int len ;
1858 int old_len = SvCUR(buf_sv) ;
1859
1860 /* ensure buf_sv is large enough */
1861 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1862 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1863 if (PerlIO_error(PL_rsfp))
37120919
AD
1864 return -1; /* error */
1865 else
1866 return 0 ; /* end of file */
1867 }
16d20bd9
AD
1868 SvCUR_set(buf_sv, old_len + len) ;
1869 } else {
1870 /* Want a line */
3280af22
NIS
1871 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1872 if (PerlIO_error(PL_rsfp))
37120919
AD
1873 return -1; /* error */
1874 else
1875 return 0 ; /* end of file */
1876 }
16d20bd9
AD
1877 }
1878 return SvCUR(buf_sv);
1879 }
1880 /* Skip this filter slot if filter has been deleted */
3280af22 1881 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1882 DEBUG_P(PerlIO_printf(Perl_debug_log,
1883 "filter_read %d: skipped (filter deleted)\n",
1884 idx));
16d20bd9
AD
1885 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1886 }
1887 /* Get function pointer hidden within datasv */
1888 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1889 DEBUG_P(PerlIO_printf(Perl_debug_log,
1890 "filter_read %d: via function %p (%s)\n",
1891 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1892 /* Call function. The function is expected to */
1893 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1894 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1895 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1896}
1897
76e3520e 1898STATIC char *
cea2e8a9 1899S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1900{
c39cd008 1901#ifdef PERL_CR_FILTER
3280af22 1902 if (!PL_rsfp_filters) {
c39cd008 1903 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1904 }
1905#endif
3280af22 1906 if (PL_rsfp_filters) {
16d20bd9 1907
55497cff
PP
1908 if (!append)
1909 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1910 if (FILTER_READ(0, sv, 0) > 0)
1911 return ( SvPVX(sv) ) ;
1912 else
1913 return Nullch ;
1914 }
9d116dd7 1915 else
fd049845 1916 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1917}
1918
1919
748a9306
LW
1920#ifdef DEBUGGING
1921 static char* exp_name[] =
09bef843
SB
1922 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1923 "ATTRTERM", "TERMBLOCK"
1924 };
748a9306 1925#endif
463ee0b2 1926
02aa26ce
NT
1927/*
1928 yylex
1929
1930 Works out what to call the token just pulled out of the input
1931 stream. The yacc parser takes care of taking the ops we return and
1932 stitching them into a tree.
1933
1934 Returns:
1935 PRIVATEREF
1936
1937 Structure:
1938 if read an identifier
1939 if we're in a my declaration
1940 croak if they tried to say my($foo::bar)
1941 build the ops for a my() declaration
1942 if it's an access to a my() variable
1943 are we in a sort block?
1944 croak if my($a); $a <=> $b
1945 build ops for access to a my() variable
1946 if in a dq string, and they've said @foo and we can't find @foo
1947 croak
1948 build ops for a bareword
1949 if we already built the token before, use it.
1950*/
1951
864dbfa3
GS
1952int
1953#ifdef USE_PURE_BISON
cea2e8a9 1954Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1955#else
cea2e8a9 1956Perl_yylex(pTHX)
864dbfa3 1957#endif
378cc40b 1958{
11343788 1959 dTHR;
79072805 1960 register char *s;
378cc40b 1961 register char *d;
79072805 1962 register I32 tmp;
463ee0b2 1963 STRLEN len;
161b471a
NIS
1964 GV *gv = Nullgv;
1965 GV **gvp = 0;
a687059c 1966
a1a0e61e
TD
1967#ifdef USE_PURE_BISON
1968 yylval_pointer = lvalp;
1969 yychar_pointer = lcharp;
1970#endif
1971
02aa26ce 1972 /* check if there's an identifier for us to look at */
3280af22 1973 if (PL_pending_ident) {
02aa26ce 1974 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1975 char pit = PL_pending_ident;
1976 PL_pending_ident = 0;
bbce6d69 1977
02aa26ce
NT
1978 /* if we're in a my(), we can't allow dynamics here.
1979 $foo'bar has already been turned into $foo::bar, so
1980 just check for colons.
1981
1982 if it's a legal name, the OP is a PADANY.
1983 */
3280af22 1984 if (PL_in_my) {
77ca0c92 1985 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
1986 if (strchr(PL_tokenbuf,':'))
1987 yyerror(Perl_form(aTHX_ "No package name allowed for "
1988 "variable %s in \"our\"",
1989 PL_tokenbuf));
77ca0c92
LW
1990 tmp = pad_allocmy(PL_tokenbuf);
1991 }
1992 else {
1993 if (strchr(PL_tokenbuf,':'))
1994 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1995
77ca0c92
LW
1996 yylval.opval = newOP(OP_PADANY, 0);
1997 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1998 return PRIVATEREF;
1999 }
bbce6d69
PP
2000 }
2001
02aa26ce
NT
2002 /*
2003 build the ops for accesses to a my() variable.
2004
2005 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2006 then used in a comparison. This catches most, but not
2007 all cases. For instance, it catches
2008 sort { my($a); $a <=> $b }
2009 but not
2010 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2011 (although why you'd do that is anyone's guess).
2012 */
2013
3280af22 2014 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2015#ifdef USE_THREADS
54b9620d 2016 /* Check for single character per-thread SVs */
3280af22
NIS
2017 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2018 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2019 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2020 {
2faa37cc 2021 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2022 yylval.opval->op_targ = tmp;
2023 return PRIVATEREF;
2024 }
2025#endif /* USE_THREADS */
3280af22 2026 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2027 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2028 /* might be an "our" variable" */
f472eb5c 2029 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2030 /* build ops for a bareword */
f472eb5c
GS
2031 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2032 sv_catpvn(sym, "::", 2);
2033 sv_catpv(sym, PL_tokenbuf+1);
2034 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2035 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2036 gv_fetchpv(SvPVX(sym),
77ca0c92 2037 (PL_in_eval
f472eb5c
GS
2038 ? (GV_ADDMULTI | GV_ADDINEVAL)
2039 : TRUE
77ca0c92
LW
2040 ),
2041 ((PL_tokenbuf[0] == '$') ? SVt_PV
2042 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2043 : SVt_PVHV));
2044 return WORD;
2045 }
2046
02aa26ce 2047 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2048 if (PL_last_lop_op == OP_SORT &&
2049 PL_tokenbuf[0] == '$' &&
2050 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2051 && !PL_tokenbuf[2])
bbce6d69 2052 {
3280af22
NIS
2053 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2054 d < PL_bufend && *d != '\n';
a863c7d1
MB
2055 d++)
2056 {
2057 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2058 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2059 PL_tokenbuf);
a863c7d1 2060 }
bbce6d69
PP
2061 }
2062 }
bbce6d69 2063
a863c7d1
MB
2064 yylval.opval = newOP(OP_PADANY, 0);
2065 yylval.opval->op_targ = tmp;
2066 return PRIVATEREF;
2067 }
bbce6d69
PP
2068 }
2069
02aa26ce
NT
2070 /*
2071 Whine if they've said @foo in a doublequoted string,
2072 and @foo isn't a variable we can find in the symbol
2073 table.
2074 */
3280af22
NIS
2075 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2076 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2077 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2078 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2079 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2080 }
2081
02aa26ce 2082 /* build ops for a bareword */
3280af22 2083 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2084 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2085 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2086 ((PL_tokenbuf[0] == '$') ? SVt_PV
2087 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2088 : SVt_PVHV));
2089 return WORD;
2090 }
2091
02aa26ce
NT
2092 /* no identifier pending identification */
2093
3280af22 2094 switch (PL_lex_state) {
79072805
LW
2095#ifdef COMMENTARY
2096 case LEX_NORMAL: /* Some compilers will produce faster */
2097 case LEX_INTERPNORMAL: /* code if we comment these out. */
2098 break;
2099#endif
2100
09bef843 2101 /* when we've already built the next token, just pull it out of the queue */
79072805 2102 case LEX_KNOWNEXT:
3280af22
NIS
2103 PL_nexttoke--;
2104 yylval = PL_nextval[PL_nexttoke];
2105 if (!PL_nexttoke) {
2106 PL_lex_state = PL_lex_defer;
2107 PL_expect = PL_lex_expect;
2108 PL_lex_defer = LEX_NORMAL;
463ee0b2 2109 }
3280af22 2110 return(PL_nexttype[PL_nexttoke]);
79072805 2111
02aa26ce 2112 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2113 when we get here, PL_bufptr is at the \
02aa26ce 2114 */
79072805
LW
2115 case LEX_INTERPCASEMOD:
2116#ifdef DEBUGGING
3280af22 2117 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2118 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2119#endif
02aa26ce 2120 /* handle \E or end of string */
3280af22 2121 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2122 char oldmod;
02aa26ce
NT
2123
2124 /* if at a \E */
3280af22
NIS
2125 if (PL_lex_casemods) {
2126 oldmod = PL_lex_casestack[--PL_lex_casemods];
2127 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2128
3280af22
NIS
2129 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2130 PL_bufptr += 2;
2131 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2132 }
79072805
LW
2133 return ')';
2134 }
3280af22
NIS
2135 if (PL_bufptr != PL_bufend)
2136 PL_bufptr += 2;
2137 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2138 return yylex();
79072805
LW
2139 }
2140 else {
3280af22 2141 s = PL_bufptr + 1;
79072805
LW
2142 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2143 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2144 if (strchr("LU", *s) &&
3280af22 2145 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2146 {
3280af22 2147 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2148 return ')';
2149 }
3280af22
NIS
2150 if (PL_lex_casemods > 10) {
2151 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2152 if (newlb != PL_lex_casestack) {
a0d0e21e 2153 SAVEFREEPV(newlb);
3280af22 2154 PL_lex_casestack = newlb;
a0d0e21e
LW
2155 }
2156 }
3280af22
NIS
2157 PL_lex_casestack[PL_lex_casemods++] = *s;
2158 PL_lex_casestack[PL_lex_casemods] = '\0';
2159 PL_lex_state = LEX_INTERPCONCAT;
2160 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2161 force_next('(');
2162 if (*s == 'l')
3280af22 2163 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2164 else if (*s == 'u')
3280af22 2165 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2166 else if (*s == 'L')
3280af22 2167 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2168 else if (*s == 'U')
3280af22 2169 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2170 else if (*s == 'Q')
3280af22 2171 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2172 else
cea2e8a9 2173 Perl_croak(aTHX_ "panic: yylex");
3280af22 2174 PL_bufptr = s + 1;
79072805 2175 force_next(FUNC);
3280af22
NIS
2176 if (PL_lex_starts) {
2177 s = PL_bufptr;
2178 PL_lex_starts = 0;
79072805
LW
2179 Aop(OP_CONCAT);
2180 }
2181 else
cea2e8a9 2182 return yylex();
79072805
LW
2183 }
2184
55497cff
PP
2185 case LEX_INTERPPUSH:
2186 return sublex_push();
2187
79072805 2188 case LEX_INTERPSTART:
3280af22 2189 if (PL_bufptr == PL_bufend)
79072805 2190 return sublex_done();
3280af22
NIS
2191 PL_expect = XTERM;
2192 PL_lex_dojoin = (*PL_bufptr == '@');
2193 PL_lex_state = LEX_INTERPNORMAL;
2194 if (PL_lex_dojoin) {
2195 PL_nextval[PL_nexttoke].ival = 0;
79072805 2196 force_next(',');
554b3eca 2197#ifdef USE_THREADS
533c011a
NIS
2198 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2199 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2200 force_next(PRIVATEREF);
2201#else
a0d0e21e 2202 force_ident("\"", '$');
554b3eca 2203#endif /* USE_THREADS */
3280af22 2204 PL_nextval[PL_nexttoke].ival = 0;
79072805 2205 force_next('$');
3280af22 2206 PL_nextval[PL_nexttoke].ival = 0;
79072805 2207 force_next('(');
3280af22 2208 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2209 force_next(FUNC);
2210 }
3280af22
NIS
2211 if (PL_lex_starts++) {
2212 s = PL_bufptr;
79072805
LW
2213 Aop(OP_CONCAT);
2214 }
cea2e8a9 2215 return yylex();
79072805
LW
2216
2217 case LEX_INTERPENDMAYBE:
3280af22
NIS
2218 if (intuit_more(PL_bufptr)) {
2219 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2220 break;
2221 }
2222 /* FALL THROUGH */
2223
2224 case LEX_INTERPEND:
3280af22
NIS
2225 if (PL_lex_dojoin) {
2226 PL_lex_dojoin = FALSE;
2227 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2228 return ')';
2229 }
43a16006 2230 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2231 && SvEVALED(PL_lex_repl))
43a16006 2232 {
e9fa98b2 2233 if (PL_bufptr != PL_bufend)
cea2e8a9 2234 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2235 PL_lex_repl = Nullsv;
2236 }
79072805
LW
2237 /* FALLTHROUGH */
2238 case LEX_INTERPCONCAT:
2239#ifdef DEBUGGING
3280af22 2240 if (PL_lex_brackets)
cea2e8a9 2241 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2242#endif
3280af22 2243 if (PL_bufptr == PL_bufend)
79072805
LW
2244 return sublex_done();
2245
3280af22
NIS
2246 if (SvIVX(PL_linestr) == '\'') {
2247 SV *sv = newSVsv(PL_linestr);
2248 if (!PL_lex_inpat)
76e3520e 2249 sv = tokeq(sv);
3280af22 2250 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2251 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2252 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2253 s = PL_bufend;
79072805
LW
2254 }
2255 else {
3280af22 2256 s = scan_const(PL_bufptr);
79072805 2257 if (*s == '\\')
3280af22 2258 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2259 else
3280af22 2260 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2261 }
2262
3280af22
NIS
2263 if (s != PL_bufptr) {
2264 PL_nextval[PL_nexttoke] = yylval;
2265 PL_expect = XTERM;
79072805 2266 force_next(THING);
3280af22 2267 if (PL_lex_starts++)
79072805
LW
2268 Aop(OP_CONCAT);
2269 else {
3280af22 2270 PL_bufptr = s;
cea2e8a9 2271 return yylex();
79072805
LW
2272 }
2273 }
2274
cea2e8a9 2275 return yylex();
a0d0e21e 2276 case LEX_FORMLINE:
3280af22
NIS
2277 PL_lex_state = LEX_NORMAL;
2278 s = scan_formline(PL_bufptr);
2279 if (!PL_lex_formbrack)
a0d0e21e
LW
2280 goto rightbracket;
2281 OPERATOR(';');
79072805
LW
2282 }
2283
3280af22
NIS
2284 s = PL_bufptr;
2285 PL_oldoldbufptr = PL_oldbufptr;
2286 PL_oldbufptr = s;
79072805 2287 DEBUG_p( {
bf49b057
GS
2288 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2289 exp_name[PL_expect], s);
79072805 2290 } )
463ee0b2
LW
2291
2292 retry:
378cc40b
LW
2293 switch (*s) {
2294 default:
7e2040f0 2295 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2296 goto keylookup;
cea2e8a9 2297 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2298 case 4:
2299 case 26:
2300 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2301 case 0:
3280af22
NIS
2302 if (!PL_rsfp) {
2303 PL_last_uni = 0;
2304 PL_last_lop = 0;
2305 if (PL_lex_brackets)
d98d5fff 2306 yyerror("Missing right curly or square bracket");
79072805 2307 TOKEN(0);
463ee0b2 2308 }
3280af22 2309 if (s++ < PL_bufend)
a687059c 2310 goto retry; /* ignore stray nulls */
3280af22
NIS
2311 PL_last_uni = 0;
2312 PL_last_lop = 0;
2313 if (!PL_in_eval && !PL_preambled) {
2314 PL_preambled = TRUE;
2315 sv_setpv(PL_linestr,incl_perldb());
2316 if (SvCUR(PL_linestr))
2317 sv_catpv(PL_linestr,";");
2318 if (PL_preambleav){
2319 while(AvFILLp(PL_preambleav) >= 0) {
2320 SV *tmpsv = av_shift(PL_preambleav);
2321 sv_catsv(PL_linestr, tmpsv);
2322 sv_catpv(PL_linestr, ";");
91b7def8
PP
2323 sv_free(tmpsv);
2324 }
3280af22
NIS
2325 sv_free((SV*)PL_preambleav);
2326 PL_preambleav = NULL;
91b7def8 2327 }
3280af22
NIS
2328 if (PL_minus_n || PL_minus_p) {
2329 sv_catpv(PL_linestr, "LINE: while (<>) {");
2330 if (PL_minus_l)
2331 sv_catpv(PL_linestr,"chomp;");
2332 if (PL_minus_a) {
8fd239a7
CS
2333 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2334 if (gv)
2335 GvIMPORTED_AV_on(gv);
3280af22
NIS
2336 if (PL_minus_F) {
2337 if (strchr("/'\"", *PL_splitstr)
2338 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2339 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2340 else {
2341 char delim;
2342 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2343 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2344 delim = *s;
cea2e8a9 2345 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2346 "q" + (delim == '\''), delim);
3280af22 2347 for (s = PL_splitstr; *s; s++) {
54310121 2348 if (*s == '\\')
3280af22
NIS
2349 sv_catpvn(PL_linestr, "\\", 1);
2350 sv_catpvn(PL_linestr, s, 1);
54310121 2351 }
cea2e8a9 2352 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2353 }
2304df62
AD
2354 }
2355 else
3280af22 2356 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2357 }
79072805 2358 }
3280af22
NIS
2359 sv_catpv(PL_linestr, "\n");
2360 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2361 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2362 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2363 SV *sv = NEWSV(85,0);
2364
2365 sv_upgrade(sv, SVt_PVMG);
3280af22 2366 sv_setsv(sv,PL_linestr);
57843af0 2367 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2368 }
79072805 2369 goto retry;
a687059c 2370 }
e929a76b 2371 do {
3280af22 2372 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2373 fake_eof:
3280af22
NIS
2374 if (PL_rsfp) {
2375 if (PL_preprocess && !PL_in_eval)
2376 (void)PerlProc_pclose(PL_rsfp);
2377 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2378 PerlIO_clearerr(PL_rsfp);
395c3793 2379 else
3280af22
NIS
2380 (void)PerlIO_close(PL_rsfp);
2381 PL_rsfp = Nullfp;
4a9ae47a 2382 PL_doextract = FALSE;
395c3793 2383 }
3280af22
NIS
2384 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2385 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2386 sv_catpv(PL_linestr,";}");
2387 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2388 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2389 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2390 goto retry;
2391 }
3280af22
NIS
2392 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2393 sv_setpv(PL_linestr,"");
79072805 2394 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2395 }
3280af22 2396 if (PL_doextract) {
a0d0e21e 2397 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2398 PL_doextract = FALSE;
a0d0e21e
LW
2399
2400 /* Incest with pod. */
2401 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2402 sv_setpv(PL_linestr, "");
2403 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2404 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2405 PL_doextract = FALSE;
a0d0e21e
LW
2406 }
2407 }
463ee0b2 2408 incline(s);
3280af22
NIS
2409 } while (PL_doextract);
2410 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2411 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2412 SV *sv = NEWSV(85,0);
a687059c 2413
93a17b20 2414 sv_upgrade(sv, SVt_PVMG);
3280af22 2415 sv_setsv(sv,PL_linestr);
57843af0 2416 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2417 }
3280af22 2418 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2419 if (CopLINE(PL_curcop) == 1) {
3280af22 2420 while (s < PL_bufend && isSPACE(*s))
79072805 2421 s++;
a0d0e21e 2422 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2423 s++;
44a8e56a 2424 d = Nullch;
3280af22 2425 if (!PL_in_eval) {
44a8e56a
PP
2426 if (*s == '#' && *(s+1) == '!')
2427 d = s + 2;
2428#ifdef ALTERNATE_SHEBANG
2429 else {
2430 static char as[] = ALTERNATE_SHEBANG;
2431 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2432 d = s + (sizeof(as) - 1);
2433 }
2434#endif /* ALTERNATE_SHEBANG */
2435 }
2436 if (d) {
b8378b72 2437 char *ipath;
774d564b 2438 char *ipathend;
b8378b72 2439
774d564b 2440 while (isSPACE(*d))
b8378b72
CS
2441 d++;
2442 ipath = d;
774d564b
PP
2443 while (*d && !isSPACE(*d))
2444 d++;
2445 ipathend = d;
2446
2447#ifdef ARG_ZERO_IS_SCRIPT
2448 if (ipathend > ipath) {
2449 /*
2450 * HP-UX (at least) sets argv[0] to the script name,
2451 * which makes $^X incorrect. And Digital UNIX and Linux,
2452 * at least, set argv[0] to the basename of the Perl
2453 * interpreter. So, having found "#!", we'll set it right.
2454 */
2455 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2456 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2457 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2458 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2459 SvSETMAGIC(x);
2460 }
774d564b 2461 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2462 }
774d564b 2463#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2464
2465 /*
2466 * Look for options.
2467 */
748a9306 2468 d = instr(s,"perl -");
84e30d1a 2469 if (!d) {
748a9306 2470 d = instr(s,"perl");
84e30d1a
GS
2471#if defined(DOSISH)
2472 /* avoid getting into infinite loops when shebang
2473 * line contains "Perl" rather than "perl" */
2474 if (!d) {
2475 for (d = ipathend-4; d >= ipath; --d) {
2476 if ((*d == 'p' || *d == 'P')
2477 && !ibcmp(d, "perl", 4))
2478 {
2479 break;
2480 }
2481 }
2482 if (d < ipath)
2483 d = Nullch;
2484 }
2485#endif
2486 }
44a8e56a
PP
2487#ifdef ALTERNATE_SHEBANG
2488 /*
2489 * If the ALTERNATE_SHEBANG on this system starts with a
2490 * character that can be part of a Perl expression, then if
2491 * we see it but not "perl", we're probably looking at the
2492 * start of Perl code, not a request to hand off to some
2493 * other interpreter. Similarly, if "perl" is there, but
2494 * not in the first 'word' of the line, we assume the line
2495 * contains the start of the Perl program.
44a8e56a
PP
2496 */
2497 if (d && *s != '#') {
774d564b 2498 char *c = ipath;
44a8e56a
PP
2499 while (*c && !strchr("; \t\r\n\f\v#", *c))
2500 c++;
2501 if (c < d)
2502 d = Nullch; /* "perl" not in first word; ignore */
2503 else
2504 *s = '#'; /* Don't try to parse shebang line */
2505 }
774d564b 2506#endif /* ALTERNATE_SHEBANG */
748a9306 2507 if (!d &&
44a8e56a 2508 *s == '#' &&
774d564b 2509 ipathend > ipath &&
3280af22 2510 !PL_minus_c &&
748a9306 2511 !instr(s,"indir") &&
3280af22 2512 instr(PL_origargv[0],"perl"))
748a9306 2513 {
9f68db38 2514 char **newargv;
9f68db38 2515
774d564b
PP
2516 *ipathend = '\0';
2517 s = ipathend + 1;
3280af22 2518 while (s < PL_bufend && isSPACE(*s))
9f68db38 2519 s++;
3280af22
NIS
2520 if (s < PL_bufend) {
2521 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2522 newargv[1] = s;
3280af22 2523 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2524 s++;
2525 *s = '\0';
3280af22 2526 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2527 }
2528 else
3280af22 2529 newargv = PL_origargv;
774d564b 2530 newargv[0] = ipath;
80252599 2531 PerlProc_execv(ipath, newargv);
cea2e8a9 2532 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2533 }
748a9306 2534 if (d) {
3280af22
NIS
2535 U32 oldpdb = PL_perldb;
2536 bool oldn = PL_minus_n;
2537 bool oldp = PL_minus_p;
748a9306
LW
2538
2539 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2540 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2541
2542 if (*d++ == '-') {
8cc95fdb
PP
2543 do {
2544 if (*d == 'M' || *d == 'm') {
2545 char *m = d;
2546 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2547 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2548 (int)(d - m), m);
2549 }
2550 d = moreswitches(d);
2551 } while (d);
84902520 2552 if (PERLDB_LINE && !oldpdb ||
3280af22 2553 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2554 /* if we have already added "LINE: while (<>) {",
2555 we must not do it again */
748a9306 2556 {
3280af22
NIS
2557 sv_setpv(PL_linestr, "");
2558 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2559 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2560 PL_preambled = FALSE;
84902520 2561 if (PERLDB_LINE)
3280af22 2562 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2563 goto retry;
2564 }
a0d0e21e 2565 }
79072805 2566 }
9f68db38 2567 }
79072805 2568 }
3280af22
NIS
2569 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2570 PL_bufptr = s;
2571 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2572 return yylex();
ae986130 2573 }
378cc40b 2574 goto retry;
4fdae800 2575 case '\r':
6a27c188 2576#ifdef PERL_STRICT_CR
cea2e8a9
GS
2577 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2578 Perl_croak(aTHX_
54310121 2579 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2580#endif
4fdae800 2581 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2582 s++;
2583 goto retry;
378cc40b 2584 case '#':
e929a76b 2585 case '\n':
3280af22
NIS
2586 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2587 d = PL_bufend;
a687059c 2588 while (s < d && *s != '\n')
378cc40b 2589 s++;
0f85fab0 2590 if (s < d)
378cc40b 2591 s++;
463ee0b2 2592 incline(s);
3280af22
NIS
2593 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2594 PL_bufptr = s;
2595 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2596 return yylex();
a687059c 2597 }
378cc40b 2598 }
a687059c 2599 else {
378cc40b 2600 *s = '\0';
3280af22 2601 PL_bufend = s;
a687059c 2602 }
378cc40b
LW
2603 goto retry;
2604 case '-':
79072805 2605 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2606 s++;
3280af22 2607 PL_bufptr = s;
748a9306
LW
2608 tmp = *s++;
2609
3280af22 2610 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2611 s++;
2612
2613 if (strnEQ(s,"=>",2)) {
3280af22 2614 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2615 OPERATOR('-'); /* unary minus */
2616 }
3280af22
NIS
2617 PL_last_uni = PL_oldbufptr;
2618 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2619 switch (tmp) {
79072805
LW
2620 case 'r': FTST(OP_FTEREAD);
2621 case 'w': FTST(OP_FTEWRITE);
2622 case 'x': FTST(OP_FTEEXEC);
2623 case 'o': FTST(OP_FTEOWNED);
2624 case 'R': FTST(OP_FTRREAD);
2625 case 'W': FTST(OP_FTRWRITE);
2626 case 'X': FTST(OP_FTREXEC);
2627 case 'O': FTST(OP_FTROWNED);
2628 case 'e': FTST(OP_FTIS);
2629 case 'z': FTST(OP_FTZERO);
2630 case 's': FTST(OP_FTSIZE);
2631 case 'f': FTST(OP_FTFILE);
2632 case 'd': FTST(OP_FTDIR);
2633 case 'l': FTST(OP_FTLINK);
2634 case 'p': FTST(OP_FTPIPE);
2635 case 'S': FTST(OP_FTSOCK);
2636 case 'u': FTST(OP_FTSUID);
2637 case 'g': FTST(OP_FTSGID);
2638 case 'k': FTST(OP_FTSVTX);
2639 case 'b': FTST(OP_FTBLK);
2640 case 'c': FTST(OP_FTCHR);
2641 case 't': FTST(OP_FTTTY);
2642 case 'T': FTST(OP_FTTEXT);
2643 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2644 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2645 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2646 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2647 default:
cea2e8a9 2648 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2649 break;
2650 }
2651 }
a687059c
LW
2652 tmp = *s++;
2653 if (*s == tmp) {
2654 s++;
3280af22 2655 if (PL_expect == XOPERATOR)
79072805
LW
2656 TERM(POSTDEC);
2657 else
2658 OPERATOR(PREDEC);
2659 }
2660 else if (*s == '>') {
2661 s++;
2662 s = skipspace(s);
7e2040f0 2663 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2664 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2665 TOKEN(ARROW);
79072805 2666 }
748a9306
LW
2667 else if (*s == '$')
2668 OPERATOR(ARROW);
463ee0b2 2669 else
748a9306 2670 TERM(ARROW);
a687059c 2671 }
3280af22 2672 if (PL_expect == XOPERATOR)
79072805
LW
2673 Aop(OP_SUBTRACT);
2674 else {
3280af22 2675 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2676 check_uni();
79072805 2677 OPERATOR('-'); /* unary minus */
2f3197b3 2678 }
79072805 2679
378cc40b 2680 case '+':
a687059c
LW
2681 tmp = *s++;
2682 if (*s == tmp) {
378cc40b 2683 s++;
3280af22 2684 if (PL_expect == XOPERATOR)
79072805
LW
2685 TERM(POSTINC);
2686 else
2687 OPERATOR(PREINC);
378cc40b 2688 }
3280af22 2689 if (PL_expect == XOPERATOR)
79072805
LW
2690 Aop(OP_ADD);
2691 else {
3280af22 2692 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2693 check_uni();
a687059c 2694 OPERATOR('+');
2f3197b3 2695 }
a687059c 2696
378cc40b 2697 case '*':
3280af22
NIS
2698 if (PL_expect != XOPERATOR) {
2699 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2700 PL_expect = XOPERATOR;
2701 force_ident(PL_tokenbuf, '*');
2702 if (!*PL_tokenbuf)
a0d0e21e 2703 PREREF('*');
79072805 2704 TERM('*');
a687059c 2705 }
79072805
LW
2706 s++;
2707 if (*s == '*') {
a687059c 2708 s++;
79072805 2709 PWop(OP_POW);
a687059c 2710 }
79072805
LW
2711 Mop(OP_MULTIPLY);
2712
378cc40b 2713 case '%':
3280af22 2714 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2715 ++s;
2716 Mop(OP_MODULO);
a687059c 2717 }
3280af22
NIS
2718 PL_tokenbuf[0] = '%';
2719 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2720 if (!PL_tokenbuf[1]) {
2721 if (s == PL_bufend)
bbce6d69
PP
2722 yyerror("Final % should be \\% or %name");
2723 PREREF('%');
a687059c 2724 }
3280af22 2725 PL_pending_ident = '%';
bbce6d69 2726 TERM('%');
a687059c 2727
378cc40b 2728 case '^':
79072805 2729 s++;
a0d0e21e 2730 BOop(OP_BIT_XOR);
79072805 2731 case '[':
3280af22 2732 PL_lex_brackets++;
79072805 2733 /* FALL THROUGH */
378cc40b 2734 case '~':
378cc40b 2735 case ',':
378cc40b
LW
2736 tmp = *s++;
2737 OPERATOR(tmp);
a0d0e21e
LW
2738 case ':':
2739 if (s[1] == ':') {
2740 len = 0;
2741 goto just_a_word;
2742 }
2743 s++;
09bef843
SB
2744 switch (PL_expect) {
2745 OP *attrs;
2746 case XOPERATOR:
2747 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2748 break;
2749 PL_bufptr = s; /* update in case we back off */
2750 goto grabattrs;
2751 case XATTRBLOCK:
2752 PL_expect = XBLOCK;
2753 goto grabattrs;
2754 case XATTRTERM:
2755 PL_expect = XTERMBLOCK;
2756 grabattrs:
2757 s = skipspace(s);
2758 attrs = Nullop;
7e2040f0 2759 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2760 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2761 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2762 if (tmp < 0) tmp = -tmp;
2763 switch (tmp) {
2764 case KEY_or:
2765 case KEY_and:
2766 case KEY_for:
2767 case KEY_unless:
2768 case KEY_if:
2769 case KEY_while:
2770 case KEY_until:
2771 goto got_attrs;
2772 default:
2773 break;
2774 }
2775 }
09bef843
SB
2776 if (*d == '(') {
2777 d = scan_str(d,TRUE,TRUE);
2778 if (!d) {
2779 if (PL_lex_stuff) {
2780 SvREFCNT_dec(PL_lex_stuff);
2781 PL_lex_stuff = Nullsv;
2782 }
2783 /* MUST advance bufptr here to avoid bogus
2784 "at end of line" context messages from yyerror().
2785 */
2786 PL_bufptr = s + len;
2787 yyerror("Unterminated attribute parameter in attribute list");
2788 if (attrs)
2789 op_free(attrs);
2790 return 0; /* EOF indicator */
2791 }
2792 }
2793 if (PL_lex_stuff) {
2794 SV *sv = newSVpvn(s, len);
2795 sv_catsv(sv, PL_lex_stuff);
2796 attrs = append_elem(OP_LIST, attrs,
2797 newSVOP(OP_CONST, 0, sv));
2798 SvREFCNT_dec(PL_lex_stuff);
2799 PL_lex_stuff = Nullsv;
2800 }
2801 else {
2802 attrs = append_elem(OP_LIST, attrs,
2803 newSVOP(OP_CONST, 0,
2804 newSVpvn(s, len)));
2805 }
2806 s = skipspace(d);
0120eecf 2807 if (*s == ':' && s[1] != ':')
09bef843 2808 s = skipspace(s+1);
0120eecf
GS
2809 else if (s == d)
2810 break; /* require real whitespace or :'s */
09bef843 2811 }
f9829d6b
GS
2812 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2813 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2814 char q = ((*s == '\'') ? '"' : '\'');
2815 /* If here for an expression, and parsed no attrs, back off. */
2816 if (tmp == '=' && !attrs) {
2817 s = PL_bufptr;
2818 break;
2819 }
2820 /* MUST advance bufptr here to avoid bogus "at end of line"
2821 context messages from yyerror().
2822 */
2823 PL_bufptr = s;
2824 if (!*s)
2825 yyerror("Unterminated attribute list");
2826 else
2827 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2828 q, *s, q));
2829 if (attrs)
2830 op_free(attrs);
2831 OPERATOR(':');
2832 }
f9829d6b 2833 got_attrs:
09bef843
SB
2834 if (attrs) {
2835 PL_nextval[PL_nexttoke].opval = attrs;
2836 force_next(THING);
2837 }
2838 TOKEN(COLONATTR);
2839 }
a0d0e21e 2840 OPERATOR(':');
8990e307
LW
2841 case '(':
2842 s++;
3280af22
NIS
2843 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2844 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2845 else
3280af22 2846 PL_expect = XTERM;
a0d0e21e 2847 TOKEN('(');
378cc40b 2848 case ';':
57843af0
GS
2849 if (CopLINE(PL_curcop) < PL_copline)
2850 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2851 tmp = *s++;
2852 OPERATOR(tmp);
2853 case ')':
378cc40b 2854 tmp = *s++;
16d20bd9
AD
2855 s = skipspace(s);
2856 if (*s == '{')
2857 PREBLOCK(tmp);
378cc40b 2858 TERM(tmp);
79072805
LW
2859 case ']':
2860 s++;
3280af22 2861 if (PL_lex_brackets <= 0)
d98d5fff 2862 yyerror("Unmatched right square bracket");
463ee0b2 2863 else
3280af22
NIS
2864 --PL_lex_brackets;
2865 if (PL_lex_state == LEX_INTERPNORMAL) {
2866 if (PL_lex_brackets == 0) {
a0d0e21e 2867 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2868 PL_lex_state = LEX_INTERPEND;
79072805
LW
2869 }
2870 }
4633a7c4 2871 TERM(']');
79072805
LW
2872 case '{':
2873 leftbracket:
79072805 2874 s++;
3280af22
NIS
2875 if (PL_lex_brackets > 100) {
2876 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2877 if (newlb != PL_lex_brackstack) {
8990e307 2878 SAVEFREEPV(newlb);
3280af22 2879 PL_lex_brackstack = newlb;
8990e307
LW
2880 }
2881 }
3280af22 2882 switch (PL_expect) {
a0d0e21e 2883 case XTERM:
3280af22 2884 if (PL_lex_formbrack) {
a0d0e21e
LW
2885 s--;
2886 PRETERMBLOCK(DO);
2887 }
3280af22
NIS
2888 if (PL_oldoldbufptr == PL_last_lop)
2889 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2890 else
3280af22 2891 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2892 OPERATOR(HASHBRACK);
a0d0e21e 2893 case XOPERATOR:
3280af22 2894 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2895 s++;
44a8e56a 2896 d = s;
3280af22
NIS
2897 PL_tokenbuf[0] = '\0';
2898 if (d < PL_bufend && *d == '-') {
2899 PL_tokenbuf[0] = '-';
44a8e56a 2900 d++;
3280af22 2901 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2902 d++;
2903 }
7e2040f0 2904 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2905 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2906 FALSE, &len);
3280af22 2907 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2908 d++;
2909 if (*d == '}') {
3280af22 2910 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2911 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2912 if (minus)
2913 force_next('-');
748a9306
LW
2914 }
2915 }
2916 /* FALL THROUGH */
09bef843 2917 case XATTRBLOCK:
748a9306 2918 case XBLOCK:
3280af22
NIS
2919 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2920 PL_expect = XSTATE;
a0d0e21e 2921 break;
09bef843 2922 case XATTRTERM:
a0d0e21e 2923 case XTERMBLOCK:
3280af22
NIS
2924 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2925 PL_expect = XSTATE;
a0d0e21e
LW
2926 break;
2927 default: {
2928 char *t;
3280af22
NIS
2929 if (PL_oldoldbufptr == PL_last_lop)
2930 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2931 else
3280af22 2932 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2933 s = skipspace(s);
09ecc4b6 2934 if (*s == '}')
a0d0e21e 2935 OPERATOR(HASHBRACK);
b8a4b1be
GS
2936 /* This hack serves to disambiguate a pair of curlies
2937 * as being a block or an anon hash. Normally, expectation
2938 * determines that, but in cases where we're not in a
2939 * position to expect anything in particular (like inside
2940 * eval"") we have to resolve the ambiguity. This code
2941 * covers the case where the first term in the curlies is a
2942 * quoted string. Most other cases need to be explicitly
2943 * disambiguated by prepending a `+' before the opening
2944 * curly in order to force resolution as an anon hash.
2945 *
2946 * XXX should probably propagate the outer expectation
2947 * into eval"" to rely less on this hack, but that could
2948 * potentially break current behavior of eval"".
2949 * GSAR 97-07-21
2950 */
2951 t = s;
2952 if (*s == '\'' || *s == '"' || *s == '`') {
2953 /* common case: get past first string, handling escapes */
3280af22 2954 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2955 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2956 t++;
2957 t++;
a0d0e21e 2958 }
b8a4b1be 2959 else if (*s == 'q') {
3280af22 2960 if (++t < PL_bufend
b8a4b1be 2961 && (!isALNUM(*t)
3280af22 2962 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2963 && !isALNUM(*t))))
2964 {
b8a4b1be
GS
2965 char *tmps;
2966 char open, close, term;
2967 I32 brackets = 1;
2968
3280af22 2969 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2970 t++;
2971 term = *t;
2972 open = term;
2973 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2974 term = tmps[5];
2975 close = term;
2976 if (open == close)
3280af22
NIS
2977 for (t++; t < PL_bufend; t++) {
2978 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2979 t++;
6d07e5e9 2980 else if (*t == open)
b8a4b1be
GS
2981 break;
2982 }
2983 else
3280af22
NIS
2984 for (t++; t < PL_bufend; t++) {
2985 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2986 t++;
6d07e5e9 2987 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2988 break;
2989 else if (*t == open)
2990 brackets++;
2991 }
2992 }
2993 t++;
a0d0e21e 2994 }
7e2040f0 2995 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 2996 t += UTF8SKIP(t);
7e2040f0 2997 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 2998 t += UTF8SKIP(t);
a0d0e21e 2999 }
3280af22 3000 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3001 t++;
b8a4b1be
GS
3002 /* if comma follows first term, call it an anon hash */
3003 /* XXX it could be a comma expression with loop modifiers */
3280af22 3004 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3005 || (*t == '=' && t[1] == '>')))
a0d0e21e 3006 OPERATOR(HASHBRACK);
3280af22 3007 if (PL_expect == XREF)
4e4e412b 3008 PL_expect = XTERM;
a0d0e21e 3009 else {
3280af22
NIS
3010 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3011 PL_expect = XSTATE;
a0d0e21e 3012 }
8990e307 3013 }
a0d0e21e 3014 break;
463ee0b2 3015 }
57843af0 3016 yylval.ival = CopLINE(PL_curcop);
79072805 3017 if (isSPACE(*s) || *s == '#')
3280af22 3018 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3019 TOKEN('{');
378cc40b 3020 case '}':
79072805
LW
3021 rightbracket:
3022 s++;
3280af22 3023 if (PL_lex_brackets <= 0)
d98d5fff 3024 yyerror("Unmatched right curly bracket");
463ee0b2 3025 else
3280af22
NIS
3026 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3027 if (PL_lex_brackets < PL_lex_formbrack)
3028 PL_lex_formbrack = 0;
3029 if (PL_lex_state == LEX_INTERPNORMAL) {
3030 if (PL_lex_brackets == 0) {
9059aa12
LW
3031 if (PL_expect & XFAKEBRACK) {
3032 PL_expect &= XENUMMASK;
3280af22
NIS
3033 PL_lex_state = LEX_INTERPEND;
3034 PL_bufptr = s;
cea2e8a9 3035 return yylex(); /* ignore fake brackets */
79072805 3036 }
fa83b5b6 3037 if (*s == '-' && s[1] == '>')
3280af22 3038 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3039 else if (*s != '[' && *s != '{')
3280af22 3040 PL_lex_state = LEX_INTERPEND;
79072805
LW
3041 }
3042 }
9059aa12
LW
3043 if (PL_expect & XFAKEBRACK) {
3044 PL_expect &= XENUMMASK;
3280af22 3045 PL_bufptr = s;
cea2e8a9 3046 return yylex(); /* ignore fake brackets */
748a9306 3047 }
79072805
LW
3048 force_next('}');
3049 TOKEN(';');
378cc40b
LW
3050 case '&':
3051 s++;
3052 tmp = *s++;
3053 if (tmp == '&')
a0d0e21e 3054 AOPERATOR(ANDAND);
378cc40b 3055 s--;
3280af22 3056 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3057 if (ckWARN(WARN_SEMICOLON)
3058 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3059 {
57843af0 3060 CopLINE_dec(PL_curcop);
cea2e8a9 3061 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3062 CopLINE_inc(PL_curcop);
463ee0b2 3063 }
79072805 3064 BAop(OP_BIT_AND);
463ee0b2 3065 }
79072805 3066
3280af22
NIS
3067 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3068 if (*PL_tokenbuf) {
3069 PL_expect = XOPERATOR;
3070 force_ident(PL_tokenbuf, '&');
463ee0b2 3071 }
79072805
LW
3072 else
3073 PREREF('&');
c07a80fd 3074 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3075 TERM('&');
3076
378cc40b
LW
3077 case '|':
3078 s++;
3079 tmp = *s++;
3080 if (tmp == '|')
a0d0e21e 3081 AOPERATOR(OROR);
378cc40b 3082 s--;
79072805 3083 BOop(OP_BIT_OR);
378cc40b
LW
3084 case '=':
3085 s++;
3086 tmp = *s++;
3087 if (tmp == '=')
79072805
LW
3088 Eop(OP_EQ);
3089 if (tmp == '>')
3090 OPERATOR(',');
378cc40b 3091 if (tmp == '~')
79072805 3092 PMop(OP_MATCH);
599cee73 3093 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3094 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3095 s--;
3280af22
NIS
3096 if (PL_expect == XSTATE && isALPHA(tmp) &&
3097 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3098 {
3280af22
NIS
3099 if (PL_in_eval && !PL_rsfp) {
3100 d = PL_bufend;
a5f75d66
AD
3101 while (s < d) {
3102 if (*s++ == '\n') {
3103 incline(s);
3104 if (strnEQ(s,"=cut",4)) {
3105 s = strchr(s,'\n');
3106 if (s)
3107 s++;
3108 else
3109 s = d;
3110 incline(s);
3111 goto retry;
3112 }
3113 }
3114 }
3115 goto retry;
3116 }
3280af22
NIS
3117 s = PL_bufend;
3118 PL_doextract = TRUE;
a0d0e21e
LW
3119 goto retry;
3120 }
3280af22 3121 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3122 char *t;
51882d45 3123#ifdef PERL_STRICT_CR
a0d0e21e 3124 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3125#else
3126 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3127#endif
a0d0e21e
LW
3128 if (*t == '\n' || *t == '#') {
3129 s--;
3280af22 3130 PL_expect = XBLOCK;
a0d0e21e
LW
3131 goto leftbracket;
3132 }
79072805 3133 }
a0d0e21e
LW
3134 yylval.ival = 0;
3135 OPERATOR(ASSIGNOP);
378cc40b
LW
3136 case '!':
3137 s++;
3138 tmp = *s++;
3139 if (tmp == '=')
79072805 3140 Eop(OP_NE);
378cc40b 3141 if (tmp == '~')
79072805 3142 PMop(OP_NOT);
378cc40b
LW
3143 s--;
3144 OPERATOR('!');
3145 case '<':
3280af22 3146 if (PL_expect != XOPERATOR) {
93a17b20 3147 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3148 check_uni();
79072805
LW
3149 if (s[1] == '<')
3150 s = scan_heredoc(s);
3151 else
3152 s = scan_inputsymbol(s);
3153 TERM(sublex_start());
378cc40b
LW
3154 }
3155 s++;
3156 tmp = *s++;
3157 if (tmp == '<')
79072805 3158 SHop(OP_LEFT_SHIFT);
395c3793
LW
3159 if (tmp == '=') {
3160 tmp = *s++;
3161 if (tmp == '>')
79072805 3162 Eop(OP_NCMP);
395c3793 3163 s--;
79072805 3164 Rop(OP_LE);
395c3793 3165 }
378cc40b 3166 s--;
79072805 3167 Rop(OP_LT);
378cc40b
LW
3168 case '>':
3169 s++;
3170 tmp = *s++;
3171 if (tmp == '>')
79072805 3172 SHop(OP_RIGHT_SHIFT);
378cc40b 3173 if (tmp == '=')
79072805 3174 Rop(OP_GE);
378cc40b 3175 s--;
79072805 3176 Rop(OP_GT);
378cc40b
LW
3177
3178 case '$':
bbce6d69
PP
3179 CLINE;
3180
3280af22
NIS
3181 if (PL_expect == XOPERATOR) {
3182 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3183 PL_expect = XTERM;
a0d0e21e 3184 depcom();
bbce6d69 3185 return ','; /* grandfather non-comma-format format */
a0d0e21e 3186 }
8990e307 3187 }
a0d0e21e 3188
7e2040f0 3189 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3190 PL_tokenbuf[0] = '@';
376b8730
SM
3191 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3192 sizeof PL_tokenbuf - 1, FALSE);
3193 if (PL_expect == XOPERATOR)
3194 no_op("Array length", s);
3280af22 3195 if (!PL_tokenbuf[1])
a0d0e21e 3196 PREREF(DOLSHARP);
3280af22
NIS
3197 PL_expect = XOPERATOR;
3198 PL_pending_ident = '#';
463ee0b2 3199 TOKEN(DOLSHARP);
79072805 3200 }
bbce6d69 3201
3280af22 3202 PL_tokenbuf[0] = '$';
376b8730
SM
3203 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3204 sizeof PL_tokenbuf - 1, FALSE);
3205 if (PL_expect == XOPERATOR)
3206 no_op("Scalar", s);
3280af22
NIS
3207 if (!PL_tokenbuf[1]) {
3208 if (s == PL_bufend)
bbce6d69
PP
3209 yyerror("Final $ should be \\$ or $name");
3210 PREREF('$');
8990e307 3211 }
a0d0e21e 3212
bbce6d69 3213 /* This kludge not intended to be bulletproof. */
3280af22 3214 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3215 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3216 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3217 yylval.opval->op_private = OPpCONST_ARYBASE;
3218 TERM(THING);
3219 }
3220
ff68c719 3221 d = s;
69d2bceb 3222 tmp = (I32)*s;
3280af22 3223 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3224 s = skipspace(s);
3225
3280af22 3226 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3227 char *t;
3228 if (*s == '[') {
3280af22 3229 PL_tokenbuf[0] = '@';
599cee73 3230 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3231 for(t = s + 1;
7e2040f0 3232 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3233 t++) ;
a0d0e21e 3234 if (*t++ == ',') {
3280af22
NIS
3235 PL_bufptr = skipspace(PL_bufptr);
3236 while (t < PL_bufend && *t != ']')
bbce6d69 3237 t++;
cea2e8a9 3238 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3239 "Multidimensional syntax %.*s not supported",
3240 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3241 }
3242 }
bbce6d69
PP
3243 }
3244 else if (*s == '{') {
3280af22 3245 PL_tokenbuf[0] = '%';
599cee73 3246 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3247 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3248 {
3280af22 3249 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3250 STRLEN len;
3251 for (t++; isSPACE(*t); t++) ;
7e2040f0 3252 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3253 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3254 for (; isSPACE(*t); t++) ;
864dbfa3 3255 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3256 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3257 "You need to quote \"%s\"", tmpbuf);
748a9306 3258 }
93a17b20
LW
3259 }
3260 }
2f3197b3 3261 }
bbce6d69 3262
3280af22 3263 PL_expect = XOPERATOR;
69d2bceb 3264 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3265 bool islop = (PL_last_lop == PL_oldoldbufptr);
3266 if (!islop || PL_last_lop_op == OP_GREPSTART)
3267 PL_expect = XOPERATOR;
bbce6d69 3268 else if (strchr("$@\"'`q", *s))
3280af22 3269 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3270 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3271 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3272 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3273 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3274 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3275 if (tmp = keyword(tmpbuf, len)) {
3276 /* binary operators exclude handle interpretations */
3277 switch (tmp) {
3278 case -KEY_x:
3279 case -KEY_eq:
3280 case -KEY_ne:
3281 case -KEY_gt:
3282 case -KEY_lt:
3283 case -KEY_ge:
3284 case -KEY_le:
3285 case -KEY_cmp:
3286 break;
3287 default:
3280af22 3288 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3289 break;
3290 }
3291 }
68dc0745
PP
3292 else {
3293 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3294 if (gv && GvCVu(gv))
3280af22 3295 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3296 }
93a17b20 3297 }
bbce6d69 3298 else if (isDIGIT(*s))
3280af22 3299 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3300 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3301 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3302 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3303 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3304 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3305 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3306 }
3280af22 3307 PL_pending_ident = '$';
79072805 3308 TOKEN('$');
378cc40b
LW
3309
3310 case '@':
3280af22 3311 if (PL_expect == XOPERATOR)
bbce6d69 3312 no_op("Array", s);
3280af22
NIS
3313 PL_tokenbuf[0] = '@';
3314 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3315 if (!PL_tokenbuf[1]) {
3316 if (s == PL_bufend)
bbce6d69
PP
3317 yyerror("Final @ should be \\@ or @name");
3318 PREREF('@');
3319 }
3280af22 3320 if (PL_lex_state == LEX_NORMAL)
ff68c719 3321 s = skipspace(s);
3280af22 3322 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3323 if (*s == '{')
3280af22 3324 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3325
3326 /* Warn about @ where they meant $. */
599cee73 3327 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3328 if (*s == '[' || *s == '{') {
3329 char *t = s + 1;
7e2040f0 3330 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3331 t++;
3332 if (*t == '}' || *t == ']') {
3333 t++;
3280af22 3334 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3335 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3336 "Scalar value %.*s better written as $%.*s",
3280af22 3337 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3338 }
93a17b20
LW
3339 }
3340 }
463ee0b2 3341 }
3280af22 3342 PL_pending_ident = '@';
79072805 3343 TERM('@');
378cc40b
LW
3344
3345 case '/': /* may either be division or pattern */
3346 case '?': /* may either be conditional or pattern */
3280af22 3347 if (PL_expect != XOPERATOR) {
c277df42 3348 /* Disable warning on "study /blah/" */
3280af22
NIS
3349 if (PL_oldoldbufptr == PL_last_uni
3350 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3351 || memNE(PL_last_uni, "study", 5)
3352 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3353 check_uni();
8782bef2 3354 s = scan_pat(s,OP_MATCH);
79072805 3355 TERM(sublex_start());
378cc40b
LW
3356 }
3357 tmp = *s++;
a687059c 3358 if (tmp == '/')
79072805 3359 Mop(OP_DIVIDE);
378cc40b
LW
3360 OPERATOR(tmp);
3361
3362 case '.':
51882d45
GS
3363 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3364#ifdef PERL_STRICT_CR
3365 && s[1] == '\n'
3366#else
3367 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3368#endif
3369 && (s == PL_linestart || s[-1] == '\n') )
3370 {
3280af22
NIS
3371 PL_lex_formbrack = 0;
3372 PL_expect = XSTATE;
79072805
LW
3373 goto rightbracket;
3374 }
3280af22 3375 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3376 tmp = *s++;
a687059c
LW
3377 if (*s == tmp) {
3378 s++;
2f3197b3
LW
3379 if (*s == tmp) {
3380 s++;
79072805 3381 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3382 }
3383 else
79072805 3384 yylval.ival = 0;
378cc40b 3385 OPERATOR(DOTDOT);
a687059c 3386 }
3280af22 3387 if (PL_expect != XOPERATOR)
2f3197b3 3388 check_uni();
79072805 3389 Aop(OP_CONCAT);
378cc40b
LW
3390 }
3391 /* FALL THROUGH */
3392 case '0': case '1': case '2': case '3': case '4':
3393 case '5': case '6': case '7': case '8': case '9':
79072805 3394 s = scan_num(s);
3280af22 3395 if (PL_expect == XOPERATOR)
8990e307 3396 no_op("Number",s);
79072805
LW
3397 TERM(THING);
3398
3399 case '\'':
09bef843 3400 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3401 if (PL_expect == XOPERATOR) {
3402 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3403 PL_expect = XTERM;
a0d0e21e
LW
3404 depcom();
3405 return ','; /* grandfather non-comma-format format */
3406 }
463ee0b2 3407 else
8990e307 3408 no_op("String",s);
463ee0b2 3409 }
79072805 3410 if (!s)
85e6fe83 3411 missingterm((char*)0);
79072805
LW
3412 yylval.ival = OP_CONST;
3413 TERM(sublex_start());
3414
3415 case '"':
09bef843 3416 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3417 if (PL_expect == XOPERATOR) {
3418 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3419 PL_expect = XTERM;
a0d0e21e
LW
3420 depcom();
3421 return ','; /* grandfather non-comma-format format */
3422 }
463ee0b2 3423 else
8990e307 3424 no_op("String",s);
463ee0b2 3425 }
79072805 3426 if (!s)
85e6fe83 3427 missingterm((char*)0);
4633a7c4 3428 yylval.ival = OP_CONST;
3280af22 3429 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3430 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3431 yylval.ival = OP_STRINGIFY;
3432 break;
3433 }
3434 }
79072805
LW
3435 TERM(sublex_start());
3436
3437 case '`':
09bef843 3438 s = scan_str(s,FALSE,FALSE);
3280af22 3439 if (PL_expect == XOPERATOR)
8990e307 3440 no_op("Backticks",s);
79072805 3441 if (!s)
85e6fe83 3442 missingterm((char*)0);
79072805
LW
3443 yylval.ival = OP_BACKTICK;
3444 set_csh();
3445 TERM(sublex_start());
3446
3447 case '\\':
3448 s++;
599cee73 3449 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3450 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3451 *s, *s);
3280af22 3452 if (PL_expect == XOPERATOR)
8990e307 3453 no_op("Backslash",s);
79072805
LW
3454 OPERATOR(REFGEN);
3455
a7cb1f99
GS
3456 case 'v':
3457 if (isDIGIT(s[1]) && PL_expect == XTERM) {
3458 char *start = s;
3459 start++;
3460 start++;
3461 while (isDIGIT(*start))
3462 start++;
3463 if (*start == '.' && isDIGIT(start[1])) {
3464 s = scan_num(s);
3465 TERM(THING);
3466 }
3467 }
3468 goto keylookup;
79072805 3469 case 'x':
3280af22 3470 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3471 s++;
3472 Mop(OP_REPEAT);
2f3197b3 3473 }
79072805
LW
3474 goto keylookup;
3475
378cc40b 3476 case '_':
79072805
LW
3477 case 'a': case 'A':
3478 case 'b': case 'B':
3479 case 'c': case 'C':
3480 case 'd': case 'D':
3481 case 'e': case 'E':
3482 case 'f': case 'F':
3483 case 'g': case 'G':
3484 case 'h': case 'H':
3485 case 'i': case 'I':
3486 case 'j': case 'J':
3487 case 'k': case 'K':
3488 case 'l': case 'L':
3489 case 'm': case 'M':
3490 case 'n': case 'N':
3491 case 'o': case 'O':
3492 case 'p': case 'P':
3493 case 'q': case 'Q':
3494 case 'r': case 'R':
3495 case 's': case 'S':
3496 case 't': case 'T':
3497 case 'u': case 'U':
a7cb1f99 3498 case 'V':
79072805
LW
3499 case 'w': case 'W':
3500 case 'X':
3501 case 'y': case 'Y':
3502 case 'z': case 'Z':
3503
49dc05e3 3504 keylookup: {
2d8e6c8d 3505 STRLEN n_a;
161b471a
NIS
3506 gv = Nullgv;
3507 gvp = 0;
49dc05e3 3508
3280af22
NIS
3509 PL_bufptr = s;
3510 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3511
3512 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3513 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3514 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3515 (PL_tokenbuf[0] == 'q' &&
3516 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3517
3518 /* x::* is just a word, unless x is "CORE" */
3280af22 3519 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3520 goto just_a_word;
3521
3643fb5f 3522 d = s;
3280af22 3523 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3524 d++; /* no comments skipped here, or s### is misparsed */
3525
3526 /* Is this a label? */
3280af22
NIS
3527 if (!tmp && PL_expect == XSTATE
3528 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3529 s = d + 1;
3280af22 3530 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3531 CLINE;
3532 TOKEN(LABEL);
3643fb5f
CS
3533 }
3534
3535 /* Check for keywords */
3280af22 3536 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3537
3538 /* Is this a word before a => operator? */
748a9306
LW
3539 if (strnEQ(d,"=>",2)) {
3540 CLINE;
3280af22 3541 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3542 yylval.opval->op_private = OPpCONST_BARE;
3543 TERM(WORD);
3544 }
3545
a0d0e21e 3546 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3547 GV *ogv = Nullgv; /* override (winner) */
3548 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3549 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3550 CV *cv;
3280af22 3551 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3552 (cv = GvCVu(gv)))
3553 {
3554 if (GvIMPORTED_CV(gv))
3555 ogv = gv;
3556 else if (! CvMETHOD(cv))
3557 hgv = gv;
3558 }
3559 if (!ogv &&
3280af22
NIS
3560 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3561 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3562 GvCVu(gv) && GvIMPORTED_CV(gv))
3563 {
3564 ogv = gv;
3565 }
3566 }
3567 if (ogv) {
3568 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3569 }
3570 else if (gv && !gvp
3571 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3572 && GvCVu(gv)
3280af22 3573 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3574 {
3575 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3576 }
56f7f34b
CS
3577 else { /* no override */
3578 tmp = -tmp;
3579 gv = Nullgv;
3580 gvp = 0;
4944e2f7
GS
3581 if (ckWARN(WARN_AMBIGUOUS) && hgv
3582 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3583 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3584 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3585 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3586 }
a0d0e21e
LW
3587 }
3588
3589 reserved_word:
3590 switch (tmp) {
79072805
LW
3591
3592 default: /* not a keyword */