This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix parse error on C<{ use strict }> and other constructs that
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a
PP
47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff
PP
49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
395c3793
LW
61#ifdef I_FCNTL
62#include <fcntl.h>
63#endif
fe14fcc3
LW
64#ifdef I_SYS_FILE
65#include <sys/file.h>
66#endif
395c3793 67
a790bc05
PP
68/* XXX If this causes problems, set i_unistd=undef in the hint file. */
69#ifdef I_UNISTD
70# include <unistd.h> /* Needed for execv() */
71#endif
72
73
79072805
LW
74#ifdef ff_next
75#undef ff_next
d48672a2
LW
76#endif
77
a1a0e61e
TD
78#ifdef USE_PURE_BISON
79YYSTYPE* yylval_pointer = NULL;
80int* yychar_pointer = NULL;
22c35a8c
GS
81# undef yylval
82# undef yychar
e4bfbdd4
JH
83# define yylval (*yylval_pointer)
84# define yychar (*yychar_pointer)
85# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
86# undef yylex
87# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
88#endif
89
79072805 90#include "keywords.h"
fe14fcc3 91
ffb4593c
NT
92/* CLINE is a macro that ensures PL_copline has a sane value */
93
ae986130
LW
94#ifdef CLINE
95#undef CLINE
96#endif
57843af0 97#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 98
ffb4593c
NT
99/*
100 * Convenience functions to return different tokens and prime the
9cbb5ea2 101 * lexer for the next token. They all take an argument.
ffb4593c
NT
102 *
103 * TOKEN : generic token (used for '(', DOLSHARP, etc)
104 * OPERATOR : generic operator
105 * AOPERATOR : assignment operator
106 * PREBLOCK : beginning the block after an if, while, foreach, ...
107 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
108 * PREREF : *EXPR where EXPR is not a simple identifier
109 * TERM : expression term
110 * LOOPX : loop exiting command (goto, last, dump, etc)
111 * FTST : file test operator
112 * FUN0 : zero-argument function
2d2e263d 113 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
114 * BOop : bitwise or or xor
115 * BAop : bitwise and
116 * SHop : shift operator
117 * PWop : power operator
9cbb5ea2 118 * PMop : pattern-matching operator
ffb4593c
NT
119 * Aop : addition-level operator
120 * Mop : multiplication-level operator
121 * Eop : equality-testing operator
122 * Rop : relational operator <= != gt
123 *
124 * Also see LOP and lop() below.
125 */
126
3280af22
NIS
127#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
128#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
129#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
130#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
131#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
132#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
133#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
134#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
135#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
136#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
137#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
138#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
139#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
140#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
141#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
142#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
143#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
144#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
145#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
146#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 147
a687059c
LW
148/* This bit of chicanery makes a unary function followed by
149 * a parenthesis into a function with one argument, highest precedence.
150 */
2f3197b3 151#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_expect = XTERM, \
153 PL_bufptr = s, \
154 PL_last_uni = PL_oldbufptr, \
155 PL_last_lop_op = f, \
a687059c
LW
156 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
157
79072805 158#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
159 PL_bufptr = s, \
160 PL_last_uni = PL_oldbufptr, \
79072805
LW
161 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
162
9f68db38 163/* grandfather return to old style */
3280af22 164#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 165
ffb4593c
NT
166/*
167 * S_ao
168 *
169 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
170 * into an OP_ANDASSIGN or OP_ORASSIGN
171 */
172
76e3520e 173STATIC int
cea2e8a9 174S_ao(pTHX_ int toketype)
a0d0e21e 175{
3280af22
NIS
176 if (*PL_bufptr == '=') {
177 PL_bufptr++;
a0d0e21e
LW
178 if (toketype == ANDAND)
179 yylval.ival = OP_ANDASSIGN;
180 else if (toketype == OROR)
181 yylval.ival = OP_ORASSIGN;
182 toketype = ASSIGNOP;
183 }
184 return toketype;
185}
186
ffb4593c
NT
187/*
188 * S_no_op
189 * When Perl expects an operator and finds something else, no_op
190 * prints the warning. It always prints "<something> found where
191 * operator expected. It prints "Missing semicolon on previous line?"
192 * if the surprise occurs at the start of the line. "do you need to
193 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
194 * where the compiler doesn't know if foo is a method call or a function.
195 * It prints "Missing operator before end of line" if there's nothing
196 * after the missing operator, or "... before <...>" if there is something
197 * after the missing operator.
198 */
199
76e3520e 200STATIC void
cea2e8a9 201S_no_op(pTHX_ char *what, char *s)
463ee0b2 202{
3280af22
NIS
203 char *oldbp = PL_bufptr;
204 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 205
1189a94a
GS
206 if (!s)
207 s = oldbp;
208 else {
209 assert(s >= oldbp);
210 PL_bufptr = s;
211 }
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
221 }
222 else
cea2e8a9 223 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 224 PL_bufptr = oldbp;
8990e307
LW
225}
226
ffb4593c
NT
227/*
228 * S_missingterm
229 * Complain about missing quote/regexp/heredoc terminator.
230 * If it's called with (char *)NULL then it cauterizes the line buffer.
231 * If we're in a delimited string and the delimiter is a control
232 * character, it's reformatted into a two-char sequence like ^C.
233 * This is fatal.
234 */
235
76e3520e 236STATIC void
cea2e8a9 237S_missingterm(pTHX_ char *s)
8990e307
LW
238{
239 char tmpbuf[3];
240 char q;
241 if (s) {
242 char *nl = strrchr(s,'\n');
d2719217 243 if (nl)
8990e307
LW
244 *nl = '\0';
245 }
9d116dd7
JH
246 else if (
247#ifdef EBCDIC
248 iscntrl(PL_multi_close)
249#else
250 PL_multi_close < 32 || PL_multi_close == 127
251#endif
252 ) {
8990e307 253 *tmpbuf = '^';
3280af22 254 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
255 s = "\\n";
256 tmpbuf[2] = '\0';
257 s = tmpbuf;
258 }
259 else {
3280af22 260 *tmpbuf = PL_multi_close;
8990e307
LW
261 tmpbuf[1] = '\0';
262 s = tmpbuf;
263 }
264 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 265 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 266}
79072805 267
ffb4593c
NT
268/*
269 * Perl_deprecate
ffb4593c
NT
270 */
271
79072805 272void
864dbfa3 273Perl_deprecate(pTHX_ char *s)
a0d0e21e 274{
d008e5eb 275 dTHR;
599cee73 276 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 277 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
278}
279
ffb4593c
NT
280/*
281 * depcom
9cbb5ea2 282 * Deprecate a comma-less variable list.
ffb4593c
NT
283 */
284
76e3520e 285STATIC void
cea2e8a9 286S_depcom(pTHX)
a0d0e21e
LW
287{
288 deprecate("comma-less variable list");
289}
290
ffb4593c 291/*
9cbb5ea2
GS
292 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
293 * utf16-to-utf8-reversed.
ffb4593c
NT
294 */
295
c39cd008
GS
296#ifdef PERL_CR_FILTER
297static void
298strip_return(SV *sv)
299{
300 register char *s = SvPVX(sv);
301 register char *e = s + SvCUR(sv);
302 /* outer loop optimized to do nothing if there are no CR-LFs */
303 while (s < e) {
304 if (*s++ == '\r' && *s == '\n') {
305 /* hit a CR-LF, need to copy the rest */
306 register char *d = s - 1;
307 *d++ = *s++;
308 while (s < e) {
309 if (*s == '\r' && s[1] == '\n')
310 s++;
311 *d++ = *s++;
312 }
313 SvCUR(sv) -= s - d;
314 return;
315 }
316 }
317}
a868473f 318
76e3520e 319STATIC I32
c39cd008 320S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 321{
c39cd008
GS
322 I32 count = FILTER_READ(idx+1, sv, maxlen);
323 if (count > 0 && !maxlen)
324 strip_return(sv);
325 return count;
a868473f
NIS
326}
327#endif
328
a0ed51b3 329STATIC I32
cea2e8a9 330S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
331{
332 I32 count = FILTER_READ(idx+1, sv, maxlen);
333 if (count) {
dfe13c55
GS
334 U8* tmps;
335 U8* tend;
336 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 337 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 338 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
339
340 }
341 return count;
342}
343
344STATIC I32
cea2e8a9 345S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
346{
347 I32 count = FILTER_READ(idx+1, sv, maxlen);
348 if (count) {
dfe13c55
GS
349 U8* tmps;
350 U8* tend;
351 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 352 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 353 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
354
355 }
356 return count;
357}
a868473f 358
ffb4593c
NT
359/*
360 * Perl_lex_start
9cbb5ea2
GS
361 * Initialize variables. Uses the Perl save_stack to save its state (for
362 * recursive calls to the parser).
ffb4593c
NT
363 */
364
a0d0e21e 365void
864dbfa3 366Perl_lex_start(pTHX_ SV *line)
79072805 367{
0f15f207 368 dTHR;
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
386 PL_nexttoke = 0;
387 }
57843af0 388 SAVECOPLINE(PL_curcop);
3280af22
NIS
389 SAVEPPTR(PL_bufptr);
390 SAVEPPTR(PL_bufend);
391 SAVEPPTR(PL_oldbufptr);
392 SAVEPPTR(PL_oldoldbufptr);
393 SAVEPPTR(PL_linestart);
394 SAVESPTR(PL_linestr);
395 SAVEPPTR(PL_lex_brackstack);
396 SAVEPPTR(PL_lex_casestack);
c76ac1ee 397 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
398 SAVESPTR(PL_lex_stuff);
399 SAVEI32(PL_lex_defer);
09bef843 400 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 401 SAVESPTR(PL_lex_repl);
bebdddfc
GS
402 SAVEINT(PL_expect);
403 SAVEINT(PL_lex_expect);
3280af22
NIS
404
405 PL_lex_state = LEX_NORMAL;
406 PL_lex_defer = 0;
407 PL_expect = XSTATE;
408 PL_lex_brackets = 0;
3280af22
NIS
409 New(899, PL_lex_brackstack, 120, char);
410 New(899, PL_lex_casestack, 12, char);
411 SAVEFREEPV(PL_lex_brackstack);
412 SAVEFREEPV(PL_lex_casestack);
413 PL_lex_casemods = 0;
414 *PL_lex_casestack = '\0';
415 PL_lex_dojoin = 0;
416 PL_lex_starts = 0;
417 PL_lex_stuff = Nullsv;
418 PL_lex_repl = Nullsv;
419 PL_lex_inpat = 0;
420 PL_lex_inwhat = 0;
09bef843 421 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
422 PL_linestr = line;
423 if (SvREADONLY(PL_linestr))
424 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
425 s = SvPV(PL_linestr, len);
8990e307 426 if (len && s[len-1] != ';') {
3280af22
NIS
427 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
428 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
429 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 430 }
3280af22
NIS
431 SvTEMP_off(PL_linestr);
432 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
433 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
434 SvREFCNT_dec(PL_rs);
79cb57f6 435 PL_rs = newSVpvn("\n", 1);
3280af22 436 PL_rsfp = 0;
79072805 437}
a687059c 438
ffb4593c
NT
439/*
440 * Perl_lex_end
9cbb5ea2
GS
441 * Finalizer for lexing operations. Must be called when the parser is
442 * done with the lexer.
ffb4593c
NT
443 */
444
463ee0b2 445void
864dbfa3 446Perl_lex_end(pTHX)
463ee0b2 447{
3280af22 448 PL_doextract = FALSE;
463ee0b2
LW
449}
450
ffb4593c
NT
451/*
452 * S_incline
453 * This subroutine has nothing to do with tilting, whether at windmills
454 * or pinball tables. Its name is short for "increment line". It
57843af0 455 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 456 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
457 * # line 500 "foo.pm"
458 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
459 */
460
76e3520e 461STATIC void
cea2e8a9 462S_incline(pTHX_ char *s)
463ee0b2 463{
0f15f207 464 dTHR;
463ee0b2
LW
465 char *t;
466 char *n;
467 char ch;
468 int sawline = 0;
469
57843af0 470 CopLINE_inc(PL_curcop);
463ee0b2
LW
471 if (*s++ != '#')
472 return;
473 while (*s == ' ' || *s == '\t') s++;
474 if (strnEQ(s, "line ", 5)) {
475 s += 5;
476 sawline = 1;
477 }
478 if (!isDIGIT(*s))
479 return;
480 n = s;
481 while (isDIGIT(*s))
482 s++;
483 while (*s == ' ' || *s == '\t')
484 s++;
485 if (*s == '"' && (t = strchr(s+1, '"')))
486 s++;
487 else {
488 if (!sawline)
489 return; /* false alarm */
490 for (t = s; !isSPACE(*t); t++) ;
491 }
492 ch = *t;
493 *t = '\0';
494 if (t - s > 0)
57843af0 495 CopFILE_set(PL_curcop, s);
463ee0b2 496 *t = ch;
57843af0 497 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
498}
499
ffb4593c
NT
500/*
501 * S_skipspace
502 * Called to gobble the appropriate amount and type of whitespace.
503 * Skips comments as well.
504 */
505
76e3520e 506STATIC char *
cea2e8a9 507S_skipspace(pTHX_ register char *s)
a687059c 508{
11343788 509 dTHR;
3280af22
NIS
510 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
511 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
512 s++;
513 return s;
514 }
515 for (;;) {
fd049845 516 STRLEN prevlen;
09bef843
SB
517 SSize_t oldprevlen, oldoldprevlen;
518 SSize_t oldloplen, oldunilen;
60e6418e
GS
519 while (s < PL_bufend && isSPACE(*s)) {
520 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
521 incline(s);
522 }
ffb4593c
NT
523
524 /* comment */
3280af22
NIS
525 if (s < PL_bufend && *s == '#') {
526 while (s < PL_bufend && *s != '\n')
463ee0b2 527 s++;
60e6418e 528 if (s < PL_bufend) {
463ee0b2 529 s++;
60e6418e
GS
530 if (PL_in_eval && !PL_rsfp) {
531 incline(s);
532 continue;
533 }
534 }
463ee0b2 535 }
ffb4593c
NT
536
537 /* only continue to recharge the buffer if we're at the end
538 * of the buffer, we're not reading from a source filter, and
539 * we're in normal lexing mode
540 */
09bef843
SB
541 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
542 PL_lex_state == LEX_FORMLINE)
463ee0b2 543 return s;
ffb4593c
NT
544
545 /* try to recharge the buffer */
9cbb5ea2
GS
546 if ((s = filter_gets(PL_linestr, PL_rsfp,
547 (prevlen = SvCUR(PL_linestr)))) == Nullch)
548 {
549 /* end of file. Add on the -p or -n magic */
3280af22
NIS
550 if (PL_minus_n || PL_minus_p) {
551 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
552 ";}continue{print or die qq(-p destination: $!\\n)" :
553 "");
3280af22
NIS
554 sv_catpv(PL_linestr,";}");
555 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
556 }
557 else
3280af22 558 sv_setpv(PL_linestr,";");
ffb4593c
NT
559
560 /* reset variables for next time we lex */
9cbb5ea2
GS
561 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
562 = SvPVX(PL_linestr);
3280af22 563 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
564
565 /* Close the filehandle. Could be from -P preprocessor,
566 * STDIN, or a regular file. If we were reading code from
567 * STDIN (because the commandline held no -e or filename)
568 * then we don't close it, we reset it so the code can
569 * read from STDIN too.
570 */
571
3280af22
NIS
572 if (PL_preprocess && !PL_in_eval)
573 (void)PerlProc_pclose(PL_rsfp);
574 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
575 PerlIO_clearerr(PL_rsfp);
8990e307 576 else
3280af22
NIS
577 (void)PerlIO_close(PL_rsfp);
578 PL_rsfp = Nullfp;
463ee0b2
LW
579 return s;
580 }
ffb4593c
NT
581
582 /* not at end of file, so we only read another line */
09bef843
SB
583 /* make corresponding updates to old pointers, for yyerror() */
584 oldprevlen = PL_oldbufptr - PL_bufend;
585 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
586 if (PL_last_uni)
587 oldunilen = PL_last_uni - PL_bufend;
588 if (PL_last_lop)
589 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
590 PL_linestart = PL_bufptr = s + prevlen;
591 PL_bufend = s + SvCUR(PL_linestr);
592 s = PL_bufptr;
09bef843
SB
593 PL_oldbufptr = s + oldprevlen;
594 PL_oldoldbufptr = s + oldoldprevlen;
595 if (PL_last_uni)
596 PL_last_uni = s + oldunilen;
597 if (PL_last_lop)
598 PL_last_lop = s + oldloplen;
a0d0e21e 599 incline(s);
ffb4593c
NT
600
601 /* debugger active and we're not compiling the debugger code,
602 * so store the line into the debugger's array of lines
603 */
3280af22 604 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
605 SV *sv = NEWSV(85,0);
606
607 sv_upgrade(sv, SVt_PVMG);
3280af22 608 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 609 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 610 }
463ee0b2 611 }
a687059c 612}
378cc40b 613
ffb4593c
NT
614/*
615 * S_check_uni
616 * Check the unary operators to ensure there's no ambiguity in how they're
617 * used. An ambiguous piece of code would be:
618 * rand + 5
619 * This doesn't mean rand() + 5. Because rand() is a unary operator,
620 * the +5 is its argument.
621 */
622
76e3520e 623STATIC void
cea2e8a9 624S_check_uni(pTHX)
ba106d47 625{
2f3197b3 626 char *s;
a0d0e21e 627 char *t;
0453d815 628 dTHR;
2f3197b3 629
3280af22 630 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 631 return;
3280af22
NIS
632 while (isSPACE(*PL_last_uni))
633 PL_last_uni++;
7e2040f0 634 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 635 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 636 return;
0453d815 637 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 638 char ch = *s;
0453d815
PM
639 *s = '\0';
640 Perl_warner(aTHX_ WARN_AMBIGUOUS,
641 "Warning: Use of \"%s\" without parens is ambiguous",
642 PL_last_uni);
643 *s = ch;
644 }
2f3197b3
LW
645}
646
ffb4593c
NT
647/* workaround to replace the UNI() macro with a function. Only the
648 * hints/uts.sh file mentions this. Other comments elsewhere in the
649 * source indicate Microport Unix might need it too.
650 */
651
ffed7fef
LW
652#ifdef CRIPPLED_CC
653
654#undef UNI
ffed7fef 655#define UNI(f) return uni(f,s)
ffed7fef 656
76e3520e 657STATIC int
cea2e8a9 658S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
659{
660 yylval.ival = f;
3280af22
NIS
661 PL_expect = XTERM;
662 PL_bufptr = s;
8f872242
NIS
663 PL_last_uni = PL_oldbufptr;
664 PL_last_lop_op = f;
ffed7fef
LW
665 if (*s == '(')
666 return FUNC1;
667 s = skipspace(s);
668 if (*s == '(')
669 return FUNC1;
670 else
671 return UNIOP;
672}
673
a0d0e21e
LW
674#endif /* CRIPPLED_CC */
675
ffb4593c
NT
676/*
677 * LOP : macro to build a list operator. Its behaviour has been replaced
678 * with a subroutine, S_lop() for which LOP is just another name.
679 */
680
a0d0e21e
LW
681#define LOP(f,x) return lop(f,x,s)
682
ffb4593c
NT
683/*
684 * S_lop
685 * Build a list operator (or something that might be one). The rules:
686 * - if we have a next token, then it's a list operator [why?]
687 * - if the next thing is an opening paren, then it's a function
688 * - else it's a list operator
689 */
690
76e3520e 691STATIC I32
a0be28da 692S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 693{
0f15f207 694 dTHR;
79072805 695 yylval.ival = f;
35c8bce7 696 CLINE;
3280af22
NIS
697 PL_expect = x;
698 PL_bufptr = s;
699 PL_last_lop = PL_oldbufptr;
700 PL_last_lop_op = f;
701 if (PL_nexttoke)
a0d0e21e 702 return LSTOP;
79072805
LW
703 if (*s == '(')
704 return FUNC;
705 s = skipspace(s);
706 if (*s == '(')
707 return FUNC;
708 else
709 return LSTOP;
710}
711
ffb4593c
NT
712/*
713 * S_force_next
9cbb5ea2 714 * When the lexer realizes it knows the next token (for instance,
ffb4593c 715 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
716 * to know what token to return the next time the lexer is called. Caller
717 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
718 * handles the token correctly.
ffb4593c
NT
719 */
720
76e3520e 721STATIC void
cea2e8a9 722S_force_next(pTHX_ I32 type)
79072805 723{
3280af22
NIS
724 PL_nexttype[PL_nexttoke] = type;
725 PL_nexttoke++;
726 if (PL_lex_state != LEX_KNOWNEXT) {
727 PL_lex_defer = PL_lex_state;
728 PL_lex_expect = PL_expect;
729 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
730 }
731}
732
ffb4593c
NT
733/*
734 * S_force_word
735 * When the lexer knows the next thing is a word (for instance, it has
736 * just seen -> and it knows that the next char is a word char, then
737 * it calls S_force_word to stick the next word into the PL_next lookahead.
738 *
739 * Arguments:
b1b65b59 740 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
741 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
742 * int check_keyword : if true, Perl checks to make sure the word isn't
743 * a keyword (do this if the word is a label, e.g. goto FOO)
744 * int allow_pack : if true, : characters will also be allowed (require,
745 * use, etc. do this)
9cbb5ea2 746 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
747 */
748
76e3520e 749STATIC char *
cea2e8a9 750S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 751{
463ee0b2
LW
752 register char *s;
753 STRLEN len;
754
755 start = skipspace(start);
756 s = start;
7e2040f0 757 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 758 (allow_pack && *s == ':') ||
15f0808c 759 (allow_initial_tick && *s == '\'') )
a0d0e21e 760 {
3280af22
NIS
761 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
762 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
763 return start;
764 if (token == METHOD) {
765 s = skipspace(s);
766 if (*s == '(')
3280af22 767 PL_expect = XTERM;
463ee0b2 768 else {
3280af22 769 PL_expect = XOPERATOR;
463ee0b2 770 }
79072805 771 }
3280af22
NIS
772 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
773 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
774 force_next(token);
775 }
776 return s;
777}
778
ffb4593c
NT
779/*
780 * S_force_ident
9cbb5ea2 781 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
782 * text only contains the "foo" portion. The first argument is a pointer
783 * to the "foo", and the second argument is the type symbol to prefix.
784 * Forces the next token to be a "WORD".
9cbb5ea2 785 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
786 */
787
76e3520e 788STATIC void
cea2e8a9 789S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
790{
791 if (s && *s) {
11343788 792 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 793 PL_nextval[PL_nexttoke].opval = o;
79072805 794 force_next(WORD);
748a9306 795 if (kind) {
e858de61 796 dTHR; /* just for in_eval */
11343788 797 o->op_private = OPpCONST_ENTERED;
55497cff
PP
798 /* XXX see note in pp_entereval() for why we forgo typo
799 warnings if the symbol must be introduced in an eval.
800 GSAR 96-10-12 */
3280af22 801 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
802 kind == '$' ? SVt_PV :
803 kind == '@' ? SVt_PVAV :
804 kind == '%' ? SVt_PVHV :
805 SVt_PVGV
806 );
748a9306 807 }
79072805
LW
808 }
809}
810
ffb4593c
NT
811/*
812 * S_force_version
813 * Forces the next token to be a version number.
814 */
815
76e3520e 816STATIC char *
cea2e8a9 817S_force_version(pTHX_ char *s)
89bfa8cd
PP
818{
819 OP *version = Nullop;
820
821 s = skipspace(s);
822
a7cb1f99
GS
823 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
824 char *d = s;
825 if (*d == 'v')
826 d++;
827 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
828 if ((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
89bfa8cd
PP
829 s = scan_num(s);
830 /* real VERSION number -- GBARR */
831 version = yylval.opval;
832 }
833 }
834
835 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 836 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
837 force_next(WORD);
838
839 return (s);
840}
841
ffb4593c
NT
842/*
843 * S_tokeq
844 * Tokenize a quoted string passed in as an SV. It finds the next
845 * chunk, up to end of string or a backslash. It may make a new
846 * SV containing that chunk (if HINT_NEW_STRING is on). It also
847 * turns \\ into \.
848 */
849
76e3520e 850STATIC SV *
cea2e8a9 851S_tokeq(pTHX_ SV *sv)
79072805
LW
852{
853 register char *s;
854 register char *send;
855 register char *d;
b3ac6de7
IZ
856 STRLEN len = 0;
857 SV *pv = sv;
79072805
LW
858
859 if (!SvLEN(sv))
b3ac6de7 860 goto finish;
79072805 861
a0d0e21e 862 s = SvPV_force(sv, len);
748a9306 863 if (SvIVX(sv) == -1)
b3ac6de7 864 goto finish;
463ee0b2 865 send = s + len;
79072805
LW
866 while (s < send && *s != '\\')
867 s++;
868 if (s == send)
b3ac6de7 869 goto finish;
79072805 870 d = s;
3280af22 871 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 872 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
873 while (s < send) {
874 if (*s == '\\') {
a0d0e21e 875 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
876 s++; /* all that, just for this */
877 }
878 *d++ = *s++;
879 }
880 *d = '\0';
463ee0b2 881 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 882 finish:
3280af22 883 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 884 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
885 return sv;
886}
887
ffb4593c
NT
888/*
889 * Now come three functions related to double-quote context,
890 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
891 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
892 * interact with PL_lex_state, and create fake ( ... ) argument lists
893 * to handle functions and concatenation.
894 * They assume that whoever calls them will be setting up a fake
895 * join call, because each subthing puts a ',' after it. This lets
896 * "lower \luPpEr"
897 * become
898 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
899 *
900 * (I'm not sure whether the spurious commas at the end of lcfirst's
901 * arguments and join's arguments are created or not).
902 */
903
904/*
905 * S_sublex_start
906 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
907 *
908 * Pattern matching will set PL_lex_op to the pattern-matching op to
909 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
910 *
911 * OP_CONST and OP_READLINE are easy--just make the new op and return.
912 *
913 * Everything else becomes a FUNC.
914 *
915 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
916 * had an OP_CONST or OP_READLINE). This just sets us up for a
917 * call to S_sublex_push().
918 */
919
76e3520e 920STATIC I32
cea2e8a9 921S_sublex_start(pTHX)
79072805
LW
922{
923 register I32 op_type = yylval.ival;
79072805
LW
924
925 if (op_type == OP_NULL) {
3280af22
NIS
926 yylval.opval = PL_lex_op;
927 PL_lex_op = Nullop;
79072805
LW
928 return THING;
929 }
930 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 931 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
932
933 if (SvTYPE(sv) == SVt_PVIV) {
934 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
935 STRLEN len;
936 char *p;
937 SV *nsv;
938
939 p = SvPV(sv, len);
79cb57f6 940 nsv = newSVpvn(p, len);
b3ac6de7
IZ
941 SvREFCNT_dec(sv);
942 sv = nsv;
943 }
944 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 945 PL_lex_stuff = Nullsv;
79072805
LW
946 return THING;
947 }
948
3280af22
NIS
949 PL_sublex_info.super_state = PL_lex_state;
950 PL_sublex_info.sub_inwhat = op_type;
951 PL_sublex_info.sub_op = PL_lex_op;
952 PL_lex_state = LEX_INTERPPUSH;
55497cff 953
3280af22
NIS
954 PL_expect = XTERM;
955 if (PL_lex_op) {
956 yylval.opval = PL_lex_op;
957 PL_lex_op = Nullop;
55497cff
PP
958 return PMFUNC;
959 }
960 else
961 return FUNC;
962}
963
ffb4593c
NT
964/*
965 * S_sublex_push
966 * Create a new scope to save the lexing state. The scope will be
967 * ended in S_sublex_done. Returns a '(', starting the function arguments
968 * to the uc, lc, etc. found before.
969 * Sets PL_lex_state to LEX_INTERPCONCAT.
970 */
971
76e3520e 972STATIC I32
cea2e8a9 973S_sublex_push(pTHX)
55497cff 974{
0f15f207 975 dTHR;
f46d017c 976 ENTER;
55497cff 977
3280af22
NIS
978 PL_lex_state = PL_sublex_info.super_state;
979 SAVEI32(PL_lex_dojoin);
980 SAVEI32(PL_lex_brackets);
3280af22
NIS
981 SAVEI32(PL_lex_casemods);
982 SAVEI32(PL_lex_starts);
983 SAVEI32(PL_lex_state);
7766f137 984 SAVEVPTR(PL_lex_inpat);
3280af22 985 SAVEI32(PL_lex_inwhat);
57843af0 986 SAVECOPLINE(PL_curcop);
3280af22
NIS
987 SAVEPPTR(PL_bufptr);
988 SAVEPPTR(PL_oldbufptr);
989 SAVEPPTR(PL_oldoldbufptr);
990 SAVEPPTR(PL_linestart);
991 SAVESPTR(PL_linestr);
992 SAVEPPTR(PL_lex_brackstack);
993 SAVEPPTR(PL_lex_casestack);
994
995 PL_linestr = PL_lex_stuff;
996 PL_lex_stuff = Nullsv;
997
9cbb5ea2
GS
998 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
999 = SvPVX(PL_linestr);
3280af22
NIS
1000 PL_bufend += SvCUR(PL_linestr);
1001 SAVEFREESV(PL_linestr);
1002
1003 PL_lex_dojoin = FALSE;
1004 PL_lex_brackets = 0;
3280af22
NIS
1005 New(899, PL_lex_brackstack, 120, char);
1006 New(899, PL_lex_casestack, 12, char);
1007 SAVEFREEPV(PL_lex_brackstack);
1008 SAVEFREEPV(PL_lex_casestack);
1009 PL_lex_casemods = 0;
1010 *PL_lex_casestack = '\0';
1011 PL_lex_starts = 0;
1012 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1013 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1014
1015 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1016 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1017 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1018 else
3280af22 1019 PL_lex_inpat = Nullop;
79072805 1020
55497cff 1021 return '(';
79072805
LW
1022}
1023
ffb4593c
NT
1024/*
1025 * S_sublex_done
1026 * Restores lexer state after a S_sublex_push.
1027 */
1028
76e3520e 1029STATIC I32
cea2e8a9 1030S_sublex_done(pTHX)
79072805 1031{
3280af22
NIS
1032 if (!PL_lex_starts++) {
1033 PL_expect = XOPERATOR;
79cb57f6 1034 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1035 return THING;
1036 }
1037
3280af22
NIS
1038 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1039 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1040 return yylex();
79072805
LW
1041 }
1042
ffb4593c 1043 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1044 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1045 PL_linestr = PL_lex_repl;
1046 PL_lex_inpat = 0;
1047 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1048 PL_bufend += SvCUR(PL_linestr);
1049 SAVEFREESV(PL_linestr);
1050 PL_lex_dojoin = FALSE;
1051 PL_lex_brackets = 0;
3280af22
NIS
1052 PL_lex_casemods = 0;
1053 *PL_lex_casestack = '\0';
1054 PL_lex_starts = 0;
25da4f38 1055 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1056 PL_lex_state = LEX_INTERPNORMAL;
1057 PL_lex_starts++;
e9fa98b2
HS
1058 /* we don't clear PL_lex_repl here, so that we can check later
1059 whether this is an evalled subst; that means we rely on the
1060 logic to ensure sublex_done() is called again only via the
1061 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1062 }
e9fa98b2 1063 else {
3280af22 1064 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1065 PL_lex_repl = Nullsv;
1066 }
79072805 1067 return ',';
ffed7fef
LW
1068 }
1069 else {
f46d017c 1070 LEAVE;
3280af22
NIS
1071 PL_bufend = SvPVX(PL_linestr);
1072 PL_bufend += SvCUR(PL_linestr);
1073 PL_expect = XOPERATOR;
09bef843 1074 PL_sublex_info.sub_inwhat = 0;
79072805 1075 return ')';
ffed7fef
LW
1076 }
1077}
1078
02aa26ce
NT
1079/*
1080 scan_const
1081
1082 Extracts a pattern, double-quoted string, or transliteration. This
1083 is terrifying code.
1084
3280af22
NIS
1085 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1086 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1087 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1088
9b599b2a
GS
1089 Returns a pointer to the character scanned up to. Iff this is
1090 advanced from the start pointer supplied (ie if anything was
1091 successfully parsed), will leave an OP for the substring scanned
1092 in yylval. Caller must intuit reason for not parsing further
1093 by looking at the next characters herself.
1094
02aa26ce
NT
1095 In patterns:
1096 backslashes:
1097 double-quoted style: \r and \n
1098 regexp special ones: \D \s
1099 constants: \x3
1100 backrefs: \1 (deprecated in substitution replacements)
1101 case and quoting: \U \Q \E
1102 stops on @ and $, but not for $ as tail anchor
1103
1104 In transliterations:
1105 characters are VERY literal, except for - not at the start or end
1106 of the string, which indicates a range. scan_const expands the
1107 range to the full set of intermediate characters.
1108
1109 In double-quoted strings:
1110 backslashes:
1111 double-quoted style: \r and \n
1112 constants: \x3
1113 backrefs: \1 (deprecated)
1114 case and quoting: \U \Q \E
1115 stops on @ and $
1116
1117 scan_const does *not* construct ops to handle interpolated strings.
1118 It stops processing as soon as it finds an embedded $ or @ variable
1119 and leaves it to the caller to work out what's going on.
1120
1121 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1122
1123 $ in pattern could be $foo or could be tail anchor. Assumption:
1124 it's a tail anchor if $ is the last thing in the string, or if it's
1125 followed by one of ")| \n\t"
1126
1127 \1 (backreferences) are turned into $1
1128
1129 The structure of the code is
1130 while (there's a character to process) {
1131 handle transliteration ranges
1132 skip regexp comments
1133 skip # initiated comments in //x patterns
1134 check for embedded @foo
1135 check for embedded scalars
1136 if (backslash) {
1137 leave intact backslashes from leave (below)
1138 deprecate \1 in strings and sub replacements
1139 handle string-changing backslashes \l \U \Q \E, etc.
1140 switch (what was escaped) {
1141 handle - in a transliteration (becomes a literal -)
1142 handle \132 octal characters
1143 handle 0x15 hex characters
1144 handle \cV (control V)
1145 handle printf backslashes (\f, \r, \n, etc)
1146 } (end switch)
1147 } (end if backslash)
1148 } (end while character to read)
1149
1150*/
1151
76e3520e 1152STATIC char *
cea2e8a9 1153S_scan_const(pTHX_ char *start)
79072805 1154{
3280af22 1155 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1156 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1157 register char *s = start; /* start of the constant */
1158 register char *d = SvPVX(sv); /* destination for copies */
1159 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1160 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1161 I32 len; /* ? */
ac2262e3 1162 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1163 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1164 : UTF;
ac2262e3 1165 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1166 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1167 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1168 : UTF;
dff6d3cd 1169 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1170 PL_lex_inpat
4a2d328f 1171 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1172 : "";
79072805
LW
1173
1174 while (s < send || dorange) {
02aa26ce 1175 /* get transliterations out of the way (they're most literal) */
3280af22 1176 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1177 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1178 if (dorange) {
02aa26ce 1179 I32 i; /* current expanded character */
8ada0baa 1180 I32 min; /* first character in range */
02aa26ce
NT
1181 I32 max; /* last character in range */
1182
1183 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1184 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1185 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1186 d -= 2; /* eat the first char and the - */
1187
8ada0baa
JH
1188 min = (U8)*d; /* first char in range */
1189 max = (U8)d[1]; /* last char in range */
1190
1191#ifndef ASCIIish
1192 if ((isLOWER(min) && isLOWER(max)) ||
1193 (isUPPER(min) && isUPPER(max))) {
1194 if (isLOWER(min)) {
1195 for (i = min; i <= max; i++)
1196 if (isLOWER(i))
1197 *d++ = i;
1198 } else {
1199 for (i = min; i <= max; i++)
1200 if (isUPPER(i))
1201 *d++ = i;
1202 }
1203 }
1204 else
1205#endif
1206 for (i = min; i <= max; i++)
1207 *d++ = i;
02aa26ce
NT
1208
1209 /* mark the range as done, and continue */
79072805
LW
1210 dorange = FALSE;
1211 continue;
1212 }
02aa26ce
NT
1213
1214 /* range begins (ignore - as first or last char) */
79072805 1215 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1216 if (utf) {
a176fa2a 1217 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1218 s++;
1219 continue;
1220 }
79072805
LW
1221 dorange = TRUE;
1222 s++;
1223 }
1224 }
02aa26ce
NT
1225
1226 /* if we get here, we're not doing a transliteration */
1227
0f5d15d6
IZ
1228 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1229 except for the last char, which will be done separately. */
3280af22 1230 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1231 if (s[2] == '#') {
1232 while (s < send && *s != ')')
1233 *d++ = *s++;
0f5d15d6
IZ
1234 } else if (s[2] == '{'
1235 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1236 I32 count = 1;
0f5d15d6 1237 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1238 char c;
1239
d9f97599
GS
1240 while (count && (c = *regparse)) {
1241 if (c == '\\' && regparse[1])
1242 regparse++;
cc6b7395
IZ
1243 else if (c == '{')
1244 count++;
1245 else if (c == '}')
1246 count--;
d9f97599 1247 regparse++;
cc6b7395 1248 }
5bdf89e7
IZ
1249 if (*regparse != ')') {
1250 regparse--; /* Leave one char for continuation. */
cc6b7395 1251 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1252 }
0f5d15d6 1253 while (s < regparse)
cc6b7395
IZ
1254 *d++ = *s++;
1255 }
748a9306 1256 }
02aa26ce
NT
1257
1258 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1259 else if (*s == '#' && PL_lex_inpat &&
1260 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1261 while (s+1 < send && *s != '\n')
1262 *d++ = *s++;
1263 }
02aa26ce
NT
1264
1265 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1266 else if (*s == '@' && s[1]
1267 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1268 break;
02aa26ce
NT
1269
1270 /* check for embedded scalars. only stop if we're sure it's a
1271 variable.
1272 */
79072805 1273 else if (*s == '$') {
3280af22 1274 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1275 break;
c277df42 1276 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1277 break; /* in regexp, $ might be tail anchor */
1278 }
02aa26ce 1279
a0ed51b3
LW
1280 /* (now in tr/// code again) */
1281
d008e5eb
GS
1282 if (*s & 0x80 && thisutf) {
1283 dTHR; /* only for ckWARN */
1284 if (ckWARN(WARN_UTF8)) {
dfe13c55 1285 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb 1286 if (len) {
b1c7b182 1287 has_utf = TRUE;
d008e5eb
GS
1288 while (len--)
1289 *d++ = *s++;
1290 continue;
1291 }
a0ed51b3 1292 }
b1c7b182
GS
1293 else
1294 has_utf = TRUE; /* assume valid utf8 */
a0ed51b3
LW
1295 }
1296
02aa26ce 1297 /* backslashes */
79072805
LW
1298 if (*s == '\\' && s+1 < send) {
1299 s++;
02aa26ce
NT
1300
1301 /* some backslashes we leave behind */
c9f97d15 1302 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1303 *d++ = '\\';
1304 *d++ = *s++;
1305 continue;
1306 }
02aa26ce
NT
1307
1308 /* deprecate \1 in strings and substitution replacements */
3280af22 1309 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1310 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1311 {
d008e5eb 1312 dTHR; /* only for ckWARN */
599cee73 1313 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1314 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1315 *--s = '$';
1316 break;
1317 }
02aa26ce
NT
1318
1319 /* string-change backslash escapes */
3280af22 1320 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1321 --s;
1322 break;
1323 }
02aa26ce
NT
1324
1325 /* if we get here, it's either a quoted -, or a digit */
79072805 1326 switch (*s) {
02aa26ce
NT
1327
1328 /* quoted - in transliterations */
79072805 1329 case '-':
3280af22 1330 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1331 *d++ = *s++;
1332 continue;
1333 }
1334 /* FALL THROUGH */
1335 default:
11b8faa4
JH
1336 {
1337 dTHR;
1338 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1339 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1340 "Unrecognized escape \\%c passed through",
1341 *s);
1342 /* default action is to copy the quoted character */
1343 *d++ = *s++;
1344 continue;
1345 }
02aa26ce
NT
1346
1347 /* \132 indicates an octal constant */
79072805
LW
1348 case '0': case '1': case '2': case '3':
1349 case '4': case '5': case '6': case '7':
dff6d3cd 1350 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1351 s += len;
1352 continue;
02aa26ce
NT
1353
1354 /* \x24 indicates a hex constant */
79072805 1355 case 'x':
a0ed51b3
LW
1356 ++s;
1357 if (*s == '{') {
1358 char* e = strchr(s, '}');
1359
adaeee49 1360 if (!e) {
a0ed51b3 1361 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1362 e = s;
1363 }
a0ed51b3 1364 /* note: utf always shorter than hex */
dfe13c55 1365 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1366 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1367 s = e + 1;
7e2040f0 1368 has_utf = TRUE;
a0ed51b3
LW
1369 }
1370 else {
1371 UV uv = (UV)scan_hex(s, 2, &len);
1372 if (utf && PL_lex_inwhat == OP_TRANS &&
1373 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1374 {
dfe13c55 1375 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
7e2040f0 1376 has_utf = TRUE;
a0ed51b3
LW
1377 }
1378 else {
d008e5eb
GS
1379 if (uv >= 127 && UTF) {
1380 dTHR;
1381 if (ckWARN(WARN_UTF8))
cea2e8a9 1382 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb 1383 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
894356b3 1384 (int)len,s,(int)len,s);
d008e5eb 1385 }
a0ed51b3
LW
1386 *d++ = (char)uv;
1387 }
1388 s += len;
1389 }
79072805 1390 continue;
02aa26ce 1391
4a2d328f
IZ
1392 /* \N{latin small letter a} is a named character */
1393 case 'N':
423cee85
JH
1394 ++s;
1395 if (*s == '{') {
1396 char* e = strchr(s, '}');
1397 HV *hv;
1398 SV **svp;
1399 SV *res, *cv;
1400 STRLEN len;
1401 char *str;
1402 char *why = Nullch;
1403
1404 if (!e) {
5777a3f7 1405 yyerror("Missing right brace on \\N{}");
423cee85
JH
1406 e = s - 1;
1407 goto cont_scan;
1408 }
1409 res = newSVpvn(s + 1, e - s - 1);
1410 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1411 res, Nullsv, "\\N{...}" );
423cee85
JH
1412 str = SvPV(res,len);
1413 if (len > e - s + 4) {
1414 char *odest = SvPVX(sv);
1415
1416 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1417 d = SvPVX(sv) + (d - odest);
1418 }
1419 Copy(str, d, len, char);
1420 d += len;
1421 SvREFCNT_dec(res);
1422 cont_scan:
1423 s = e + 1;
1424 }
1425 else
5777a3f7 1426 yyerror("Missing braces on \\N{}");
423cee85
JH
1427 continue;
1428
02aa26ce 1429 /* \c is a control character */
79072805
LW
1430 case 'c':
1431 s++;
9d116dd7
JH
1432#ifdef EBCDIC
1433 *d = *s++;
1434 if (isLOWER(*d))
1435 *d = toUPPER(*d);
1436 *d++ = toCTRL(*d);
1437#else
bbce6d69
PP
1438 len = *s++;
1439 *d++ = toCTRL(len);
9d116dd7 1440#endif
79072805 1441 continue;
02aa26ce
NT
1442
1443 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1444 case 'b':
1445 *d++ = '\b';
1446 break;
1447 case 'n':
1448 *d++ = '\n';
1449 break;
1450 case 'r':
1451 *d++ = '\r';
1452 break;
1453 case 'f':
1454 *d++ = '\f';
1455 break;
1456 case 't':
1457 *d++ = '\t';
1458 break;
34a3fe2a
PP
1459#ifdef EBCDIC
1460 case 'e':
1461 *d++ = '\047'; /* CP 1047 */
1462 break;
1463 case 'a':
1464 *d++ = '\057'; /* CP 1047 */
1465 break;
1466#else
79072805
LW
1467 case 'e':
1468 *d++ = '\033';
1469 break;
1470 case 'a':
1471 *d++ = '\007';
1472 break;
34a3fe2a 1473#endif
02aa26ce
NT
1474 } /* end switch */
1475
79072805
LW
1476 s++;
1477 continue;
02aa26ce
NT
1478 } /* end if (backslash) */
1479
79072805 1480 *d++ = *s++;
02aa26ce
NT
1481 } /* while loop to process each character */
1482
1483 /* terminate the string and set up the sv */
79072805 1484 *d = '\0';
463ee0b2 1485 SvCUR_set(sv, d - SvPVX(sv));
79072805 1486 SvPOK_on(sv);
7e2040f0
GS
1487 if (has_utf)
1488 SvUTF8_on(sv);
79072805 1489
02aa26ce 1490 /* shrink the sv if we allocated more than we used */
79072805
LW
1491 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1492 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1493 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1494 }
02aa26ce 1495
9b599b2a 1496 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1497 if (s > PL_bufptr) {
1498 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1499 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1500 sv, Nullsv,
3280af22 1501 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1502 ? "tr"
3280af22 1503 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1504 ? "s"
1505 : "qq")));
79072805 1506 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1507 } else
8990e307 1508 SvREFCNT_dec(sv);
79072805
LW
1509 return s;
1510}
1511
ffb4593c
NT
1512/* S_intuit_more
1513 * Returns TRUE if there's more to the expression (e.g., a subscript),
1514 * FALSE otherwise.
ffb4593c
NT
1515 *
1516 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1517 *
1518 * ->[ and ->{ return TRUE
1519 * { and [ outside a pattern are always subscripts, so return TRUE
1520 * if we're outside a pattern and it's not { or [, then return FALSE
1521 * if we're in a pattern and the first char is a {
1522 * {4,5} (any digits around the comma) returns FALSE
1523 * if we're in a pattern and the first char is a [
1524 * [] returns FALSE
1525 * [SOMETHING] has a funky algorithm to decide whether it's a
1526 * character class or not. It has to deal with things like
1527 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1528 * anything else returns TRUE
1529 */
1530
9cbb5ea2
GS
1531/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1532
76e3520e 1533STATIC int
cea2e8a9 1534S_intuit_more(pTHX_ register char *s)
79072805 1535{
3280af22 1536 if (PL_lex_brackets)
79072805
LW
1537 return TRUE;
1538 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1539 return TRUE;
1540 if (*s != '{' && *s != '[')
1541 return FALSE;
3280af22 1542 if (!PL_lex_inpat)
79072805
LW
1543 return TRUE;
1544
1545 /* In a pattern, so maybe we have {n,m}. */
1546 if (*s == '{') {
1547 s++;
1548 if (!isDIGIT(*s))
1549 return TRUE;
1550 while (isDIGIT(*s))
1551 s++;
1552 if (*s == ',')
1553 s++;
1554 while (isDIGIT(*s))
1555 s++;
1556 if (*s == '}')
1557 return FALSE;
1558 return TRUE;
1559
1560 }
1561
1562 /* On the other hand, maybe we have a character class */
1563
1564 s++;
1565 if (*s == ']' || *s == '^')
1566 return FALSE;
1567 else {
ffb4593c 1568 /* this is terrifying, and it works */
79072805
LW
1569 int weight = 2; /* let's weigh the evidence */
1570 char seen[256];
f27ffc4a 1571 unsigned char un_char = 255, last_un_char;
93a17b20 1572 char *send = strchr(s,']');
3280af22 1573 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1574
1575 if (!send) /* has to be an expression */
1576 return TRUE;
1577
1578 Zero(seen,256,char);
1579 if (*s == '$')
1580 weight -= 3;
1581 else if (isDIGIT(*s)) {
1582 if (s[1] != ']') {
1583 if (isDIGIT(s[1]) && s[2] == ']')
1584 weight -= 10;
1585 }
1586 else
1587 weight -= 100;
1588 }
1589 for (; s < send; s++) {
1590 last_un_char = un_char;
1591 un_char = (unsigned char)*s;
1592 switch (*s) {
1593 case '@':
1594 case '&':
1595 case '$':
1596 weight -= seen[un_char] * 10;
7e2040f0 1597 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1598 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1599 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1600 weight -= 100;
1601 else
1602 weight -= 10;
1603 }
1604 else if (*s == '$' && s[1] &&
93a17b20
LW
1605 strchr("[#!%*<>()-=",s[1])) {
1606 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1607 weight -= 10;
1608 else
1609 weight -= 1;
1610 }
1611 break;
1612 case '\\':
1613 un_char = 254;
1614 if (s[1]) {
93a17b20 1615 if (strchr("wds]",s[1]))
79072805
LW
1616 weight += 100;
1617 else if (seen['\''] || seen['"'])
1618 weight += 1;
93a17b20 1619 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1620 weight += 40;
1621 else if (isDIGIT(s[1])) {
1622 weight += 40;
1623 while (s[1] && isDIGIT(s[1]))
1624 s++;
1625 }
1626 }
1627 else
1628 weight += 100;
1629 break;
1630 case '-':
1631 if (s[1] == '\\')
1632 weight += 50;
93a17b20 1633 if (strchr("aA01! ",last_un_char))
79072805 1634 weight += 30;
93a17b20 1635 if (strchr("zZ79~",s[1]))
79072805 1636 weight += 30;
f27ffc4a
GS
1637 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1638 weight -= 5; /* cope with negative subscript */
79072805
LW
1639 break;
1640 default:
93a17b20 1641 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1642 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1643 char *d = tmpbuf;
1644 while (isALPHA(*s))
1645 *d++ = *s++;
1646 *d = '\0';
1647 if (keyword(tmpbuf, d - tmpbuf))
1648 weight -= 150;
1649 }
1650 if (un_char == last_un_char + 1)
1651 weight += 5;
1652 weight -= seen[un_char];
1653 break;
1654 }
1655 seen[un_char]++;
1656 }
1657 if (weight >= 0) /* probably a character class */
1658 return FALSE;
1659 }
1660
1661 return TRUE;
1662}
ffed7fef 1663
ffb4593c
NT
1664/*
1665 * S_intuit_method
1666 *
1667 * Does all the checking to disambiguate
1668 * foo bar
1669 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1670 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1671 *
1672 * First argument is the stuff after the first token, e.g. "bar".
1673 *
1674 * Not a method if bar is a filehandle.
1675 * Not a method if foo is a subroutine prototyped to take a filehandle.
1676 * Not a method if it's really "Foo $bar"
1677 * Method if it's "foo $bar"
1678 * Not a method if it's really "print foo $bar"
1679 * Method if it's really "foo package::" (interpreted as package->foo)
1680 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1681 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1682 * =>
1683 */
1684
76e3520e 1685STATIC int
cea2e8a9 1686S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1687{
1688 char *s = start + (*start == '$');
3280af22 1689 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1690 STRLEN len;
1691 GV* indirgv;
1692
1693 if (gv) {
b6c543e3 1694 CV *cv;
a0d0e21e
LW
1695 if (GvIO(gv))
1696 return 0;
b6c543e3
IZ
1697 if ((cv = GvCVu(gv))) {
1698 char *proto = SvPVX(cv);
1699 if (proto) {
1700 if (*proto == ';')
1701 proto++;
1702 if (*proto == '*')
1703 return 0;
1704 }
1705 } else
a0d0e21e
LW
1706 gv = 0;
1707 }
8903cb82 1708 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1709 /* start is the beginning of the possible filehandle/object,
1710 * and s is the end of it
1711 * tmpbuf is a copy of it
1712 */
1713
a0d0e21e 1714 if (*start == '$') {
3280af22 1715 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1716 return 0;
1717 s = skipspace(s);
3280af22
NIS
1718 PL_bufptr = start;
1719 PL_expect = XREF;
a0d0e21e
LW
1720 return *s == '(' ? FUNCMETH : METHOD;
1721 }
1722 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1723 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1724 len -= 2;
1725 tmpbuf[len] = '\0';
1726 goto bare_package;
1727 }
1728 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1729 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1730 return 0;
1731 /* filehandle or package name makes it a method */
89bfa8cd 1732 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1733 s = skipspace(s);
3280af22 1734 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1735 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1736 bare_package:
3280af22 1737 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1738 newSVpvn(tmpbuf,len));
3280af22
NIS
1739 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1740 PL_expect = XTERM;
a0d0e21e 1741 force_next(WORD);
3280af22 1742 PL_bufptr = s;
a0d0e21e
LW
1743 return *s == '(' ? FUNCMETH : METHOD;
1744 }
1745 }
1746 return 0;
1747}
1748
ffb4593c
NT
1749/*
1750 * S_incl_perldb
1751 * Return a string of Perl code to load the debugger. If PERL5DB
1752 * is set, it will return the contents of that, otherwise a
1753 * compile-time require of perl5db.pl.
1754 */
1755
76e3520e 1756STATIC char*
cea2e8a9 1757S_incl_perldb(pTHX)
a0d0e21e 1758{
3280af22 1759 if (PL_perldb) {
76e3520e 1760 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1761
1762 if (pdb)
1763 return pdb;
61bb5906 1764 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1765 return "BEGIN { require 'perl5db.pl' }";
1766 }
1767 return "";
1768}
1769
1770
16d20bd9
AD
1771/* Encoded script support. filter_add() effectively inserts a
1772 * 'pre-processing' function into the current source input stream.
1773 * Note that the filter function only applies to the current source file
1774 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1775 *
1776 * The datasv parameter (which may be NULL) can be used to pass
1777 * private data to this instance of the filter. The filter function
1778 * can recover the SV using the FILTER_DATA macro and use it to
1779 * store private buffers and state information.
1780 *
1781 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1782 * and the IoDIRP field is used to store the function pointer,
1783 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1784 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1785 * private use must be set using malloc'd pointers.
1786 */
16d20bd9
AD
1787
1788SV *
864dbfa3 1789Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1790{
f4c556ac
GS
1791 if (!funcp)
1792 return Nullsv;
1793
3280af22
NIS
1794 if (!PL_rsfp_filters)
1795 PL_rsfp_filters = newAV();
16d20bd9 1796 if (!datasv)
8c52afec 1797 datasv = NEWSV(255,0);
16d20bd9 1798 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1799 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1800 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1801 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1802 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1803 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1804 av_unshift(PL_rsfp_filters, 1);
1805 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1806 return(datasv);
1807}
1808
1809
1810/* Delete most recently added instance of this filter function. */
a0d0e21e 1811void
864dbfa3 1812Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1813{
e0c19803 1814 SV *datasv;
f4c556ac 1815 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1816 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1817 return;
1818 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1819 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1820 if (IoDIRP(datasv) == (DIR*)funcp) {
1821 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1822 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1823 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1824
16d20bd9
AD
1825 return;
1826 }
1827 /* we need to search for the correct entry and clear it */
cea2e8a9 1828 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1829}
1830
1831
1832/* Invoke the n'th filter function for the current rsfp. */
1833I32
864dbfa3 1834Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1835
1836
1837 /* 0 = read one text line */
a0d0e21e 1838{
16d20bd9
AD
1839 filter_t funcp;
1840 SV *datasv = NULL;
e50aee73 1841
3280af22 1842 if (!PL_rsfp_filters)
16d20bd9 1843 return -1;
3280af22 1844 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1845 /* Provide a default input filter to make life easy. */
1846 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1847 DEBUG_P(PerlIO_printf(Perl_debug_log,
1848 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1849 if (maxlen) {
1850 /* Want a block */
1851 int len ;
1852 int old_len = SvCUR(buf_sv) ;
1853
1854 /* ensure buf_sv is large enough */
1855 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1856 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1857 if (PerlIO_error(PL_rsfp))
37120919
AD
1858 return -1; /* error */
1859 else
1860 return 0 ; /* end of file */
1861 }
16d20bd9
AD
1862 SvCUR_set(buf_sv, old_len + len) ;
1863 } else {
1864 /* Want a line */
3280af22
NIS
1865 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1866 if (PerlIO_error(PL_rsfp))
37120919
AD
1867 return -1; /* error */
1868 else
1869 return 0 ; /* end of file */
1870 }
16d20bd9
AD
1871 }
1872 return SvCUR(buf_sv);
1873 }
1874 /* Skip this filter slot if filter has been deleted */
3280af22 1875 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1876 DEBUG_P(PerlIO_printf(Perl_debug_log,
1877 "filter_read %d: skipped (filter deleted)\n",
1878 idx));
16d20bd9
AD
1879 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1880 }
1881 /* Get function pointer hidden within datasv */
1882 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1883 DEBUG_P(PerlIO_printf(Perl_debug_log,
1884 "filter_read %d: via function %p (%s)\n",
1885 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1886 /* Call function. The function is expected to */
1887 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1888 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1889 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1890}
1891
76e3520e 1892STATIC char *
cea2e8a9 1893S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1894{
c39cd008 1895#ifdef PERL_CR_FILTER
3280af22 1896 if (!PL_rsfp_filters) {
c39cd008 1897 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1898 }
1899#endif
3280af22 1900 if (PL_rsfp_filters) {
16d20bd9 1901
55497cff
PP
1902 if (!append)
1903 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1904 if (FILTER_READ(0, sv, 0) > 0)
1905 return ( SvPVX(sv) ) ;
1906 else
1907 return Nullch ;
1908 }
9d116dd7 1909 else
fd049845 1910 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1911}
1912
1913
748a9306
LW
1914#ifdef DEBUGGING
1915 static char* exp_name[] =
09bef843
SB
1916 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1917 "ATTRTERM", "TERMBLOCK"
1918 };
748a9306 1919#endif
463ee0b2 1920
02aa26ce
NT
1921/*
1922 yylex
1923
1924 Works out what to call the token just pulled out of the input
1925 stream. The yacc parser takes care of taking the ops we return and
1926 stitching them into a tree.
1927
1928 Returns:
1929 PRIVATEREF
1930
1931 Structure:
1932 if read an identifier
1933 if we're in a my declaration
1934 croak if they tried to say my($foo::bar)
1935 build the ops for a my() declaration
1936 if it's an access to a my() variable
1937 are we in a sort block?
1938 croak if my($a); $a <=> $b
1939 build ops for access to a my() variable
1940 if in a dq string, and they've said @foo and we can't find @foo
1941 croak
1942 build ops for a bareword
1943 if we already built the token before, use it.
1944*/
1945
864dbfa3
GS
1946int
1947#ifdef USE_PURE_BISON
cea2e8a9 1948Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1949#else
cea2e8a9 1950Perl_yylex(pTHX)
864dbfa3 1951#endif
378cc40b 1952{
11343788 1953 dTHR;
79072805 1954 register char *s;
378cc40b 1955 register char *d;
79072805 1956 register I32 tmp;
463ee0b2 1957 STRLEN len;
161b471a
NIS
1958 GV *gv = Nullgv;
1959 GV **gvp = 0;
a687059c 1960
a1a0e61e
TD
1961#ifdef USE_PURE_BISON
1962 yylval_pointer = lvalp;
1963 yychar_pointer = lcharp;
1964#endif
1965
02aa26ce 1966 /* check if there's an identifier for us to look at */
3280af22 1967 if (PL_pending_ident) {
02aa26ce 1968 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1969 char pit = PL_pending_ident;
1970 PL_pending_ident = 0;
bbce6d69 1971
02aa26ce
NT
1972 /* if we're in a my(), we can't allow dynamics here.
1973 $foo'bar has already been turned into $foo::bar, so
1974 just check for colons.
1975
1976 if it's a legal name, the OP is a PADANY.
1977 */
3280af22 1978 if (PL_in_my) {
77ca0c92 1979 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
1980 if (strchr(PL_tokenbuf,':'))
1981 yyerror(Perl_form(aTHX_ "No package name allowed for "
1982 "variable %s in \"our\"",
1983 PL_tokenbuf));
77ca0c92
LW
1984 tmp = pad_allocmy(PL_tokenbuf);
1985 }
1986 else {
1987 if (strchr(PL_tokenbuf,':'))
1988 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1989
77ca0c92
LW
1990 yylval.opval = newOP(OP_PADANY, 0);
1991 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1992 return PRIVATEREF;
1993 }
bbce6d69
PP
1994 }
1995
02aa26ce
NT
1996 /*
1997 build the ops for accesses to a my() variable.
1998
1999 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2000 then used in a comparison. This catches most, but not
2001 all cases. For instance, it catches
2002 sort { my($a); $a <=> $b }
2003 but not
2004 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2005 (although why you'd do that is anyone's guess).
2006 */
2007
3280af22 2008 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2009#ifdef USE_THREADS
54b9620d 2010 /* Check for single character per-thread SVs */
3280af22
NIS
2011 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2012 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2013 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2014 {
2faa37cc 2015 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2016 yylval.opval->op_targ = tmp;
2017 return PRIVATEREF;
2018 }
2019#endif /* USE_THREADS */
3280af22 2020 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2021 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2022 /* might be an "our" variable" */
f472eb5c 2023 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2024 /* build ops for a bareword */
f472eb5c
GS
2025 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2026 sv_catpvn(sym, "::", 2);
2027 sv_catpv(sym, PL_tokenbuf+1);
2028 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2029 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2030 gv_fetchpv(SvPVX(sym),
77ca0c92 2031 (PL_in_eval
f472eb5c
GS
2032 ? (GV_ADDMULTI | GV_ADDINEVAL)
2033 : TRUE
77ca0c92
LW
2034 ),
2035 ((PL_tokenbuf[0] == '$') ? SVt_PV
2036 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2037 : SVt_PVHV));
2038 return WORD;
2039 }
2040
02aa26ce 2041 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2042 if (PL_last_lop_op == OP_SORT &&
2043 PL_tokenbuf[0] == '$' &&
2044 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2045 && !PL_tokenbuf[2])
bbce6d69 2046 {
3280af22
NIS
2047 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2048 d < PL_bufend && *d != '\n';
a863c7d1
MB
2049 d++)
2050 {
2051 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2052 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2053 PL_tokenbuf);
a863c7d1 2054 }
bbce6d69
PP
2055 }
2056 }
bbce6d69 2057
a863c7d1
MB
2058 yylval.opval = newOP(OP_PADANY, 0);
2059 yylval.opval->op_targ = tmp;
2060 return PRIVATEREF;
2061 }
bbce6d69
PP
2062 }
2063
02aa26ce
NT
2064 /*
2065 Whine if they've said @foo in a doublequoted string,
2066 and @foo isn't a variable we can find in the symbol
2067 table.
2068 */
3280af22
NIS
2069 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2070 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2071 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2072 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2073 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2074 }
2075
02aa26ce 2076 /* build ops for a bareword */
3280af22 2077 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2078 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2079 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2080 ((PL_tokenbuf[0] == '$') ? SVt_PV
2081 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2082 : SVt_PVHV));
2083 return WORD;
2084 }
2085
02aa26ce
NT
2086 /* no identifier pending identification */
2087
3280af22 2088 switch (PL_lex_state) {
79072805
LW
2089#ifdef COMMENTARY
2090 case LEX_NORMAL: /* Some compilers will produce faster */
2091 case LEX_INTERPNORMAL: /* code if we comment these out. */
2092 break;
2093#endif
2094
09bef843 2095 /* when we've already built the next token, just pull it out of the queue */
79072805 2096 case LEX_KNOWNEXT:
3280af22
NIS
2097 PL_nexttoke--;
2098 yylval = PL_nextval[PL_nexttoke];
2099 if (!PL_nexttoke) {
2100 PL_lex_state = PL_lex_defer;
2101 PL_expect = PL_lex_expect;
2102 PL_lex_defer = LEX_NORMAL;
463ee0b2 2103 }
3280af22 2104 return(PL_nexttype[PL_nexttoke]);
79072805 2105
02aa26ce 2106 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2107 when we get here, PL_bufptr is at the \
02aa26ce 2108 */
79072805
LW
2109 case LEX_INTERPCASEMOD:
2110#ifdef DEBUGGING
3280af22 2111 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2112 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2113#endif
02aa26ce 2114 /* handle \E or end of string */
3280af22 2115 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2116 char oldmod;
02aa26ce
NT
2117
2118 /* if at a \E */
3280af22
NIS
2119 if (PL_lex_casemods) {
2120 oldmod = PL_lex_casestack[--PL_lex_casemods];
2121 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2122
3280af22
NIS
2123 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2124 PL_bufptr += 2;
2125 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2126 }
79072805
LW
2127 return ')';
2128 }
3280af22
NIS
2129 if (PL_bufptr != PL_bufend)
2130 PL_bufptr += 2;
2131 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2132 return yylex();
79072805
LW
2133 }
2134 else {
3280af22 2135 s = PL_bufptr + 1;
79072805
LW
2136 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2137 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2138 if (strchr("LU", *s) &&
3280af22 2139 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2140 {
3280af22 2141 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2142 return ')';
2143 }
3280af22
NIS
2144 if (PL_lex_casemods > 10) {
2145 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2146 if (newlb != PL_lex_casestack) {
a0d0e21e 2147 SAVEFREEPV(newlb);
3280af22 2148 PL_lex_casestack = newlb;
a0d0e21e
LW
2149 }
2150 }
3280af22
NIS
2151 PL_lex_casestack[PL_lex_casemods++] = *s;
2152 PL_lex_casestack[PL_lex_casemods] = '\0';
2153 PL_lex_state = LEX_INTERPCONCAT;
2154 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2155 force_next('(');
2156 if (*s == 'l')
3280af22 2157 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2158 else if (*s == 'u')
3280af22 2159 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2160 else if (*s == 'L')
3280af22 2161 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2162 else if (*s == 'U')
3280af22 2163 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2164 else if (*s == 'Q')
3280af22 2165 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2166 else
cea2e8a9 2167 Perl_croak(aTHX_ "panic: yylex");
3280af22 2168 PL_bufptr = s + 1;
79072805 2169 force_next(FUNC);
3280af22
NIS
2170 if (PL_lex_starts) {
2171 s = PL_bufptr;
2172 PL_lex_starts = 0;
79072805
LW
2173 Aop(OP_CONCAT);
2174 }
2175 else
cea2e8a9 2176 return yylex();
79072805
LW
2177 }
2178
55497cff
PP
2179 case LEX_INTERPPUSH:
2180 return sublex_push();
2181
79072805 2182 case LEX_INTERPSTART:
3280af22 2183 if (PL_bufptr == PL_bufend)
79072805 2184 return sublex_done();
3280af22
NIS
2185 PL_expect = XTERM;
2186 PL_lex_dojoin = (*PL_bufptr == '@');
2187 PL_lex_state = LEX_INTERPNORMAL;
2188 if (PL_lex_dojoin) {
2189 PL_nextval[PL_nexttoke].ival = 0;
79072805 2190 force_next(',');
554b3eca 2191#ifdef USE_THREADS
533c011a
NIS
2192 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2193 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2194 force_next(PRIVATEREF);
2195#else
a0d0e21e 2196 force_ident("\"", '$');
554b3eca 2197#endif /* USE_THREADS */
3280af22 2198 PL_nextval[PL_nexttoke].ival = 0;
79072805 2199 force_next('$');
3280af22 2200 PL_nextval[PL_nexttoke].ival = 0;
79072805 2201 force_next('(');
3280af22 2202 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2203 force_next(FUNC);
2204 }
3280af22
NIS
2205 if (PL_lex_starts++) {
2206 s = PL_bufptr;
79072805
LW
2207 Aop(OP_CONCAT);
2208 }
cea2e8a9 2209 return yylex();
79072805
LW
2210
2211 case LEX_INTERPENDMAYBE:
3280af22
NIS
2212 if (intuit_more(PL_bufptr)) {
2213 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2214 break;
2215 }
2216 /* FALL THROUGH */
2217
2218 case LEX_INTERPEND:
3280af22
NIS
2219 if (PL_lex_dojoin) {
2220 PL_lex_dojoin = FALSE;
2221 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2222 return ')';
2223 }
43a16006 2224 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2225 && SvEVALED(PL_lex_repl))
43a16006 2226 {
e9fa98b2 2227 if (PL_bufptr != PL_bufend)
cea2e8a9 2228 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2229 PL_lex_repl = Nullsv;
2230 }
79072805
LW
2231 /* FALLTHROUGH */
2232 case LEX_INTERPCONCAT:
2233#ifdef DEBUGGING
3280af22 2234 if (PL_lex_brackets)
cea2e8a9 2235 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2236#endif
3280af22 2237 if (PL_bufptr == PL_bufend)
79072805
LW
2238 return sublex_done();
2239
3280af22
NIS
2240 if (SvIVX(PL_linestr) == '\'') {
2241 SV *sv = newSVsv(PL_linestr);
2242 if (!PL_lex_inpat)
76e3520e 2243 sv = tokeq(sv);
3280af22 2244 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2245 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2246 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2247 s = PL_bufend;
79072805
LW
2248 }
2249 else {
3280af22 2250 s = scan_const(PL_bufptr);
79072805 2251 if (*s == '\\')
3280af22 2252 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2253 else
3280af22 2254 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2255 }
2256
3280af22
NIS
2257 if (s != PL_bufptr) {
2258 PL_nextval[PL_nexttoke] = yylval;
2259 PL_expect = XTERM;
79072805 2260 force_next(THING);
3280af22 2261 if (PL_lex_starts++)
79072805
LW
2262 Aop(OP_CONCAT);
2263 else {
3280af22 2264 PL_bufptr = s;
cea2e8a9 2265 return yylex();
79072805
LW
2266 }
2267 }
2268
cea2e8a9 2269 return yylex();
a0d0e21e 2270 case LEX_FORMLINE:
3280af22
NIS
2271 PL_lex_state = LEX_NORMAL;
2272 s = scan_formline(PL_bufptr);
2273 if (!PL_lex_formbrack)
a0d0e21e
LW
2274 goto rightbracket;
2275 OPERATOR(';');
79072805
LW
2276 }
2277
3280af22
NIS
2278 s = PL_bufptr;
2279 PL_oldoldbufptr = PL_oldbufptr;
2280 PL_oldbufptr = s;
79072805 2281 DEBUG_p( {
bf49b057
GS
2282 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2283 exp_name[PL_expect], s);
79072805 2284 } )
463ee0b2
LW
2285
2286 retry:
378cc40b
LW
2287 switch (*s) {
2288 default:
7e2040f0 2289 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2290 goto keylookup;
cea2e8a9 2291 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2292 case 4:
2293 case 26:
2294 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2295 case 0:
3280af22
NIS
2296 if (!PL_rsfp) {
2297 PL_last_uni = 0;
2298 PL_last_lop = 0;
2299 if (PL_lex_brackets)
d98d5fff 2300 yyerror("Missing right curly or square bracket");
79072805 2301 TOKEN(0);
463ee0b2 2302 }
3280af22 2303 if (s++ < PL_bufend)
a687059c 2304 goto retry; /* ignore stray nulls */
3280af22
NIS
2305 PL_last_uni = 0;
2306 PL_last_lop = 0;
2307 if (!PL_in_eval && !PL_preambled) {
2308 PL_preambled = TRUE;
2309 sv_setpv(PL_linestr,incl_perldb());
2310 if (SvCUR(PL_linestr))
2311 sv_catpv(PL_linestr,";");
2312 if (PL_preambleav){
2313 while(AvFILLp(PL_preambleav) >= 0) {
2314 SV *tmpsv = av_shift(PL_preambleav);
2315 sv_catsv(PL_linestr, tmpsv);
2316 sv_catpv(PL_linestr, ";");
91b7def8
PP
2317 sv_free(tmpsv);
2318 }
3280af22
NIS
2319 sv_free((SV*)PL_preambleav);
2320 PL_preambleav = NULL;
91b7def8 2321 }
3280af22
NIS
2322 if (PL_minus_n || PL_minus_p) {
2323 sv_catpv(PL_linestr, "LINE: while (<>) {");
2324 if (PL_minus_l)
2325 sv_catpv(PL_linestr,"chomp;");
2326 if (PL_minus_a) {
8fd239a7
CS
2327 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2328 if (gv)
2329 GvIMPORTED_AV_on(gv);
3280af22
NIS
2330 if (PL_minus_F) {
2331 if (strchr("/'\"", *PL_splitstr)
2332 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2333 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2334 else {
2335 char delim;
2336 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2337 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2338 delim = *s;
cea2e8a9 2339 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2340 "q" + (delim == '\''), delim);
3280af22 2341 for (s = PL_splitstr; *s; s++) {
54310121 2342 if (*s == '\\')
3280af22
NIS
2343 sv_catpvn(PL_linestr, "\\", 1);
2344 sv_catpvn(PL_linestr, s, 1);
54310121 2345 }
cea2e8a9 2346 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2347 }
2304df62
AD
2348 }
2349 else
3280af22 2350 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2351 }
79072805 2352 }
3280af22
NIS
2353 sv_catpv(PL_linestr, "\n");
2354 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2355 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2356 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2357 SV *sv = NEWSV(85,0);
2358
2359 sv_upgrade(sv, SVt_PVMG);
3280af22 2360 sv_setsv(sv,PL_linestr);
57843af0 2361 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2362 }
79072805 2363 goto retry;
a687059c 2364 }
e929a76b 2365 do {
3280af22 2366 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2367 fake_eof:
3280af22
NIS
2368 if (PL_rsfp) {
2369 if (PL_preprocess && !PL_in_eval)
2370 (void)PerlProc_pclose(PL_rsfp);
2371 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2372 PerlIO_clearerr(PL_rsfp);
395c3793 2373 else
3280af22
NIS
2374 (void)PerlIO_close(PL_rsfp);
2375 PL_rsfp = Nullfp;
4a9ae47a 2376 PL_doextract = FALSE;
395c3793 2377 }
3280af22
NIS
2378 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2379 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2380 sv_catpv(PL_linestr,";}");
2381 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2382 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2383 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2384 goto retry;
2385 }
3280af22
NIS
2386 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2387 sv_setpv(PL_linestr,"");
79072805 2388 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2389 }
3280af22 2390 if (PL_doextract) {
a0d0e21e 2391 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2392 PL_doextract = FALSE;
a0d0e21e
LW
2393
2394 /* Incest with pod. */
2395 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2396 sv_setpv(PL_linestr, "");
2397 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2398 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2399 PL_doextract = FALSE;
a0d0e21e
LW
2400 }
2401 }
463ee0b2 2402 incline(s);
3280af22
NIS
2403 } while (PL_doextract);
2404 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2405 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2406 SV *sv = NEWSV(85,0);
a687059c 2407
93a17b20 2408 sv_upgrade(sv, SVt_PVMG);
3280af22 2409 sv_setsv(sv,PL_linestr);
57843af0 2410 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2411 }
3280af22 2412 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2413 if (CopLINE(PL_curcop) == 1) {
3280af22 2414 while (s < PL_bufend && isSPACE(*s))
79072805 2415 s++;
a0d0e21e 2416 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2417 s++;
44a8e56a 2418 d = Nullch;
3280af22 2419 if (!PL_in_eval) {
44a8e56a
PP
2420 if (*s == '#' && *(s+1) == '!')
2421 d = s + 2;
2422#ifdef ALTERNATE_SHEBANG
2423 else {
2424 static char as[] = ALTERNATE_SHEBANG;
2425 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2426 d = s + (sizeof(as) - 1);
2427 }
2428#endif /* ALTERNATE_SHEBANG */
2429 }
2430 if (d) {
b8378b72 2431 char *ipath;
774d564b 2432 char *ipathend;
b8378b72 2433
774d564b 2434 while (isSPACE(*d))
b8378b72
CS
2435 d++;
2436 ipath = d;
774d564b
PP
2437 while (*d && !isSPACE(*d))
2438 d++;
2439 ipathend = d;
2440
2441#ifdef ARG_ZERO_IS_SCRIPT
2442 if (ipathend > ipath) {
2443 /*
2444 * HP-UX (at least) sets argv[0] to the script name,
2445 * which makes $^X incorrect. And Digital UNIX and Linux,
2446 * at least, set argv[0] to the basename of the Perl
2447 * interpreter. So, having found "#!", we'll set it right.
2448 */
2449 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2450 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2451 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2452 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2453 SvSETMAGIC(x);
2454 }
774d564b 2455 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2456 }
774d564b 2457#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2458
2459 /*
2460 * Look for options.
2461 */
748a9306 2462 d = instr(s,"perl -");
84e30d1a 2463 if (!d) {
748a9306 2464 d = instr(s,"perl");
84e30d1a
GS
2465#if defined(DOSISH)
2466 /* avoid getting into infinite loops when shebang
2467 * line contains "Perl" rather than "perl" */
2468 if (!d) {
2469 for (d = ipathend-4; d >= ipath; --d) {
2470 if ((*d == 'p' || *d == 'P')
2471 && !ibcmp(d, "perl", 4))
2472 {
2473 break;
2474 }
2475 }
2476 if (d < ipath)
2477 d = Nullch;
2478 }
2479#endif
2480 }
44a8e56a
PP
2481#ifdef ALTERNATE_SHEBANG
2482 /*
2483 * If the ALTERNATE_SHEBANG on this system starts with a
2484 * character that can be part of a Perl expression, then if
2485 * we see it but not "perl", we're probably looking at the
2486 * start of Perl code, not a request to hand off to some
2487 * other interpreter. Similarly, if "perl" is there, but
2488 * not in the first 'word' of the line, we assume the line
2489 * contains the start of the Perl program.
44a8e56a
PP
2490 */
2491 if (d && *s != '#') {
774d564b 2492 char *c = ipath;
44a8e56a
PP
2493 while (*c && !strchr("; \t\r\n\f\v#", *c))
2494 c++;
2495 if (c < d)
2496 d = Nullch; /* "perl" not in first word; ignore */
2497 else
2498 *s = '#'; /* Don't try to parse shebang line */
2499 }
774d564b 2500#endif /* ALTERNATE_SHEBANG */
748a9306 2501 if (!d &&
44a8e56a 2502 *s == '#' &&
774d564b 2503 ipathend > ipath &&
3280af22 2504 !PL_minus_c &&
748a9306 2505 !instr(s,"indir") &&
3280af22 2506 instr(PL_origargv[0],"perl"))
748a9306 2507 {
9f68db38 2508 char **newargv;
9f68db38 2509
774d564b
PP
2510 *ipathend = '\0';
2511 s = ipathend + 1;
3280af22 2512 while (s < PL_bufend && isSPACE(*s))
9f68db38 2513 s++;
3280af22
NIS
2514 if (s < PL_bufend) {
2515 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2516 newargv[1] = s;
3280af22 2517 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2518 s++;
2519 *s = '\0';
3280af22 2520 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2521 }
2522 else
3280af22 2523 newargv = PL_origargv;
774d564b 2524 newargv[0] = ipath;
80252599 2525 PerlProc_execv(ipath, newargv);
cea2e8a9 2526 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2527 }
748a9306 2528 if (d) {
3280af22
NIS
2529 U32 oldpdb = PL_perldb;
2530 bool oldn = PL_minus_n;
2531 bool oldp = PL_minus_p;
748a9306
LW
2532
2533 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2534 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2535
2536 if (*d++ == '-') {
8cc95fdb
PP
2537 do {
2538 if (*d == 'M' || *d == 'm') {
2539 char *m = d;
2540 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2541 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2542 (int)(d - m), m);
2543 }
2544 d = moreswitches(d);
2545 } while (d);
84902520 2546 if (PERLDB_LINE && !oldpdb ||
3280af22 2547 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2548 /* if we have already added "LINE: while (<>) {",
2549 we must not do it again */
748a9306 2550 {
3280af22
NIS
2551 sv_setpv(PL_linestr, "");
2552 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2553 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2554 PL_preambled = FALSE;
84902520 2555 if (PERLDB_LINE)
3280af22 2556 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2557 goto retry;
2558 }
a0d0e21e 2559 }
79072805 2560 }
9f68db38 2561 }
79072805 2562 }
3280af22
NIS
2563 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2564 PL_bufptr = s;
2565 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2566 return yylex();
ae986130 2567 }
378cc40b 2568 goto retry;
4fdae800 2569 case '\r':
6a27c188 2570#ifdef PERL_STRICT_CR
cea2e8a9
GS
2571 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2572 Perl_croak(aTHX_
54310121 2573 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2574#endif
4fdae800 2575 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2576 s++;
2577 goto retry;
378cc40b 2578 case '#':
e929a76b 2579 case '\n':
3280af22
NIS
2580 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2581 d = PL_bufend;
a687059c 2582 while (s < d && *s != '\n')
378cc40b 2583 s++;
0f85fab0 2584 if (s < d)
378cc40b 2585 s++;
463ee0b2 2586 incline(s);
3280af22
NIS
2587 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2588 PL_bufptr = s;
2589 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2590 return yylex();
a687059c 2591 }
378cc40b 2592 }
a687059c 2593 else {
378cc40b 2594 *s = '\0';
3280af22 2595 PL_bufend = s;
a687059c 2596 }
378cc40b
LW
2597 goto retry;
2598 case '-':
79072805 2599 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2600 s++;
3280af22 2601 PL_bufptr = s;
748a9306
LW
2602 tmp = *s++;
2603
3280af22 2604 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2605 s++;
2606
2607 if (strnEQ(s,"=>",2)) {
3280af22 2608 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2609 OPERATOR('-'); /* unary minus */
2610 }
3280af22
NIS
2611 PL_last_uni = PL_oldbufptr;
2612 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2613 switch (tmp) {
79072805
LW
2614 case 'r': FTST(OP_FTEREAD);
2615 case 'w': FTST(OP_FTEWRITE);
2616 case 'x': FTST(OP_FTEEXEC);
2617 case 'o': FTST(OP_FTEOWNED);
2618 case 'R': FTST(OP_FTRREAD);
2619 case 'W': FTST(OP_FTRWRITE);
2620 case 'X': FTST(OP_FTREXEC);
2621 case 'O': FTST(OP_FTROWNED);
2622 case 'e': FTST(OP_FTIS);
2623 case 'z': FTST(OP_FTZERO);
2624 case 's': FTST(OP_FTSIZE);
2625 case 'f': FTST(OP_FTFILE);
2626 case 'd': FTST(OP_FTDIR);
2627 case 'l': FTST(OP_FTLINK);
2628 case 'p': FTST(OP_FTPIPE);
2629 case 'S': FTST(OP_FTSOCK);
2630 case 'u': FTST(OP_FTSUID);
2631 case 'g': FTST(OP_FTSGID);
2632 case 'k': FTST(OP_FTSVTX);
2633 case 'b': FTST(OP_FTBLK);
2634 case 'c': FTST(OP_FTCHR);
2635 case 't': FTST(OP_FTTTY);
2636 case 'T': FTST(OP_FTTEXT);
2637 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2638 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2639 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2640 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2641 default:
cea2e8a9 2642 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2643 break;
2644 }
2645 }
a687059c
LW
2646 tmp = *s++;
2647 if (*s == tmp) {
2648 s++;
3280af22 2649 if (PL_expect == XOPERATOR)
79072805
LW
2650 TERM(POSTDEC);
2651 else
2652 OPERATOR(PREDEC);
2653 }
2654 else if (*s == '>') {
2655 s++;
2656 s = skipspace(s);
7e2040f0 2657 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2658 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2659 TOKEN(ARROW);
79072805 2660 }
748a9306
LW
2661 else if (*s == '$')
2662 OPERATOR(ARROW);
463ee0b2 2663 else
748a9306 2664 TERM(ARROW);
a687059c 2665 }
3280af22 2666 if (PL_expect == XOPERATOR)
79072805
LW
2667 Aop(OP_SUBTRACT);
2668 else {
3280af22 2669 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2670 check_uni();
79072805 2671 OPERATOR('-'); /* unary minus */
2f3197b3 2672 }
79072805 2673
378cc40b 2674 case '+':
a687059c
LW
2675 tmp = *s++;
2676 if (*s == tmp) {
378cc40b 2677 s++;
3280af22 2678 if (PL_expect == XOPERATOR)
79072805
LW
2679 TERM(POSTINC);
2680 else
2681 OPERATOR(PREINC);
378cc40b 2682 }
3280af22 2683 if (PL_expect == XOPERATOR)
79072805
LW
2684 Aop(OP_ADD);
2685 else {
3280af22 2686 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2687 check_uni();
a687059c 2688 OPERATOR('+');
2f3197b3 2689 }
a687059c 2690
378cc40b 2691 case '*':
3280af22
NIS
2692 if (PL_expect != XOPERATOR) {
2693 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2694 PL_expect = XOPERATOR;
2695 force_ident(PL_tokenbuf, '*');
2696 if (!*PL_tokenbuf)
a0d0e21e 2697 PREREF('*');
79072805 2698 TERM('*');
a687059c 2699 }
79072805
LW
2700 s++;
2701 if (*s == '*') {
a687059c 2702 s++;
79072805 2703 PWop(OP_POW);
a687059c 2704 }
79072805
LW
2705 Mop(OP_MULTIPLY);
2706
378cc40b 2707 case '%':
3280af22 2708 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2709 ++s;
2710 Mop(OP_MODULO);
a687059c 2711 }
3280af22
NIS
2712 PL_tokenbuf[0] = '%';
2713 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2714 if (!PL_tokenbuf[1]) {
2715 if (s == PL_bufend)
bbce6d69
PP
2716 yyerror("Final % should be \\% or %name");
2717 PREREF('%');
a687059c 2718 }
3280af22 2719 PL_pending_ident = '%';
bbce6d69 2720 TERM('%');
a687059c 2721
378cc40b 2722 case '^':
79072805 2723 s++;
a0d0e21e 2724 BOop(OP_BIT_XOR);
79072805 2725 case '[':
3280af22 2726 PL_lex_brackets++;
79072805 2727 /* FALL THROUGH */
378cc40b 2728 case '~':
378cc40b 2729 case ',':
378cc40b
LW
2730 tmp = *s++;
2731 OPERATOR(tmp);
a0d0e21e
LW
2732 case ':':
2733 if (s[1] == ':') {
2734 len = 0;
2735 goto just_a_word;
2736 }
2737 s++;
09bef843
SB
2738 switch (PL_expect) {
2739 OP *attrs;
2740 case XOPERATOR:
2741 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2742 break;
2743 PL_bufptr = s; /* update in case we back off */
2744 goto grabattrs;
2745 case XATTRBLOCK:
2746 PL_expect = XBLOCK;
2747 goto grabattrs;
2748 case XATTRTERM:
2749 PL_expect = XTERMBLOCK;
2750 grabattrs:
2751 s = skipspace(s);
2752 attrs = Nullop;
7e2040f0 2753 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2754 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2755 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2756 if (tmp < 0) tmp = -tmp;
2757 switch (tmp) {
2758 case KEY_or:
2759 case KEY_and:
2760 case KEY_for:
2761 case KEY_unless:
2762 case KEY_if:
2763 case KEY_while:
2764 case KEY_until:
2765 goto got_attrs;
2766 default:
2767 break;
2768 }
2769 }
09bef843
SB
2770 if (*d == '(') {
2771 d = scan_str(d,TRUE,TRUE);
2772 if (!d) {
2773 if (PL_lex_stuff) {
2774 SvREFCNT_dec(PL_lex_stuff);
2775 PL_lex_stuff = Nullsv;
2776 }
2777 /* MUST advance bufptr here to avoid bogus
2778 "at end of line" context messages from yyerror().
2779 */
2780 PL_bufptr = s + len;
2781 yyerror("Unterminated attribute parameter in attribute list");
2782 if (attrs)
2783 op_free(attrs);
2784 return 0; /* EOF indicator */
2785 }
2786 }
2787 if (PL_lex_stuff) {
2788 SV *sv = newSVpvn(s, len);
2789 sv_catsv(sv, PL_lex_stuff);
2790 attrs = append_elem(OP_LIST, attrs,
2791 newSVOP(OP_CONST, 0, sv));
2792 SvREFCNT_dec(PL_lex_stuff);
2793 PL_lex_stuff = Nullsv;
2794 }
2795 else {
2796 attrs = append_elem(OP_LIST, attrs,
2797 newSVOP(OP_CONST, 0,
2798 newSVpvn(s, len)));
2799 }
2800 s = skipspace(d);
0120eecf 2801 if (*s == ':' && s[1] != ':')
09bef843 2802 s = skipspace(s+1);
0120eecf
GS
2803 else if (s == d)
2804 break; /* require real whitespace or :'s */
09bef843 2805 }
f9829d6b
GS
2806 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2807 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2808 char q = ((*s == '\'') ? '"' : '\'');
2809 /* If here for an expression, and parsed no attrs, back off. */
2810 if (tmp == '=' && !attrs) {
2811 s = PL_bufptr;
2812 break;
2813 }
2814 /* MUST advance bufptr here to avoid bogus "at end of line"
2815 context messages from yyerror().
2816 */
2817 PL_bufptr = s;
2818 if (!*s)
2819 yyerror("Unterminated attribute list");
2820 else
2821 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2822 q, *s, q));
2823 if (attrs)
2824 op_free(attrs);
2825 OPERATOR(':');
2826 }
f9829d6b 2827 got_attrs:
09bef843
SB
2828 if (attrs) {
2829 PL_nextval[PL_nexttoke].opval = attrs;
2830 force_next(THING);
2831 }
2832 TOKEN(COLONATTR);
2833 }
a0d0e21e 2834 OPERATOR(':');
8990e307
LW
2835 case '(':
2836 s++;
3280af22
NIS
2837 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2838 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2839 else
3280af22 2840 PL_expect = XTERM;
a0d0e21e 2841 TOKEN('(');
378cc40b 2842 case ';':
57843af0
GS
2843 if (CopLINE(PL_curcop) < PL_copline)
2844 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2845 tmp = *s++;
2846 OPERATOR(tmp);
2847 case ')':
378cc40b 2848 tmp = *s++;
16d20bd9
AD
2849 s = skipspace(s);
2850 if (*s == '{')
2851 PREBLOCK(tmp);
378cc40b 2852 TERM(tmp);
79072805
LW
2853 case ']':
2854 s++;
3280af22 2855 if (PL_lex_brackets <= 0)
d98d5fff 2856 yyerror("Unmatched right square bracket");
463ee0b2 2857 else
3280af22
NIS
2858 --PL_lex_brackets;
2859 if (PL_lex_state == LEX_INTERPNORMAL) {
2860 if (PL_lex_brackets == 0) {
a0d0e21e 2861 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2862 PL_lex_state = LEX_INTERPEND;
79072805
LW
2863 }
2864 }
4633a7c4 2865 TERM(']');
79072805
LW
2866 case '{':
2867 leftbracket:
79072805 2868 s++;
3280af22
NIS
2869 if (PL_lex_brackets > 100) {
2870 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2871 if (newlb != PL_lex_brackstack) {
8990e307 2872 SAVEFREEPV(newlb);
3280af22 2873 PL_lex_brackstack = newlb;
8990e307
LW
2874 }
2875 }
3280af22 2876 switch (PL_expect) {
a0d0e21e 2877 case XTERM:
3280af22 2878 if (PL_lex_formbrack) {
a0d0e21e
LW
2879 s--;
2880 PRETERMBLOCK(DO);
2881 }
3280af22
NIS
2882 if (PL_oldoldbufptr == PL_last_lop)
2883 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2884 else
3280af22 2885 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2886 OPERATOR(HASHBRACK);
a0d0e21e 2887 case XOPERATOR:
3280af22 2888 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2889 s++;
44a8e56a 2890 d = s;
3280af22
NIS
2891 PL_tokenbuf[0] = '\0';
2892 if (d < PL_bufend && *d == '-') {
2893 PL_tokenbuf[0] = '-';
44a8e56a 2894 d++;
3280af22 2895 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2896 d++;
2897 }
7e2040f0 2898 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2899 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2900 FALSE, &len);
3280af22 2901 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2902 d++;
2903 if (*d == '}') {
3280af22 2904 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2905 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2906 if (minus)
2907 force_next('-');
748a9306
LW
2908 }
2909 }
2910 /* FALL THROUGH */
09bef843 2911 case XATTRBLOCK:
748a9306 2912 case XBLOCK:
3280af22
NIS
2913 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2914 PL_expect = XSTATE;
a0d0e21e 2915 break;
09bef843 2916 case XATTRTERM:
a0d0e21e 2917 case XTERMBLOCK:
3280af22
NIS
2918 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2919 PL_expect = XSTATE;
a0d0e21e
LW
2920 break;
2921 default: {
2922 char *t;
3280af22
NIS
2923 if (PL_oldoldbufptr == PL_last_lop)
2924 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2925 else
3280af22 2926 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2927 s = skipspace(s);
09ecc4b6 2928 if (*s == '}')
a0d0e21e 2929 OPERATOR(HASHBRACK);
b8a4b1be
GS
2930 /* This hack serves to disambiguate a pair of curlies
2931 * as being a block or an anon hash. Normally, expectation
2932 * determines that, but in cases where we're not in a
2933 * position to expect anything in particular (like inside
2934 * eval"") we have to resolve the ambiguity. This code
2935 * covers the case where the first term in the curlies is a
2936 * quoted string. Most other cases need to be explicitly
2937 * disambiguated by prepending a `+' before the opening
2938 * curly in order to force resolution as an anon hash.
2939 *
2940 * XXX should probably propagate the outer expectation
2941 * into eval"" to rely less on this hack, but that could
2942 * potentially break current behavior of eval"".
2943 * GSAR 97-07-21
2944 */
2945 t = s;
2946 if (*s == '\'' || *s == '"' || *s == '`') {
2947 /* common case: get past first string, handling escapes */
3280af22 2948 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2949 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2950 t++;
2951 t++;
a0d0e21e 2952 }
b8a4b1be 2953 else if (*s == 'q') {
3280af22 2954 if (++t < PL_bufend
b8a4b1be 2955 && (!isALNUM(*t)
3280af22 2956 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2957 && !isALNUM(*t))))
2958 {
b8a4b1be
GS
2959 char *tmps;
2960 char open, close, term;
2961 I32 brackets = 1;
2962
3280af22 2963 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2964 t++;
2965 term = *t;
2966 open = term;
2967 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2968 term = tmps[5];
2969 close = term;
2970 if (open == close)
3280af22
NIS
2971 for (t++; t < PL_bufend; t++) {
2972 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2973 t++;
6d07e5e9 2974 else if (*t == open)
b8a4b1be
GS
2975 break;
2976 }
2977 else
3280af22
NIS
2978 for (t++; t < PL_bufend; t++) {
2979 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2980 t++;
6d07e5e9 2981 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2982 break;
2983 else if (*t == open)
2984 brackets++;
2985 }
2986 }
2987 t++;
a0d0e21e 2988 }
7e2040f0 2989 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 2990 t += UTF8SKIP(t);
7e2040f0 2991 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 2992 t += UTF8SKIP(t);
a0d0e21e 2993 }
3280af22 2994 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2995 t++;
b8a4b1be
GS
2996 /* if comma follows first term, call it an anon hash */
2997 /* XXX it could be a comma expression with loop modifiers */
3280af22 2998 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2999 || (*t == '=' && t[1] == '>')))
a0d0e21e 3000 OPERATOR(HASHBRACK);
3280af22 3001 if (PL_expect == XREF)
4e4e412b 3002 PL_expect = XTERM;
a0d0e21e 3003 else {
3280af22
NIS
3004 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3005 PL_expect = XSTATE;
a0d0e21e 3006 }
8990e307 3007 }
a0d0e21e 3008 break;
463ee0b2 3009 }
57843af0 3010 yylval.ival = CopLINE(PL_curcop);
79072805 3011 if (isSPACE(*s) || *s == '#')
3280af22 3012 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3013 TOKEN('{');
378cc40b 3014 case '}':
79072805
LW
3015 rightbracket:
3016 s++;
3280af22 3017 if (PL_lex_brackets <= 0)
d98d5fff 3018 yyerror("Unmatched right curly bracket");
463ee0b2 3019 else
3280af22
NIS
3020 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3021 if (PL_lex_brackets < PL_lex_formbrack)
3022 PL_lex_formbrack = 0;
3023 if (PL_lex_state == LEX_INTERPNORMAL) {
3024 if (PL_lex_brackets == 0) {
9059aa12
LW
3025 if (PL_expect & XFAKEBRACK) {
3026 PL_expect &= XENUMMASK;
3280af22
NIS
3027 PL_lex_state = LEX_INTERPEND;
3028 PL_bufptr = s;
cea2e8a9 3029 return yylex(); /* ignore fake brackets */
79072805 3030 }
fa83b5b6 3031 if (*s == '-' && s[1] == '>')
3280af22 3032 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3033 else if (*s != '[' && *s != '{')
3280af22 3034 PL_lex_state = LEX_INTERPEND;
79072805
LW
3035 }
3036 }
9059aa12
LW
3037 if (PL_expect & XFAKEBRACK) {
3038 PL_expect &= XENUMMASK;
3280af22 3039 PL_bufptr = s;
cea2e8a9 3040 return yylex(); /* ignore fake brackets */
748a9306 3041 }
79072805
LW
3042 force_next('}');
3043 TOKEN(';');
378cc40b
LW
3044 case '&':
3045 s++;
3046 tmp = *s++;
3047 if (tmp == '&')
a0d0e21e 3048 AOPERATOR(ANDAND);
378cc40b 3049 s--;
3280af22 3050 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3051 if (ckWARN(WARN_SEMICOLON)
3052 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3053 {
57843af0 3054 CopLINE_dec(PL_curcop);
cea2e8a9 3055 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3056 CopLINE_inc(PL_curcop);
463ee0b2 3057 }
79072805 3058 BAop(OP_BIT_AND);
463ee0b2 3059 }
79072805 3060
3280af22
NIS
3061 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3062 if (*PL_tokenbuf) {
3063 PL_expect = XOPERATOR;
3064 force_ident(PL_tokenbuf, '&');
463ee0b2 3065 }
79072805
LW
3066 else
3067 PREREF('&');
c07a80fd 3068 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3069 TERM('&');
3070
378cc40b
LW
3071 case '|':
3072 s++;
3073 tmp = *s++;
3074 if (tmp == '|')
a0d0e21e 3075 AOPERATOR(OROR);
378cc40b 3076 s--;
79072805 3077 BOop(OP_BIT_OR);
378cc40b
LW
3078 case '=':
3079 s++;
3080 tmp = *s++;
3081 if (tmp == '=')
79072805
LW
3082 Eop(OP_EQ);
3083 if (tmp == '>')
3084 OPERATOR(',');
378cc40b 3085 if (tmp == '~')
79072805 3086 PMop(OP_MATCH);
599cee73 3087 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3088 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3089 s--;
3280af22
NIS
3090 if (PL_expect == XSTATE && isALPHA(tmp) &&
3091 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3092 {
3280af22
NIS
3093 if (PL_in_eval && !PL_rsfp) {
3094 d = PL_bufend;
a5f75d66
AD
3095 while (s < d) {
3096 if (*s++ == '\n') {
3097 incline(s);
3098 if (strnEQ(s,"=cut",4)) {
3099 s = strchr(s,'\n');
3100 if (s)
3101 s++;
3102 else
3103 s = d;
3104 incline(s);
3105 goto retry;
3106 }
3107 }
3108 }
3109 goto retry;
3110 }
3280af22
NIS
3111 s = PL_bufend;
3112 PL_doextract = TRUE;
a0d0e21e
LW
3113 goto retry;
3114 }
3280af22 3115 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3116 char *t;
51882d45 3117#ifdef PERL_STRICT_CR
a0d0e21e 3118 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3119#else
3120 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3121#endif
a0d0e21e
LW
3122 if (*t == '\n' || *t == '#') {
3123 s--;
3280af22 3124 PL_expect = XBLOCK;
a0d0e21e
LW
3125 goto leftbracket;
3126 }
79072805 3127 }
a0d0e21e
LW
3128 yylval.ival = 0;
3129 OPERATOR(ASSIGNOP);
378cc40b
LW
3130 case '!':
3131 s++;
3132 tmp = *s++;
3133 if (tmp == '=')
79072805 3134 Eop(OP_NE);
378cc40b 3135 if (tmp == '~')
79072805 3136 PMop(OP_NOT);
378cc40b
LW
3137 s--;
3138 OPERATOR('!');
3139 case '<':
3280af22 3140 if (PL_expect != XOPERATOR) {
93a17b20 3141 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3142 check_uni();
79072805
LW
3143 if (s[1] == '<')
3144 s = scan_heredoc(s);
3145 else
3146 s = scan_inputsymbol(s);
3147 TERM(sublex_start());
378cc40b
LW
3148 }
3149 s++;
3150 tmp = *s++;
3151 if (tmp == '<')
79072805 3152 SHop(OP_LEFT_SHIFT);
395c3793
LW
3153 if (tmp == '=') {
3154 tmp = *s++;
3155 if (tmp == '>')
79072805 3156 Eop(OP_NCMP);
395c3793 3157 s--;
79072805 3158 Rop(OP_LE);
395c3793 3159 }
378cc40b 3160 s--;
79072805 3161 Rop(OP_LT);
378cc40b
LW
3162 case '>':
3163 s++;
3164 tmp = *s++;
3165 if (tmp == '>')
79072805 3166 SHop(OP_RIGHT_SHIFT);
378cc40b 3167 if (tmp == '=')
79072805 3168 Rop(OP_GE);
378cc40b 3169 s--;
79072805 3170 Rop(OP_GT);
378cc40b
LW
3171
3172 case '$':
bbce6d69
PP
3173 CLINE;
3174
3280af22
NIS
3175 if (PL_expect == XOPERATOR) {
3176 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3177 PL_expect = XTERM;
a0d0e21e 3178 depcom();
bbce6d69 3179 return ','; /* grandfather non-comma-format format */
a0d0e21e 3180 }
8990e307 3181 }
a0d0e21e 3182
7e2040f0 3183 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3184 PL_tokenbuf[0] = '@';
376b8730
SM
3185 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3186 sizeof PL_tokenbuf - 1, FALSE);
3187 if (PL_expect == XOPERATOR)
3188 no_op("Array length", s);
3280af22 3189 if (!PL_tokenbuf[1])
a0d0e21e 3190 PREREF(DOLSHARP);
3280af22
NIS
3191 PL_expect = XOPERATOR;
3192 PL_pending_ident = '#';
463ee0b2 3193 TOKEN(DOLSHARP);
79072805 3194 }
bbce6d69 3195
3280af22 3196 PL_tokenbuf[0] = '$';
376b8730
SM
3197 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3198 sizeof PL_tokenbuf - 1, FALSE);
3199 if (PL_expect == XOPERATOR)
3200 no_op("Scalar", s);
3280af22
NIS
3201 if (!PL_tokenbuf[1]) {
3202 if (s == PL_bufend)
bbce6d69
PP
3203 yyerror("Final $ should be \\$ or $name");
3204 PREREF('$');
8990e307 3205 }
a0d0e21e 3206
bbce6d69 3207 /* This kludge not intended to be bulletproof. */
3280af22 3208 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3209 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3210 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3211 yylval.opval->op_private = OPpCONST_ARYBASE;
3212 TERM(THING);
3213 }
3214
ff68c719 3215 d = s;
69d2bceb 3216 tmp = (I32)*s;
3280af22 3217 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3218 s = skipspace(s);
3219
3280af22 3220 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3221 char *t;
3222 if (*s == '[') {
3280af22 3223 PL_tokenbuf[0] = '@';
599cee73 3224 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3225 for(t = s + 1;
7e2040f0 3226 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3227 t++) ;
a0d0e21e 3228 if (*t++ == ',') {
3280af22
NIS
3229 PL_bufptr = skipspace(PL_bufptr);
3230 while (t < PL_bufend && *t != ']')
bbce6d69 3231 t++;
cea2e8a9 3232 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3233 "Multidimensional syntax %.*s not supported",
3234 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3235 }
3236 }
bbce6d69
PP
3237 }
3238 else if (*s == '{') {
3280af22 3239 PL_tokenbuf[0] = '%';
599cee73 3240 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3241 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3242 {
3280af22 3243 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3244 STRLEN len;
3245 for (t++; isSPACE(*t); t++) ;
7e2040f0 3246 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3247 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3248 for (; isSPACE(*t); t++) ;
864dbfa3 3249 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3250 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3251 "You need to quote \"%s\"", tmpbuf);
748a9306 3252 }
93a17b20
LW
3253 }
3254 }
2f3197b3 3255 }
bbce6d69 3256
3280af22 3257 PL_expect = XOPERATOR;
69d2bceb 3258 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3259 bool islop = (PL_last_lop == PL_oldoldbufptr);
3260 if (!islop || PL_last_lop_op == OP_GREPSTART)
3261 PL_expect = XOPERATOR;
bbce6d69 3262 else if (strchr("$@\"'`q", *s))
3280af22 3263 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3264 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3265 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3266 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3267 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3268 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3269 if (tmp = keyword(tmpbuf, len)) {
3270 /* binary operators exclude handle interpretations */
3271 switch (tmp) {
3272 case -KEY_x:
3273 case -KEY_eq:
3274 case -KEY_ne:
3275 case -KEY_gt:
3276 case -KEY_lt:
3277 case -KEY_ge:
3278 case -KEY_le:
3279 case -KEY_cmp:
3280 break;
3281 default:
3280af22 3282 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3283 break;
3284 }
3285 }
68dc0745
PP
3286 else {
3287 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3288 if (gv && GvCVu(gv))
3280af22 3289 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3290 }
93a17b20 3291 }
bbce6d69 3292 else if (isDIGIT(*s))
3280af22 3293 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3294 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3295 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3296 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3297 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3298 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3299 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3300 }
3280af22 3301 PL_pending_ident = '$';
79072805 3302 TOKEN('$');
378cc40b
LW
3303
3304 case '@':
3280af22 3305 if (PL_expect == XOPERATOR)
bbce6d69 3306 no_op("Array", s);
3280af22
NIS
3307 PL_tokenbuf[0] = '@';
3308 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3309 if (!PL_tokenbuf[1]) {
3310 if (s == PL_bufend)
bbce6d69
PP
3311 yyerror("Final @ should be \\@ or @name");
3312 PREREF('@');
3313 }
3280af22 3314 if (PL_lex_state == LEX_NORMAL)
ff68c719 3315 s = skipspace(s);
3280af22 3316 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3317 if (*s == '{')
3280af22 3318 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3319
3320 /* Warn about @ where they meant $. */
599cee73 3321 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3322 if (*s == '[' || *s == '{') {
3323 char *t = s + 1;
7e2040f0 3324 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3325 t++;
3326 if (*t == '}' || *t == ']') {
3327 t++;
3280af22 3328 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3329 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3330 "Scalar value %.*s better written as $%.*s",
3280af22 3331 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3332 }
93a17b20
LW
3333 }
3334 }
463ee0b2 3335 }
3280af22 3336 PL_pending_ident = '@';
79072805 3337 TERM('@');
378cc40b
LW
3338
3339 case '/': /* may either be division or pattern */
3340 case '?': /* may either be conditional or pattern */
3280af22 3341 if (PL_expect != XOPERATOR) {
c277df42 3342 /* Disable warning on "study /blah/" */
3280af22
NIS
3343 if (PL_oldoldbufptr == PL_last_uni
3344 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3345 || memNE(PL_last_uni, "study", 5)
3346 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3347 check_uni();
8782bef2 3348 s = scan_pat(s,OP_MATCH);
79072805 3349 TERM(sublex_start());
378cc40b
LW
3350 }
3351 tmp = *s++;
a687059c 3352 if (tmp == '/')
79072805 3353 Mop(OP_DIVIDE);
378cc40b
LW
3354 OPERATOR(tmp);
3355
3356 case '.':
51882d45
GS
3357 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3358#ifdef PERL_STRICT_CR
3359 && s[1] == '\n'
3360#else
3361 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3362#endif
3363 && (s == PL_linestart || s[-1] == '\n') )
3364 {
3280af22
NIS
3365 PL_lex_formbrack = 0;
3366 PL_expect = XSTATE;
79072805
LW
3367 goto rightbracket;
3368 }
3280af22 3369 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3370 tmp = *s++;
a687059c
LW
3371 if (*s == tmp) {
3372 s++;
2f3197b3
LW
3373 if (*s == tmp) {
3374 s++;
79072805 3375 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3376 }
3377 else
79072805 3378 yylval.ival = 0;
378cc40b 3379 OPERATOR(DOTDOT);
a687059c 3380 }
3280af22 3381 if (PL_expect != XOPERATOR)
2f3197b3 3382 check_uni();
79072805 3383 Aop(OP_CONCAT);
378cc40b
LW
3384 }
3385 /* FALL THROUGH */
3386 case '0': case '1': case '2': case '3': case '4':
3387 case '5': case '6': case '7': case '8': case '9':
79072805 3388 s = scan_num(s);
3280af22 3389 if (PL_expect == XOPERATOR)
8990e307 3390 no_op("Number",s);
79072805
LW
3391 TERM(THING);
3392
3393 case '\'':
09bef843 3394 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3395 if (PL_expect == XOPERATOR) {
3396 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3397 PL_expect = XTERM;
a0d0e21e
LW
3398 depcom();
3399 return ','; /* grandfather non-comma-format format */
3400 }
463ee0b2 3401 else
8990e307 3402 no_op("String",s);
463ee0b2 3403 }
79072805 3404 if (!s)
85e6fe83 3405 missingterm((char*)0);
79072805
LW
3406 yylval.ival = OP_CONST;
3407 TERM(sublex_start());
3408
3409 case '"':
09bef843 3410 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3411 if (PL_expect == XOPERATOR) {
3412 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3413 PL_expect = XTERM;
a0d0e21e
LW
3414 depcom();
3415 return ','; /* grandfather non-comma-format format */
3416 }
463ee0b2 3417 else
8990e307 3418 no_op("String",s);
463ee0b2 3419 }
79072805 3420 if (!s)
85e6fe83 3421 missingterm((char*)0);
4633a7c4 3422 yylval.ival = OP_CONST;
3280af22 3423 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3424 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3425 yylval.ival = OP_STRINGIFY;
3426 break;
3427 }
3428 }
79072805
LW
3429 TERM(sublex_start());
3430
3431 case '`':
09bef843 3432 s = scan_str(s,FALSE,FALSE);
3280af22 3433 if (PL_expect == XOPERATOR)
8990e307 3434 no_op("Backticks",s);
79072805 3435 if (!s)
85e6fe83 3436 missingterm((char*)0);
79072805
LW
3437 yylval.ival = OP_BACKTICK;
3438 set_csh();
3439 TERM(sublex_start());
3440
3441 case '\\':
3442 s++;
599cee73 3443 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3444 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3445 *s, *s);
3280af22 3446 if (PL_expect == XOPERATOR)
8990e307 3447 no_op("Backslash",s);
79072805
LW
3448 OPERATOR(REFGEN);
3449
a7cb1f99
GS
3450 case 'v':
3451 if (isDIGIT(s[1]) && PL_expect == XTERM) {
3452 char *start = s;
3453 start++;
3454 start++;
3455 while (isDIGIT(*start))
3456 start++;
3457 if (*start == '.' && isDIGIT(start[1])) {
3458 s = scan_num(s);
3459 TERM(THING);
3460 }
3461 }
3462 goto keylookup;
79072805 3463 case 'x':
3280af22 3464 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3465 s++;
3466 Mop(OP_REPEAT);
2f3197b3 3467 }
79072805
LW
3468 goto keylookup;
3469
378cc40b 3470 case '_':
79072805
LW
3471 case 'a': case 'A':
3472 case 'b': case 'B':
3473 case 'c': case 'C':
3474 case 'd': case 'D':
3475 case 'e': case 'E':
3476 case 'f': case 'F':
3477 case 'g': case 'G':
3478 case 'h': case 'H':
3479 case 'i': case 'I':
3480 case 'j': case 'J':
3481 case 'k': case 'K':
3482 case 'l': case 'L':
3483 case 'm': case 'M':
3484 case 'n': case 'N':
3485 case 'o': case 'O':
3486 case 'p': case 'P':
3487 case 'q': case 'Q':
3488 case 'r': case 'R':
3489 case 's': case 'S':
3490 case 't': case 'T':
3491 case 'u': case 'U':
a7cb1f99 3492 case 'V':
79072805
LW
3493 case 'w': case 'W':
3494 case 'X':
3495 case 'y': case 'Y':
3496 case 'z': case 'Z':
3497
49dc05e3 3498 keylookup: {
2d8e6c8d 3499 STRLEN n_a;
161b471a
NIS
3500 gv = Nullgv;
3501 gvp = 0;
49dc05e3 3502
3280af22
NIS
3503 PL_bufptr = s;
3504 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3505
3506 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3507 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3508 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3509 (PL_tokenbuf[0] == 'q' &&
3510 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3511
3512 /* x::* is just a word, unless x is "CORE" */
3280af22 3513 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3514 goto just_a_word;
3515
3643fb5f 3516 d = s;
3280af22 3517 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3518 d++; /* no comments skipped here, or s### is misparsed */
3519
3520 /* Is this a label? */
3280af22
NIS
3521 if (!tmp && PL_expect == XSTATE
3522 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3523 s = d + 1;
3280af22 3524 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3525 CLINE;
3526 TOKEN(LABEL);
3643fb5f
CS
3527 }
3528
3529 /* Check for keywords */
3280af22 3530 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3531
3532 /* Is this a word before a => operator? */
748a9306
LW
3533 if (strnEQ(d,"=>",2)) {
3534 CLINE;
3280af22 3535 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3536 yylval.opval->op_private = OPpCONST_BARE;
3537 TERM(WORD);
3538 }
3539
a0d0e21e 3540 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3541 GV *ogv = Nullgv; /* override (winner) */
3542 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3543 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3544 CV *cv;
3280af22 3545 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3546 (cv = GvCVu(gv)))
3547 {
3548 if (GvIMPORTED_CV(gv))
3549 ogv = gv;
3550 else if (! CvMETHOD(cv))
3551 hgv = gv;
3552 }
3553 if (!ogv &&
3280af22
NIS
3554 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3555 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3556 GvCVu(gv) && GvIMPORTED_CV(gv))
3557 {
3558 ogv = gv;
3559 }
3560 }
3561 if (ogv) {
3562 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3563 }
3564 else if (gv && !gvp
3565 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3566 && GvCVu(gv)
3280af22 3567 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3568 {
3569 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3570 }
56f7f34b
CS
3571 else { /* no override */
3572 tmp = -tmp;
3573 gv = Nullgv;
3574 gvp = 0;
4944e2f7
GS
3575 if (ckWARN(WARN_AMBIGUOUS) && hgv
3576 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3577 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3578 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3579 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3580 }
a0d0e21e
LW
3581 }
3582
3583 reserved_word:
3584 switch (tmp) {
79072805
LW
3585
3586 default: /* not a keyword */
93a17b20 3587 just_a_word: {
96e4d5b1 3588 SV *sv;
3280af22 3589 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3590
3591 /* Get the rest if it looks like a package qualifier */