This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
CR-LF support broken for formats
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
a0ed51b3 35#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
36/*
37 * Note: we try to be careful never to call the isXXX_utf8() functions
38 * unless we're pretty sure we've seen the beginning of a UTF-8 character
39 * (that is, the two high bits are set). Otherwise we risk loading in the
40 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
41 */
42#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
43 ? isIDFIRST(*(p)) \
44 : isIDFIRST_utf8((U8*)p))
45#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
46 ? isALNUM(*(p)) \
47 : isALNUM_utf8((U8*)p))
a0ed51b3 48
2b92dfce
GS
49/* In variables name $^X, these are the legal values for X.
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
395c3793
LW
72#ifdef I_FCNTL
73#include <fcntl.h>
74#endif
fe14fcc3
LW
75#ifdef I_SYS_FILE
76#include <sys/file.h>
77#endif
395c3793 78
a790bc05
PP
79/* XXX If this causes problems, set i_unistd=undef in the hint file. */
80#ifdef I_UNISTD
81# include <unistd.h> /* Needed for execv() */
82#endif
83
84
79072805
LW
85#ifdef ff_next
86#undef ff_next
d48672a2
LW
87#endif
88
a1a0e61e
TD
89#ifdef USE_PURE_BISON
90YYSTYPE* yylval_pointer = NULL;
91int* yychar_pointer = NULL;
22c35a8c
GS
92# undef yylval
93# undef yychar
e4bfbdd4
JH
94# define yylval (*yylval_pointer)
95# define yychar (*yychar_pointer)
96# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
97# undef yylex
98# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
134 *
135 * Also see LOP and lop() below.
136 */
137
3280af22
NIS
138#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
139#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
140#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
141#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
142#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
143#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
144#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
145#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
146#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
147#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
148#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
149#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
150#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
151#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
152#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
153#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
154#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
155#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
156#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
157#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 158
a687059c
LW
159/* This bit of chicanery makes a unary function followed by
160 * a parenthesis into a function with one argument, highest precedence.
161 */
2f3197b3 162#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
79072805
LW
172 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
173
9f68db38 174/* grandfather return to old style */
3280af22 175#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 176
ffb4593c
NT
177/*
178 * S_ao
179 *
180 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
181 * into an OP_ANDASSIGN or OP_ORASSIGN
182 */
183
76e3520e 184STATIC int
cea2e8a9 185S_ao(pTHX_ int toketype)
a0d0e21e 186{
3280af22
NIS
187 if (*PL_bufptr == '=') {
188 PL_bufptr++;
a0d0e21e
LW
189 if (toketype == ANDAND)
190 yylval.ival = OP_ANDASSIGN;
191 else if (toketype == OROR)
192 yylval.ival = OP_ORASSIGN;
193 toketype = ASSIGNOP;
194 }
195 return toketype;
196}
197
ffb4593c
NT
198/*
199 * S_no_op
200 * When Perl expects an operator and finds something else, no_op
201 * prints the warning. It always prints "<something> found where
202 * operator expected. It prints "Missing semicolon on previous line?"
203 * if the surprise occurs at the start of the line. "do you need to
204 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
205 * where the compiler doesn't know if foo is a method call or a function.
206 * It prints "Missing operator before end of line" if there's nothing
207 * after the missing operator, or "... before <...>" if there is something
208 * after the missing operator.
209 */
210
76e3520e 211STATIC void
cea2e8a9 212S_no_op(pTHX_ char *what, char *s)
463ee0b2 213{
3280af22
NIS
214 char *oldbp = PL_bufptr;
215 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 216
1189a94a
GS
217 if (!s)
218 s = oldbp;
219 else {
220 assert(s >= oldbp);
221 PL_bufptr = s;
222 }
cea2e8a9 223 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 224 if (is_first)
cea2e8a9 225 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 226 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 227 char *t;
834a4ddd 228 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 229 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 230 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 231 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
232 }
233 else
cea2e8a9 234 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 235 PL_bufptr = oldbp;
8990e307
LW
236}
237
ffb4593c
NT
238/*
239 * S_missingterm
240 * Complain about missing quote/regexp/heredoc terminator.
241 * If it's called with (char *)NULL then it cauterizes the line buffer.
242 * If we're in a delimited string and the delimiter is a control
243 * character, it's reformatted into a two-char sequence like ^C.
244 * This is fatal.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_missingterm(pTHX_ char *s)
8990e307
LW
249{
250 char tmpbuf[3];
251 char q;
252 if (s) {
253 char *nl = strrchr(s,'\n');
d2719217 254 if (nl)
8990e307
LW
255 *nl = '\0';
256 }
9d116dd7
JH
257 else if (
258#ifdef EBCDIC
259 iscntrl(PL_multi_close)
260#else
261 PL_multi_close < 32 || PL_multi_close == 127
262#endif
263 ) {
8990e307 264 *tmpbuf = '^';
3280af22 265 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
266 s = "\\n";
267 tmpbuf[2] = '\0';
268 s = tmpbuf;
269 }
270 else {
3280af22 271 *tmpbuf = PL_multi_close;
8990e307
LW
272 tmpbuf[1] = '\0';
273 s = tmpbuf;
274 }
275 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 276 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 277}
79072805 278
ffb4593c
NT
279/*
280 * Perl_deprecate
ffb4593c
NT
281 */
282
79072805 283void
864dbfa3 284Perl_deprecate(pTHX_ char *s)
a0d0e21e 285{
d008e5eb 286 dTHR;
599cee73 287 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 288 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
289}
290
ffb4593c
NT
291/*
292 * depcom
9cbb5ea2 293 * Deprecate a comma-less variable list.
ffb4593c
NT
294 */
295
76e3520e 296STATIC void
cea2e8a9 297S_depcom(pTHX)
a0d0e21e
LW
298{
299 deprecate("comma-less variable list");
300}
301
ffb4593c 302/*
9cbb5ea2
GS
303 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
304 * utf16-to-utf8-reversed.
ffb4593c
NT
305 */
306
a868473f
NIS
307#ifdef WIN32
308
76e3520e 309STATIC I32
cea2e8a9 310S_win32_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f
NIS
311{
312 I32 count = FILTER_READ(idx+1, sv, maxlen);
313 if (count > 0 && !maxlen)
314 win32_strip_return(sv);
315 return count;
316}
317#endif
318
a0ed51b3 319STATIC I32
cea2e8a9 320S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
321{
322 I32 count = FILTER_READ(idx+1, sv, maxlen);
323 if (count) {
dfe13c55
GS
324 U8* tmps;
325 U8* tend;
326 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 327 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 328 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
329
330 }
331 return count;
332}
333
334STATIC I32
cea2e8a9 335S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
336{
337 I32 count = FILTER_READ(idx+1, sv, maxlen);
338 if (count) {
dfe13c55
GS
339 U8* tmps;
340 U8* tend;
341 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 342 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 343 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
344
345 }
346 return count;
347}
a868473f 348
ffb4593c
NT
349/*
350 * Perl_lex_start
9cbb5ea2
GS
351 * Initialize variables. Uses the Perl save_stack to save its state (for
352 * recursive calls to the parser).
ffb4593c
NT
353 */
354
a0d0e21e 355void
864dbfa3 356Perl_lex_start(pTHX_ SV *line)
79072805 357{
0f15f207 358 dTHR;
8990e307
LW
359 char *s;
360 STRLEN len;
361
3280af22
NIS
362 SAVEI32(PL_lex_dojoin);
363 SAVEI32(PL_lex_brackets);
3280af22
NIS
364 SAVEI32(PL_lex_casemods);
365 SAVEI32(PL_lex_starts);
366 SAVEI32(PL_lex_state);
7766f137 367 SAVEVPTR(PL_lex_inpat);
3280af22 368 SAVEI32(PL_lex_inwhat);
57843af0 369 SAVECOPLINE(PL_curcop);
3280af22
NIS
370 SAVEPPTR(PL_bufptr);
371 SAVEPPTR(PL_bufend);
372 SAVEPPTR(PL_oldbufptr);
373 SAVEPPTR(PL_oldoldbufptr);
374 SAVEPPTR(PL_linestart);
375 SAVESPTR(PL_linestr);
376 SAVEPPTR(PL_lex_brackstack);
377 SAVEPPTR(PL_lex_casestack);
c76ac1ee 378 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
379 SAVESPTR(PL_lex_stuff);
380 SAVEI32(PL_lex_defer);
09bef843 381 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 382 SAVESPTR(PL_lex_repl);
bebdddfc
GS
383 SAVEINT(PL_expect);
384 SAVEINT(PL_lex_expect);
3280af22
NIS
385
386 PL_lex_state = LEX_NORMAL;
387 PL_lex_defer = 0;
388 PL_expect = XSTATE;
389 PL_lex_brackets = 0;
3280af22
NIS
390 New(899, PL_lex_brackstack, 120, char);
391 New(899, PL_lex_casestack, 12, char);
392 SAVEFREEPV(PL_lex_brackstack);
393 SAVEFREEPV(PL_lex_casestack);
394 PL_lex_casemods = 0;
395 *PL_lex_casestack = '\0';
396 PL_lex_dojoin = 0;
397 PL_lex_starts = 0;
398 PL_lex_stuff = Nullsv;
399 PL_lex_repl = Nullsv;
400 PL_lex_inpat = 0;
401 PL_lex_inwhat = 0;
09bef843 402 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
403 PL_linestr = line;
404 if (SvREADONLY(PL_linestr))
405 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
406 s = SvPV(PL_linestr, len);
8990e307 407 if (len && s[len-1] != ';') {
3280af22
NIS
408 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
409 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
410 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 411 }
3280af22
NIS
412 SvTEMP_off(PL_linestr);
413 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
414 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
415 SvREFCNT_dec(PL_rs);
79cb57f6 416 PL_rs = newSVpvn("\n", 1);
3280af22 417 PL_rsfp = 0;
79072805 418}
a687059c 419
ffb4593c
NT
420/*
421 * Perl_lex_end
9cbb5ea2
GS
422 * Finalizer for lexing operations. Must be called when the parser is
423 * done with the lexer.
ffb4593c
NT
424 */
425
463ee0b2 426void
864dbfa3 427Perl_lex_end(pTHX)
463ee0b2 428{
3280af22 429 PL_doextract = FALSE;
463ee0b2
LW
430}
431
ffb4593c
NT
432/*
433 * S_incline
434 * This subroutine has nothing to do with tilting, whether at windmills
435 * or pinball tables. Its name is short for "increment line". It
57843af0 436 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 437 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
438 * # line 500 "foo.pm"
439 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
440 */
441
76e3520e 442STATIC void
cea2e8a9 443S_incline(pTHX_ char *s)
463ee0b2 444{
0f15f207 445 dTHR;
463ee0b2
LW
446 char *t;
447 char *n;
448 char ch;
449 int sawline = 0;
450
57843af0 451 CopLINE_inc(PL_curcop);
463ee0b2
LW
452 if (*s++ != '#')
453 return;
454 while (*s == ' ' || *s == '\t') s++;
455 if (strnEQ(s, "line ", 5)) {
456 s += 5;
457 sawline = 1;
458 }
459 if (!isDIGIT(*s))
460 return;
461 n = s;
462 while (isDIGIT(*s))
463 s++;
464 while (*s == ' ' || *s == '\t')
465 s++;
466 if (*s == '"' && (t = strchr(s+1, '"')))
467 s++;
468 else {
469 if (!sawline)
470 return; /* false alarm */
471 for (t = s; !isSPACE(*t); t++) ;
472 }
473 ch = *t;
474 *t = '\0';
475 if (t - s > 0)
57843af0 476 CopFILE_set(PL_curcop, s);
463ee0b2 477 else
57843af0 478 CopFILE_set(PL_curcop, PL_origfilename);
463ee0b2 479 *t = ch;
57843af0 480 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
481}
482
ffb4593c
NT
483/*
484 * S_skipspace
485 * Called to gobble the appropriate amount and type of whitespace.
486 * Skips comments as well.
487 */
488
76e3520e 489STATIC char *
cea2e8a9 490S_skipspace(pTHX_ register char *s)
a687059c 491{
11343788 492 dTHR;
3280af22
NIS
493 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
494 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
495 s++;
496 return s;
497 }
498 for (;;) {
fd049845 499 STRLEN prevlen;
09bef843
SB
500 SSize_t oldprevlen, oldoldprevlen;
501 SSize_t oldloplen, oldunilen;
60e6418e
GS
502 while (s < PL_bufend && isSPACE(*s)) {
503 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
504 incline(s);
505 }
ffb4593c
NT
506
507 /* comment */
3280af22
NIS
508 if (s < PL_bufend && *s == '#') {
509 while (s < PL_bufend && *s != '\n')
463ee0b2 510 s++;
60e6418e 511 if (s < PL_bufend) {
463ee0b2 512 s++;
60e6418e
GS
513 if (PL_in_eval && !PL_rsfp) {
514 incline(s);
515 continue;
516 }
517 }
463ee0b2 518 }
ffb4593c
NT
519
520 /* only continue to recharge the buffer if we're at the end
521 * of the buffer, we're not reading from a source filter, and
522 * we're in normal lexing mode
523 */
09bef843
SB
524 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
525 PL_lex_state == LEX_FORMLINE)
463ee0b2 526 return s;
ffb4593c
NT
527
528 /* try to recharge the buffer */
9cbb5ea2
GS
529 if ((s = filter_gets(PL_linestr, PL_rsfp,
530 (prevlen = SvCUR(PL_linestr)))) == Nullch)
531 {
532 /* end of file. Add on the -p or -n magic */
3280af22
NIS
533 if (PL_minus_n || PL_minus_p) {
534 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
535 ";}continue{print or die qq(-p destination: $!\\n)" :
536 "");
3280af22
NIS
537 sv_catpv(PL_linestr,";}");
538 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
539 }
540 else
3280af22 541 sv_setpv(PL_linestr,";");
ffb4593c
NT
542
543 /* reset variables for next time we lex */
9cbb5ea2
GS
544 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
545 = SvPVX(PL_linestr);
3280af22 546 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
547
548 /* Close the filehandle. Could be from -P preprocessor,
549 * STDIN, or a regular file. If we were reading code from
550 * STDIN (because the commandline held no -e or filename)
551 * then we don't close it, we reset it so the code can
552 * read from STDIN too.
553 */
554
3280af22
NIS
555 if (PL_preprocess && !PL_in_eval)
556 (void)PerlProc_pclose(PL_rsfp);
557 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
558 PerlIO_clearerr(PL_rsfp);
8990e307 559 else
3280af22
NIS
560 (void)PerlIO_close(PL_rsfp);
561 PL_rsfp = Nullfp;
463ee0b2
LW
562 return s;
563 }
ffb4593c
NT
564
565 /* not at end of file, so we only read another line */
09bef843
SB
566 /* make corresponding updates to old pointers, for yyerror() */
567 oldprevlen = PL_oldbufptr - PL_bufend;
568 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
569 if (PL_last_uni)
570 oldunilen = PL_last_uni - PL_bufend;
571 if (PL_last_lop)
572 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
573 PL_linestart = PL_bufptr = s + prevlen;
574 PL_bufend = s + SvCUR(PL_linestr);
575 s = PL_bufptr;
09bef843
SB
576 PL_oldbufptr = s + oldprevlen;
577 PL_oldoldbufptr = s + oldoldprevlen;
578 if (PL_last_uni)
579 PL_last_uni = s + oldunilen;
580 if (PL_last_lop)
581 PL_last_lop = s + oldloplen;
a0d0e21e 582 incline(s);
ffb4593c
NT
583
584 /* debugger active and we're not compiling the debugger code,
585 * so store the line into the debugger's array of lines
586 */
3280af22 587 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
588 SV *sv = NEWSV(85,0);
589
590 sv_upgrade(sv, SVt_PVMG);
3280af22 591 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 592 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 593 }
463ee0b2 594 }
a687059c 595}
378cc40b 596
ffb4593c
NT
597/*
598 * S_check_uni
599 * Check the unary operators to ensure there's no ambiguity in how they're
600 * used. An ambiguous piece of code would be:
601 * rand + 5
602 * This doesn't mean rand() + 5. Because rand() is a unary operator,
603 * the +5 is its argument.
604 */
605
76e3520e 606STATIC void
cea2e8a9 607S_check_uni(pTHX)
ba106d47 608{
2f3197b3 609 char *s;
a0d0e21e 610 char *t;
0453d815 611 dTHR;
2f3197b3 612
3280af22 613 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 614 return;
3280af22
NIS
615 while (isSPACE(*PL_last_uni))
616 PL_last_uni++;
834a4ddd 617 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 618 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 619 return;
0453d815 620 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 621 char ch = *s;
0453d815
PM
622 *s = '\0';
623 Perl_warner(aTHX_ WARN_AMBIGUOUS,
624 "Warning: Use of \"%s\" without parens is ambiguous",
625 PL_last_uni);
626 *s = ch;
627 }
2f3197b3
LW
628}
629
ffb4593c
NT
630/* workaround to replace the UNI() macro with a function. Only the
631 * hints/uts.sh file mentions this. Other comments elsewhere in the
632 * source indicate Microport Unix might need it too.
633 */
634
ffed7fef
LW
635#ifdef CRIPPLED_CC
636
637#undef UNI
ffed7fef 638#define UNI(f) return uni(f,s)
ffed7fef 639
76e3520e 640STATIC int
cea2e8a9 641S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
642{
643 yylval.ival = f;
3280af22
NIS
644 PL_expect = XTERM;
645 PL_bufptr = s;
8f872242
NIS
646 PL_last_uni = PL_oldbufptr;
647 PL_last_lop_op = f;
ffed7fef
LW
648 if (*s == '(')
649 return FUNC1;
650 s = skipspace(s);
651 if (*s == '(')
652 return FUNC1;
653 else
654 return UNIOP;
655}
656
a0d0e21e
LW
657#endif /* CRIPPLED_CC */
658
ffb4593c
NT
659/*
660 * LOP : macro to build a list operator. Its behaviour has been replaced
661 * with a subroutine, S_lop() for which LOP is just another name.
662 */
663
a0d0e21e
LW
664#define LOP(f,x) return lop(f,x,s)
665
ffb4593c
NT
666/*
667 * S_lop
668 * Build a list operator (or something that might be one). The rules:
669 * - if we have a next token, then it's a list operator [why?]
670 * - if the next thing is an opening paren, then it's a function
671 * - else it's a list operator
672 */
673
76e3520e 674STATIC I32
a0be28da 675S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 676{
0f15f207 677 dTHR;
79072805 678 yylval.ival = f;
35c8bce7 679 CLINE;
3280af22
NIS
680 PL_expect = x;
681 PL_bufptr = s;
682 PL_last_lop = PL_oldbufptr;
683 PL_last_lop_op = f;
684 if (PL_nexttoke)
a0d0e21e 685 return LSTOP;
79072805
LW
686 if (*s == '(')
687 return FUNC;
688 s = skipspace(s);
689 if (*s == '(')
690 return FUNC;
691 else
692 return LSTOP;
693}
694
ffb4593c
NT
695/*
696 * S_force_next
9cbb5ea2 697 * When the lexer realizes it knows the next token (for instance,
ffb4593c 698 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
699 * to know what token to return the next time the lexer is called. Caller
700 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
701 * handles the token correctly.
ffb4593c
NT
702 */
703
76e3520e 704STATIC void
cea2e8a9 705S_force_next(pTHX_ I32 type)
79072805 706{
3280af22
NIS
707 PL_nexttype[PL_nexttoke] = type;
708 PL_nexttoke++;
709 if (PL_lex_state != LEX_KNOWNEXT) {
710 PL_lex_defer = PL_lex_state;
711 PL_lex_expect = PL_expect;
712 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
713 }
714}
715
ffb4593c
NT
716/*
717 * S_force_word
718 * When the lexer knows the next thing is a word (for instance, it has
719 * just seen -> and it knows that the next char is a word char, then
720 * it calls S_force_word to stick the next word into the PL_next lookahead.
721 *
722 * Arguments:
b1b65b59 723 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
724 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
725 * int check_keyword : if true, Perl checks to make sure the word isn't
726 * a keyword (do this if the word is a label, e.g. goto FOO)
727 * int allow_pack : if true, : characters will also be allowed (require,
728 * use, etc. do this)
9cbb5ea2 729 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
730 */
731
76e3520e 732STATIC char *
cea2e8a9 733S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 734{
463ee0b2
LW
735 register char *s;
736 STRLEN len;
737
738 start = skipspace(start);
739 s = start;
834a4ddd 740 if (isIDFIRST_lazy(s) ||
a0d0e21e 741 (allow_pack && *s == ':') ||
15f0808c 742 (allow_initial_tick && *s == '\'') )
a0d0e21e 743 {
3280af22
NIS
744 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
745 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
746 return start;
747 if (token == METHOD) {
748 s = skipspace(s);
749 if (*s == '(')
3280af22 750 PL_expect = XTERM;
463ee0b2 751 else {
3280af22 752 PL_expect = XOPERATOR;
463ee0b2 753 }
79072805 754 }
3280af22
NIS
755 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
756 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
757 force_next(token);
758 }
759 return s;
760}
761
ffb4593c
NT
762/*
763 * S_force_ident
9cbb5ea2 764 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
765 * text only contains the "foo" portion. The first argument is a pointer
766 * to the "foo", and the second argument is the type symbol to prefix.
767 * Forces the next token to be a "WORD".
9cbb5ea2 768 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
769 */
770
76e3520e 771STATIC void
cea2e8a9 772S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
773{
774 if (s && *s) {
11343788 775 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 776 PL_nextval[PL_nexttoke].opval = o;
79072805 777 force_next(WORD);
748a9306 778 if (kind) {
e858de61 779 dTHR; /* just for in_eval */
11343788 780 o->op_private = OPpCONST_ENTERED;
55497cff
PP
781 /* XXX see note in pp_entereval() for why we forgo typo
782 warnings if the symbol must be introduced in an eval.
783 GSAR 96-10-12 */
3280af22 784 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
785 kind == '$' ? SVt_PV :
786 kind == '@' ? SVt_PVAV :
787 kind == '%' ? SVt_PVHV :
788 SVt_PVGV
789 );
748a9306 790 }
79072805
LW
791 }
792}
793
ffb4593c
NT
794/*
795 * S_force_version
796 * Forces the next token to be a version number.
797 */
798
76e3520e 799STATIC char *
cea2e8a9 800S_force_version(pTHX_ char *s)
89bfa8cd
PP
801{
802 OP *version = Nullop;
803
804 s = skipspace(s);
805
a7cb1f99
GS
806 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
807 char *d = s;
808 if (*d == 'v')
809 d++;
810 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
811 if ((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
89bfa8cd
PP
812 s = scan_num(s);
813 /* real VERSION number -- GBARR */
814 version = yylval.opval;
815 }
816 }
817
818 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 819 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
820 force_next(WORD);
821
822 return (s);
823}
824
ffb4593c
NT
825/*
826 * S_tokeq
827 * Tokenize a quoted string passed in as an SV. It finds the next
828 * chunk, up to end of string or a backslash. It may make a new
829 * SV containing that chunk (if HINT_NEW_STRING is on). It also
830 * turns \\ into \.
831 */
832
76e3520e 833STATIC SV *
cea2e8a9 834S_tokeq(pTHX_ SV *sv)
79072805
LW
835{
836 register char *s;
837 register char *send;
838 register char *d;
b3ac6de7
IZ
839 STRLEN len = 0;
840 SV *pv = sv;
79072805
LW
841
842 if (!SvLEN(sv))
b3ac6de7 843 goto finish;
79072805 844
a0d0e21e 845 s = SvPV_force(sv, len);
748a9306 846 if (SvIVX(sv) == -1)
b3ac6de7 847 goto finish;
463ee0b2 848 send = s + len;
79072805
LW
849 while (s < send && *s != '\\')
850 s++;
851 if (s == send)
b3ac6de7 852 goto finish;
79072805 853 d = s;
3280af22 854 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 855 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
856 while (s < send) {
857 if (*s == '\\') {
a0d0e21e 858 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
859 s++; /* all that, just for this */
860 }
861 *d++ = *s++;
862 }
863 *d = '\0';
463ee0b2 864 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 865 finish:
3280af22 866 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 867 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
868 return sv;
869}
870
ffb4593c
NT
871/*
872 * Now come three functions related to double-quote context,
873 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
874 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
875 * interact with PL_lex_state, and create fake ( ... ) argument lists
876 * to handle functions and concatenation.
877 * They assume that whoever calls them will be setting up a fake
878 * join call, because each subthing puts a ',' after it. This lets
879 * "lower \luPpEr"
880 * become
881 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
882 *
883 * (I'm not sure whether the spurious commas at the end of lcfirst's
884 * arguments and join's arguments are created or not).
885 */
886
887/*
888 * S_sublex_start
889 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
890 *
891 * Pattern matching will set PL_lex_op to the pattern-matching op to
892 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
893 *
894 * OP_CONST and OP_READLINE are easy--just make the new op and return.
895 *
896 * Everything else becomes a FUNC.
897 *
898 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
899 * had an OP_CONST or OP_READLINE). This just sets us up for a
900 * call to S_sublex_push().
901 */
902
76e3520e 903STATIC I32
cea2e8a9 904S_sublex_start(pTHX)
79072805
LW
905{
906 register I32 op_type = yylval.ival;
79072805
LW
907
908 if (op_type == OP_NULL) {
3280af22
NIS
909 yylval.opval = PL_lex_op;
910 PL_lex_op = Nullop;
79072805
LW
911 return THING;
912 }
913 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 914 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
915
916 if (SvTYPE(sv) == SVt_PVIV) {
917 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
918 STRLEN len;
919 char *p;
920 SV *nsv;
921
922 p = SvPV(sv, len);
79cb57f6 923 nsv = newSVpvn(p, len);
b3ac6de7
IZ
924 SvREFCNT_dec(sv);
925 sv = nsv;
926 }
927 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 928 PL_lex_stuff = Nullsv;
79072805
LW
929 return THING;
930 }
931
3280af22
NIS
932 PL_sublex_info.super_state = PL_lex_state;
933 PL_sublex_info.sub_inwhat = op_type;
934 PL_sublex_info.sub_op = PL_lex_op;
935 PL_lex_state = LEX_INTERPPUSH;
55497cff 936
3280af22
NIS
937 PL_expect = XTERM;
938 if (PL_lex_op) {
939 yylval.opval = PL_lex_op;
940 PL_lex_op = Nullop;
55497cff
PP
941 return PMFUNC;
942 }
943 else
944 return FUNC;
945}
946
ffb4593c
NT
947/*
948 * S_sublex_push
949 * Create a new scope to save the lexing state. The scope will be
950 * ended in S_sublex_done. Returns a '(', starting the function arguments
951 * to the uc, lc, etc. found before.
952 * Sets PL_lex_state to LEX_INTERPCONCAT.
953 */
954
76e3520e 955STATIC I32
cea2e8a9 956S_sublex_push(pTHX)
55497cff 957{
0f15f207 958 dTHR;
f46d017c 959 ENTER;
55497cff 960
3280af22
NIS
961 PL_lex_state = PL_sublex_info.super_state;
962 SAVEI32(PL_lex_dojoin);
963 SAVEI32(PL_lex_brackets);
3280af22
NIS
964 SAVEI32(PL_lex_casemods);
965 SAVEI32(PL_lex_starts);
966 SAVEI32(PL_lex_state);
7766f137 967 SAVEVPTR(PL_lex_inpat);
3280af22 968 SAVEI32(PL_lex_inwhat);
57843af0 969 SAVECOPLINE(PL_curcop);
3280af22
NIS
970 SAVEPPTR(PL_bufptr);
971 SAVEPPTR(PL_oldbufptr);
972 SAVEPPTR(PL_oldoldbufptr);
973 SAVEPPTR(PL_linestart);
974 SAVESPTR(PL_linestr);
975 SAVEPPTR(PL_lex_brackstack);
976 SAVEPPTR(PL_lex_casestack);
977
978 PL_linestr = PL_lex_stuff;
979 PL_lex_stuff = Nullsv;
980
9cbb5ea2
GS
981 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
982 = SvPVX(PL_linestr);
3280af22
NIS
983 PL_bufend += SvCUR(PL_linestr);
984 SAVEFREESV(PL_linestr);
985
986 PL_lex_dojoin = FALSE;
987 PL_lex_brackets = 0;
3280af22
NIS
988 New(899, PL_lex_brackstack, 120, char);
989 New(899, PL_lex_casestack, 12, char);
990 SAVEFREEPV(PL_lex_brackstack);
991 SAVEFREEPV(PL_lex_casestack);
992 PL_lex_casemods = 0;
993 *PL_lex_casestack = '\0';
994 PL_lex_starts = 0;
995 PL_lex_state = LEX_INTERPCONCAT;
57843af0 996 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
997
998 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
999 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1000 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1001 else
3280af22 1002 PL_lex_inpat = Nullop;
79072805 1003
55497cff 1004 return '(';
79072805
LW
1005}
1006
ffb4593c
NT
1007/*
1008 * S_sublex_done
1009 * Restores lexer state after a S_sublex_push.
1010 */
1011
76e3520e 1012STATIC I32
cea2e8a9 1013S_sublex_done(pTHX)
79072805 1014{
3280af22
NIS
1015 if (!PL_lex_starts++) {
1016 PL_expect = XOPERATOR;
79cb57f6 1017 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1018 return THING;
1019 }
1020
3280af22
NIS
1021 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1022 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1023 return yylex();
79072805
LW
1024 }
1025
ffb4593c 1026 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1027 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1028 PL_linestr = PL_lex_repl;
1029 PL_lex_inpat = 0;
1030 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1031 PL_bufend += SvCUR(PL_linestr);
1032 SAVEFREESV(PL_linestr);
1033 PL_lex_dojoin = FALSE;
1034 PL_lex_brackets = 0;
3280af22
NIS
1035 PL_lex_casemods = 0;
1036 *PL_lex_casestack = '\0';
1037 PL_lex_starts = 0;
25da4f38 1038 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1039 PL_lex_state = LEX_INTERPNORMAL;
1040 PL_lex_starts++;
e9fa98b2
HS
1041 /* we don't clear PL_lex_repl here, so that we can check later
1042 whether this is an evalled subst; that means we rely on the
1043 logic to ensure sublex_done() is called again only via the
1044 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1045 }
e9fa98b2 1046 else {
3280af22 1047 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1048 PL_lex_repl = Nullsv;
1049 }
79072805 1050 return ',';
ffed7fef
LW
1051 }
1052 else {
f46d017c 1053 LEAVE;
3280af22
NIS
1054 PL_bufend = SvPVX(PL_linestr);
1055 PL_bufend += SvCUR(PL_linestr);
1056 PL_expect = XOPERATOR;
09bef843 1057 PL_sublex_info.sub_inwhat = 0;
79072805 1058 return ')';
ffed7fef
LW
1059 }
1060}
1061
02aa26ce
NT
1062/*
1063 scan_const
1064
1065 Extracts a pattern, double-quoted string, or transliteration. This
1066 is terrifying code.
1067
3280af22
NIS
1068 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1069 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1070 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1071
9b599b2a
GS
1072 Returns a pointer to the character scanned up to. Iff this is
1073 advanced from the start pointer supplied (ie if anything was
1074 successfully parsed), will leave an OP for the substring scanned
1075 in yylval. Caller must intuit reason for not parsing further
1076 by looking at the next characters herself.
1077
02aa26ce
NT
1078 In patterns:
1079 backslashes:
1080 double-quoted style: \r and \n
1081 regexp special ones: \D \s
1082 constants: \x3
1083 backrefs: \1 (deprecated in substitution replacements)
1084 case and quoting: \U \Q \E
1085 stops on @ and $, but not for $ as tail anchor
1086
1087 In transliterations:
1088 characters are VERY literal, except for - not at the start or end
1089 of the string, which indicates a range. scan_const expands the
1090 range to the full set of intermediate characters.
1091
1092 In double-quoted strings:
1093 backslashes:
1094 double-quoted style: \r and \n
1095 constants: \x3
1096 backrefs: \1 (deprecated)
1097 case and quoting: \U \Q \E
1098 stops on @ and $
1099
1100 scan_const does *not* construct ops to handle interpolated strings.
1101 It stops processing as soon as it finds an embedded $ or @ variable
1102 and leaves it to the caller to work out what's going on.
1103
1104 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1105
1106 $ in pattern could be $foo or could be tail anchor. Assumption:
1107 it's a tail anchor if $ is the last thing in the string, or if it's
1108 followed by one of ")| \n\t"
1109
1110 \1 (backreferences) are turned into $1
1111
1112 The structure of the code is
1113 while (there's a character to process) {
1114 handle transliteration ranges
1115 skip regexp comments
1116 skip # initiated comments in //x patterns
1117 check for embedded @foo
1118 check for embedded scalars
1119 if (backslash) {
1120 leave intact backslashes from leave (below)
1121 deprecate \1 in strings and sub replacements
1122 handle string-changing backslashes \l \U \Q \E, etc.
1123 switch (what was escaped) {
1124 handle - in a transliteration (becomes a literal -)
1125 handle \132 octal characters
1126 handle 0x15 hex characters
1127 handle \cV (control V)
1128 handle printf backslashes (\f, \r, \n, etc)
1129 } (end switch)
1130 } (end if backslash)
1131 } (end while character to read)
1132
1133*/
1134
76e3520e 1135STATIC char *
cea2e8a9 1136S_scan_const(pTHX_ char *start)
79072805 1137{
3280af22 1138 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1139 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1140 register char *s = start; /* start of the constant */
1141 register char *d = SvPVX(sv); /* destination for copies */
1142 bool dorange = FALSE; /* are we in a translit range? */
1143 I32 len; /* ? */
ac2262e3 1144 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1145 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1146 : UTF;
ac2262e3 1147 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1148 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1149 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1150 : UTF;
dff6d3cd 1151 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1152 PL_lex_inpat
4a2d328f 1153 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1154 : "";
79072805
LW
1155
1156 while (s < send || dorange) {
02aa26ce 1157 /* get transliterations out of the way (they're most literal) */
3280af22 1158 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1159 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1160 if (dorange) {
02aa26ce 1161 I32 i; /* current expanded character */
8ada0baa 1162 I32 min; /* first character in range */
02aa26ce
NT
1163 I32 max; /* last character in range */
1164
1165 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1166 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1167 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1168 d -= 2; /* eat the first char and the - */
1169
8ada0baa
JH
1170 min = (U8)*d; /* first char in range */
1171 max = (U8)d[1]; /* last char in range */
1172
1173#ifndef ASCIIish
1174 if ((isLOWER(min) && isLOWER(max)) ||
1175 (isUPPER(min) && isUPPER(max))) {
1176 if (isLOWER(min)) {
1177 for (i = min; i <= max; i++)
1178 if (isLOWER(i))
1179 *d++ = i;
1180 } else {
1181 for (i = min; i <= max; i++)
1182 if (isUPPER(i))
1183 *d++ = i;
1184 }
1185 }
1186 else
1187#endif
1188 for (i = min; i <= max; i++)
1189 *d++ = i;
02aa26ce
NT
1190
1191 /* mark the range as done, and continue */
79072805
LW
1192 dorange = FALSE;
1193 continue;
1194 }
02aa26ce
NT
1195
1196 /* range begins (ignore - as first or last char) */
79072805 1197 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1198 if (utf) {
a176fa2a 1199 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1200 s++;
1201 continue;
1202 }
79072805
LW
1203 dorange = TRUE;
1204 s++;
1205 }
1206 }
02aa26ce
NT
1207
1208 /* if we get here, we're not doing a transliteration */
1209
0f5d15d6
IZ
1210 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1211 except for the last char, which will be done separately. */
3280af22 1212 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1213 if (s[2] == '#') {
1214 while (s < send && *s != ')')
1215 *d++ = *s++;
0f5d15d6
IZ
1216 } else if (s[2] == '{'
1217 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1218 I32 count = 1;
0f5d15d6 1219 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1220 char c;
1221
d9f97599
GS
1222 while (count && (c = *regparse)) {
1223 if (c == '\\' && regparse[1])
1224 regparse++;
cc6b7395
IZ
1225 else if (c == '{')
1226 count++;
1227 else if (c == '}')
1228 count--;
d9f97599 1229 regparse++;
cc6b7395 1230 }
5bdf89e7
IZ
1231 if (*regparse != ')') {
1232 regparse--; /* Leave one char for continuation. */
cc6b7395 1233 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1234 }
0f5d15d6 1235 while (s < regparse)
cc6b7395
IZ
1236 *d++ = *s++;
1237 }
748a9306 1238 }
02aa26ce
NT
1239
1240 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1241 else if (*s == '#' && PL_lex_inpat &&
1242 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1243 while (s+1 < send && *s != '\n')
1244 *d++ = *s++;
1245 }
02aa26ce
NT
1246
1247 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1248 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1249 break;
02aa26ce
NT
1250
1251 /* check for embedded scalars. only stop if we're sure it's a
1252 variable.
1253 */
79072805 1254 else if (*s == '$') {
3280af22 1255 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1256 break;
c277df42 1257 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1258 break; /* in regexp, $ might be tail anchor */
1259 }
02aa26ce 1260
a0ed51b3
LW
1261 /* (now in tr/// code again) */
1262
d008e5eb
GS
1263 if (*s & 0x80 && thisutf) {
1264 dTHR; /* only for ckWARN */
1265 if (ckWARN(WARN_UTF8)) {
dfe13c55 1266 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1267 if (len) {
1268 while (len--)
1269 *d++ = *s++;
1270 continue;
1271 }
a0ed51b3
LW
1272 }
1273 }
1274
02aa26ce 1275 /* backslashes */
79072805
LW
1276 if (*s == '\\' && s+1 < send) {
1277 s++;
02aa26ce
NT
1278
1279 /* some backslashes we leave behind */
c9f97d15 1280 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1281 *d++ = '\\';
1282 *d++ = *s++;
1283 continue;
1284 }
02aa26ce
NT
1285
1286 /* deprecate \1 in strings and substitution replacements */
3280af22 1287 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1288 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1289 {
d008e5eb 1290 dTHR; /* only for ckWARN */
599cee73 1291 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1292 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1293 *--s = '$';
1294 break;
1295 }
02aa26ce
NT
1296
1297 /* string-change backslash escapes */
3280af22 1298 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1299 --s;
1300 break;
1301 }
02aa26ce
NT
1302
1303 /* if we get here, it's either a quoted -, or a digit */
79072805 1304 switch (*s) {
02aa26ce
NT
1305
1306 /* quoted - in transliterations */
79072805 1307 case '-':
3280af22 1308 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1309 *d++ = *s++;
1310 continue;
1311 }
1312 /* FALL THROUGH */
1313 default:
11b8faa4
JH
1314 {
1315 dTHR;
1316 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1317 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1318 "Unrecognized escape \\%c passed through",
1319 *s);
1320 /* default action is to copy the quoted character */
1321 *d++ = *s++;
1322 continue;
1323 }
02aa26ce
NT
1324
1325 /* \132 indicates an octal constant */
79072805
LW
1326 case '0': case '1': case '2': case '3':
1327 case '4': case '5': case '6': case '7':
dff6d3cd 1328 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1329 s += len;
1330 continue;
02aa26ce
NT
1331
1332 /* \x24 indicates a hex constant */
79072805 1333 case 'x':
a0ed51b3
LW
1334 ++s;
1335 if (*s == '{') {
1336 char* e = strchr(s, '}');
1337
adaeee49 1338 if (!e) {
a0ed51b3 1339 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1340 e = s;
1341 }
d008e5eb
GS
1342 if (!utf) {
1343 dTHR;
1344 if (ckWARN(WARN_UTF8))
cea2e8a9 1345 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1346 "Use of \\x{} without utf8 declaration");
1347 }
a0ed51b3 1348 /* note: utf always shorter than hex */
dfe13c55 1349 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1350 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1351 s = e + 1;
a0ed51b3
LW
1352 }
1353 else {
1354 UV uv = (UV)scan_hex(s, 2, &len);
1355 if (utf && PL_lex_inwhat == OP_TRANS &&
1356 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1357 {
dfe13c55 1358 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1359 }
1360 else {
d008e5eb
GS
1361 if (uv >= 127 && UTF) {
1362 dTHR;
1363 if (ckWARN(WARN_UTF8))
cea2e8a9 1364 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1365 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1366 len,s,len,s);
1367 }
a0ed51b3
LW
1368 *d++ = (char)uv;
1369 }
1370 s += len;
1371 }
79072805 1372 continue;
02aa26ce 1373
4a2d328f
IZ
1374 /* \N{latin small letter a} is a named character */
1375 case 'N':
423cee85
JH
1376 ++s;
1377 if (*s == '{') {
1378 char* e = strchr(s, '}');
1379 HV *hv;
1380 SV **svp;
1381 SV *res, *cv;
1382 STRLEN len;
1383 char *str;
1384 char *why = Nullch;
1385
1386 if (!e) {
5777a3f7 1387 yyerror("Missing right brace on \\N{}");
423cee85
JH
1388 e = s - 1;
1389 goto cont_scan;
1390 }
1391 res = newSVpvn(s + 1, e - s - 1);
1392 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1393 res, Nullsv, "\\N{...}" );
423cee85
JH
1394 str = SvPV(res,len);
1395 if (len > e - s + 4) {
1396 char *odest = SvPVX(sv);
1397
1398 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1399 d = SvPVX(sv) + (d - odest);
1400 }
1401 Copy(str, d, len, char);
1402 d += len;
1403 SvREFCNT_dec(res);
1404 cont_scan:
1405 s = e + 1;
1406 }
1407 else
5777a3f7 1408 yyerror("Missing braces on \\N{}");
423cee85
JH
1409 continue;
1410
02aa26ce 1411 /* \c is a control character */
79072805
LW
1412 case 'c':
1413 s++;
9d116dd7
JH
1414#ifdef EBCDIC
1415 *d = *s++;
1416 if (isLOWER(*d))
1417 *d = toUPPER(*d);
1418 *d++ = toCTRL(*d);
1419#else
bbce6d69
PP
1420 len = *s++;
1421 *d++ = toCTRL(len);
9d116dd7 1422#endif
79072805 1423 continue;
02aa26ce
NT
1424
1425 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1426 case 'b':
1427 *d++ = '\b';
1428 break;
1429 case 'n':
1430 *d++ = '\n';
1431 break;
1432 case 'r':
1433 *d++ = '\r';
1434 break;
1435 case 'f':
1436 *d++ = '\f';
1437 break;
1438 case 't':
1439 *d++ = '\t';
1440 break;
34a3fe2a
PP
1441#ifdef EBCDIC
1442 case 'e':
1443 *d++ = '\047'; /* CP 1047 */
1444 break;
1445 case 'a':
1446 *d++ = '\057'; /* CP 1047 */
1447 break;
1448#else
79072805
LW
1449 case 'e':
1450 *d++ = '\033';
1451 break;
1452 case 'a':
1453 *d++ = '\007';
1454 break;
34a3fe2a 1455#endif
02aa26ce
NT
1456 } /* end switch */
1457
79072805
LW
1458 s++;
1459 continue;
02aa26ce
NT
1460 } /* end if (backslash) */
1461
79072805 1462 *d++ = *s++;
02aa26ce
NT
1463 } /* while loop to process each character */
1464
1465 /* terminate the string and set up the sv */
79072805 1466 *d = '\0';
463ee0b2 1467 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1468 SvPOK_on(sv);
1469
02aa26ce 1470 /* shrink the sv if we allocated more than we used */
79072805
LW
1471 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1472 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1473 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1474 }
02aa26ce 1475
9b599b2a 1476 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1477 if (s > PL_bufptr) {
1478 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1479 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1480 sv, Nullsv,
3280af22 1481 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1482 ? "tr"
3280af22 1483 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1484 ? "s"
1485 : "qq")));
79072805 1486 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1487 } else
8990e307 1488 SvREFCNT_dec(sv);
79072805
LW
1489 return s;
1490}
1491
ffb4593c
NT
1492/* S_intuit_more
1493 * Returns TRUE if there's more to the expression (e.g., a subscript),
1494 * FALSE otherwise.
ffb4593c
NT
1495 *
1496 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1497 *
1498 * ->[ and ->{ return TRUE
1499 * { and [ outside a pattern are always subscripts, so return TRUE
1500 * if we're outside a pattern and it's not { or [, then return FALSE
1501 * if we're in a pattern and the first char is a {
1502 * {4,5} (any digits around the comma) returns FALSE
1503 * if we're in a pattern and the first char is a [
1504 * [] returns FALSE
1505 * [SOMETHING] has a funky algorithm to decide whether it's a
1506 * character class or not. It has to deal with things like
1507 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1508 * anything else returns TRUE
1509 */
1510
9cbb5ea2
GS
1511/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1512
76e3520e 1513STATIC int
cea2e8a9 1514S_intuit_more(pTHX_ register char *s)
79072805 1515{
3280af22 1516 if (PL_lex_brackets)
79072805
LW
1517 return TRUE;
1518 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1519 return TRUE;
1520 if (*s != '{' && *s != '[')
1521 return FALSE;
3280af22 1522 if (!PL_lex_inpat)
79072805
LW
1523 return TRUE;
1524
1525 /* In a pattern, so maybe we have {n,m}. */
1526 if (*s == '{') {
1527 s++;
1528 if (!isDIGIT(*s))
1529 return TRUE;
1530 while (isDIGIT(*s))
1531 s++;
1532 if (*s == ',')
1533 s++;
1534 while (isDIGIT(*s))
1535 s++;
1536 if (*s == '}')
1537 return FALSE;
1538 return TRUE;
1539
1540 }
1541
1542 /* On the other hand, maybe we have a character class */
1543
1544 s++;
1545 if (*s == ']' || *s == '^')
1546 return FALSE;
1547 else {
ffb4593c 1548 /* this is terrifying, and it works */
79072805
LW
1549 int weight = 2; /* let's weigh the evidence */
1550 char seen[256];
f27ffc4a 1551 unsigned char un_char = 255, last_un_char;
93a17b20 1552 char *send = strchr(s,']');
3280af22 1553 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1554
1555 if (!send) /* has to be an expression */
1556 return TRUE;
1557
1558 Zero(seen,256,char);
1559 if (*s == '$')
1560 weight -= 3;
1561 else if (isDIGIT(*s)) {
1562 if (s[1] != ']') {
1563 if (isDIGIT(s[1]) && s[2] == ']')
1564 weight -= 10;
1565 }
1566 else
1567 weight -= 100;
1568 }
1569 for (; s < send; s++) {
1570 last_un_char = un_char;
1571 un_char = (unsigned char)*s;
1572 switch (*s) {
1573 case '@':
1574 case '&':
1575 case '$':
1576 weight -= seen[un_char] * 10;
834a4ddd 1577 if (isALNUM_lazy(s+1)) {
8903cb82 1578 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1579 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1580 weight -= 100;
1581 else
1582 weight -= 10;
1583 }
1584 else if (*s == '$' && s[1] &&
93a17b20
LW
1585 strchr("[#!%*<>()-=",s[1])) {
1586 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1587 weight -= 10;
1588 else
1589 weight -= 1;
1590 }
1591 break;
1592 case '\\':
1593 un_char = 254;
1594 if (s[1]) {
93a17b20 1595 if (strchr("wds]",s[1]))
79072805
LW
1596 weight += 100;
1597 else if (seen['\''] || seen['"'])
1598 weight += 1;
93a17b20 1599 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1600 weight += 40;
1601 else if (isDIGIT(s[1])) {
1602 weight += 40;
1603 while (s[1] && isDIGIT(s[1]))
1604 s++;
1605 }
1606 }
1607 else
1608 weight += 100;
1609 break;
1610 case '-':
1611 if (s[1] == '\\')
1612 weight += 50;
93a17b20 1613 if (strchr("aA01! ",last_un_char))
79072805 1614 weight += 30;
93a17b20 1615 if (strchr("zZ79~",s[1]))
79072805 1616 weight += 30;
f27ffc4a
GS
1617 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1618 weight -= 5; /* cope with negative subscript */
79072805
LW
1619 break;
1620 default:
93a17b20 1621 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1622 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1623 char *d = tmpbuf;
1624 while (isALPHA(*s))
1625 *d++ = *s++;
1626 *d = '\0';
1627 if (keyword(tmpbuf, d - tmpbuf))
1628 weight -= 150;
1629 }
1630 if (un_char == last_un_char + 1)
1631 weight += 5;
1632 weight -= seen[un_char];
1633 break;
1634 }
1635 seen[un_char]++;
1636 }
1637 if (weight >= 0) /* probably a character class */
1638 return FALSE;
1639 }
1640
1641 return TRUE;
1642}
ffed7fef 1643
ffb4593c
NT
1644/*
1645 * S_intuit_method
1646 *
1647 * Does all the checking to disambiguate
1648 * foo bar
1649 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1650 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1651 *
1652 * First argument is the stuff after the first token, e.g. "bar".
1653 *
1654 * Not a method if bar is a filehandle.
1655 * Not a method if foo is a subroutine prototyped to take a filehandle.
1656 * Not a method if it's really "Foo $bar"
1657 * Method if it's "foo $bar"
1658 * Not a method if it's really "print foo $bar"
1659 * Method if it's really "foo package::" (interpreted as package->foo)
1660 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1661 * Not a method if bar is a filehandle or package, but is quotd with
1662 * =>
1663 */
1664
76e3520e 1665STATIC int
cea2e8a9 1666S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1667{
1668 char *s = start + (*start == '$');
3280af22 1669 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1670 STRLEN len;
1671 GV* indirgv;
1672
1673 if (gv) {
b6c543e3 1674 CV *cv;
a0d0e21e
LW
1675 if (GvIO(gv))
1676 return 0;
b6c543e3
IZ
1677 if ((cv = GvCVu(gv))) {
1678 char *proto = SvPVX(cv);
1679 if (proto) {
1680 if (*proto == ';')
1681 proto++;
1682 if (*proto == '*')
1683 return 0;
1684 }
1685 } else
a0d0e21e
LW
1686 gv = 0;
1687 }
8903cb82 1688 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1689 /* start is the beginning of the possible filehandle/object,
1690 * and s is the end of it
1691 * tmpbuf is a copy of it
1692 */
1693
a0d0e21e 1694 if (*start == '$') {
3280af22 1695 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1696 return 0;
1697 s = skipspace(s);
3280af22
NIS
1698 PL_bufptr = start;
1699 PL_expect = XREF;
a0d0e21e
LW
1700 return *s == '(' ? FUNCMETH : METHOD;
1701 }
1702 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1703 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1704 len -= 2;
1705 tmpbuf[len] = '\0';
1706 goto bare_package;
1707 }
1708 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1709 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1710 return 0;
1711 /* filehandle or package name makes it a method */
89bfa8cd 1712 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1713 s = skipspace(s);
3280af22 1714 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1715 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1716 bare_package:
3280af22 1717 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1718 newSVpvn(tmpbuf,len));
3280af22
NIS
1719 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1720 PL_expect = XTERM;
a0d0e21e 1721 force_next(WORD);
3280af22 1722 PL_bufptr = s;
a0d0e21e
LW
1723 return *s == '(' ? FUNCMETH : METHOD;
1724 }
1725 }
1726 return 0;
1727}
1728
ffb4593c
NT
1729/*
1730 * S_incl_perldb
1731 * Return a string of Perl code to load the debugger. If PERL5DB
1732 * is set, it will return the contents of that, otherwise a
1733 * compile-time require of perl5db.pl.
1734 */
1735
76e3520e 1736STATIC char*
cea2e8a9 1737S_incl_perldb(pTHX)
a0d0e21e 1738{
3280af22 1739 if (PL_perldb) {
76e3520e 1740 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1741
1742 if (pdb)
1743 return pdb;
61bb5906 1744 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1745 return "BEGIN { require 'perl5db.pl' }";
1746 }
1747 return "";
1748}
1749
1750
16d20bd9
AD
1751/* Encoded script support. filter_add() effectively inserts a
1752 * 'pre-processing' function into the current source input stream.
1753 * Note that the filter function only applies to the current source file
1754 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1755 *
1756 * The datasv parameter (which may be NULL) can be used to pass
1757 * private data to this instance of the filter. The filter function
1758 * can recover the SV using the FILTER_DATA macro and use it to
1759 * store private buffers and state information.
1760 *
1761 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1762 * and the IoDIRP field is used to store the function pointer,
1763 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1764 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1765 * private use must be set using malloc'd pointers.
1766 */
16d20bd9
AD
1767
1768SV *
864dbfa3 1769Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1770{
f4c556ac
GS
1771 if (!funcp)
1772 return Nullsv;
1773
3280af22
NIS
1774 if (!PL_rsfp_filters)
1775 PL_rsfp_filters = newAV();
16d20bd9 1776 if (!datasv)
8c52afec 1777 datasv = NEWSV(255,0);
16d20bd9 1778 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1779 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1780 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1781 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1782 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1783 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1784 av_unshift(PL_rsfp_filters, 1);
1785 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1786 return(datasv);
1787}
1788
1789
1790/* Delete most recently added instance of this filter function. */
a0d0e21e 1791void
864dbfa3 1792Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1793{
e0c19803 1794 SV *datasv;
f4c556ac 1795 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1796 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1797 return;
1798 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1799 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1800 if (IoDIRP(datasv) == (DIR*)funcp) {
1801 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1802 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1803 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1804
16d20bd9
AD
1805 return;
1806 }
1807 /* we need to search for the correct entry and clear it */
cea2e8a9 1808 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1809}
1810
1811
1812/* Invoke the n'th filter function for the current rsfp. */
1813I32
864dbfa3 1814Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1815
1816
1817 /* 0 = read one text line */
a0d0e21e 1818{
16d20bd9
AD
1819 filter_t funcp;
1820 SV *datasv = NULL;
e50aee73 1821
3280af22 1822 if (!PL_rsfp_filters)
16d20bd9 1823 return -1;
3280af22 1824 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1825 /* Provide a default input filter to make life easy. */
1826 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1827 DEBUG_P(PerlIO_printf(Perl_debug_log,
1828 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1829 if (maxlen) {
1830 /* Want a block */
1831 int len ;
1832 int old_len = SvCUR(buf_sv) ;
1833
1834 /* ensure buf_sv is large enough */
1835 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1836 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1837 if (PerlIO_error(PL_rsfp))
37120919
AD
1838 return -1; /* error */
1839 else
1840 return 0 ; /* end of file */
1841 }
16d20bd9
AD
1842 SvCUR_set(buf_sv, old_len + len) ;
1843 } else {
1844 /* Want a line */
3280af22
NIS
1845 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1846 if (PerlIO_error(PL_rsfp))
37120919
AD
1847 return -1; /* error */
1848 else
1849 return 0 ; /* end of file */
1850 }
16d20bd9
AD
1851 }
1852 return SvCUR(buf_sv);
1853 }
1854 /* Skip this filter slot if filter has been deleted */
3280af22 1855 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1856 DEBUG_P(PerlIO_printf(Perl_debug_log,
1857 "filter_read %d: skipped (filter deleted)\n",
1858 idx));
16d20bd9
AD
1859 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1860 }
1861 /* Get function pointer hidden within datasv */
1862 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1863 DEBUG_P(PerlIO_printf(Perl_debug_log,
1864 "filter_read %d: via function %p (%s)\n",
1865 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1866 /* Call function. The function is expected to */
1867 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1868 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1869 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1870}
1871
76e3520e 1872STATIC char *
cea2e8a9 1873S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1874{
a868473f 1875#ifdef WIN32FILTER
3280af22 1876 if (!PL_rsfp_filters) {
a868473f
NIS
1877 filter_add(win32_textfilter,NULL);
1878 }
1879#endif
3280af22 1880 if (PL_rsfp_filters) {
16d20bd9 1881
55497cff
PP
1882 if (!append)
1883 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1884 if (FILTER_READ(0, sv, 0) > 0)
1885 return ( SvPVX(sv) ) ;
1886 else
1887 return Nullch ;
1888 }
9d116dd7 1889 else
fd049845 1890 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1891}
1892
1893
748a9306
LW
1894#ifdef DEBUGGING
1895 static char* exp_name[] =
09bef843
SB
1896 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1897 "ATTRTERM", "TERMBLOCK"
1898 };
748a9306 1899#endif
463ee0b2 1900
02aa26ce
NT
1901/*
1902 yylex
1903
1904 Works out what to call the token just pulled out of the input
1905 stream. The yacc parser takes care of taking the ops we return and
1906 stitching them into a tree.
1907
1908 Returns:
1909 PRIVATEREF
1910
1911 Structure:
1912 if read an identifier
1913 if we're in a my declaration
1914 croak if they tried to say my($foo::bar)
1915 build the ops for a my() declaration
1916 if it's an access to a my() variable
1917 are we in a sort block?
1918 croak if my($a); $a <=> $b
1919 build ops for access to a my() variable
1920 if in a dq string, and they've said @foo and we can't find @foo
1921 croak
1922 build ops for a bareword
1923 if we already built the token before, use it.
1924*/
1925
864dbfa3
GS
1926int
1927#ifdef USE_PURE_BISON
cea2e8a9 1928Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1929#else
cea2e8a9 1930Perl_yylex(pTHX)
864dbfa3 1931#endif
378cc40b 1932{
11343788 1933 dTHR;
79072805 1934 register char *s;
378cc40b 1935 register char *d;
79072805 1936 register I32 tmp;
463ee0b2 1937 STRLEN len;
161b471a
NIS
1938 GV *gv = Nullgv;
1939 GV **gvp = 0;
a687059c 1940
a1a0e61e
TD
1941#ifdef USE_PURE_BISON
1942 yylval_pointer = lvalp;
1943 yychar_pointer = lcharp;
1944#endif
1945
02aa26ce 1946 /* check if there's an identifier for us to look at */
3280af22 1947 if (PL_pending_ident) {
02aa26ce 1948 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1949 char pit = PL_pending_ident;
1950 PL_pending_ident = 0;
bbce6d69 1951
02aa26ce
NT
1952 /* if we're in a my(), we can't allow dynamics here.
1953 $foo'bar has already been turned into $foo::bar, so
1954 just check for colons.
1955
1956 if it's a legal name, the OP is a PADANY.
1957 */
3280af22 1958 if (PL_in_my) {
77ca0c92
LW
1959 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1960 tmp = pad_allocmy(PL_tokenbuf);
1961 }
1962 else {
1963 if (strchr(PL_tokenbuf,':'))
1964 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1965
77ca0c92
LW
1966 yylval.opval = newOP(OP_PADANY, 0);
1967 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1968 return PRIVATEREF;
1969 }
bbce6d69
PP
1970 }
1971
02aa26ce
NT
1972 /*
1973 build the ops for accesses to a my() variable.
1974
1975 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1976 then used in a comparison. This catches most, but not
1977 all cases. For instance, it catches
1978 sort { my($a); $a <=> $b }
1979 but not
1980 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1981 (although why you'd do that is anyone's guess).
1982 */
1983
3280af22 1984 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 1985#ifdef USE_THREADS
54b9620d 1986 /* Check for single character per-thread SVs */
3280af22
NIS
1987 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
1988 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
1989 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 1990 {
2faa37cc 1991 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
1992 yylval.opval->op_targ = tmp;
1993 return PRIVATEREF;
1994 }
1995#endif /* USE_THREADS */
3280af22 1996 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
77ca0c92
LW
1997 /* might be an "our" variable" */
1998 if (SvFLAGS(AvARRAY(PL_comppad_name)[tmp]) & SVpad_OUR) {
1999 /* build ops for a bareword */
2000 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
2001 yylval.opval->op_private = OPpCONST_ENTERED;
2002 gv_fetchpv(PL_tokenbuf+1,
2003 (PL_in_eval
2004 ? (GV_ADDMULTI | GV_ADDINEVAL | GV_ADDOUR)
2005 : GV_ADDOUR
2006 ),
2007 ((PL_tokenbuf[0] == '$') ? SVt_PV
2008 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2009 : SVt_PVHV));
2010 return WORD;
2011 }
2012
02aa26ce 2013 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2014 if (PL_last_lop_op == OP_SORT &&
2015 PL_tokenbuf[0] == '$' &&
2016 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2017 && !PL_tokenbuf[2])
bbce6d69 2018 {
3280af22
NIS
2019 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2020 d < PL_bufend && *d != '\n';
a863c7d1
MB
2021 d++)
2022 {
2023 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2024 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2025 PL_tokenbuf);
a863c7d1 2026 }
bbce6d69
PP
2027 }
2028 }
bbce6d69 2029
a863c7d1
MB
2030 yylval.opval = newOP(OP_PADANY, 0);
2031 yylval.opval->op_targ = tmp;
2032 return PRIVATEREF;
2033 }
bbce6d69
PP
2034 }
2035
02aa26ce
NT
2036 /*
2037 Whine if they've said @foo in a doublequoted string,
2038 and @foo isn't a variable we can find in the symbol
2039 table.
2040 */
3280af22
NIS
2041 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2042 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2043 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2044 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2045 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2046 }
2047
02aa26ce 2048 /* build ops for a bareword */
3280af22 2049 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2050 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2051 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2052 ((PL_tokenbuf[0] == '$') ? SVt_PV
2053 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2054 : SVt_PVHV));
2055 return WORD;
2056 }
2057
02aa26ce
NT
2058 /* no identifier pending identification */
2059
3280af22 2060 switch (PL_lex_state) {
79072805
LW
2061#ifdef COMMENTARY
2062 case LEX_NORMAL: /* Some compilers will produce faster */
2063 case LEX_INTERPNORMAL: /* code if we comment these out. */
2064 break;
2065#endif
2066
09bef843 2067 /* when we've already built the next token, just pull it out of the queue */
79072805 2068 case LEX_KNOWNEXT:
3280af22
NIS
2069 PL_nexttoke--;
2070 yylval = PL_nextval[PL_nexttoke];
2071 if (!PL_nexttoke) {
2072 PL_lex_state = PL_lex_defer;
2073 PL_expect = PL_lex_expect;
2074 PL_lex_defer = LEX_NORMAL;
463ee0b2 2075 }
3280af22 2076 return(PL_nexttype[PL_nexttoke]);
79072805 2077
02aa26ce 2078 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2079 when we get here, PL_bufptr is at the \
02aa26ce 2080 */
79072805
LW
2081 case LEX_INTERPCASEMOD:
2082#ifdef DEBUGGING
3280af22 2083 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2084 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2085#endif
02aa26ce 2086 /* handle \E or end of string */
3280af22 2087 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2088 char oldmod;
02aa26ce
NT
2089
2090 /* if at a \E */
3280af22
NIS
2091 if (PL_lex_casemods) {
2092 oldmod = PL_lex_casestack[--PL_lex_casemods];
2093 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2094
3280af22
NIS
2095 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2096 PL_bufptr += 2;
2097 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2098 }
79072805
LW
2099 return ')';
2100 }
3280af22
NIS
2101 if (PL_bufptr != PL_bufend)
2102 PL_bufptr += 2;
2103 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2104 return yylex();
79072805
LW
2105 }
2106 else {
3280af22 2107 s = PL_bufptr + 1;
79072805
LW
2108 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2109 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2110 if (strchr("LU", *s) &&
3280af22 2111 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2112 {
3280af22 2113 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2114 return ')';
2115 }
3280af22
NIS
2116 if (PL_lex_casemods > 10) {
2117 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2118 if (newlb != PL_lex_casestack) {
a0d0e21e 2119 SAVEFREEPV(newlb);
3280af22 2120 PL_lex_casestack = newlb;
a0d0e21e
LW
2121 }
2122 }
3280af22
NIS
2123 PL_lex_casestack[PL_lex_casemods++] = *s;
2124 PL_lex_casestack[PL_lex_casemods] = '\0';
2125 PL_lex_state = LEX_INTERPCONCAT;
2126 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2127 force_next('(');
2128 if (*s == 'l')
3280af22 2129 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2130 else if (*s == 'u')
3280af22 2131 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2132 else if (*s == 'L')
3280af22 2133 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2134 else if (*s == 'U')
3280af22 2135 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2136 else if (*s == 'Q')
3280af22 2137 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2138 else
cea2e8a9 2139 Perl_croak(aTHX_ "panic: yylex");
3280af22 2140 PL_bufptr = s + 1;
79072805 2141 force_next(FUNC);
3280af22
NIS
2142 if (PL_lex_starts) {
2143 s = PL_bufptr;
2144 PL_lex_starts = 0;
79072805
LW
2145 Aop(OP_CONCAT);
2146 }
2147 else
cea2e8a9 2148 return yylex();
79072805
LW
2149 }
2150
55497cff
PP
2151 case LEX_INTERPPUSH:
2152 return sublex_push();
2153
79072805 2154 case LEX_INTERPSTART:
3280af22 2155 if (PL_bufptr == PL_bufend)
79072805 2156 return sublex_done();
3280af22
NIS
2157 PL_expect = XTERM;
2158 PL_lex_dojoin = (*PL_bufptr == '@');
2159 PL_lex_state = LEX_INTERPNORMAL;
2160 if (PL_lex_dojoin) {
2161 PL_nextval[PL_nexttoke].ival = 0;
79072805 2162 force_next(',');
554b3eca 2163#ifdef USE_THREADS
533c011a
NIS
2164 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2165 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2166 force_next(PRIVATEREF);
2167#else
a0d0e21e 2168 force_ident("\"", '$');
554b3eca 2169#endif /* USE_THREADS */
3280af22 2170 PL_nextval[PL_nexttoke].ival = 0;
79072805 2171 force_next('$');
3280af22 2172 PL_nextval[PL_nexttoke].ival = 0;
79072805 2173 force_next('(');
3280af22 2174 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2175 force_next(FUNC);
2176 }
3280af22
NIS
2177 if (PL_lex_starts++) {
2178 s = PL_bufptr;
79072805
LW
2179 Aop(OP_CONCAT);
2180 }
cea2e8a9 2181 return yylex();
79072805
LW
2182
2183 case LEX_INTERPENDMAYBE:
3280af22
NIS
2184 if (intuit_more(PL_bufptr)) {
2185 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2186 break;
2187 }
2188 /* FALL THROUGH */
2189
2190 case LEX_INTERPEND:
3280af22
NIS
2191 if (PL_lex_dojoin) {
2192 PL_lex_dojoin = FALSE;
2193 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2194 return ')';
2195 }
43a16006 2196 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2197 && SvEVALED(PL_lex_repl))
43a16006 2198 {
e9fa98b2 2199 if (PL_bufptr != PL_bufend)
cea2e8a9 2200 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2201 PL_lex_repl = Nullsv;
2202 }
79072805
LW
2203 /* FALLTHROUGH */
2204 case LEX_INTERPCONCAT:
2205#ifdef DEBUGGING
3280af22 2206 if (PL_lex_brackets)
cea2e8a9 2207 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2208#endif
3280af22 2209 if (PL_bufptr == PL_bufend)
79072805
LW
2210 return sublex_done();
2211
3280af22
NIS
2212 if (SvIVX(PL_linestr) == '\'') {
2213 SV *sv = newSVsv(PL_linestr);
2214 if (!PL_lex_inpat)
76e3520e 2215 sv = tokeq(sv);
3280af22 2216 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2217 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2218 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2219 s = PL_bufend;
79072805
LW
2220 }
2221 else {
3280af22 2222 s = scan_const(PL_bufptr);
79072805 2223 if (*s == '\\')
3280af22 2224 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2225 else
3280af22 2226 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2227 }
2228
3280af22
NIS
2229 if (s != PL_bufptr) {
2230 PL_nextval[PL_nexttoke] = yylval;
2231 PL_expect = XTERM;
79072805 2232 force_next(THING);
3280af22 2233 if (PL_lex_starts++)
79072805
LW
2234 Aop(OP_CONCAT);
2235 else {
3280af22 2236 PL_bufptr = s;
cea2e8a9 2237 return yylex();
79072805
LW
2238 }
2239 }
2240
cea2e8a9 2241 return yylex();
a0d0e21e 2242 case LEX_FORMLINE:
3280af22
NIS
2243 PL_lex_state = LEX_NORMAL;
2244 s = scan_formline(PL_bufptr);
2245 if (!PL_lex_formbrack)
a0d0e21e
LW
2246 goto rightbracket;
2247 OPERATOR(';');
79072805
LW
2248 }
2249
3280af22
NIS
2250 s = PL_bufptr;
2251 PL_oldoldbufptr = PL_oldbufptr;
2252 PL_oldbufptr = s;
79072805 2253 DEBUG_p( {
bf49b057
GS
2254 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2255 exp_name[PL_expect], s);
79072805 2256 } )
463ee0b2
LW
2257
2258 retry:
378cc40b
LW
2259 switch (*s) {
2260 default:
834a4ddd
LW
2261 if (isIDFIRST_lazy(s))
2262 goto keylookup;
cea2e8a9 2263 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2264 case 4:
2265 case 26:
2266 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2267 case 0:
3280af22
NIS
2268 if (!PL_rsfp) {
2269 PL_last_uni = 0;
2270 PL_last_lop = 0;
2271 if (PL_lex_brackets)
d98d5fff 2272 yyerror("Missing right curly or square bracket");
79072805 2273 TOKEN(0);
463ee0b2 2274 }
3280af22 2275 if (s++ < PL_bufend)
a687059c 2276 goto retry; /* ignore stray nulls */
3280af22
NIS
2277 PL_last_uni = 0;
2278 PL_last_lop = 0;
2279 if (!PL_in_eval && !PL_preambled) {
2280 PL_preambled = TRUE;
2281 sv_setpv(PL_linestr,incl_perldb());
2282 if (SvCUR(PL_linestr))
2283 sv_catpv(PL_linestr,";");
2284 if (PL_preambleav){
2285 while(AvFILLp(PL_preambleav) >= 0) {
2286 SV *tmpsv = av_shift(PL_preambleav);
2287 sv_catsv(PL_linestr, tmpsv);
2288 sv_catpv(PL_linestr, ";");
91b7def8
PP
2289 sv_free(tmpsv);
2290 }
3280af22
NIS
2291 sv_free((SV*)PL_preambleav);
2292 PL_preambleav = NULL;
91b7def8 2293 }
3280af22
NIS
2294 if (PL_minus_n || PL_minus_p) {
2295 sv_catpv(PL_linestr, "LINE: while (<>) {");
2296 if (PL_minus_l)
2297 sv_catpv(PL_linestr,"chomp;");
2298 if (PL_minus_a) {
8fd239a7
CS
2299 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2300 if (gv)
2301 GvIMPORTED_AV_on(gv);
3280af22
NIS
2302 if (PL_minus_F) {
2303 if (strchr("/'\"", *PL_splitstr)
2304 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2305 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2306 else {
2307 char delim;
2308 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2309 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2310 delim = *s;
cea2e8a9 2311 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2312 "q" + (delim == '\''), delim);
3280af22 2313 for (s = PL_splitstr; *s; s++) {
54310121 2314 if (*s == '\\')
3280af22
NIS
2315 sv_catpvn(PL_linestr, "\\", 1);
2316 sv_catpvn(PL_linestr, s, 1);
54310121 2317 }
cea2e8a9 2318 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2319 }
2304df62
AD
2320 }
2321 else
3280af22 2322 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2323 }
79072805 2324 }
3280af22
NIS
2325 sv_catpv(PL_linestr, "\n");
2326 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2327 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2328 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2329 SV *sv = NEWSV(85,0);
2330
2331 sv_upgrade(sv, SVt_PVMG);
3280af22 2332 sv_setsv(sv,PL_linestr);
57843af0 2333 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2334 }
79072805 2335 goto retry;
a687059c 2336 }
e929a76b 2337 do {
3280af22 2338 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2339 fake_eof:
3280af22
NIS
2340 if (PL_rsfp) {
2341 if (PL_preprocess && !PL_in_eval)
2342 (void)PerlProc_pclose(PL_rsfp);
2343 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2344 PerlIO_clearerr(PL_rsfp);
395c3793 2345 else
3280af22
NIS
2346 (void)PerlIO_close(PL_rsfp);
2347 PL_rsfp = Nullfp;
4a9ae47a 2348 PL_doextract = FALSE;
395c3793 2349 }
3280af22
NIS
2350 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2351 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2352 sv_catpv(PL_linestr,";}");
2353 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2354 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2355 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2356 goto retry;
2357 }
3280af22
NIS
2358 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2359 sv_setpv(PL_linestr,"");
79072805 2360 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2361 }
3280af22 2362 if (PL_doextract) {
a0d0e21e 2363 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2364 PL_doextract = FALSE;
a0d0e21e
LW
2365
2366 /* Incest with pod. */
2367 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2368 sv_setpv(PL_linestr, "");
2369 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2371 PL_doextract = FALSE;
a0d0e21e
LW
2372 }
2373 }
463ee0b2 2374 incline(s);
3280af22
NIS
2375 } while (PL_doextract);
2376 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2377 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2378 SV *sv = NEWSV(85,0);
a687059c 2379
93a17b20 2380 sv_upgrade(sv, SVt_PVMG);
3280af22 2381 sv_setsv(sv,PL_linestr);
57843af0 2382 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2383 }
3280af22 2384 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2385 if (CopLINE(PL_curcop) == 1) {
3280af22 2386 while (s < PL_bufend && isSPACE(*s))
79072805 2387 s++;
a0d0e21e 2388 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2389 s++;
44a8e56a 2390 d = Nullch;
3280af22 2391 if (!PL_in_eval) {
44a8e56a
PP
2392 if (*s == '#' && *(s+1) == '!')
2393 d = s + 2;
2394#ifdef ALTERNATE_SHEBANG
2395 else {
2396 static char as[] = ALTERNATE_SHEBANG;
2397 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2398 d = s + (sizeof(as) - 1);
2399 }
2400#endif /* ALTERNATE_SHEBANG */
2401 }
2402 if (d) {
b8378b72 2403 char *ipath;
774d564b 2404 char *ipathend;
b8378b72 2405
774d564b 2406 while (isSPACE(*d))
b8378b72
CS
2407 d++;
2408 ipath = d;
774d564b
PP
2409 while (*d && !isSPACE(*d))
2410 d++;
2411 ipathend = d;
2412
2413#ifdef ARG_ZERO_IS_SCRIPT
2414 if (ipathend > ipath) {
2415 /*
2416 * HP-UX (at least) sets argv[0] to the script name,
2417 * which makes $^X incorrect. And Digital UNIX and Linux,
2418 * at least, set argv[0] to the basename of the Perl
2419 * interpreter. So, having found "#!", we'll set it right.
2420 */
2421 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2422 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2423 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2424 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2425 SvSETMAGIC(x);
2426 }
774d564b 2427 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2428 }
774d564b 2429#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2430
2431 /*
2432 * Look for options.
2433 */
748a9306 2434 d = instr(s,"perl -");
84e30d1a 2435 if (!d) {
748a9306 2436 d = instr(s,"perl");
84e30d1a
GS
2437#if defined(DOSISH)
2438 /* avoid getting into infinite loops when shebang
2439 * line contains "Perl" rather than "perl" */
2440 if (!d) {
2441 for (d = ipathend-4; d >= ipath; --d) {
2442 if ((*d == 'p' || *d == 'P')
2443 && !ibcmp(d, "perl", 4))
2444 {
2445 break;
2446 }
2447 }
2448 if (d < ipath)
2449 d = Nullch;
2450 }
2451#endif
2452 }
44a8e56a
PP
2453#ifdef ALTERNATE_SHEBANG
2454 /*
2455 * If the ALTERNATE_SHEBANG on this system starts with a
2456 * character that can be part of a Perl expression, then if
2457 * we see it but not "perl", we're probably looking at the
2458 * start of Perl code, not a request to hand off to some
2459 * other interpreter. Similarly, if "perl" is there, but
2460 * not in the first 'word' of the line, we assume the line
2461 * contains the start of the Perl program.
44a8e56a
PP
2462 */
2463 if (d && *s != '#') {
774d564b 2464 char *c = ipath;
44a8e56a
PP
2465 while (*c && !strchr("; \t\r\n\f\v#", *c))
2466 c++;
2467 if (c < d)
2468 d = Nullch; /* "perl" not in first word; ignore */
2469 else
2470 *s = '#'; /* Don't try to parse shebang line */
2471 }
774d564b 2472#endif /* ALTERNATE_SHEBANG */
748a9306 2473 if (!d &&
44a8e56a 2474 *s == '#' &&
774d564b 2475 ipathend > ipath &&
3280af22 2476 !PL_minus_c &&
748a9306 2477 !instr(s,"indir") &&
3280af22 2478 instr(PL_origargv[0],"perl"))
748a9306 2479 {
9f68db38 2480 char **newargv;
9f68db38 2481
774d564b
PP
2482 *ipathend = '\0';
2483 s = ipathend + 1;
3280af22 2484 while (s < PL_bufend && isSPACE(*s))
9f68db38 2485 s++;
3280af22
NIS
2486 if (s < PL_bufend) {
2487 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2488 newargv[1] = s;
3280af22 2489 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2490 s++;
2491 *s = '\0';
3280af22 2492 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2493 }
2494 else
3280af22 2495 newargv = PL_origargv;
774d564b 2496 newargv[0] = ipath;
80252599 2497 PerlProc_execv(ipath, newargv);
cea2e8a9 2498 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2499 }
748a9306 2500 if (d) {
3280af22
NIS
2501 U32 oldpdb = PL_perldb;
2502 bool oldn = PL_minus_n;
2503 bool oldp = PL_minus_p;
748a9306
LW
2504
2505 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2506 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2507
2508 if (*d++ == '-') {
8cc95fdb
PP
2509 do {
2510 if (*d == 'M' || *d == 'm') {
2511 char *m = d;
2512 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2513 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2514 (int)(d - m), m);
2515 }
2516 d = moreswitches(d);
2517 } while (d);
84902520 2518 if (PERLDB_LINE && !oldpdb ||
3280af22 2519 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2520 /* if we have already added "LINE: while (<>) {",
2521 we must not do it again */
748a9306 2522 {
3280af22
NIS
2523 sv_setpv(PL_linestr, "");
2524 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2525 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2526 PL_preambled = FALSE;
84902520 2527 if (PERLDB_LINE)
3280af22 2528 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2529 goto retry;
2530 }
a0d0e21e 2531 }
79072805 2532 }
9f68db38 2533 }
79072805 2534 }
3280af22
NIS
2535 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2536 PL_bufptr = s;
2537 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2538 return yylex();
ae986130 2539 }
378cc40b 2540 goto retry;
4fdae800 2541 case '\r':
6a27c188 2542#ifdef PERL_STRICT_CR
cea2e8a9
GS
2543 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2544 Perl_croak(aTHX_
54310121 2545 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2546#endif
4fdae800 2547 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2548 s++;
2549 goto retry;
378cc40b 2550 case '#':
e929a76b 2551 case '\n':
3280af22
NIS
2552 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2553 d = PL_bufend;
a687059c 2554 while (s < d && *s != '\n')
378cc40b 2555 s++;
0f85fab0 2556 if (s < d)
378cc40b 2557 s++;
463ee0b2 2558 incline(s);
3280af22
NIS
2559 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2560 PL_bufptr = s;
2561 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2562 return yylex();
a687059c 2563 }
378cc40b 2564 }
a687059c 2565 else {
378cc40b 2566 *s = '\0';
3280af22 2567 PL_bufend = s;
a687059c 2568 }
378cc40b
LW
2569 goto retry;
2570 case '-':
79072805 2571 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2572 s++;
3280af22 2573 PL_bufptr = s;
748a9306
LW
2574 tmp = *s++;
2575
3280af22 2576 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2577 s++;
2578
2579 if (strnEQ(s,"=>",2)) {
3280af22 2580 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2581 OPERATOR('-'); /* unary minus */
2582 }
3280af22
NIS
2583 PL_last_uni = PL_oldbufptr;
2584 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2585 switch (tmp) {
79072805
LW
2586 case 'r': FTST(OP_FTEREAD);
2587 case 'w': FTST(OP_FTEWRITE);
2588 case 'x': FTST(OP_FTEEXEC);
2589 case 'o': FTST(OP_FTEOWNED);
2590 case 'R': FTST(OP_FTRREAD);
2591 case 'W': FTST(OP_FTRWRITE);
2592 case 'X': FTST(OP_FTREXEC);
2593 case 'O': FTST(OP_FTROWNED);
2594 case 'e': FTST(OP_FTIS);
2595 case 'z': FTST(OP_FTZERO);
2596 case 's': FTST(OP_FTSIZE);
2597 case 'f': FTST(OP_FTFILE);
2598 case 'd': FTST(OP_FTDIR);
2599 case 'l': FTST(OP_FTLINK);
2600 case 'p': FTST(OP_FTPIPE);
2601 case 'S': FTST(OP_FTSOCK);
2602 case 'u': FTST(OP_FTSUID);
2603 case 'g': FTST(OP_FTSGID);
2604 case 'k': FTST(OP_FTSVTX);
2605 case 'b': FTST(OP_FTBLK);
2606 case 'c': FTST(OP_FTCHR);
2607 case 't': FTST(OP_FTTTY);
2608 case 'T': FTST(OP_FTTEXT);
2609 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2610 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2611 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2612 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2613 default:
cea2e8a9 2614 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2615 break;
2616 }
2617 }
a687059c
LW
2618 tmp = *s++;
2619 if (*s == tmp) {
2620 s++;
3280af22 2621 if (PL_expect == XOPERATOR)
79072805
LW
2622 TERM(POSTDEC);
2623 else
2624 OPERATOR(PREDEC);
2625 }
2626 else if (*s == '>') {
2627 s++;
2628 s = skipspace(s);
834a4ddd 2629 if (isIDFIRST_lazy(s)) {
a0d0e21e 2630 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2631 TOKEN(ARROW);
79072805 2632 }
748a9306
LW
2633 else if (*s == '$')
2634 OPERATOR(ARROW);
463ee0b2 2635 else
748a9306 2636 TERM(ARROW);
a687059c 2637 }
3280af22 2638 if (PL_expect == XOPERATOR)
79072805
LW
2639 Aop(OP_SUBTRACT);
2640 else {
3280af22 2641 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2642 check_uni();
79072805 2643 OPERATOR('-'); /* unary minus */
2f3197b3 2644 }
79072805 2645
378cc40b 2646 case '+':
a687059c
LW
2647 tmp = *s++;
2648 if (*s == tmp) {
378cc40b 2649 s++;
3280af22 2650 if (PL_expect == XOPERATOR)
79072805
LW
2651 TERM(POSTINC);
2652 else
2653 OPERATOR(PREINC);
378cc40b 2654 }
3280af22 2655 if (PL_expect == XOPERATOR)
79072805
LW
2656 Aop(OP_ADD);
2657 else {
3280af22 2658 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2659 check_uni();
a687059c 2660 OPERATOR('+');
2f3197b3 2661 }
a687059c 2662
378cc40b 2663 case '*':
3280af22
NIS
2664 if (PL_expect != XOPERATOR) {
2665 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2666 PL_expect = XOPERATOR;
2667 force_ident(PL_tokenbuf, '*');
2668 if (!*PL_tokenbuf)
a0d0e21e 2669 PREREF('*');
79072805 2670 TERM('*');
a687059c 2671 }
79072805
LW
2672 s++;
2673 if (*s == '*') {
a687059c 2674 s++;
79072805 2675 PWop(OP_POW);
a687059c 2676 }
79072805
LW
2677 Mop(OP_MULTIPLY);
2678
378cc40b 2679 case '%':
3280af22 2680 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2681 ++s;
2682 Mop(OP_MODULO);
a687059c 2683 }
3280af22
NIS
2684 PL_tokenbuf[0] = '%';
2685 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2686 if (!PL_tokenbuf[1]) {
2687 if (s == PL_bufend)
bbce6d69
PP
2688 yyerror("Final % should be \\% or %name");
2689 PREREF('%');
a687059c 2690 }
3280af22 2691 PL_pending_ident = '%';
bbce6d69 2692 TERM('%');
a687059c 2693
378cc40b 2694 case '^':
79072805 2695 s++;
a0d0e21e 2696 BOop(OP_BIT_XOR);
79072805 2697 case '[':
3280af22 2698 PL_lex_brackets++;
79072805 2699 /* FALL THROUGH */
378cc40b 2700 case '~':
378cc40b 2701 case ',':
378cc40b
LW
2702 tmp = *s++;
2703 OPERATOR(tmp);
a0d0e21e
LW
2704 case ':':
2705 if (s[1] == ':') {
2706 len = 0;
2707 goto just_a_word;
2708 }
2709 s++;
09bef843
SB
2710 switch (PL_expect) {
2711 OP *attrs;
2712 case XOPERATOR:
2713 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2714 break;
2715 PL_bufptr = s; /* update in case we back off */
2716 goto grabattrs;
2717 case XATTRBLOCK:
2718 PL_expect = XBLOCK;
2719 goto grabattrs;
2720 case XATTRTERM:
2721 PL_expect = XTERMBLOCK;
2722 grabattrs:
2723 s = skipspace(s);
2724 attrs = Nullop;
2725 while (isIDFIRST_lazy(s)) {
2726 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2727 if (*d == '(') {
2728 d = scan_str(d,TRUE,TRUE);
2729 if (!d) {
2730 if (PL_lex_stuff) {
2731 SvREFCNT_dec(PL_lex_stuff);
2732 PL_lex_stuff = Nullsv;
2733 }
2734 /* MUST advance bufptr here to avoid bogus
2735 "at end of line" context messages from yyerror().
2736 */
2737 PL_bufptr = s + len;
2738 yyerror("Unterminated attribute parameter in attribute list");
2739 if (attrs)
2740 op_free(attrs);
2741 return 0; /* EOF indicator */
2742 }
2743 }
2744 if (PL_lex_stuff) {
2745 SV *sv = newSVpvn(s, len);
2746 sv_catsv(sv, PL_lex_stuff);
2747 attrs = append_elem(OP_LIST, attrs,
2748 newSVOP(OP_CONST, 0, sv));
2749 SvREFCNT_dec(PL_lex_stuff);
2750 PL_lex_stuff = Nullsv;
2751 }
2752 else {
2753 attrs = append_elem(OP_LIST, attrs,
2754 newSVOP(OP_CONST, 0,
2755 newSVpvn(s, len)));
2756 }
2757 s = skipspace(d);
2758 while (*s == ',')
2759 s = skipspace(s+1);
2760 }
2761 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}' for vi */
2762 if (*s != ';' && *s != tmp) {
2763 char q = ((*s == '\'') ? '"' : '\'');
2764 /* If here for an expression, and parsed no attrs, back off. */
2765 if (tmp == '=' && !attrs) {
2766 s = PL_bufptr;
2767 break;
2768 }
2769 /* MUST advance bufptr here to avoid bogus "at end of line"
2770 context messages from yyerror().
2771 */
2772 PL_bufptr = s;
2773 if (!*s)
2774 yyerror("Unterminated attribute list");
2775 else
2776 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2777 q, *s, q));
2778 if (attrs)
2779 op_free(attrs);
2780 OPERATOR(':');
2781 }
2782 if (attrs) {
2783 PL_nextval[PL_nexttoke].opval = attrs;
2784 force_next(THING);
2785 }
2786 TOKEN(COLONATTR);
2787 }
a0d0e21e 2788 OPERATOR(':');
8990e307
LW
2789 case '(':
2790 s++;
3280af22
NIS
2791 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2792 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2793 else
3280af22 2794 PL_expect = XTERM;
a0d0e21e 2795 TOKEN('(');
378cc40b 2796 case ';':
57843af0
GS
2797 if (CopLINE(PL_curcop) < PL_copline)
2798 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2799 tmp = *s++;
2800 OPERATOR(tmp);
2801 case ')':
378cc40b 2802 tmp = *s++;
16d20bd9
AD
2803 s = skipspace(s);
2804 if (*s == '{')
2805 PREBLOCK(tmp);
378cc40b 2806 TERM(tmp);
79072805
LW
2807 case ']':
2808 s++;
3280af22 2809 if (PL_lex_brackets <= 0)
d98d5fff 2810 yyerror("Unmatched right square bracket");
463ee0b2 2811 else
3280af22
NIS
2812 --PL_lex_brackets;
2813 if (PL_lex_state == LEX_INTERPNORMAL) {
2814 if (PL_lex_brackets == 0) {
a0d0e21e 2815 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2816 PL_lex_state = LEX_INTERPEND;
79072805
LW
2817 }
2818 }
4633a7c4 2819 TERM(']');
79072805
LW
2820 case '{':
2821 leftbracket:
79072805 2822 s++;
3280af22
NIS
2823 if (PL_lex_brackets > 100) {
2824 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2825 if (newlb != PL_lex_brackstack) {
8990e307 2826 SAVEFREEPV(newlb);
3280af22 2827 PL_lex_brackstack = newlb;
8990e307
LW
2828 }
2829 }
3280af22 2830 switch (PL_expect) {
a0d0e21e 2831 case XTERM:
3280af22 2832 if (PL_lex_formbrack) {
a0d0e21e
LW
2833 s--;
2834 PRETERMBLOCK(DO);
2835 }
3280af22
NIS
2836 if (PL_oldoldbufptr == PL_last_lop)
2837 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2838 else
3280af22 2839 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2840 OPERATOR(HASHBRACK);
a0d0e21e 2841 case XOPERATOR:
3280af22 2842 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2843 s++;
44a8e56a 2844 d = s;
3280af22
NIS
2845 PL_tokenbuf[0] = '\0';
2846 if (d < PL_bufend && *d == '-') {
2847 PL_tokenbuf[0] = '-';
44a8e56a 2848 d++;
3280af22 2849 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2850 d++;
2851 }
834a4ddd 2852 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2853 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2854 FALSE, &len);
3280af22 2855 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2856 d++;
2857 if (*d == '}') {
3280af22 2858 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2859 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2860 if (minus)
2861 force_next('-');
748a9306
LW
2862 }
2863 }
2864 /* FALL THROUGH */
09bef843 2865 case XATTRBLOCK:
748a9306 2866 case XBLOCK:
3280af22
NIS
2867 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2868 PL_expect = XSTATE;
a0d0e21e 2869 break;
09bef843 2870 case XATTRTERM:
a0d0e21e 2871 case XTERMBLOCK:
3280af22
NIS
2872 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2873 PL_expect = XSTATE;
a0d0e21e
LW
2874 break;
2875 default: {
2876 char *t;
3280af22
NIS
2877 if (PL_oldoldbufptr == PL_last_lop)
2878 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2879 else
3280af22 2880 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2881 s = skipspace(s);
09ecc4b6 2882 if (*s == '}')
a0d0e21e 2883 OPERATOR(HASHBRACK);
b8a4b1be
GS
2884 /* This hack serves to disambiguate a pair of curlies
2885 * as being a block or an anon hash. Normally, expectation
2886 * determines that, but in cases where we're not in a
2887 * position to expect anything in particular (like inside
2888 * eval"") we have to resolve the ambiguity. This code
2889 * covers the case where the first term in the curlies is a
2890 * quoted string. Most other cases need to be explicitly
2891 * disambiguated by prepending a `+' before the opening
2892 * curly in order to force resolution as an anon hash.
2893 *
2894 * XXX should probably propagate the outer expectation
2895 * into eval"" to rely less on this hack, but that could
2896 * potentially break current behavior of eval"".
2897 * GSAR 97-07-21
2898 */
2899 t = s;
2900 if (*s == '\'' || *s == '"' || *s == '`') {
2901 /* common case: get past first string, handling escapes */
3280af22 2902 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2903 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2904 t++;
2905 t++;
a0d0e21e 2906 }
b8a4b1be 2907 else if (*s == 'q') {
3280af22 2908 if (++t < PL_bufend
b8a4b1be 2909 && (!isALNUM(*t)
3280af22 2910 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2911 && !isALNUM(*t))))
2912 {
b8a4b1be
GS
2913 char *tmps;
2914 char open, close, term;
2915 I32 brackets = 1;
2916
3280af22 2917 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2918 t++;
2919 term = *t;
2920 open = term;
2921 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2922 term = tmps[5];
2923 close = term;
2924 if (open == close)
3280af22
NIS
2925 for (t++; t < PL_bufend; t++) {
2926 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2927 t++;
6d07e5e9 2928 else if (*t == open)
b8a4b1be
GS
2929 break;
2930 }
2931 else
3280af22
NIS
2932 for (t++; t < PL_bufend; t++) {
2933 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2934 t++;
6d07e5e9 2935 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2936 break;
2937 else if (*t == open)
2938 brackets++;
2939 }
2940 }
2941 t++;
a0d0e21e 2942 }
0505442f
GS
2943 else if (isALNUM_lazy(t)) {
2944 t += UTF8SKIP(t);
2945 while (t < PL_bufend && isALNUM_lazy(t))
2946 t += UTF8SKIP(t);
a0d0e21e 2947 }
3280af22 2948 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2949 t++;
b8a4b1be
GS
2950 /* if comma follows first term, call it an anon hash */
2951 /* XXX it could be a comma expression with loop modifiers */
3280af22 2952 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2953 || (*t == '=' && t[1] == '>')))
a0d0e21e 2954 OPERATOR(HASHBRACK);
3280af22 2955 if (PL_expect == XREF)
4e4e412b 2956 PL_expect = XTERM;
a0d0e21e 2957 else {
3280af22
NIS
2958 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2959 PL_expect = XSTATE;
a0d0e21e 2960 }
8990e307 2961 }
a0d0e21e 2962 break;
463ee0b2 2963 }
57843af0 2964 yylval.ival = CopLINE(PL_curcop);
79072805 2965 if (isSPACE(*s) || *s == '#')
3280af22 2966 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2967 TOKEN('{');
378cc40b 2968 case '}':
79072805
LW
2969 rightbracket:
2970 s++;
3280af22 2971 if (PL_lex_brackets <= 0)
d98d5fff 2972 yyerror("Unmatched right curly bracket");
463ee0b2 2973 else
3280af22
NIS
2974 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2975 if (PL_lex_brackets < PL_lex_formbrack)
2976 PL_lex_formbrack = 0;
2977 if (PL_lex_state == LEX_INTERPNORMAL) {
2978 if (PL_lex_brackets == 0) {
9059aa12
LW
2979 if (PL_expect & XFAKEBRACK) {
2980 PL_expect &= XENUMMASK;
3280af22
NIS
2981 PL_lex_state = LEX_INTERPEND;
2982 PL_bufptr = s;
cea2e8a9 2983 return yylex(); /* ignore fake brackets */
79072805 2984 }
fa83b5b6 2985 if (*s == '-' && s[1] == '>')
3280af22 2986 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 2987 else if (*s != '[' && *s != '{')
3280af22 2988 PL_lex_state = LEX_INTERPEND;
79072805
LW
2989 }
2990 }
9059aa12
LW
2991 if (PL_expect & XFAKEBRACK) {
2992 PL_expect &= XENUMMASK;
3280af22 2993 PL_bufptr = s;
cea2e8a9 2994 return yylex(); /* ignore fake brackets */
748a9306 2995 }
79072805
LW
2996 force_next('}');
2997 TOKEN(';');
378cc40b
LW
2998 case '&':
2999 s++;
3000 tmp = *s++;
3001 if (tmp == '&')
a0d0e21e 3002 AOPERATOR(ANDAND);
378cc40b 3003 s--;
3280af22 3004 if (PL_expect == XOPERATOR) {
834a4ddd 3005 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
57843af0 3006 CopLINE_dec(PL_curcop);
cea2e8a9 3007 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3008 CopLINE_inc(PL_curcop);
463ee0b2 3009 }
79072805 3010 BAop(OP_BIT_AND);
463ee0b2 3011 }
79072805 3012
3280af22
NIS
3013 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3014 if (*PL_tokenbuf) {
3015 PL_expect = XOPERATOR;
3016 force_ident(PL_tokenbuf, '&');
463ee0b2 3017 }
79072805
LW
3018 else
3019 PREREF('&');
c07a80fd 3020 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3021 TERM('&');
3022
378cc40b
LW
3023 case '|':
3024 s++;
3025 tmp = *s++;
3026 if (tmp == '|')
a0d0e21e 3027 AOPERATOR(OROR);
378cc40b 3028 s--;
79072805 3029 BOop(OP_BIT_OR);
378cc40b
LW
3030 case '=':
3031 s++;
3032 tmp = *s++;
3033 if (tmp == '=')
79072805
LW
3034 Eop(OP_EQ);
3035 if (tmp == '>')
3036 OPERATOR(',');
378cc40b 3037 if (tmp == '~')
79072805 3038 PMop(OP_MATCH);
599cee73 3039 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3040 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3041 s--;
3280af22
NIS
3042 if (PL_expect == XSTATE && isALPHA(tmp) &&
3043 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3044 {
3280af22
NIS
3045 if (PL_in_eval && !PL_rsfp) {
3046 d = PL_bufend;
a5f75d66
AD
3047 while (s < d) {
3048 if (*s++ == '\n') {
3049 incline(s);
3050 if (strnEQ(s,"=cut",4)) {
3051 s = strchr(s,'\n');
3052 if (s)
3053 s++;
3054 else
3055 s = d;
3056 incline(s);
3057 goto retry;
3058 }
3059 }
3060 }
3061 goto retry;
3062 }
3280af22
NIS
3063 s = PL_bufend;
3064 PL_doextract = TRUE;
a0d0e21e
LW
3065 goto retry;
3066 }
3280af22 3067 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3068 char *t;
51882d45 3069#ifdef PERL_STRICT_CR
a0d0e21e 3070 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3071#else
3072 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3073#endif
a0d0e21e
LW
3074 if (*t == '\n' || *t == '#') {
3075 s--;
3280af22 3076 PL_expect = XBLOCK;
a0d0e21e
LW
3077 goto leftbracket;
3078 }
79072805 3079 }
a0d0e21e
LW
3080 yylval.ival = 0;
3081 OPERATOR(ASSIGNOP);
378cc40b
LW
3082 case '!':
3083 s++;
3084 tmp = *s++;
3085 if (tmp == '=')
79072805 3086 Eop(OP_NE);
378cc40b 3087 if (tmp == '~')
79072805 3088 PMop(OP_NOT);
378cc40b
LW
3089 s--;
3090 OPERATOR('!');
3091 case '<':
3280af22 3092 if (PL_expect != XOPERATOR) {
93a17b20 3093 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3094 check_uni();
79072805
LW
3095 if (s[1] == '<')
3096 s = scan_heredoc(s);
3097 else
3098 s = scan_inputsymbol(s);
3099 TERM(sublex_start());
378cc40b
LW
3100 }
3101 s++;
3102 tmp = *s++;
3103 if (tmp == '<')
79072805 3104 SHop(OP_LEFT_SHIFT);
395c3793
LW
3105 if (tmp == '=') {
3106 tmp = *s++;
3107 if (tmp == '>')
79072805 3108 Eop(OP_NCMP);
395c3793 3109 s--;
79072805 3110 Rop(OP_LE);
395c3793 3111 }
378cc40b 3112 s--;
79072805 3113 Rop(OP_LT);
378cc40b
LW
3114 case '>':
3115 s++;
3116 tmp = *s++;
3117 if (tmp == '>')
79072805 3118 SHop(OP_RIGHT_SHIFT);
378cc40b 3119 if (tmp == '=')
79072805 3120 Rop(OP_GE);
378cc40b 3121 s--;
79072805 3122 Rop(OP_GT);
378cc40b
LW
3123
3124 case '$':
bbce6d69
PP
3125 CLINE;
3126
3280af22
NIS
3127 if (PL_expect == XOPERATOR) {
3128 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3129 PL_expect = XTERM;
a0d0e21e 3130 depcom();
bbce6d69 3131 return ','; /* grandfather non-comma-format format */
a0d0e21e 3132 }
8990e307 3133 }
a0d0e21e 3134
834a4ddd 3135 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3136 PL_tokenbuf[0] = '@';
376b8730
SM
3137 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3138 sizeof PL_tokenbuf - 1, FALSE);
3139 if (PL_expect == XOPERATOR)
3140 no_op("Array length", s);
3280af22 3141 if (!PL_tokenbuf[1])
a0d0e21e 3142 PREREF(DOLSHARP);
3280af22
NIS
3143 PL_expect = XOPERATOR;
3144 PL_pending_ident = '#';
463ee0b2 3145 TOKEN(DOLSHARP);
79072805 3146 }
bbce6d69 3147
3280af22 3148 PL_tokenbuf[0] = '$';
376b8730
SM
3149 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3150 sizeof PL_tokenbuf - 1, FALSE);
3151 if (PL_expect == XOPERATOR)
3152 no_op("Scalar", s);
3280af22
NIS
3153 if (!PL_tokenbuf[1]) {
3154 if (s == PL_bufend)
bbce6d69
PP
3155 yyerror("Final $ should be \\$ or $name");
3156 PREREF('$');
8990e307 3157 }
a0d0e21e 3158
bbce6d69 3159 /* This kludge not intended to be bulletproof. */
3280af22 3160 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3161 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3162 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3163 yylval.opval->op_private = OPpCONST_ARYBASE;
3164 TERM(THING);
3165 }
3166
ff68c719 3167 d = s;
69d2bceb 3168 tmp = (I32)*s;
3280af22 3169 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3170 s = skipspace(s);
3171
3280af22 3172 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3173 char *t;
3174 if (*s == '[') {
3280af22 3175 PL_tokenbuf[0] = '@';
599cee73 3176 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3177 for(t = s + 1;
834a4ddd 3178 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3179 t++) ;
a0d0e21e 3180 if (*t++ == ',') {
3280af22
NIS
3181 PL_bufptr = skipspace(PL_bufptr);
3182 while (t < PL_bufend && *t != ']')
bbce6d69 3183 t++;
cea2e8a9 3184 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3185 "Multidimensional syntax %.*s not supported",
3186 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3187 }
3188 }
bbce6d69
PP
3189 }
3190 else if (*s == '{') {
3280af22 3191 PL_tokenbuf[0] = '%';
599cee73 3192 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3193 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3194 {
3280af22 3195 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3196 STRLEN len;
3197 for (t++; isSPACE(*t); t++) ;
834a4ddd 3198 if (isIDFIRST_lazy(t)) {
8903cb82 3199 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3200 for (; isSPACE(*t); t++) ;
864dbfa3 3201 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3202 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3203 "You need to quote \"%s\"", tmpbuf);
748a9306 3204 }
93a17b20
LW
3205 }
3206 }
2f3197b3 3207 }
bbce6d69 3208
3280af22 3209 PL_expect = XOPERATOR;
69d2bceb 3210 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3211 bool islop = (PL_last_lop == PL_oldoldbufptr);
3212 if (!islop || PL_last_lop_op == OP_GREPSTART)
3213 PL_expect = XOPERATOR;
bbce6d69 3214 else if (strchr("$@\"'`q", *s))
3280af22 3215 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3216 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3217 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3218 else if (isIDFIRST_lazy(s)) {
3280af22 3219 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3220 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3221 if (tmp = keyword(tmpbuf, len)) {
3222 /* binary operators exclude handle interpretations */
3223 switch (tmp) {
3224 case -KEY_x:
3225 case -KEY_eq:
3226 case -KEY_ne:
3227 case -KEY_gt:
3228 case -KEY_lt:
3229 case -KEY_ge:
3230 case -KEY_le:
3231 case -KEY_cmp:
3232 break;
3233 default:
3280af22 3234 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3235 break;
3236 }
3237 }
68dc0745
PP
3238 else {
3239 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3240 if (gv && GvCVu(gv))
3280af22 3241 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3242 }
93a17b20 3243 }
bbce6d69 3244 else if (isDIGIT(*s))
3280af22 3245 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3246 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3247 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3248 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3249 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3250 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3251 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3252 }
3280af22 3253 PL_pending_ident = '$';
79072805 3254 TOKEN('$');
378cc40b
LW
3255
3256 case '@':
3280af22 3257 if (PL_expect == XOPERATOR)
bbce6d69 3258 no_op("Array", s);
3280af22
NIS
3259 PL_tokenbuf[0] = '@';
3260 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3261 if (!PL_tokenbuf[1]) {
3262 if (s == PL_bufend)
bbce6d69
PP
3263 yyerror("Final @ should be \\@ or @name");
3264 PREREF('@');
3265 }
3280af22 3266 if (PL_lex_state == LEX_NORMAL)
ff68c719 3267 s = skipspace(s);
3280af22 3268 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3269 if (*s == '{')
3280af22 3270 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3271
3272 /* Warn about @ where they meant $. */
599cee73 3273 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3274 if (*s == '[' || *s == '{') {
3275 char *t = s + 1;
834a4ddd 3276 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3277 t++;
3278 if (*t == '}' || *t == ']') {
3279 t++;
3280af22 3280 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3281 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3282 "Scalar value %.*s better written as $%.*s",
3280af22 3283 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3284 }
93a17b20
LW
3285 }
3286 }
463ee0b2 3287 }
3280af22 3288 PL_pending_ident = '@';
79072805 3289 TERM('@');
378cc40b
LW
3290
3291 case '/': /* may either be division or pattern */
3292 case '?': /* may either be conditional or pattern */
3280af22 3293 if (PL_expect != XOPERATOR) {
c277df42 3294 /* Disable warning on "study /blah/" */
3280af22
NIS
3295 if (PL_oldoldbufptr == PL_last_uni
3296 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3297 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3298 check_uni();
8782bef2 3299 s = scan_pat(s,OP_MATCH);
79072805 3300 TERM(sublex_start());
378cc40b
LW
3301 }
3302 tmp = *s++;
a687059c 3303 if (tmp == '/')
79072805 3304 Mop(OP_DIVIDE);
378cc40b
LW
3305 OPERATOR(tmp);
3306
3307 case '.':
51882d45
GS
3308 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3309#ifdef PERL_STRICT_CR
3310 && s[1] == '\n'
3311#else
3312 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3313#endif
3314 && (s == PL_linestart || s[-1] == '\n') )
3315 {
3280af22
NIS
3316 PL_lex_formbrack = 0;
3317 PL_expect = XSTATE;
79072805
LW
3318 goto rightbracket;
3319 }
3280af22 3320 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3321 tmp = *s++;
a687059c
LW
3322 if (*s == tmp) {
3323 s++;
2f3197b3
LW
3324 if (*s == tmp) {
3325 s++;
79072805 3326 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3327 }
3328 else
79072805 3329 yylval.ival = 0;
378cc40b 3330 OPERATOR(DOTDOT);
a687059c 3331 }
3280af22 3332 if (PL_expect != XOPERATOR)
2f3197b3 3333 check_uni();
79072805 3334 Aop(OP_CONCAT);
378cc40b
LW
3335 }
3336 /* FALL THROUGH */
3337 case '0': case '1': case '2': case '3': case '4':
3338 case '5': case '6': case '7': case '8': case '9':
79072805 3339 s = scan_num(s);
3280af22 3340 if (PL_expect == XOPERATOR)
8990e307 3341 no_op("Number",s);
79072805
LW
3342 TERM(THING);
3343
3344 case '\'':
09bef843 3345 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3346 if (PL_expect == XOPERATOR) {
3347 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3348 PL_expect = XTERM;
a0d0e21e
LW
3349 depcom();
3350 return ','; /* grandfather non-comma-format format */
3351 }
463ee0b2 3352 else
8990e307 3353 no_op("String",s);
463ee0b2 3354 }
79072805 3355 if (!s)
85e6fe83 3356 missingterm((char*)0);
79072805
LW
3357 yylval.ival = OP_CONST;
3358 TERM(sublex_start());
3359
3360 case '"':
09bef843 3361 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3362 if (PL_expect == XOPERATOR) {
3363 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3364 PL_expect = XTERM;
a0d0e21e
LW
3365 depcom();
3366 return ','; /* grandfather non-comma-format format */
3367 }
463ee0b2 3368 else
8990e307 3369 no_op("String",s);
463ee0b2 3370 }
79072805 3371 if (!s)
85e6fe83 3372 missingterm((char*)0);
4633a7c4 3373 yylval.ival = OP_CONST;
3280af22 3374 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3375 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3376 yylval.ival = OP_STRINGIFY;
3377 break;
3378 }
3379 }
79072805
LW
3380 TERM(sublex_start());
3381
3382 case '`':
09bef843 3383 s = scan_str(s,FALSE,FALSE);
3280af22 3384 if (PL_expect == XOPERATOR)
8990e307 3385 no_op("Backticks",s);
79072805 3386 if (!s)
85e6fe83 3387 missingterm((char*)0);
79072805
LW
3388 yylval.ival = OP_BACKTICK;
3389 set_csh();
3390 TERM(sublex_start());
3391
3392 case '\\':
3393 s++;
599cee73 3394 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3395 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3396 *s, *s);
3280af22 3397 if (PL_expect == XOPERATOR)
8990e307 3398 no_op("Backslash",s);
79072805
LW
3399 OPERATOR(REFGEN);
3400
a7cb1f99
GS
3401 case 'v':
3402 if (isDIGIT(s[1]) && PL_expect == XTERM) {
3403 char *start = s;
3404 start++;
3405 start++;
3406 while (isDIGIT(*start))
3407 start++;
3408 if (*start == '.' && isDIGIT(start[1])) {
3409 s = scan_num(s);
3410 TERM(THING);
3411 }
3412 }
3413 goto keylookup;
79072805 3414 case 'x':
3280af22 3415 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3416 s++;
3417 Mop(OP_REPEAT);
2f3197b3 3418 }
79072805
LW
3419 goto keylookup;
3420
378cc40b 3421 case '_':
79072805
LW
3422 case 'a': case 'A':
3423 case 'b': case 'B':
3424 case 'c': case 'C':
3425 case 'd': case 'D':
3426 case 'e': case 'E':
3427 case 'f': case 'F':
3428 case 'g': case 'G':
3429 case 'h': case 'H':
3430 case 'i': case 'I':
3431 case 'j': case 'J':
3432 case 'k': case 'K':
3433 case 'l': case 'L':
3434 case 'm': case 'M':
3435 case 'n': case 'N':
3436 case 'o': case 'O':
3437 case 'p': case 'P':
3438 case 'q': case 'Q':
3439 case 'r': case 'R':
3440 case 's': case 'S':
3441 case 't': case 'T':
3442 case 'u': case 'U':
a7cb1f99 3443 case 'V':
79072805
LW
3444 case 'w': case 'W':
3445 case 'X':
3446 case 'y': case 'Y':
3447 case 'z': case 'Z':
3448
49dc05e3 3449 keylookup: {
2d8e6c8d 3450 STRLEN n_a;
161b471a
NIS
3451 gv = Nullgv;
3452 gvp = 0;
49dc05e3 3453
3280af22
NIS
3454 PL_bufptr = s;
3455 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3456
3457 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3458 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3459 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3460 (PL_tokenbuf[0] == 'q' &&
3461 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3462
3463 /* x::* is just a word, unless x is "CORE" */
3280af22 3464 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3465 goto just_a_word;
3466
3643fb5f 3467 d = s;
3280af22 3468 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3469 d++; /* no comments skipped here, or s### is misparsed */
3470
3471 /* Is this a label? */
3280af22
NIS
3472 if (!tmp && PL_expect == XSTATE
3473 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3474 s = d + 1;
3280af22 3475 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3476 CLINE;
3477 TOKEN(LABEL);
3643fb5f
CS
3478 }
3479
3480 /* Check for keywords */
3280af22 3481 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3482
3483 /* Is this a word before a => operator? */
748a9306
LW
3484 if (strnEQ(d,"=>",2)) {
3485 CLINE;
3280af22 3486 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3487 yylval.opval->op_private = OPpCONST_BARE;
3488 TERM(WORD);
3489 }
3490
a0d0e21e 3491 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3492 GV *ogv = Nullgv; /* override (winner) */
3493 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3494 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3495 CV *cv;
3280af22 3496 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3497 (cv = GvCVu(gv)))
3498 {
3499 if (GvIMPORTED_CV(gv))
3500 ogv = gv;
3501 else if (! CvMETHOD(cv))
3502 hgv = gv;
3503 }
3504 if (!ogv &&
3280af22
NIS
3505 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3506 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3507 GvCVu(gv) && GvIMPORTED_CV(gv))
3508 {
3509 ogv = gv;
3510 }
3511 }
3512 if (ogv) {
3513 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3514 }
3515 else if (gv && !gvp
3516 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3517 && GvCVu(gv)
3280af22 3518 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3519 {
3520 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3521 }
56f7f34b
CS
3522 else { /* no override */
3523 tmp = -tmp;
3524 gv = Nullgv;
3525 gvp = 0;
4944e2f7
GS
3526 if (ckWARN(WARN_AMBIGUOUS) && hgv
3527 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3528 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3529 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3530 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3531 }
a0d0e21e
LW
3532 }
3533
3534 reserved_word:
3535 switch (tmp) {
79072805
LW
3536
3537 default: /* not a keyword */
93a17b20 3538 just_a_word: {
96e4d5b1 3539 SV *sv;
3280af22 3540 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3541
3542 /* Get the rest if it looks like a package qualifier */
3543
a0d0e21e 3544 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3545 STRLEN morelen;
3280af22 3546 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3547 TRUE, &morelen);
3548 if (!morelen)
cea2e8a9 3549 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3550 *s == '\'' ? "'" : "::");
c3e0f903 3551 len += morelen;
a0d0e21e 3552 }
8990e307 3553
3280af22
NIS
3554 if (PL_expect == XOPERATOR) {
3555 if (PL_bufptr == PL_linestart) {
57843af0 3556 CopLINE_dec(PL_curcop);
cea2e8a9 3557 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3558 CopLINE_inc(PL_curcop);
463ee0b2
LW
3559 }
3560 else
54310121 3561 no_op("Bareword",s);
463ee0b2 3562 }
8990e307 3563
c3e0f903
GS
3564 /* Look for a subroutine with this name in current package,
3565 unless name is "Foo::", in which case Foo is a bearword
3566 (and a package name). */
3567
3568 if (len > 2 &&
3280af22 3569 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3570 {
599cee73 3571 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3572 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3573 "Bareword \"%s\" refers to nonexistent package",
3280af22 3574 PL_tokenbuf);
c3e0f903 3575 len -= 2;
3280af22 3576 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3577 gv = Nullgv;
3578 gvp = 0;
3579 }
3580 else {
3581 len = 0;
3582 if (!gv)
3280af22 3583 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3584 }
3585
3586 /* if we saw a global override before, get the right name */
8990e307 3587
49dc05e3 3588 if (gvp) {
79cb57f6 3589 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3590 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3591 }
3592 else
3280af22 3593 sv = newSVpv(PL_tokenbuf,0);
8990e307 3594