This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
small nits in changes#4538,4539
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543
GS
30static void restore_rsfp(pTHXo_ void *f);
31static void restore_expect(pTHXo_ void *e);
32static void restore_lex_expect(pTHXo_ void *e);
33
a0ed51b3 34#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
35/*
36 * Note: we try to be careful never to call the isXXX_utf8() functions
37 * unless we're pretty sure we've seen the beginning of a UTF-8 character
38 * (that is, the two high bits are set). Otherwise we risk loading in the
39 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
40 */
41#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
42 ? isIDFIRST(*(p)) \
43 : isIDFIRST_utf8((U8*)p))
44#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
45 ? isALNUM(*(p)) \
46 : isALNUM_utf8((U8*)p))
a0ed51b3 47
2b92dfce
GS
48/* In variables name $^X, these are the legal values for X.
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
ffb4593c
NT
52/* LEX_* are values for PL_lex_state, the state of the lexer.
53 * They are arranged oddly so that the guard on the switch statement
79072805
LW
54 * can get by with a single comparison (if the compiler is smart enough).
55 */
56
fb73857a
PP
57/* #define LEX_NOTPARSING 11 is done in perl.h. */
58
55497cff
PP
59#define LEX_NORMAL 10
60#define LEX_INTERPNORMAL 9
61#define LEX_INTERPCASEMOD 8
62#define LEX_INTERPPUSH 7
63#define LEX_INTERPSTART 6
64#define LEX_INTERPEND 5
65#define LEX_INTERPENDMAYBE 4
66#define LEX_INTERPCONCAT 3
67#define LEX_INTERPCONST 2
68#define LEX_FORMLINE 1
69#define LEX_KNOWNEXT 0
79072805 70
395c3793
LW
71#ifdef I_FCNTL
72#include <fcntl.h>
73#endif
fe14fcc3
LW
74#ifdef I_SYS_FILE
75#include <sys/file.h>
76#endif
395c3793 77
a790bc05
PP
78/* XXX If this causes problems, set i_unistd=undef in the hint file. */
79#ifdef I_UNISTD
80# include <unistd.h> /* Needed for execv() */
81#endif
82
83
79072805
LW
84#ifdef ff_next
85#undef ff_next
d48672a2
LW
86#endif
87
a1a0e61e
TD
88#ifdef USE_PURE_BISON
89YYSTYPE* yylval_pointer = NULL;
90int* yychar_pointer = NULL;
22c35a8c
GS
91# undef yylval
92# undef yychar
e4bfbdd4
JH
93# define yylval (*yylval_pointer)
94# define yychar (*yychar_pointer)
95# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
96# undef yylex
97# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
98#endif
99
79072805 100#include "keywords.h"
fe14fcc3 101
ffb4593c
NT
102/* CLINE is a macro that ensures PL_copline has a sane value */
103
ae986130
LW
104#ifdef CLINE
105#undef CLINE
106#endif
3280af22
NIS
107#define CLINE (PL_copline = (PL_curcop->cop_line < PL_copline ? PL_curcop->cop_line : PL_copline))
108
ffb4593c
NT
109/*
110 * Convenience functions to return different tokens and prime the
9cbb5ea2 111 * lexer for the next token. They all take an argument.
ffb4593c
NT
112 *
113 * TOKEN : generic token (used for '(', DOLSHARP, etc)
114 * OPERATOR : generic operator
115 * AOPERATOR : assignment operator
116 * PREBLOCK : beginning the block after an if, while, foreach, ...
117 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
118 * PREREF : *EXPR where EXPR is not a simple identifier
119 * TERM : expression term
120 * LOOPX : loop exiting command (goto, last, dump, etc)
121 * FTST : file test operator
122 * FUN0 : zero-argument function
2d2e263d 123 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
124 * BOop : bitwise or or xor
125 * BAop : bitwise and
126 * SHop : shift operator
127 * PWop : power operator
9cbb5ea2 128 * PMop : pattern-matching operator
ffb4593c
NT
129 * Aop : addition-level operator
130 * Mop : multiplication-level operator
131 * Eop : equality-testing operator
132 * Rop : relational operator <= != gt
133 *
134 * Also see LOP and lop() below.
135 */
136
3280af22
NIS
137#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
138#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
139#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
140#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
141#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
142#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
143#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
144#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
145#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
146#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
147#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
148#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
149#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
150#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
151#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
152#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
153#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
154#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
155#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
156#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 157
a687059c
LW
158/* This bit of chicanery makes a unary function followed by
159 * a parenthesis into a function with one argument, highest precedence.
160 */
2f3197b3 161#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
162 PL_expect = XTERM, \
163 PL_bufptr = s, \
164 PL_last_uni = PL_oldbufptr, \
165 PL_last_lop_op = f, \
a687059c
LW
166 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
167
79072805 168#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
169 PL_bufptr = s, \
170 PL_last_uni = PL_oldbufptr, \
79072805
LW
171 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
172
9f68db38 173/* grandfather return to old style */
3280af22 174#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 175
ffb4593c
NT
176/*
177 * S_ao
178 *
179 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
180 * into an OP_ANDASSIGN or OP_ORASSIGN
181 */
182
76e3520e 183STATIC int
cea2e8a9 184S_ao(pTHX_ int toketype)
a0d0e21e 185{
3280af22
NIS
186 if (*PL_bufptr == '=') {
187 PL_bufptr++;
a0d0e21e
LW
188 if (toketype == ANDAND)
189 yylval.ival = OP_ANDASSIGN;
190 else if (toketype == OROR)
191 yylval.ival = OP_ORASSIGN;
192 toketype = ASSIGNOP;
193 }
194 return toketype;
195}
196
ffb4593c
NT
197/*
198 * S_no_op
199 * When Perl expects an operator and finds something else, no_op
200 * prints the warning. It always prints "<something> found where
201 * operator expected. It prints "Missing semicolon on previous line?"
202 * if the surprise occurs at the start of the line. "do you need to
203 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
204 * where the compiler doesn't know if foo is a method call or a function.
205 * It prints "Missing operator before end of line" if there's nothing
206 * after the missing operator, or "... before <...>" if there is something
207 * after the missing operator.
208 */
209
76e3520e 210STATIC void
cea2e8a9 211S_no_op(pTHX_ char *what, char *s)
463ee0b2 212{
3280af22
NIS
213 char *oldbp = PL_bufptr;
214 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 215
1189a94a
GS
216 if (!s)
217 s = oldbp;
218 else {
219 assert(s >= oldbp);
220 PL_bufptr = s;
221 }
cea2e8a9 222 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 223 if (is_first)
cea2e8a9 224 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 225 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 226 char *t;
834a4ddd 227 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 228 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 229 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 230 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
231 }
232 else
cea2e8a9 233 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 234 PL_bufptr = oldbp;
8990e307
LW
235}
236
ffb4593c
NT
237/*
238 * S_missingterm
239 * Complain about missing quote/regexp/heredoc terminator.
240 * If it's called with (char *)NULL then it cauterizes the line buffer.
241 * If we're in a delimited string and the delimiter is a control
242 * character, it's reformatted into a two-char sequence like ^C.
243 * This is fatal.
244 */
245
76e3520e 246STATIC void
cea2e8a9 247S_missingterm(pTHX_ char *s)
8990e307
LW
248{
249 char tmpbuf[3];
250 char q;
251 if (s) {
252 char *nl = strrchr(s,'\n');
d2719217 253 if (nl)
8990e307
LW
254 *nl = '\0';
255 }
9d116dd7
JH
256 else if (
257#ifdef EBCDIC
258 iscntrl(PL_multi_close)
259#else
260 PL_multi_close < 32 || PL_multi_close == 127
261#endif
262 ) {
8990e307 263 *tmpbuf = '^';
3280af22 264 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
265 s = "\\n";
266 tmpbuf[2] = '\0';
267 s = tmpbuf;
268 }
269 else {
3280af22 270 *tmpbuf = PL_multi_close;
8990e307
LW
271 tmpbuf[1] = '\0';
272 s = tmpbuf;
273 }
274 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 275 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 276}
79072805 277
ffb4593c
NT
278/*
279 * Perl_deprecate
ffb4593c
NT
280 */
281
79072805 282void
864dbfa3 283Perl_deprecate(pTHX_ char *s)
a0d0e21e 284{
d008e5eb 285 dTHR;
599cee73 286 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 287 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
288}
289
ffb4593c
NT
290/*
291 * depcom
9cbb5ea2 292 * Deprecate a comma-less variable list.
ffb4593c
NT
293 */
294
76e3520e 295STATIC void
cea2e8a9 296S_depcom(pTHX)
a0d0e21e
LW
297{
298 deprecate("comma-less variable list");
299}
300
ffb4593c 301/*
9cbb5ea2
GS
302 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
303 * utf16-to-utf8-reversed.
ffb4593c
NT
304 */
305
a868473f
NIS
306#ifdef WIN32
307
76e3520e 308STATIC I32
cea2e8a9 309S_win32_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f
NIS
310{
311 I32 count = FILTER_READ(idx+1, sv, maxlen);
312 if (count > 0 && !maxlen)
313 win32_strip_return(sv);
314 return count;
315}
316#endif
317
a0ed51b3 318STATIC I32
cea2e8a9 319S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
320{
321 I32 count = FILTER_READ(idx+1, sv, maxlen);
322 if (count) {
dfe13c55
GS
323 U8* tmps;
324 U8* tend;
325 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 326 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 327 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
328
329 }
330 return count;
331}
332
333STATIC I32
cea2e8a9 334S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
335{
336 I32 count = FILTER_READ(idx+1, sv, maxlen);
337 if (count) {
dfe13c55
GS
338 U8* tmps;
339 U8* tend;
340 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 341 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 342 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
343
344 }
345 return count;
346}
a868473f 347
ffb4593c
NT
348/*
349 * Perl_lex_start
9cbb5ea2
GS
350 * Initialize variables. Uses the Perl save_stack to save its state (for
351 * recursive calls to the parser).
ffb4593c
NT
352 */
353
a0d0e21e 354void
864dbfa3 355Perl_lex_start(pTHX_ SV *line)
79072805 356{
0f15f207 357 dTHR;
8990e307
LW
358 char *s;
359 STRLEN len;
360
3280af22
NIS
361 SAVEI32(PL_lex_dojoin);
362 SAVEI32(PL_lex_brackets);
363 SAVEI32(PL_lex_fakebrack);
364 SAVEI32(PL_lex_casemods);
365 SAVEI32(PL_lex_starts);
366 SAVEI32(PL_lex_state);
367 SAVESPTR(PL_lex_inpat);
368 SAVEI32(PL_lex_inwhat);
369 SAVEI16(PL_curcop->cop_line);
370 SAVEPPTR(PL_bufptr);
371 SAVEPPTR(PL_bufend);
372 SAVEPPTR(PL_oldbufptr);
373 SAVEPPTR(PL_oldoldbufptr);
374 SAVEPPTR(PL_linestart);
375 SAVESPTR(PL_linestr);
376 SAVEPPTR(PL_lex_brackstack);
377 SAVEPPTR(PL_lex_casestack);
c76ac1ee 378 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
379 SAVESPTR(PL_lex_stuff);
380 SAVEI32(PL_lex_defer);
09bef843 381 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 382 SAVESPTR(PL_lex_repl);
c76ac1ee
GS
383 SAVEDESTRUCTOR_X(restore_expect, PL_tokenbuf + PL_expect); /* encode as pointer */
384 SAVEDESTRUCTOR_X(restore_lex_expect, PL_tokenbuf + PL_expect);
3280af22
NIS
385
386 PL_lex_state = LEX_NORMAL;
387 PL_lex_defer = 0;
388 PL_expect = XSTATE;
389 PL_lex_brackets = 0;
390 PL_lex_fakebrack = 0;
391 New(899, PL_lex_brackstack, 120, char);
392 New(899, PL_lex_casestack, 12, char);
393 SAVEFREEPV(PL_lex_brackstack);
394 SAVEFREEPV(PL_lex_casestack);
395 PL_lex_casemods = 0;
396 *PL_lex_casestack = '\0';
397 PL_lex_dojoin = 0;
398 PL_lex_starts = 0;
399 PL_lex_stuff = Nullsv;
400 PL_lex_repl = Nullsv;
401 PL_lex_inpat = 0;
402 PL_lex_inwhat = 0;
09bef843 403 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
404 PL_linestr = line;
405 if (SvREADONLY(PL_linestr))
406 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
407 s = SvPV(PL_linestr, len);
8990e307 408 if (len && s[len-1] != ';') {
3280af22
NIS
409 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
410 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
411 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 412 }
3280af22
NIS
413 SvTEMP_off(PL_linestr);
414 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
415 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
416 SvREFCNT_dec(PL_rs);
79cb57f6 417 PL_rs = newSVpvn("\n", 1);
3280af22 418 PL_rsfp = 0;
79072805 419}
a687059c 420
ffb4593c
NT
421/*
422 * Perl_lex_end
9cbb5ea2
GS
423 * Finalizer for lexing operations. Must be called when the parser is
424 * done with the lexer.
ffb4593c
NT
425 */
426
463ee0b2 427void
864dbfa3 428Perl_lex_end(pTHX)
463ee0b2 429{
3280af22 430 PL_doextract = FALSE;
463ee0b2
LW
431}
432
ffb4593c
NT
433/*
434 * S_incline
435 * This subroutine has nothing to do with tilting, whether at windmills
436 * or pinball tables. Its name is short for "increment line". It
437 * increments the current line number in PL_curcop->cop_line and checks
438 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
439 * # line 500 "foo.pm"
440 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
441 */
442
76e3520e 443STATIC void
cea2e8a9 444S_incline(pTHX_ char *s)
463ee0b2 445{
0f15f207 446 dTHR;
463ee0b2
LW
447 char *t;
448 char *n;
449 char ch;
450 int sawline = 0;
451
3280af22 452 PL_curcop->cop_line++;
463ee0b2
LW
453 if (*s++ != '#')
454 return;
455 while (*s == ' ' || *s == '\t') s++;
456 if (strnEQ(s, "line ", 5)) {
457 s += 5;
458 sawline = 1;
459 }
460 if (!isDIGIT(*s))
461 return;
462 n = s;
463 while (isDIGIT(*s))
464 s++;
465 while (*s == ' ' || *s == '\t')
466 s++;
467 if (*s == '"' && (t = strchr(s+1, '"')))
468 s++;
469 else {
470 if (!sawline)
471 return; /* false alarm */
472 for (t = s; !isSPACE(*t); t++) ;
473 }
474 ch = *t;
475 *t = '\0';
476 if (t - s > 0)
cc49e20b 477 CopFILEGV_set(PL_curcop, gv_fetchfile(s));
463ee0b2 478 else
cc49e20b 479 CopFILEGV_set(PL_curcop, gv_fetchfile(PL_origfilename));
463ee0b2 480 *t = ch;
3280af22 481 PL_curcop->cop_line = atoi(n)-1;
463ee0b2
LW
482}
483
ffb4593c
NT
484/*
485 * S_skipspace
486 * Called to gobble the appropriate amount and type of whitespace.
487 * Skips comments as well.
488 */
489
76e3520e 490STATIC char *
cea2e8a9 491S_skipspace(pTHX_ register char *s)
a687059c 492{
11343788 493 dTHR;
3280af22
NIS
494 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
495 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
496 s++;
497 return s;
498 }
499 for (;;) {
fd049845 500 STRLEN prevlen;
09bef843
SB
501 SSize_t oldprevlen, oldoldprevlen;
502 SSize_t oldloplen, oldunilen;
60e6418e
GS
503 while (s < PL_bufend && isSPACE(*s)) {
504 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
505 incline(s);
506 }
ffb4593c
NT
507
508 /* comment */
3280af22
NIS
509 if (s < PL_bufend && *s == '#') {
510 while (s < PL_bufend && *s != '\n')
463ee0b2 511 s++;
60e6418e 512 if (s < PL_bufend) {
463ee0b2 513 s++;
60e6418e
GS
514 if (PL_in_eval && !PL_rsfp) {
515 incline(s);
516 continue;
517 }
518 }
463ee0b2 519 }
ffb4593c
NT
520
521 /* only continue to recharge the buffer if we're at the end
522 * of the buffer, we're not reading from a source filter, and
523 * we're in normal lexing mode
524 */
09bef843
SB
525 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
526 PL_lex_state == LEX_FORMLINE)
463ee0b2 527 return s;
ffb4593c
NT
528
529 /* try to recharge the buffer */
9cbb5ea2
GS
530 if ((s = filter_gets(PL_linestr, PL_rsfp,
531 (prevlen = SvCUR(PL_linestr)))) == Nullch)
532 {
533 /* end of file. Add on the -p or -n magic */
3280af22
NIS
534 if (PL_minus_n || PL_minus_p) {
535 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
536 ";}continue{print or die qq(-p destination: $!\\n)" :
537 "");
3280af22
NIS
538 sv_catpv(PL_linestr,";}");
539 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
540 }
541 else
3280af22 542 sv_setpv(PL_linestr,";");
ffb4593c
NT
543
544 /* reset variables for next time we lex */
9cbb5ea2
GS
545 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
546 = SvPVX(PL_linestr);
3280af22 547 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
548
549 /* Close the filehandle. Could be from -P preprocessor,
550 * STDIN, or a regular file. If we were reading code from
551 * STDIN (because the commandline held no -e or filename)
552 * then we don't close it, we reset it so the code can
553 * read from STDIN too.
554 */
555
3280af22
NIS
556 if (PL_preprocess && !PL_in_eval)
557 (void)PerlProc_pclose(PL_rsfp);
558 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
559 PerlIO_clearerr(PL_rsfp);
8990e307 560 else
3280af22
NIS
561 (void)PerlIO_close(PL_rsfp);
562 PL_rsfp = Nullfp;
463ee0b2
LW
563 return s;
564 }
ffb4593c
NT
565
566 /* not at end of file, so we only read another line */
09bef843
SB
567 /* make corresponding updates to old pointers, for yyerror() */
568 oldprevlen = PL_oldbufptr - PL_bufend;
569 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
570 if (PL_last_uni)
571 oldunilen = PL_last_uni - PL_bufend;
572 if (PL_last_lop)
573 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
574 PL_linestart = PL_bufptr = s + prevlen;
575 PL_bufend = s + SvCUR(PL_linestr);
576 s = PL_bufptr;
09bef843
SB
577 PL_oldbufptr = s + oldprevlen;
578 PL_oldoldbufptr = s + oldoldprevlen;
579 if (PL_last_uni)
580 PL_last_uni = s + oldunilen;
581 if (PL_last_lop)
582 PL_last_lop = s + oldloplen;
a0d0e21e 583 incline(s);
ffb4593c
NT
584
585 /* debugger active and we're not compiling the debugger code,
586 * so store the line into the debugger's array of lines
587 */
3280af22 588 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
589 SV *sv = NEWSV(85,0);
590
591 sv_upgrade(sv, SVt_PVMG);
3280af22 592 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
cc49e20b 593 av_store(CopFILEAV(PL_curcop),(I32)PL_curcop->cop_line,sv);
8990e307 594 }
463ee0b2 595 }
a687059c 596}
378cc40b 597
ffb4593c
NT
598/*
599 * S_check_uni
600 * Check the unary operators to ensure there's no ambiguity in how they're
601 * used. An ambiguous piece of code would be:
602 * rand + 5
603 * This doesn't mean rand() + 5. Because rand() is a unary operator,
604 * the +5 is its argument.
605 */
606
76e3520e 607STATIC void
cea2e8a9 608S_check_uni(pTHX)
ba106d47 609{
2f3197b3 610 char *s;
a0d0e21e 611 char *t;
0453d815 612 dTHR;
2f3197b3 613
3280af22 614 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 615 return;
3280af22
NIS
616 while (isSPACE(*PL_last_uni))
617 PL_last_uni++;
834a4ddd 618 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 619 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 620 return;
0453d815 621 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 622 char ch = *s;
0453d815
PM
623 *s = '\0';
624 Perl_warner(aTHX_ WARN_AMBIGUOUS,
625 "Warning: Use of \"%s\" without parens is ambiguous",
626 PL_last_uni);
627 *s = ch;
628 }
2f3197b3
LW
629}
630
ffb4593c
NT
631/* workaround to replace the UNI() macro with a function. Only the
632 * hints/uts.sh file mentions this. Other comments elsewhere in the
633 * source indicate Microport Unix might need it too.
634 */
635
ffed7fef
LW
636#ifdef CRIPPLED_CC
637
638#undef UNI
ffed7fef 639#define UNI(f) return uni(f,s)
ffed7fef 640
76e3520e 641STATIC int
cea2e8a9 642S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
643{
644 yylval.ival = f;
3280af22
NIS
645 PL_expect = XTERM;
646 PL_bufptr = s;
8f872242
NIS
647 PL_last_uni = PL_oldbufptr;
648 PL_last_lop_op = f;
ffed7fef
LW
649 if (*s == '(')
650 return FUNC1;
651 s = skipspace(s);
652 if (*s == '(')
653 return FUNC1;
654 else
655 return UNIOP;
656}
657
a0d0e21e
LW
658#endif /* CRIPPLED_CC */
659
ffb4593c
NT
660/*
661 * LOP : macro to build a list operator. Its behaviour has been replaced
662 * with a subroutine, S_lop() for which LOP is just another name.
663 */
664
a0d0e21e
LW
665#define LOP(f,x) return lop(f,x,s)
666
ffb4593c
NT
667/*
668 * S_lop
669 * Build a list operator (or something that might be one). The rules:
670 * - if we have a next token, then it's a list operator [why?]
671 * - if the next thing is an opening paren, then it's a function
672 * - else it's a list operator
673 */
674
76e3520e 675STATIC I32
cea2e8a9 676S_lop(pTHX_ I32 f, expectation x, char *s)
ffed7fef 677{
0f15f207 678 dTHR;
79072805 679 yylval.ival = f;
35c8bce7 680 CLINE;
3280af22
NIS
681 PL_expect = x;
682 PL_bufptr = s;
683 PL_last_lop = PL_oldbufptr;
684 PL_last_lop_op = f;
685 if (PL_nexttoke)
a0d0e21e 686 return LSTOP;
79072805
LW
687 if (*s == '(')
688 return FUNC;
689 s = skipspace(s);
690 if (*s == '(')
691 return FUNC;
692 else
693 return LSTOP;
694}
695
ffb4593c
NT
696/*
697 * S_force_next
9cbb5ea2 698 * When the lexer realizes it knows the next token (for instance,
ffb4593c 699 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
700 * to know what token to return the next time the lexer is called. Caller
701 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
702 * handles the token correctly.
ffb4593c
NT
703 */
704
76e3520e 705STATIC void
cea2e8a9 706S_force_next(pTHX_ I32 type)
79072805 707{
3280af22
NIS
708 PL_nexttype[PL_nexttoke] = type;
709 PL_nexttoke++;
710 if (PL_lex_state != LEX_KNOWNEXT) {
711 PL_lex_defer = PL_lex_state;
712 PL_lex_expect = PL_expect;
713 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
714 }
715}
716
ffb4593c
NT
717/*
718 * S_force_word
719 * When the lexer knows the next thing is a word (for instance, it has
720 * just seen -> and it knows that the next char is a word char, then
721 * it calls S_force_word to stick the next word into the PL_next lookahead.
722 *
723 * Arguments:
b1b65b59 724 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
725 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
726 * int check_keyword : if true, Perl checks to make sure the word isn't
727 * a keyword (do this if the word is a label, e.g. goto FOO)
728 * int allow_pack : if true, : characters will also be allowed (require,
729 * use, etc. do this)
9cbb5ea2 730 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
731 */
732
76e3520e 733STATIC char *
cea2e8a9 734S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 735{
463ee0b2
LW
736 register char *s;
737 STRLEN len;
738
739 start = skipspace(start);
740 s = start;
834a4ddd 741 if (isIDFIRST_lazy(s) ||
a0d0e21e 742 (allow_pack && *s == ':') ||
15f0808c 743 (allow_initial_tick && *s == '\'') )
a0d0e21e 744 {
3280af22
NIS
745 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
746 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
747 return start;
748 if (token == METHOD) {
749 s = skipspace(s);
750 if (*s == '(')
3280af22 751 PL_expect = XTERM;
463ee0b2 752 else {
3280af22 753 PL_expect = XOPERATOR;
463ee0b2 754 }
79072805 755 }
3280af22
NIS
756 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
757 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
758 force_next(token);
759 }
760 return s;
761}
762
ffb4593c
NT
763/*
764 * S_force_ident
9cbb5ea2 765 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
766 * text only contains the "foo" portion. The first argument is a pointer
767 * to the "foo", and the second argument is the type symbol to prefix.
768 * Forces the next token to be a "WORD".
9cbb5ea2 769 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
770 */
771
76e3520e 772STATIC void
cea2e8a9 773S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
774{
775 if (s && *s) {
11343788 776 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 777 PL_nextval[PL_nexttoke].opval = o;
79072805 778 force_next(WORD);
748a9306 779 if (kind) {
e858de61 780 dTHR; /* just for in_eval */
11343788 781 o->op_private = OPpCONST_ENTERED;
55497cff
PP
782 /* XXX see note in pp_entereval() for why we forgo typo
783 warnings if the symbol must be introduced in an eval.
784 GSAR 96-10-12 */
3280af22 785 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
786 kind == '$' ? SVt_PV :
787 kind == '@' ? SVt_PVAV :
788 kind == '%' ? SVt_PVHV :
789 SVt_PVGV
790 );
748a9306 791 }
79072805
LW
792 }
793}
794
ffb4593c
NT
795/*
796 * S_force_version
797 * Forces the next token to be a version number.
798 */
799
76e3520e 800STATIC char *
cea2e8a9 801S_force_version(pTHX_ char *s)
89bfa8cd
PP
802{
803 OP *version = Nullop;
804
805 s = skipspace(s);
806
807 /* default VERSION number -- GBARR */
808
809 if(isDIGIT(*s)) {
810 char *d;
811 int c;
55497cff 812 for( d=s, c = 1; isDIGIT(*d) || *d == '_' || (*d == '.' && c--); d++);
89bfa8cd
PP
813 if((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
814 s = scan_num(s);
815 /* real VERSION number -- GBARR */
816 version = yylval.opval;
817 }
818 }
819
820 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 821 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
822 force_next(WORD);
823
824 return (s);
825}
826
ffb4593c
NT
827/*
828 * S_tokeq
829 * Tokenize a quoted string passed in as an SV. It finds the next
830 * chunk, up to end of string or a backslash. It may make a new
831 * SV containing that chunk (if HINT_NEW_STRING is on). It also
832 * turns \\ into \.
833 */
834
76e3520e 835STATIC SV *
cea2e8a9 836S_tokeq(pTHX_ SV *sv)
79072805
LW
837{
838 register char *s;
839 register char *send;
840 register char *d;
b3ac6de7
IZ
841 STRLEN len = 0;
842 SV *pv = sv;
79072805
LW
843
844 if (!SvLEN(sv))
b3ac6de7 845 goto finish;
79072805 846
a0d0e21e 847 s = SvPV_force(sv, len);
748a9306 848 if (SvIVX(sv) == -1)
b3ac6de7 849 goto finish;
463ee0b2 850 send = s + len;
79072805
LW
851 while (s < send && *s != '\\')
852 s++;
853 if (s == send)
b3ac6de7 854 goto finish;
79072805 855 d = s;
3280af22 856 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 857 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
858 while (s < send) {
859 if (*s == '\\') {
a0d0e21e 860 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
861 s++; /* all that, just for this */
862 }
863 *d++ = *s++;
864 }
865 *d = '\0';
463ee0b2 866 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 867 finish:
3280af22 868 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 869 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
870 return sv;
871}
872
ffb4593c
NT
873/*
874 * Now come three functions related to double-quote context,
875 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
876 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
877 * interact with PL_lex_state, and create fake ( ... ) argument lists
878 * to handle functions and concatenation.
879 * They assume that whoever calls them will be setting up a fake
880 * join call, because each subthing puts a ',' after it. This lets
881 * "lower \luPpEr"
882 * become
883 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
884 *
885 * (I'm not sure whether the spurious commas at the end of lcfirst's
886 * arguments and join's arguments are created or not).
887 */
888
889/*
890 * S_sublex_start
891 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
892 *
893 * Pattern matching will set PL_lex_op to the pattern-matching op to
894 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
895 *
896 * OP_CONST and OP_READLINE are easy--just make the new op and return.
897 *
898 * Everything else becomes a FUNC.
899 *
900 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
901 * had an OP_CONST or OP_READLINE). This just sets us up for a
902 * call to S_sublex_push().
903 */
904
76e3520e 905STATIC I32
cea2e8a9 906S_sublex_start(pTHX)
79072805
LW
907{
908 register I32 op_type = yylval.ival;
79072805
LW
909
910 if (op_type == OP_NULL) {
3280af22
NIS
911 yylval.opval = PL_lex_op;
912 PL_lex_op = Nullop;
79072805
LW
913 return THING;
914 }
915 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 916 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
917
918 if (SvTYPE(sv) == SVt_PVIV) {
919 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
920 STRLEN len;
921 char *p;
922 SV *nsv;
923
924 p = SvPV(sv, len);
79cb57f6 925 nsv = newSVpvn(p, len);
b3ac6de7
IZ
926 SvREFCNT_dec(sv);
927 sv = nsv;
928 }
929 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 930 PL_lex_stuff = Nullsv;
79072805
LW
931 return THING;
932 }
933
3280af22
NIS
934 PL_sublex_info.super_state = PL_lex_state;
935 PL_sublex_info.sub_inwhat = op_type;
936 PL_sublex_info.sub_op = PL_lex_op;
937 PL_lex_state = LEX_INTERPPUSH;
55497cff 938
3280af22
NIS
939 PL_expect = XTERM;
940 if (PL_lex_op) {
941 yylval.opval = PL_lex_op;
942 PL_lex_op = Nullop;
55497cff
PP
943 return PMFUNC;
944 }
945 else
946 return FUNC;
947}
948
ffb4593c
NT
949/*
950 * S_sublex_push
951 * Create a new scope to save the lexing state. The scope will be
952 * ended in S_sublex_done. Returns a '(', starting the function arguments
953 * to the uc, lc, etc. found before.
954 * Sets PL_lex_state to LEX_INTERPCONCAT.
955 */
956
76e3520e 957STATIC I32
cea2e8a9 958S_sublex_push(pTHX)
55497cff 959{
0f15f207 960 dTHR;
f46d017c 961 ENTER;
55497cff 962
3280af22
NIS
963 PL_lex_state = PL_sublex_info.super_state;
964 SAVEI32(PL_lex_dojoin);
965 SAVEI32(PL_lex_brackets);
966 SAVEI32(PL_lex_fakebrack);
967 SAVEI32(PL_lex_casemods);
968 SAVEI32(PL_lex_starts);
969 SAVEI32(PL_lex_state);
970 SAVESPTR(PL_lex_inpat);
971 SAVEI32(PL_lex_inwhat);
972 SAVEI16(PL_curcop->cop_line);
973 SAVEPPTR(PL_bufptr);
974 SAVEPPTR(PL_oldbufptr);
975 SAVEPPTR(PL_oldoldbufptr);
976 SAVEPPTR(PL_linestart);
977 SAVESPTR(PL_linestr);
978 SAVEPPTR(PL_lex_brackstack);
979 SAVEPPTR(PL_lex_casestack);
980
981 PL_linestr = PL_lex_stuff;
982 PL_lex_stuff = Nullsv;
983
9cbb5ea2
GS
984 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
985 = SvPVX(PL_linestr);
3280af22
NIS
986 PL_bufend += SvCUR(PL_linestr);
987 SAVEFREESV(PL_linestr);
988
989 PL_lex_dojoin = FALSE;
990 PL_lex_brackets = 0;
991 PL_lex_fakebrack = 0;
992 New(899, PL_lex_brackstack, 120, char);
993 New(899, PL_lex_casestack, 12, char);
994 SAVEFREEPV(PL_lex_brackstack);
995 SAVEFREEPV(PL_lex_casestack);
996 PL_lex_casemods = 0;
997 *PL_lex_casestack = '\0';
998 PL_lex_starts = 0;
999 PL_lex_state = LEX_INTERPCONCAT;
1000 PL_curcop->cop_line = PL_multi_start;
1001
1002 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1003 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1004 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1005 else
3280af22 1006 PL_lex_inpat = Nullop;
79072805 1007
55497cff 1008 return '(';
79072805
LW
1009}
1010
ffb4593c
NT
1011/*
1012 * S_sublex_done
1013 * Restores lexer state after a S_sublex_push.
1014 */
1015
76e3520e 1016STATIC I32
cea2e8a9 1017S_sublex_done(pTHX)
79072805 1018{
3280af22
NIS
1019 if (!PL_lex_starts++) {
1020 PL_expect = XOPERATOR;
79cb57f6 1021 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1022 return THING;
1023 }
1024
3280af22
NIS
1025 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1026 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1027 return yylex();
79072805
LW
1028 }
1029
ffb4593c 1030 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1031 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1032 PL_linestr = PL_lex_repl;
1033 PL_lex_inpat = 0;
1034 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1035 PL_bufend += SvCUR(PL_linestr);
1036 SAVEFREESV(PL_linestr);
1037 PL_lex_dojoin = FALSE;
1038 PL_lex_brackets = 0;
1039 PL_lex_fakebrack = 0;
1040 PL_lex_casemods = 0;
1041 *PL_lex_casestack = '\0';
1042 PL_lex_starts = 0;
25da4f38 1043 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1044 PL_lex_state = LEX_INTERPNORMAL;
1045 PL_lex_starts++;
e9fa98b2
HS
1046 /* we don't clear PL_lex_repl here, so that we can check later
1047 whether this is an evalled subst; that means we rely on the
1048 logic to ensure sublex_done() is called again only via the
1049 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1050 }
e9fa98b2 1051 else {
3280af22 1052 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1053 PL_lex_repl = Nullsv;
1054 }
79072805 1055 return ',';
ffed7fef
LW
1056 }
1057 else {
f46d017c 1058 LEAVE;
3280af22
NIS
1059 PL_bufend = SvPVX(PL_linestr);
1060 PL_bufend += SvCUR(PL_linestr);
1061 PL_expect = XOPERATOR;
09bef843 1062 PL_sublex_info.sub_inwhat = 0;
79072805 1063 return ')';
ffed7fef
LW
1064 }
1065}
1066
02aa26ce
NT
1067/*
1068 scan_const
1069
1070 Extracts a pattern, double-quoted string, or transliteration. This
1071 is terrifying code.
1072
3280af22
NIS
1073 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1074 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1075 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1076
9b599b2a
GS
1077 Returns a pointer to the character scanned up to. Iff this is
1078 advanced from the start pointer supplied (ie if anything was
1079 successfully parsed), will leave an OP for the substring scanned
1080 in yylval. Caller must intuit reason for not parsing further
1081 by looking at the next characters herself.
1082
02aa26ce
NT
1083 In patterns:
1084 backslashes:
1085 double-quoted style: \r and \n
1086 regexp special ones: \D \s
1087 constants: \x3
1088 backrefs: \1 (deprecated in substitution replacements)
1089 case and quoting: \U \Q \E
1090 stops on @ and $, but not for $ as tail anchor
1091
1092 In transliterations:
1093 characters are VERY literal, except for - not at the start or end
1094 of the string, which indicates a range. scan_const expands the
1095 range to the full set of intermediate characters.
1096
1097 In double-quoted strings:
1098 backslashes:
1099 double-quoted style: \r and \n
1100 constants: \x3
1101 backrefs: \1 (deprecated)
1102 case and quoting: \U \Q \E
1103 stops on @ and $
1104
1105 scan_const does *not* construct ops to handle interpolated strings.
1106 It stops processing as soon as it finds an embedded $ or @ variable
1107 and leaves it to the caller to work out what's going on.
1108
1109 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1110
1111 $ in pattern could be $foo or could be tail anchor. Assumption:
1112 it's a tail anchor if $ is the last thing in the string, or if it's
1113 followed by one of ")| \n\t"
1114
1115 \1 (backreferences) are turned into $1
1116
1117 The structure of the code is
1118 while (there's a character to process) {
1119 handle transliteration ranges
1120 skip regexp comments
1121 skip # initiated comments in //x patterns
1122 check for embedded @foo
1123 check for embedded scalars
1124 if (backslash) {
1125 leave intact backslashes from leave (below)
1126 deprecate \1 in strings and sub replacements
1127 handle string-changing backslashes \l \U \Q \E, etc.
1128 switch (what was escaped) {
1129 handle - in a transliteration (becomes a literal -)
1130 handle \132 octal characters
1131 handle 0x15 hex characters
1132 handle \cV (control V)
1133 handle printf backslashes (\f, \r, \n, etc)
1134 } (end switch)
1135 } (end if backslash)
1136 } (end while character to read)
1137
1138*/
1139
76e3520e 1140STATIC char *
cea2e8a9 1141S_scan_const(pTHX_ char *start)
79072805 1142{
3280af22 1143 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1144 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1145 register char *s = start; /* start of the constant */
1146 register char *d = SvPVX(sv); /* destination for copies */
1147 bool dorange = FALSE; /* are we in a translit range? */
1148 I32 len; /* ? */
ac2262e3 1149 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1150 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1151 : UTF;
ac2262e3 1152 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1153 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1154 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1155 : UTF;
dff6d3cd 1156 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1157 PL_lex_inpat
4a2d328f 1158 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1159 : "";
79072805
LW
1160
1161 while (s < send || dorange) {
02aa26ce 1162 /* get transliterations out of the way (they're most literal) */
3280af22 1163 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1164 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1165 if (dorange) {
02aa26ce 1166 I32 i; /* current expanded character */
8ada0baa 1167 I32 min; /* first character in range */
02aa26ce
NT
1168 I32 max; /* last character in range */
1169
1170 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1171 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1172 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1173 d -= 2; /* eat the first char and the - */
1174
8ada0baa
JH
1175 min = (U8)*d; /* first char in range */
1176 max = (U8)d[1]; /* last char in range */
1177
1178#ifndef ASCIIish
1179 if ((isLOWER(min) && isLOWER(max)) ||
1180 (isUPPER(min) && isUPPER(max))) {
1181 if (isLOWER(min)) {
1182 for (i = min; i <= max; i++)
1183 if (isLOWER(i))
1184 *d++ = i;
1185 } else {
1186 for (i = min; i <= max; i++)
1187 if (isUPPER(i))
1188 *d++ = i;
1189 }
1190 }
1191 else
1192#endif
1193 for (i = min; i <= max; i++)
1194 *d++ = i;
02aa26ce
NT
1195
1196 /* mark the range as done, and continue */
79072805
LW
1197 dorange = FALSE;
1198 continue;
1199 }
02aa26ce
NT
1200
1201 /* range begins (ignore - as first or last char) */
79072805 1202 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1203 if (utf) {
a176fa2a 1204 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1205 s++;
1206 continue;
1207 }
79072805
LW
1208 dorange = TRUE;
1209 s++;
1210 }
1211 }
02aa26ce
NT
1212
1213 /* if we get here, we're not doing a transliteration */
1214
0f5d15d6
IZ
1215 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1216 except for the last char, which will be done separately. */
3280af22 1217 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1218 if (s[2] == '#') {
1219 while (s < send && *s != ')')
1220 *d++ = *s++;
0f5d15d6
IZ
1221 } else if (s[2] == '{'
1222 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1223 I32 count = 1;
0f5d15d6 1224 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1225 char c;
1226
d9f97599
GS
1227 while (count && (c = *regparse)) {
1228 if (c == '\\' && regparse[1])
1229 regparse++;
cc6b7395
IZ
1230 else if (c == '{')
1231 count++;
1232 else if (c == '}')
1233 count--;
d9f97599 1234 regparse++;
cc6b7395 1235 }
5bdf89e7
IZ
1236 if (*regparse != ')') {
1237 regparse--; /* Leave one char for continuation. */
cc6b7395 1238 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1239 }
0f5d15d6 1240 while (s < regparse)
cc6b7395
IZ
1241 *d++ = *s++;
1242 }
748a9306 1243 }
02aa26ce
NT
1244
1245 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1246 else if (*s == '#' && PL_lex_inpat &&
1247 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1248 while (s+1 < send && *s != '\n')
1249 *d++ = *s++;
1250 }
02aa26ce
NT
1251
1252 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1253 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1254 break;
02aa26ce
NT
1255
1256 /* check for embedded scalars. only stop if we're sure it's a
1257 variable.
1258 */
79072805 1259 else if (*s == '$') {
3280af22 1260 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1261 break;
c277df42 1262 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1263 break; /* in regexp, $ might be tail anchor */
1264 }
02aa26ce 1265
a0ed51b3
LW
1266 /* (now in tr/// code again) */
1267
d008e5eb
GS
1268 if (*s & 0x80 && thisutf) {
1269 dTHR; /* only for ckWARN */
1270 if (ckWARN(WARN_UTF8)) {
dfe13c55 1271 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1272 if (len) {
1273 while (len--)
1274 *d++ = *s++;
1275 continue;
1276 }
a0ed51b3
LW
1277 }
1278 }
1279
02aa26ce 1280 /* backslashes */
79072805
LW
1281 if (*s == '\\' && s+1 < send) {
1282 s++;
02aa26ce
NT
1283
1284 /* some backslashes we leave behind */
c9f97d15 1285 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1286 *d++ = '\\';
1287 *d++ = *s++;
1288 continue;
1289 }
02aa26ce
NT
1290
1291 /* deprecate \1 in strings and substitution replacements */
3280af22 1292 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1293 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1294 {
d008e5eb 1295 dTHR; /* only for ckWARN */
599cee73 1296 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1297 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1298 *--s = '$';
1299 break;
1300 }
02aa26ce
NT
1301
1302 /* string-change backslash escapes */
3280af22 1303 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1304 --s;
1305 break;
1306 }
02aa26ce
NT
1307
1308 /* if we get here, it's either a quoted -, or a digit */
79072805 1309 switch (*s) {
02aa26ce
NT
1310
1311 /* quoted - in transliterations */
79072805 1312 case '-':
3280af22 1313 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1314 *d++ = *s++;
1315 continue;
1316 }
1317 /* FALL THROUGH */
1318 default:
11b8faa4
JH
1319 {
1320 dTHR;
1321 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1322 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1323 "Unrecognized escape \\%c passed through",
1324 *s);
1325 /* default action is to copy the quoted character */
1326 *d++ = *s++;
1327 continue;
1328 }
02aa26ce
NT
1329
1330 /* \132 indicates an octal constant */
79072805
LW
1331 case '0': case '1': case '2': case '3':
1332 case '4': case '5': case '6': case '7':
dff6d3cd 1333 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1334 s += len;
1335 continue;
02aa26ce
NT
1336
1337 /* \x24 indicates a hex constant */
79072805 1338 case 'x':
a0ed51b3
LW
1339 ++s;
1340 if (*s == '{') {
1341 char* e = strchr(s, '}');
1342
adaeee49 1343 if (!e) {
a0ed51b3 1344 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1345 e = s;
1346 }
d008e5eb
GS
1347 if (!utf) {
1348 dTHR;
1349 if (ckWARN(WARN_UTF8))
cea2e8a9 1350 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1351 "Use of \\x{} without utf8 declaration");
1352 }
a0ed51b3 1353 /* note: utf always shorter than hex */
dfe13c55 1354 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1355 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1356 s = e + 1;
a0ed51b3
LW
1357 }
1358 else {
1359 UV uv = (UV)scan_hex(s, 2, &len);
1360 if (utf && PL_lex_inwhat == OP_TRANS &&
1361 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1362 {
dfe13c55 1363 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1364 }
1365 else {
d008e5eb
GS
1366 if (uv >= 127 && UTF) {
1367 dTHR;
1368 if (ckWARN(WARN_UTF8))
cea2e8a9 1369 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1370 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1371 len,s,len,s);
1372 }
a0ed51b3
LW
1373 *d++ = (char)uv;
1374 }
1375 s += len;
1376 }
79072805 1377 continue;
02aa26ce 1378
4a2d328f
IZ
1379 /* \N{latin small letter a} is a named character */
1380 case 'N':
423cee85
JH
1381 ++s;
1382 if (*s == '{') {
1383 char* e = strchr(s, '}');
1384 HV *hv;
1385 SV **svp;
1386 SV *res, *cv;
1387 STRLEN len;
1388 char *str;
1389 char *why = Nullch;
1390
1391 if (!e) {
5777a3f7 1392 yyerror("Missing right brace on \\N{}");
423cee85
JH
1393 e = s - 1;
1394 goto cont_scan;
1395 }
1396 res = newSVpvn(s + 1, e - s - 1);
1397 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1398 res, Nullsv, "\\N{...}" );
423cee85
JH
1399 str = SvPV(res,len);
1400 if (len > e - s + 4) {
1401 char *odest = SvPVX(sv);
1402
1403 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1404 d = SvPVX(sv) + (d - odest);
1405 }
1406 Copy(str, d, len, char);
1407 d += len;
1408 SvREFCNT_dec(res);
1409 cont_scan:
1410 s = e + 1;
1411 }
1412 else
5777a3f7 1413 yyerror("Missing braces on \\N{}");
423cee85
JH
1414 continue;
1415
02aa26ce 1416 /* \c is a control character */
79072805
LW
1417 case 'c':
1418 s++;
9d116dd7
JH
1419#ifdef EBCDIC
1420 *d = *s++;
1421 if (isLOWER(*d))
1422 *d = toUPPER(*d);
1423 *d++ = toCTRL(*d);
1424#else
bbce6d69
PP
1425 len = *s++;
1426 *d++ = toCTRL(len);
9d116dd7 1427#endif
79072805 1428 continue;
02aa26ce
NT
1429
1430 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1431 case 'b':
1432 *d++ = '\b';
1433 break;
1434 case 'n':
1435 *d++ = '\n';
1436 break;
1437 case 'r':
1438 *d++ = '\r';
1439 break;
1440 case 'f':
1441 *d++ = '\f';
1442 break;
1443 case 't':
1444 *d++ = '\t';
1445 break;
34a3fe2a
PP
1446#ifdef EBCDIC
1447 case 'e':
1448 *d++ = '\047'; /* CP 1047 */
1449 break;
1450 case 'a':
1451 *d++ = '\057'; /* CP 1047 */
1452 break;
1453#else
79072805
LW
1454 case 'e':
1455 *d++ = '\033';
1456 break;
1457 case 'a':
1458 *d++ = '\007';
1459 break;
34a3fe2a 1460#endif
02aa26ce
NT
1461 } /* end switch */
1462
79072805
LW
1463 s++;
1464 continue;
02aa26ce
NT
1465 } /* end if (backslash) */
1466
79072805 1467 *d++ = *s++;
02aa26ce
NT
1468 } /* while loop to process each character */
1469
1470 /* terminate the string and set up the sv */
79072805 1471 *d = '\0';
463ee0b2 1472 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1473 SvPOK_on(sv);
1474
02aa26ce 1475 /* shrink the sv if we allocated more than we used */
79072805
LW
1476 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1477 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1478 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1479 }
02aa26ce 1480
9b599b2a 1481 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1482 if (s > PL_bufptr) {
1483 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1484 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1485 sv, Nullsv,
3280af22 1486 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1487 ? "tr"
3280af22 1488 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1489 ? "s"
1490 : "qq")));
79072805 1491 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1492 } else
8990e307 1493 SvREFCNT_dec(sv);
79072805
LW
1494 return s;
1495}
1496
ffb4593c
NT
1497/* S_intuit_more
1498 * Returns TRUE if there's more to the expression (e.g., a subscript),
1499 * FALSE otherwise.
ffb4593c
NT
1500 *
1501 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1502 *
1503 * ->[ and ->{ return TRUE
1504 * { and [ outside a pattern are always subscripts, so return TRUE
1505 * if we're outside a pattern and it's not { or [, then return FALSE
1506 * if we're in a pattern and the first char is a {
1507 * {4,5} (any digits around the comma) returns FALSE
1508 * if we're in a pattern and the first char is a [
1509 * [] returns FALSE
1510 * [SOMETHING] has a funky algorithm to decide whether it's a
1511 * character class or not. It has to deal with things like
1512 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1513 * anything else returns TRUE
1514 */
1515
9cbb5ea2
GS
1516/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1517
76e3520e 1518STATIC int
cea2e8a9 1519S_intuit_more(pTHX_ register char *s)
79072805 1520{
3280af22 1521 if (PL_lex_brackets)
79072805
LW
1522 return TRUE;
1523 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1524 return TRUE;
1525 if (*s != '{' && *s != '[')
1526 return FALSE;
3280af22 1527 if (!PL_lex_inpat)
79072805
LW
1528 return TRUE;
1529
1530 /* In a pattern, so maybe we have {n,m}. */
1531 if (*s == '{') {
1532 s++;
1533 if (!isDIGIT(*s))
1534 return TRUE;
1535 while (isDIGIT(*s))
1536 s++;
1537 if (*s == ',')
1538 s++;
1539 while (isDIGIT(*s))
1540 s++;
1541 if (*s == '}')
1542 return FALSE;
1543 return TRUE;
1544
1545 }
1546
1547 /* On the other hand, maybe we have a character class */
1548
1549 s++;
1550 if (*s == ']' || *s == '^')
1551 return FALSE;
1552 else {
ffb4593c 1553 /* this is terrifying, and it works */
79072805
LW
1554 int weight = 2; /* let's weigh the evidence */
1555 char seen[256];
f27ffc4a 1556 unsigned char un_char = 255, last_un_char;
93a17b20 1557 char *send = strchr(s,']');
3280af22 1558 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1559
1560 if (!send) /* has to be an expression */
1561 return TRUE;
1562
1563 Zero(seen,256,char);
1564 if (*s == '$')
1565 weight -= 3;
1566 else if (isDIGIT(*s)) {
1567 if (s[1] != ']') {
1568 if (isDIGIT(s[1]) && s[2] == ']')
1569 weight -= 10;
1570 }
1571 else
1572 weight -= 100;
1573 }
1574 for (; s < send; s++) {
1575 last_un_char = un_char;
1576 un_char = (unsigned char)*s;
1577 switch (*s) {
1578 case '@':
1579 case '&':
1580 case '$':
1581 weight -= seen[un_char] * 10;
834a4ddd 1582 if (isALNUM_lazy(s+1)) {
8903cb82 1583 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1584 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1585 weight -= 100;
1586 else
1587 weight -= 10;
1588 }
1589 else if (*s == '$' && s[1] &&
93a17b20
LW
1590 strchr("[#!%*<>()-=",s[1])) {
1591 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1592 weight -= 10;
1593 else
1594 weight -= 1;
1595 }
1596 break;
1597 case '\\':
1598 un_char = 254;
1599 if (s[1]) {
93a17b20 1600 if (strchr("wds]",s[1]))
79072805
LW
1601 weight += 100;
1602 else if (seen['\''] || seen['"'])
1603 weight += 1;
93a17b20 1604 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1605 weight += 40;
1606 else if (isDIGIT(s[1])) {
1607 weight += 40;
1608 while (s[1] && isDIGIT(s[1]))
1609 s++;
1610 }
1611 }
1612 else
1613 weight += 100;
1614 break;
1615 case '-':
1616 if (s[1] == '\\')
1617 weight += 50;
93a17b20 1618 if (strchr("aA01! ",last_un_char))
79072805 1619 weight += 30;
93a17b20 1620 if (strchr("zZ79~",s[1]))
79072805 1621 weight += 30;
f27ffc4a
GS
1622 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1623 weight -= 5; /* cope with negative subscript */
79072805
LW
1624 break;
1625 default:
93a17b20 1626 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1627 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1628 char *d = tmpbuf;
1629 while (isALPHA(*s))
1630 *d++ = *s++;
1631 *d = '\0';
1632 if (keyword(tmpbuf, d - tmpbuf))
1633 weight -= 150;
1634 }
1635 if (un_char == last_un_char + 1)
1636 weight += 5;
1637 weight -= seen[un_char];
1638 break;
1639 }
1640 seen[un_char]++;
1641 }
1642 if (weight >= 0) /* probably a character class */
1643 return FALSE;
1644 }
1645
1646 return TRUE;
1647}
ffed7fef 1648
ffb4593c
NT
1649/*
1650 * S_intuit_method
1651 *
1652 * Does all the checking to disambiguate
1653 * foo bar
1654 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1655 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1656 *
1657 * First argument is the stuff after the first token, e.g. "bar".
1658 *
1659 * Not a method if bar is a filehandle.
1660 * Not a method if foo is a subroutine prototyped to take a filehandle.
1661 * Not a method if it's really "Foo $bar"
1662 * Method if it's "foo $bar"
1663 * Not a method if it's really "print foo $bar"
1664 * Method if it's really "foo package::" (interpreted as package->foo)
1665 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1666 * Not a method if bar is a filehandle or package, but is quotd with
1667 * =>
1668 */
1669
76e3520e 1670STATIC int
cea2e8a9 1671S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1672{
1673 char *s = start + (*start == '$');
3280af22 1674 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1675 STRLEN len;
1676 GV* indirgv;
1677
1678 if (gv) {
b6c543e3 1679 CV *cv;
a0d0e21e
LW
1680 if (GvIO(gv))
1681 return 0;
b6c543e3
IZ
1682 if ((cv = GvCVu(gv))) {
1683 char *proto = SvPVX(cv);
1684 if (proto) {
1685 if (*proto == ';')
1686 proto++;
1687 if (*proto == '*')
1688 return 0;
1689 }
1690 } else
a0d0e21e
LW
1691 gv = 0;
1692 }
8903cb82 1693 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1694 /* start is the beginning of the possible filehandle/object,
1695 * and s is the end of it
1696 * tmpbuf is a copy of it
1697 */
1698
a0d0e21e 1699 if (*start == '$') {
3280af22 1700 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1701 return 0;
1702 s = skipspace(s);
3280af22
NIS
1703 PL_bufptr = start;
1704 PL_expect = XREF;
a0d0e21e
LW
1705 return *s == '(' ? FUNCMETH : METHOD;
1706 }
1707 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1708 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1709 len -= 2;
1710 tmpbuf[len] = '\0';
1711 goto bare_package;
1712 }
1713 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1714 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1715 return 0;
1716 /* filehandle or package name makes it a method */
89bfa8cd 1717 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1718 s = skipspace(s);
3280af22 1719 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1720 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1721 bare_package:
3280af22 1722 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1723 newSVpvn(tmpbuf,len));
3280af22
NIS
1724 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1725 PL_expect = XTERM;
a0d0e21e 1726 force_next(WORD);
3280af22 1727 PL_bufptr = s;
a0d0e21e
LW
1728 return *s == '(' ? FUNCMETH : METHOD;
1729 }
1730 }
1731 return 0;
1732}
1733
ffb4593c
NT
1734/*
1735 * S_incl_perldb
1736 * Return a string of Perl code to load the debugger. If PERL5DB
1737 * is set, it will return the contents of that, otherwise a
1738 * compile-time require of perl5db.pl.
1739 */
1740
76e3520e 1741STATIC char*
cea2e8a9 1742S_incl_perldb(pTHX)
a0d0e21e 1743{
3280af22 1744 if (PL_perldb) {
76e3520e 1745 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1746
1747 if (pdb)
1748 return pdb;
61bb5906 1749 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1750 return "BEGIN { require 'perl5db.pl' }";
1751 }
1752 return "";
1753}
1754
1755
16d20bd9
AD
1756/* Encoded script support. filter_add() effectively inserts a
1757 * 'pre-processing' function into the current source input stream.
1758 * Note that the filter function only applies to the current source file
1759 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1760 *
1761 * The datasv parameter (which may be NULL) can be used to pass
1762 * private data to this instance of the filter. The filter function
1763 * can recover the SV using the FILTER_DATA macro and use it to
1764 * store private buffers and state information.
1765 *
1766 * The supplied datasv parameter is upgraded to a PVIO type
1767 * and the IoDIRP field is used to store the function pointer.
1768 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1769 * private use must be set using malloc'd pointers.
1770 */
16d20bd9
AD
1771
1772SV *
864dbfa3 1773Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1774{
f4c556ac
GS
1775 if (!funcp)
1776 return Nullsv;
1777
3280af22
NIS
1778 if (!PL_rsfp_filters)
1779 PL_rsfp_filters = newAV();
16d20bd9 1780 if (!datasv)
8c52afec 1781 datasv = NEWSV(255,0);
16d20bd9 1782 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1783 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1784 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
f4c556ac
GS
1785 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1786 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1787 av_unshift(PL_rsfp_filters, 1);
1788 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1789 return(datasv);
1790}
1791
1792
1793/* Delete most recently added instance of this filter function. */
a0d0e21e 1794void
864dbfa3 1795Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1796{
f4c556ac 1797 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1798 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1799 return;
1800 /* if filter is on top of stack (usual case) just pop it off */
677ca527 1801 if (IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) == (DIR*)funcp){
a6c40364 1802 IoDIRP(FILTER_DATA(AvFILLp(PL_rsfp_filters))) = NULL;
3280af22 1803 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1804
16d20bd9
AD
1805 return;
1806 }
1807 /* we need to search for the correct entry and clear it */
cea2e8a9 1808 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1809}
1810
1811
1812/* Invoke the n'th filter function for the current rsfp. */
1813I32
864dbfa3 1814Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1815
1816
1817 /* 0 = read one text line */
a0d0e21e 1818{
16d20bd9
AD
1819 filter_t funcp;
1820 SV *datasv = NULL;
e50aee73 1821
3280af22 1822 if (!PL_rsfp_filters)
16d20bd9 1823 return -1;
3280af22 1824 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1825 /* Provide a default input filter to make life easy. */
1826 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1827 DEBUG_P(PerlIO_printf(Perl_debug_log,
1828 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1829 if (maxlen) {
1830 /* Want a block */
1831 int len ;
1832 int old_len = SvCUR(buf_sv) ;
1833
1834 /* ensure buf_sv is large enough */
1835 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1836 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1837 if (PerlIO_error(PL_rsfp))
37120919
AD
1838 return -1; /* error */
1839 else
1840 return 0 ; /* end of file */
1841 }
16d20bd9
AD
1842 SvCUR_set(buf_sv, old_len + len) ;
1843 } else {
1844 /* Want a line */
3280af22
NIS
1845 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1846 if (PerlIO_error(PL_rsfp))
37120919
AD
1847 return -1; /* error */
1848 else
1849 return 0 ; /* end of file */
1850 }
16d20bd9
AD
1851 }
1852 return SvCUR(buf_sv);
1853 }
1854 /* Skip this filter slot if filter has been deleted */
3280af22 1855 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1856 DEBUG_P(PerlIO_printf(Perl_debug_log,
1857 "filter_read %d: skipped (filter deleted)\n",
1858 idx));
16d20bd9
AD
1859 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1860 }
1861 /* Get function pointer hidden within datasv */
1862 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1863 DEBUG_P(PerlIO_printf(Perl_debug_log,
1864 "filter_read %d: via function %p (%s)\n",
1865 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1866 /* Call function. The function is expected to */
1867 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1868 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1869 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1870}
1871
76e3520e 1872STATIC char *
cea2e8a9 1873S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1874{
a868473f 1875#ifdef WIN32FILTER
3280af22 1876 if (!PL_rsfp_filters) {
a868473f
NIS
1877 filter_add(win32_textfilter,NULL);
1878 }
1879#endif
3280af22 1880 if (PL_rsfp_filters) {
16d20bd9 1881
55497cff
PP
1882 if (!append)
1883 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1884 if (FILTER_READ(0, sv, 0) > 0)
1885 return ( SvPVX(sv) ) ;
1886 else
1887 return Nullch ;
1888 }
9d116dd7 1889 else
fd049845 1890 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1891}
1892
1893
748a9306
LW
1894#ifdef DEBUGGING
1895 static char* exp_name[] =
09bef843
SB
1896 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1897 "ATTRTERM", "TERMBLOCK"
1898 };
748a9306 1899#endif
463ee0b2 1900
02aa26ce
NT
1901/*
1902 yylex
1903
1904 Works out what to call the token just pulled out of the input
1905 stream. The yacc parser takes care of taking the ops we return and
1906 stitching them into a tree.
1907
1908 Returns:
1909 PRIVATEREF
1910
1911 Structure:
1912 if read an identifier
1913 if we're in a my declaration
1914 croak if they tried to say my($foo::bar)
1915 build the ops for a my() declaration
1916 if it's an access to a my() variable
1917 are we in a sort block?
1918 croak if my($a); $a <=> $b
1919 build ops for access to a my() variable
1920 if in a dq string, and they've said @foo and we can't find @foo
1921 croak
1922 build ops for a bareword
1923 if we already built the token before, use it.
1924*/
1925
864dbfa3
GS
1926int
1927#ifdef USE_PURE_BISON
cea2e8a9 1928Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1929#else
cea2e8a9 1930Perl_yylex(pTHX)
864dbfa3 1931#endif
378cc40b 1932{
11343788 1933 dTHR;
79072805 1934 register char *s;
378cc40b 1935 register char *d;
79072805 1936 register I32 tmp;
463ee0b2 1937 STRLEN len;
161b471a
NIS
1938 GV *gv = Nullgv;
1939 GV **gvp = 0;
a687059c 1940
a1a0e61e
TD
1941#ifdef USE_PURE_BISON
1942 yylval_pointer = lvalp;
1943 yychar_pointer = lcharp;
1944#endif
1945
02aa26ce 1946 /* check if there's an identifier for us to look at */
3280af22 1947 if (PL_pending_ident) {
02aa26ce 1948 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1949 char pit = PL_pending_ident;
1950 PL_pending_ident = 0;
bbce6d69 1951
02aa26ce
NT
1952 /* if we're in a my(), we can't allow dynamics here.
1953 $foo'bar has already been turned into $foo::bar, so
1954 just check for colons.
1955
1956 if it's a legal name, the OP is a PADANY.
1957 */
3280af22 1958 if (PL_in_my) {
77ca0c92
LW
1959 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1960 tmp = pad_allocmy(PL_tokenbuf);
1961 }
1962 else {
1963 if (strchr(PL_tokenbuf,':'))
1964 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1965
77ca0c92
LW
1966 yylval.opval = newOP(OP_PADANY, 0);
1967 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1968 return PRIVATEREF;
1969 }
bbce6d69
PP
1970 }
1971
02aa26ce
NT
1972 /*
1973 build the ops for accesses to a my() variable.
1974
1975 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1976 then used in a comparison. This catches most, but not
1977 all cases. For instance, it catches
1978 sort { my($a); $a <=> $b }
1979 but not
1980 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1981 (although why you'd do that is anyone's guess).
1982 */
1983
3280af22 1984 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 1985#ifdef USE_THREADS
54b9620d 1986 /* Check for single character per-thread SVs */
3280af22
NIS
1987 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
1988 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
1989 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 1990 {
2faa37cc 1991 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
1992 yylval.opval->op_targ = tmp;
1993 return PRIVATEREF;
1994 }
1995#endif /* USE_THREADS */
3280af22 1996 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
77ca0c92
LW
1997 /* might be an "our" variable" */
1998 if (SvFLAGS(AvARRAY(PL_comppad_name)[tmp]) & SVpad_OUR) {
1999 /* build ops for a bareword */
2000 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
2001 yylval.opval->op_private = OPpCONST_ENTERED;
2002 gv_fetchpv(PL_tokenbuf+1,
2003 (PL_in_eval
2004 ? (GV_ADDMULTI | GV_ADDINEVAL | GV_ADDOUR)
2005 : GV_ADDOUR
2006 ),
2007 ((PL_tokenbuf[0] == '$') ? SVt_PV
2008 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2009 : SVt_PVHV));
2010 return WORD;
2011 }
2012
02aa26ce 2013 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2014 if (PL_last_lop_op == OP_SORT &&
2015 PL_tokenbuf[0] == '$' &&
2016 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2017 && !PL_tokenbuf[2])
bbce6d69 2018 {
3280af22
NIS
2019 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2020 d < PL_bufend && *d != '\n';
a863c7d1
MB
2021 d++)
2022 {
2023 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2024 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2025 PL_tokenbuf);
a863c7d1 2026 }
bbce6d69
PP
2027 }
2028 }
bbce6d69 2029
a863c7d1
MB
2030 yylval.opval = newOP(OP_PADANY, 0);
2031 yylval.opval->op_targ = tmp;
2032 return PRIVATEREF;
2033 }
bbce6d69
PP
2034 }
2035
02aa26ce
NT
2036 /*
2037 Whine if they've said @foo in a doublequoted string,
2038 and @foo isn't a variable we can find in the symbol
2039 table.
2040 */
3280af22
NIS
2041 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2042 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2043 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2044 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2045 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2046 }
2047
02aa26ce 2048 /* build ops for a bareword */
3280af22 2049 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2050 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2051 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2052 ((PL_tokenbuf[0] == '$') ? SVt_PV
2053 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2054 : SVt_PVHV));
2055 return WORD;
2056 }
2057
02aa26ce
NT
2058 /* no identifier pending identification */
2059
3280af22 2060 switch (PL_lex_state) {
79072805
LW
2061#ifdef COMMENTARY
2062 case LEX_NORMAL: /* Some compilers will produce faster */
2063 case LEX_INTERPNORMAL: /* code if we comment these out. */
2064 break;
2065#endif
2066
09bef843 2067 /* when we've already built the next token, just pull it out of the queue */
79072805 2068 case LEX_KNOWNEXT:
3280af22
NIS
2069 PL_nexttoke--;
2070 yylval = PL_nextval[PL_nexttoke];
2071 if (!PL_nexttoke) {
2072 PL_lex_state = PL_lex_defer;
2073 PL_expect = PL_lex_expect;
2074 PL_lex_defer = LEX_NORMAL;
463ee0b2 2075 }
3280af22 2076 return(PL_nexttype[PL_nexttoke]);
79072805 2077
02aa26ce 2078 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2079 when we get here, PL_bufptr is at the \
02aa26ce 2080 */
79072805
LW
2081 case LEX_INTERPCASEMOD:
2082#ifdef DEBUGGING
3280af22 2083 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2084 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2085#endif
02aa26ce 2086 /* handle \E or end of string */
3280af22 2087 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2088 char oldmod;
02aa26ce
NT
2089
2090 /* if at a \E */
3280af22
NIS
2091 if (PL_lex_casemods) {
2092 oldmod = PL_lex_casestack[--PL_lex_casemods];
2093 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2094
3280af22
NIS
2095 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2096 PL_bufptr += 2;
2097 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2098 }
79072805
LW
2099 return ')';
2100 }
3280af22
NIS
2101 if (PL_bufptr != PL_bufend)
2102 PL_bufptr += 2;
2103 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2104 return yylex();
79072805
LW
2105 }
2106 else {
3280af22 2107 s = PL_bufptr + 1;
79072805
LW
2108 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2109 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2110 if (strchr("LU", *s) &&
3280af22 2111 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2112 {
3280af22 2113 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2114 return ')';
2115 }
3280af22
NIS
2116 if (PL_lex_casemods > 10) {
2117 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2118 if (newlb != PL_lex_casestack) {
a0d0e21e 2119 SAVEFREEPV(newlb);
3280af22 2120 PL_lex_casestack = newlb;
a0d0e21e
LW
2121 }
2122 }
3280af22
NIS
2123 PL_lex_casestack[PL_lex_casemods++] = *s;
2124 PL_lex_casestack[PL_lex_casemods] = '\0';
2125 PL_lex_state = LEX_INTERPCONCAT;
2126 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2127 force_next('(');
2128 if (*s == 'l')
3280af22 2129 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2130 else if (*s == 'u')
3280af22 2131 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2132 else if (*s == 'L')
3280af22 2133 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2134 else if (*s == 'U')
3280af22 2135 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2136 else if (*s == 'Q')
3280af22 2137 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2138 else
cea2e8a9 2139 Perl_croak(aTHX_ "panic: yylex");
3280af22 2140 PL_bufptr = s + 1;
79072805 2141 force_next(FUNC);
3280af22
NIS
2142 if (PL_lex_starts) {
2143 s = PL_bufptr;
2144 PL_lex_starts = 0;
79072805
LW
2145 Aop(OP_CONCAT);
2146 }
2147 else
cea2e8a9 2148 return yylex();
79072805
LW
2149 }
2150
55497cff
PP
2151 case LEX_INTERPPUSH:
2152 return sublex_push();
2153
79072805 2154 case LEX_INTERPSTART:
3280af22 2155 if (PL_bufptr == PL_bufend)
79072805 2156 return sublex_done();
3280af22
NIS
2157 PL_expect = XTERM;
2158 PL_lex_dojoin = (*PL_bufptr == '@');
2159 PL_lex_state = LEX_INTERPNORMAL;
2160 if (PL_lex_dojoin) {
2161 PL_nextval[PL_nexttoke].ival = 0;
79072805 2162 force_next(',');
554b3eca 2163#ifdef USE_THREADS
533c011a
NIS
2164 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2165 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2166 force_next(PRIVATEREF);
2167#else
a0d0e21e 2168 force_ident("\"", '$');
554b3eca 2169#endif /* USE_THREADS */
3280af22 2170 PL_nextval[PL_nexttoke].ival = 0;
79072805 2171 force_next('$');
3280af22 2172 PL_nextval[PL_nexttoke].ival = 0;
79072805 2173 force_next('(');
3280af22 2174 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2175 force_next(FUNC);
2176 }
3280af22
NIS
2177 if (PL_lex_starts++) {
2178 s = PL_bufptr;
79072805
LW
2179 Aop(OP_CONCAT);
2180 }
cea2e8a9 2181 return yylex();
79072805
LW
2182
2183 case LEX_INTERPENDMAYBE:
3280af22
NIS
2184 if (intuit_more(PL_bufptr)) {
2185 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2186 break;
2187 }
2188 /* FALL THROUGH */
2189
2190 case LEX_INTERPEND:
3280af22
NIS
2191 if (PL_lex_dojoin) {
2192 PL_lex_dojoin = FALSE;
2193 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2194 return ')';
2195 }
43a16006 2196 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2197 && SvEVALED(PL_lex_repl))
43a16006 2198 {
e9fa98b2 2199 if (PL_bufptr != PL_bufend)
cea2e8a9 2200 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2201 PL_lex_repl = Nullsv;
2202 }
79072805
LW
2203 /* FALLTHROUGH */
2204 case LEX_INTERPCONCAT:
2205#ifdef DEBUGGING
3280af22 2206 if (PL_lex_brackets)
cea2e8a9 2207 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2208#endif
3280af22 2209 if (PL_bufptr == PL_bufend)
79072805
LW
2210 return sublex_done();
2211
3280af22
NIS
2212 if (SvIVX(PL_linestr) == '\'') {
2213 SV *sv = newSVsv(PL_linestr);
2214 if (!PL_lex_inpat)
76e3520e 2215 sv = tokeq(sv);
3280af22 2216 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2217 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2218 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2219 s = PL_bufend;
79072805
LW
2220 }
2221 else {
3280af22 2222 s = scan_const(PL_bufptr);
79072805 2223 if (*s == '\\')
3280af22 2224 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2225 else
3280af22 2226 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2227 }
2228
3280af22
NIS
2229 if (s != PL_bufptr) {
2230 PL_nextval[PL_nexttoke] = yylval;
2231 PL_expect = XTERM;
79072805 2232 force_next(THING);
3280af22 2233 if (PL_lex_starts++)
79072805
LW
2234 Aop(OP_CONCAT);
2235 else {
3280af22 2236 PL_bufptr = s;
cea2e8a9 2237 return yylex();
79072805
LW
2238 }
2239 }
2240
cea2e8a9 2241 return yylex();
a0d0e21e 2242 case LEX_FORMLINE:
3280af22
NIS
2243 PL_lex_state = LEX_NORMAL;
2244 s = scan_formline(PL_bufptr);
2245 if (!PL_lex_formbrack)
a0d0e21e
LW
2246 goto rightbracket;
2247 OPERATOR(';');
79072805
LW
2248 }
2249
3280af22
NIS
2250 s = PL_bufptr;
2251 PL_oldoldbufptr = PL_oldbufptr;
2252 PL_oldbufptr = s;
79072805 2253 DEBUG_p( {
bf49b057
GS
2254 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2255 exp_name[PL_expect], s);
79072805 2256 } )
463ee0b2
LW
2257
2258 retry:
378cc40b
LW
2259 switch (*s) {
2260 default:
834a4ddd
LW
2261 if (isIDFIRST_lazy(s))
2262 goto keylookup;
cea2e8a9 2263 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2264 case 4:
2265 case 26:
2266 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2267 case 0:
3280af22
NIS
2268 if (!PL_rsfp) {
2269 PL_last_uni = 0;
2270 PL_last_lop = 0;
2271 if (PL_lex_brackets)
d98d5fff 2272 yyerror("Missing right curly or square bracket");
79072805 2273 TOKEN(0);
463ee0b2 2274 }
3280af22 2275 if (s++ < PL_bufend)
a687059c 2276 goto retry; /* ignore stray nulls */
3280af22
NIS
2277 PL_last_uni = 0;
2278 PL_last_lop = 0;
2279 if (!PL_in_eval && !PL_preambled) {
2280 PL_preambled = TRUE;
2281 sv_setpv(PL_linestr,incl_perldb());
2282 if (SvCUR(PL_linestr))
2283 sv_catpv(PL_linestr,";");
2284 if (PL_preambleav){
2285 while(AvFILLp(PL_preambleav) >= 0) {
2286 SV *tmpsv = av_shift(PL_preambleav);
2287 sv_catsv(PL_linestr, tmpsv);
2288 sv_catpv(PL_linestr, ";");
91b7def8
PP
2289 sv_free(tmpsv);
2290 }
3280af22
NIS
2291 sv_free((SV*)PL_preambleav);
2292 PL_preambleav = NULL;
91b7def8 2293 }
3280af22
NIS
2294 if (PL_minus_n || PL_minus_p) {
2295 sv_catpv(PL_linestr, "LINE: while (<>) {");
2296 if (PL_minus_l)
2297 sv_catpv(PL_linestr,"chomp;");
2298 if (PL_minus_a) {
8fd239a7
CS
2299 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2300 if (gv)
2301 GvIMPORTED_AV_on(gv);
3280af22
NIS
2302 if (PL_minus_F) {
2303 if (strchr("/'\"", *PL_splitstr)
2304 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2305 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2306 else {
2307 char delim;
2308 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2309 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2310 delim = *s;
cea2e8a9 2311 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2312 "q" + (delim == '\''), delim);
3280af22 2313 for (s = PL_splitstr; *s; s++) {
54310121 2314 if (*s == '\\')
3280af22
NIS
2315 sv_catpvn(PL_linestr, "\\", 1);
2316 sv_catpvn(PL_linestr, s, 1);
54310121 2317 }
cea2e8a9 2318 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2319 }
2304df62
AD
2320 }
2321 else
3280af22 2322 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2323 }
79072805 2324 }
3280af22
NIS
2325 sv_catpv(PL_linestr, "\n");
2326 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2327 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2328 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2329 SV *sv = NEWSV(85,0);
2330
2331 sv_upgrade(sv, SVt_PVMG);
3280af22 2332 sv_setsv(sv,PL_linestr);
cc49e20b 2333 av_store(CopFILEAV(PL_curcop),(I32)PL_curcop->cop_line,sv);
a0d0e21e 2334 }
79072805 2335 goto retry;
a687059c 2336 }
e929a76b 2337 do {
3280af22 2338 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2339 fake_eof:
3280af22
NIS
2340 if (PL_rsfp) {
2341 if (PL_preprocess && !PL_in_eval)
2342 (void)PerlProc_pclose(PL_rsfp);
2343 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2344 PerlIO_clearerr(PL_rsfp);
395c3793 2345 else
3280af22
NIS
2346 (void)PerlIO_close(PL_rsfp);
2347 PL_rsfp = Nullfp;
4a9ae47a 2348 PL_doextract = FALSE;
395c3793 2349 }
3280af22
NIS
2350 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2351 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2352 sv_catpv(PL_linestr,";}");
2353 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2354 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2355 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2356 goto retry;
2357 }
3280af22
NIS
2358 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2359 sv_setpv(PL_linestr,"");
79072805 2360 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2361 }
3280af22 2362 if (PL_doextract) {
a0d0e21e 2363 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2364 PL_doextract = FALSE;
a0d0e21e
LW
2365
2366 /* Incest with pod. */
2367 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2368 sv_setpv(PL_linestr, "");
2369 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2371 PL_doextract = FALSE;
a0d0e21e
LW
2372 }
2373 }
463ee0b2 2374 incline(s);
3280af22
NIS
2375 } while (PL_doextract);
2376 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2377 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2378 SV *sv = NEWSV(85,0);
a687059c 2379
93a17b20 2380 sv_upgrade(sv, SVt_PVMG);
3280af22 2381 sv_setsv(sv,PL_linestr);
cc49e20b 2382 av_store(CopFILEAV(PL_curcop),(I32)PL_curcop->cop_line,sv);
a687059c 2383 }
3280af22
NIS
2384 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2385 if (PL_curcop->cop_line == 1) {
2386 while (s < PL_bufend && isSPACE(*s))
79072805 2387 s++;
a0d0e21e 2388 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2389 s++;
44a8e56a 2390 d = Nullch;
3280af22 2391 if (!PL_in_eval) {
44a8e56a
PP
2392 if (*s == '#' && *(s+1) == '!')
2393 d = s + 2;
2394#ifdef ALTERNATE_SHEBANG
2395 else {
2396 static char as[] = ALTERNATE_SHEBANG;
2397 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2398 d = s + (sizeof(as) - 1);
2399 }
2400#endif /* ALTERNATE_SHEBANG */
2401 }
2402 if (d) {
b8378b72 2403 char *ipath;
774d564b 2404 char *ipathend;
b8378b72 2405
774d564b 2406 while (isSPACE(*d))
b8378b72
CS
2407 d++;
2408 ipath = d;
774d564b
PP
2409 while (*d && !isSPACE(*d))
2410 d++;
2411 ipathend = d;
2412
2413#ifdef ARG_ZERO_IS_SCRIPT
2414 if (ipathend > ipath) {
2415 /*
2416 * HP-UX (at least) sets argv[0] to the script name,
2417 * which makes $^X incorrect. And Digital UNIX and Linux,
2418 * at least, set argv[0] to the basename of the Perl
2419 * interpreter. So, having found "#!", we'll set it right.
2420 */
2421 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2422 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2423 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2424 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2425 SvSETMAGIC(x);
2426 }
774d564b 2427 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2428 }
774d564b 2429#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2430
2431 /*
2432 * Look for options.
2433 */
748a9306 2434 d = instr(s,"perl -");
84e30d1a 2435 if (!d) {
748a9306 2436 d = instr(s,"perl");
84e30d1a
GS
2437#if defined(DOSISH)
2438 /* avoid getting into infinite loops when shebang
2439 * line contains "Perl" rather than "perl" */
2440 if (!d) {
2441 for (d = ipathend-4; d >= ipath; --d) {
2442 if ((*d == 'p' || *d == 'P')
2443 && !ibcmp(d, "perl", 4))
2444 {
2445 break;
2446 }
2447 }
2448 if (d < ipath)
2449 d = Nullch;
2450 }
2451#endif
2452 }
44a8e56a
PP
2453#ifdef ALTERNATE_SHEBANG
2454 /*
2455 * If the ALTERNATE_SHEBANG on this system starts with a
2456 * character that can be part of a Perl expression, then if
2457 * we see it but not "perl", we're probably looking at the
2458 * start of Perl code, not a request to hand off to some
2459 * other interpreter. Similarly, if "perl" is there, but
2460 * not in the first 'word' of the line, we assume the line
2461 * contains the start of the Perl program.
44a8e56a
PP
2462 */
2463 if (d && *s != '#') {
774d564b 2464 char *c = ipath;
44a8e56a
PP
2465 while (*c && !strchr("; \t\r\n\f\v#", *c))
2466 c++;
2467 if (c < d)
2468 d = Nullch; /* "perl" not in first word; ignore */
2469 else
2470 *s = '#'; /* Don't try to parse shebang line */
2471 }
774d564b 2472#endif /* ALTERNATE_SHEBANG */
748a9306 2473 if (!d &&
44a8e56a 2474 *s == '#' &&
774d564b 2475 ipathend > ipath &&
3280af22 2476 !PL_minus_c &&
748a9306 2477 !instr(s,"indir") &&
3280af22 2478 instr(PL_origargv[0],"perl"))
748a9306 2479 {
9f68db38 2480 char **newargv;
9f68db38 2481
774d564b
PP
2482 *ipathend = '\0';
2483 s = ipathend + 1;
3280af22 2484 while (s < PL_bufend && isSPACE(*s))
9f68db38 2485 s++;
3280af22
NIS
2486 if (s < PL_bufend) {
2487 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2488 newargv[1] = s;
3280af22 2489 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2490 s++;
2491 *s = '\0';
3280af22 2492 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2493 }
2494 else
3280af22 2495 newargv = PL_origargv;
774d564b 2496 newargv[0] = ipath;
80252599 2497 PerlProc_execv(ipath, newargv);
cea2e8a9 2498 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2499 }
748a9306 2500 if (d) {
3280af22
NIS
2501 U32 oldpdb = PL_perldb;
2502 bool oldn = PL_minus_n;
2503 bool oldp = PL_minus_p;
748a9306
LW
2504
2505 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2506 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2507
2508 if (*d++ == '-') {
8cc95fdb
PP
2509 do {
2510 if (*d == 'M' || *d == 'm') {
2511 char *m = d;
2512 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2513 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2514 (int)(d - m), m);
2515 }
2516 d = moreswitches(d);
2517 } while (d);
84902520 2518 if (PERLDB_LINE && !oldpdb ||
3280af22 2519 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2520 /* if we have already added "LINE: while (<>) {",
2521 we must not do it again */
748a9306 2522 {
3280af22
NIS
2523 sv_setpv(PL_linestr, "");
2524 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2525 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2526 PL_preambled = FALSE;
84902520 2527 if (PERLDB_LINE)
3280af22 2528 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2529 goto retry;
2530 }
a0d0e21e 2531 }
79072805 2532 }
9f68db38 2533 }
79072805 2534 }
3280af22
NIS
2535 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2536 PL_bufptr = s;
2537 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2538 return yylex();
ae986130 2539 }
378cc40b 2540 goto retry;
4fdae800 2541 case '\r':
6a27c188 2542#ifdef PERL_STRICT_CR
cea2e8a9
GS
2543 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2544 Perl_croak(aTHX_
54310121 2545 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2546#endif
4fdae800 2547 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2548 s++;
2549 goto retry;
378cc40b 2550 case '#':
e929a76b 2551 case '\n':
3280af22
NIS
2552 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2553 d = PL_bufend;
a687059c 2554 while (s < d && *s != '\n')
378cc40b 2555 s++;
0f85fab0 2556 if (s < d)
378cc40b 2557 s++;
463ee0b2 2558 incline(s);
3280af22
NIS
2559 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2560 PL_bufptr = s;
2561 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2562 return yylex();
a687059c 2563 }
378cc40b 2564 }
a687059c 2565 else {
378cc40b 2566 *s = '\0';
3280af22 2567 PL_bufend = s;
a687059c 2568 }
378cc40b
LW
2569 goto retry;
2570 case '-':
79072805 2571 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2572 s++;
3280af22 2573 PL_bufptr = s;
748a9306
LW
2574 tmp = *s++;
2575
3280af22 2576 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2577 s++;
2578
2579 if (strnEQ(s,"=>",2)) {
3280af22 2580 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2581 OPERATOR('-'); /* unary minus */
2582 }
3280af22
NIS
2583 PL_last_uni = PL_oldbufptr;
2584 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2585 switch (tmp) {
79072805
LW
2586 case 'r': FTST(OP_FTEREAD);
2587 case 'w': FTST(OP_FTEWRITE);
2588 case 'x': FTST(OP_FTEEXEC);
2589 case 'o': FTST(OP_FTEOWNED);
2590 case 'R': FTST(OP_FTRREAD);
2591 case 'W': FTST(OP_FTRWRITE);
2592 case 'X': FTST(OP_FTREXEC);
2593 case 'O': FTST(OP_FTROWNED);
2594 case 'e': FTST(OP_FTIS);
2595 case 'z': FTST(OP_FTZERO);
2596 case 's': FTST(OP_FTSIZE);
2597 case 'f': FTST(OP_FTFILE);
2598 case 'd': FTST(OP_FTDIR);
2599 case 'l': FTST(OP_FTLINK);
2600 case 'p': FTST(OP_FTPIPE);
2601 case 'S': FTST(OP_FTSOCK);
2602 case 'u': FTST(OP_FTSUID);
2603 case 'g': FTST(OP_FTSGID);
2604 case 'k': FTST(OP_FTSVTX);
2605 case 'b': FTST(OP_FTBLK);
2606 case 'c': FTST(OP_FTCHR);
2607 case 't': FTST(OP_FTTTY);
2608 case 'T': FTST(OP_FTTEXT);
2609 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2610 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2611 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2612 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2613 default:
cea2e8a9 2614 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2615 break;
2616 }
2617 }
a687059c
LW
2618 tmp = *s++;
2619 if (*s == tmp) {
2620 s++;
3280af22 2621 if (PL_expect == XOPERATOR)
79072805
LW
2622 TERM(POSTDEC);
2623 else
2624 OPERATOR(PREDEC);
2625 }
2626 else if (*s == '>') {
2627 s++;
2628 s = skipspace(s);
834a4ddd 2629 if (isIDFIRST_lazy(s)) {
a0d0e21e 2630 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2631 TOKEN(ARROW);
79072805 2632 }
748a9306
LW
2633 else if (*s == '$')
2634 OPERATOR(ARROW);
463ee0b2 2635 else
748a9306 2636 TERM(ARROW);
a687059c 2637 }
3280af22 2638 if (PL_expect == XOPERATOR)
79072805
LW
2639 Aop(OP_SUBTRACT);
2640 else {
3280af22 2641 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2642 check_uni();
79072805 2643 OPERATOR('-'); /* unary minus */
2f3197b3 2644 }
79072805 2645
378cc40b 2646 case '+':
a687059c
LW
2647 tmp = *s++;
2648 if (*s == tmp) {
378cc40b 2649 s++;
3280af22 2650 if (PL_expect == XOPERATOR)
79072805
LW
2651 TERM(POSTINC);
2652 else
2653 OPERATOR(PREINC);
378cc40b 2654 }
3280af22 2655 if (PL_expect == XOPERATOR)
79072805
LW
2656 Aop(OP_ADD);
2657 else {
3280af22 2658 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2659 check_uni();
a687059c 2660 OPERATOR('+');
2f3197b3 2661 }
a687059c 2662
378cc40b 2663 case '*':
3280af22
NIS
2664 if (PL_expect != XOPERATOR) {
2665 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2666 PL_expect = XOPERATOR;
2667 force_ident(PL_tokenbuf, '*');
2668 if (!*PL_tokenbuf)
a0d0e21e 2669 PREREF('*');
79072805 2670 TERM('*');
a687059c 2671 }
79072805
LW
2672 s++;
2673 if (*s == '*') {
a687059c 2674 s++;
79072805 2675 PWop(OP_POW);
a687059c 2676 }
79072805
LW
2677 Mop(OP_MULTIPLY);
2678
378cc40b 2679 case '%':
3280af22 2680 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2681 ++s;
2682 Mop(OP_MODULO);
a687059c 2683 }
3280af22
NIS
2684 PL_tokenbuf[0] = '%';
2685 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2686 if (!PL_tokenbuf[1]) {
2687 if (s == PL_bufend)
bbce6d69
PP
2688 yyerror("Final % should be \\% or %name");
2689 PREREF('%');
a687059c 2690 }
3280af22 2691 PL_pending_ident = '%';
bbce6d69 2692 TERM('%');
a687059c 2693
378cc40b 2694 case '^':
79072805 2695 s++;
a0d0e21e 2696 BOop(OP_BIT_XOR);
79072805 2697 case '[':
3280af22 2698 PL_lex_brackets++;
79072805 2699 /* FALL THROUGH */
378cc40b 2700 case '~':
378cc40b 2701 case ',':
378cc40b
LW
2702 tmp = *s++;
2703 OPERATOR(tmp);
a0d0e21e
LW
2704 case ':':
2705 if (s[1] == ':') {
2706 len = 0;
2707 goto just_a_word;
2708 }
2709 s++;
09bef843
SB
2710 switch (PL_expect) {
2711 OP *attrs;
2712 case XOPERATOR:
2713 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2714 break;
2715 PL_bufptr = s; /* update in case we back off */
2716 goto grabattrs;
2717 case XATTRBLOCK:
2718 PL_expect = XBLOCK;
2719 goto grabattrs;
2720 case XATTRTERM:
2721 PL_expect = XTERMBLOCK;
2722 grabattrs:
2723 s = skipspace(s);
2724 attrs = Nullop;
2725 while (isIDFIRST_lazy(s)) {
2726 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2727 if (*d == '(') {
2728 d = scan_str(d,TRUE,TRUE);
2729 if (!d) {
2730 if (PL_lex_stuff) {
2731 SvREFCNT_dec(PL_lex_stuff);
2732 PL_lex_stuff = Nullsv;
2733 }
2734 /* MUST advance bufptr here to avoid bogus
2735 "at end of line" context messages from yyerror().
2736 */
2737 PL_bufptr = s + len;
2738 yyerror("Unterminated attribute parameter in attribute list");
2739 if (attrs)
2740 op_free(attrs);
2741 return 0; /* EOF indicator */
2742 }
2743 }
2744 if (PL_lex_stuff) {
2745 SV *sv = newSVpvn(s, len);
2746 sv_catsv(sv, PL_lex_stuff);
2747 attrs = append_elem(OP_LIST, attrs,
2748 newSVOP(OP_CONST, 0, sv));
2749 SvREFCNT_dec(PL_lex_stuff);
2750 PL_lex_stuff = Nullsv;
2751 }
2752 else {
2753 attrs = append_elem(OP_LIST, attrs,
2754 newSVOP(OP_CONST, 0,
2755 newSVpvn(s, len)));
2756 }
2757 s = skipspace(d);
2758 while (*s == ',')
2759 s = skipspace(s+1);
2760 }
2761 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}' for vi */
2762 if (*s != ';' && *s != tmp) {
2763 char q = ((*s == '\'') ? '"' : '\'');
2764 /* If here for an expression, and parsed no attrs, back off. */
2765 if (tmp == '=' && !attrs) {
2766 s = PL_bufptr;
2767 break;
2768 }
2769 /* MUST advance bufptr here to avoid bogus "at end of line"
2770 context messages from yyerror().
2771 */
2772 PL_bufptr = s;
2773 if (!*s)
2774 yyerror("Unterminated attribute list");
2775 else
2776 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2777 q, *s, q));
2778 if (attrs)
2779 op_free(attrs);
2780 OPERATOR(':');
2781 }
2782 if (attrs) {
2783 PL_nextval[PL_nexttoke].opval = attrs;
2784 force_next(THING);
2785 }
2786 TOKEN(COLONATTR);
2787 }
a0d0e21e 2788 OPERATOR(':');
8990e307
LW
2789 case '(':
2790 s++;
3280af22
NIS
2791 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2792 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2793 else
3280af22 2794 PL_expect = XTERM;
a0d0e21e 2795 TOKEN('(');
378cc40b 2796 case ';':
3280af22
NIS
2797 if (PL_curcop->cop_line < PL_copline)
2798 PL_copline = PL_curcop->cop_line;
378cc40b
LW
2799 tmp = *s++;
2800 OPERATOR(tmp);
2801 case ')':
378cc40b 2802 tmp = *s++;
16d20bd9
AD
2803 s = skipspace(s);
2804 if (*s == '{')
2805 PREBLOCK(tmp);
378cc40b 2806 TERM(tmp);
79072805
LW
2807 case ']':
2808 s++;
3280af22 2809 if (PL_lex_brackets <= 0)
d98d5fff 2810 yyerror("Unmatched right square bracket");
463ee0b2 2811 else
3280af22
NIS
2812 --PL_lex_brackets;
2813 if (PL_lex_state == LEX_INTERPNORMAL) {
2814 if (PL_lex_brackets == 0) {
a0d0e21e 2815 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2816 PL_lex_state = LEX_INTERPEND;
79072805
LW
2817 }
2818 }
4633a7c4 2819 TERM(']');
79072805
LW
2820 case '{':
2821 leftbracket:
79072805 2822 s++;
3280af22
NIS
2823 if (PL_lex_brackets > 100) {
2824 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2825 if (newlb != PL_lex_brackstack) {
8990e307 2826 SAVEFREEPV(newlb);
3280af22 2827 PL_lex_brackstack = newlb;
8990e307
LW
2828 }
2829 }
3280af22 2830 switch (PL_expect) {
a0d0e21e 2831 case XTERM:
3280af22 2832 if (PL_lex_formbrack) {
a0d0e21e
LW
2833 s--;
2834 PRETERMBLOCK(DO);
2835 }
3280af22
NIS
2836 if (PL_oldoldbufptr == PL_last_lop)
2837 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2838 else
3280af22 2839 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2840 OPERATOR(HASHBRACK);
a0d0e21e 2841 case XOPERATOR:
3280af22 2842 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2843 s++;
44a8e56a 2844 d = s;
3280af22
NIS
2845 PL_tokenbuf[0] = '\0';
2846 if (d < PL_bufend && *d == '-') {
2847 PL_tokenbuf[0] = '-';
44a8e56a 2848 d++;
3280af22 2849 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2850 d++;
2851 }
834a4ddd 2852 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2853 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2854 FALSE, &len);
3280af22 2855 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2856 d++;
2857 if (*d == '}') {
3280af22 2858 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2859 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2860 if (minus)
2861 force_next('-');
748a9306
LW
2862 }
2863 }
2864 /* FALL THROUGH */
09bef843 2865 case XATTRBLOCK:
748a9306 2866 case XBLOCK:
3280af22
NIS
2867 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2868 PL_expect = XSTATE;
a0d0e21e 2869 break;
09bef843 2870 case XATTRTERM:
a0d0e21e 2871 case XTERMBLOCK:
3280af22
NIS
2872 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2873 PL_expect = XSTATE;
a0d0e21e
LW
2874 break;
2875 default: {
2876 char *t;
3280af22
NIS
2877 if (PL_oldoldbufptr == PL_last_lop)
2878 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2879 else
3280af22 2880 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2881 s = skipspace(s);
09ecc4b6 2882 if (*s == '}')
a0d0e21e 2883 OPERATOR(HASHBRACK);
b8a4b1be
GS
2884 /* This hack serves to disambiguate a pair of curlies
2885 * as being a block or an anon hash. Normally, expectation
2886 * determines that, but in cases where we're not in a
2887 * position to expect anything in particular (like inside
2888 * eval"") we have to resolve the ambiguity. This code
2889 * covers the case where the first term in the curlies is a
2890 * quoted string. Most other cases need to be explicitly
2891 * disambiguated by prepending a `+' before the opening
2892 * curly in order to force resolution as an anon hash.
2893 *
2894 * XXX should probably propagate the outer expectation
2895 * into eval"" to rely less on this hack, but that could
2896 * potentially break current behavior of eval"".
2897 * GSAR 97-07-21
2898 */
2899 t = s;
2900 if (*s == '\'' || *s == '"' || *s == '`') {
2901 /* common case: get past first string, handling escapes */
3280af22 2902 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2903 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2904 t++;
2905 t++;
a0d0e21e 2906 }
b8a4b1be 2907 else if (*s == 'q') {
3280af22 2908 if (++t < PL_bufend
b8a4b1be 2909 && (!isALNUM(*t)
3280af22 2910 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
b8a4b1be
GS
2911 && !isALNUM(*t)))) {
2912 char *tmps;
2913 char open, close, term;
2914 I32 brackets = 1;
2915
3280af22 2916 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2917 t++;
2918 term = *t;
2919 open = term;
2920 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2921 term = tmps[5];
2922 close = term;
2923 if (open == close)
3280af22
NIS
2924 for (t++; t < PL_bufend; t++) {
2925 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2926 t++;
6d07e5e9 2927 else if (*t == open)
b8a4b1be
GS
2928 break;
2929 }
2930 else
3280af22
NIS
2931 for (t++; t < PL_bufend; t++) {
2932 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2933 t++;
6d07e5e9 2934 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2935 break;
2936 else if (*t == open)
2937 brackets++;
2938 }
2939 }
2940 t++;
a0d0e21e 2941 }
834a4ddd
LW
2942 else if (isIDFIRST_lazy(s)) {
2943 for (t++; t < PL_bufend && isALNUM_lazy(t); t++) ;
a0d0e21e 2944 }
3280af22 2945 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2946 t++;
b8a4b1be
GS
2947 /* if comma follows first term, call it an anon hash */
2948 /* XXX it could be a comma expression with loop modifiers */
3280af22 2949 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2950 || (*t == '=' && t[1] == '>')))
a0d0e21e 2951 OPERATOR(HASHBRACK);
3280af22 2952 if (PL_expect == XREF)
4e4e412b 2953 PL_expect = XTERM;
a0d0e21e 2954 else {
3280af22
NIS
2955 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2956 PL_expect = XSTATE;
a0d0e21e 2957 }
8990e307 2958 }
a0d0e21e 2959 break;
463ee0b2 2960 }
3280af22 2961 yylval.ival = PL_curcop->cop_line;
79072805 2962 if (isSPACE(*s) || *s == '#')
3280af22 2963 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2964 TOKEN('{');
378cc40b 2965 case '}':
79072805
LW
2966 rightbracket:
2967 s++;
3280af22 2968 if (PL_lex_brackets <= 0)
d98d5fff 2969 yyerror("Unmatched right curly bracket");
463ee0b2 2970 else
3280af22
NIS
2971 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2972 if (PL_lex_brackets < PL_lex_formbrack)
2973 PL_lex_formbrack = 0;
2974 if (PL_lex_state == LEX_INTERPNORMAL) {
2975 if (PL_lex_brackets == 0) {
2976 if (PL_lex_fakebrack) {
2977 PL_lex_state = LEX_INTERPEND;
2978 PL_bufptr = s;
cea2e8a9 2979 return yylex(); /* ignore fake brackets */
79072805 2980 }
fa83b5b6 2981 if (*s == '-' && s[1] == '>')
3280af22 2982 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 2983 else if (*s != '[' && *s != '{')
3280af22 2984 PL_lex_state = LEX_INTERPEND;
79072805
LW
2985 }
2986 }
3280af22
NIS
2987 if (PL_lex_brackets < PL_lex_fakebrack) {
2988 PL_bufptr = s;
2989 PL_lex_fakebrack = 0;
cea2e8a9 2990 return yylex(); /* ignore fake brackets */
748a9306 2991 }
79072805
LW
2992 force_next('}');
2993 TOKEN(';');
378cc40b
LW
2994 case '&':
2995 s++;
2996 tmp = *s++;
2997 if (tmp == '&')
a0d0e21e 2998 AOPERATOR(ANDAND);
378cc40b 2999 s--;
3280af22 3000 if (PL_expect == XOPERATOR) {
834a4ddd 3001 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
3280af22 3002 PL_curcop->cop_line--;
cea2e8a9 3003 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3280af22 3004 PL_curcop->cop_line++;
463ee0b2 3005 }
79072805 3006 BAop(OP_BIT_AND);
463ee0b2 3007 }
79072805 3008
3280af22
NIS
3009 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3010 if (*PL_tokenbuf) {
3011 PL_expect = XOPERATOR;
3012 force_ident(PL_tokenbuf, '&');
463ee0b2 3013 }
79072805
LW
3014 else
3015 PREREF('&');
c07a80fd 3016 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3017 TERM('&');
3018
378cc40b
LW
3019 case '|':
3020 s++;
3021 tmp = *s++;
3022 if (tmp == '|')
a0d0e21e 3023 AOPERATOR(OROR);
378cc40b 3024 s--;
79072805 3025 BOop(OP_BIT_OR);
378cc40b
LW
3026 case '=':
3027 s++;
3028 tmp = *s++;
3029 if (tmp == '=')
79072805
LW
3030 Eop(OP_EQ);
3031 if (tmp == '>')
3032 OPERATOR(',');
378cc40b 3033 if (tmp == '~')
79072805 3034 PMop(OP_MATCH);
599cee73 3035 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3036 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3037 s--;
3280af22
NIS
3038 if (PL_expect == XSTATE && isALPHA(tmp) &&
3039 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3040 {
3280af22
NIS
3041 if (PL_in_eval && !PL_rsfp) {
3042 d = PL_bufend;
a5f75d66
AD
3043 while (s < d) {
3044 if (*s++ == '\n') {
3045 incline(s);
3046 if (strnEQ(s,"=cut",4)) {
3047 s = strchr(s,'\n');
3048 if (s)
3049 s++;
3050 else
3051 s = d;
3052 incline(s);
3053 goto retry;
3054 }
3055 }
3056 }
3057 goto retry;
3058 }
3280af22
NIS
3059 s = PL_bufend;
3060 PL_doextract = TRUE;
a0d0e21e
LW
3061 goto retry;
3062 }
3280af22 3063 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3064 char *t;
51882d45 3065#ifdef PERL_STRICT_CR
a0d0e21e 3066 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3067#else
3068 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3069#endif
a0d0e21e
LW
3070 if (*t == '\n' || *t == '#') {
3071 s--;
3280af22 3072 PL_expect = XBLOCK;
a0d0e21e
LW
3073 goto leftbracket;
3074 }
79072805 3075 }
a0d0e21e
LW
3076 yylval.ival = 0;
3077 OPERATOR(ASSIGNOP);
378cc40b
LW
3078 case '!':
3079 s++;
3080 tmp = *s++;
3081 if (tmp == '=')
79072805 3082 Eop(OP_NE);
378cc40b 3083 if (tmp == '~')
79072805 3084 PMop(OP_NOT);
378cc40b
LW
3085 s--;
3086 OPERATOR('!');
3087 case '<':
3280af22 3088 if (PL_expect != XOPERATOR) {
93a17b20 3089 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3090 check_uni();
79072805
LW
3091 if (s[1] == '<')
3092 s = scan_heredoc(s);
3093 else
3094 s = scan_inputsymbol(s);
3095 TERM(sublex_start());
378cc40b
LW
3096 }
3097 s++;
3098 tmp = *s++;
3099 if (tmp == '<')
79072805 3100 SHop(OP_LEFT_SHIFT);
395c3793
LW
3101 if (tmp == '=') {
3102 tmp = *s++;
3103 if (tmp == '>')
79072805 3104 Eop(OP_NCMP);
395c3793 3105 s--;
79072805 3106 Rop(OP_LE);
395c3793 3107 }
378cc40b 3108 s--;
79072805 3109 Rop(OP_LT);
378cc40b
LW
3110 case '>':
3111 s++;
3112 tmp = *s++;
3113 if (tmp == '>')
79072805 3114 SHop(OP_RIGHT_SHIFT);
378cc40b 3115 if (tmp == '=')
79072805 3116 Rop(OP_GE);
378cc40b 3117 s--;
79072805 3118 Rop(OP_GT);
378cc40b
LW
3119
3120 case '$':
bbce6d69
PP
3121 CLINE;
3122
3280af22
NIS
3123 if (PL_expect == XOPERATOR) {
3124 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3125 PL_expect = XTERM;
a0d0e21e 3126 depcom();
bbce6d69 3127 return ','; /* grandfather non-comma-format format */
a0d0e21e 3128 }
8990e307 3129 }
a0d0e21e 3130
834a4ddd 3131 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3132 PL_tokenbuf[0] = '@';
376b8730
SM
3133 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3134 sizeof PL_tokenbuf - 1, FALSE);
3135 if (PL_expect == XOPERATOR)
3136 no_op("Array length", s);
3280af22 3137 if (!PL_tokenbuf[1])
a0d0e21e 3138 PREREF(DOLSHARP);
3280af22
NIS
3139 PL_expect = XOPERATOR;
3140 PL_pending_ident = '#';
463ee0b2 3141 TOKEN(DOLSHARP);
79072805 3142 }
bbce6d69 3143
3280af22 3144 PL_tokenbuf[0] = '$';
376b8730
SM
3145 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3146 sizeof PL_tokenbuf - 1, FALSE);
3147 if (PL_expect == XOPERATOR)
3148 no_op("Scalar", s);
3280af22
NIS
3149 if (!PL_tokenbuf[1]) {
3150 if (s == PL_bufend)
bbce6d69
PP
3151 yyerror("Final $ should be \\$ or $name");
3152 PREREF('$');
8990e307 3153 }
a0d0e21e 3154
bbce6d69 3155 /* This kludge not intended to be bulletproof. */
3280af22 3156 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3157 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3158 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3159 yylval.opval->op_private = OPpCONST_ARYBASE;
3160 TERM(THING);
3161 }
3162
ff68c719 3163 d = s;
69d2bceb 3164 tmp = (I32)*s;
3280af22 3165 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3166 s = skipspace(s);
3167
3280af22 3168 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3169 char *t;
3170 if (*s == '[') {
3280af22 3171 PL_tokenbuf[0] = '@';
599cee73 3172 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3173 for(t = s + 1;
834a4ddd 3174 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3175 t++) ;
a0d0e21e 3176 if (*t++ == ',') {
3280af22
NIS
3177 PL_bufptr = skipspace(PL_bufptr);
3178 while (t < PL_bufend && *t != ']')
bbce6d69 3179 t++;
cea2e8a9 3180 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3181 "Multidimensional syntax %.*s not supported",
3182 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3183 }
3184 }
bbce6d69
PP
3185 }
3186 else if (*s == '{') {
3280af22 3187 PL_tokenbuf[0] = '%';
599cee73 3188 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3189 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3190 {
3280af22 3191 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3192 STRLEN len;
3193 for (t++; isSPACE(*t); t++) ;
834a4ddd 3194 if (isIDFIRST_lazy(t)) {
8903cb82 3195 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3196 for (; isSPACE(*t); t++) ;
864dbfa3 3197 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3198 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3199 "You need to quote \"%s\"", tmpbuf);
748a9306 3200 }
93a17b20
LW
3201 }
3202 }
2f3197b3 3203 }
bbce6d69 3204
3280af22 3205 PL_expect = XOPERATOR;
69d2bceb 3206 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3207 bool islop = (PL_last_lop == PL_oldoldbufptr);
3208 if (!islop || PL_last_lop_op == OP_GREPSTART)
3209 PL_expect = XOPERATOR;
bbce6d69 3210 else if (strchr("$@\"'`q", *s))
3280af22 3211 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3212 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3213 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3214 else if (isIDFIRST_lazy(s)) {
3280af22 3215 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3216 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3217 if (tmp = keyword(tmpbuf, len)) {
3218 /* binary operators exclude handle interpretations */
3219 switch (tmp) {
3220 case -KEY_x:
3221 case -KEY_eq:
3222 case -KEY_ne:
3223 case -KEY_gt:
3224 case -KEY_lt:
3225 case -KEY_ge:
3226 case -KEY_le:
3227 case -KEY_cmp:
3228 break;
3229 default:
3280af22 3230 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3231 break;
3232 }
3233 }
68dc0745
PP
3234 else {
3235 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3236 if (gv && GvCVu(gv))
3280af22 3237 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3238 }
93a17b20 3239 }
bbce6d69 3240 else if (isDIGIT(*s))
3280af22 3241 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3242 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3243 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3244 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3245 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3246 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3247 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3248 }
3280af22 3249 PL_pending_ident = '$';
79072805 3250 TOKEN('$');
378cc40b
LW
3251
3252 case '@':
3280af22 3253 if (PL_expect == XOPERATOR)
bbce6d69 3254 no_op("Array", s);
3280af22
NIS
3255 PL_tokenbuf[0] = '@';
3256 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3257 if (!PL_tokenbuf[1]) {
3258 if (s == PL_bufend)
bbce6d69
PP
3259 yyerror("Final @ should be \\@ or @name");
3260 PREREF('@');
3261 }
3280af22 3262 if (PL_lex_state == LEX_NORMAL)
ff68c719 3263 s = skipspace(s);
3280af22 3264 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3265 if (*s == '{')
3280af22 3266 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3267
3268 /* Warn about @ where they meant $. */
599cee73 3269 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3270 if (*s == '[' || *s == '{') {
3271 char *t = s + 1;
834a4ddd 3272 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3273 t++;
3274 if (*t == '}' || *t == ']') {
3275 t++;
3280af22 3276 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3277 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3278 "Scalar value %.*s better written as $%.*s",
3280af22 3279 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3280 }
93a17b20
LW
3281 }
3282 }
463ee0b2 3283 }
3280af22 3284 PL_pending_ident = '@';
79072805 3285 TERM('@');
378cc40b
LW
3286
3287 case '/': /* may either be division or pattern */
3288 case '?': /* may either be conditional or pattern */
3280af22 3289 if (PL_expect != XOPERATOR) {
c277df42 3290 /* Disable warning on "study /blah/" */
3280af22
NIS
3291 if (PL_oldoldbufptr == PL_last_uni
3292 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3293 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3294 check_uni();
8782bef2 3295 s = scan_pat(s,OP_MATCH);
79072805 3296 TERM(sublex_start());
378cc40b
LW
3297 }
3298 tmp = *s++;
a687059c 3299 if (tmp == '/')
79072805 3300 Mop(OP_DIVIDE);
378cc40b
LW
3301 OPERATOR(tmp);
3302
3303 case '.':
51882d45
GS
3304 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3305#ifdef PERL_STRICT_CR
3306 && s[1] == '\n'
3307#else
3308 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3309#endif
3310 && (s == PL_linestart || s[-1] == '\n') )
3311 {
3280af22
NIS
3312 PL_lex_formbrack = 0;
3313 PL_expect = XSTATE;
79072805
LW
3314 goto rightbracket;
3315 }
3280af22 3316 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3317 tmp = *s++;
a687059c
LW
3318 if (*s == tmp) {
3319 s++;
2f3197b3
LW
3320 if (*s == tmp) {
3321 s++;
79072805 3322 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3323 }
3324 else
79072805 3325 yylval.ival = 0;
378cc40b 3326 OPERATOR(DOTDOT);
a687059c 3327 }
3280af22 3328 if (PL_expect != XOPERATOR)
2f3197b3 3329 check_uni();
79072805 3330 Aop(OP_CONCAT);
378cc40b
LW
3331 }
3332 /* FALL THROUGH */
3333 case '0': case '1': case '2': case '3': case '4':
3334 case '5': case '6': case '7': case '8': case '9':
79072805 3335 s = scan_num(s);
3280af22 3336 if (PL_expect == XOPERATOR)
8990e307 3337 no_op("Number",s);
79072805
LW
3338 TERM(THING);
3339
3340 case '\'':
09bef843 3341 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3342 if (PL_expect == XOPERATOR) {
3343 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3344 PL_expect = XTERM;
a0d0e21e
LW
3345 depcom();
3346 return ','; /* grandfather non-comma-format format */
3347 }
463ee0b2 3348 else
8990e307 3349 no_op("String",s);
463ee0b2 3350 }
79072805 3351 if (!s)
85e6fe83 3352 missingterm((char*)0);
79072805
LW
3353 yylval.ival = OP_CONST;
3354 TERM(sublex_start());
3355
3356 case '"':
09bef843 3357 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3358 if (PL_expect == XOPERATOR) {
3359 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3360 PL_expect = XTERM;
a0d0e21e
LW
3361 depcom();
3362 return ','; /* grandfather non-comma-format format */
3363 }
463ee0b2 3364 else
8990e307 3365 no_op("String",s);
463ee0b2 3366 }
79072805 3367 if (!s)
85e6fe83 3368 missingterm((char*)0);
4633a7c4 3369 yylval.ival = OP_CONST;
3280af22 3370 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3371 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3372 yylval.ival = OP_STRINGIFY;
3373 break;
3374 }
3375 }
79072805
LW
3376 TERM(sublex_start());
3377
3378 case '`':
09bef843 3379 s = scan_str(s,FALSE,FALSE);
3280af22 3380 if (PL_expect == XOPERATOR)
8990e307 3381 no_op("Backticks",s);
79072805 3382 if (!s)
85e6fe83 3383 missingterm((char*)0);
79072805
LW
3384 yylval.ival = OP_BACKTICK;
3385 set_csh();
3386 TERM(sublex_start());
3387
3388 case '\\':
3389 s++;
599cee73 3390 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3391 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3392 *s, *s);
3280af22 3393 if (PL_expect == XOPERATOR)
8990e307 3394 no_op("Backslash",s);
79072805
LW
3395 OPERATOR(REFGEN);
3396
3397 case 'x':
3280af22 3398 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3399 s++;
3400 Mop(OP_REPEAT);
2f3197b3 3401 }
79072805
LW
3402 goto keylookup;
3403
378cc40b 3404 case '_':
79072805
LW
3405 case 'a': case 'A':
3406 case 'b': case 'B':
3407 case 'c': case 'C':
3408 case 'd': case 'D':
3409 case 'e': case 'E':
3410 case 'f': case 'F':
3411 case 'g': case 'G':
3412 case 'h': case 'H':
3413 case 'i': case 'I':
3414 case 'j': case 'J':
3415 case 'k': case 'K':
3416 case 'l': case 'L':
3417 case 'm': case 'M':
3418 case 'n': case 'N':
3419 case 'o': case 'O':
3420 case 'p': case 'P':
3421 case 'q': case 'Q':
3422 case 'r': case 'R':
3423 case 's': case 'S':
3424 case 't': case 'T':
3425 case 'u': case 'U':
3426 case 'v': case 'V':
3427 case 'w': case 'W':
3428 case 'X':
3429 case 'y': case 'Y':
3430 case 'z': case 'Z':
3431
49dc05e3 3432 keylookup: {
2d8e6c8d 3433 STRLEN n_a;
161b471a
NIS
3434 gv = Nullgv;
3435 gvp = 0;
49dc05e3 3436
3280af22
NIS
3437 PL_bufptr = s;
3438 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3439
3440 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3441 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3442 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3443 (PL_tokenbuf[0] == 'q' &&
3444 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3445
3446 /* x::* is just a word, unless x is "CORE" */
3280af22 3447 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3448 goto just_a_word;
3449
3643fb5f 3450 d = s;
3280af22 3451 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3452 d++; /* no comments skipped here, or s### is misparsed */
3453
3454 /* Is this a label? */
3280af22
NIS
3455 if (!tmp && PL_expect == XSTATE
3456 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3457 s = d + 1;
3280af22 3458 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3459 CLINE;
3460 TOKEN(LABEL);
3643fb5f
CS
3461 }
3462
3463 /* Check for keywords */
3280af22 3464 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3465
3466 /* Is this a word before a => operator? */
748a9306
LW
3467 if (strnEQ(d,"=>",2)) {
3468 CLINE;
3280af22 3469 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3470 yylval.opval->op_private = OPpCONST_BARE;
3471 TERM(WORD);
3472 }
3473
a0d0e21e 3474 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3475 GV *ogv = Nullgv; /* override (winner) */
3476 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3477 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3478 CV *cv;
3280af22 3479 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3480 (cv = GvCVu(gv)))
3481 {
3482 if (GvIMPORTED_CV(gv))
3483 ogv = gv;
3484 else if (! CvMETHOD(cv))
3485 hgv = gv;
3486 }
3487 if (!ogv &&
3280af22
NIS
3488 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3489 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3490 GvCVu(gv) && GvIMPORTED_CV(gv))
3491 {
3492 ogv = gv;
3493 }
3494 }
3495 if (ogv) {
3496 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3497 }
3498 else if (gv && !gvp
3499 && -tmp==KEY_lock /* XXX generalizable kludge */
3280af22 3500 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3501 {
3502 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3503 }
56f7f34b
CS
3504 else { /* no override */
3505 tmp = -tmp;
3506 gv = Nullgv;
3507 gvp = 0;
4944e2f7
GS
3508 if (ckWARN(WARN_AMBIGUOUS) && hgv
3509 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3510 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3511 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3512 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3513 }
a0d0e21e
LW
3514 }
3515
3516 reserved_word:
3517 switch (tmp) {
79072805
LW
3518
3519 default: /* not a keyword */
93a17b20 3520 just_a_word: {
96e4d5b1 3521 SV *sv;
3280af22 3522 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3523
3524 /* Get the rest if it looks like a package qualifier */
3525
a0d0e21e 3526 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3527 STRLEN morelen;
3280af22 3528 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3529 TRUE, &morelen);
3530 if (!morelen)
cea2e8a9 3531 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3532 *s == '\'' ? "'" : "::");
c3e0f903 3533 len += morelen;
a0d0e21e 3534 }
8990e307 3535
3280af22
NIS
3536 if (PL_expect == XOPERATOR) {
3537 if (PL_bufptr == PL_linestart) {
3538 PL_curcop->cop_line--;
cea2e8a9 3539 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
3280af22 3540 PL_curcop->cop_line++;
463ee0b2
LW
3541 }
3542 else
54310121 3543 no_op("Bareword",s);
463ee0b2 3544 }
8990e307 3545
c3e0f903
GS
3546 /* Look for a subroutine with this name in current package,
3547 unless name is "Foo::", in which case Foo is a bearword
3548 (and a package name). */
3549
3550 if (len > 2 &&
3280af22 3551 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3552 {
599cee73 3553 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3554 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3555 "Bareword \"%s\" refers to nonexistent package",
3280af22 3556 PL_tokenbuf);
c3e0f903 3557 len -= 2;
3280af22 3558 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3559 gv = Nullgv;
3560 gvp = 0;
3561 }
3562 else {
3563 len = 0;
3564 if (!gv)
3280af22 3565 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3566 }
3567
3568 /* if we saw a global override before, get the right name */
8990e307 3569
49dc05e3 3570 if (gvp) {
79cb57f6 3571 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3572 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3573 }
3574 else
3280af22 3575 sv = newSVpv(PL_tokenbuf,0);
8990e307 3576
a0d0e21e
LW
3577 /* Presume this is going to be a bareword of some sort. */
3578
3579 CLINE;
49dc05e3 3580 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3581 yylval.opval->op_private = OPpCONST_BARE;
3582
c3e0f903
GS
3583 /* And if "Foo::", then that's what it certainly is. */
3584
3585 if (len)
3586 goto safe_bareword;
3587
8990e307
LW
3588 /* See if it's the indirect object for a list operator. */
3589
3280af22
NIS
3590 if (PL_oldoldbufptr &&
3591 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3592 (PL_oldoldbufptr == PL_last_lop
3593 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3594 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3595 (PL_expect == XREF ||
3596 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3597 {
748a9306
LW
3598 bool immediate_paren = *s == '(';
3599
a0d0e21e
LW
3600 /* (Now we can afford to cross potential line boundary.) */
3601 s = skipspace(s);
3602
3603 /* Two barewords in a row may indicate method call. */
3604
834a4ddd 3605 if ((isIDFIRST_lazy(s) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3606 return tmp;
3607
3608 /* If not a declared subroutine, it's an indirect object. */
3609 /* (But it's