This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
integrate mainline contents into utfperl
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
a0ed51b3 35#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
36/*
37 * Note: we try to be careful never to call the isXXX_utf8() functions
38 * unless we're pretty sure we've seen the beginning of a UTF-8 character
39 * (that is, the two high bits are set). Otherwise we risk loading in the
40 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
41 */
42#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
43 ? isIDFIRST(*(p)) \
44 : isIDFIRST_utf8((U8*)p))
45#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
46 ? isALNUM(*(p)) \
47 : isALNUM_utf8((U8*)p))
a0ed51b3 48
2b92dfce
GS
49/* In variables name $^X, these are the legal values for X.
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
395c3793
LW
72#ifdef I_FCNTL
73#include <fcntl.h>
74#endif
fe14fcc3
LW
75#ifdef I_SYS_FILE
76#include <sys/file.h>
77#endif
395c3793 78
a790bc05
PP
79/* XXX If this causes problems, set i_unistd=undef in the hint file. */
80#ifdef I_UNISTD
81# include <unistd.h> /* Needed for execv() */
82#endif
83
84
79072805
LW
85#ifdef ff_next
86#undef ff_next
d48672a2
LW
87#endif
88
a1a0e61e
TD
89#ifdef USE_PURE_BISON
90YYSTYPE* yylval_pointer = NULL;
91int* yychar_pointer = NULL;
22c35a8c
GS
92# undef yylval
93# undef yychar
e4bfbdd4
JH
94# define yylval (*yylval_pointer)
95# define yychar (*yychar_pointer)
96# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
97# undef yylex
98# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
134 *
135 * Also see LOP and lop() below.
136 */
137
3280af22
NIS
138#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
139#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
140#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
141#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
142#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
143#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
144#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
145#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
146#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
147#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
148#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
149#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
150#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
151#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
152#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
153#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
154#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
155#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
156#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
157#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 158
a687059c
LW
159/* This bit of chicanery makes a unary function followed by
160 * a parenthesis into a function with one argument, highest precedence.
161 */
2f3197b3 162#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
79072805
LW
172 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
173
9f68db38 174/* grandfather return to old style */
3280af22 175#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 176
ffb4593c
NT
177/*
178 * S_ao
179 *
180 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
181 * into an OP_ANDASSIGN or OP_ORASSIGN
182 */
183
76e3520e 184STATIC int
cea2e8a9 185S_ao(pTHX_ int toketype)
a0d0e21e 186{
3280af22
NIS
187 if (*PL_bufptr == '=') {
188 PL_bufptr++;
a0d0e21e
LW
189 if (toketype == ANDAND)
190 yylval.ival = OP_ANDASSIGN;
191 else if (toketype == OROR)
192 yylval.ival = OP_ORASSIGN;
193 toketype = ASSIGNOP;
194 }
195 return toketype;
196}
197
ffb4593c
NT
198/*
199 * S_no_op
200 * When Perl expects an operator and finds something else, no_op
201 * prints the warning. It always prints "<something> found where
202 * operator expected. It prints "Missing semicolon on previous line?"
203 * if the surprise occurs at the start of the line. "do you need to
204 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
205 * where the compiler doesn't know if foo is a method call or a function.
206 * It prints "Missing operator before end of line" if there's nothing
207 * after the missing operator, or "... before <...>" if there is something
208 * after the missing operator.
209 */
210
76e3520e 211STATIC void
cea2e8a9 212S_no_op(pTHX_ char *what, char *s)
463ee0b2 213{
3280af22
NIS
214 char *oldbp = PL_bufptr;
215 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 216
1189a94a
GS
217 if (!s)
218 s = oldbp;
219 else {
220 assert(s >= oldbp);
221 PL_bufptr = s;
222 }
cea2e8a9 223 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 224 if (is_first)
cea2e8a9 225 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 226 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 227 char *t;
834a4ddd 228 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 229 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 230 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 231 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
232 }
233 else
cea2e8a9 234 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 235 PL_bufptr = oldbp;
8990e307
LW
236}
237
ffb4593c
NT
238/*
239 * S_missingterm
240 * Complain about missing quote/regexp/heredoc terminator.
241 * If it's called with (char *)NULL then it cauterizes the line buffer.
242 * If we're in a delimited string and the delimiter is a control
243 * character, it's reformatted into a two-char sequence like ^C.
244 * This is fatal.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_missingterm(pTHX_ char *s)
8990e307
LW
249{
250 char tmpbuf[3];
251 char q;
252 if (s) {
253 char *nl = strrchr(s,'\n');
d2719217 254 if (nl)
8990e307
LW
255 *nl = '\0';
256 }
9d116dd7
JH
257 else if (
258#ifdef EBCDIC
259 iscntrl(PL_multi_close)
260#else
261 PL_multi_close < 32 || PL_multi_close == 127
262#endif
263 ) {
8990e307 264 *tmpbuf = '^';
3280af22 265 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
266 s = "\\n";
267 tmpbuf[2] = '\0';
268 s = tmpbuf;
269 }
270 else {
3280af22 271 *tmpbuf = PL_multi_close;
8990e307
LW
272 tmpbuf[1] = '\0';
273 s = tmpbuf;
274 }
275 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 276 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 277}
79072805 278
ffb4593c
NT
279/*
280 * Perl_deprecate
ffb4593c
NT
281 */
282
79072805 283void
864dbfa3 284Perl_deprecate(pTHX_ char *s)
a0d0e21e 285{
d008e5eb 286 dTHR;
599cee73 287 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 288 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
289}
290
ffb4593c
NT
291/*
292 * depcom
9cbb5ea2 293 * Deprecate a comma-less variable list.
ffb4593c
NT
294 */
295
76e3520e 296STATIC void
cea2e8a9 297S_depcom(pTHX)
a0d0e21e
LW
298{
299 deprecate("comma-less variable list");
300}
301
ffb4593c 302/*
9cbb5ea2
GS
303 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
304 * utf16-to-utf8-reversed.
ffb4593c
NT
305 */
306
a868473f
NIS
307#ifdef WIN32
308
76e3520e 309STATIC I32
cea2e8a9 310S_win32_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f
NIS
311{
312 I32 count = FILTER_READ(idx+1, sv, maxlen);
313 if (count > 0 && !maxlen)
314 win32_strip_return(sv);
315 return count;
316}
317#endif
318
a0ed51b3 319STATIC I32
cea2e8a9 320S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
321{
322 I32 count = FILTER_READ(idx+1, sv, maxlen);
323 if (count) {
dfe13c55
GS
324 U8* tmps;
325 U8* tend;
326 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 327 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 328 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
329
330 }
331 return count;
332}
333
334STATIC I32
cea2e8a9 335S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
336{
337 I32 count = FILTER_READ(idx+1, sv, maxlen);
338 if (count) {
dfe13c55
GS
339 U8* tmps;
340 U8* tend;
341 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 342 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 343 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
344
345 }
346 return count;
347}
a868473f 348
ffb4593c
NT
349/*
350 * Perl_lex_start
9cbb5ea2
GS
351 * Initialize variables. Uses the Perl save_stack to save its state (for
352 * recursive calls to the parser).
ffb4593c
NT
353 */
354
a0d0e21e 355void
864dbfa3 356Perl_lex_start(pTHX_ SV *line)
79072805 357{
0f15f207 358 dTHR;
8990e307
LW
359 char *s;
360 STRLEN len;
361
3280af22
NIS
362 SAVEI32(PL_lex_dojoin);
363 SAVEI32(PL_lex_brackets);
3280af22
NIS
364 SAVEI32(PL_lex_casemods);
365 SAVEI32(PL_lex_starts);
366 SAVEI32(PL_lex_state);
7766f137 367 SAVEVPTR(PL_lex_inpat);
3280af22 368 SAVEI32(PL_lex_inwhat);
57843af0 369 SAVECOPLINE(PL_curcop);
3280af22
NIS
370 SAVEPPTR(PL_bufptr);
371 SAVEPPTR(PL_bufend);
372 SAVEPPTR(PL_oldbufptr);
373 SAVEPPTR(PL_oldoldbufptr);
374 SAVEPPTR(PL_linestart);
375 SAVESPTR(PL_linestr);
376 SAVEPPTR(PL_lex_brackstack);
377 SAVEPPTR(PL_lex_casestack);
c76ac1ee 378 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
379 SAVESPTR(PL_lex_stuff);
380 SAVEI32(PL_lex_defer);
09bef843 381 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 382 SAVESPTR(PL_lex_repl);
bebdddfc
GS
383 SAVEINT(PL_expect);
384 SAVEINT(PL_lex_expect);
3280af22
NIS
385
386 PL_lex_state = LEX_NORMAL;
387 PL_lex_defer = 0;
388 PL_expect = XSTATE;
389 PL_lex_brackets = 0;
3280af22
NIS
390 New(899, PL_lex_brackstack, 120, char);
391 New(899, PL_lex_casestack, 12, char);
392 SAVEFREEPV(PL_lex_brackstack);
393 SAVEFREEPV(PL_lex_casestack);
394 PL_lex_casemods = 0;
395 *PL_lex_casestack = '\0';
396 PL_lex_dojoin = 0;
397 PL_lex_starts = 0;
398 PL_lex_stuff = Nullsv;
399 PL_lex_repl = Nullsv;
400 PL_lex_inpat = 0;
401 PL_lex_inwhat = 0;
09bef843 402 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
403 PL_linestr = line;
404 if (SvREADONLY(PL_linestr))
405 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
406 s = SvPV(PL_linestr, len);
8990e307 407 if (len && s[len-1] != ';') {
3280af22
NIS
408 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
409 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
410 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 411 }
3280af22
NIS
412 SvTEMP_off(PL_linestr);
413 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
414 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
415 SvREFCNT_dec(PL_rs);
79cb57f6 416 PL_rs = newSVpvn("\n", 1);
3280af22 417 PL_rsfp = 0;
79072805 418}
a687059c 419
ffb4593c
NT
420/*
421 * Perl_lex_end
9cbb5ea2
GS
422 * Finalizer for lexing operations. Must be called when the parser is
423 * done with the lexer.
ffb4593c
NT
424 */
425
463ee0b2 426void
864dbfa3 427Perl_lex_end(pTHX)
463ee0b2 428{
3280af22 429 PL_doextract = FALSE;
463ee0b2
LW
430}
431
ffb4593c
NT
432/*
433 * S_incline
434 * This subroutine has nothing to do with tilting, whether at windmills
435 * or pinball tables. Its name is short for "increment line". It
57843af0 436 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 437 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
438 * # line 500 "foo.pm"
439 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
440 */
441
76e3520e 442STATIC void
cea2e8a9 443S_incline(pTHX_ char *s)
463ee0b2 444{
0f15f207 445 dTHR;
463ee0b2
LW
446 char *t;
447 char *n;
448 char ch;
449 int sawline = 0;
450
57843af0 451 CopLINE_inc(PL_curcop);
463ee0b2
LW
452 if (*s++ != '#')
453 return;
454 while (*s == ' ' || *s == '\t') s++;
455 if (strnEQ(s, "line ", 5)) {
456 s += 5;
457 sawline = 1;
458 }
459 if (!isDIGIT(*s))
460 return;
461 n = s;
462 while (isDIGIT(*s))
463 s++;
464 while (*s == ' ' || *s == '\t')
465 s++;
466 if (*s == '"' && (t = strchr(s+1, '"')))
467 s++;
468 else {
469 if (!sawline)
470 return; /* false alarm */
471 for (t = s; !isSPACE(*t); t++) ;
472 }
473 ch = *t;
474 *t = '\0';
475 if (t - s > 0)
57843af0 476 CopFILE_set(PL_curcop, s);
463ee0b2 477 else
57843af0 478 CopFILE_set(PL_curcop, PL_origfilename);
463ee0b2 479 *t = ch;
57843af0 480 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
481}
482
ffb4593c
NT
483/*
484 * S_skipspace
485 * Called to gobble the appropriate amount and type of whitespace.
486 * Skips comments as well.
487 */
488
76e3520e 489STATIC char *
cea2e8a9 490S_skipspace(pTHX_ register char *s)
a687059c 491{
11343788 492 dTHR;
3280af22
NIS
493 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
494 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
495 s++;
496 return s;
497 }
498 for (;;) {
fd049845 499 STRLEN prevlen;
09bef843
SB
500 SSize_t oldprevlen, oldoldprevlen;
501 SSize_t oldloplen, oldunilen;
60e6418e
GS
502 while (s < PL_bufend && isSPACE(*s)) {
503 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
504 incline(s);
505 }
ffb4593c
NT
506
507 /* comment */
3280af22
NIS
508 if (s < PL_bufend && *s == '#') {
509 while (s < PL_bufend && *s != '\n')
463ee0b2 510 s++;
60e6418e 511 if (s < PL_bufend) {
463ee0b2 512 s++;
60e6418e
GS
513 if (PL_in_eval && !PL_rsfp) {
514 incline(s);
515 continue;
516 }
517 }
463ee0b2 518 }
ffb4593c
NT
519
520 /* only continue to recharge the buffer if we're at the end
521 * of the buffer, we're not reading from a source filter, and
522 * we're in normal lexing mode
523 */
09bef843
SB
524 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
525 PL_lex_state == LEX_FORMLINE)
463ee0b2 526 return s;
ffb4593c
NT
527
528 /* try to recharge the buffer */
9cbb5ea2
GS
529 if ((s = filter_gets(PL_linestr, PL_rsfp,
530 (prevlen = SvCUR(PL_linestr)))) == Nullch)
531 {
532 /* end of file. Add on the -p or -n magic */
3280af22
NIS
533 if (PL_minus_n || PL_minus_p) {
534 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
535 ";}continue{print or die qq(-p destination: $!\\n)" :
536 "");
3280af22
NIS
537 sv_catpv(PL_linestr,";}");
538 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
539 }
540 else
3280af22 541 sv_setpv(PL_linestr,";");
ffb4593c
NT
542
543 /* reset variables for next time we lex */
9cbb5ea2
GS
544 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
545 = SvPVX(PL_linestr);
3280af22 546 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
547
548 /* Close the filehandle. Could be from -P preprocessor,
549 * STDIN, or a regular file. If we were reading code from
550 * STDIN (because the commandline held no -e or filename)
551 * then we don't close it, we reset it so the code can
552 * read from STDIN too.
553 */
554
3280af22
NIS
555 if (PL_preprocess && !PL_in_eval)
556 (void)PerlProc_pclose(PL_rsfp);
557 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
558 PerlIO_clearerr(PL_rsfp);
8990e307 559 else
3280af22
NIS
560 (void)PerlIO_close(PL_rsfp);
561 PL_rsfp = Nullfp;
463ee0b2
LW
562 return s;
563 }
ffb4593c
NT
564
565 /* not at end of file, so we only read another line */
09bef843
SB
566 /* make corresponding updates to old pointers, for yyerror() */
567 oldprevlen = PL_oldbufptr - PL_bufend;
568 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
569 if (PL_last_uni)
570 oldunilen = PL_last_uni - PL_bufend;
571 if (PL_last_lop)
572 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
573 PL_linestart = PL_bufptr = s + prevlen;
574 PL_bufend = s + SvCUR(PL_linestr);
575 s = PL_bufptr;
09bef843
SB
576 PL_oldbufptr = s + oldprevlen;
577 PL_oldoldbufptr = s + oldoldprevlen;
578 if (PL_last_uni)
579 PL_last_uni = s + oldunilen;
580 if (PL_last_lop)
581 PL_last_lop = s + oldloplen;
a0d0e21e 582 incline(s);
ffb4593c
NT
583
584 /* debugger active and we're not compiling the debugger code,
585 * so store the line into the debugger's array of lines
586 */
3280af22 587 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
588 SV *sv = NEWSV(85,0);
589
590 sv_upgrade(sv, SVt_PVMG);
3280af22 591 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 592 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 593 }
463ee0b2 594 }
a687059c 595}
378cc40b 596
ffb4593c
NT
597/*
598 * S_check_uni
599 * Check the unary operators to ensure there's no ambiguity in how they're
600 * used. An ambiguous piece of code would be:
601 * rand + 5
602 * This doesn't mean rand() + 5. Because rand() is a unary operator,
603 * the +5 is its argument.
604 */
605
76e3520e 606STATIC void
cea2e8a9 607S_check_uni(pTHX)
ba106d47 608{
2f3197b3 609 char *s;
a0d0e21e 610 char *t;
0453d815 611 dTHR;
2f3197b3 612
3280af22 613 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 614 return;
3280af22
NIS
615 while (isSPACE(*PL_last_uni))
616 PL_last_uni++;
834a4ddd 617 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 618 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 619 return;
0453d815 620 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 621 char ch = *s;
0453d815
PM
622 *s = '\0';
623 Perl_warner(aTHX_ WARN_AMBIGUOUS,
624 "Warning: Use of \"%s\" without parens is ambiguous",
625 PL_last_uni);
626 *s = ch;
627 }
2f3197b3
LW
628}
629
ffb4593c
NT
630/* workaround to replace the UNI() macro with a function. Only the
631 * hints/uts.sh file mentions this. Other comments elsewhere in the
632 * source indicate Microport Unix might need it too.
633 */
634
ffed7fef
LW
635#ifdef CRIPPLED_CC
636
637#undef UNI
ffed7fef 638#define UNI(f) return uni(f,s)
ffed7fef 639
76e3520e 640STATIC int
cea2e8a9 641S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
642{
643 yylval.ival = f;
3280af22
NIS
644 PL_expect = XTERM;
645 PL_bufptr = s;
8f872242
NIS
646 PL_last_uni = PL_oldbufptr;
647 PL_last_lop_op = f;
ffed7fef
LW
648 if (*s == '(')
649 return FUNC1;
650 s = skipspace(s);
651 if (*s == '(')
652 return FUNC1;
653 else
654 return UNIOP;
655}
656
a0d0e21e
LW
657#endif /* CRIPPLED_CC */
658
ffb4593c
NT
659/*
660 * LOP : macro to build a list operator. Its behaviour has been replaced
661 * with a subroutine, S_lop() for which LOP is just another name.
662 */
663
a0d0e21e
LW
664#define LOP(f,x) return lop(f,x,s)
665
ffb4593c
NT
666/*
667 * S_lop
668 * Build a list operator (or something that might be one). The rules:
669 * - if we have a next token, then it's a list operator [why?]
670 * - if the next thing is an opening paren, then it's a function
671 * - else it's a list operator
672 */
673
76e3520e 674STATIC I32
a0be28da 675S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 676{
0f15f207 677 dTHR;
79072805 678 yylval.ival = f;
35c8bce7 679 CLINE;
3280af22
NIS
680 PL_expect = x;
681 PL_bufptr = s;
682 PL_last_lop = PL_oldbufptr;
683 PL_last_lop_op = f;
684 if (PL_nexttoke)
a0d0e21e 685 return LSTOP;
79072805
LW
686 if (*s == '(')
687 return FUNC;
688 s = skipspace(s);
689 if (*s == '(')
690 return FUNC;
691 else
692 return LSTOP;
693}
694
ffb4593c
NT
695/*
696 * S_force_next
9cbb5ea2 697 * When the lexer realizes it knows the next token (for instance,
ffb4593c 698 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
699 * to know what token to return the next time the lexer is called. Caller
700 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
701 * handles the token correctly.
ffb4593c
NT
702 */
703
76e3520e 704STATIC void
cea2e8a9 705S_force_next(pTHX_ I32 type)
79072805 706{
3280af22
NIS
707 PL_nexttype[PL_nexttoke] = type;
708 PL_nexttoke++;
709 if (PL_lex_state != LEX_KNOWNEXT) {
710 PL_lex_defer = PL_lex_state;
711 PL_lex_expect = PL_expect;
712 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
713 }
714}
715
ffb4593c
NT
716/*
717 * S_force_word
718 * When the lexer knows the next thing is a word (for instance, it has
719 * just seen -> and it knows that the next char is a word char, then
720 * it calls S_force_word to stick the next word into the PL_next lookahead.
721 *
722 * Arguments:
b1b65b59 723 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
724 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
725 * int check_keyword : if true, Perl checks to make sure the word isn't
726 * a keyword (do this if the word is a label, e.g. goto FOO)
727 * int allow_pack : if true, : characters will also be allowed (require,
728 * use, etc. do this)
9cbb5ea2 729 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
730 */
731
76e3520e 732STATIC char *
cea2e8a9 733S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 734{
463ee0b2
LW
735 register char *s;
736 STRLEN len;
737
738 start = skipspace(start);
739 s = start;
834a4ddd 740 if (isIDFIRST_lazy(s) ||
a0d0e21e 741 (allow_pack && *s == ':') ||
15f0808c 742 (allow_initial_tick && *s == '\'') )
a0d0e21e 743 {
3280af22
NIS
744 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
745 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
746 return start;
747 if (token == METHOD) {
748 s = skipspace(s);
749 if (*s == '(')
3280af22 750 PL_expect = XTERM;
463ee0b2 751 else {
3280af22 752 PL_expect = XOPERATOR;
463ee0b2 753 }
79072805 754 }
3280af22
NIS
755 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
756 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
757 force_next(token);
758 }
759 return s;
760}
761
ffb4593c
NT
762/*
763 * S_force_ident
9cbb5ea2 764 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
765 * text only contains the "foo" portion. The first argument is a pointer
766 * to the "foo", and the second argument is the type symbol to prefix.
767 * Forces the next token to be a "WORD".
9cbb5ea2 768 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
769 */
770
76e3520e 771STATIC void
cea2e8a9 772S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
773{
774 if (s && *s) {
11343788 775 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 776 PL_nextval[PL_nexttoke].opval = o;
79072805 777 force_next(WORD);
748a9306 778 if (kind) {
e858de61 779 dTHR; /* just for in_eval */
11343788 780 o->op_private = OPpCONST_ENTERED;
55497cff
PP
781 /* XXX see note in pp_entereval() for why we forgo typo
782 warnings if the symbol must be introduced in an eval.
783 GSAR 96-10-12 */
3280af22 784 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
785 kind == '$' ? SVt_PV :
786 kind == '@' ? SVt_PVAV :
787 kind == '%' ? SVt_PVHV :
788 SVt_PVGV
789 );
748a9306 790 }
79072805
LW
791 }
792}
793
ffb4593c
NT
794/*
795 * S_force_version
796 * Forces the next token to be a version number.
797 */
798
76e3520e 799STATIC char *
cea2e8a9 800S_force_version(pTHX_ char *s)
89bfa8cd
PP
801{
802 OP *version = Nullop;
803
804 s = skipspace(s);
805
806 /* default VERSION number -- GBARR */
807
808 if(isDIGIT(*s)) {
809 char *d;
810 int c;
55497cff 811 for( d=s, c = 1; isDIGIT(*d) || *d == '_' || (*d == '.' && c--); d++);
89bfa8cd
PP
812 if((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
813 s = scan_num(s);
814 /* real VERSION number -- GBARR */
815 version = yylval.opval;
816 }
817 }
818
819 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 820 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
821 force_next(WORD);
822
823 return (s);
824}
825
ffb4593c
NT
826/*
827 * S_tokeq
828 * Tokenize a quoted string passed in as an SV. It finds the next
829 * chunk, up to end of string or a backslash. It may make a new
830 * SV containing that chunk (if HINT_NEW_STRING is on). It also
831 * turns \\ into \.
832 */
833
76e3520e 834STATIC SV *
cea2e8a9 835S_tokeq(pTHX_ SV *sv)
79072805
LW
836{
837 register char *s;
838 register char *send;
839 register char *d;
b3ac6de7
IZ
840 STRLEN len = 0;
841 SV *pv = sv;
79072805
LW
842
843 if (!SvLEN(sv))
b3ac6de7 844 goto finish;
79072805 845
a0d0e21e 846 s = SvPV_force(sv, len);
748a9306 847 if (SvIVX(sv) == -1)
b3ac6de7 848 goto finish;
463ee0b2 849 send = s + len;
79072805
LW
850 while (s < send && *s != '\\')
851 s++;
852 if (s == send)
b3ac6de7 853 goto finish;
79072805 854 d = s;
3280af22 855 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 856 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
857 while (s < send) {
858 if (*s == '\\') {
a0d0e21e 859 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
860 s++; /* all that, just for this */
861 }
862 *d++ = *s++;
863 }
864 *d = '\0';
463ee0b2 865 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 866 finish:
3280af22 867 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 868 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
869 return sv;
870}
871
ffb4593c
NT
872/*
873 * Now come three functions related to double-quote context,
874 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
875 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
876 * interact with PL_lex_state, and create fake ( ... ) argument lists
877 * to handle functions and concatenation.
878 * They assume that whoever calls them will be setting up a fake
879 * join call, because each subthing puts a ',' after it. This lets
880 * "lower \luPpEr"
881 * become
882 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
883 *
884 * (I'm not sure whether the spurious commas at the end of lcfirst's
885 * arguments and join's arguments are created or not).
886 */
887
888/*
889 * S_sublex_start
890 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
891 *
892 * Pattern matching will set PL_lex_op to the pattern-matching op to
893 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
894 *
895 * OP_CONST and OP_READLINE are easy--just make the new op and return.
896 *
897 * Everything else becomes a FUNC.
898 *
899 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
900 * had an OP_CONST or OP_READLINE). This just sets us up for a
901 * call to S_sublex_push().
902 */
903
76e3520e 904STATIC I32
cea2e8a9 905S_sublex_start(pTHX)
79072805
LW
906{
907 register I32 op_type = yylval.ival;
79072805
LW
908
909 if (op_type == OP_NULL) {
3280af22
NIS
910 yylval.opval = PL_lex_op;
911 PL_lex_op = Nullop;
79072805
LW
912 return THING;
913 }
914 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 915 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
916
917 if (SvTYPE(sv) == SVt_PVIV) {
918 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
919 STRLEN len;
920 char *p;
921 SV *nsv;
922
923 p = SvPV(sv, len);
79cb57f6 924 nsv = newSVpvn(p, len);
b3ac6de7
IZ
925 SvREFCNT_dec(sv);
926 sv = nsv;
927 }
928 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 929 PL_lex_stuff = Nullsv;
79072805
LW
930 return THING;
931 }
932
3280af22
NIS
933 PL_sublex_info.super_state = PL_lex_state;
934 PL_sublex_info.sub_inwhat = op_type;
935 PL_sublex_info.sub_op = PL_lex_op;
936 PL_lex_state = LEX_INTERPPUSH;
55497cff 937
3280af22
NIS
938 PL_expect = XTERM;
939 if (PL_lex_op) {
940 yylval.opval = PL_lex_op;
941 PL_lex_op = Nullop;
55497cff
PP
942 return PMFUNC;
943 }
944 else
945 return FUNC;
946}
947
ffb4593c
NT
948/*
949 * S_sublex_push
950 * Create a new scope to save the lexing state. The scope will be
951 * ended in S_sublex_done. Returns a '(', starting the function arguments
952 * to the uc, lc, etc. found before.
953 * Sets PL_lex_state to LEX_INTERPCONCAT.
954 */
955
76e3520e 956STATIC I32
cea2e8a9 957S_sublex_push(pTHX)
55497cff 958{
0f15f207 959 dTHR;
f46d017c 960 ENTER;
55497cff 961
3280af22
NIS
962 PL_lex_state = PL_sublex_info.super_state;
963 SAVEI32(PL_lex_dojoin);
964 SAVEI32(PL_lex_brackets);
3280af22
NIS
965 SAVEI32(PL_lex_casemods);
966 SAVEI32(PL_lex_starts);
967 SAVEI32(PL_lex_state);
7766f137 968 SAVEVPTR(PL_lex_inpat);
3280af22 969 SAVEI32(PL_lex_inwhat);
57843af0 970 SAVECOPLINE(PL_curcop);
3280af22
NIS
971 SAVEPPTR(PL_bufptr);
972 SAVEPPTR(PL_oldbufptr);
973 SAVEPPTR(PL_oldoldbufptr);
974 SAVEPPTR(PL_linestart);
975 SAVESPTR(PL_linestr);
976 SAVEPPTR(PL_lex_brackstack);
977 SAVEPPTR(PL_lex_casestack);
978
979 PL_linestr = PL_lex_stuff;
980 PL_lex_stuff = Nullsv;
981
9cbb5ea2
GS
982 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
983 = SvPVX(PL_linestr);
3280af22
NIS
984 PL_bufend += SvCUR(PL_linestr);
985 SAVEFREESV(PL_linestr);
986
987 PL_lex_dojoin = FALSE;
988 PL_lex_brackets = 0;
3280af22
NIS
989 New(899, PL_lex_brackstack, 120, char);
990 New(899, PL_lex_casestack, 12, char);
991 SAVEFREEPV(PL_lex_brackstack);
992 SAVEFREEPV(PL_lex_casestack);
993 PL_lex_casemods = 0;
994 *PL_lex_casestack = '\0';
995 PL_lex_starts = 0;
996 PL_lex_state = LEX_INTERPCONCAT;
57843af0 997 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
998
999 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1000 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1001 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1002 else
3280af22 1003 PL_lex_inpat = Nullop;
79072805 1004
55497cff 1005 return '(';
79072805
LW
1006}
1007
ffb4593c
NT
1008/*
1009 * S_sublex_done
1010 * Restores lexer state after a S_sublex_push.
1011 */
1012
76e3520e 1013STATIC I32
cea2e8a9 1014S_sublex_done(pTHX)
79072805 1015{
3280af22
NIS
1016 if (!PL_lex_starts++) {
1017 PL_expect = XOPERATOR;
79cb57f6 1018 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1019 return THING;
1020 }
1021
3280af22
NIS
1022 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1023 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1024 return yylex();
79072805
LW
1025 }
1026
ffb4593c 1027 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1028 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1029 PL_linestr = PL_lex_repl;
1030 PL_lex_inpat = 0;
1031 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1032 PL_bufend += SvCUR(PL_linestr);
1033 SAVEFREESV(PL_linestr);
1034 PL_lex_dojoin = FALSE;
1035 PL_lex_brackets = 0;
3280af22
NIS
1036 PL_lex_casemods = 0;
1037 *PL_lex_casestack = '\0';
1038 PL_lex_starts = 0;
25da4f38 1039 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1040 PL_lex_state = LEX_INTERPNORMAL;
1041 PL_lex_starts++;
e9fa98b2
HS
1042 /* we don't clear PL_lex_repl here, so that we can check later
1043 whether this is an evalled subst; that means we rely on the
1044 logic to ensure sublex_done() is called again only via the
1045 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1046 }
e9fa98b2 1047 else {
3280af22 1048 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1049 PL_lex_repl = Nullsv;
1050 }
79072805 1051 return ',';
ffed7fef
LW
1052 }
1053 else {
f46d017c 1054 LEAVE;
3280af22
NIS
1055 PL_bufend = SvPVX(PL_linestr);
1056 PL_bufend += SvCUR(PL_linestr);
1057 PL_expect = XOPERATOR;
09bef843 1058 PL_sublex_info.sub_inwhat = 0;
79072805 1059 return ')';
ffed7fef
LW
1060 }
1061}
1062
02aa26ce
NT
1063/*
1064 scan_const
1065
1066 Extracts a pattern, double-quoted string, or transliteration. This
1067 is terrifying code.
1068
3280af22
NIS
1069 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1070 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1071 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1072
9b599b2a
GS
1073 Returns a pointer to the character scanned up to. Iff this is
1074 advanced from the start pointer supplied (ie if anything was
1075 successfully parsed), will leave an OP for the substring scanned
1076 in yylval. Caller must intuit reason for not parsing further
1077 by looking at the next characters herself.
1078
02aa26ce
NT
1079 In patterns:
1080 backslashes:
1081 double-quoted style: \r and \n
1082 regexp special ones: \D \s
1083 constants: \x3
1084 backrefs: \1 (deprecated in substitution replacements)
1085 case and quoting: \U \Q \E
1086 stops on @ and $, but not for $ as tail anchor
1087
1088 In transliterations:
1089 characters are VERY literal, except for - not at the start or end
1090 of the string, which indicates a range. scan_const expands the
1091 range to the full set of intermediate characters.
1092
1093 In double-quoted strings:
1094 backslashes:
1095 double-quoted style: \r and \n
1096 constants: \x3
1097 backrefs: \1 (deprecated)
1098 case and quoting: \U \Q \E
1099 stops on @ and $
1100
1101 scan_const does *not* construct ops to handle interpolated strings.
1102 It stops processing as soon as it finds an embedded $ or @ variable
1103 and leaves it to the caller to work out what's going on.
1104
1105 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1106
1107 $ in pattern could be $foo or could be tail anchor. Assumption:
1108 it's a tail anchor if $ is the last thing in the string, or if it's
1109 followed by one of ")| \n\t"
1110
1111 \1 (backreferences) are turned into $1
1112
1113 The structure of the code is
1114 while (there's a character to process) {
1115 handle transliteration ranges
1116 skip regexp comments
1117 skip # initiated comments in //x patterns
1118 check for embedded @foo
1119 check for embedded scalars
1120 if (backslash) {
1121 leave intact backslashes from leave (below)
1122 deprecate \1 in strings and sub replacements
1123 handle string-changing backslashes \l \U \Q \E, etc.
1124 switch (what was escaped) {
1125 handle - in a transliteration (becomes a literal -)
1126 handle \132 octal characters
1127 handle 0x15 hex characters
1128 handle \cV (control V)
1129 handle printf backslashes (\f, \r, \n, etc)
1130 } (end switch)
1131 } (end if backslash)
1132 } (end while character to read)
1133
1134*/
1135
76e3520e 1136STATIC char *
cea2e8a9 1137S_scan_const(pTHX_ char *start)
79072805 1138{
3280af22 1139 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1140 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1141 register char *s = start; /* start of the constant */
1142 register char *d = SvPVX(sv); /* destination for copies */
1143 bool dorange = FALSE; /* are we in a translit range? */
1144 I32 len; /* ? */
ac2262e3 1145 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1146 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1147 : UTF;
ac2262e3 1148 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1149 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1150 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1151 : UTF;
dff6d3cd 1152 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1153 PL_lex_inpat
4a2d328f 1154 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1155 : "";
79072805
LW
1156
1157 while (s < send || dorange) {
02aa26ce 1158 /* get transliterations out of the way (they're most literal) */
3280af22 1159 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1160 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1161 if (dorange) {
02aa26ce 1162 I32 i; /* current expanded character */
8ada0baa 1163 I32 min; /* first character in range */
02aa26ce
NT
1164 I32 max; /* last character in range */
1165
1166 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1167 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1168 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1169 d -= 2; /* eat the first char and the - */
1170
8ada0baa
JH
1171 min = (U8)*d; /* first char in range */
1172 max = (U8)d[1]; /* last char in range */
1173
1174#ifndef ASCIIish
1175 if ((isLOWER(min) && isLOWER(max)) ||
1176 (isUPPER(min) && isUPPER(max))) {
1177 if (isLOWER(min)) {
1178 for (i = min; i <= max; i++)
1179 if (isLOWER(i))
1180 *d++ = i;
1181 } else {
1182 for (i = min; i <= max; i++)
1183 if (isUPPER(i))
1184 *d++ = i;
1185 }
1186 }
1187 else
1188#endif
1189 for (i = min; i <= max; i++)
1190 *d++ = i;
02aa26ce
NT
1191
1192 /* mark the range as done, and continue */
79072805
LW
1193 dorange = FALSE;
1194 continue;
1195 }
02aa26ce
NT
1196
1197 /* range begins (ignore - as first or last char) */
79072805 1198 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1199 if (utf) {
a176fa2a 1200 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1201 s++;
1202 continue;
1203 }
79072805
LW
1204 dorange = TRUE;
1205 s++;
1206 }
1207 }
02aa26ce
NT
1208
1209 /* if we get here, we're not doing a transliteration */
1210
0f5d15d6
IZ
1211 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1212 except for the last char, which will be done separately. */
3280af22 1213 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1214 if (s[2] == '#') {
1215 while (s < send && *s != ')')
1216 *d++ = *s++;
0f5d15d6
IZ
1217 } else if (s[2] == '{'
1218 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1219 I32 count = 1;
0f5d15d6 1220 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1221 char c;
1222
d9f97599
GS
1223 while (count && (c = *regparse)) {
1224 if (c == '\\' && regparse[1])
1225 regparse++;
cc6b7395
IZ
1226 else if (c == '{')
1227 count++;
1228 else if (c == '}')
1229 count--;
d9f97599 1230 regparse++;
cc6b7395 1231 }
5bdf89e7
IZ
1232 if (*regparse != ')') {
1233 regparse--; /* Leave one char for continuation. */
cc6b7395 1234 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1235 }
0f5d15d6 1236 while (s < regparse)
cc6b7395
IZ
1237 *d++ = *s++;
1238 }
748a9306 1239 }
02aa26ce
NT
1240
1241 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1242 else if (*s == '#' && PL_lex_inpat &&
1243 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1244 while (s+1 < send && *s != '\n')
1245 *d++ = *s++;
1246 }
02aa26ce
NT
1247
1248 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1249 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1250 break;
02aa26ce
NT
1251
1252 /* check for embedded scalars. only stop if we're sure it's a
1253 variable.
1254 */
79072805 1255 else if (*s == '$') {
3280af22 1256 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1257 break;
c277df42 1258 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1259 break; /* in regexp, $ might be tail anchor */
1260 }
02aa26ce 1261
a0ed51b3
LW
1262 /* (now in tr/// code again) */
1263
d008e5eb
GS
1264 if (*s & 0x80 && thisutf) {
1265 dTHR; /* only for ckWARN */
1266 if (ckWARN(WARN_UTF8)) {
dfe13c55 1267 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1268 if (len) {
1269 while (len--)
1270 *d++ = *s++;
1271 continue;
1272 }
a0ed51b3
LW
1273 }
1274 }
1275
02aa26ce 1276 /* backslashes */
79072805
LW
1277 if (*s == '\\' && s+1 < send) {
1278 s++;
02aa26ce
NT
1279
1280 /* some backslashes we leave behind */
c9f97d15 1281 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1282 *d++ = '\\';
1283 *d++ = *s++;
1284 continue;
1285 }
02aa26ce
NT
1286
1287 /* deprecate \1 in strings and substitution replacements */
3280af22 1288 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1289 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1290 {
d008e5eb 1291 dTHR; /* only for ckWARN */
599cee73 1292 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1293 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1294 *--s = '$';
1295 break;
1296 }
02aa26ce
NT
1297
1298 /* string-change backslash escapes */
3280af22 1299 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1300 --s;
1301 break;
1302 }
02aa26ce
NT
1303
1304 /* if we get here, it's either a quoted -, or a digit */
79072805 1305 switch (*s) {
02aa26ce
NT
1306
1307 /* quoted - in transliterations */
79072805 1308 case '-':
3280af22 1309 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1310 *d++ = *s++;
1311 continue;
1312 }
1313 /* FALL THROUGH */
1314 default:
11b8faa4
JH
1315 {
1316 dTHR;
1317 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1318 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1319 "Unrecognized escape \\%c passed through",
1320 *s);
1321 /* default action is to copy the quoted character */
1322 *d++ = *s++;
1323 continue;
1324 }
02aa26ce
NT
1325
1326 /* \132 indicates an octal constant */
79072805
LW
1327 case '0': case '1': case '2': case '3':
1328 case '4': case '5': case '6': case '7':
dff6d3cd 1329 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1330 s += len;
1331 continue;
02aa26ce
NT
1332
1333 /* \x24 indicates a hex constant */
79072805 1334 case 'x':
a0ed51b3
LW
1335 ++s;
1336 if (*s == '{') {
1337 char* e = strchr(s, '}');
1338
adaeee49 1339 if (!e) {
a0ed51b3 1340 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1341 e = s;
1342 }
d008e5eb
GS
1343 if (!utf) {
1344 dTHR;
1345 if (ckWARN(WARN_UTF8))
cea2e8a9 1346 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1347 "Use of \\x{} without utf8 declaration");
1348 }
a0ed51b3 1349 /* note: utf always shorter than hex */
dfe13c55 1350 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1351 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1352 s = e + 1;
a0ed51b3
LW
1353 }
1354 else {
1355 UV uv = (UV)scan_hex(s, 2, &len);
1356 if (utf && PL_lex_inwhat == OP_TRANS &&
1357 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1358 {
dfe13c55 1359 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1360 }
1361 else {
d008e5eb
GS
1362 if (uv >= 127 && UTF) {
1363 dTHR;
1364 if (ckWARN(WARN_UTF8))
cea2e8a9 1365 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1366 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1367 len,s,len,s);
1368 }
a0ed51b3
LW
1369 *d++ = (char)uv;
1370 }
1371 s += len;
1372 }
79072805 1373 continue;
02aa26ce 1374
4a2d328f
IZ
1375 /* \N{latin small letter a} is a named character */
1376 case 'N':
423cee85
JH
1377 ++s;
1378 if (*s == '{') {
1379 char* e = strchr(s, '}');
1380 HV *hv;
1381 SV **svp;
1382 SV *res, *cv;
1383 STRLEN len;
1384 char *str;
1385 char *why = Nullch;
1386
1387 if (!e) {
5777a3f7 1388 yyerror("Missing right brace on \\N{}");
423cee85
JH
1389 e = s - 1;
1390 goto cont_scan;
1391 }
1392 res = newSVpvn(s + 1, e - s - 1);
1393 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1394 res, Nullsv, "\\N{...}" );
423cee85
JH
1395 str = SvPV(res,len);
1396 if (len > e - s + 4) {
1397 char *odest = SvPVX(sv);
1398
1399 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1400 d = SvPVX(sv) + (d - odest);
1401 }
1402 Copy(str, d, len, char);
1403 d += len;
1404 SvREFCNT_dec(res);
1405 cont_scan:
1406 s = e + 1;
1407 }
1408 else
5777a3f7 1409 yyerror("Missing braces on \\N{}");
423cee85
JH
1410 continue;
1411
02aa26ce 1412 /* \c is a control character */
79072805
LW
1413 case 'c':
1414 s++;
9d116dd7
JH
1415#ifdef EBCDIC
1416 *d = *s++;
1417 if (isLOWER(*d))
1418 *d = toUPPER(*d);
1419 *d++ = toCTRL(*d);
1420#else
bbce6d69
PP
1421 len = *s++;
1422 *d++ = toCTRL(len);
9d116dd7 1423#endif
79072805 1424 continue;
02aa26ce
NT
1425
1426 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1427 case 'b':
1428 *d++ = '\b';
1429 break;
1430 case 'n':
1431 *d++ = '\n';
1432 break;
1433 case 'r':
1434 *d++ = '\r';
1435 break;
1436 case 'f':
1437 *d++ = '\f';
1438 break;
1439 case 't':
1440 *d++ = '\t';
1441 break;
34a3fe2a
PP
1442#ifdef EBCDIC
1443 case 'e':
1444 *d++ = '\047'; /* CP 1047 */
1445 break;
1446 case 'a':
1447 *d++ = '\057'; /* CP 1047 */
1448 break;
1449#else
79072805
LW
1450 case 'e':
1451 *d++ = '\033';
1452 break;
1453 case 'a':
1454 *d++ = '\007';
1455 break;
34a3fe2a 1456#endif
02aa26ce
NT
1457 } /* end switch */
1458
79072805
LW
1459 s++;
1460 continue;
02aa26ce
NT
1461 } /* end if (backslash) */
1462
79072805 1463 *d++ = *s++;
02aa26ce
NT
1464 } /* while loop to process each character */
1465
1466 /* terminate the string and set up the sv */
79072805 1467 *d = '\0';
463ee0b2 1468 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1469 SvPOK_on(sv);
1470
02aa26ce 1471 /* shrink the sv if we allocated more than we used */
79072805
LW
1472 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1473 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1474 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1475 }
02aa26ce 1476
9b599b2a 1477 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1478 if (s > PL_bufptr) {
1479 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1480 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1481 sv, Nullsv,
3280af22 1482 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1483 ? "tr"
3280af22 1484 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1485 ? "s"
1486 : "qq")));
79072805 1487 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1488 } else
8990e307 1489 SvREFCNT_dec(sv);
79072805
LW
1490 return s;
1491}
1492
ffb4593c
NT
1493/* S_intuit_more
1494 * Returns TRUE if there's more to the expression (e.g., a subscript),
1495 * FALSE otherwise.
ffb4593c
NT
1496 *
1497 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1498 *
1499 * ->[ and ->{ return TRUE
1500 * { and [ outside a pattern are always subscripts, so return TRUE
1501 * if we're outside a pattern and it's not { or [, then return FALSE
1502 * if we're in a pattern and the first char is a {
1503 * {4,5} (any digits around the comma) returns FALSE
1504 * if we're in a pattern and the first char is a [
1505 * [] returns FALSE
1506 * [SOMETHING] has a funky algorithm to decide whether it's a
1507 * character class or not. It has to deal with things like
1508 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1509 * anything else returns TRUE
1510 */
1511
9cbb5ea2
GS
1512/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1513
76e3520e 1514STATIC int
cea2e8a9 1515S_intuit_more(pTHX_ register char *s)
79072805 1516{
3280af22 1517 if (PL_lex_brackets)
79072805
LW
1518 return TRUE;
1519 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1520 return TRUE;
1521 if (*s != '{' && *s != '[')
1522 return FALSE;
3280af22 1523 if (!PL_lex_inpat)
79072805
LW
1524 return TRUE;
1525
1526 /* In a pattern, so maybe we have {n,m}. */
1527 if (*s == '{') {
1528 s++;
1529 if (!isDIGIT(*s))
1530 return TRUE;
1531 while (isDIGIT(*s))
1532 s++;
1533 if (*s == ',')
1534 s++;
1535 while (isDIGIT(*s))
1536 s++;
1537 if (*s == '}')
1538 return FALSE;
1539 return TRUE;
1540
1541 }
1542
1543 /* On the other hand, maybe we have a character class */
1544
1545 s++;
1546 if (*s == ']' || *s == '^')
1547 return FALSE;
1548 else {
ffb4593c 1549 /* this is terrifying, and it works */
79072805
LW
1550 int weight = 2; /* let's weigh the evidence */
1551 char seen[256];
f27ffc4a 1552 unsigned char un_char = 255, last_un_char;
93a17b20 1553 char *send = strchr(s,']');
3280af22 1554 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1555
1556 if (!send) /* has to be an expression */
1557 return TRUE;
1558
1559 Zero(seen,256,char);
1560 if (*s == '$')
1561 weight -= 3;
1562 else if (isDIGIT(*s)) {
1563 if (s[1] != ']') {
1564 if (isDIGIT(s[1]) && s[2] == ']')
1565 weight -= 10;
1566 }
1567 else
1568 weight -= 100;
1569 }
1570 for (; s < send; s++) {
1571 last_un_char = un_char;
1572 un_char = (unsigned char)*s;
1573 switch (*s) {
1574 case '@':
1575 case '&':
1576 case '$':
1577 weight -= seen[un_char] * 10;
834a4ddd 1578 if (isALNUM_lazy(s+1)) {
8903cb82 1579 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1580 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1581 weight -= 100;
1582 else
1583 weight -= 10;
1584 }
1585 else if (*s == '$' && s[1] &&
93a17b20
LW
1586 strchr("[#!%*<>()-=",s[1])) {
1587 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1588 weight -= 10;
1589 else
1590 weight -= 1;
1591 }
1592 break;
1593 case '\\':
1594 un_char = 254;
1595 if (s[1]) {
93a17b20 1596 if (strchr("wds]",s[1]))
79072805
LW
1597 weight += 100;
1598 else if (seen['\''] || seen['"'])
1599 weight += 1;
93a17b20 1600 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1601 weight += 40;
1602 else if (isDIGIT(s[1])) {
1603 weight += 40;
1604 while (s[1] && isDIGIT(s[1]))
1605 s++;
1606 }
1607 }
1608 else
1609 weight += 100;
1610 break;
1611 case '-':
1612 if (s[1] == '\\')
1613 weight += 50;
93a17b20 1614 if (strchr("aA01! ",last_un_char))
79072805 1615 weight += 30;
93a17b20 1616 if (strchr("zZ79~",s[1]))
79072805 1617 weight += 30;
f27ffc4a
GS
1618 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1619 weight -= 5; /* cope with negative subscript */
79072805
LW
1620 break;
1621 default:
93a17b20 1622 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1623 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1624 char *d = tmpbuf;
1625 while (isALPHA(*s))
1626 *d++ = *s++;
1627 *d = '\0';
1628 if (keyword(tmpbuf, d - tmpbuf))
1629 weight -= 150;
1630 }
1631 if (un_char == last_un_char + 1)
1632 weight += 5;
1633 weight -= seen[un_char];
1634 break;
1635 }
1636 seen[un_char]++;
1637 }
1638 if (weight >= 0) /* probably a character class */
1639 return FALSE;
1640 }
1641
1642 return TRUE;
1643}
ffed7fef 1644
ffb4593c
NT
1645/*
1646 * S_intuit_method
1647 *
1648 * Does all the checking to disambiguate
1649 * foo bar
1650 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1651 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1652 *
1653 * First argument is the stuff after the first token, e.g. "bar".
1654 *
1655 * Not a method if bar is a filehandle.
1656 * Not a method if foo is a subroutine prototyped to take a filehandle.
1657 * Not a method if it's really "Foo $bar"
1658 * Method if it's "foo $bar"
1659 * Not a method if it's really "print foo $bar"
1660 * Method if it's really "foo package::" (interpreted as package->foo)
1661 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1662 * Not a method if bar is a filehandle or package, but is quotd with
1663 * =>
1664 */
1665
76e3520e 1666STATIC int
cea2e8a9 1667S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1668{
1669 char *s = start + (*start == '$');
3280af22 1670 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1671 STRLEN len;
1672 GV* indirgv;
1673
1674 if (gv) {
b6c543e3 1675 CV *cv;
a0d0e21e
LW
1676 if (GvIO(gv))
1677 return 0;
b6c543e3
IZ
1678 if ((cv = GvCVu(gv))) {
1679 char *proto = SvPVX(cv);
1680 if (proto) {
1681 if (*proto == ';')
1682 proto++;
1683 if (*proto == '*')
1684 return 0;
1685 }
1686 } else
a0d0e21e
LW
1687 gv = 0;
1688 }
8903cb82 1689 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1690 /* start is the beginning of the possible filehandle/object,
1691 * and s is the end of it
1692 * tmpbuf is a copy of it
1693 */
1694
a0d0e21e 1695 if (*start == '$') {
3280af22 1696 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1697 return 0;
1698 s = skipspace(s);
3280af22
NIS
1699 PL_bufptr = start;
1700 PL_expect = XREF;
a0d0e21e
LW
1701 return *s == '(' ? FUNCMETH : METHOD;
1702 }
1703 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1704 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1705 len -= 2;
1706 tmpbuf[len] = '\0';
1707 goto bare_package;
1708 }
1709 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1710 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1711 return 0;
1712 /* filehandle or package name makes it a method */
89bfa8cd 1713 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1714 s = skipspace(s);
3280af22 1715 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1716 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1717 bare_package:
3280af22 1718 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1719 newSVpvn(tmpbuf,len));
3280af22
NIS
1720 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1721 PL_expect = XTERM;
a0d0e21e 1722 force_next(WORD);
3280af22 1723 PL_bufptr = s;
a0d0e21e
LW
1724 return *s == '(' ? FUNCMETH : METHOD;
1725 }
1726 }
1727 return 0;
1728}
1729
ffb4593c
NT
1730/*
1731 * S_incl_perldb
1732 * Return a string of Perl code to load the debugger. If PERL5DB
1733 * is set, it will return the contents of that, otherwise a
1734 * compile-time require of perl5db.pl.
1735 */
1736
76e3520e 1737STATIC char*
cea2e8a9 1738S_incl_perldb(pTHX)
a0d0e21e 1739{
3280af22 1740 if (PL_perldb) {
76e3520e 1741 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1742
1743 if (pdb)
1744 return pdb;
61bb5906 1745 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1746 return "BEGIN { require 'perl5db.pl' }";
1747 }
1748 return "";
1749}
1750
1751
16d20bd9
AD
1752/* Encoded script support. filter_add() effectively inserts a
1753 * 'pre-processing' function into the current source input stream.
1754 * Note that the filter function only applies to the current source file
1755 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1756 *
1757 * The datasv parameter (which may be NULL) can be used to pass
1758 * private data to this instance of the filter. The filter function
1759 * can recover the SV using the FILTER_DATA macro and use it to
1760 * store private buffers and state information.
1761 *
1762 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1763 * and the IoDIRP field is used to store the function pointer,
1764 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1765 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1766 * private use must be set using malloc'd pointers.
1767 */
16d20bd9
AD
1768
1769SV *
864dbfa3 1770Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1771{
f4c556ac
GS
1772 if (!funcp)
1773 return Nullsv;
1774
3280af22
NIS
1775 if (!PL_rsfp_filters)
1776 PL_rsfp_filters = newAV();
16d20bd9 1777 if (!datasv)
8c52afec 1778 datasv = NEWSV(255,0);
16d20bd9 1779 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1780 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1781 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1782 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1783 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1784 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1785 av_unshift(PL_rsfp_filters, 1);
1786 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1787 return(datasv);
1788}
1789
1790
1791/* Delete most recently added instance of this filter function. */
a0d0e21e 1792void
864dbfa3 1793Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1794{
e0c19803 1795 SV *datasv;
f4c556ac 1796 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1797 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1798 return;
1799 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1800 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1801 if (IoDIRP(datasv) == (DIR*)funcp) {
1802 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1803 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1804 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1805
16d20bd9
AD
1806 return;
1807 }
1808 /* we need to search for the correct entry and clear it */
cea2e8a9 1809 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1810}
1811
1812
1813/* Invoke the n'th filter function for the current rsfp. */
1814I32
864dbfa3 1815Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1816
1817
1818 /* 0 = read one text line */
a0d0e21e 1819{
16d20bd9
AD
1820 filter_t funcp;
1821 SV *datasv = NULL;
e50aee73 1822
3280af22 1823 if (!PL_rsfp_filters)
16d20bd9 1824 return -1;
3280af22 1825 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1826 /* Provide a default input filter to make life easy. */
1827 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1828 DEBUG_P(PerlIO_printf(Perl_debug_log,
1829 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1830 if (maxlen) {
1831 /* Want a block */
1832 int len ;
1833 int old_len = SvCUR(buf_sv) ;
1834
1835 /* ensure buf_sv is large enough */
1836 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1837 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1838 if (PerlIO_error(PL_rsfp))
37120919
AD
1839 return -1; /* error */
1840 else
1841 return 0 ; /* end of file */
1842 }
16d20bd9
AD
1843 SvCUR_set(buf_sv, old_len + len) ;
1844 } else {
1845 /* Want a line */
3280af22
NIS
1846 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1847 if (PerlIO_error(PL_rsfp))
37120919
AD
1848 return -1; /* error */
1849 else
1850 return 0 ; /* end of file */
1851 }
16d20bd9
AD
1852 }
1853 return SvCUR(buf_sv);
1854 }
1855 /* Skip this filter slot if filter has been deleted */
3280af22 1856 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1857 DEBUG_P(PerlIO_printf(Perl_debug_log,
1858 "filter_read %d: skipped (filter deleted)\n",
1859 idx));
16d20bd9
AD
1860 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1861 }
1862 /* Get function pointer hidden within datasv */
1863 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1864 DEBUG_P(PerlIO_printf(Perl_debug_log,
1865 "filter_read %d: via function %p (%s)\n",
1866 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1867 /* Call function. The function is expected to */
1868 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1869 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1870 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1871}
1872
76e3520e 1873STATIC char *
cea2e8a9 1874S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1875{
a868473f 1876#ifdef WIN32FILTER
3280af22 1877 if (!PL_rsfp_filters) {
a868473f
NIS
1878 filter_add(win32_textfilter,NULL);
1879 }
1880#endif
3280af22 1881 if (PL_rsfp_filters) {
16d20bd9 1882
55497cff
PP
1883 if (!append)
1884 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1885 if (FILTER_READ(0, sv, 0) > 0)
1886 return ( SvPVX(sv) ) ;
1887 else
1888 return Nullch ;
1889 }
9d116dd7 1890 else
fd049845 1891 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1892}
1893
1894
748a9306
LW
1895#ifdef DEBUGGING
1896 static char* exp_name[] =
09bef843
SB
1897 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1898 "ATTRTERM", "TERMBLOCK"
1899 };
748a9306 1900#endif
463ee0b2 1901
02aa26ce
NT
1902/*
1903 yylex
1904
1905 Works out what to call the token just pulled out of the input
1906 stream. The yacc parser takes care of taking the ops we return and
1907 stitching them into a tree.
1908
1909 Returns:
1910 PRIVATEREF
1911
1912 Structure:
1913 if read an identifier
1914 if we're in a my declaration
1915 croak if they tried to say my($foo::bar)
1916 build the ops for a my() declaration
1917 if it's an access to a my() variable
1918 are we in a sort block?
1919 croak if my($a); $a <=> $b
1920 build ops for access to a my() variable
1921 if in a dq string, and they've said @foo and we can't find @foo
1922 croak
1923 build ops for a bareword
1924 if we already built the token before, use it.
1925*/
1926
864dbfa3
GS
1927int
1928#ifdef USE_PURE_BISON
cea2e8a9 1929Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1930#else
cea2e8a9 1931Perl_yylex(pTHX)
864dbfa3 1932#endif
378cc40b 1933{
11343788 1934 dTHR;
79072805 1935 register char *s;
378cc40b 1936 register char *d;
79072805 1937 register I32 tmp;
463ee0b2 1938 STRLEN len;
161b471a
NIS
1939 GV *gv = Nullgv;
1940 GV **gvp = 0;
a687059c 1941
a1a0e61e
TD
1942#ifdef USE_PURE_BISON
1943 yylval_pointer = lvalp;
1944 yychar_pointer = lcharp;
1945#endif
1946
02aa26ce 1947 /* check if there's an identifier for us to look at */
3280af22 1948 if (PL_pending_ident) {
02aa26ce 1949 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1950 char pit = PL_pending_ident;
1951 PL_pending_ident = 0;
bbce6d69 1952
02aa26ce
NT
1953 /* if we're in a my(), we can't allow dynamics here.
1954 $foo'bar has already been turned into $foo::bar, so
1955 just check for colons.
1956
1957 if it's a legal name, the OP is a PADANY.
1958 */
3280af22 1959 if (PL_in_my) {
77ca0c92
LW
1960 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1961 tmp = pad_allocmy(PL_tokenbuf);
1962 }
1963 else {
1964 if (strchr(PL_tokenbuf,':'))
1965 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1966
77ca0c92
LW
1967 yylval.opval = newOP(OP_PADANY, 0);
1968 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1969 return PRIVATEREF;
1970 }
bbce6d69
PP
1971 }
1972
02aa26ce
NT
1973 /*
1974 build the ops for accesses to a my() variable.
1975
1976 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1977 then used in a comparison. This catches most, but not
1978 all cases. For instance, it catches
1979 sort { my($a); $a <=> $b }
1980 but not
1981 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1982 (although why you'd do that is anyone's guess).
1983 */
1984
3280af22 1985 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 1986#ifdef USE_THREADS
54b9620d 1987 /* Check for single character per-thread SVs */
3280af22
NIS
1988 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
1989 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
1990 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 1991 {
2faa37cc 1992 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
1993 yylval.opval->op_targ = tmp;
1994 return PRIVATEREF;
1995 }
1996#endif /* USE_THREADS */
3280af22 1997 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
77ca0c92
LW
1998 /* might be an "our" variable" */
1999 if (SvFLAGS(AvARRAY(PL_comppad_name)[tmp]) & SVpad_OUR) {
2000 /* build ops for a bareword */
2001 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
2002 yylval.opval->op_private = OPpCONST_ENTERED;
2003 gv_fetchpv(PL_tokenbuf+1,
2004 (PL_in_eval
2005 ? (GV_ADDMULTI | GV_ADDINEVAL | GV_ADDOUR)
2006 : GV_ADDOUR
2007 ),
2008 ((PL_tokenbuf[0] == '$') ? SVt_PV
2009 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2010 : SVt_PVHV));
2011 return WORD;
2012 }
2013
02aa26ce 2014 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2015 if (PL_last_lop_op == OP_SORT &&
2016 PL_tokenbuf[0] == '$' &&
2017 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2018 && !PL_tokenbuf[2])
bbce6d69 2019 {
3280af22
NIS
2020 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2021 d < PL_bufend && *d != '\n';
a863c7d1
MB
2022 d++)
2023 {
2024 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2025 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2026 PL_tokenbuf);
a863c7d1 2027 }
bbce6d69
PP
2028 }
2029 }
bbce6d69 2030
a863c7d1
MB
2031 yylval.opval = newOP(OP_PADANY, 0);
2032 yylval.opval->op_targ = tmp;
2033 return PRIVATEREF;
2034 }
bbce6d69
PP
2035 }
2036
02aa26ce
NT
2037 /*
2038 Whine if they've said @foo in a doublequoted string,
2039 and @foo isn't a variable we can find in the symbol
2040 table.
2041 */
3280af22
NIS
2042 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2043 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2044 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2045 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2046 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2047 }
2048
02aa26ce 2049 /* build ops for a bareword */
3280af22 2050 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2051 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2052 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2053 ((PL_tokenbuf[0] == '$') ? SVt_PV
2054 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2055 : SVt_PVHV));
2056 return WORD;
2057 }
2058
02aa26ce
NT
2059 /* no identifier pending identification */
2060
3280af22 2061 switch (PL_lex_state) {
79072805
LW
2062#ifdef COMMENTARY
2063 case LEX_NORMAL: /* Some compilers will produce faster */
2064 case LEX_INTERPNORMAL: /* code if we comment these out. */
2065 break;
2066#endif
2067
09bef843 2068 /* when we've already built the next token, just pull it out of the queue */
79072805 2069 case LEX_KNOWNEXT:
3280af22
NIS
2070 PL_nexttoke--;
2071 yylval = PL_nextval[PL_nexttoke];
2072 if (!PL_nexttoke) {
2073 PL_lex_state = PL_lex_defer;
2074 PL_expect = PL_lex_expect;
2075 PL_lex_defer = LEX_NORMAL;
463ee0b2 2076 }
3280af22 2077 return(PL_nexttype[PL_nexttoke]);
79072805 2078
02aa26ce 2079 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2080 when we get here, PL_bufptr is at the \
02aa26ce 2081 */
79072805
LW
2082 case LEX_INTERPCASEMOD:
2083#ifdef DEBUGGING
3280af22 2084 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2085 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2086#endif
02aa26ce 2087 /* handle \E or end of string */
3280af22 2088 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2089 char oldmod;
02aa26ce
NT
2090
2091 /* if at a \E */
3280af22
NIS
2092 if (PL_lex_casemods) {
2093 oldmod = PL_lex_casestack[--PL_lex_casemods];
2094 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2095
3280af22
NIS
2096 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2097 PL_bufptr += 2;
2098 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2099 }
79072805
LW
2100 return ')';
2101 }
3280af22
NIS
2102 if (PL_bufptr != PL_bufend)
2103 PL_bufptr += 2;
2104 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2105 return yylex();
79072805
LW
2106 }
2107 else {
3280af22 2108 s = PL_bufptr + 1;
79072805
LW
2109 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2110 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2111 if (strchr("LU", *s) &&
3280af22 2112 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2113 {
3280af22 2114 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2115 return ')';
2116 }
3280af22
NIS
2117 if (PL_lex_casemods > 10) {
2118 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2119 if (newlb != PL_lex_casestack) {
a0d0e21e 2120 SAVEFREEPV(newlb);
3280af22 2121 PL_lex_casestack = newlb;
a0d0e21e
LW
2122 }
2123 }
3280af22
NIS
2124 PL_lex_casestack[PL_lex_casemods++] = *s;
2125 PL_lex_casestack[PL_lex_casemods] = '\0';
2126 PL_lex_state = LEX_INTERPCONCAT;
2127 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2128 force_next('(');
2129 if (*s == 'l')
3280af22 2130 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2131 else if (*s == 'u')
3280af22 2132 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2133 else if (*s == 'L')
3280af22 2134 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2135 else if (*s == 'U')
3280af22 2136 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2137 else if (*s == 'Q')
3280af22 2138 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2139 else
cea2e8a9 2140 Perl_croak(aTHX_ "panic: yylex");
3280af22 2141 PL_bufptr = s + 1;
79072805 2142 force_next(FUNC);
3280af22
NIS
2143 if (PL_lex_starts) {
2144 s = PL_bufptr;
2145 PL_lex_starts = 0;
79072805
LW
2146 Aop(OP_CONCAT);
2147 }
2148 else
cea2e8a9 2149 return yylex();
79072805
LW
2150 }
2151
55497cff
PP
2152 case LEX_INTERPPUSH:
2153 return sublex_push();
2154
79072805 2155 case LEX_INTERPSTART:
3280af22 2156 if (PL_bufptr == PL_bufend)
79072805 2157 return sublex_done();
3280af22
NIS
2158 PL_expect = XTERM;
2159 PL_lex_dojoin = (*PL_bufptr == '@');
2160 PL_lex_state = LEX_INTERPNORMAL;
2161 if (PL_lex_dojoin) {
2162 PL_nextval[PL_nexttoke].ival = 0;
79072805 2163 force_next(',');
554b3eca 2164#ifdef USE_THREADS
533c011a
NIS
2165 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2166 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2167 force_next(PRIVATEREF);
2168#else
a0d0e21e 2169 force_ident("\"", '$');
554b3eca 2170#endif /* USE_THREADS */
3280af22 2171 PL_nextval[PL_nexttoke].ival = 0;
79072805 2172 force_next('$');
3280af22 2173 PL_nextval[PL_nexttoke].ival = 0;
79072805 2174 force_next('(');
3280af22 2175 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2176 force_next(FUNC);
2177 }
3280af22
NIS
2178 if (PL_lex_starts++) {
2179 s = PL_bufptr;
79072805
LW
2180 Aop(OP_CONCAT);
2181 }
cea2e8a9 2182 return yylex();
79072805
LW
2183
2184 case LEX_INTERPENDMAYBE:
3280af22
NIS
2185 if (intuit_more(PL_bufptr)) {
2186 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2187 break;
2188 }
2189 /* FALL THROUGH */
2190
2191 case LEX_INTERPEND:
3280af22
NIS
2192 if (PL_lex_dojoin) {
2193 PL_lex_dojoin = FALSE;
2194 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2195 return ')';
2196 }
43a16006 2197 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2198 && SvEVALED(PL_lex_repl))
43a16006 2199 {
e9fa98b2 2200 if (PL_bufptr != PL_bufend)
cea2e8a9 2201 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2202 PL_lex_repl = Nullsv;
2203 }
79072805
LW
2204 /* FALLTHROUGH */
2205 case LEX_INTERPCONCAT:
2206#ifdef DEBUGGING
3280af22 2207 if (PL_lex_brackets)
cea2e8a9 2208 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2209#endif
3280af22 2210 if (PL_bufptr == PL_bufend)
79072805
LW
2211 return sublex_done();
2212
3280af22
NIS
2213 if (SvIVX(PL_linestr) == '\'') {
2214 SV *sv = newSVsv(PL_linestr);
2215 if (!PL_lex_inpat)
76e3520e 2216 sv = tokeq(sv);
3280af22 2217 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2218 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2219 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2220 s = PL_bufend;
79072805
LW
2221 }
2222 else {
3280af22 2223 s = scan_const(PL_bufptr);
79072805 2224 if (*s == '\\')
3280af22 2225 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2226 else
3280af22 2227 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2228 }
2229
3280af22
NIS
2230 if (s != PL_bufptr) {
2231 PL_nextval[PL_nexttoke] = yylval;
2232 PL_expect = XTERM;
79072805 2233 force_next(THING);
3280af22 2234 if (PL_lex_starts++)
79072805
LW
2235 Aop(OP_CONCAT);
2236 else {
3280af22 2237 PL_bufptr = s;
cea2e8a9 2238 return yylex();
79072805
LW
2239 }
2240 }
2241
cea2e8a9 2242 return yylex();
a0d0e21e 2243 case LEX_FORMLINE:
3280af22
NIS
2244 PL_lex_state = LEX_NORMAL;
2245 s = scan_formline(PL_bufptr);
2246 if (!PL_lex_formbrack)
a0d0e21e
LW
2247 goto rightbracket;
2248 OPERATOR(';');
79072805
LW
2249 }
2250
3280af22
NIS
2251 s = PL_bufptr;
2252 PL_oldoldbufptr = PL_oldbufptr;
2253 PL_oldbufptr = s;
79072805 2254 DEBUG_p( {
bf49b057
GS
2255 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2256 exp_name[PL_expect], s);
79072805 2257 } )
463ee0b2
LW
2258
2259 retry:
378cc40b
LW
2260 switch (*s) {
2261 default:
834a4ddd
LW
2262 if (isIDFIRST_lazy(s))
2263 goto keylookup;
cea2e8a9 2264 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2265 case 4:
2266 case 26:
2267 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2268 case 0:
3280af22
NIS
2269 if (!PL_rsfp) {
2270 PL_last_uni = 0;
2271 PL_last_lop = 0;
2272 if (PL_lex_brackets)
d98d5fff 2273 yyerror("Missing right curly or square bracket");
79072805 2274 TOKEN(0);
463ee0b2 2275 }
3280af22 2276 if (s++ < PL_bufend)
a687059c 2277 goto retry; /* ignore stray nulls */
3280af22
NIS
2278 PL_last_uni = 0;
2279 PL_last_lop = 0;
2280 if (!PL_in_eval && !PL_preambled) {
2281 PL_preambled = TRUE;
2282 sv_setpv(PL_linestr,incl_perldb());
2283 if (SvCUR(PL_linestr))
2284 sv_catpv(PL_linestr,";");
2285 if (PL_preambleav){
2286 while(AvFILLp(PL_preambleav) >= 0) {
2287 SV *tmpsv = av_shift(PL_preambleav);
2288 sv_catsv(PL_linestr, tmpsv);
2289 sv_catpv(PL_linestr, ";");
91b7def8
PP
2290 sv_free(tmpsv);
2291 }
3280af22
NIS
2292 sv_free((SV*)PL_preambleav);
2293 PL_preambleav = NULL;
91b7def8 2294 }
3280af22
NIS
2295 if (PL_minus_n || PL_minus_p) {
2296 sv_catpv(PL_linestr, "LINE: while (<>) {");
2297 if (PL_minus_l)
2298 sv_catpv(PL_linestr,"chomp;");
2299 if (PL_minus_a) {
8fd239a7
CS
2300 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2301 if (gv)
2302 GvIMPORTED_AV_on(gv);
3280af22
NIS
2303 if (PL_minus_F) {
2304 if (strchr("/'\"", *PL_splitstr)
2305 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2306 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2307 else {
2308 char delim;
2309 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2310 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2311 delim = *s;
cea2e8a9 2312 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2313 "q" + (delim == '\''), delim);
3280af22 2314 for (s = PL_splitstr; *s; s++) {
54310121 2315 if (*s == '\\')
3280af22
NIS
2316 sv_catpvn(PL_linestr, "\\", 1);
2317 sv_catpvn(PL_linestr, s, 1);
54310121 2318 }
cea2e8a9 2319 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2320 }
2304df62
AD
2321 }
2322 else
3280af22 2323 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2324 }
79072805 2325 }
3280af22
NIS
2326 sv_catpv(PL_linestr, "\n");
2327 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2328 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2329 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2330 SV *sv = NEWSV(85,0);
2331
2332 sv_upgrade(sv, SVt_PVMG);
3280af22 2333 sv_setsv(sv,PL_linestr);
57843af0 2334 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2335 }
79072805 2336 goto retry;
a687059c 2337 }
e929a76b 2338 do {
3280af22 2339 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2340 fake_eof:
3280af22
NIS
2341 if (PL_rsfp) {
2342 if (PL_preprocess && !PL_in_eval)
2343 (void)PerlProc_pclose(PL_rsfp);
2344 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2345 PerlIO_clearerr(PL_rsfp);
395c3793 2346 else
3280af22
NIS
2347 (void)PerlIO_close(PL_rsfp);
2348 PL_rsfp = Nullfp;
4a9ae47a 2349 PL_doextract = FALSE;
395c3793 2350 }
3280af22
NIS
2351 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2352 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2353 sv_catpv(PL_linestr,";}");
2354 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2355 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2356 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2357 goto retry;
2358 }
3280af22
NIS
2359 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2360 sv_setpv(PL_linestr,"");
79072805 2361 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2362 }
3280af22 2363 if (PL_doextract) {
a0d0e21e 2364 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2365 PL_doextract = FALSE;
a0d0e21e
LW
2366
2367 /* Incest with pod. */
2368 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2369 sv_setpv(PL_linestr, "");
2370 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2371 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2372 PL_doextract = FALSE;
a0d0e21e
LW
2373 }
2374 }
463ee0b2 2375 incline(s);
3280af22
NIS
2376 } while (PL_doextract);
2377 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2378 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2379 SV *sv = NEWSV(85,0);
a687059c 2380
93a17b20 2381 sv_upgrade(sv, SVt_PVMG);
3280af22 2382 sv_setsv(sv,PL_linestr);
57843af0 2383 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2384 }
3280af22 2385 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2386 if (CopLINE(PL_curcop) == 1) {
3280af22 2387 while (s < PL_bufend && isSPACE(*s))
79072805 2388 s++;
a0d0e21e 2389 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2390 s++;
44a8e56a 2391 d = Nullch;
3280af22 2392 if (!PL_in_eval) {
44a8e56a
PP
2393 if (*s == '#' && *(s+1) == '!')
2394 d = s + 2;
2395#ifdef ALTERNATE_SHEBANG
2396 else {
2397 static char as[] = ALTERNATE_SHEBANG;
2398 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2399 d = s + (sizeof(as) - 1);
2400 }
2401#endif /* ALTERNATE_SHEBANG */
2402 }
2403 if (d) {
b8378b72 2404 char *ipath;
774d564b 2405 char *ipathend;
b8378b72 2406
774d564b 2407 while (isSPACE(*d))
b8378b72
CS
2408 d++;
2409 ipath = d;
774d564b
PP
2410 while (*d && !isSPACE(*d))
2411 d++;
2412 ipathend = d;
2413
2414#ifdef ARG_ZERO_IS_SCRIPT
2415 if (ipathend > ipath) {
2416 /*
2417 * HP-UX (at least) sets argv[0] to the script name,
2418 * which makes $^X incorrect. And Digital UNIX and Linux,
2419 * at least, set argv[0] to the basename of the Perl
2420 * interpreter. So, having found "#!", we'll set it right.
2421 */
2422 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2423 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2424 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2425 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2426 SvSETMAGIC(x);
2427 }
774d564b 2428 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2429 }
774d564b 2430#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2431
2432 /*
2433 * Look for options.
2434 */
748a9306 2435 d = instr(s,"perl -");
84e30d1a 2436 if (!d) {
748a9306 2437 d = instr(s,"perl");
84e30d1a
GS
2438#if defined(DOSISH)
2439 /* avoid getting into infinite loops when shebang
2440 * line contains "Perl" rather than "perl" */
2441 if (!d) {
2442 for (d = ipathend-4; d >= ipath; --d) {
2443 if ((*d == 'p' || *d == 'P')
2444 && !ibcmp(d, "perl", 4))
2445 {
2446 break;
2447 }
2448 }
2449 if (d < ipath)
2450 d = Nullch;
2451 }
2452#endif
2453 }
44a8e56a
PP
2454#ifdef ALTERNATE_SHEBANG
2455 /*
2456 * If the ALTERNATE_SHEBANG on this system starts with a
2457 * character that can be part of a Perl expression, then if
2458 * we see it but not "perl", we're probably looking at the
2459 * start of Perl code, not a request to hand off to some
2460 * other interpreter. Similarly, if "perl" is there, but
2461 * not in the first 'word' of the line, we assume the line
2462 * contains the start of the Perl program.
44a8e56a
PP
2463 */
2464 if (d && *s != '#') {
774d564b 2465 char *c = ipath;
44a8e56a
PP
2466 while (*c && !strchr("; \t\r\n\f\v#", *c))
2467 c++;
2468 if (c < d)
2469 d = Nullch; /* "perl" not in first word; ignore */
2470 else
2471 *s = '#'; /* Don't try to parse shebang line */
2472 }
774d564b 2473#endif /* ALTERNATE_SHEBANG */
748a9306 2474 if (!d &&
44a8e56a 2475 *s == '#' &&
774d564b 2476 ipathend > ipath &&
3280af22 2477 !PL_minus_c &&
748a9306 2478 !instr(s,"indir") &&
3280af22 2479 instr(PL_origargv[0],"perl"))
748a9306 2480 {
9f68db38 2481 char **newargv;
9f68db38 2482
774d564b
PP
2483 *ipathend = '\0';
2484 s = ipathend + 1;
3280af22 2485 while (s < PL_bufend && isSPACE(*s))
9f68db38 2486 s++;
3280af22
NIS
2487 if (s < PL_bufend) {
2488 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2489 newargv[1] = s;
3280af22 2490 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2491 s++;
2492 *s = '\0';
3280af22 2493 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2494 }
2495 else
3280af22 2496 newargv = PL_origargv;
774d564b 2497 newargv[0] = ipath;
80252599 2498 PerlProc_execv(ipath, newargv);
cea2e8a9 2499 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2500 }
748a9306 2501 if (d) {
3280af22
NIS
2502 U32 oldpdb = PL_perldb;
2503 bool oldn = PL_minus_n;
2504 bool oldp = PL_minus_p;
748a9306
LW
2505
2506 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2507 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2508
2509 if (*d++ == '-') {
8cc95fdb
PP
2510 do {
2511 if (*d == 'M' || *d == 'm') {
2512 char *m = d;
2513 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2514 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2515 (int)(d - m), m);
2516 }
2517 d = moreswitches(d);
2518 } while (d);
84902520 2519 if (PERLDB_LINE && !oldpdb ||
3280af22 2520 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2521 /* if we have already added "LINE: while (<>) {",
2522 we must not do it again */
748a9306 2523 {
3280af22
NIS
2524 sv_setpv(PL_linestr, "");
2525 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2526 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2527 PL_preambled = FALSE;
84902520 2528 if (PERLDB_LINE)
3280af22 2529 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2530 goto retry;
2531 }
a0d0e21e 2532 }
79072805 2533 }
9f68db38 2534 }
79072805 2535 }
3280af22
NIS
2536 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2537 PL_bufptr = s;
2538 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2539 return yylex();
ae986130 2540 }
378cc40b 2541 goto retry;
4fdae800 2542 case '\r':
6a27c188 2543#ifdef PERL_STRICT_CR
cea2e8a9
GS
2544 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2545 Perl_croak(aTHX_
54310121 2546 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2547#endif
4fdae800 2548 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2549 s++;
2550 goto retry;
378cc40b 2551 case '#':
e929a76b 2552 case '\n':
3280af22
NIS
2553 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2554 d = PL_bufend;
a687059c 2555 while (s < d && *s != '\n')
378cc40b 2556 s++;
0f85fab0 2557 if (s < d)
378cc40b 2558 s++;
463ee0b2 2559 incline(s);
3280af22
NIS
2560 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2561 PL_bufptr = s;
2562 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2563 return yylex();
a687059c 2564 }
378cc40b 2565 }
a687059c 2566 else {
378cc40b 2567 *s = '\0';
3280af22 2568 PL_bufend = s;
a687059c 2569 }
378cc40b
LW
2570 goto retry;
2571 case '-':
79072805 2572 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2573 s++;
3280af22 2574 PL_bufptr = s;
748a9306
LW
2575 tmp = *s++;
2576
3280af22 2577 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2578 s++;
2579
2580 if (strnEQ(s,"=>",2)) {
3280af22 2581 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2582 OPERATOR('-'); /* unary minus */
2583 }
3280af22
NIS
2584 PL_last_uni = PL_oldbufptr;
2585 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2586 switch (tmp) {
79072805
LW
2587 case 'r': FTST(OP_FTEREAD);
2588 case 'w': FTST(OP_FTEWRITE);
2589 case 'x': FTST(OP_FTEEXEC);
2590 case 'o': FTST(OP_FTEOWNED);
2591 case 'R': FTST(OP_FTRREAD);
2592 case 'W': FTST(OP_FTRWRITE);
2593 case 'X': FTST(OP_FTREXEC);
2594 case 'O': FTST(OP_FTROWNED);
2595 case 'e': FTST(OP_FTIS);
2596 case 'z': FTST(OP_FTZERO);
2597 case 's': FTST(OP_FTSIZE);
2598 case 'f': FTST(OP_FTFILE);
2599 case 'd': FTST(OP_FTDIR);
2600 case 'l': FTST(OP_FTLINK);
2601 case 'p': FTST(OP_FTPIPE);
2602 case 'S': FTST(OP_FTSOCK);
2603 case 'u': FTST(OP_FTSUID);
2604 case 'g': FTST(OP_FTSGID);
2605 case 'k': FTST(OP_FTSVTX);
2606 case 'b': FTST(OP_FTBLK);
2607 case 'c': FTST(OP_FTCHR);
2608 case 't': FTST(OP_FTTTY);
2609 case 'T': FTST(OP_FTTEXT);
2610 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2611 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2612 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2613 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2614 default:
cea2e8a9 2615 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2616 break;
2617 }
2618 }
a687059c
LW
2619 tmp = *s++;
2620 if (*s == tmp) {
2621 s++;
3280af22 2622 if (PL_expect == XOPERATOR)
79072805
LW
2623 TERM(POSTDEC);
2624 else
2625 OPERATOR(PREDEC);
2626 }
2627 else if (*s == '>') {
2628 s++;
2629 s = skipspace(s);
834a4ddd 2630 if (isIDFIRST_lazy(s)) {
a0d0e21e 2631 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2632 TOKEN(ARROW);
79072805 2633 }
748a9306
LW
2634 else if (*s == '$')
2635 OPERATOR(ARROW);
463ee0b2 2636 else
748a9306 2637 TERM(ARROW);
a687059c 2638 }
3280af22 2639 if (PL_expect == XOPERATOR)
79072805
LW
2640 Aop(OP_SUBTRACT);
2641 else {
3280af22 2642 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2643 check_uni();
79072805 2644 OPERATOR('-'); /* unary minus */
2f3197b3 2645 }
79072805 2646
378cc40b 2647 case '+':
a687059c
LW
2648 tmp = *s++;
2649 if (*s == tmp) {
378cc40b 2650 s++;
3280af22 2651 if (PL_expect == XOPERATOR)
79072805
LW
2652 TERM(POSTINC);
2653 else
2654 OPERATOR(PREINC);
378cc40b 2655 }
3280af22 2656 if (PL_expect == XOPERATOR)
79072805
LW
2657 Aop(OP_ADD);
2658 else {
3280af22 2659 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2660 check_uni();
a687059c 2661 OPERATOR('+');
2f3197b3 2662 }
a687059c 2663
378cc40b 2664 case '*':
3280af22
NIS
2665 if (PL_expect != XOPERATOR) {
2666 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2667 PL_expect = XOPERATOR;
2668 force_ident(PL_tokenbuf, '*');
2669 if (!*PL_tokenbuf)
a0d0e21e 2670 PREREF('*');
79072805 2671 TERM('*');
a687059c 2672 }
79072805
LW
2673 s++;
2674 if (*s == '*') {
a687059c 2675 s++;
79072805 2676 PWop(OP_POW);
a687059c 2677 }
79072805
LW
2678 Mop(OP_MULTIPLY);
2679
378cc40b 2680 case '%':
3280af22 2681 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2682 ++s;
2683 Mop(OP_MODULO);
a687059c 2684 }
3280af22
NIS
2685 PL_tokenbuf[0] = '%';
2686 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2687 if (!PL_tokenbuf[1]) {
2688 if (s == PL_bufend)
bbce6d69
PP
2689 yyerror("Final % should be \\% or %name");
2690 PREREF('%');
a687059c 2691 }
3280af22 2692 PL_pending_ident = '%';
bbce6d69 2693 TERM('%');
a687059c 2694
378cc40b 2695 case '^':
79072805 2696 s++;
a0d0e21e 2697 BOop(OP_BIT_XOR);
79072805 2698 case '[':
3280af22 2699 PL_lex_brackets++;
79072805 2700 /* FALL THROUGH */
378cc40b 2701 case '~':
378cc40b 2702 case ',':
378cc40b
LW
2703 tmp = *s++;
2704 OPERATOR(tmp);
a0d0e21e
LW
2705 case ':':
2706 if (s[1] == ':') {
2707 len = 0;
2708 goto just_a_word;
2709 }
2710 s++;
09bef843
SB
2711 switch (PL_expect) {
2712 OP *attrs;
2713 case XOPERATOR:
2714 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2715 break;
2716 PL_bufptr = s; /* update in case we back off */
2717 goto grabattrs;
2718 case XATTRBLOCK:
2719 PL_expect = XBLOCK;
2720 goto grabattrs;
2721 case XATTRTERM:
2722 PL_expect = XTERMBLOCK;
2723 grabattrs:
2724 s = skipspace(s);
2725 attrs = Nullop;
2726 while (isIDFIRST_lazy(s)) {
2727 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2728 if (*d == '(') {
2729 d = scan_str(d,TRUE,TRUE);
2730 if (!d) {
2731 if (PL_lex_stuff) {
2732 SvREFCNT_dec(PL_lex_stuff);
2733 PL_lex_stuff = Nullsv;
2734 }
2735 /* MUST advance bufptr here to avoid bogus
2736 "at end of line" context messages from yyerror().
2737 */
2738 PL_bufptr = s + len;
2739 yyerror("Unterminated attribute parameter in attribute list");
2740 if (attrs)
2741 op_free(attrs);
2742 return 0; /* EOF indicator */
2743 }
2744 }
2745 if (PL_lex_stuff) {
2746 SV *sv = newSVpvn(s, len);
2747 sv_catsv(sv, PL_lex_stuff);
2748 attrs = append_elem(OP_LIST, attrs,
2749 newSVOP(OP_CONST, 0, sv));
2750 SvREFCNT_dec(PL_lex_stuff);
2751 PL_lex_stuff = Nullsv;
2752 }
2753 else {
2754 attrs = append_elem(OP_LIST, attrs,
2755 newSVOP(OP_CONST, 0,
2756 newSVpvn(s, len)));
2757 }
2758 s = skipspace(d);
2759 while (*s == ',')
2760 s = skipspace(s+1);
2761 }
2762 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}' for vi */
2763 if (*s != ';' && *s != tmp) {
2764 char q = ((*s == '\'') ? '"' : '\'');
2765 /* If here for an expression, and parsed no attrs, back off. */
2766 if (tmp == '=' && !attrs) {
2767 s = PL_bufptr;
2768 break;
2769 }
2770 /* MUST advance bufptr here to avoid bogus "at end of line"
2771 context messages from yyerror().
2772 */
2773 PL_bufptr = s;
2774 if (!*s)
2775 yyerror("Unterminated attribute list");
2776 else
2777 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2778 q, *s, q));
2779 if (attrs)
2780 op_free(attrs);
2781 OPERATOR(':');
2782 }
2783 if (attrs) {
2784 PL_nextval[PL_nexttoke].opval = attrs;
2785 force_next(THING);
2786 }
2787 TOKEN(COLONATTR);
2788 }
a0d0e21e 2789 OPERATOR(':');
8990e307
LW
2790 case '(':
2791 s++;
3280af22
NIS
2792 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2793 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2794 else
3280af22 2795 PL_expect = XTERM;
a0d0e21e 2796 TOKEN('(');
378cc40b 2797 case ';':
57843af0
GS
2798 if (CopLINE(PL_curcop) < PL_copline)
2799 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2800 tmp = *s++;
2801 OPERATOR(tmp);
2802 case ')':
378cc40b 2803 tmp = *s++;
16d20bd9
AD
2804 s = skipspace(s);
2805 if (*s == '{')
2806 PREBLOCK(tmp);
378cc40b 2807 TERM(tmp);
79072805
LW
2808 case ']':
2809 s++;
3280af22 2810 if (PL_lex_brackets <= 0)
d98d5fff 2811 yyerror("Unmatched right square bracket");
463ee0b2 2812 else
3280af22
NIS
2813 --PL_lex_brackets;
2814 if (PL_lex_state == LEX_INTERPNORMAL) {
2815 if (PL_lex_brackets == 0) {
a0d0e21e 2816 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2817 PL_lex_state = LEX_INTERPEND;
79072805
LW
2818 }
2819 }
4633a7c4 2820 TERM(']');
79072805
LW
2821 case '{':
2822 leftbracket:
79072805 2823 s++;
3280af22
NIS
2824 if (PL_lex_brackets > 100) {
2825 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2826 if (newlb != PL_lex_brackstack) {
8990e307 2827 SAVEFREEPV(newlb);
3280af22 2828 PL_lex_brackstack = newlb;
8990e307
LW
2829 }
2830 }
3280af22 2831 switch (PL_expect) {
a0d0e21e 2832 case XTERM:
3280af22 2833 if (PL_lex_formbrack) {
a0d0e21e
LW
2834 s--;
2835 PRETERMBLOCK(DO);
2836 }
3280af22
NIS
2837 if (PL_oldoldbufptr == PL_last_lop)
2838 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2839 else
3280af22 2840 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2841 OPERATOR(HASHBRACK);
a0d0e21e 2842 case XOPERATOR:
3280af22 2843 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2844 s++;
44a8e56a 2845 d = s;
3280af22
NIS
2846 PL_tokenbuf[0] = '\0';
2847 if (d < PL_bufend && *d == '-') {
2848 PL_tokenbuf[0] = '-';
44a8e56a 2849 d++;
3280af22 2850 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2851 d++;
2852 }
834a4ddd 2853 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2854 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2855 FALSE, &len);
3280af22 2856 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2857 d++;
2858 if (*d == '}') {
3280af22 2859 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2860 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2861 if (minus)
2862 force_next('-');
748a9306
LW
2863 }
2864 }
2865 /* FALL THROUGH */
09bef843 2866 case XATTRBLOCK:
748a9306 2867 case XBLOCK:
3280af22
NIS
2868 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2869 PL_expect = XSTATE;
a0d0e21e 2870 break;
09bef843 2871 case XATTRTERM:
a0d0e21e 2872 case XTERMBLOCK:
3280af22
NIS
2873 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2874 PL_expect = XSTATE;
a0d0e21e
LW
2875 break;
2876 default: {
2877 char *t;
3280af22
NIS
2878 if (PL_oldoldbufptr == PL_last_lop)
2879 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2880 else
3280af22 2881 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2882 s = skipspace(s);
09ecc4b6 2883 if (*s == '}')
a0d0e21e 2884 OPERATOR(HASHBRACK);
b8a4b1be
GS
2885 /* This hack serves to disambiguate a pair of curlies
2886 * as being a block or an anon hash. Normally, expectation
2887 * determines that, but in cases where we're not in a
2888 * position to expect anything in particular (like inside
2889 * eval"") we have to resolve the ambiguity. This code
2890 * covers the case where the first term in the curlies is a
2891 * quoted string. Most other cases need to be explicitly
2892 * disambiguated by prepending a `+' before the opening
2893 * curly in order to force resolution as an anon hash.
2894 *
2895 * XXX should probably propagate the outer expectation
2896 * into eval"" to rely less on this hack, but that could
2897 * potentially break current behavior of eval"".
2898 * GSAR 97-07-21
2899 */
2900 t = s;
2901 if (*s == '\'' || *s == '"' || *s == '`') {
2902 /* common case: get past first string, handling escapes */
3280af22 2903 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2904 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2905 t++;
2906 t++;
a0d0e21e 2907 }
b8a4b1be 2908 else if (*s == 'q') {
3280af22 2909 if (++t < PL_bufend
b8a4b1be 2910 && (!isALNUM(*t)
3280af22 2911 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2912 && !isALNUM(*t))))
2913 {
b8a4b1be
GS
2914 char *tmps;
2915 char open, close, term;
2916 I32 brackets = 1;
2917
3280af22 2918 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2919 t++;
2920 term = *t;
2921 open = term;
2922 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2923 term = tmps[5];
2924 close = term;
2925 if (open == close)
3280af22
NIS
2926 for (t++; t < PL_bufend; t++) {
2927 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2928 t++;
6d07e5e9 2929 else if (*t == open)
b8a4b1be
GS
2930 break;
2931 }
2932 else
3280af22
NIS
2933 for (t++; t < PL_bufend; t++) {
2934 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2935 t++;
6d07e5e9 2936 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2937 break;
2938 else if (*t == open)
2939 brackets++;
2940 }
2941 }
2942 t++;
a0d0e21e 2943 }
0505442f
GS
2944 else if (isALNUM_lazy(t)) {
2945 t += UTF8SKIP(t);
2946 while (t < PL_bufend && isALNUM_lazy(t))
2947 t += UTF8SKIP(t);
a0d0e21e 2948 }
3280af22 2949 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2950 t++;
b8a4b1be
GS
2951 /* if comma follows first term, call it an anon hash */
2952 /* XXX it could be a comma expression with loop modifiers */
3280af22 2953 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2954 || (*t == '=' && t[1] == '>')))
a0d0e21e 2955 OPERATOR(HASHBRACK);
3280af22 2956 if (PL_expect == XREF)
4e4e412b 2957 PL_expect = XTERM;
a0d0e21e 2958 else {
3280af22
NIS
2959 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2960 PL_expect = XSTATE;
a0d0e21e 2961 }
8990e307 2962 }
a0d0e21e 2963 break;
463ee0b2 2964 }
57843af0 2965 yylval.ival = CopLINE(PL_curcop);
79072805 2966 if (isSPACE(*s) || *s == '#')
3280af22 2967 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2968 TOKEN('{');
378cc40b 2969 case '}':
79072805
LW
2970 rightbracket:
2971 s++;
3280af22 2972 if (PL_lex_brackets <= 0)
d98d5fff 2973 yyerror("Unmatched right curly bracket");
463ee0b2 2974 else
3280af22
NIS
2975 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2976 if (PL_lex_brackets < PL_lex_formbrack)
2977 PL_lex_formbrack = 0;
2978 if (PL_lex_state == LEX_INTERPNORMAL) {
2979 if (PL_lex_brackets == 0) {
9059aa12
LW
2980 if (PL_expect & XFAKEBRACK) {
2981 PL_expect &= XENUMMASK;
3280af22
NIS
2982 PL_lex_state = LEX_INTERPEND;
2983 PL_bufptr = s;
cea2e8a9 2984 return yylex(); /* ignore fake brackets */
79072805 2985 }
fa83b5b6 2986 if (*s == '-' && s[1] == '>')
3280af22 2987 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 2988 else if (*s != '[' && *s != '{')
3280af22 2989 PL_lex_state = LEX_INTERPEND;
79072805
LW
2990 }
2991 }
9059aa12
LW
2992 if (PL_expect & XFAKEBRACK) {
2993 PL_expect &= XENUMMASK;
3280af22 2994 PL_bufptr = s;
cea2e8a9 2995 return yylex(); /* ignore fake brackets */
748a9306 2996 }
79072805
LW
2997 force_next('}');
2998 TOKEN(';');
378cc40b
LW
2999 case '&':
3000 s++;
3001 tmp = *s++;
3002 if (tmp == '&')
a0d0e21e 3003 AOPERATOR(ANDAND);
378cc40b 3004 s--;
3280af22 3005 if (PL_expect == XOPERATOR) {
834a4ddd 3006 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
57843af0 3007 CopLINE_dec(PL_curcop);
cea2e8a9 3008 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3009 CopLINE_inc(PL_curcop);
463ee0b2 3010 }
79072805 3011 BAop(OP_BIT_AND);
463ee0b2 3012 }
79072805 3013
3280af22
NIS
3014 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3015 if (*PL_tokenbuf) {
3016 PL_expect = XOPERATOR;
3017 force_ident(PL_tokenbuf, '&');
463ee0b2 3018 }
79072805
LW
3019 else
3020 PREREF('&');
c07a80fd 3021 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3022 TERM('&');
3023
378cc40b
LW
3024 case '|':
3025 s++;
3026 tmp = *s++;
3027 if (tmp == '|')
a0d0e21e 3028 AOPERATOR(OROR);
378cc40b 3029 s--;
79072805 3030 BOop(OP_BIT_OR);
378cc40b
LW
3031 case '=':
3032 s++;
3033 tmp = *s++;
3034 if (tmp == '=')
79072805
LW
3035 Eop(OP_EQ);
3036 if (tmp == '>')
3037 OPERATOR(',');
378cc40b 3038 if (tmp == '~')
79072805 3039 PMop(OP_MATCH);
599cee73 3040 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3041 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3042 s--;
3280af22
NIS
3043 if (PL_expect == XSTATE && isALPHA(tmp) &&
3044 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3045 {
3280af22
NIS
3046 if (PL_in_eval && !PL_rsfp) {
3047 d = PL_bufend;
a5f75d66
AD
3048 while (s < d) {
3049 if (*s++ == '\n') {
3050 incline(s);
3051 if (strnEQ(s,"=cut",4)) {
3052 s = strchr(s,'\n');
3053 if (s)
3054 s++;
3055 else
3056 s = d;
3057 incline(s);
3058 goto retry;
3059 }
3060 }
3061 }
3062 goto retry;
3063 }
3280af22
NIS
3064 s = PL_bufend;
3065 PL_doextract = TRUE;
a0d0e21e
LW
3066 goto retry;
3067 }
3280af22 3068 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3069 char *t;
51882d45 3070#ifdef PERL_STRICT_CR
a0d0e21e 3071 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3072#else
3073 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3074#endif
a0d0e21e
LW
3075 if (*t == '\n' || *t == '#') {
3076 s--;
3280af22 3077 PL_expect = XBLOCK;
a0d0e21e
LW
3078 goto leftbracket;
3079 }
79072805 3080 }
a0d0e21e
LW
3081 yylval.ival = 0;
3082 OPERATOR(ASSIGNOP);
378cc40b
LW
3083 case '!':
3084 s++;
3085 tmp = *s++;
3086 if (tmp == '=')
79072805 3087 Eop(OP_NE);
378cc40b 3088 if (tmp == '~')
79072805 3089 PMop(OP_NOT);
378cc40b
LW
3090 s--;
3091 OPERATOR('!');
3092 case '<':
3280af22 3093 if (PL_expect != XOPERATOR) {
93a17b20 3094 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3095 check_uni();
79072805
LW
3096 if (s[1] == '<')
3097 s = scan_heredoc(s);
3098 else
3099 s = scan_inputsymbol(s);
3100 TERM(sublex_start());
378cc40b
LW
3101 }
3102 s++;
3103 tmp = *s++;
3104 if (tmp == '<')
79072805 3105 SHop(OP_LEFT_SHIFT);
395c3793
LW
3106 if (tmp == '=') {
3107 tmp = *s++;
3108 if (tmp == '>')
79072805 3109 Eop(OP_NCMP);
395c3793 3110 s--;
79072805 3111 Rop(OP_LE);
395c3793 3112 }
378cc40b 3113 s--;
79072805 3114 Rop(OP_LT);
378cc40b
LW
3115 case '>':
3116 s++;
3117 tmp = *s++;
3118 if (tmp == '>')
79072805 3119 SHop(OP_RIGHT_SHIFT);
378cc40b 3120 if (tmp == '=')
79072805 3121 Rop(OP_GE);
378cc40b 3122 s--;
79072805 3123 Rop(OP_GT);
378cc40b
LW
3124
3125 case '$':
bbce6d69
PP
3126 CLINE;
3127
3280af22
NIS
3128 if (PL_expect == XOPERATOR) {
3129 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3130 PL_expect = XTERM;
a0d0e21e 3131 depcom();
bbce6d69 3132 return ','; /* grandfather non-comma-format format */
a0d0e21e 3133 }
8990e307 3134 }
a0d0e21e 3135
834a4ddd 3136 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3137 PL_tokenbuf[0] = '@';
376b8730
SM
3138 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3139 sizeof PL_tokenbuf - 1, FALSE);
3140 if (PL_expect == XOPERATOR)
3141 no_op("Array length", s);
3280af22 3142 if (!PL_tokenbuf[1])
a0d0e21e 3143 PREREF(DOLSHARP);
3280af22
NIS
3144 PL_expect = XOPERATOR;
3145 PL_pending_ident = '#';
463ee0b2 3146 TOKEN(DOLSHARP);
79072805 3147 }
bbce6d69 3148
3280af22 3149 PL_tokenbuf[0] = '$';
376b8730
SM
3150 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3151 sizeof PL_tokenbuf - 1, FALSE);
3152 if (PL_expect == XOPERATOR)
3153 no_op("Scalar", s);
3280af22
NIS
3154 if (!PL_tokenbuf[1]) {
3155 if (s == PL_bufend)
bbce6d69
PP
3156 yyerror("Final $ should be \\$ or $name");
3157 PREREF('$');
8990e307 3158 }
a0d0e21e 3159
bbce6d69 3160 /* This kludge not intended to be bulletproof. */
3280af22 3161 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3162 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3163 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3164 yylval.opval->op_private = OPpCONST_ARYBASE;
3165 TERM(THING);
3166 }
3167
ff68c719 3168 d = s;
69d2bceb 3169 tmp = (I32)*s;
3280af22 3170 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3171 s = skipspace(s);
3172
3280af22 3173 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3174 char *t;
3175 if (*s == '[') {
3280af22 3176 PL_tokenbuf[0] = '@';
599cee73 3177 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3178 for(t = s + 1;
834a4ddd 3179 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3180 t++) ;
a0d0e21e 3181 if (*t++ == ',') {
3280af22
NIS
3182 PL_bufptr = skipspace(PL_bufptr);
3183 while (t < PL_bufend && *t != ']')
bbce6d69 3184 t++;
cea2e8a9 3185 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3186 "Multidimensional syntax %.*s not supported",
3187 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3188 }
3189 }
bbce6d69
PP
3190 }
3191 else if (*s == '{') {
3280af22 3192 PL_tokenbuf[0] = '%';
599cee73 3193 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3194 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3195 {
3280af22 3196 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3197 STRLEN len;
3198 for (t++; isSPACE(*t); t++) ;
834a4ddd 3199 if (isIDFIRST_lazy(t)) {
8903cb82 3200 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3201 for (; isSPACE(*t); t++) ;
864dbfa3 3202 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3203 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3204 "You need to quote \"%s\"", tmpbuf);
748a9306 3205 }
93a17b20
LW
3206 }
3207 }
2f3197b3 3208 }
bbce6d69 3209
3280af22 3210 PL_expect = XOPERATOR;
69d2bceb 3211 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3212 bool islop = (PL_last_lop == PL_oldoldbufptr);
3213 if (!islop || PL_last_lop_op == OP_GREPSTART)
3214 PL_expect = XOPERATOR;
bbce6d69 3215 else if (strchr("$@\"'`q", *s))
3280af22 3216 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3217 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3218 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3219 else if (isIDFIRST_lazy(s)) {
3280af22 3220 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3221 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3222 if (tmp = keyword(tmpbuf, len)) {
3223 /* binary operators exclude handle interpretations */
3224 switch (tmp) {
3225 case -KEY_x:
3226 case -KEY_eq:
3227 case -KEY_ne:
3228 case -KEY_gt:
3229 case -KEY_lt:
3230 case -KEY_ge:
3231 case -KEY_le:
3232 case -KEY_cmp:
3233 break;
3234 default:
3280af22 3235 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3236 break;
3237 }
3238 }
68dc0745
PP
3239 else {
3240 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3241 if (gv && GvCVu(gv))
3280af22 3242 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3243 }
93a17b20 3244 }
bbce6d69 3245 else if (isDIGIT(*s))
3280af22 3246 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3247 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3248 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3249 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3250 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3251 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3252 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3253 }
3280af22 3254 PL_pending_ident = '$';
79072805 3255 TOKEN('$');
378cc40b
LW
3256
3257 case '@':
3280af22 3258 if (PL_expect == XOPERATOR)
bbce6d69 3259 no_op("Array", s);
3280af22
NIS
3260 PL_tokenbuf[0] = '@';
3261 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3262 if (!PL_tokenbuf[1]) {
3263 if (s == PL_bufend)
bbce6d69
PP
3264 yyerror("Final @ should be \\@ or @name");
3265 PREREF('@');
3266 }
3280af22 3267 if (PL_lex_state == LEX_NORMAL)
ff68c719 3268 s = skipspace(s);
3280af22 3269 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3270 if (*s == '{')
3280af22 3271 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3272
3273 /* Warn about @ where they meant $. */
599cee73 3274 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3275 if (*s == '[' || *s == '{') {
3276 char *t = s + 1;
834a4ddd 3277 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3278 t++;
3279 if (*t == '}' || *t == ']') {
3280 t++;
3280af22 3281 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3282 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3283 "Scalar value %.*s better written as $%.*s",
3280af22 3284 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3285 }
93a17b20
LW
3286 }
3287 }
463ee0b2 3288 }
3280af22 3289 PL_pending_ident = '@';
79072805 3290 TERM('@');
378cc40b
LW
3291
3292 case '/': /* may either be division or pattern */
3293 case '?': /* may either be conditional or pattern */
3280af22 3294 if (PL_expect != XOPERATOR) {
c277df42 3295 /* Disable warning on "study /blah/" */
3280af22
NIS
3296 if (PL_oldoldbufptr == PL_last_uni
3297 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3298 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3299 check_uni();
8782bef2 3300 s = scan_pat(s,OP_MATCH);
79072805 3301 TERM(sublex_start());
378cc40b
LW
3302 }
3303 tmp = *s++;
a687059c 3304 if (tmp == '/')
79072805 3305 Mop(OP_DIVIDE);
378cc40b
LW
3306 OPERATOR(tmp);
3307
3308 case '.':
51882d45
GS
3309 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3310#ifdef PERL_STRICT_CR
3311 && s[1] == '\n'
3312#else
3313 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3314#endif
3315 && (s == PL_linestart || s[-1] == '\n') )
3316 {
3280af22
NIS
3317 PL_lex_formbrack = 0;
3318 PL_expect = XSTATE;
79072805
LW
3319 goto rightbracket;
3320 }
3280af22 3321 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3322 tmp = *s++;
a687059c
LW
3323 if (*s == tmp) {
3324 s++;
2f3197b3
LW
3325 if (*s == tmp) {
3326 s++;
79072805 3327 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3328 }
3329 else
79072805 3330 yylval.ival = 0;
378cc40b 3331 OPERATOR(DOTDOT);
a687059c 3332 }
3280af22 3333 if (PL_expect != XOPERATOR)
2f3197b3 3334 check_uni();
79072805 3335 Aop(OP_CONCAT);
378cc40b
LW
3336 }
3337 /* FALL THROUGH */
3338 case '0': case '1': case '2': case '3': case '4':
3339 case '5': case '6': case '7': case '8': case '9':
79072805 3340 s = scan_num(s);
3280af22 3341 if (PL_expect == XOPERATOR)
8990e307 3342 no_op("Number",s);
79072805
LW
3343 TERM(THING);
3344
3345 case '\'':
09bef843 3346 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3347 if (PL_expect == XOPERATOR) {
3348 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3349 PL_expect = XTERM;
a0d0e21e
LW
3350 depcom();
3351 return ','; /* grandfather non-comma-format format */
3352 }
463ee0b2 3353 else
8990e307 3354 no_op("String",s);
463ee0b2 3355 }
79072805 3356 if (!s)
85e6fe83 3357 missingterm((char*)0);
79072805
LW
3358 yylval.ival = OP_CONST;
3359 TERM(sublex_start());
3360
3361 case '"':
09bef843 3362 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3363 if (PL_expect == XOPERATOR) {
3364 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3365 PL_expect = XTERM;
a0d0e21e
LW
3366 depcom();
3367 return ','; /* grandfather non-comma-format format */
3368 }
463ee0b2 3369 else
8990e307 3370 no_op("String",s);
463ee0b2 3371 }
79072805 3372 if (!s)
85e6fe83 3373 missingterm((char*)0);
4633a7c4 3374 yylval.ival = OP_CONST;
3280af22 3375 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3376 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3377 yylval.ival = OP_STRINGIFY;
3378 break;
3379 }
3380 }
79072805
LW
3381 TERM(sublex_start());
3382
3383 case '`':
09bef843 3384 s = scan_str(s,FALSE,FALSE);
3280af22 3385 if (PL_expect == XOPERATOR)
8990e307 3386 no_op("Backticks",s);
79072805 3387 if (!s)
85e6fe83 3388 missingterm((char*)0);
79072805
LW
3389 yylval.ival = OP_BACKTICK;
3390 set_csh();
3391 TERM(sublex_start());
3392
3393 case '\\':
3394 s++;
599cee73 3395 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3396 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3397 *s, *s);
3280af22 3398 if (PL_expect == XOPERATOR)
8990e307 3399 no_op("Backslash",s);
79072805
LW
3400 OPERATOR(REFGEN);
3401
3402 case 'x':
3280af22 3403 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3404 s++;
3405 Mop(OP_REPEAT);
2f3197b3 3406 }
79072805
LW
3407 goto keylookup;
3408
378cc40b 3409 case '_':
79072805
LW
3410 case 'a': case 'A':
3411 case 'b': case 'B':
3412 case 'c': case 'C':
3413 case 'd': case 'D':
3414 case 'e': case 'E':
3415 case 'f': case 'F':
3416 case 'g': case 'G':
3417 case 'h': case 'H':
3418 case 'i': case 'I':
3419 case 'j': case 'J':
3420 case 'k': case 'K':
3421 case 'l': case 'L':
3422 case 'm': case 'M':
3423 case 'n': case 'N':
3424 case 'o': case 'O':
3425 case 'p': case 'P':
3426 case 'q': case 'Q':
3427 case 'r': case 'R':
3428 case 's': case 'S':
3429 case 't': case 'T':
3430 case 'u': case 'U':
3431 case 'v': case 'V':
3432 case 'w': case 'W':
3433 case 'X':
3434 case 'y': case 'Y':
3435 case 'z': case 'Z':
3436
49dc05e3 3437 keylookup: {
2d8e6c8d 3438 STRLEN n_a;
161b471a
NIS
3439 gv = Nullgv;
3440 gvp = 0;
49dc05e3 3441
3280af22
NIS
3442 PL_bufptr = s;
3443 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3444
3445 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3446 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3447 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3448 (PL_tokenbuf[0] == 'q' &&
3449 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3450
3451 /* x::* is just a word, unless x is "CORE" */
3280af22 3452 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3453 goto just_a_word;
3454
3643fb5f 3455 d = s;
3280af22 3456 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3457 d++; /* no comments skipped here, or s### is misparsed */
3458
3459 /* Is this a label? */
3280af22
NIS
3460 if (!tmp && PL_expect == XSTATE
3461 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3462 s = d + 1;
3280af22 3463 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3464 CLINE;
3465 TOKEN(LABEL);
3643fb5f
CS
3466 }
3467
3468 /* Check for keywords */
3280af22 3469 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3470
3471 /* Is this a word before a => operator? */
748a9306
LW
3472 if (strnEQ(d,"=>",2)) {
3473 CLINE;
3280af22 3474 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3475 yylval.opval->op_private = OPpCONST_BARE;
3476 TERM(WORD);
3477 }
3478
a0d0e21e 3479 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3480 GV *ogv = Nullgv; /* override (winner) */
3481 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3482 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3483 CV *cv;
3280af22 3484 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3485 (cv = GvCVu(gv)))
3486 {
3487 if (GvIMPORTED_CV(gv))
3488 ogv = gv;
3489 else if (! CvMETHOD(cv))
3490 hgv = gv;
3491 }
3492 if (!ogv &&
3280af22
NIS
3493 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3494 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3495 GvCVu(gv) && GvIMPORTED_CV(gv))
3496 {
3497 ogv = gv;
3498 }
3499 }
3500 if (ogv) {
3501 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3502 }
3503 else if (gv && !gvp
3504 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3505 && GvCVu(gv)
3280af22 3506 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3507 {
3508 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3509 }
56f7f34b
CS
3510 else { /* no override */
3511 tmp = -tmp;
3512 gv = Nullgv;
3513 gvp = 0;
4944e2f7
GS
3514 if (ckWARN(WARN_AMBIGUOUS) && hgv
3515 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3516 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3517 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3518 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3519 }
a0d0e21e
LW
3520 }
3521
3522 reserved_word:
3523 switch (tmp) {
79072805
LW
3524
3525 default: /* not a keyword */
93a17b20 3526 just_a_word: {
96e4d5b1 3527 SV *sv;
3280af22 3528 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3529
3530 /* Get the rest if it looks like a package qualifier */
3531
a0d0e21e 3532 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3533 STRLEN morelen;
3280af22 3534 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3535 TRUE, &morelen);
3536 if (!morelen)
cea2e8a9 3537 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3538 *s == '\'' ? "'" : "::");
c3e0f903 3539 len += morelen;
a0d0e21e 3540 }
8990e307 3541
3280af22
NIS
3542 if (PL_expect == XOPERATOR) {
3543 if (PL_bufptr == PL_linestart) {
57843af0 3544 CopLINE_dec(PL_curcop);
cea2e8a9 3545 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3546 CopLINE_inc(PL_curcop);
463ee0b2
LW
3547 }
3548 else
54310121 3549 no_op("Bareword",s);
463ee0b2 3550 }
8990e307 3551
c3e0f903
GS
3552 /* Look for a subroutine with this name in current package,
3553 unless name is "Foo::", in which case Foo is a bearword
3554 (and a package name). */
3555
3556 if (len > 2 &&
3280af22 3557 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3558 {
599cee73 3559 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3560 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3561 "Bareword \"%s\" refers to nonexistent package",
3280af22 3562 PL_tokenbuf);
c3e0f903 3563 len -= 2;
3280af22 3564 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3565 gv = Nullgv;
3566 gvp = 0;
3567 }
3568 else {
3569 len = 0;
3570 if (!gv)
3280af22 3571 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3572 }
3573
3574 /* if we saw a global override before, get the right name */
8990e307 3575
49dc05e3 3576 if (gvp) {
79cb57f6 3577 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3578 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3579 }
3580 else
3280af22 3581 sv = newSVpv(PL_tokenbuf,0);
8990e307 3582
a0d0e21e
LW
3583 /* Presume this is going to be a bareword of some sort. */
3584
3585 CLINE;
49dc05e3 3586 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3587 yylval.opval->op_private = OPpCONST_BARE;
3588
c3e0f903
GS
3589 /* And if "Foo::", then that's what it certainly is. */
3590
3591 if (len)
3592 goto safe_bareword;
3593
8990e307
LW