This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
#line directives without a filename leave the file name as it was
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
a0ed51b3 35#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
36/*
37 * Note: we try to be careful never to call the isXXX_utf8() functions
38 * unless we're pretty sure we've seen the beginning of a UTF-8 character
39 * (that is, the two high bits are set). Otherwise we risk loading in the
40 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
41 */
42#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
43 ? isIDFIRST(*(p)) \
44 : isIDFIRST_utf8((U8*)p))
45#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
46 ? isALNUM(*(p)) \
47 : isALNUM_utf8((U8*)p))
a0ed51b3 48
2b92dfce
GS
49/* In variables name $^X, these are the legal values for X.
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
395c3793
LW
72#ifdef I_FCNTL
73#include <fcntl.h>
74#endif
fe14fcc3
LW
75#ifdef I_SYS_FILE
76#include <sys/file.h>
77#endif
395c3793 78
a790bc05 79/* XXX If this causes problems, set i_unistd=undef in the hint file. */
80#ifdef I_UNISTD
81# include <unistd.h> /* Needed for execv() */
82#endif
83
84
79072805
LW
85#ifdef ff_next
86#undef ff_next
d48672a2
LW
87#endif
88
a1a0e61e
TD
89#ifdef USE_PURE_BISON
90YYSTYPE* yylval_pointer = NULL;
91int* yychar_pointer = NULL;
22c35a8c
GS
92# undef yylval
93# undef yychar
e4bfbdd4
JH
94# define yylval (*yylval_pointer)
95# define yychar (*yychar_pointer)
96# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
97# undef yylex
98# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
134 *
135 * Also see LOP and lop() below.
136 */
137
3280af22
NIS
138#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
139#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
140#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
141#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
142#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
143#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
144#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
145#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
146#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
147#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
148#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
149#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
150#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
151#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
152#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
153#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
154#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
155#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
156#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
157#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 158
a687059c
LW
159/* This bit of chicanery makes a unary function followed by
160 * a parenthesis into a function with one argument, highest precedence.
161 */
2f3197b3 162#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
79072805
LW
172 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
173
9f68db38 174/* grandfather return to old style */
3280af22 175#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 176
ffb4593c
NT
177/*
178 * S_ao
179 *
180 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
181 * into an OP_ANDASSIGN or OP_ORASSIGN
182 */
183
76e3520e 184STATIC int
cea2e8a9 185S_ao(pTHX_ int toketype)
a0d0e21e 186{
3280af22
NIS
187 if (*PL_bufptr == '=') {
188 PL_bufptr++;
a0d0e21e
LW
189 if (toketype == ANDAND)
190 yylval.ival = OP_ANDASSIGN;
191 else if (toketype == OROR)
192 yylval.ival = OP_ORASSIGN;
193 toketype = ASSIGNOP;
194 }
195 return toketype;
196}
197
ffb4593c
NT
198/*
199 * S_no_op
200 * When Perl expects an operator and finds something else, no_op
201 * prints the warning. It always prints "<something> found where
202 * operator expected. It prints "Missing semicolon on previous line?"
203 * if the surprise occurs at the start of the line. "do you need to
204 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
205 * where the compiler doesn't know if foo is a method call or a function.
206 * It prints "Missing operator before end of line" if there's nothing
207 * after the missing operator, or "... before <...>" if there is something
208 * after the missing operator.
209 */
210
76e3520e 211STATIC void
cea2e8a9 212S_no_op(pTHX_ char *what, char *s)
463ee0b2 213{
3280af22
NIS
214 char *oldbp = PL_bufptr;
215 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 216
1189a94a
GS
217 if (!s)
218 s = oldbp;
219 else {
220 assert(s >= oldbp);
221 PL_bufptr = s;
222 }
cea2e8a9 223 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 224 if (is_first)
cea2e8a9 225 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 226 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 227 char *t;
834a4ddd 228 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 229 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 230 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 231 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
232 }
233 else
cea2e8a9 234 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 235 PL_bufptr = oldbp;
8990e307
LW
236}
237
ffb4593c
NT
238/*
239 * S_missingterm
240 * Complain about missing quote/regexp/heredoc terminator.
241 * If it's called with (char *)NULL then it cauterizes the line buffer.
242 * If we're in a delimited string and the delimiter is a control
243 * character, it's reformatted into a two-char sequence like ^C.
244 * This is fatal.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_missingterm(pTHX_ char *s)
8990e307
LW
249{
250 char tmpbuf[3];
251 char q;
252 if (s) {
253 char *nl = strrchr(s,'\n');
d2719217 254 if (nl)
8990e307
LW
255 *nl = '\0';
256 }
9d116dd7
JH
257 else if (
258#ifdef EBCDIC
259 iscntrl(PL_multi_close)
260#else
261 PL_multi_close < 32 || PL_multi_close == 127
262#endif
263 ) {
8990e307 264 *tmpbuf = '^';
3280af22 265 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
266 s = "\\n";
267 tmpbuf[2] = '\0';
268 s = tmpbuf;
269 }
270 else {
3280af22 271 *tmpbuf = PL_multi_close;
8990e307
LW
272 tmpbuf[1] = '\0';
273 s = tmpbuf;
274 }
275 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 276 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 277}
79072805 278
ffb4593c
NT
279/*
280 * Perl_deprecate
ffb4593c
NT
281 */
282
79072805 283void
864dbfa3 284Perl_deprecate(pTHX_ char *s)
a0d0e21e 285{
d008e5eb 286 dTHR;
599cee73 287 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 288 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
289}
290
ffb4593c
NT
291/*
292 * depcom
9cbb5ea2 293 * Deprecate a comma-less variable list.
ffb4593c
NT
294 */
295
76e3520e 296STATIC void
cea2e8a9 297S_depcom(pTHX)
a0d0e21e
LW
298{
299 deprecate("comma-less variable list");
300}
301
ffb4593c 302/*
9cbb5ea2
GS
303 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
304 * utf16-to-utf8-reversed.
ffb4593c
NT
305 */
306
c39cd008
GS
307#ifdef PERL_CR_FILTER
308static void
309strip_return(SV *sv)
310{
311 register char *s = SvPVX(sv);
312 register char *e = s + SvCUR(sv);
313 /* outer loop optimized to do nothing if there are no CR-LFs */
314 while (s < e) {
315 if (*s++ == '\r' && *s == '\n') {
316 /* hit a CR-LF, need to copy the rest */
317 register char *d = s - 1;
318 *d++ = *s++;
319 while (s < e) {
320 if (*s == '\r' && s[1] == '\n')
321 s++;
322 *d++ = *s++;
323 }
324 SvCUR(sv) -= s - d;
325 return;
326 }
327 }
328}
a868473f 329
76e3520e 330STATIC I32
c39cd008 331S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 332{
c39cd008
GS
333 I32 count = FILTER_READ(idx+1, sv, maxlen);
334 if (count > 0 && !maxlen)
335 strip_return(sv);
336 return count;
a868473f
NIS
337}
338#endif
339
a0ed51b3 340STATIC I32
cea2e8a9 341S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
342{
343 I32 count = FILTER_READ(idx+1, sv, maxlen);
344 if (count) {
dfe13c55
GS
345 U8* tmps;
346 U8* tend;
347 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 348 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 349 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
350
351 }
352 return count;
353}
354
355STATIC I32
cea2e8a9 356S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
357{
358 I32 count = FILTER_READ(idx+1, sv, maxlen);
359 if (count) {
dfe13c55
GS
360 U8* tmps;
361 U8* tend;
362 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 363 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 364 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
365
366 }
367 return count;
368}
a868473f 369
ffb4593c
NT
370/*
371 * Perl_lex_start
9cbb5ea2
GS
372 * Initialize variables. Uses the Perl save_stack to save its state (for
373 * recursive calls to the parser).
ffb4593c
NT
374 */
375
a0d0e21e 376void
864dbfa3 377Perl_lex_start(pTHX_ SV *line)
79072805 378{
0f15f207 379 dTHR;
8990e307
LW
380 char *s;
381 STRLEN len;
382
3280af22
NIS
383 SAVEI32(PL_lex_dojoin);
384 SAVEI32(PL_lex_brackets);
3280af22
NIS
385 SAVEI32(PL_lex_casemods);
386 SAVEI32(PL_lex_starts);
387 SAVEI32(PL_lex_state);
7766f137 388 SAVEVPTR(PL_lex_inpat);
3280af22 389 SAVEI32(PL_lex_inwhat);
57843af0 390 SAVECOPLINE(PL_curcop);
3280af22
NIS
391 SAVEPPTR(PL_bufptr);
392 SAVEPPTR(PL_bufend);
393 SAVEPPTR(PL_oldbufptr);
394 SAVEPPTR(PL_oldoldbufptr);
395 SAVEPPTR(PL_linestart);
396 SAVESPTR(PL_linestr);
397 SAVEPPTR(PL_lex_brackstack);
398 SAVEPPTR(PL_lex_casestack);
c76ac1ee 399 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
400 SAVESPTR(PL_lex_stuff);
401 SAVEI32(PL_lex_defer);
09bef843 402 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 403 SAVESPTR(PL_lex_repl);
bebdddfc
GS
404 SAVEINT(PL_expect);
405 SAVEINT(PL_lex_expect);
3280af22
NIS
406
407 PL_lex_state = LEX_NORMAL;
408 PL_lex_defer = 0;
409 PL_expect = XSTATE;
410 PL_lex_brackets = 0;
3280af22
NIS
411 New(899, PL_lex_brackstack, 120, char);
412 New(899, PL_lex_casestack, 12, char);
413 SAVEFREEPV(PL_lex_brackstack);
414 SAVEFREEPV(PL_lex_casestack);
415 PL_lex_casemods = 0;
416 *PL_lex_casestack = '\0';
417 PL_lex_dojoin = 0;
418 PL_lex_starts = 0;
419 PL_lex_stuff = Nullsv;
420 PL_lex_repl = Nullsv;
421 PL_lex_inpat = 0;
422 PL_lex_inwhat = 0;
09bef843 423 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
424 PL_linestr = line;
425 if (SvREADONLY(PL_linestr))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 s = SvPV(PL_linestr, len);
8990e307 428 if (len && s[len-1] != ';') {
3280af22
NIS
429 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
430 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
431 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 432 }
3280af22
NIS
433 SvTEMP_off(PL_linestr);
434 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
435 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
436 SvREFCNT_dec(PL_rs);
79cb57f6 437 PL_rs = newSVpvn("\n", 1);
3280af22 438 PL_rsfp = 0;
79072805 439}
a687059c 440
ffb4593c
NT
441/*
442 * Perl_lex_end
9cbb5ea2
GS
443 * Finalizer for lexing operations. Must be called when the parser is
444 * done with the lexer.
ffb4593c
NT
445 */
446
463ee0b2 447void
864dbfa3 448Perl_lex_end(pTHX)
463ee0b2 449{
3280af22 450 PL_doextract = FALSE;
463ee0b2
LW
451}
452
ffb4593c
NT
453/*
454 * S_incline
455 * This subroutine has nothing to do with tilting, whether at windmills
456 * or pinball tables. Its name is short for "increment line". It
57843af0 457 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 458 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
459 * # line 500 "foo.pm"
460 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
461 */
462
76e3520e 463STATIC void
cea2e8a9 464S_incline(pTHX_ char *s)
463ee0b2 465{
0f15f207 466 dTHR;
463ee0b2
LW
467 char *t;
468 char *n;
469 char ch;
470 int sawline = 0;
471
57843af0 472 CopLINE_inc(PL_curcop);
463ee0b2
LW
473 if (*s++ != '#')
474 return;
475 while (*s == ' ' || *s == '\t') s++;
476 if (strnEQ(s, "line ", 5)) {
477 s += 5;
478 sawline = 1;
479 }
480 if (!isDIGIT(*s))
481 return;
482 n = s;
483 while (isDIGIT(*s))
484 s++;
485 while (*s == ' ' || *s == '\t')
486 s++;
487 if (*s == '"' && (t = strchr(s+1, '"')))
488 s++;
489 else {
490 if (!sawline)
491 return; /* false alarm */
492 for (t = s; !isSPACE(*t); t++) ;
493 }
494 ch = *t;
495 *t = '\0';
496 if (t - s > 0)
57843af0 497 CopFILE_set(PL_curcop, s);
463ee0b2 498 *t = ch;
57843af0 499 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
500}
501
ffb4593c
NT
502/*
503 * S_skipspace
504 * Called to gobble the appropriate amount and type of whitespace.
505 * Skips comments as well.
506 */
507
76e3520e 508STATIC char *
cea2e8a9 509S_skipspace(pTHX_ register char *s)
a687059c 510{
11343788 511 dTHR;
3280af22
NIS
512 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
513 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
514 s++;
515 return s;
516 }
517 for (;;) {
fd049845 518 STRLEN prevlen;
09bef843
SB
519 SSize_t oldprevlen, oldoldprevlen;
520 SSize_t oldloplen, oldunilen;
60e6418e
GS
521 while (s < PL_bufend && isSPACE(*s)) {
522 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
523 incline(s);
524 }
ffb4593c
NT
525
526 /* comment */
3280af22
NIS
527 if (s < PL_bufend && *s == '#') {
528 while (s < PL_bufend && *s != '\n')
463ee0b2 529 s++;
60e6418e 530 if (s < PL_bufend) {
463ee0b2 531 s++;
60e6418e
GS
532 if (PL_in_eval && !PL_rsfp) {
533 incline(s);
534 continue;
535 }
536 }
463ee0b2 537 }
ffb4593c
NT
538
539 /* only continue to recharge the buffer if we're at the end
540 * of the buffer, we're not reading from a source filter, and
541 * we're in normal lexing mode
542 */
09bef843
SB
543 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
544 PL_lex_state == LEX_FORMLINE)
463ee0b2 545 return s;
ffb4593c
NT
546
547 /* try to recharge the buffer */
9cbb5ea2
GS
548 if ((s = filter_gets(PL_linestr, PL_rsfp,
549 (prevlen = SvCUR(PL_linestr)))) == Nullch)
550 {
551 /* end of file. Add on the -p or -n magic */
3280af22
NIS
552 if (PL_minus_n || PL_minus_p) {
553 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
554 ";}continue{print or die qq(-p destination: $!\\n)" :
555 "");
3280af22
NIS
556 sv_catpv(PL_linestr,";}");
557 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
558 }
559 else
3280af22 560 sv_setpv(PL_linestr,";");
ffb4593c
NT
561
562 /* reset variables for next time we lex */
9cbb5ea2
GS
563 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
564 = SvPVX(PL_linestr);
3280af22 565 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
566
567 /* Close the filehandle. Could be from -P preprocessor,
568 * STDIN, or a regular file. If we were reading code from
569 * STDIN (because the commandline held no -e or filename)
570 * then we don't close it, we reset it so the code can
571 * read from STDIN too.
572 */
573
3280af22
NIS
574 if (PL_preprocess && !PL_in_eval)
575 (void)PerlProc_pclose(PL_rsfp);
576 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
577 PerlIO_clearerr(PL_rsfp);
8990e307 578 else
3280af22
NIS
579 (void)PerlIO_close(PL_rsfp);
580 PL_rsfp = Nullfp;
463ee0b2
LW
581 return s;
582 }
ffb4593c
NT
583
584 /* not at end of file, so we only read another line */
09bef843
SB
585 /* make corresponding updates to old pointers, for yyerror() */
586 oldprevlen = PL_oldbufptr - PL_bufend;
587 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
588 if (PL_last_uni)
589 oldunilen = PL_last_uni - PL_bufend;
590 if (PL_last_lop)
591 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
592 PL_linestart = PL_bufptr = s + prevlen;
593 PL_bufend = s + SvCUR(PL_linestr);
594 s = PL_bufptr;
09bef843
SB
595 PL_oldbufptr = s + oldprevlen;
596 PL_oldoldbufptr = s + oldoldprevlen;
597 if (PL_last_uni)
598 PL_last_uni = s + oldunilen;
599 if (PL_last_lop)
600 PL_last_lop = s + oldloplen;
a0d0e21e 601 incline(s);
ffb4593c
NT
602
603 /* debugger active and we're not compiling the debugger code,
604 * so store the line into the debugger's array of lines
605 */
3280af22 606 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
607 SV *sv = NEWSV(85,0);
608
609 sv_upgrade(sv, SVt_PVMG);
3280af22 610 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 611 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 612 }
463ee0b2 613 }
a687059c 614}
378cc40b 615
ffb4593c
NT
616/*
617 * S_check_uni
618 * Check the unary operators to ensure there's no ambiguity in how they're
619 * used. An ambiguous piece of code would be:
620 * rand + 5
621 * This doesn't mean rand() + 5. Because rand() is a unary operator,
622 * the +5 is its argument.
623 */
624
76e3520e 625STATIC void
cea2e8a9 626S_check_uni(pTHX)
ba106d47 627{
2f3197b3 628 char *s;
a0d0e21e 629 char *t;
0453d815 630 dTHR;
2f3197b3 631
3280af22 632 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 633 return;
3280af22
NIS
634 while (isSPACE(*PL_last_uni))
635 PL_last_uni++;
834a4ddd 636 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 637 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 638 return;
0453d815 639 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 640 char ch = *s;
0453d815
PM
641 *s = '\0';
642 Perl_warner(aTHX_ WARN_AMBIGUOUS,
643 "Warning: Use of \"%s\" without parens is ambiguous",
644 PL_last_uni);
645 *s = ch;
646 }
2f3197b3
LW
647}
648
ffb4593c
NT
649/* workaround to replace the UNI() macro with a function. Only the
650 * hints/uts.sh file mentions this. Other comments elsewhere in the
651 * source indicate Microport Unix might need it too.
652 */
653
ffed7fef
LW
654#ifdef CRIPPLED_CC
655
656#undef UNI
ffed7fef 657#define UNI(f) return uni(f,s)
ffed7fef 658
76e3520e 659STATIC int
cea2e8a9 660S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
661{
662 yylval.ival = f;
3280af22
NIS
663 PL_expect = XTERM;
664 PL_bufptr = s;
8f872242
NIS
665 PL_last_uni = PL_oldbufptr;
666 PL_last_lop_op = f;
ffed7fef
LW
667 if (*s == '(')
668 return FUNC1;
669 s = skipspace(s);
670 if (*s == '(')
671 return FUNC1;
672 else
673 return UNIOP;
674}
675
a0d0e21e
LW
676#endif /* CRIPPLED_CC */
677
ffb4593c
NT
678/*
679 * LOP : macro to build a list operator. Its behaviour has been replaced
680 * with a subroutine, S_lop() for which LOP is just another name.
681 */
682
a0d0e21e
LW
683#define LOP(f,x) return lop(f,x,s)
684
ffb4593c
NT
685/*
686 * S_lop
687 * Build a list operator (or something that might be one). The rules:
688 * - if we have a next token, then it's a list operator [why?]
689 * - if the next thing is an opening paren, then it's a function
690 * - else it's a list operator
691 */
692
76e3520e 693STATIC I32
a0be28da 694S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 695{
0f15f207 696 dTHR;
79072805 697 yylval.ival = f;
35c8bce7 698 CLINE;
3280af22
NIS
699 PL_expect = x;
700 PL_bufptr = s;
701 PL_last_lop = PL_oldbufptr;
702 PL_last_lop_op = f;
703 if (PL_nexttoke)
a0d0e21e 704 return LSTOP;
79072805
LW
705 if (*s == '(')
706 return FUNC;
707 s = skipspace(s);
708 if (*s == '(')
709 return FUNC;
710 else
711 return LSTOP;
712}
713
ffb4593c
NT
714/*
715 * S_force_next
9cbb5ea2 716 * When the lexer realizes it knows the next token (for instance,
ffb4593c 717 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
718 * to know what token to return the next time the lexer is called. Caller
719 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
720 * handles the token correctly.
ffb4593c
NT
721 */
722
76e3520e 723STATIC void
cea2e8a9 724S_force_next(pTHX_ I32 type)
79072805 725{
3280af22
NIS
726 PL_nexttype[PL_nexttoke] = type;
727 PL_nexttoke++;
728 if (PL_lex_state != LEX_KNOWNEXT) {
729 PL_lex_defer = PL_lex_state;
730 PL_lex_expect = PL_expect;
731 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
732 }
733}
734
ffb4593c
NT
735/*
736 * S_force_word
737 * When the lexer knows the next thing is a word (for instance, it has
738 * just seen -> and it knows that the next char is a word char, then
739 * it calls S_force_word to stick the next word into the PL_next lookahead.
740 *
741 * Arguments:
b1b65b59 742 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
743 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
744 * int check_keyword : if true, Perl checks to make sure the word isn't
745 * a keyword (do this if the word is a label, e.g. goto FOO)
746 * int allow_pack : if true, : characters will also be allowed (require,
747 * use, etc. do this)
9cbb5ea2 748 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
749 */
750
76e3520e 751STATIC char *
cea2e8a9 752S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 753{
463ee0b2
LW
754 register char *s;
755 STRLEN len;
756
757 start = skipspace(start);
758 s = start;
834a4ddd 759 if (isIDFIRST_lazy(s) ||
a0d0e21e 760 (allow_pack && *s == ':') ||
15f0808c 761 (allow_initial_tick && *s == '\'') )
a0d0e21e 762 {
3280af22
NIS
763 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
764 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
765 return start;
766 if (token == METHOD) {
767 s = skipspace(s);
768 if (*s == '(')
3280af22 769 PL_expect = XTERM;
463ee0b2 770 else {
3280af22 771 PL_expect = XOPERATOR;
463ee0b2 772 }
79072805 773 }
3280af22
NIS
774 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
775 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
776 force_next(token);
777 }
778 return s;
779}
780
ffb4593c
NT
781/*
782 * S_force_ident
9cbb5ea2 783 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
784 * text only contains the "foo" portion. The first argument is a pointer
785 * to the "foo", and the second argument is the type symbol to prefix.
786 * Forces the next token to be a "WORD".
9cbb5ea2 787 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
788 */
789
76e3520e 790STATIC void
cea2e8a9 791S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
792{
793 if (s && *s) {
11343788 794 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 795 PL_nextval[PL_nexttoke].opval = o;
79072805 796 force_next(WORD);
748a9306 797 if (kind) {
e858de61 798 dTHR; /* just for in_eval */
11343788 799 o->op_private = OPpCONST_ENTERED;
55497cff 800 /* XXX see note in pp_entereval() for why we forgo typo
801 warnings if the symbol must be introduced in an eval.
802 GSAR 96-10-12 */
3280af22 803 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
804 kind == '$' ? SVt_PV :
805 kind == '@' ? SVt_PVAV :
806 kind == '%' ? SVt_PVHV :
807 SVt_PVGV
808 );
748a9306 809 }
79072805
LW
810 }
811}
812
ffb4593c
NT
813/*
814 * S_force_version
815 * Forces the next token to be a version number.
816 */
817
76e3520e 818STATIC char *
cea2e8a9 819S_force_version(pTHX_ char *s)
89bfa8cd 820{
821 OP *version = Nullop;
822
823 s = skipspace(s);
824
a7cb1f99
GS
825 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
826 char *d = s;
827 if (*d == 'v')
828 d++;
829 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
830 if ((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
89bfa8cd 831 s = scan_num(s);
832 /* real VERSION number -- GBARR */
833 version = yylval.opval;
834 }
835 }
836
837 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 838 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 839 force_next(WORD);
840
841 return (s);
842}
843
ffb4593c
NT
844/*
845 * S_tokeq
846 * Tokenize a quoted string passed in as an SV. It finds the next
847 * chunk, up to end of string or a backslash. It may make a new
848 * SV containing that chunk (if HINT_NEW_STRING is on). It also
849 * turns \\ into \.
850 */
851
76e3520e 852STATIC SV *
cea2e8a9 853S_tokeq(pTHX_ SV *sv)
79072805
LW
854{
855 register char *s;
856 register char *send;
857 register char *d;
b3ac6de7
IZ
858 STRLEN len = 0;
859 SV *pv = sv;
79072805
LW
860
861 if (!SvLEN(sv))
b3ac6de7 862 goto finish;
79072805 863
a0d0e21e 864 s = SvPV_force(sv, len);
748a9306 865 if (SvIVX(sv) == -1)
b3ac6de7 866 goto finish;
463ee0b2 867 send = s + len;
79072805
LW
868 while (s < send && *s != '\\')
869 s++;
870 if (s == send)
b3ac6de7 871 goto finish;
79072805 872 d = s;
3280af22 873 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 874 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
875 while (s < send) {
876 if (*s == '\\') {
a0d0e21e 877 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
878 s++; /* all that, just for this */
879 }
880 *d++ = *s++;
881 }
882 *d = '\0';
463ee0b2 883 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 884 finish:
3280af22 885 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 886 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
887 return sv;
888}
889
ffb4593c
NT
890/*
891 * Now come three functions related to double-quote context,
892 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
893 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
894 * interact with PL_lex_state, and create fake ( ... ) argument lists
895 * to handle functions and concatenation.
896 * They assume that whoever calls them will be setting up a fake
897 * join call, because each subthing puts a ',' after it. This lets
898 * "lower \luPpEr"
899 * become
900 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
901 *
902 * (I'm not sure whether the spurious commas at the end of lcfirst's
903 * arguments and join's arguments are created or not).
904 */
905
906/*
907 * S_sublex_start
908 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
909 *
910 * Pattern matching will set PL_lex_op to the pattern-matching op to
911 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
912 *
913 * OP_CONST and OP_READLINE are easy--just make the new op and return.
914 *
915 * Everything else becomes a FUNC.
916 *
917 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
918 * had an OP_CONST or OP_READLINE). This just sets us up for a
919 * call to S_sublex_push().
920 */
921
76e3520e 922STATIC I32
cea2e8a9 923S_sublex_start(pTHX)
79072805
LW
924{
925 register I32 op_type = yylval.ival;
79072805
LW
926
927 if (op_type == OP_NULL) {
3280af22
NIS
928 yylval.opval = PL_lex_op;
929 PL_lex_op = Nullop;
79072805
LW
930 return THING;
931 }
932 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 933 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
934
935 if (SvTYPE(sv) == SVt_PVIV) {
936 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
937 STRLEN len;
938 char *p;
939 SV *nsv;
940
941 p = SvPV(sv, len);
79cb57f6 942 nsv = newSVpvn(p, len);
b3ac6de7
IZ
943 SvREFCNT_dec(sv);
944 sv = nsv;
945 }
946 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 947 PL_lex_stuff = Nullsv;
79072805
LW
948 return THING;
949 }
950
3280af22
NIS
951 PL_sublex_info.super_state = PL_lex_state;
952 PL_sublex_info.sub_inwhat = op_type;
953 PL_sublex_info.sub_op = PL_lex_op;
954 PL_lex_state = LEX_INTERPPUSH;
55497cff 955
3280af22
NIS
956 PL_expect = XTERM;
957 if (PL_lex_op) {
958 yylval.opval = PL_lex_op;
959 PL_lex_op = Nullop;
55497cff 960 return PMFUNC;
961 }
962 else
963 return FUNC;
964}
965
ffb4593c
NT
966/*
967 * S_sublex_push
968 * Create a new scope to save the lexing state. The scope will be
969 * ended in S_sublex_done. Returns a '(', starting the function arguments
970 * to the uc, lc, etc. found before.
971 * Sets PL_lex_state to LEX_INTERPCONCAT.
972 */
973
76e3520e 974STATIC I32
cea2e8a9 975S_sublex_push(pTHX)
55497cff 976{
0f15f207 977 dTHR;
f46d017c 978 ENTER;
55497cff 979
3280af22
NIS
980 PL_lex_state = PL_sublex_info.super_state;
981 SAVEI32(PL_lex_dojoin);
982 SAVEI32(PL_lex_brackets);
3280af22
NIS
983 SAVEI32(PL_lex_casemods);
984 SAVEI32(PL_lex_starts);
985 SAVEI32(PL_lex_state);
7766f137 986 SAVEVPTR(PL_lex_inpat);
3280af22 987 SAVEI32(PL_lex_inwhat);
57843af0 988 SAVECOPLINE(PL_curcop);
3280af22
NIS
989 SAVEPPTR(PL_bufptr);
990 SAVEPPTR(PL_oldbufptr);
991 SAVEPPTR(PL_oldoldbufptr);
992 SAVEPPTR(PL_linestart);
993 SAVESPTR(PL_linestr);
994 SAVEPPTR(PL_lex_brackstack);
995 SAVEPPTR(PL_lex_casestack);
996
997 PL_linestr = PL_lex_stuff;
998 PL_lex_stuff = Nullsv;
999
9cbb5ea2
GS
1000 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1001 = SvPVX(PL_linestr);
3280af22
NIS
1002 PL_bufend += SvCUR(PL_linestr);
1003 SAVEFREESV(PL_linestr);
1004
1005 PL_lex_dojoin = FALSE;
1006 PL_lex_brackets = 0;
3280af22
NIS
1007 New(899, PL_lex_brackstack, 120, char);
1008 New(899, PL_lex_casestack, 12, char);
1009 SAVEFREEPV(PL_lex_brackstack);
1010 SAVEFREEPV(PL_lex_casestack);
1011 PL_lex_casemods = 0;
1012 *PL_lex_casestack = '\0';
1013 PL_lex_starts = 0;
1014 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1015 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1016
1017 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1018 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1019 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1020 else
3280af22 1021 PL_lex_inpat = Nullop;
79072805 1022
55497cff 1023 return '(';
79072805
LW
1024}
1025
ffb4593c
NT
1026/*
1027 * S_sublex_done
1028 * Restores lexer state after a S_sublex_push.
1029 */
1030
76e3520e 1031STATIC I32
cea2e8a9 1032S_sublex_done(pTHX)
79072805 1033{
3280af22
NIS
1034 if (!PL_lex_starts++) {
1035 PL_expect = XOPERATOR;
79cb57f6 1036 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1037 return THING;
1038 }
1039
3280af22
NIS
1040 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1041 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1042 return yylex();
79072805
LW
1043 }
1044
ffb4593c 1045 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1046 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1047 PL_linestr = PL_lex_repl;
1048 PL_lex_inpat = 0;
1049 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1050 PL_bufend += SvCUR(PL_linestr);
1051 SAVEFREESV(PL_linestr);
1052 PL_lex_dojoin = FALSE;
1053 PL_lex_brackets = 0;
3280af22
NIS
1054 PL_lex_casemods = 0;
1055 *PL_lex_casestack = '\0';
1056 PL_lex_starts = 0;
25da4f38 1057 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1058 PL_lex_state = LEX_INTERPNORMAL;
1059 PL_lex_starts++;
e9fa98b2
HS
1060 /* we don't clear PL_lex_repl here, so that we can check later
1061 whether this is an evalled subst; that means we rely on the
1062 logic to ensure sublex_done() is called again only via the
1063 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1064 }
e9fa98b2 1065 else {
3280af22 1066 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1067 PL_lex_repl = Nullsv;
1068 }
79072805 1069 return ',';
ffed7fef
LW
1070 }
1071 else {
f46d017c 1072 LEAVE;
3280af22
NIS
1073 PL_bufend = SvPVX(PL_linestr);
1074 PL_bufend += SvCUR(PL_linestr);
1075 PL_expect = XOPERATOR;
09bef843 1076 PL_sublex_info.sub_inwhat = 0;
79072805 1077 return ')';
ffed7fef
LW
1078 }
1079}
1080
02aa26ce
NT
1081/*
1082 scan_const
1083
1084 Extracts a pattern, double-quoted string, or transliteration. This
1085 is terrifying code.
1086
3280af22
NIS
1087 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1088 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1089 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1090
9b599b2a
GS
1091 Returns a pointer to the character scanned up to. Iff this is
1092 advanced from the start pointer supplied (ie if anything was
1093 successfully parsed), will leave an OP for the substring scanned
1094 in yylval. Caller must intuit reason for not parsing further
1095 by looking at the next characters herself.
1096
02aa26ce
NT
1097 In patterns:
1098 backslashes:
1099 double-quoted style: \r and \n
1100 regexp special ones: \D \s
1101 constants: \x3
1102 backrefs: \1 (deprecated in substitution replacements)
1103 case and quoting: \U \Q \E
1104 stops on @ and $, but not for $ as tail anchor
1105
1106 In transliterations:
1107 characters are VERY literal, except for - not at the start or end
1108 of the string, which indicates a range. scan_const expands the
1109 range to the full set of intermediate characters.
1110
1111 In double-quoted strings:
1112 backslashes:
1113 double-quoted style: \r and \n
1114 constants: \x3
1115 backrefs: \1 (deprecated)
1116 case and quoting: \U \Q \E
1117 stops on @ and $
1118
1119 scan_const does *not* construct ops to handle interpolated strings.
1120 It stops processing as soon as it finds an embedded $ or @ variable
1121 and leaves it to the caller to work out what's going on.
1122
1123 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1124
1125 $ in pattern could be $foo or could be tail anchor. Assumption:
1126 it's a tail anchor if $ is the last thing in the string, or if it's
1127 followed by one of ")| \n\t"
1128
1129 \1 (backreferences) are turned into $1
1130
1131 The structure of the code is
1132 while (there's a character to process) {
1133 handle transliteration ranges
1134 skip regexp comments
1135 skip # initiated comments in //x patterns
1136 check for embedded @foo
1137 check for embedded scalars
1138 if (backslash) {
1139 leave intact backslashes from leave (below)
1140 deprecate \1 in strings and sub replacements
1141 handle string-changing backslashes \l \U \Q \E, etc.
1142 switch (what was escaped) {
1143 handle - in a transliteration (becomes a literal -)
1144 handle \132 octal characters
1145 handle 0x15 hex characters
1146 handle \cV (control V)
1147 handle printf backslashes (\f, \r, \n, etc)
1148 } (end switch)
1149 } (end if backslash)
1150 } (end while character to read)
1151
1152*/
1153
76e3520e 1154STATIC char *
cea2e8a9 1155S_scan_const(pTHX_ char *start)
79072805 1156{
3280af22 1157 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1158 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1159 register char *s = start; /* start of the constant */
1160 register char *d = SvPVX(sv); /* destination for copies */
1161 bool dorange = FALSE; /* are we in a translit range? */
1162 I32 len; /* ? */
ac2262e3 1163 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1164 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1165 : UTF;
ac2262e3 1166 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1167 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1168 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1169 : UTF;
dff6d3cd 1170 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1171 PL_lex_inpat
4a2d328f 1172 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1173 : "";
79072805
LW
1174
1175 while (s < send || dorange) {
02aa26ce 1176 /* get transliterations out of the way (they're most literal) */
3280af22 1177 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1178 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1179 if (dorange) {
02aa26ce 1180 I32 i; /* current expanded character */
8ada0baa 1181 I32 min; /* first character in range */
02aa26ce
NT
1182 I32 max; /* last character in range */
1183
1184 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1185 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1186 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1187 d -= 2; /* eat the first char and the - */
1188
8ada0baa
JH
1189 min = (U8)*d; /* first char in range */
1190 max = (U8)d[1]; /* last char in range */
1191
1192#ifndef ASCIIish
1193 if ((isLOWER(min) && isLOWER(max)) ||
1194 (isUPPER(min) && isUPPER(max))) {
1195 if (isLOWER(min)) {
1196 for (i = min; i <= max; i++)
1197 if (isLOWER(i))
1198 *d++ = i;
1199 } else {
1200 for (i = min; i <= max; i++)
1201 if (isUPPER(i))
1202 *d++ = i;
1203 }
1204 }
1205 else
1206#endif
1207 for (i = min; i <= max; i++)
1208 *d++ = i;
02aa26ce
NT
1209
1210 /* mark the range as done, and continue */
79072805
LW
1211 dorange = FALSE;
1212 continue;
1213 }
02aa26ce
NT
1214
1215 /* range begins (ignore - as first or last char) */
79072805 1216 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1217 if (utf) {
a176fa2a 1218 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1219 s++;
1220 continue;
1221 }
79072805
LW
1222 dorange = TRUE;
1223 s++;
1224 }
1225 }
02aa26ce
NT
1226
1227 /* if we get here, we're not doing a transliteration */
1228
0f5d15d6
IZ
1229 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1230 except for the last char, which will be done separately. */
3280af22 1231 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1232 if (s[2] == '#') {
1233 while (s < send && *s != ')')
1234 *d++ = *s++;
0f5d15d6
IZ
1235 } else if (s[2] == '{'
1236 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1237 I32 count = 1;
0f5d15d6 1238 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1239 char c;
1240
d9f97599
GS
1241 while (count && (c = *regparse)) {
1242 if (c == '\\' && regparse[1])
1243 regparse++;
cc6b7395
IZ
1244 else if (c == '{')
1245 count++;
1246 else if (c == '}')
1247 count--;
d9f97599 1248 regparse++;
cc6b7395 1249 }
5bdf89e7
IZ
1250 if (*regparse != ')') {
1251 regparse--; /* Leave one char for continuation. */
cc6b7395 1252 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1253 }
0f5d15d6 1254 while (s < regparse)
cc6b7395
IZ
1255 *d++ = *s++;
1256 }
748a9306 1257 }
02aa26ce
NT
1258
1259 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1260 else if (*s == '#' && PL_lex_inpat &&
1261 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1262 while (s+1 < send && *s != '\n')
1263 *d++ = *s++;
1264 }
02aa26ce
NT
1265
1266 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1267 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1268 break;
02aa26ce
NT
1269
1270 /* check for embedded scalars. only stop if we're sure it's a
1271 variable.
1272 */
79072805 1273 else if (*s == '$') {
3280af22 1274 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1275 break;
c277df42 1276 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1277 break; /* in regexp, $ might be tail anchor */
1278 }
02aa26ce 1279
a0ed51b3
LW
1280 /* (now in tr/// code again) */
1281
d008e5eb
GS
1282 if (*s & 0x80 && thisutf) {
1283 dTHR; /* only for ckWARN */
1284 if (ckWARN(WARN_UTF8)) {
dfe13c55 1285 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1286 if (len) {
1287 while (len--)
1288 *d++ = *s++;
1289 continue;
1290 }
a0ed51b3
LW
1291 }
1292 }
1293
02aa26ce 1294 /* backslashes */
79072805
LW
1295 if (*s == '\\' && s+1 < send) {
1296 s++;
02aa26ce
NT
1297
1298 /* some backslashes we leave behind */
c9f97d15 1299 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1300 *d++ = '\\';
1301 *d++ = *s++;
1302 continue;
1303 }
02aa26ce
NT
1304
1305 /* deprecate \1 in strings and substitution replacements */
3280af22 1306 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1307 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1308 {
d008e5eb 1309 dTHR; /* only for ckWARN */
599cee73 1310 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1311 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1312 *--s = '$';
1313 break;
1314 }
02aa26ce
NT
1315
1316 /* string-change backslash escapes */
3280af22 1317 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1318 --s;
1319 break;
1320 }
02aa26ce
NT
1321
1322 /* if we get here, it's either a quoted -, or a digit */
79072805 1323 switch (*s) {
02aa26ce
NT
1324
1325 /* quoted - in transliterations */
79072805 1326 case '-':
3280af22 1327 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1328 *d++ = *s++;
1329 continue;
1330 }
1331 /* FALL THROUGH */
1332 default:
11b8faa4
JH
1333 {
1334 dTHR;
1335 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1336 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1337 "Unrecognized escape \\%c passed through",
1338 *s);
1339 /* default action is to copy the quoted character */
1340 *d++ = *s++;
1341 continue;
1342 }
02aa26ce
NT
1343
1344 /* \132 indicates an octal constant */
79072805
LW
1345 case '0': case '1': case '2': case '3':
1346 case '4': case '5': case '6': case '7':
dff6d3cd 1347 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1348 s += len;
1349 continue;
02aa26ce
NT
1350
1351 /* \x24 indicates a hex constant */
79072805 1352 case 'x':
a0ed51b3
LW
1353 ++s;
1354 if (*s == '{') {
1355 char* e = strchr(s, '}');
1356
adaeee49 1357 if (!e) {
a0ed51b3 1358 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1359 e = s;
1360 }
d008e5eb
GS
1361 if (!utf) {
1362 dTHR;
1363 if (ckWARN(WARN_UTF8))
cea2e8a9 1364 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1365 "Use of \\x{} without utf8 declaration");
1366 }
a0ed51b3 1367 /* note: utf always shorter than hex */
dfe13c55 1368 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1369 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1370 s = e + 1;
a0ed51b3
LW
1371 }
1372 else {
1373 UV uv = (UV)scan_hex(s, 2, &len);
1374 if (utf && PL_lex_inwhat == OP_TRANS &&
1375 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1376 {
dfe13c55 1377 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1378 }
1379 else {
d008e5eb
GS
1380 if (uv >= 127 && UTF) {
1381 dTHR;
1382 if (ckWARN(WARN_UTF8))
cea2e8a9 1383 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb 1384 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
894356b3 1385 (int)len,s,(int)len,s);
d008e5eb 1386 }
a0ed51b3
LW
1387 *d++ = (char)uv;
1388 }
1389 s += len;
1390 }
79072805 1391 continue;
02aa26ce 1392
4a2d328f
IZ
1393 /* \N{latin small letter a} is a named character */
1394 case 'N':
423cee85
JH
1395 ++s;
1396 if (*s == '{') {
1397 char* e = strchr(s, '}');
1398 HV *hv;
1399 SV **svp;
1400 SV *res, *cv;
1401 STRLEN len;
1402 char *str;
1403 char *why = Nullch;
1404
1405 if (!e) {
5777a3f7 1406 yyerror("Missing right brace on \\N{}");
423cee85
JH
1407 e = s - 1;
1408 goto cont_scan;
1409 }
1410 res = newSVpvn(s + 1, e - s - 1);
1411 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1412 res, Nullsv, "\\N{...}" );
423cee85
JH
1413 str = SvPV(res,len);
1414 if (len > e - s + 4) {
1415 char *odest = SvPVX(sv);
1416
1417 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1418 d = SvPVX(sv) + (d - odest);
1419 }
1420 Copy(str, d, len, char);
1421 d += len;
1422 SvREFCNT_dec(res);
1423 cont_scan:
1424 s = e + 1;
1425 }
1426 else
5777a3f7 1427 yyerror("Missing braces on \\N{}");
423cee85
JH
1428 continue;
1429
02aa26ce 1430 /* \c is a control character */
79072805
LW
1431 case 'c':
1432 s++;
9d116dd7
JH
1433#ifdef EBCDIC
1434 *d = *s++;
1435 if (isLOWER(*d))
1436 *d = toUPPER(*d);
1437 *d++ = toCTRL(*d);
1438#else
bbce6d69 1439 len = *s++;
1440 *d++ = toCTRL(len);
9d116dd7 1441#endif
79072805 1442 continue;
02aa26ce
NT
1443
1444 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1445 case 'b':
1446 *d++ = '\b';
1447 break;
1448 case 'n':
1449 *d++ = '\n';
1450 break;
1451 case 'r':
1452 *d++ = '\r';
1453 break;
1454 case 'f':
1455 *d++ = '\f';
1456 break;
1457 case 't':
1458 *d++ = '\t';
1459 break;
34a3fe2a
PP
1460#ifdef EBCDIC
1461 case 'e':
1462 *d++ = '\047'; /* CP 1047 */
1463 break;
1464 case 'a':
1465 *d++ = '\057'; /* CP 1047 */
1466 break;
1467#else
79072805
LW
1468 case 'e':
1469 *d++ = '\033';
1470 break;
1471 case 'a':
1472 *d++ = '\007';
1473 break;
34a3fe2a 1474#endif
02aa26ce
NT
1475 } /* end switch */
1476
79072805
LW
1477 s++;
1478 continue;
02aa26ce
NT
1479 } /* end if (backslash) */
1480
79072805 1481 *d++ = *s++;
02aa26ce
NT
1482 } /* while loop to process each character */
1483
1484 /* terminate the string and set up the sv */
79072805 1485 *d = '\0';
463ee0b2 1486 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1487 SvPOK_on(sv);
1488
02aa26ce 1489 /* shrink the sv if we allocated more than we used */
79072805
LW
1490 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1491 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1492 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1493 }
02aa26ce 1494
9b599b2a 1495 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1496 if (s > PL_bufptr) {
1497 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1498 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1499 sv, Nullsv,
3280af22 1500 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1501 ? "tr"
3280af22 1502 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1503 ? "s"
1504 : "qq")));
79072805 1505 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1506 } else
8990e307 1507 SvREFCNT_dec(sv);
79072805
LW
1508 return s;
1509}
1510
ffb4593c
NT
1511/* S_intuit_more
1512 * Returns TRUE if there's more to the expression (e.g., a subscript),
1513 * FALSE otherwise.
ffb4593c
NT
1514 *
1515 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1516 *
1517 * ->[ and ->{ return TRUE
1518 * { and [ outside a pattern are always subscripts, so return TRUE
1519 * if we're outside a pattern and it's not { or [, then return FALSE
1520 * if we're in a pattern and the first char is a {
1521 * {4,5} (any digits around the comma) returns FALSE
1522 * if we're in a pattern and the first char is a [
1523 * [] returns FALSE
1524 * [SOMETHING] has a funky algorithm to decide whether it's a
1525 * character class or not. It has to deal with things like
1526 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1527 * anything else returns TRUE
1528 */
1529
9cbb5ea2
GS
1530/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1531
76e3520e 1532STATIC int
cea2e8a9 1533S_intuit_more(pTHX_ register char *s)
79072805 1534{
3280af22 1535 if (PL_lex_brackets)
79072805
LW
1536 return TRUE;
1537 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1538 return TRUE;
1539 if (*s != '{' && *s != '[')
1540 return FALSE;
3280af22 1541 if (!PL_lex_inpat)
79072805
LW
1542 return TRUE;
1543
1544 /* In a pattern, so maybe we have {n,m}. */
1545 if (*s == '{') {
1546 s++;
1547 if (!isDIGIT(*s))
1548 return TRUE;
1549 while (isDIGIT(*s))
1550 s++;
1551 if (*s == ',')
1552 s++;
1553 while (isDIGIT(*s))
1554 s++;
1555 if (*s == '}')
1556 return FALSE;
1557 return TRUE;
1558
1559 }
1560
1561 /* On the other hand, maybe we have a character class */
1562
1563 s++;
1564 if (*s == ']' || *s == '^')
1565 return FALSE;
1566 else {
ffb4593c 1567 /* this is terrifying, and it works */
79072805
LW
1568 int weight = 2; /* let's weigh the evidence */
1569 char seen[256];
f27ffc4a 1570 unsigned char un_char = 255, last_un_char;
93a17b20 1571 char *send = strchr(s,']');
3280af22 1572 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1573
1574 if (!send) /* has to be an expression */
1575 return TRUE;
1576
1577 Zero(seen,256,char);
1578 if (*s == '$')
1579 weight -= 3;
1580 else if (isDIGIT(*s)) {
1581 if (s[1] != ']') {
1582 if (isDIGIT(s[1]) && s[2] == ']')
1583 weight -= 10;
1584 }
1585 else
1586 weight -= 100;
1587 }
1588 for (; s < send; s++) {
1589 last_un_char = un_char;
1590 un_char = (unsigned char)*s;
1591 switch (*s) {
1592 case '@':
1593 case '&':
1594 case '$':
1595 weight -= seen[un_char] * 10;
834a4ddd 1596 if (isALNUM_lazy(s+1)) {
8903cb82 1597 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1598 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1599 weight -= 100;
1600 else
1601 weight -= 10;
1602 }
1603 else if (*s == '$' && s[1] &&
93a17b20
LW
1604 strchr("[#!%*<>()-=",s[1])) {
1605 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1606 weight -= 10;
1607 else
1608 weight -= 1;
1609 }
1610 break;
1611 case '\\':
1612 un_char = 254;
1613 if (s[1]) {
93a17b20 1614 if (strchr("wds]",s[1]))
79072805
LW
1615 weight += 100;
1616 else if (seen['\''] || seen['"'])
1617 weight += 1;
93a17b20 1618 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1619 weight += 40;
1620 else if (isDIGIT(s[1])) {
1621 weight += 40;
1622 while (s[1] && isDIGIT(s[1]))
1623 s++;
1624 }
1625 }
1626 else
1627 weight += 100;
1628 break;
1629 case '-':
1630 if (s[1] == '\\')
1631 weight += 50;
93a17b20 1632 if (strchr("aA01! ",last_un_char))
79072805 1633 weight += 30;
93a17b20 1634 if (strchr("zZ79~",s[1]))
79072805 1635 weight += 30;
f27ffc4a
GS
1636 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1637 weight -= 5; /* cope with negative subscript */
79072805
LW
1638 break;
1639 default:
93a17b20 1640 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1641 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1642 char *d = tmpbuf;
1643 while (isALPHA(*s))
1644 *d++ = *s++;
1645 *d = '\0';
1646 if (keyword(tmpbuf, d - tmpbuf))
1647 weight -= 150;
1648 }
1649 if (un_char == last_un_char + 1)
1650 weight += 5;
1651 weight -= seen[un_char];
1652 break;
1653 }
1654 seen[un_char]++;
1655 }
1656 if (weight >= 0) /* probably a character class */
1657 return FALSE;
1658 }
1659
1660 return TRUE;
1661}
ffed7fef 1662
ffb4593c
NT
1663/*
1664 * S_intuit_method
1665 *
1666 * Does all the checking to disambiguate
1667 * foo bar
1668 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1669 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1670 *
1671 * First argument is the stuff after the first token, e.g. "bar".
1672 *
1673 * Not a method if bar is a filehandle.
1674 * Not a method if foo is a subroutine prototyped to take a filehandle.
1675 * Not a method if it's really "Foo $bar"
1676 * Method if it's "foo $bar"
1677 * Not a method if it's really "print foo $bar"
1678 * Method if it's really "foo package::" (interpreted as package->foo)
1679 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1680 * Not a method if bar is a filehandle or package, but is quotd with
1681 * =>
1682 */
1683
76e3520e 1684STATIC int
cea2e8a9 1685S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1686{
1687 char *s = start + (*start == '$');
3280af22 1688 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1689 STRLEN len;
1690 GV* indirgv;
1691
1692 if (gv) {
b6c543e3 1693 CV *cv;
a0d0e21e
LW
1694 if (GvIO(gv))
1695 return 0;
b6c543e3
IZ
1696 if ((cv = GvCVu(gv))) {
1697 char *proto = SvPVX(cv);
1698 if (proto) {
1699 if (*proto == ';')
1700 proto++;
1701 if (*proto == '*')
1702 return 0;
1703 }
1704 } else
a0d0e21e
LW
1705 gv = 0;
1706 }
8903cb82 1707 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1708 /* start is the beginning of the possible filehandle/object,
1709 * and s is the end of it
1710 * tmpbuf is a copy of it
1711 */
1712
a0d0e21e 1713 if (*start == '$') {
3280af22 1714 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1715 return 0;
1716 s = skipspace(s);
3280af22
NIS
1717 PL_bufptr = start;
1718 PL_expect = XREF;
a0d0e21e
LW
1719 return *s == '(' ? FUNCMETH : METHOD;
1720 }
1721 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1722 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1723 len -= 2;
1724 tmpbuf[len] = '\0';
1725 goto bare_package;
1726 }
1727 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1728 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1729 return 0;
1730 /* filehandle or package name makes it a method */
89bfa8cd 1731 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1732 s = skipspace(s);
3280af22 1733 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1734 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1735 bare_package:
3280af22 1736 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1737 newSVpvn(tmpbuf,len));
3280af22
NIS
1738 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1739 PL_expect = XTERM;
a0d0e21e 1740 force_next(WORD);
3280af22 1741 PL_bufptr = s;
a0d0e21e
LW
1742 return *s == '(' ? FUNCMETH : METHOD;
1743 }
1744 }
1745 return 0;
1746}
1747
ffb4593c
NT
1748/*
1749 * S_incl_perldb
1750 * Return a string of Perl code to load the debugger. If PERL5DB
1751 * is set, it will return the contents of that, otherwise a
1752 * compile-time require of perl5db.pl.
1753 */
1754
76e3520e 1755STATIC char*
cea2e8a9 1756S_incl_perldb(pTHX)
a0d0e21e 1757{
3280af22 1758 if (PL_perldb) {
76e3520e 1759 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1760
1761 if (pdb)
1762 return pdb;
61bb5906 1763 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1764 return "BEGIN { require 'perl5db.pl' }";
1765 }
1766 return "";
1767}
1768
1769
16d20bd9
AD
1770/* Encoded script support. filter_add() effectively inserts a
1771 * 'pre-processing' function into the current source input stream.
1772 * Note that the filter function only applies to the current source file
1773 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1774 *
1775 * The datasv parameter (which may be NULL) can be used to pass
1776 * private data to this instance of the filter. The filter function
1777 * can recover the SV using the FILTER_DATA macro and use it to
1778 * store private buffers and state information.
1779 *
1780 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1781 * and the IoDIRP field is used to store the function pointer,
1782 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1783 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1784 * private use must be set using malloc'd pointers.
1785 */
16d20bd9
AD
1786
1787SV *
864dbfa3 1788Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1789{
f4c556ac
GS
1790 if (!funcp)
1791 return Nullsv;
1792
3280af22
NIS
1793 if (!PL_rsfp_filters)
1794 PL_rsfp_filters = newAV();
16d20bd9 1795 if (!datasv)
8c52afec 1796 datasv = NEWSV(255,0);
16d20bd9 1797 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1798 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1799 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1800 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1801 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1802 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1803 av_unshift(PL_rsfp_filters, 1);
1804 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1805 return(datasv);
1806}
1807
1808
1809/* Delete most recently added instance of this filter function. */
a0d0e21e 1810void
864dbfa3 1811Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1812{
e0c19803 1813 SV *datasv;
f4c556ac 1814 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1815 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1816 return;
1817 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1818 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1819 if (IoDIRP(datasv) == (DIR*)funcp) {
1820 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1821 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1822 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1823
16d20bd9
AD
1824 return;
1825 }
1826 /* we need to search for the correct entry and clear it */
cea2e8a9 1827 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1828}
1829
1830
1831/* Invoke the n'th filter function for the current rsfp. */
1832I32
864dbfa3 1833Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1834
1835
1836 /* 0 = read one text line */
a0d0e21e 1837{
16d20bd9
AD
1838 filter_t funcp;
1839 SV *datasv = NULL;
e50aee73 1840
3280af22 1841 if (!PL_rsfp_filters)
16d20bd9 1842 return -1;
3280af22 1843 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1844 /* Provide a default input filter to make life easy. */
1845 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1846 DEBUG_P(PerlIO_printf(Perl_debug_log,
1847 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1848 if (maxlen) {
1849 /* Want a block */
1850 int len ;
1851 int old_len = SvCUR(buf_sv) ;
1852
1853 /* ensure buf_sv is large enough */
1854 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1855 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1856 if (PerlIO_error(PL_rsfp))
37120919
AD
1857 return -1; /* error */
1858 else
1859 return 0 ; /* end of file */
1860 }
16d20bd9
AD
1861 SvCUR_set(buf_sv, old_len + len) ;
1862 } else {
1863 /* Want a line */
3280af22
NIS
1864 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1865 if (PerlIO_error(PL_rsfp))
37120919
AD
1866 return -1; /* error */
1867 else
1868 return 0 ; /* end of file */
1869 }
16d20bd9
AD
1870 }
1871 return SvCUR(buf_sv);
1872 }
1873 /* Skip this filter slot if filter has been deleted */
3280af22 1874 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1875 DEBUG_P(PerlIO_printf(Perl_debug_log,
1876 "filter_read %d: skipped (filter deleted)\n",
1877 idx));
16d20bd9
AD
1878 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1879 }
1880 /* Get function pointer hidden within datasv */
1881 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1882 DEBUG_P(PerlIO_printf(Perl_debug_log,
1883 "filter_read %d: via function %p (%s)\n",
1884 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1885 /* Call function. The function is expected to */
1886 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1887 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1888 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1889}
1890
76e3520e 1891STATIC char *
cea2e8a9 1892S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1893{
c39cd008 1894#ifdef PERL_CR_FILTER
3280af22 1895 if (!PL_rsfp_filters) {
c39cd008 1896 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1897 }
1898#endif
3280af22 1899 if (PL_rsfp_filters) {
16d20bd9 1900
55497cff 1901 if (!append)
1902 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1903 if (FILTER_READ(0, sv, 0) > 0)
1904 return ( SvPVX(sv) ) ;
1905 else
1906 return Nullch ;
1907 }
9d116dd7 1908 else
fd049845 1909 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1910}
1911
1912
748a9306
LW
1913#ifdef DEBUGGING
1914 static char* exp_name[] =
09bef843
SB
1915 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1916 "ATTRTERM", "TERMBLOCK"
1917 };
748a9306 1918#endif
463ee0b2 1919
02aa26ce
NT
1920/*
1921 yylex
1922
1923 Works out what to call the token just pulled out of the input
1924 stream. The yacc parser takes care of taking the ops we return and
1925 stitching them into a tree.
1926
1927 Returns:
1928 PRIVATEREF
1929
1930 Structure:
1931 if read an identifier
1932 if we're in a my declaration
1933 croak if they tried to say my($foo::bar)
1934 build the ops for a my() declaration
1935 if it's an access to a my() variable
1936 are we in a sort block?
1937 croak if my($a); $a <=> $b
1938 build ops for access to a my() variable
1939 if in a dq string, and they've said @foo and we can't find @foo
1940 croak
1941 build ops for a bareword
1942 if we already built the token before, use it.
1943*/
1944
864dbfa3
GS
1945int
1946#ifdef USE_PURE_BISON
cea2e8a9 1947Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1948#else
cea2e8a9 1949Perl_yylex(pTHX)
864dbfa3 1950#endif
378cc40b 1951{
11343788 1952 dTHR;
79072805 1953 register char *s;
378cc40b 1954 register char *d;
79072805 1955 register I32 tmp;
463ee0b2 1956 STRLEN len;
161b471a
NIS
1957 GV *gv = Nullgv;
1958 GV **gvp = 0;
a687059c 1959
a1a0e61e
TD
1960#ifdef USE_PURE_BISON
1961 yylval_pointer = lvalp;
1962 yychar_pointer = lcharp;
1963#endif
1964
02aa26ce 1965 /* check if there's an identifier for us to look at */
3280af22 1966 if (PL_pending_ident) {
02aa26ce 1967 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1968 char pit = PL_pending_ident;
1969 PL_pending_ident = 0;
bbce6d69 1970
02aa26ce
NT
1971 /* if we're in a my(), we can't allow dynamics here.
1972 $foo'bar has already been turned into $foo::bar, so
1973 just check for colons.
1974
1975 if it's a legal name, the OP is a PADANY.
1976 */
3280af22 1977 if (PL_in_my) {
77ca0c92
LW
1978 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1979 tmp = pad_allocmy(PL_tokenbuf);
1980 }
1981 else {
1982 if (strchr(PL_tokenbuf,':'))
1983 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1984
77ca0c92
LW
1985 yylval.opval = newOP(OP_PADANY, 0);
1986 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1987 return PRIVATEREF;
1988 }
bbce6d69 1989 }
1990
02aa26ce
NT
1991 /*
1992 build the ops for accesses to a my() variable.
1993
1994 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1995 then used in a comparison. This catches most, but not
1996 all cases. For instance, it catches
1997 sort { my($a); $a <=> $b }
1998 but not
1999 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2000 (although why you'd do that is anyone's guess).
2001 */
2002
3280af22 2003 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2004#ifdef USE_THREADS
54b9620d 2005 /* Check for single character per-thread SVs */
3280af22
NIS
2006 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2007 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2008 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2009 {
2faa37cc 2010 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2011 yylval.opval->op_targ = tmp;
2012 return PRIVATEREF;
2013 }
2014#endif /* USE_THREADS */
3280af22 2015 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2016 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2017 /* might be an "our" variable" */
f472eb5c 2018 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2019 /* build ops for a bareword */
f472eb5c
GS
2020 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2021 sv_catpvn(sym, "::", 2);
2022 sv_catpv(sym, PL_tokenbuf+1);
2023 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2024 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2025 gv_fetchpv(SvPVX(sym),
77ca0c92 2026 (PL_in_eval
f472eb5c
GS
2027 ? (GV_ADDMULTI | GV_ADDINEVAL)
2028 : TRUE
77ca0c92
LW
2029 ),
2030 ((PL_tokenbuf[0] == '$') ? SVt_PV
2031 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2032 : SVt_PVHV));
2033 return WORD;
2034 }
2035
02aa26ce 2036 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2037 if (PL_last_lop_op == OP_SORT &&
2038 PL_tokenbuf[0] == '$' &&
2039 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2040 && !PL_tokenbuf[2])
bbce6d69 2041 {
3280af22
NIS
2042 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2043 d < PL_bufend && *d != '\n';
a863c7d1
MB
2044 d++)
2045 {
2046 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2047 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2048 PL_tokenbuf);
a863c7d1 2049 }
bbce6d69 2050 }
2051 }
bbce6d69 2052
a863c7d1
MB
2053 yylval.opval = newOP(OP_PADANY, 0);
2054 yylval.opval->op_targ = tmp;
2055 return PRIVATEREF;
2056 }
bbce6d69 2057 }
2058
02aa26ce
NT
2059 /*
2060 Whine if they've said @foo in a doublequoted string,
2061 and @foo isn't a variable we can find in the symbol
2062 table.
2063 */
3280af22
NIS
2064 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2065 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2066 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2067 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2068 PL_tokenbuf, PL_tokenbuf));
bbce6d69 2069 }
2070
02aa26ce 2071 /* build ops for a bareword */
3280af22 2072 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2073 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2074 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2075 ((PL_tokenbuf[0] == '$') ? SVt_PV
2076 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2077 : SVt_PVHV));
2078 return WORD;
2079 }
2080
02aa26ce
NT
2081 /* no identifier pending identification */
2082
3280af22 2083 switch (PL_lex_state) {
79072805
LW
2084#ifdef COMMENTARY
2085 case LEX_NORMAL: /* Some compilers will produce faster */
2086 case LEX_INTERPNORMAL: /* code if we comment these out. */
2087 break;
2088#endif
2089
09bef843 2090 /* when we've already built the next token, just pull it out of the queue */
79072805 2091 case LEX_KNOWNEXT:
3280af22
NIS
2092 PL_nexttoke--;
2093 yylval = PL_nextval[PL_nexttoke];
2094 if (!PL_nexttoke) {
2095 PL_lex_state = PL_lex_defer;
2096 PL_expect = PL_lex_expect;
2097 PL_lex_defer = LEX_NORMAL;
463ee0b2 2098 }
3280af22 2099 return(PL_nexttype[PL_nexttoke]);
79072805 2100
02aa26ce 2101 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2102 when we get here, PL_bufptr is at the \
02aa26ce 2103 */
79072805
LW
2104 case LEX_INTERPCASEMOD:
2105#ifdef DEBUGGING
3280af22 2106 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2107 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2108#endif
02aa26ce 2109 /* handle \E or end of string */
3280af22 2110 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2111 char oldmod;
02aa26ce
NT
2112
2113 /* if at a \E */
3280af22
NIS
2114 if (PL_lex_casemods) {
2115 oldmod = PL_lex_casestack[--PL_lex_casemods];
2116 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2117
3280af22
NIS
2118 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2119 PL_bufptr += 2;
2120 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2121 }
79072805
LW
2122 return ')';
2123 }
3280af22
NIS
2124 if (PL_bufptr != PL_bufend)
2125 PL_bufptr += 2;
2126 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2127 return yylex();
79072805
LW
2128 }
2129 else {
3280af22 2130 s = PL_bufptr + 1;
79072805
LW
2131 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2132 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2133 if (strchr("LU", *s) &&
3280af22 2134 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2135 {
3280af22 2136 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2137 return ')';
2138 }
3280af22
NIS
2139 if (PL_lex_casemods > 10) {
2140 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2141 if (newlb != PL_lex_casestack) {
a0d0e21e 2142 SAVEFREEPV(newlb);
3280af22 2143 PL_lex_casestack = newlb;
a0d0e21e
LW
2144 }
2145 }
3280af22
NIS
2146 PL_lex_casestack[PL_lex_casemods++] = *s;
2147 PL_lex_casestack[PL_lex_casemods] = '\0';
2148 PL_lex_state = LEX_INTERPCONCAT;
2149 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2150 force_next('(');
2151 if (*s == 'l')
3280af22 2152 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2153 else if (*s == 'u')
3280af22 2154 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2155 else if (*s == 'L')
3280af22 2156 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2157 else if (*s == 'U')
3280af22 2158 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2159 else if (*s == 'Q')
3280af22 2160 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2161 else
cea2e8a9 2162 Perl_croak(aTHX_ "panic: yylex");
3280af22 2163 PL_bufptr = s + 1;
79072805 2164 force_next(FUNC);
3280af22
NIS
2165 if (PL_lex_starts) {
2166 s = PL_bufptr;
2167 PL_lex_starts = 0;
79072805
LW
2168 Aop(OP_CONCAT);
2169 }
2170 else
cea2e8a9 2171 return yylex();
79072805
LW
2172 }
2173
55497cff 2174 case LEX_INTERPPUSH:
2175 return sublex_push();
2176
79072805 2177 case LEX_INTERPSTART:
3280af22 2178 if (PL_bufptr == PL_bufend)
79072805 2179 return sublex_done();
3280af22
NIS
2180 PL_expect = XTERM;
2181 PL_lex_dojoin = (*PL_bufptr == '@');
2182 PL_lex_state = LEX_INTERPNORMAL;
2183 if (PL_lex_dojoin) {
2184 PL_nextval[PL_nexttoke].ival = 0;
79072805 2185 force_next(',');
554b3eca 2186#ifdef USE_THREADS
533c011a
NIS
2187 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2188 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2189 force_next(PRIVATEREF);
2190#else
a0d0e21e 2191 force_ident("\"", '$');
554b3eca 2192#endif /* USE_THREADS */
3280af22 2193 PL_nextval[PL_nexttoke].ival = 0;
79072805 2194 force_next('$');
3280af22 2195 PL_nextval[PL_nexttoke].ival = 0;
79072805 2196 force_next('(');
3280af22 2197 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2198 force_next(FUNC);
2199 }
3280af22
NIS
2200 if (PL_lex_starts++) {
2201 s = PL_bufptr;
79072805
LW
2202 Aop(OP_CONCAT);
2203 }
cea2e8a9 2204 return yylex();
79072805
LW
2205
2206 case LEX_INTERPENDMAYBE:
3280af22
NIS
2207 if (intuit_more(PL_bufptr)) {
2208 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2209 break;
2210 }
2211 /* FALL THROUGH */
2212
2213 case LEX_INTERPEND:
3280af22
NIS
2214 if (PL_lex_dojoin) {
2215 PL_lex_dojoin = FALSE;
2216 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2217 return ')';
2218 }
43a16006 2219 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2220 && SvEVALED(PL_lex_repl))
43a16006 2221 {
e9fa98b2 2222 if (PL_bufptr != PL_bufend)
cea2e8a9 2223 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2224 PL_lex_repl = Nullsv;
2225 }
79072805
LW
2226 /* FALLTHROUGH */
2227 case LEX_INTERPCONCAT:
2228#ifdef DEBUGGING
3280af22 2229 if (PL_lex_brackets)
cea2e8a9 2230 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2231#endif
3280af22 2232 if (PL_bufptr == PL_bufend)
79072805
LW
2233 return sublex_done();
2234
3280af22
NIS
2235 if (SvIVX(PL_linestr) == '\'') {
2236 SV *sv = newSVsv(PL_linestr);
2237 if (!PL_lex_inpat)
76e3520e 2238 sv = tokeq(sv);
3280af22 2239 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2240 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2241 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2242 s = PL_bufend;
79072805
LW
2243 }
2244 else {
3280af22 2245 s = scan_const(PL_bufptr);
79072805 2246 if (*s == '\\')
3280af22 2247 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2248 else
3280af22 2249 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2250 }
2251
3280af22
NIS
2252 if (s != PL_bufptr) {
2253 PL_nextval[PL_nexttoke] = yylval;
2254 PL_expect = XTERM;
79072805 2255 force_next(THING);
3280af22 2256 if (PL_lex_starts++)
79072805
LW
2257 Aop(OP_CONCAT);
2258 else {
3280af22 2259 PL_bufptr = s;
cea2e8a9 2260 return yylex();
79072805
LW
2261 }
2262 }
2263
cea2e8a9 2264 return yylex();
a0d0e21e 2265 case LEX_FORMLINE:
3280af22
NIS
2266 PL_lex_state = LEX_NORMAL;
2267 s = scan_formline(PL_bufptr);
2268 if (!PL_lex_formbrack)
a0d0e21e
LW
2269 goto rightbracket;
2270 OPERATOR(';');
79072805
LW
2271 }
2272
3280af22
NIS
2273 s = PL_bufptr;
2274 PL_oldoldbufptr = PL_oldbufptr;
2275 PL_oldbufptr = s;
79072805 2276 DEBUG_p( {
bf49b057
GS
2277 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2278 exp_name[PL_expect], s);
79072805 2279 } )
463ee0b2
LW
2280
2281 retry:
378cc40b
LW
2282 switch (*s) {
2283 default:
834a4ddd
LW
2284 if (isIDFIRST_lazy(s))
2285 goto keylookup;
cea2e8a9 2286 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2287 case 4:
2288 case 26:
2289 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2290 case 0:
3280af22
NIS
2291 if (!PL_rsfp) {
2292 PL_last_uni = 0;
2293 PL_last_lop = 0;
2294 if (PL_lex_brackets)
d98d5fff 2295 yyerror("Missing right curly or square bracket");
79072805 2296 TOKEN(0);
463ee0b2 2297 }
3280af22 2298 if (s++ < PL_bufend)
a687059c 2299 goto retry; /* ignore stray nulls */
3280af22
NIS
2300 PL_last_uni = 0;
2301 PL_last_lop = 0;
2302 if (!PL_in_eval && !PL_preambled) {
2303 PL_preambled = TRUE;
2304 sv_setpv(PL_linestr,incl_perldb());
2305 if (SvCUR(PL_linestr))
2306 sv_catpv(PL_linestr,";");
2307 if (PL_preambleav){
2308 while(AvFILLp(PL_preambleav) >= 0) {
2309 SV *tmpsv = av_shift(PL_preambleav);
2310 sv_catsv(PL_linestr, tmpsv);
2311 sv_catpv(PL_linestr, ";");
91b7def8 2312 sv_free(tmpsv);
2313 }
3280af22
NIS
2314 sv_free((SV*)PL_preambleav);
2315 PL_preambleav = NULL;
91b7def8 2316 }
3280af22
NIS
2317 if (PL_minus_n || PL_minus_p) {
2318 sv_catpv(PL_linestr, "LINE: while (<>) {");
2319 if (PL_minus_l)
2320 sv_catpv(PL_linestr,"chomp;");
2321 if (PL_minus_a) {
8fd239a7
CS
2322 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2323 if (gv)
2324 GvIMPORTED_AV_on(gv);
3280af22
NIS
2325 if (PL_minus_F) {
2326 if (strchr("/'\"", *PL_splitstr)
2327 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2328 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2329 else {
2330 char delim;
2331 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2332 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2333 delim = *s;
cea2e8a9 2334 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2335 "q" + (delim == '\''), delim);
3280af22 2336 for (s = PL_splitstr; *s; s++) {
54310121 2337 if (*s == '\\')
3280af22
NIS
2338 sv_catpvn(PL_linestr, "\\", 1);
2339 sv_catpvn(PL_linestr, s, 1);
54310121 2340 }
cea2e8a9 2341 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2342 }
2304df62
AD
2343 }
2344 else
3280af22 2345 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2346 }
79072805 2347 }
3280af22
NIS
2348 sv_catpv(PL_linestr, "\n");
2349 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2350 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2351 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2352 SV *sv = NEWSV(85,0);
2353
2354 sv_upgrade(sv, SVt_PVMG);
3280af22 2355 sv_setsv(sv,PL_linestr);
57843af0 2356 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2357 }
79072805 2358 goto retry;
a687059c 2359 }
e929a76b 2360 do {
3280af22 2361 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2362 fake_eof:
3280af22
NIS
2363 if (PL_rsfp) {
2364 if (PL_preprocess && !PL_in_eval)
2365 (void)PerlProc_pclose(PL_rsfp);
2366 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2367 PerlIO_clearerr(PL_rsfp);
395c3793 2368 else
3280af22
NIS
2369 (void)PerlIO_close(PL_rsfp);
2370 PL_rsfp = Nullfp;
4a9ae47a 2371 PL_doextract = FALSE;
395c3793 2372 }
3280af22
NIS
2373 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2374 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2375 sv_catpv(PL_linestr,";}");
2376 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2377 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2378 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2379 goto retry;
2380 }
3280af22
NIS
2381 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2382 sv_setpv(PL_linestr,"");
79072805 2383 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2384 }
3280af22 2385 if (PL_doextract) {
a0d0e21e 2386 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2387 PL_doextract = FALSE;
a0d0e21e
LW
2388
2389 /* Incest with pod. */
2390 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2391 sv_setpv(PL_linestr, "");
2392 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2393 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2394 PL_doextract = FALSE;
a0d0e21e
LW
2395 }
2396 }
463ee0b2 2397 incline(s);
3280af22
NIS
2398 } while (PL_doextract);
2399 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2400 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2401 SV *sv = NEWSV(85,0);
a687059c 2402
93a17b20 2403 sv_upgrade(sv, SVt_PVMG);
3280af22 2404 sv_setsv(sv,PL_linestr);
57843af0 2405 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2406 }
3280af22 2407 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2408 if (CopLINE(PL_curcop) == 1) {
3280af22 2409 while (s < PL_bufend && isSPACE(*s))
79072805 2410 s++;
a0d0e21e 2411 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2412 s++;
44a8e56a 2413 d = Nullch;
3280af22 2414 if (!PL_in_eval) {
44a8e56a 2415 if (*s == '#' && *(s+1) == '!')
2416 d = s + 2;
2417#ifdef ALTERNATE_SHEBANG
2418 else {
2419 static char as[] = ALTERNATE_SHEBANG;
2420 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2421 d = s + (sizeof(as) - 1);
2422 }
2423#endif /* ALTERNATE_SHEBANG */
2424 }
2425 if (d) {
b8378b72 2426 char *ipath;
774d564b 2427 char *ipathend;
b8378b72 2428
774d564b 2429 while (isSPACE(*d))
b8378b72
CS
2430 d++;
2431 ipath = d;
774d564b 2432 while (*d && !isSPACE(*d))
2433 d++;
2434 ipathend = d;
2435
2436#ifdef ARG_ZERO_IS_SCRIPT
2437 if (ipathend > ipath) {
2438 /*
2439 * HP-UX (at least) sets argv[0] to the script name,
2440 * which makes $^X incorrect. And Digital UNIX and Linux,
2441 * at least, set argv[0] to the basename of the Perl
2442 * interpreter. So, having found "#!", we'll set it right.
2443 */
2444 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2445 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2446 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2447 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2448 SvSETMAGIC(x);
2449 }
774d564b 2450 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2451 }
774d564b 2452#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2453
2454 /*
2455 * Look for options.
2456 */
748a9306 2457 d = instr(s,"perl -");
84e30d1a 2458 if (!d) {
748a9306 2459 d = instr(s,"perl");
84e30d1a
GS
2460#if defined(DOSISH)
2461 /* avoid getting into infinite loops when shebang
2462 * line contains "Perl" rather than "perl" */
2463 if (!d) {
2464 for (d = ipathend-4; d >= ipath; --d) {
2465 if ((*d == 'p' || *d == 'P')
2466 && !ibcmp(d, "perl", 4))
2467 {
2468 break;
2469 }
2470 }
2471 if (d < ipath)
2472 d = Nullch;
2473 }
2474#endif
2475 }
44a8e56a 2476#ifdef ALTERNATE_SHEBANG
2477 /*
2478 * If the ALTERNATE_SHEBANG on this system starts with a
2479 * character that can be part of a Perl expression, then if
2480 * we see it but not "perl", we're probably looking at the
2481 * start of Perl code, not a request to hand off to some
2482 * other interpreter. Similarly, if "perl" is there, but
2483 * not in the first 'word' of the line, we assume the line
2484 * contains the start of the Perl program.
44a8e56a 2485 */
2486 if (d && *s != '#') {
774d564b 2487 char *c = ipath;
44a8e56a 2488 while (*c && !strchr("; \t\r\n\f\v#", *c))
2489 c++;
2490 if (c < d)
2491 d = Nullch; /* "perl" not in first word; ignore */
2492 else
2493 *s = '#'; /* Don't try to parse shebang line */
2494 }
774d564b 2495#endif /* ALTERNATE_SHEBANG */
748a9306 2496 if (!d &&
44a8e56a 2497 *s == '#' &&
774d564b 2498 ipathend > ipath &&
3280af22 2499 !PL_minus_c &&
748a9306 2500 !instr(s,"indir") &&
3280af22 2501 instr(PL_origargv[0],"perl"))
748a9306 2502 {
9f68db38 2503 char **newargv;
9f68db38 2504
774d564b 2505 *ipathend = '\0';
2506 s = ipathend + 1;
3280af22 2507 while (s < PL_bufend && isSPACE(*s))
9f68db38 2508 s++;
3280af22
NIS
2509 if (s < PL_bufend) {
2510 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2511 newargv[1] = s;
3280af22 2512 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2513 s++;
2514 *s = '\0';
3280af22 2515 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2516 }
2517 else
3280af22 2518 newargv = PL_origargv;
774d564b 2519 newargv[0] = ipath;
80252599 2520 PerlProc_execv(ipath, newargv);
cea2e8a9 2521 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2522 }
748a9306 2523 if (d) {
3280af22
NIS
2524 U32 oldpdb = PL_perldb;
2525 bool oldn = PL_minus_n;
2526 bool oldp = PL_minus_p;
748a9306
LW
2527
2528 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2529 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2530
2531 if (*d++ == '-') {
8cc95fdb 2532 do {
2533 if (*d == 'M' || *d == 'm') {
2534 char *m = d;
2535 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2536 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2537 (int)(d - m), m);
2538 }
2539 d = moreswitches(d);
2540 } while (d);
84902520 2541 if (PERLDB_LINE && !oldpdb ||
3280af22 2542 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2543 /* if we have already added "LINE: while (<>) {",
2544 we must not do it again */
748a9306 2545 {
3280af22
NIS
2546 sv_setpv(PL_linestr, "");
2547 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2549 PL_preambled = FALSE;
84902520 2550 if (PERLDB_LINE)
3280af22 2551 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2552 goto retry;
2553 }
a0d0e21e 2554 }
79072805 2555 }
9f68db38 2556 }
79072805 2557 }
3280af22
NIS
2558 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2559 PL_bufptr = s;
2560 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2561 return yylex();
ae986130 2562 }
378cc40b 2563 goto retry;
4fdae800 2564 case '\r':
6a27c188 2565#ifdef PERL_STRICT_CR
cea2e8a9
GS
2566 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2567 Perl_croak(aTHX_
54310121 2568 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2569#endif
4fdae800 2570 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2571 s++;
2572 goto retry;
378cc40b 2573 case '#':
e929a76b 2574 case '\n':
3280af22
NIS
2575 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2576 d = PL_bufend;
a687059c 2577 while (s < d && *s != '\n')
378cc40b 2578 s++;
0f85fab0 2579 if (s < d)
378cc40b 2580 s++;
463ee0b2 2581 incline(s);
3280af22
NIS
2582 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2583 PL_bufptr = s;
2584 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2585 return yylex();
a687059c 2586 }
378cc40b 2587 }
a687059c 2588 else {
378cc40b 2589 *s = '\0';
3280af22 2590 PL_bufend = s;
a687059c 2591 }
378cc40b
LW
2592 goto retry;
2593 case '-':
79072805 2594 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2595 s++;
3280af22 2596 PL_bufptr = s;
748a9306
LW
2597 tmp = *s++;
2598
3280af22 2599 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2600 s++;
2601
2602 if (strnEQ(s,"=>",2)) {
3280af22 2603 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2604 OPERATOR('-'); /* unary minus */
2605 }
3280af22
NIS
2606 PL_last_uni = PL_oldbufptr;
2607 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2608 switch (tmp) {
79072805
LW
2609 case 'r': FTST(OP_FTEREAD);
2610 case 'w': FTST(OP_FTEWRITE);
2611 case 'x': FTST(OP_FTEEXEC);
2612 case 'o': FTST(OP_FTEOWNED);
2613 case 'R': FTST(OP_FTRREAD);
2614 case 'W': FTST(OP_FTRWRITE);
2615 case 'X': FTST(OP_FTREXEC);
2616 case 'O': FTST(OP_FTROWNED);
2617 case 'e': FTST(OP_FTIS);
2618 case 'z': FTST(OP_FTZERO);
2619 case 's': FTST(OP_FTSIZE);
2620 case 'f': FTST(OP_FTFILE);
2621 case 'd': FTST(OP_FTDIR);
2622 case 'l': FTST(OP_FTLINK);
2623 case 'p': FTST(OP_FTPIPE);
2624 case 'S': FTST(OP_FTSOCK);
2625 case 'u': FTST(OP_FTSUID);
2626 case 'g': FTST(OP_FTSGID);
2627 case 'k': FTST(OP_FTSVTX);
2628 case 'b': FTST(OP_FTBLK);
2629 case 'c': FTST(OP_FTCHR);
2630 case 't': FTST(OP_FTTTY);
2631 case 'T': FTST(OP_FTTEXT);
2632 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2633 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2634 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2635 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2636 default:
cea2e8a9 2637 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2638 break;
2639 }
2640 }
a687059c
LW
2641 tmp = *s++;
2642 if (*s == tmp) {
2643 s++;
3280af22 2644 if (PL_expect == XOPERATOR)
79072805
LW
2645 TERM(POSTDEC);
2646 else
2647 OPERATOR(PREDEC);
2648 }
2649 else if (*s == '>') {
2650 s++;
2651 s = skipspace(s);
834a4ddd 2652 if (isIDFIRST_lazy(s)) {
a0d0e21e 2653 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2654 TOKEN(ARROW);
79072805 2655 }
748a9306
LW
2656 else if (*s == '$')
2657 OPERATOR(ARROW);
463ee0b2 2658 else
748a9306 2659 TERM(ARROW);
a687059c 2660 }
3280af22 2661 if (PL_expect == XOPERATOR)
79072805
LW
2662 Aop(OP_SUBTRACT);
2663 else {
3280af22 2664 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2665 check_uni();
79072805 2666 OPERATOR('-'); /* unary minus */
2f3197b3 2667 }
79072805 2668
378cc40b 2669 case '+':
a687059c
LW
2670 tmp = *s++;
2671 if (*s == tmp) {
378cc40b 2672 s++;
3280af22 2673 if (PL_expect == XOPERATOR)
79072805
LW
2674 TERM(POSTINC);
2675 else
2676 OPERATOR(PREINC);
378cc40b 2677 }
3280af22 2678 if (PL_expect == XOPERATOR)
79072805
LW
2679 Aop(OP_ADD);
2680 else {
3280af22 2681 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2682 check_uni();
a687059c 2683 OPERATOR('+');
2f3197b3 2684 }
a687059c 2685
378cc40b 2686 case '*':
3280af22
NIS
2687 if (PL_expect != XOPERATOR) {
2688 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2689 PL_expect = XOPERATOR;
2690 force_ident(PL_tokenbuf, '*');
2691 if (!*PL_tokenbuf)
a0d0e21e 2692 PREREF('*');
79072805 2693 TERM('*');
a687059c 2694 }
79072805
LW
2695 s++;
2696 if (*s == '*') {
a687059c 2697 s++;
79072805 2698 PWop(OP_POW);
a687059c 2699 }
79072805
LW
2700 Mop(OP_MULTIPLY);
2701
378cc40b 2702 case '%':
3280af22 2703 if (PL_expect == XOPERATOR) {
bbce6d69 2704 ++s;
2705 Mop(OP_MODULO);
a687059c 2706 }
3280af22
NIS
2707 PL_tokenbuf[0] = '%';
2708 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2709 if (!PL_tokenbuf[1]) {
2710 if (s == PL_bufend)
bbce6d69 2711 yyerror("Final % should be \\% or %name");
2712 PREREF('%');
a687059c 2713 }
3280af22 2714 PL_pending_ident = '%';
bbce6d69 2715 TERM('%');
a687059c 2716
378cc40b 2717 case '^':
79072805 2718 s++;
a0d0e21e 2719 BOop(OP_BIT_XOR);
79072805 2720 case '[':
3280af22 2721 PL_lex_brackets++;
79072805 2722 /* FALL THROUGH */
378cc40b 2723 case '~':
378cc40b 2724 case ',':
378cc40b
LW
2725 tmp = *s++;
2726 OPERATOR(tmp);
a0d0e21e
LW
2727 case ':':
2728 if (s[1] == ':') {
2729 len = 0;
2730 goto just_a_word;
2731 }
2732 s++;
09bef843
SB
2733 switch (PL_expect) {
2734 OP *attrs;
2735 case XOPERATOR:
2736 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2737 break;
2738 PL_bufptr = s; /* update in case we back off */
2739 goto grabattrs;
2740 case XATTRBLOCK:
2741 PL_expect = XBLOCK;
2742 goto grabattrs;
2743 case XATTRTERM:
2744 PL_expect = XTERMBLOCK;
2745 grabattrs:
2746 s = skipspace(s);
2747 attrs = Nullop;
2748 while (isIDFIRST_lazy(s)) {
2749 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2750 if (*d == '(') {
2751 d = scan_str(d,TRUE,TRUE);
2752 if (!d) {
2753 if (PL_lex_stuff) {
2754 SvREFCNT_dec(PL_lex_stuff);
2755 PL_lex_stuff = Nullsv;
2756 }
2757 /* MUST advance bufptr here to avoid bogus
2758 "at end of line" context messages from yyerror().
2759 */
2760 PL_bufptr = s + len;
2761 yyerror("Unterminated attribute parameter in attribute list");
2762 if (attrs)
2763 op_free(attrs);
2764 return 0; /* EOF indicator */
2765 }
2766 }
2767 if (PL_lex_stuff) {
2768 SV *sv = newSVpvn(s, len);
2769 sv_catsv(sv, PL_lex_stuff);
2770 attrs = append_elem(OP_LIST, attrs,
2771 newSVOP(OP_CONST, 0, sv));
2772 SvREFCNT_dec(PL_lex_stuff);
2773 PL_lex_stuff = Nullsv;
2774 }
2775 else {
2776 attrs = append_elem(OP_LIST, attrs,
2777 newSVOP(OP_CONST, 0,
2778 newSVpvn(s, len)));
2779 }
2780 s = skipspace(d);
2781 while (*s == ',')
2782 s = skipspace(s+1);
2783 }
2784 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}' for vi */
2785 if (*s != ';' && *s != tmp) {
2786 char q = ((*s == '\'') ? '"' : '\'');
2787 /* If here for an expression, and parsed no attrs, back off. */
2788 if (tmp == '=' && !attrs) {
2789 s = PL_bufptr;
2790 break;
2791 }
2792 /* MUST advance bufptr here to avoid bogus "at end of line"
2793 context messages from yyerror().
2794 */
2795 PL_bufptr = s;
2796 if (!*s)
2797 yyerror("Unterminated attribute list");
2798 else
2799 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2800 q, *s, q));
2801 if (attrs)
2802 op_free(attrs);
2803 OPERATOR(':');
2804 }
2805 if (attrs) {
2806 PL_nextval[PL_nexttoke].opval = attrs;
2807 force_next(THING);
2808 }
2809 TOKEN(COLONATTR);
2810 }
a0d0e21e 2811 OPERATOR(':');
8990e307
LW
2812 case '(':
2813 s++;
3280af22
NIS
2814 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2815 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2816 else
3280af22 2817 PL_expect = XTERM;
a0d0e21e 2818 TOKEN('(');
378cc40b 2819 case ';':
57843af0
GS
2820 if (CopLINE(PL_curcop) < PL_copline)
2821 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2822 tmp = *s++;
2823 OPERATOR(tmp);
2824 case ')':
378cc40b 2825 tmp = *s++;
16d20bd9
AD
2826 s = skipspace(s);
2827 if (*s == '{')
2828 PREBLOCK(tmp);
378cc40b 2829 TERM(tmp);
79072805
LW
2830 case ']':
2831 s++;
3280af22 2832 if (PL_lex_brackets <= 0)
d98d5fff 2833 yyerror("Unmatched right square bracket");
463ee0b2 2834 else
3280af22
NIS
2835 --PL_lex_brackets;
2836 if (PL_lex_state == LEX_INTERPNORMAL) {
2837 if (PL_lex_brackets == 0) {
a0d0e21e 2838 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2839 PL_lex_state = LEX_INTERPEND;
79072805
LW
2840 }
2841 }
4633a7c4 2842 TERM(']');
79072805
LW
2843 case '{':
2844 leftbracket:
79072805 2845 s++;
3280af22
NIS
2846 if (PL_lex_brackets > 100) {
2847 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2848 if (newlb != PL_lex_brackstack) {
8990e307 2849 SAVEFREEPV(newlb);
3280af22 2850 PL_lex_brackstack = newlb;
8990e307
LW
2851 }
2852 }
3280af22 2853 switch (PL_expect) {
a0d0e21e 2854 case XTERM:
3280af22 2855 if (PL_lex_formbrack) {
a0d0e21e
LW
2856 s--;
2857 PRETERMBLOCK(DO);
2858 }
3280af22
NIS
2859 if (PL_oldoldbufptr == PL_last_lop)
2860 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2861 else
3280af22 2862 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2863 OPERATOR(HASHBRACK);
a0d0e21e 2864 case XOPERATOR:
3280af22 2865 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2866 s++;
44a8e56a 2867 d = s;
3280af22
NIS
2868 PL_tokenbuf[0] = '\0';
2869 if (d < PL_bufend && *d == '-') {
2870 PL_tokenbuf[0] = '-';
44a8e56a 2871 d++;
3280af22 2872 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2873 d++;
2874 }
834a4ddd 2875 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2876 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2877 FALSE, &len);
3280af22 2878 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2879 d++;
2880 if (*d == '}') {
3280af22 2881 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2882 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2883 if (minus)
2884 force_next('-');
748a9306
LW
2885 }
2886 }
2887 /* FALL THROUGH */
09bef843 2888 case XATTRBLOCK:
748a9306 2889 case XBLOCK:
3280af22
NIS
2890 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2891 PL_expect = XSTATE;
a0d0e21e 2892 break;
09bef843 2893 case XATTRTERM:
a0d0e21e 2894 case XTERMBLOCK:
3280af22
NIS
2895 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2896 PL_expect = XSTATE;
a0d0e21e
LW
2897 break;
2898 default: {
2899 char *t;
3280af22
NIS
2900 if (PL_oldoldbufptr == PL_last_lop)
2901 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2902 else
3280af22 2903 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2904 s = skipspace(s);
09ecc4b6 2905 if (*s == '}')
a0d0e21e 2906 OPERATOR(HASHBRACK);
b8a4b1be
GS
2907 /* This hack serves to disambiguate a pair of curlies
2908 * as being a block or an anon hash. Normally, expectation
2909 * determines that, but in cases where we're not in a
2910 * position to expect anything in particular (like inside
2911 * eval"") we have to resolve the ambiguity. This code
2912 * covers the case where the first term in the curlies is a
2913 * quoted string. Most other cases need to be explicitly
2914 * disambiguated by prepending a `+' before the opening
2915 * curly in order to force resolution as an anon hash.
2916 *
2917 * XXX should probably propagate the outer expectation
2918 * into eval"" to rely less on this hack, but that could
2919 * potentially break current behavior of eval"".
2920 * GSAR 97-07-21
2921 */
2922 t = s;
2923 if (*s == '\'' || *s == '"' || *s == '`') {
2924 /* common case: get past first string, handling escapes */
3280af22 2925 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2926 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2927 t++;
2928 t++;
a0d0e21e 2929 }
b8a4b1be 2930 else if (*s == 'q') {
3280af22 2931 if (++t < PL_bufend
b8a4b1be 2932 && (!isALNUM(*t)
3280af22 2933 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2934 && !isALNUM(*t))))
2935 {
b8a4b1be
GS
2936 char *tmps;
2937 char open, close, term;
2938 I32 brackets = 1;
2939
3280af22 2940 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2941 t++;
2942 term = *t;
2943 open = term;
2944 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2945 term = tmps[5];
2946 close = term;
2947 if (open == close)
3280af22
NIS
2948 for (t++; t < PL_bufend; t++) {
2949 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2950 t++;
6d07e5e9 2951 else if (*t == open)
b8a4b1be
GS
2952 break;
2953 }
2954 else
3280af22
NIS
2955 for (t++; t < PL_bufend; t++) {
2956 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2957 t++;
6d07e5e9 2958 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2959 break;
2960 else if (*t == open)
2961 brackets++;
2962 }
2963 }
2964 t++;
a0d0e21e 2965 }
0505442f
GS
2966 else if (isALNUM_lazy(t)) {
2967 t += UTF8SKIP(t);
2968 while (t < PL_bufend && isALNUM_lazy(t))
2969 t += UTF8SKIP(t);
a0d0e21e 2970 }
3280af22 2971 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2972 t++;
b8a4b1be
GS
2973 /* if comma follows first term, call it an anon hash */
2974 /* XXX it could be a comma expression with loop modifiers */
3280af22 2975 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2976 || (*t == '=' && t[1] == '>')))
a0d0e21e 2977 OPERATOR(HASHBRACK);
3280af22 2978 if (PL_expect == XREF)
4e4e412b 2979 PL_expect = XTERM;
a0d0e21e 2980 else {
3280af22
NIS
2981 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2982 PL_expect = XSTATE;
a0d0e21e 2983 }
8990e307 2984 }
a0d0e21e 2985 break;
463ee0b2 2986 }
57843af0 2987 yylval.ival = CopLINE(PL_curcop);
79072805 2988 if (isSPACE(*s) || *s == '#')
3280af22 2989 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2990 TOKEN('{');
378cc40b 2991 case '}':
79072805
LW
2992 rightbracket:
2993 s++;
3280af22 2994 if (PL_lex_brackets <= 0)
d98d5fff 2995 yyerror("Unmatched right curly bracket");
463ee0b2 2996 else
3280af22
NIS
2997 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2998 if (PL_lex_brackets < PL_lex_formbrack)
2999 PL_lex_formbrack = 0;
3000 if (PL_lex_state == LEX_INTERPNORMAL) {
3001 if (PL_lex_brackets == 0) {
9059aa12
LW
3002 if (PL_expect & XFAKEBRACK) {
3003 PL_expect &= XENUMMASK;
3280af22
NIS
3004 PL_lex_state = LEX_INTERPEND;
3005 PL_bufptr = s;
cea2e8a9 3006 return yylex(); /* ignore fake brackets */
79072805 3007 }
fa83b5b6 3008 if (*s == '-' && s[1] == '>')
3280af22 3009 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3010 else if (*s != '[' && *s != '{')
3280af22 3011 PL_lex_state = LEX_INTERPEND;
79072805
LW
3012 }
3013 }
9059aa12
LW
3014 if (PL_expect & XFAKEBRACK) {
3015 PL_expect &= XENUMMASK;
3280af22 3016 PL_bufptr = s;
cea2e8a9 3017 return yylex(); /* ignore fake brackets */
748a9306 3018 }
79072805
LW
3019 force_next('}');
3020 TOKEN(';');
378cc40b
LW
3021 case '&':
3022 s++;
3023 tmp = *s++;
3024 if (tmp == '&')
a0d0e21e 3025 AOPERATOR(ANDAND);
378cc40b 3026 s--;
3280af22 3027 if (PL_expect == XOPERATOR) {
834a4ddd 3028 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
57843af0 3029 CopLINE_dec(PL_curcop);
cea2e8a9 3030 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3031 CopLINE_inc(PL_curcop);
463ee0b2 3032 }
79072805 3033 BAop(OP_BIT_AND);
463ee0b2 3034 }
79072805 3035
3280af22
NIS
3036 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3037 if (*PL_tokenbuf) {
3038 PL_expect = XOPERATOR;
3039 force_ident(PL_tokenbuf, '&');
463ee0b2 3040 }
79072805
LW
3041 else
3042 PREREF('&');
c07a80fd 3043 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3044 TERM('&');
3045
378cc40b
LW
3046 case '|':
3047 s++;
3048 tmp = *s++;
3049 if (tmp == '|')
a0d0e21e 3050 AOPERATOR(OROR);
378cc40b 3051 s--;
79072805 3052 BOop(OP_BIT_OR);
378cc40b
LW
3053 case '=':
3054 s++;
3055 tmp = *s++;
3056 if (tmp == '=')
79072805
LW
3057 Eop(OP_EQ);
3058 if (tmp == '>')
3059 OPERATOR(',');
378cc40b 3060 if (tmp == '~')
79072805 3061 PMop(OP_MATCH);
599cee73 3062 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3063 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3064 s--;
3280af22
NIS
3065 if (PL_expect == XSTATE && isALPHA(tmp) &&
3066 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3067 {
3280af22
NIS
3068 if (PL_in_eval && !PL_rsfp) {
3069 d = PL_bufend;
a5f75d66
AD
3070 while (s < d) {
3071 if (*s++ == '\n') {
3072 incline(s);
3073 if (strnEQ(s,"=cut",4)) {
3074 s = strchr(s,'\n');
3075 if (s)
3076 s++;
3077 else
3078 s = d;
3079 incline(s);
3080 goto retry;
3081 }
3082 }
3083 }
3084 goto retry;
3085 }
3280af22
NIS
3086 s = PL_bufend;
3087 PL_doextract = TRUE;
a0d0e21e
LW
3088 goto retry;
3089 }
3280af22 3090 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3091 char *t;
51882d45 3092#ifdef PERL_STRICT_CR
a0d0e21e 3093 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3094#else
3095 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3096#endif
a0d0e21e
LW
3097 if (*t == '\n' || *t == '#') {
3098 s--;
3280af22 3099 PL_expect = XBLOCK;
a0d0e21e
LW
3100 goto leftbracket;
3101 }
79072805 3102 }
a0d0e21e
LW
3103 yylval.ival = 0;
3104 OPERATOR(ASSIGNOP);
378cc40b
LW
3105 case '!':
3106 s++;
3107 tmp = *s++;
3108 if (tmp == '=')
79072805 3109 Eop(OP_NE);
378cc40b 3110 if (tmp == '~')
79072805 3111 PMop(OP_NOT);
378cc40b
LW
3112 s--;
3113 OPERATOR('!');
3114 case '<':
3280af22 3115 if (PL_expect != XOPERATOR) {
93a17b20 3116 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3117 check_uni();
79072805
LW
3118 if (s[1] == '<')
3119 s = scan_heredoc(s);
3120 else
3121 s = scan_inputsymbol(s);
3122 TERM(sublex_start());
378cc40b
LW
3123 }
3124 s++;
3125 tmp = *s++;
3126 if (tmp == '<')
79072805 3127 SHop(OP_LEFT_SHIFT);
395c3793
LW
3128 if (tmp == '=') {
3129 tmp = *s++;
3130 if (tmp == '>')
79072805 3131 Eop(OP_NCMP);
395c3793 3132 s--;
79072805 3133 Rop(OP_LE);
395c3793 3134 }
378cc40b 3135 s--;
79072805 3136 Rop(OP_LT);
378cc40b
LW
3137 case '>':
3138 s++;
3139 tmp = *s++;
3140 if (tmp == '>')
79072805 3141 SHop(OP_RIGHT_SHIFT);
378cc40b 3142 if (tmp == '=')
79072805 3143 Rop(OP_GE);
378cc40b 3144 s--;
79072805 3145 Rop(OP_GT);
378cc40b
LW
3146
3147 case '$':
bbce6d69 3148 CLINE;
3149
3280af22
NIS
3150 if (PL_expect == XOPERATOR) {
3151 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3152 PL_expect = XTERM;
a0d0e21e 3153 depcom();
bbce6d69 3154 return ','; /* grandfather non-comma-format format */
a0d0e21e 3155 }
8990e307 3156 }
a0d0e21e 3157
834a4ddd 3158 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3159 PL_tokenbuf[0] = '@';
376b8730
SM
3160 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3161 sizeof PL_tokenbuf - 1, FALSE);
3162 if (PL_expect == XOPERATOR)
3163 no_op("Array length", s);
3280af22 3164 if (!PL_tokenbuf[1])
a0d0e21e 3165 PREREF(DOLSHARP);
3280af22
NIS
3166 PL_expect = XOPERATOR;
3167 PL_pending_ident = '#';
463ee0b2 3168 TOKEN(DOLSHARP);
79072805 3169 }
bbce6d69 3170
3280af22 3171 PL_tokenbuf[0] = '$';
376b8730
SM
3172 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3173 sizeof PL_tokenbuf - 1, FALSE);
3174 if (PL_expect == XOPERATOR)
3175 no_op("Scalar", s);
3280af22
NIS
3176 if (!PL_tokenbuf[1]) {
3177 if (s == PL_bufend)
bbce6d69 3178 yyerror("Final $ should be \\$ or $name");
3179 PREREF('$');
8990e307 3180 }
a0d0e21e 3181
bbce6d69 3182 /* This kludge not intended to be bulletproof. */
3280af22 3183 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3184 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3185 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 3186 yylval.opval->op_private = OPpCONST_ARYBASE;
3187 TERM(THING);
3188 }
3189
ff68c719 3190 d = s;
69d2bceb 3191 tmp = (I32)*s;
3280af22 3192 if (PL_lex_state == LEX_NORMAL)
ff68c719 3193 s = skipspace(s);
3194
3280af22 3195 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3196 char *t;
3197 if (*s == '[') {
3280af22 3198 PL_tokenbuf[0] = '@';
599cee73 3199 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3200 for(t = s + 1;
834a4ddd 3201 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3202 t++) ;
a0d0e21e 3203 if (*t++ == ',') {
3280af22
NIS
3204 PL_bufptr = skipspace(PL_bufptr);
3205 while (t < PL_bufend && *t != ']')
bbce6d69 3206 t++;
cea2e8a9 3207 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3208 "Multidimensional syntax %.*s not supported",
3209 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3210 }
3211 }
bbce6d69 3212 }
3213 else if (*s == '{') {
3280af22 3214 PL_tokenbuf[0] = '%';
599cee73 3215 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3216 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3217 {
3280af22 3218 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3219 STRLEN len;
3220 for (t++; isSPACE(*t); t++) ;
834a4ddd 3221 if (isIDFIRST_lazy(t)) {
8903cb82 3222 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3223 for (; isSPACE(*t); t++) ;
864dbfa3 3224 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3225 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3226 "You need to quote \"%s\"", tmpbuf);
748a9306 3227 }
93a17b20
LW
3228 }
3229 }
2f3197b3 3230 }
bbce6d69 3231
3280af22 3232 PL_expect = XOPERATOR;
69d2bceb 3233 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3234 bool islop = (PL_last_lop == PL_oldoldbufptr);
3235 if (!islop || PL_last_lop_op == OP_GREPSTART)
3236 PL_expect = XOPERATOR;
bbce6d69 3237 else if (strchr("$@\"'`q", *s))
3280af22 3238 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3239 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3240 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3241 else if (isIDFIRST_lazy(s)) {
3280af22 3242 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3243 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3244 if (tmp = keyword(tmpbuf, len)) {
3245 /* binary operators exclude handle interpretations */
3246 switch (tmp) {
3247 case -KEY_x:
3248 case -KEY_eq:
3249 case -KEY_ne:
3250 case -KEY_gt:
3251 case -KEY_lt:
3252 case -KEY_ge:
3253 case -KEY_le:
3254 case -KEY_cmp:
3255 break;
3256 default:
3280af22 3257 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3258 break;
3259 }
3260 }
68dc0745 3261 else {
3262 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3263 if (gv && GvCVu(gv))
3280af22 3264 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3265 }
93a17b20 3266 }
bbce6d69 3267 else if (isDIGIT(*s))
3280af22 3268 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3269 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3270 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3271 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3272 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3273 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3274 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3275 }
3280af22 3276 PL_pending_ident = '$';
79072805 3277 TOKEN('$');
378cc40b
LW
3278
3279 case '@':
3280af22 3280 if (PL_expect == XOPERATOR)
bbce6d69 3281 no_op("Array", s);
3280af22
NIS
3282 PL_tokenbuf[0] = '@';
3283 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3284 if (!PL_tokenbuf[1]) {
3285 if (s == PL_bufend)
bbce6d69 3286 yyerror("Final @ should be \\@ or @name");
3287 PREREF('@');
3288 }
3280af22 3289 if (PL_lex_state == LEX_NORMAL)
ff68c719 3290 s = skipspace(s);
3280af22 3291 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3292 if (*s == '{')
3280af22 3293 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3294
3295 /* Warn about @ where they meant $. */
599cee73 3296 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3297 if (*s == '[' || *s == '{') {
3298 char *t = s + 1;
834a4ddd 3299 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3300 t++;
3301 if (*t == '}' || *t == ']') {
3302 t++;
3280af22 3303 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3304 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3305 "Scalar value %.*s better written as $%.*s",
3280af22 3306 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3307 }
93a17b20
LW
3308 }
3309 }
463ee0b2 3310 }
3280af22 3311 PL_pending_ident = '@';
79072805 3312 TERM('@');
378cc40b
LW
3313
3314 case '/': /* may either be division or pattern */
3315 case '?': /* may either be conditional or pattern */
3280af22 3316 if (PL_expect != XOPERATOR) {
c277df42 3317 /* Disable warning on "study /blah/" */
3280af22
NIS
3318 if (PL_oldoldbufptr == PL_last_uni
3319 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3320 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3321 check_uni();
8782bef2 3322 s = scan_pat(s,OP_MATCH);
79072805 3323 TERM(sublex_start());
378cc40b
LW
3324 }
3325 tmp = *s++;
a687059c 3326 if (tmp == '/')
79072805 3327 Mop(OP_DIVIDE);
378cc40b
LW
3328 OPERATOR(tmp);
3329
3330 case '.':
51882d45
GS
3331 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3332#ifdef PERL_STRICT_CR
3333 && s[1] == '\n'
3334#else
3335 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3336#endif
3337 && (s == PL_linestart || s[-1] == '\n') )
3338 {
3280af22
NIS
3339 PL_lex_formbrack = 0;
3340 PL_expect = XSTATE;
79072805
LW
3341 goto rightbracket;
3342 }
3280af22 3343 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3344 tmp = *s++;
a687059c
LW
3345 if (*s == tmp) {
3346 s++;
2f3197b3
LW
3347 if (*s == tmp) {
3348 s++;
79072805 3349 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3350 }
3351 else
79072805 3352 yylval.ival = 0;
378cc40b 3353 OPERATOR(DOTDOT);
a687059c 3354 }
3280af22 3355 if (PL_expect != XOPERATOR)
2f3197b3 3356 check_uni();
79072805 3357 Aop(OP_CONCAT);
378cc40b
LW
3358 }
3359 /* FALL THROUGH */
3360 case '0': case '1': case '2': case '3': case '4':
3361 case '5': case '6': case '7': case '8': case '9':
79072805 3362 s = scan_num(s);
3280af22 3363 if (PL_expect == XOPERATOR)
8990e307 3364 no_op("Number",s);
79072805
LW
3365 TERM(THING);
3366
3367 case '\'':
09bef843 3368 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3369 if (PL_expect == XOPERATOR) {
3370 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3371 PL_expect = XTERM;
a0d0e21e
LW
3372 depcom();
3373 return ','; /* grandfather non-comma-format format */
3374 }
463ee0b2 3375 else
8990e307 3376 no_op("String",s);
463ee0b2 3377 }
79072805 3378 if (!s)
85e6fe83 3379 missingterm((char*)0);
79072805
LW
3380 yylval.ival = OP_CONST;
3381 TERM(sublex_start());
3382
3383 case '"':
09bef843 3384 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3385 if (PL_expect == XOPERATOR) {
3386 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3387 PL_expect = XTERM;
a0d0e21e
LW
3388 depcom();
3389 return ','; /* grandfather non-comma-format format */
3390 }
463ee0b2 3391 else
8990e307 3392 no_op("String",s);
463ee0b2 3393 }
79072805 3394 if (!s)
85e6fe83 3395 missingterm((char*)0);
4633a7c4 3396 yylval.ival = OP_CONST;
3280af22 3397 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3398 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3399 yylval.ival = OP_STRINGIFY;
3400 break;
3401 }
3402 }
79072805
LW
3403 TERM(sublex_start());
3404
3405 case '`':
09bef843 3406 s = scan_str(s,FALSE,FALSE);
3280af22 3407 if (PL_expect == XOPERATOR)
8990e307 3408 no_op("Backticks",s);
79072805 3409 if (!s)
85e6fe83 3410 missingterm((char*)0);
79072805
LW
3411 yylval.ival = OP_BACKTICK;
3412 set_csh();
3413 TERM(sublex_start());
3414
3415 case '\\':
3416 s++;
599cee73 3417 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3418 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3419 *s, *s);
3280af22 3420 if (PL_expect == XOPERATOR)
8990e307 3421 no_op("Backslash",s);
79072805
LW
3422 OPERATOR(REFGEN);
3423
a7cb1f99
GS
3424 case 'v':
3425 if (isDIGIT(s[1]) && PL_expect == XTERM) {
3426 char *start = s;
3427 start++;
3428 start++;
3429 while (isDIGIT(*start))
3430 start++;
3431 if (*start == '.' && isDIGIT(start[1])) {
3432 s = scan_num(s);
3433 TERM(THING);
3434 }
3435 }
3436 goto keylookup;
79072805 3437 case 'x':
3280af22 3438 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3439 s++;
3440 Mop(OP_REPEAT);
2f3197b3 3441 }
79072805
LW
3442 goto keylookup;
3443
378cc40b 3444 case '_':
79072805
LW
3445 case 'a': case 'A':
3446 case 'b': case 'B':
3447 case 'c': case 'C':
3448 case 'd': case 'D':
3449 case 'e': case 'E':
3450 case 'f': case 'F':
3451 case 'g': case 'G':
3452 case 'h': case 'H':
3453 case 'i': case 'I':
3454 case 'j': case 'J':
3455 case 'k': case 'K':
3456 case 'l': case 'L':
3457 case 'm': case 'M':
3458 case 'n': case 'N':
3459 case 'o': case 'O':
3460 case 'p': case 'P':
3461 case 'q': case 'Q':
3462 case 'r': case 'R':
3463 case 's': case 'S':
3464 case 't': case 'T':
3465 case 'u': case 'U':
a7cb1f99 3466 case 'V':
79072805
LW
3467 case 'w': case 'W':
3468 case 'X':
3469 case 'y': case 'Y':
3470 case 'z': case 'Z':
3471
49dc05e3 3472 keylookup: {
2d8e6c8d 3473 STRLEN n_a;
161b471a
NIS
3474 gv = Nullgv;
3475 gvp = 0;
49dc05e3 3476
3280af22
NIS
3477 PL_bufptr = s;
3478 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3479
3480 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3481 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3482 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3483 (PL_tokenbuf[0] == 'q' &&
3484 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3485
3486 /* x::* is just a word, unless x is "CORE" */
3280af22 3487 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3488 goto just_a_word;
3489
3643fb5f 3490 d = s;
3280af22 3491 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3492 d++; /* no comments skipped here, or s### is misparsed */
3493
3494 /* Is this a label? */
3280af22
NIS
3495 if (!tmp && PL_expect == XSTATE
3496 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3497 s = d + 1;
3280af22 3498 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3499 CLINE;
3500 TOKEN(LABEL);
3643fb5f
CS
3501 }
3502
3503 /* Check for keywords */
3280af22 3504 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3505
3506 /* Is this a word before a => operator? */
748a9306
LW
3507 if (strnEQ(d,"=>",2)) {
3508 CLINE;
3280af22 3509 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3510 yylval.opval->op_private = OPpCONST_BARE;
3511 TERM(WORD);
3512 }
3513
a0d0e21e 3514 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3515 GV *ogv = Nullgv; /* override (winner) */
3516 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3517 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3518 CV *cv;
3280af22 3519 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3520 (cv = GvCVu(gv)))
3521 {
3522 if (GvIMPORTED_CV(gv))
3523 ogv = gv;
3524 else if (! CvMETHOD(cv))
3525 hgv = gv;
3526 }
3527 if (!ogv &&
3280af22
NIS
3528 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3529 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3530 GvCVu(gv) && GvIMPORTED_CV(gv))
3531 {
3532 ogv = gv;
3533 }
3534 }
3535 if (ogv) {
3536 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3537 }
3538 else if (gv && !gvp
3539 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3540 && GvCVu(gv)
3280af22 3541 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3542 {
3543 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3544 }
56f7f34b
CS
3545 else { /* no override */
3546 tmp = -tmp;
3547 gv = Nullgv;
3548 gvp = 0;
4944e2f7
GS
3549 if (ckWARN(WARN_AMBIGUOUS) && hgv
3550 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3551 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3552 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3553 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3554 }
a0d0e21e
LW
3555 }
3556
3557 reserved_word:
3558 switch (tmp) {
79072805
LW
3559
3560 default: /* not a keyword */
93a17b20 3561 just_a_word: {
96e4d5b1 3562 SV *sv;
3280af22 3563 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3564
3565 /* Get the rest if it looks like a package qualifier */
3566
a0d0e21e 3567 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3568 STRLEN morelen;
3280af22 3569 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3570 TRUE, &morelen);
3571 if (!morelen)
cea2e8a9 3572 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3573 *s == '\'' ? "'" : "::");
c3e0f903 3574 len += morelen;
a0d0e21e 3575 }
8990e307 3576
3280af22
NIS
3577 if (PL_expect == XOPERATOR) {
3578 if (PL_bufptr == PL_linestart) {
57843af0 3579 CopLINE_dec(PL_curcop);
cea2e8a9 3580 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3581 CopLINE_inc(PL_curcop);
463ee0b2
LW
3582 }
3583 else
54310121 3584 no_op("Bareword",s);
463ee0b2 3585 }
8990e307 3586
c3e0f903
GS
3587 /* Look for a subroutine with this name in current package,
3588 unless name is "Foo::", in which case Foo is a bearword
3589 (and a package name). */
3590
3591 if (len > 2 &&
3280af22 3592 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3593 {
599cee73 3594 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3595 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3596 "Bareword \"%s\" refers to nonexistent package",
3280af22 3597 PL_tokenbuf);
c3e0f903