This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
avoid spurious "Useless use of variable" warning on C<our $foo;>
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
a0ed51b3 35#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
36/*
37 * Note: we try to be careful never to call the isXXX_utf8() functions
38 * unless we're pretty sure we've seen the beginning of a UTF-8 character
39 * (that is, the two high bits are set). Otherwise we risk loading in the
40 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
41 */
42#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
43 ? isIDFIRST(*(p)) \
44 : isIDFIRST_utf8((U8*)p))
45#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
46 ? isALNUM(*(p)) \
47 : isALNUM_utf8((U8*)p))
a0ed51b3 48
2b92dfce
GS
49/* In variables name $^X, these are the legal values for X.
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
395c3793
LW
72#ifdef I_FCNTL
73#include <fcntl.h>
74#endif
fe14fcc3
LW
75#ifdef I_SYS_FILE
76#include <sys/file.h>
77#endif
395c3793 78
a790bc05 79/* XXX If this causes problems, set i_unistd=undef in the hint file. */
80#ifdef I_UNISTD
81# include <unistd.h> /* Needed for execv() */
82#endif
83
84
79072805
LW
85#ifdef ff_next
86#undef ff_next
d48672a2
LW
87#endif
88
a1a0e61e
TD
89#ifdef USE_PURE_BISON
90YYSTYPE* yylval_pointer = NULL;
91int* yychar_pointer = NULL;
22c35a8c
GS
92# undef yylval
93# undef yychar
e4bfbdd4
JH
94# define yylval (*yylval_pointer)
95# define yychar (*yychar_pointer)
96# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
97# undef yylex
98# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
133 * Rop : relational operator <= != gt
134 *
135 * Also see LOP and lop() below.
136 */
137
3280af22
NIS
138#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
139#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
140#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
141#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
142#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
143#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
144#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
145#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
146#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
147#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
148#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
149#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
150#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
151#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
152#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
153#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
154#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
155#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
156#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
157#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 158
a687059c
LW
159/* This bit of chicanery makes a unary function followed by
160 * a parenthesis into a function with one argument, highest precedence.
161 */
2f3197b3 162#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
79072805
LW
172 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
173
9f68db38 174/* grandfather return to old style */
3280af22 175#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 176
ffb4593c
NT
177/*
178 * S_ao
179 *
180 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
181 * into an OP_ANDASSIGN or OP_ORASSIGN
182 */
183
76e3520e 184STATIC int
cea2e8a9 185S_ao(pTHX_ int toketype)
a0d0e21e 186{
3280af22
NIS
187 if (*PL_bufptr == '=') {
188 PL_bufptr++;
a0d0e21e
LW
189 if (toketype == ANDAND)
190 yylval.ival = OP_ANDASSIGN;
191 else if (toketype == OROR)
192 yylval.ival = OP_ORASSIGN;
193 toketype = ASSIGNOP;
194 }
195 return toketype;
196}
197
ffb4593c
NT
198/*
199 * S_no_op
200 * When Perl expects an operator and finds something else, no_op
201 * prints the warning. It always prints "<something> found where
202 * operator expected. It prints "Missing semicolon on previous line?"
203 * if the surprise occurs at the start of the line. "do you need to
204 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
205 * where the compiler doesn't know if foo is a method call or a function.
206 * It prints "Missing operator before end of line" if there's nothing
207 * after the missing operator, or "... before <...>" if there is something
208 * after the missing operator.
209 */
210
76e3520e 211STATIC void
cea2e8a9 212S_no_op(pTHX_ char *what, char *s)
463ee0b2 213{
3280af22
NIS
214 char *oldbp = PL_bufptr;
215 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 216
1189a94a
GS
217 if (!s)
218 s = oldbp;
219 else {
220 assert(s >= oldbp);
221 PL_bufptr = s;
222 }
cea2e8a9 223 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 224 if (is_first)
cea2e8a9 225 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 226 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 227 char *t;
834a4ddd 228 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 229 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 230 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 231 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
232 }
233 else
cea2e8a9 234 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 235 PL_bufptr = oldbp;
8990e307
LW
236}
237
ffb4593c
NT
238/*
239 * S_missingterm
240 * Complain about missing quote/regexp/heredoc terminator.
241 * If it's called with (char *)NULL then it cauterizes the line buffer.
242 * If we're in a delimited string and the delimiter is a control
243 * character, it's reformatted into a two-char sequence like ^C.
244 * This is fatal.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_missingterm(pTHX_ char *s)
8990e307
LW
249{
250 char tmpbuf[3];
251 char q;
252 if (s) {
253 char *nl = strrchr(s,'\n');
d2719217 254 if (nl)
8990e307
LW
255 *nl = '\0';
256 }
9d116dd7
JH
257 else if (
258#ifdef EBCDIC
259 iscntrl(PL_multi_close)
260#else
261 PL_multi_close < 32 || PL_multi_close == 127
262#endif
263 ) {
8990e307 264 *tmpbuf = '^';
3280af22 265 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
266 s = "\\n";
267 tmpbuf[2] = '\0';
268 s = tmpbuf;
269 }
270 else {
3280af22 271 *tmpbuf = PL_multi_close;
8990e307
LW
272 tmpbuf[1] = '\0';
273 s = tmpbuf;
274 }
275 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 276 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 277}
79072805 278
ffb4593c
NT
279/*
280 * Perl_deprecate
ffb4593c
NT
281 */
282
79072805 283void
864dbfa3 284Perl_deprecate(pTHX_ char *s)
a0d0e21e 285{
d008e5eb 286 dTHR;
599cee73 287 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 288 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
289}
290
ffb4593c
NT
291/*
292 * depcom
9cbb5ea2 293 * Deprecate a comma-less variable list.
ffb4593c
NT
294 */
295
76e3520e 296STATIC void
cea2e8a9 297S_depcom(pTHX)
a0d0e21e
LW
298{
299 deprecate("comma-less variable list");
300}
301
ffb4593c 302/*
9cbb5ea2
GS
303 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
304 * utf16-to-utf8-reversed.
ffb4593c
NT
305 */
306
c39cd008
GS
307#ifdef PERL_CR_FILTER
308static void
309strip_return(SV *sv)
310{
311 register char *s = SvPVX(sv);
312 register char *e = s + SvCUR(sv);
313 /* outer loop optimized to do nothing if there are no CR-LFs */
314 while (s < e) {
315 if (*s++ == '\r' && *s == '\n') {
316 /* hit a CR-LF, need to copy the rest */
317 register char *d = s - 1;
318 *d++ = *s++;
319 while (s < e) {
320 if (*s == '\r' && s[1] == '\n')
321 s++;
322 *d++ = *s++;
323 }
324 SvCUR(sv) -= s - d;
325 return;
326 }
327 }
328}
a868473f 329
76e3520e 330STATIC I32
c39cd008 331S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 332{
c39cd008
GS
333 I32 count = FILTER_READ(idx+1, sv, maxlen);
334 if (count > 0 && !maxlen)
335 strip_return(sv);
336 return count;
a868473f
NIS
337}
338#endif
339
a0ed51b3 340STATIC I32
cea2e8a9 341S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
342{
343 I32 count = FILTER_READ(idx+1, sv, maxlen);
344 if (count) {
dfe13c55
GS
345 U8* tmps;
346 U8* tend;
347 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 348 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 349 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
350
351 }
352 return count;
353}
354
355STATIC I32
cea2e8a9 356S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
357{
358 I32 count = FILTER_READ(idx+1, sv, maxlen);
359 if (count) {
dfe13c55
GS
360 U8* tmps;
361 U8* tend;
362 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 363 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 364 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
365
366 }
367 return count;
368}
a868473f 369
ffb4593c
NT
370/*
371 * Perl_lex_start
9cbb5ea2
GS
372 * Initialize variables. Uses the Perl save_stack to save its state (for
373 * recursive calls to the parser).
ffb4593c
NT
374 */
375
a0d0e21e 376void
864dbfa3 377Perl_lex_start(pTHX_ SV *line)
79072805 378{
0f15f207 379 dTHR;
8990e307
LW
380 char *s;
381 STRLEN len;
382
3280af22
NIS
383 SAVEI32(PL_lex_dojoin);
384 SAVEI32(PL_lex_brackets);
3280af22
NIS
385 SAVEI32(PL_lex_casemods);
386 SAVEI32(PL_lex_starts);
387 SAVEI32(PL_lex_state);
7766f137 388 SAVEVPTR(PL_lex_inpat);
3280af22 389 SAVEI32(PL_lex_inwhat);
57843af0 390 SAVECOPLINE(PL_curcop);
3280af22
NIS
391 SAVEPPTR(PL_bufptr);
392 SAVEPPTR(PL_bufend);
393 SAVEPPTR(PL_oldbufptr);
394 SAVEPPTR(PL_oldoldbufptr);
395 SAVEPPTR(PL_linestart);
396 SAVESPTR(PL_linestr);
397 SAVEPPTR(PL_lex_brackstack);
398 SAVEPPTR(PL_lex_casestack);
c76ac1ee 399 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
400 SAVESPTR(PL_lex_stuff);
401 SAVEI32(PL_lex_defer);
09bef843 402 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 403 SAVESPTR(PL_lex_repl);
bebdddfc
GS
404 SAVEINT(PL_expect);
405 SAVEINT(PL_lex_expect);
3280af22
NIS
406
407 PL_lex_state = LEX_NORMAL;
408 PL_lex_defer = 0;
409 PL_expect = XSTATE;
410 PL_lex_brackets = 0;
3280af22
NIS
411 New(899, PL_lex_brackstack, 120, char);
412 New(899, PL_lex_casestack, 12, char);
413 SAVEFREEPV(PL_lex_brackstack);
414 SAVEFREEPV(PL_lex_casestack);
415 PL_lex_casemods = 0;
416 *PL_lex_casestack = '\0';
417 PL_lex_dojoin = 0;
418 PL_lex_starts = 0;
419 PL_lex_stuff = Nullsv;
420 PL_lex_repl = Nullsv;
421 PL_lex_inpat = 0;
422 PL_lex_inwhat = 0;
09bef843 423 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
424 PL_linestr = line;
425 if (SvREADONLY(PL_linestr))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 s = SvPV(PL_linestr, len);
8990e307 428 if (len && s[len-1] != ';') {
3280af22
NIS
429 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
430 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
431 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 432 }
3280af22
NIS
433 SvTEMP_off(PL_linestr);
434 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
435 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
436 SvREFCNT_dec(PL_rs);
79cb57f6 437 PL_rs = newSVpvn("\n", 1);
3280af22 438 PL_rsfp = 0;
79072805 439}
a687059c 440
ffb4593c
NT
441/*
442 * Perl_lex_end
9cbb5ea2
GS
443 * Finalizer for lexing operations. Must be called when the parser is
444 * done with the lexer.
ffb4593c
NT
445 */
446
463ee0b2 447void
864dbfa3 448Perl_lex_end(pTHX)
463ee0b2 449{
3280af22 450 PL_doextract = FALSE;
463ee0b2
LW
451}
452
ffb4593c
NT
453/*
454 * S_incline
455 * This subroutine has nothing to do with tilting, whether at windmills
456 * or pinball tables. Its name is short for "increment line". It
57843af0 457 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 458 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
459 * # line 500 "foo.pm"
460 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
461 */
462
76e3520e 463STATIC void
cea2e8a9 464S_incline(pTHX_ char *s)
463ee0b2 465{
0f15f207 466 dTHR;
463ee0b2
LW
467 char *t;
468 char *n;
469 char ch;
470 int sawline = 0;
471
57843af0 472 CopLINE_inc(PL_curcop);
463ee0b2
LW
473 if (*s++ != '#')
474 return;
475 while (*s == ' ' || *s == '\t') s++;
476 if (strnEQ(s, "line ", 5)) {
477 s += 5;
478 sawline = 1;
479 }
480 if (!isDIGIT(*s))
481 return;
482 n = s;
483 while (isDIGIT(*s))
484 s++;
485 while (*s == ' ' || *s == '\t')
486 s++;
487 if (*s == '"' && (t = strchr(s+1, '"')))
488 s++;
489 else {
490 if (!sawline)
491 return; /* false alarm */
492 for (t = s; !isSPACE(*t); t++) ;
493 }
494 ch = *t;
495 *t = '\0';
496 if (t - s > 0)
57843af0 497 CopFILE_set(PL_curcop, s);
463ee0b2 498 else
57843af0 499 CopFILE_set(PL_curcop, PL_origfilename);
463ee0b2 500 *t = ch;
57843af0 501 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
502}
503
ffb4593c
NT
504/*
505 * S_skipspace
506 * Called to gobble the appropriate amount and type of whitespace.
507 * Skips comments as well.
508 */
509
76e3520e 510STATIC char *
cea2e8a9 511S_skipspace(pTHX_ register char *s)
a687059c 512{
11343788 513 dTHR;
3280af22
NIS
514 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
515 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
516 s++;
517 return s;
518 }
519 for (;;) {
fd049845 520 STRLEN prevlen;
09bef843
SB
521 SSize_t oldprevlen, oldoldprevlen;
522 SSize_t oldloplen, oldunilen;
60e6418e
GS
523 while (s < PL_bufend && isSPACE(*s)) {
524 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
525 incline(s);
526 }
ffb4593c
NT
527
528 /* comment */
3280af22
NIS
529 if (s < PL_bufend && *s == '#') {
530 while (s < PL_bufend && *s != '\n')
463ee0b2 531 s++;
60e6418e 532 if (s < PL_bufend) {
463ee0b2 533 s++;
60e6418e
GS
534 if (PL_in_eval && !PL_rsfp) {
535 incline(s);
536 continue;
537 }
538 }
463ee0b2 539 }
ffb4593c
NT
540
541 /* only continue to recharge the buffer if we're at the end
542 * of the buffer, we're not reading from a source filter, and
543 * we're in normal lexing mode
544 */
09bef843
SB
545 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
546 PL_lex_state == LEX_FORMLINE)
463ee0b2 547 return s;
ffb4593c
NT
548
549 /* try to recharge the buffer */
9cbb5ea2
GS
550 if ((s = filter_gets(PL_linestr, PL_rsfp,
551 (prevlen = SvCUR(PL_linestr)))) == Nullch)
552 {
553 /* end of file. Add on the -p or -n magic */
3280af22
NIS
554 if (PL_minus_n || PL_minus_p) {
555 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
556 ";}continue{print or die qq(-p destination: $!\\n)" :
557 "");
3280af22
NIS
558 sv_catpv(PL_linestr,";}");
559 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
560 }
561 else
3280af22 562 sv_setpv(PL_linestr,";");
ffb4593c
NT
563
564 /* reset variables for next time we lex */
9cbb5ea2
GS
565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
566 = SvPVX(PL_linestr);
3280af22 567 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
568
569 /* Close the filehandle. Could be from -P preprocessor,
570 * STDIN, or a regular file. If we were reading code from
571 * STDIN (because the commandline held no -e or filename)
572 * then we don't close it, we reset it so the code can
573 * read from STDIN too.
574 */
575
3280af22
NIS
576 if (PL_preprocess && !PL_in_eval)
577 (void)PerlProc_pclose(PL_rsfp);
578 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
579 PerlIO_clearerr(PL_rsfp);
8990e307 580 else
3280af22
NIS
581 (void)PerlIO_close(PL_rsfp);
582 PL_rsfp = Nullfp;
463ee0b2
LW
583 return s;
584 }
ffb4593c
NT
585
586 /* not at end of file, so we only read another line */
09bef843
SB
587 /* make corresponding updates to old pointers, for yyerror() */
588 oldprevlen = PL_oldbufptr - PL_bufend;
589 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
590 if (PL_last_uni)
591 oldunilen = PL_last_uni - PL_bufend;
592 if (PL_last_lop)
593 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
594 PL_linestart = PL_bufptr = s + prevlen;
595 PL_bufend = s + SvCUR(PL_linestr);
596 s = PL_bufptr;
09bef843
SB
597 PL_oldbufptr = s + oldprevlen;
598 PL_oldoldbufptr = s + oldoldprevlen;
599 if (PL_last_uni)
600 PL_last_uni = s + oldunilen;
601 if (PL_last_lop)
602 PL_last_lop = s + oldloplen;
a0d0e21e 603 incline(s);
ffb4593c
NT
604
605 /* debugger active and we're not compiling the debugger code,
606 * so store the line into the debugger's array of lines
607 */
3280af22 608 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
609 SV *sv = NEWSV(85,0);
610
611 sv_upgrade(sv, SVt_PVMG);
3280af22 612 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 613 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 614 }
463ee0b2 615 }
a687059c 616}
378cc40b 617
ffb4593c
NT
618/*
619 * S_check_uni
620 * Check the unary operators to ensure there's no ambiguity in how they're
621 * used. An ambiguous piece of code would be:
622 * rand + 5
623 * This doesn't mean rand() + 5. Because rand() is a unary operator,
624 * the +5 is its argument.
625 */
626
76e3520e 627STATIC void
cea2e8a9 628S_check_uni(pTHX)
ba106d47 629{
2f3197b3 630 char *s;
a0d0e21e 631 char *t;
0453d815 632 dTHR;
2f3197b3 633
3280af22 634 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 635 return;
3280af22
NIS
636 while (isSPACE(*PL_last_uni))
637 PL_last_uni++;
834a4ddd 638 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 639 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 640 return;
0453d815 641 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 642 char ch = *s;
0453d815
PM
643 *s = '\0';
644 Perl_warner(aTHX_ WARN_AMBIGUOUS,
645 "Warning: Use of \"%s\" without parens is ambiguous",
646 PL_last_uni);
647 *s = ch;
648 }
2f3197b3
LW
649}
650
ffb4593c
NT
651/* workaround to replace the UNI() macro with a function. Only the
652 * hints/uts.sh file mentions this. Other comments elsewhere in the
653 * source indicate Microport Unix might need it too.
654 */
655
ffed7fef
LW
656#ifdef CRIPPLED_CC
657
658#undef UNI
ffed7fef 659#define UNI(f) return uni(f,s)
ffed7fef 660
76e3520e 661STATIC int
cea2e8a9 662S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
663{
664 yylval.ival = f;
3280af22
NIS
665 PL_expect = XTERM;
666 PL_bufptr = s;
8f872242
NIS
667 PL_last_uni = PL_oldbufptr;
668 PL_last_lop_op = f;
ffed7fef
LW
669 if (*s == '(')
670 return FUNC1;
671 s = skipspace(s);
672 if (*s == '(')
673 return FUNC1;
674 else
675 return UNIOP;
676}
677
a0d0e21e
LW
678#endif /* CRIPPLED_CC */
679
ffb4593c
NT
680/*
681 * LOP : macro to build a list operator. Its behaviour has been replaced
682 * with a subroutine, S_lop() for which LOP is just another name.
683 */
684
a0d0e21e
LW
685#define LOP(f,x) return lop(f,x,s)
686
ffb4593c
NT
687/*
688 * S_lop
689 * Build a list operator (or something that might be one). The rules:
690 * - if we have a next token, then it's a list operator [why?]
691 * - if the next thing is an opening paren, then it's a function
692 * - else it's a list operator
693 */
694
76e3520e 695STATIC I32
a0be28da 696S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 697{
0f15f207 698 dTHR;
79072805 699 yylval.ival = f;
35c8bce7 700 CLINE;
3280af22
NIS
701 PL_expect = x;
702 PL_bufptr = s;
703 PL_last_lop = PL_oldbufptr;
704 PL_last_lop_op = f;
705 if (PL_nexttoke)
a0d0e21e 706 return LSTOP;
79072805
LW
707 if (*s == '(')
708 return FUNC;
709 s = skipspace(s);
710 if (*s == '(')
711 return FUNC;
712 else
713 return LSTOP;
714}
715
ffb4593c
NT
716/*
717 * S_force_next
9cbb5ea2 718 * When the lexer realizes it knows the next token (for instance,
ffb4593c 719 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
720 * to know what token to return the next time the lexer is called. Caller
721 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
722 * handles the token correctly.
ffb4593c
NT
723 */
724
76e3520e 725STATIC void
cea2e8a9 726S_force_next(pTHX_ I32 type)
79072805 727{
3280af22
NIS
728 PL_nexttype[PL_nexttoke] = type;
729 PL_nexttoke++;
730 if (PL_lex_state != LEX_KNOWNEXT) {
731 PL_lex_defer = PL_lex_state;
732 PL_lex_expect = PL_expect;
733 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
734 }
735}
736
ffb4593c
NT
737/*
738 * S_force_word
739 * When the lexer knows the next thing is a word (for instance, it has
740 * just seen -> and it knows that the next char is a word char, then
741 * it calls S_force_word to stick the next word into the PL_next lookahead.
742 *
743 * Arguments:
b1b65b59 744 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
745 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
746 * int check_keyword : if true, Perl checks to make sure the word isn't
747 * a keyword (do this if the word is a label, e.g. goto FOO)
748 * int allow_pack : if true, : characters will also be allowed (require,
749 * use, etc. do this)
9cbb5ea2 750 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
751 */
752
76e3520e 753STATIC char *
cea2e8a9 754S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 755{
463ee0b2
LW
756 register char *s;
757 STRLEN len;
758
759 start = skipspace(start);
760 s = start;
834a4ddd 761 if (isIDFIRST_lazy(s) ||
a0d0e21e 762 (allow_pack && *s == ':') ||
15f0808c 763 (allow_initial_tick && *s == '\'') )
a0d0e21e 764 {
3280af22
NIS
765 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
766 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
767 return start;
768 if (token == METHOD) {
769 s = skipspace(s);
770 if (*s == '(')
3280af22 771 PL_expect = XTERM;
463ee0b2 772 else {
3280af22 773 PL_expect = XOPERATOR;
463ee0b2 774 }
79072805 775 }
3280af22
NIS
776 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
777 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
778 force_next(token);
779 }
780 return s;
781}
782
ffb4593c
NT
783/*
784 * S_force_ident
9cbb5ea2 785 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
786 * text only contains the "foo" portion. The first argument is a pointer
787 * to the "foo", and the second argument is the type symbol to prefix.
788 * Forces the next token to be a "WORD".
9cbb5ea2 789 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
790 */
791
76e3520e 792STATIC void
cea2e8a9 793S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
794{
795 if (s && *s) {
11343788 796 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 797 PL_nextval[PL_nexttoke].opval = o;
79072805 798 force_next(WORD);
748a9306 799 if (kind) {
e858de61 800 dTHR; /* just for in_eval */
11343788 801 o->op_private = OPpCONST_ENTERED;
55497cff 802 /* XXX see note in pp_entereval() for why we forgo typo
803 warnings if the symbol must be introduced in an eval.
804 GSAR 96-10-12 */
3280af22 805 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
806 kind == '$' ? SVt_PV :
807 kind == '@' ? SVt_PVAV :
808 kind == '%' ? SVt_PVHV :
809 SVt_PVGV
810 );
748a9306 811 }
79072805
LW
812 }
813}
814
ffb4593c
NT
815/*
816 * S_force_version
817 * Forces the next token to be a version number.
818 */
819
76e3520e 820STATIC char *
cea2e8a9 821S_force_version(pTHX_ char *s)
89bfa8cd 822{
823 OP *version = Nullop;
824
825 s = skipspace(s);
826
a7cb1f99
GS
827 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
828 char *d = s;
829 if (*d == 'v')
830 d++;
831 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
832 if ((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
89bfa8cd 833 s = scan_num(s);
834 /* real VERSION number -- GBARR */
835 version = yylval.opval;
836 }
837 }
838
839 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 840 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 841 force_next(WORD);
842
843 return (s);
844}
845
ffb4593c
NT
846/*
847 * S_tokeq
848 * Tokenize a quoted string passed in as an SV. It finds the next
849 * chunk, up to end of string or a backslash. It may make a new
850 * SV containing that chunk (if HINT_NEW_STRING is on). It also
851 * turns \\ into \.
852 */
853
76e3520e 854STATIC SV *
cea2e8a9 855S_tokeq(pTHX_ SV *sv)
79072805
LW
856{
857 register char *s;
858 register char *send;
859 register char *d;
b3ac6de7
IZ
860 STRLEN len = 0;
861 SV *pv = sv;
79072805
LW
862
863 if (!SvLEN(sv))
b3ac6de7 864 goto finish;
79072805 865
a0d0e21e 866 s = SvPV_force(sv, len);
748a9306 867 if (SvIVX(sv) == -1)
b3ac6de7 868 goto finish;
463ee0b2 869 send = s + len;
79072805
LW
870 while (s < send && *s != '\\')
871 s++;
872 if (s == send)
b3ac6de7 873 goto finish;
79072805 874 d = s;
3280af22 875 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 876 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
877 while (s < send) {
878 if (*s == '\\') {
a0d0e21e 879 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
880 s++; /* all that, just for this */
881 }
882 *d++ = *s++;
883 }
884 *d = '\0';
463ee0b2 885 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 886 finish:
3280af22 887 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 888 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
889 return sv;
890}
891
ffb4593c
NT
892/*
893 * Now come three functions related to double-quote context,
894 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
895 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
896 * interact with PL_lex_state, and create fake ( ... ) argument lists
897 * to handle functions and concatenation.
898 * They assume that whoever calls them will be setting up a fake
899 * join call, because each subthing puts a ',' after it. This lets
900 * "lower \luPpEr"
901 * become
902 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
903 *
904 * (I'm not sure whether the spurious commas at the end of lcfirst's
905 * arguments and join's arguments are created or not).
906 */
907
908/*
909 * S_sublex_start
910 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
911 *
912 * Pattern matching will set PL_lex_op to the pattern-matching op to
913 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
914 *
915 * OP_CONST and OP_READLINE are easy--just make the new op and return.
916 *
917 * Everything else becomes a FUNC.
918 *
919 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
920 * had an OP_CONST or OP_READLINE). This just sets us up for a
921 * call to S_sublex_push().
922 */
923
76e3520e 924STATIC I32
cea2e8a9 925S_sublex_start(pTHX)
79072805
LW
926{
927 register I32 op_type = yylval.ival;
79072805
LW
928
929 if (op_type == OP_NULL) {
3280af22
NIS
930 yylval.opval = PL_lex_op;
931 PL_lex_op = Nullop;
79072805
LW
932 return THING;
933 }
934 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 935 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
936
937 if (SvTYPE(sv) == SVt_PVIV) {
938 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
939 STRLEN len;
940 char *p;
941 SV *nsv;
942
943 p = SvPV(sv, len);
79cb57f6 944 nsv = newSVpvn(p, len);
b3ac6de7
IZ
945 SvREFCNT_dec(sv);
946 sv = nsv;
947 }
948 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 949 PL_lex_stuff = Nullsv;
79072805
LW
950 return THING;
951 }
952
3280af22
NIS
953 PL_sublex_info.super_state = PL_lex_state;
954 PL_sublex_info.sub_inwhat = op_type;
955 PL_sublex_info.sub_op = PL_lex_op;
956 PL_lex_state = LEX_INTERPPUSH;
55497cff 957
3280af22
NIS
958 PL_expect = XTERM;
959 if (PL_lex_op) {
960 yylval.opval = PL_lex_op;
961 PL_lex_op = Nullop;
55497cff 962 return PMFUNC;
963 }
964 else
965 return FUNC;
966}
967
ffb4593c
NT
968/*
969 * S_sublex_push
970 * Create a new scope to save the lexing state. The scope will be
971 * ended in S_sublex_done. Returns a '(', starting the function arguments
972 * to the uc, lc, etc. found before.
973 * Sets PL_lex_state to LEX_INTERPCONCAT.
974 */
975
76e3520e 976STATIC I32
cea2e8a9 977S_sublex_push(pTHX)
55497cff 978{
0f15f207 979 dTHR;
f46d017c 980 ENTER;
55497cff 981
3280af22
NIS
982 PL_lex_state = PL_sublex_info.super_state;
983 SAVEI32(PL_lex_dojoin);
984 SAVEI32(PL_lex_brackets);
3280af22
NIS
985 SAVEI32(PL_lex_casemods);
986 SAVEI32(PL_lex_starts);
987 SAVEI32(PL_lex_state);
7766f137 988 SAVEVPTR(PL_lex_inpat);
3280af22 989 SAVEI32(PL_lex_inwhat);
57843af0 990 SAVECOPLINE(PL_curcop);
3280af22
NIS
991 SAVEPPTR(PL_bufptr);
992 SAVEPPTR(PL_oldbufptr);
993 SAVEPPTR(PL_oldoldbufptr);
994 SAVEPPTR(PL_linestart);
995 SAVESPTR(PL_linestr);
996 SAVEPPTR(PL_lex_brackstack);
997 SAVEPPTR(PL_lex_casestack);
998
999 PL_linestr = PL_lex_stuff;
1000 PL_lex_stuff = Nullsv;
1001
9cbb5ea2
GS
1002 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1003 = SvPVX(PL_linestr);
3280af22
NIS
1004 PL_bufend += SvCUR(PL_linestr);
1005 SAVEFREESV(PL_linestr);
1006
1007 PL_lex_dojoin = FALSE;
1008 PL_lex_brackets = 0;
3280af22
NIS
1009 New(899, PL_lex_brackstack, 120, char);
1010 New(899, PL_lex_casestack, 12, char);
1011 SAVEFREEPV(PL_lex_brackstack);
1012 SAVEFREEPV(PL_lex_casestack);
1013 PL_lex_casemods = 0;
1014 *PL_lex_casestack = '\0';
1015 PL_lex_starts = 0;
1016 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1017 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1018
1019 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1020 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1021 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1022 else
3280af22 1023 PL_lex_inpat = Nullop;
79072805 1024
55497cff 1025 return '(';
79072805
LW
1026}
1027
ffb4593c
NT
1028/*
1029 * S_sublex_done
1030 * Restores lexer state after a S_sublex_push.
1031 */
1032
76e3520e 1033STATIC I32
cea2e8a9 1034S_sublex_done(pTHX)
79072805 1035{
3280af22
NIS
1036 if (!PL_lex_starts++) {
1037 PL_expect = XOPERATOR;
79cb57f6 1038 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1039 return THING;
1040 }
1041
3280af22
NIS
1042 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1043 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1044 return yylex();
79072805
LW
1045 }
1046
ffb4593c 1047 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1048 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1049 PL_linestr = PL_lex_repl;
1050 PL_lex_inpat = 0;
1051 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1052 PL_bufend += SvCUR(PL_linestr);
1053 SAVEFREESV(PL_linestr);
1054 PL_lex_dojoin = FALSE;
1055 PL_lex_brackets = 0;
3280af22
NIS
1056 PL_lex_casemods = 0;
1057 *PL_lex_casestack = '\0';
1058 PL_lex_starts = 0;
25da4f38 1059 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1060 PL_lex_state = LEX_INTERPNORMAL;
1061 PL_lex_starts++;
e9fa98b2
HS
1062 /* we don't clear PL_lex_repl here, so that we can check later
1063 whether this is an evalled subst; that means we rely on the
1064 logic to ensure sublex_done() is called again only via the
1065 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1066 }
e9fa98b2 1067 else {
3280af22 1068 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1069 PL_lex_repl = Nullsv;
1070 }
79072805 1071 return ',';
ffed7fef
LW
1072 }
1073 else {
f46d017c 1074 LEAVE;
3280af22
NIS
1075 PL_bufend = SvPVX(PL_linestr);
1076 PL_bufend += SvCUR(PL_linestr);
1077 PL_expect = XOPERATOR;
09bef843 1078 PL_sublex_info.sub_inwhat = 0;
79072805 1079 return ')';
ffed7fef
LW
1080 }
1081}
1082
02aa26ce
NT
1083/*
1084 scan_const
1085
1086 Extracts a pattern, double-quoted string, or transliteration. This
1087 is terrifying code.
1088
3280af22
NIS
1089 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1090 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1091 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1092
9b599b2a
GS
1093 Returns a pointer to the character scanned up to. Iff this is
1094 advanced from the start pointer supplied (ie if anything was
1095 successfully parsed), will leave an OP for the substring scanned
1096 in yylval. Caller must intuit reason for not parsing further
1097 by looking at the next characters herself.
1098
02aa26ce
NT
1099 In patterns:
1100 backslashes:
1101 double-quoted style: \r and \n
1102 regexp special ones: \D \s
1103 constants: \x3
1104 backrefs: \1 (deprecated in substitution replacements)
1105 case and quoting: \U \Q \E
1106 stops on @ and $, but not for $ as tail anchor
1107
1108 In transliterations:
1109 characters are VERY literal, except for - not at the start or end
1110 of the string, which indicates a range. scan_const expands the
1111 range to the full set of intermediate characters.
1112
1113 In double-quoted strings:
1114 backslashes:
1115 double-quoted style: \r and \n
1116 constants: \x3
1117 backrefs: \1 (deprecated)
1118 case and quoting: \U \Q \E
1119 stops on @ and $
1120
1121 scan_const does *not* construct ops to handle interpolated strings.
1122 It stops processing as soon as it finds an embedded $ or @ variable
1123 and leaves it to the caller to work out what's going on.
1124
1125 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1126
1127 $ in pattern could be $foo or could be tail anchor. Assumption:
1128 it's a tail anchor if $ is the last thing in the string, or if it's
1129 followed by one of ")| \n\t"
1130
1131 \1 (backreferences) are turned into $1
1132
1133 The structure of the code is
1134 while (there's a character to process) {
1135 handle transliteration ranges
1136 skip regexp comments
1137 skip # initiated comments in //x patterns
1138 check for embedded @foo
1139 check for embedded scalars
1140 if (backslash) {
1141 leave intact backslashes from leave (below)
1142 deprecate \1 in strings and sub replacements
1143 handle string-changing backslashes \l \U \Q \E, etc.
1144 switch (what was escaped) {
1145 handle - in a transliteration (becomes a literal -)
1146 handle \132 octal characters
1147 handle 0x15 hex characters
1148 handle \cV (control V)
1149 handle printf backslashes (\f, \r, \n, etc)
1150 } (end switch)
1151 } (end if backslash)
1152 } (end while character to read)
1153
1154*/
1155
76e3520e 1156STATIC char *
cea2e8a9 1157S_scan_const(pTHX_ char *start)
79072805 1158{
3280af22 1159 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1160 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1161 register char *s = start; /* start of the constant */
1162 register char *d = SvPVX(sv); /* destination for copies */
1163 bool dorange = FALSE; /* are we in a translit range? */
1164 I32 len; /* ? */
ac2262e3 1165 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1166 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1167 : UTF;
ac2262e3 1168 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1169 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1170 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1171 : UTF;
dff6d3cd 1172 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1173 PL_lex_inpat
4a2d328f 1174 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1175 : "";
79072805
LW
1176
1177 while (s < send || dorange) {
02aa26ce 1178 /* get transliterations out of the way (they're most literal) */
3280af22 1179 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1180 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1181 if (dorange) {
02aa26ce 1182 I32 i; /* current expanded character */
8ada0baa 1183 I32 min; /* first character in range */
02aa26ce
NT
1184 I32 max; /* last character in range */
1185
1186 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1187 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1188 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1189 d -= 2; /* eat the first char and the - */
1190
8ada0baa
JH
1191 min = (U8)*d; /* first char in range */
1192 max = (U8)d[1]; /* last char in range */
1193
1194#ifndef ASCIIish
1195 if ((isLOWER(min) && isLOWER(max)) ||
1196 (isUPPER(min) && isUPPER(max))) {
1197 if (isLOWER(min)) {
1198 for (i = min; i <= max; i++)
1199 if (isLOWER(i))
1200 *d++ = i;
1201 } else {
1202 for (i = min; i <= max; i++)
1203 if (isUPPER(i))
1204 *d++ = i;
1205 }
1206 }
1207 else
1208#endif
1209 for (i = min; i <= max; i++)
1210 *d++ = i;
02aa26ce
NT
1211
1212 /* mark the range as done, and continue */
79072805
LW
1213 dorange = FALSE;
1214 continue;
1215 }
02aa26ce
NT
1216
1217 /* range begins (ignore - as first or last char) */
79072805 1218 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1219 if (utf) {
a176fa2a 1220 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1221 s++;
1222 continue;
1223 }
79072805
LW
1224 dorange = TRUE;
1225 s++;
1226 }
1227 }
02aa26ce
NT
1228
1229 /* if we get here, we're not doing a transliteration */
1230
0f5d15d6
IZ
1231 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1232 except for the last char, which will be done separately. */
3280af22 1233 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1234 if (s[2] == '#') {
1235 while (s < send && *s != ')')
1236 *d++ = *s++;
0f5d15d6
IZ
1237 } else if (s[2] == '{'
1238 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1239 I32 count = 1;
0f5d15d6 1240 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1241 char c;
1242
d9f97599
GS
1243 while (count && (c = *regparse)) {
1244 if (c == '\\' && regparse[1])
1245 regparse++;
cc6b7395
IZ
1246 else if (c == '{')
1247 count++;
1248 else if (c == '}')
1249 count--;
d9f97599 1250 regparse++;
cc6b7395 1251 }
5bdf89e7
IZ
1252 if (*regparse != ')') {
1253 regparse--; /* Leave one char for continuation. */
cc6b7395 1254 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1255 }
0f5d15d6 1256 while (s < regparse)
cc6b7395
IZ
1257 *d++ = *s++;
1258 }
748a9306 1259 }
02aa26ce
NT
1260
1261 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1262 else if (*s == '#' && PL_lex_inpat &&
1263 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1264 while (s+1 < send && *s != '\n')
1265 *d++ = *s++;
1266 }
02aa26ce
NT
1267
1268 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1269 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1270 break;
02aa26ce
NT
1271
1272 /* check for embedded scalars. only stop if we're sure it's a
1273 variable.
1274 */
79072805 1275 else if (*s == '$') {
3280af22 1276 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1277 break;
c277df42 1278 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1279 break; /* in regexp, $ might be tail anchor */
1280 }
02aa26ce 1281
a0ed51b3
LW
1282 /* (now in tr/// code again) */
1283
d008e5eb
GS
1284 if (*s & 0x80 && thisutf) {
1285 dTHR; /* only for ckWARN */
1286 if (ckWARN(WARN_UTF8)) {
dfe13c55 1287 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1288 if (len) {
1289 while (len--)
1290 *d++ = *s++;
1291 continue;
1292 }
a0ed51b3
LW
1293 }
1294 }
1295
02aa26ce 1296 /* backslashes */
79072805
LW
1297 if (*s == '\\' && s+1 < send) {
1298 s++;
02aa26ce
NT
1299
1300 /* some backslashes we leave behind */
c9f97d15 1301 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1302 *d++ = '\\';
1303 *d++ = *s++;
1304 continue;
1305 }
02aa26ce
NT
1306
1307 /* deprecate \1 in strings and substitution replacements */
3280af22 1308 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1309 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1310 {
d008e5eb 1311 dTHR; /* only for ckWARN */
599cee73 1312 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1313 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1314 *--s = '$';
1315 break;
1316 }
02aa26ce
NT
1317
1318 /* string-change backslash escapes */
3280af22 1319 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1320 --s;
1321 break;
1322 }
02aa26ce
NT
1323
1324 /* if we get here, it's either a quoted -, or a digit */
79072805 1325 switch (*s) {
02aa26ce
NT
1326
1327 /* quoted - in transliterations */
79072805 1328 case '-':
3280af22 1329 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1330 *d++ = *s++;
1331 continue;
1332 }
1333 /* FALL THROUGH */
1334 default:
11b8faa4
JH
1335 {
1336 dTHR;
1337 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1338 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1339 "Unrecognized escape \\%c passed through",
1340 *s);
1341 /* default action is to copy the quoted character */
1342 *d++ = *s++;
1343 continue;
1344 }
02aa26ce
NT
1345
1346 /* \132 indicates an octal constant */
79072805
LW
1347 case '0': case '1': case '2': case '3':
1348 case '4': case '5': case '6': case '7':
dff6d3cd 1349 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1350 s += len;
1351 continue;
02aa26ce
NT
1352
1353 /* \x24 indicates a hex constant */
79072805 1354 case 'x':
a0ed51b3
LW
1355 ++s;
1356 if (*s == '{') {
1357 char* e = strchr(s, '}');
1358
adaeee49 1359 if (!e) {
a0ed51b3 1360 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1361 e = s;
1362 }
d008e5eb
GS
1363 if (!utf) {
1364 dTHR;
1365 if (ckWARN(WARN_UTF8))
cea2e8a9 1366 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1367 "Use of \\x{} without utf8 declaration");
1368 }
a0ed51b3 1369 /* note: utf always shorter than hex */
dfe13c55 1370 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1371 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1372 s = e + 1;
a0ed51b3
LW
1373 }
1374 else {
1375 UV uv = (UV)scan_hex(s, 2, &len);
1376 if (utf && PL_lex_inwhat == OP_TRANS &&
1377 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1378 {
dfe13c55 1379 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1380 }
1381 else {
d008e5eb
GS
1382 if (uv >= 127 && UTF) {
1383 dTHR;
1384 if (ckWARN(WARN_UTF8))
cea2e8a9 1385 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1386 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1387 len,s,len,s);
1388 }
a0ed51b3
LW
1389 *d++ = (char)uv;
1390 }
1391 s += len;
1392 }
79072805 1393 continue;
02aa26ce 1394
4a2d328f
IZ
1395 /* \N{latin small letter a} is a named character */
1396 case 'N':
423cee85
JH
1397 ++s;
1398 if (*s == '{') {
1399 char* e = strchr(s, '}');
1400 HV *hv;
1401 SV **svp;
1402 SV *res, *cv;
1403 STRLEN len;
1404 char *str;
1405 char *why = Nullch;
1406
1407 if (!e) {
5777a3f7 1408 yyerror("Missing right brace on \\N{}");
423cee85
JH
1409 e = s - 1;
1410 goto cont_scan;
1411 }
1412 res = newSVpvn(s + 1, e - s - 1);
1413 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1414 res, Nullsv, "\\N{...}" );
423cee85
JH
1415 str = SvPV(res,len);
1416 if (len > e - s + 4) {
1417 char *odest = SvPVX(sv);
1418
1419 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1420 d = SvPVX(sv) + (d - odest);
1421 }
1422 Copy(str, d, len, char);
1423 d += len;
1424 SvREFCNT_dec(res);
1425 cont_scan:
1426 s = e + 1;
1427 }
1428 else
5777a3f7 1429 yyerror("Missing braces on \\N{}");
423cee85
JH
1430 continue;
1431
02aa26ce 1432 /* \c is a control character */
79072805
LW
1433 case 'c':
1434 s++;
9d116dd7
JH
1435#ifdef EBCDIC
1436 *d = *s++;
1437 if (isLOWER(*d))
1438 *d = toUPPER(*d);
1439 *d++ = toCTRL(*d);
1440#else
bbce6d69 1441 len = *s++;
1442 *d++ = toCTRL(len);
9d116dd7 1443#endif
79072805 1444 continue;
02aa26ce
NT
1445
1446 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1447 case 'b':
1448 *d++ = '\b';
1449 break;
1450 case 'n':
1451 *d++ = '\n';
1452 break;
1453 case 'r':
1454 *d++ = '\r';
1455 break;
1456 case 'f':
1457 *d++ = '\f';
1458 break;
1459 case 't':
1460 *d++ = '\t';
1461 break;
34a3fe2a
PP
1462#ifdef EBCDIC
1463 case 'e':
1464 *d++ = '\047'; /* CP 1047 */
1465 break;
1466 case 'a':
1467 *d++ = '\057'; /* CP 1047 */
1468 break;
1469#else
79072805
LW
1470 case 'e':
1471 *d++ = '\033';
1472 break;
1473 case 'a':
1474 *d++ = '\007';
1475 break;
34a3fe2a 1476#endif
02aa26ce
NT
1477 } /* end switch */
1478
79072805
LW
1479 s++;
1480 continue;
02aa26ce
NT
1481 } /* end if (backslash) */
1482
79072805 1483 *d++ = *s++;
02aa26ce
NT
1484 } /* while loop to process each character */
1485
1486 /* terminate the string and set up the sv */
79072805 1487 *d = '\0';
463ee0b2 1488 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1489 SvPOK_on(sv);
1490
02aa26ce 1491 /* shrink the sv if we allocated more than we used */
79072805
LW
1492 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1493 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1494 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1495 }
02aa26ce 1496
9b599b2a 1497 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1498 if (s > PL_bufptr) {
1499 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1500 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1501 sv, Nullsv,
3280af22 1502 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1503 ? "tr"
3280af22 1504 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1505 ? "s"
1506 : "qq")));
79072805 1507 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1508 } else
8990e307 1509 SvREFCNT_dec(sv);
79072805
LW
1510 return s;
1511}
1512
ffb4593c
NT
1513/* S_intuit_more
1514 * Returns TRUE if there's more to the expression (e.g., a subscript),
1515 * FALSE otherwise.
ffb4593c
NT
1516 *
1517 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1518 *
1519 * ->[ and ->{ return TRUE
1520 * { and [ outside a pattern are always subscripts, so return TRUE
1521 * if we're outside a pattern and it's not { or [, then return FALSE
1522 * if we're in a pattern and the first char is a {
1523 * {4,5} (any digits around the comma) returns FALSE
1524 * if we're in a pattern and the first char is a [
1525 * [] returns FALSE
1526 * [SOMETHING] has a funky algorithm to decide whether it's a
1527 * character class or not. It has to deal with things like
1528 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1529 * anything else returns TRUE
1530 */
1531
9cbb5ea2
GS
1532/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1533
76e3520e 1534STATIC int
cea2e8a9 1535S_intuit_more(pTHX_ register char *s)
79072805 1536{
3280af22 1537 if (PL_lex_brackets)
79072805
LW
1538 return TRUE;
1539 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1540 return TRUE;
1541 if (*s != '{' && *s != '[')
1542 return FALSE;
3280af22 1543 if (!PL_lex_inpat)
79072805
LW
1544 return TRUE;
1545
1546 /* In a pattern, so maybe we have {n,m}. */
1547 if (*s == '{') {
1548 s++;
1549 if (!isDIGIT(*s))
1550 return TRUE;
1551 while (isDIGIT(*s))
1552 s++;
1553 if (*s == ',')
1554 s++;
1555 while (isDIGIT(*s))
1556 s++;
1557 if (*s == '}')
1558 return FALSE;
1559 return TRUE;
1560
1561 }
1562
1563 /* On the other hand, maybe we have a character class */
1564
1565 s++;
1566 if (*s == ']' || *s == '^')
1567 return FALSE;
1568 else {
ffb4593c 1569 /* this is terrifying, and it works */
79072805
LW
1570 int weight = 2; /* let's weigh the evidence */
1571 char seen[256];
f27ffc4a 1572 unsigned char un_char = 255, last_un_char;
93a17b20 1573 char *send = strchr(s,']');
3280af22 1574 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1575
1576 if (!send) /* has to be an expression */
1577 return TRUE;
1578
1579 Zero(seen,256,char);
1580 if (*s == '$')
1581 weight -= 3;
1582 else if (isDIGIT(*s)) {
1583 if (s[1] != ']') {
1584 if (isDIGIT(s[1]) && s[2] == ']')
1585 weight -= 10;
1586 }
1587 else
1588 weight -= 100;
1589 }
1590 for (; s < send; s++) {
1591 last_un_char = un_char;
1592 un_char = (unsigned char)*s;
1593 switch (*s) {
1594 case '@':
1595 case '&':
1596 case '$':
1597 weight -= seen[un_char] * 10;
834a4ddd 1598 if (isALNUM_lazy(s+1)) {
8903cb82 1599 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1600 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1601 weight -= 100;
1602 else
1603 weight -= 10;
1604 }
1605 else if (*s == '$' && s[1] &&
93a17b20
LW
1606 strchr("[#!%*<>()-=",s[1])) {
1607 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1608 weight -= 10;
1609 else
1610 weight -= 1;
1611 }
1612 break;
1613 case '\\':
1614 un_char = 254;
1615 if (s[1]) {
93a17b20 1616 if (strchr("wds]",s[1]))
79072805
LW
1617 weight += 100;
1618 else if (seen['\''] || seen['"'])
1619 weight += 1;
93a17b20 1620 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1621 weight += 40;
1622 else if (isDIGIT(s[1])) {
1623 weight += 40;
1624 while (s[1] && isDIGIT(s[1]))
1625 s++;
1626 }
1627 }
1628 else
1629 weight += 100;
1630 break;
1631 case '-':
1632 if (s[1] == '\\')
1633 weight += 50;
93a17b20 1634 if (strchr("aA01! ",last_un_char))
79072805 1635 weight += 30;
93a17b20 1636 if (strchr("zZ79~",s[1]))
79072805 1637 weight += 30;
f27ffc4a
GS
1638 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1639 weight -= 5; /* cope with negative subscript */
79072805
LW
1640 break;
1641 default:
93a17b20 1642 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1643 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1644 char *d = tmpbuf;
1645 while (isALPHA(*s))
1646 *d++ = *s++;
1647 *d = '\0';
1648 if (keyword(tmpbuf, d - tmpbuf))
1649 weight -= 150;
1650 }
1651 if (un_char == last_un_char + 1)
1652 weight += 5;
1653 weight -= seen[un_char];
1654 break;
1655 }
1656 seen[un_char]++;
1657 }
1658 if (weight >= 0) /* probably a character class */
1659 return FALSE;
1660 }
1661
1662 return TRUE;
1663}
ffed7fef 1664
ffb4593c
NT
1665/*
1666 * S_intuit_method
1667 *
1668 * Does all the checking to disambiguate
1669 * foo bar
1670 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1671 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1672 *
1673 * First argument is the stuff after the first token, e.g. "bar".
1674 *
1675 * Not a method if bar is a filehandle.
1676 * Not a method if foo is a subroutine prototyped to take a filehandle.
1677 * Not a method if it's really "Foo $bar"
1678 * Method if it's "foo $bar"
1679 * Not a method if it's really "print foo $bar"
1680 * Method if it's really "foo package::" (interpreted as package->foo)
1681 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1682 * Not a method if bar is a filehandle or package, but is quotd with
1683 * =>
1684 */
1685
76e3520e 1686STATIC int
cea2e8a9 1687S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1688{
1689 char *s = start + (*start == '$');
3280af22 1690 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1691 STRLEN len;
1692 GV* indirgv;
1693
1694 if (gv) {
b6c543e3 1695 CV *cv;
a0d0e21e
LW
1696 if (GvIO(gv))
1697 return 0;
b6c543e3
IZ
1698 if ((cv = GvCVu(gv))) {
1699 char *proto = SvPVX(cv);
1700 if (proto) {
1701 if (*proto == ';')
1702 proto++;
1703 if (*proto == '*')
1704 return 0;
1705 }
1706 } else
a0d0e21e
LW
1707 gv = 0;
1708 }
8903cb82 1709 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1710 /* start is the beginning of the possible filehandle/object,
1711 * and s is the end of it
1712 * tmpbuf is a copy of it
1713 */
1714
a0d0e21e 1715 if (*start == '$') {
3280af22 1716 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1717 return 0;
1718 s = skipspace(s);
3280af22
NIS
1719 PL_bufptr = start;
1720 PL_expect = XREF;
a0d0e21e
LW
1721 return *s == '(' ? FUNCMETH : METHOD;
1722 }
1723 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1724 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1725 len -= 2;
1726 tmpbuf[len] = '\0';
1727 goto bare_package;
1728 }
1729 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1730 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1731 return 0;
1732 /* filehandle or package name makes it a method */
89bfa8cd 1733 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1734 s = skipspace(s);
3280af22 1735 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1736 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1737 bare_package:
3280af22 1738 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1739 newSVpvn(tmpbuf,len));
3280af22
NIS
1740 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1741 PL_expect = XTERM;
a0d0e21e 1742 force_next(WORD);
3280af22 1743 PL_bufptr = s;
a0d0e21e
LW
1744 return *s == '(' ? FUNCMETH : METHOD;
1745 }
1746 }
1747 return 0;
1748}
1749
ffb4593c
NT
1750/*
1751 * S_incl_perldb
1752 * Return a string of Perl code to load the debugger. If PERL5DB
1753 * is set, it will return the contents of that, otherwise a
1754 * compile-time require of perl5db.pl.
1755 */
1756
76e3520e 1757STATIC char*
cea2e8a9 1758S_incl_perldb(pTHX)
a0d0e21e 1759{
3280af22 1760 if (PL_perldb) {
76e3520e 1761 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1762
1763 if (pdb)
1764 return pdb;
61bb5906 1765 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1766 return "BEGIN { require 'perl5db.pl' }";
1767 }
1768 return "";
1769}
1770
1771
16d20bd9
AD
1772/* Encoded script support. filter_add() effectively inserts a
1773 * 'pre-processing' function into the current source input stream.
1774 * Note that the filter function only applies to the current source file
1775 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1776 *
1777 * The datasv parameter (which may be NULL) can be used to pass
1778 * private data to this instance of the filter. The filter function
1779 * can recover the SV using the FILTER_DATA macro and use it to
1780 * store private buffers and state information.
1781 *
1782 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1783 * and the IoDIRP field is used to store the function pointer,
1784 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1785 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1786 * private use must be set using malloc'd pointers.
1787 */
16d20bd9
AD
1788
1789SV *
864dbfa3 1790Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1791{
f4c556ac
GS
1792 if (!funcp)
1793 return Nullsv;
1794
3280af22
NIS
1795 if (!PL_rsfp_filters)
1796 PL_rsfp_filters = newAV();
16d20bd9 1797 if (!datasv)
8c52afec 1798 datasv = NEWSV(255,0);
16d20bd9 1799 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1800 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1801 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1802 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1803 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1804 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1805 av_unshift(PL_rsfp_filters, 1);
1806 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1807 return(datasv);
1808}
1809
1810
1811/* Delete most recently added instance of this filter function. */
a0d0e21e 1812void
864dbfa3 1813Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1814{
e0c19803 1815 SV *datasv;
f4c556ac 1816 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1817 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1818 return;
1819 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1820 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1821 if (IoDIRP(datasv) == (DIR*)funcp) {
1822 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1823 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1824 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1825
16d20bd9
AD
1826 return;
1827 }
1828 /* we need to search for the correct entry and clear it */
cea2e8a9 1829 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1830}
1831
1832
1833/* Invoke the n'th filter function for the current rsfp. */
1834I32
864dbfa3 1835Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1836
1837
1838 /* 0 = read one text line */
a0d0e21e 1839{
16d20bd9
AD
1840 filter_t funcp;
1841 SV *datasv = NULL;
e50aee73 1842
3280af22 1843 if (!PL_rsfp_filters)
16d20bd9 1844 return -1;
3280af22 1845 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1846 /* Provide a default input filter to make life easy. */
1847 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1848 DEBUG_P(PerlIO_printf(Perl_debug_log,
1849 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1850 if (maxlen) {
1851 /* Want a block */
1852 int len ;
1853 int old_len = SvCUR(buf_sv) ;
1854
1855 /* ensure buf_sv is large enough */
1856 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1857 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1858 if (PerlIO_error(PL_rsfp))
37120919
AD
1859 return -1; /* error */
1860 else
1861 return 0 ; /* end of file */
1862 }
16d20bd9
AD
1863 SvCUR_set(buf_sv, old_len + len) ;
1864 } else {
1865 /* Want a line */
3280af22
NIS
1866 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1867 if (PerlIO_error(PL_rsfp))
37120919
AD
1868 return -1; /* error */
1869 else
1870 return 0 ; /* end of file */
1871 }
16d20bd9
AD
1872 }
1873 return SvCUR(buf_sv);
1874 }
1875 /* Skip this filter slot if filter has been deleted */
3280af22 1876 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1877 DEBUG_P(PerlIO_printf(Perl_debug_log,
1878 "filter_read %d: skipped (filter deleted)\n",
1879 idx));
16d20bd9
AD
1880 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1881 }
1882 /* Get function pointer hidden within datasv */
1883 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1884 DEBUG_P(PerlIO_printf(Perl_debug_log,
1885 "filter_read %d: via function %p (%s)\n",
1886 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1887 /* Call function. The function is expected to */
1888 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1889 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1890 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1891}
1892
76e3520e 1893STATIC char *
cea2e8a9 1894S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1895{
c39cd008 1896#ifdef PERL_CR_FILTER
3280af22 1897 if (!PL_rsfp_filters) {
c39cd008 1898 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1899 }
1900#endif
3280af22 1901 if (PL_rsfp_filters) {
16d20bd9 1902
55497cff 1903 if (!append)
1904 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1905 if (FILTER_READ(0, sv, 0) > 0)
1906 return ( SvPVX(sv) ) ;
1907 else
1908 return Nullch ;
1909 }
9d116dd7 1910 else
fd049845 1911 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1912}
1913
1914
748a9306
LW
1915#ifdef DEBUGGING
1916 static char* exp_name[] =
09bef843
SB
1917 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1918 "ATTRTERM", "TERMBLOCK"
1919 };
748a9306 1920#endif
463ee0b2 1921
02aa26ce
NT
1922/*
1923 yylex
1924
1925 Works out what to call the token just pulled out of the input
1926 stream. The yacc parser takes care of taking the ops we return and
1927 stitching them into a tree.
1928
1929 Returns:
1930 PRIVATEREF
1931
1932 Structure:
1933 if read an identifier
1934 if we're in a my declaration
1935 croak if they tried to say my($foo::bar)
1936 build the ops for a my() declaration
1937 if it's an access to a my() variable
1938 are we in a sort block?
1939 croak if my($a); $a <=> $b
1940 build ops for access to a my() variable
1941 if in a dq string, and they've said @foo and we can't find @foo
1942 croak
1943 build ops for a bareword
1944 if we already built the token before, use it.
1945*/
1946
864dbfa3
GS
1947int
1948#ifdef USE_PURE_BISON
cea2e8a9 1949Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1950#else
cea2e8a9 1951Perl_yylex(pTHX)
864dbfa3 1952#endif
378cc40b 1953{
11343788 1954 dTHR;
79072805 1955 register char *s;
378cc40b 1956 register char *d;
79072805 1957 register I32 tmp;
463ee0b2 1958 STRLEN len;
161b471a
NIS
1959 GV *gv = Nullgv;
1960 GV **gvp = 0;
a687059c 1961
a1a0e61e
TD
1962#ifdef USE_PURE_BISON
1963 yylval_pointer = lvalp;
1964 yychar_pointer = lcharp;
1965#endif
1966
02aa26ce 1967 /* check if there's an identifier for us to look at */
3280af22 1968 if (PL_pending_ident) {
02aa26ce 1969 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1970 char pit = PL_pending_ident;
1971 PL_pending_ident = 0;
bbce6d69 1972
02aa26ce
NT
1973 /* if we're in a my(), we can't allow dynamics here.
1974 $foo'bar has already been turned into $foo::bar, so
1975 just check for colons.
1976
1977 if it's a legal name, the OP is a PADANY.
1978 */
3280af22 1979 if (PL_in_my) {
77ca0c92
LW
1980 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1981 tmp = pad_allocmy(PL_tokenbuf);
1982 }
1983 else {
1984 if (strchr(PL_tokenbuf,':'))
1985 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1986
77ca0c92
LW
1987 yylval.opval = newOP(OP_PADANY, 0);
1988 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1989 return PRIVATEREF;
1990 }
bbce6d69 1991 }
1992
02aa26ce
NT
1993 /*
1994 build the ops for accesses to a my() variable.
1995
1996 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1997 then used in a comparison. This catches most, but not
1998 all cases. For instance, it catches
1999 sort { my($a); $a <=> $b }
2000 but not
2001 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2002 (although why you'd do that is anyone's guess).
2003 */
2004
3280af22 2005 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2006#ifdef USE_THREADS
54b9620d 2007 /* Check for single character per-thread SVs */
3280af22
NIS
2008 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2009 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2010 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2011 {
2faa37cc 2012 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2013 yylval.opval->op_targ = tmp;
2014 return PRIVATEREF;
2015 }
2016#endif /* USE_THREADS */
3280af22 2017 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
77ca0c92
LW
2018 /* might be an "our" variable" */
2019 if (SvFLAGS(AvARRAY(PL_comppad_name)[tmp]) & SVpad_OUR) {
2020 /* build ops for a bareword */
2021 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
2022 yylval.opval->op_private = OPpCONST_ENTERED;
2023 gv_fetchpv(PL_tokenbuf+1,
2024 (PL_in_eval
2025 ? (GV_ADDMULTI | GV_ADDINEVAL | GV_ADDOUR)
2026 : GV_ADDOUR
2027 ),
2028 ((PL_tokenbuf[0] == '$') ? SVt_PV
2029 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2030 : SVt_PVHV));
2031 return WORD;
2032 }
2033
02aa26ce 2034 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2035 if (PL_last_lop_op == OP_SORT &&
2036 PL_tokenbuf[0] == '$' &&
2037 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2038 && !PL_tokenbuf[2])
bbce6d69 2039 {
3280af22
NIS
2040 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2041 d < PL_bufend && *d != '\n';
a863c7d1
MB
2042 d++)
2043 {
2044 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2045 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2046 PL_tokenbuf);
a863c7d1 2047 }
bbce6d69 2048 }
2049 }
bbce6d69 2050
a863c7d1
MB
2051 yylval.opval = newOP(OP_PADANY, 0);
2052 yylval.opval->op_targ = tmp;
2053 return PRIVATEREF;
2054 }
bbce6d69 2055 }
2056
02aa26ce
NT
2057 /*
2058 Whine if they've said @foo in a doublequoted string,
2059 and @foo isn't a variable we can find in the symbol
2060 table.
2061 */
3280af22
NIS
2062 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2063 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2064 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2065 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2066 PL_tokenbuf, PL_tokenbuf));
bbce6d69 2067 }
2068
02aa26ce 2069 /* build ops for a bareword */
3280af22 2070 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2071 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2072 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2073 ((PL_tokenbuf[0] == '$') ? SVt_PV
2074 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2075 : SVt_PVHV));
2076 return WORD;
2077 }
2078
02aa26ce
NT
2079 /* no identifier pending identification */
2080
3280af22 2081 switch (PL_lex_state) {
79072805
LW
2082#ifdef COMMENTARY
2083 case LEX_NORMAL: /* Some compilers will produce faster */
2084 case LEX_INTERPNORMAL: /* code if we comment these out. */
2085 break;
2086#endif
2087
09bef843 2088 /* when we've already built the next token, just pull it out of the queue */
79072805 2089 case LEX_KNOWNEXT:
3280af22
NIS
2090 PL_nexttoke--;
2091 yylval = PL_nextval[PL_nexttoke];
2092 if (!PL_nexttoke) {
2093 PL_lex_state = PL_lex_defer;
2094 PL_expect = PL_lex_expect;
2095 PL_lex_defer = LEX_NORMAL;
463ee0b2 2096 }
3280af22 2097 return(PL_nexttype[PL_nexttoke]);
79072805 2098
02aa26ce 2099 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2100 when we get here, PL_bufptr is at the \
02aa26ce 2101 */
79072805
LW
2102 case LEX_INTERPCASEMOD:
2103#ifdef DEBUGGING
3280af22 2104 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2105 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2106#endif
02aa26ce 2107 /* handle \E or end of string */
3280af22 2108 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2109 char oldmod;
02aa26ce
NT
2110
2111 /* if at a \E */
3280af22
NIS
2112 if (PL_lex_casemods) {
2113 oldmod = PL_lex_casestack[--PL_lex_casemods];
2114 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2115
3280af22
NIS
2116 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2117 PL_bufptr += 2;
2118 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2119 }
79072805
LW
2120 return ')';
2121 }
3280af22
NIS
2122 if (PL_bufptr != PL_bufend)
2123 PL_bufptr += 2;
2124 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2125 return yylex();
79072805
LW
2126 }
2127 else {
3280af22 2128 s = PL_bufptr + 1;
79072805
LW
2129 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2130 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2131 if (strchr("LU", *s) &&
3280af22 2132 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2133 {
3280af22 2134 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2135 return ')';
2136 }
3280af22
NIS
2137 if (PL_lex_casemods > 10) {
2138 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2139 if (newlb != PL_lex_casestack) {
a0d0e21e 2140 SAVEFREEPV(newlb);
3280af22 2141 PL_lex_casestack = newlb;
a0d0e21e
LW
2142 }
2143 }
3280af22
NIS
2144 PL_lex_casestack[PL_lex_casemods++] = *s;
2145 PL_lex_casestack[PL_lex_casemods] = '\0';
2146 PL_lex_state = LEX_INTERPCONCAT;
2147 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2148 force_next('(');
2149 if (*s == 'l')
3280af22 2150 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2151 else if (*s == 'u')
3280af22 2152 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2153 else if (*s == 'L')
3280af22 2154 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2155 else if (*s == 'U')
3280af22 2156 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2157 else if (*s == 'Q')
3280af22 2158 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2159 else
cea2e8a9 2160 Perl_croak(aTHX_ "panic: yylex");
3280af22 2161 PL_bufptr = s + 1;
79072805 2162 force_next(FUNC);
3280af22
NIS
2163 if (PL_lex_starts) {
2164 s = PL_bufptr;
2165 PL_lex_starts = 0;
79072805
LW
2166 Aop(OP_CONCAT);
2167 }
2168 else
cea2e8a9 2169 return yylex();
79072805
LW
2170 }
2171
55497cff 2172 case LEX_INTERPPUSH:
2173 return sublex_push();
2174
79072805 2175 case LEX_INTERPSTART:
3280af22 2176 if (PL_bufptr == PL_bufend)
79072805 2177 return sublex_done();
3280af22
NIS
2178 PL_expect = XTERM;
2179 PL_lex_dojoin = (*PL_bufptr == '@');
2180 PL_lex_state = LEX_INTERPNORMAL;
2181 if (PL_lex_dojoin) {
2182 PL_nextval[PL_nexttoke].ival = 0;
79072805 2183 force_next(',');
554b3eca 2184#ifdef USE_THREADS
533c011a
NIS
2185 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2186 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2187 force_next(PRIVATEREF);
2188#else
a0d0e21e 2189 force_ident("\"", '$');
554b3eca 2190#endif /* USE_THREADS */
3280af22 2191 PL_nextval[PL_nexttoke].ival = 0;
79072805 2192 force_next('$');
3280af22 2193 PL_nextval[PL_nexttoke].ival = 0;
79072805 2194 force_next('(');
3280af22 2195 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2196 force_next(FUNC);
2197 }
3280af22
NIS
2198 if (PL_lex_starts++) {
2199 s = PL_bufptr;
79072805
LW
2200 Aop(OP_CONCAT);
2201 }
cea2e8a9 2202 return yylex();
79072805
LW
2203
2204 case LEX_INTERPENDMAYBE:
3280af22
NIS
2205 if (intuit_more(PL_bufptr)) {
2206 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2207 break;
2208 }
2209 /* FALL THROUGH */
2210
2211 case LEX_INTERPEND:
3280af22
NIS
2212 if (PL_lex_dojoin) {
2213 PL_lex_dojoin = FALSE;
2214 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2215 return ')';
2216 }
43a16006 2217 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2218 && SvEVALED(PL_lex_repl))
43a16006 2219 {
e9fa98b2 2220 if (PL_bufptr != PL_bufend)
cea2e8a9 2221 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2222 PL_lex_repl = Nullsv;
2223 }
79072805
LW
2224 /* FALLTHROUGH */
2225 case LEX_INTERPCONCAT:
2226#ifdef DEBUGGING
3280af22 2227 if (PL_lex_brackets)
cea2e8a9 2228 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2229#endif
3280af22 2230 if (PL_bufptr == PL_bufend)
79072805
LW
2231 return sublex_done();
2232
3280af22
NIS
2233 if (SvIVX(PL_linestr) == '\'') {
2234 SV *sv = newSVsv(PL_linestr);
2235 if (!PL_lex_inpat)
76e3520e 2236 sv = tokeq(sv);
3280af22 2237 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2238 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2239 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2240 s = PL_bufend;
79072805
LW
2241 }
2242 else {
3280af22 2243 s = scan_const(PL_bufptr);
79072805 2244 if (*s == '\\')
3280af22 2245 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2246 else
3280af22 2247 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2248 }
2249
3280af22
NIS
2250 if (s != PL_bufptr) {
2251 PL_nextval[PL_nexttoke] = yylval;
2252 PL_expect = XTERM;
79072805 2253 force_next(THING);
3280af22 2254 if (PL_lex_starts++)
79072805
LW
2255 Aop(OP_CONCAT);
2256 else {
3280af22 2257 PL_bufptr = s;
cea2e8a9 2258 return yylex();
79072805
LW
2259 }
2260 }
2261
cea2e8a9 2262 return yylex();
a0d0e21e 2263 case LEX_FORMLINE:
3280af22
NIS
2264 PL_lex_state = LEX_NORMAL;
2265 s = scan_formline(PL_bufptr);
2266 if (!PL_lex_formbrack)
a0d0e21e
LW
2267 goto rightbracket;
2268 OPERATOR(';');
79072805
LW
2269 }
2270
3280af22
NIS
2271 s = PL_bufptr;
2272 PL_oldoldbufptr = PL_oldbufptr;
2273 PL_oldbufptr = s;
79072805 2274 DEBUG_p( {
bf49b057
GS
2275 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2276 exp_name[PL_expect], s);
79072805 2277 } )
463ee0b2
LW
2278
2279 retry:
378cc40b
LW
2280 switch (*s) {
2281 default:
834a4ddd
LW
2282 if (isIDFIRST_lazy(s))
2283 goto keylookup;
cea2e8a9 2284 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2285 case 4:
2286 case 26:
2287 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2288 case 0:
3280af22
NIS
2289 if (!PL_rsfp) {
2290 PL_last_uni = 0;
2291 PL_last_lop = 0;
2292 if (PL_lex_brackets)
d98d5fff 2293 yyerror("Missing right curly or square bracket");
79072805 2294 TOKEN(0);
463ee0b2 2295 }
3280af22 2296 if (s++ < PL_bufend)
a687059c 2297 goto retry; /* ignore stray nulls */
3280af22
NIS
2298 PL_last_uni = 0;
2299 PL_last_lop = 0;
2300 if (!PL_in_eval && !PL_preambled) {
2301 PL_preambled = TRUE;
2302 sv_setpv(PL_linestr,incl_perldb());
2303 if (SvCUR(PL_linestr))
2304 sv_catpv(PL_linestr,";");
2305 if (PL_preambleav){
2306 while(AvFILLp(PL_preambleav) >= 0) {
2307 SV *tmpsv = av_shift(PL_preambleav);
2308 sv_catsv(PL_linestr, tmpsv);
2309 sv_catpv(PL_linestr, ";");
91b7def8 2310 sv_free(tmpsv);
2311 }
3280af22
NIS
2312 sv_free((SV*)PL_preambleav);
2313 PL_preambleav = NULL;
91b7def8 2314 }
3280af22
NIS
2315 if (PL_minus_n || PL_minus_p) {
2316 sv_catpv(PL_linestr, "LINE: while (<>) {");
2317 if (PL_minus_l)
2318 sv_catpv(PL_linestr,"chomp;");
2319 if (PL_minus_a) {
8fd239a7
CS
2320 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2321 if (gv)
2322 GvIMPORTED_AV_on(gv);
3280af22
NIS
2323 if (PL_minus_F) {
2324 if (strchr("/'\"", *PL_splitstr)
2325 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2326 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2327 else {
2328 char delim;
2329 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2330 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2331 delim = *s;
cea2e8a9 2332 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2333 "q" + (delim == '\''), delim);
3280af22 2334 for (s = PL_splitstr; *s; s++) {
54310121 2335 if (*s == '\\')
3280af22
NIS
2336 sv_catpvn(PL_linestr, "\\", 1);
2337 sv_catpvn(PL_linestr, s, 1);
54310121 2338 }
cea2e8a9 2339 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2340 }
2304df62
AD
2341 }
2342 else
3280af22 2343 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2344 }
79072805 2345 }
3280af22
NIS
2346 sv_catpv(PL_linestr, "\n");
2347 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2348 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2349 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2350 SV *sv = NEWSV(85,0);
2351
2352 sv_upgrade(sv, SVt_PVMG);
3280af22 2353 sv_setsv(sv,PL_linestr);
57843af0 2354 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2355 }
79072805 2356 goto retry;
a687059c 2357 }
e929a76b 2358 do {
3280af22 2359 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2360 fake_eof:
3280af22
NIS
2361 if (PL_rsfp) {
2362 if (PL_preprocess && !PL_in_eval)
2363 (void)PerlProc_pclose(PL_rsfp);
2364 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2365 PerlIO_clearerr(PL_rsfp);
395c3793 2366 else
3280af22
NIS
2367 (void)PerlIO_close(PL_rsfp);
2368 PL_rsfp = Nullfp;
4a9ae47a 2369 PL_doextract = FALSE;
395c3793 2370 }
3280af22
NIS
2371 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2372 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2373 sv_catpv(PL_linestr,";}");
2374 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2375 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2376 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2377 goto retry;
2378 }
3280af22
NIS
2379 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2380 sv_setpv(PL_linestr,"");
79072805 2381 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2382 }
3280af22 2383 if (PL_doextract) {
a0d0e21e 2384 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2385 PL_doextract = FALSE;
a0d0e21e
LW
2386
2387 /* Incest with pod. */
2388 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2389 sv_setpv(PL_linestr, "");
2390 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2391 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2392 PL_doextract = FALSE;
a0d0e21e
LW
2393 }
2394 }
463ee0b2 2395 incline(s);
3280af22
NIS
2396 } while (PL_doextract);
2397 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2398 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2399 SV *sv = NEWSV(85,0);
a687059c 2400
93a17b20 2401 sv_upgrade(sv, SVt_PVMG);
3280af22 2402 sv_setsv(sv,PL_linestr);
57843af0 2403 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2404 }
3280af22 2405 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2406 if (CopLINE(PL_curcop) == 1) {
3280af22 2407 while (s < PL_bufend && isSPACE(*s))
79072805 2408 s++;
a0d0e21e 2409 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2410 s++;
44a8e56a 2411 d = Nullch;
3280af22 2412 if (!PL_in_eval) {
44a8e56a 2413 if (*s == '#' && *(s+1) == '!')
2414 d = s + 2;
2415#ifdef ALTERNATE_SHEBANG
2416 else {
2417 static char as[] = ALTERNATE_SHEBANG;
2418 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2419 d = s + (sizeof(as) - 1);
2420 }
2421#endif /* ALTERNATE_SHEBANG */
2422 }
2423 if (d) {
b8378b72 2424 char *ipath;
774d564b 2425 char *ipathend;
b8378b72 2426
774d564b 2427 while (isSPACE(*d))
b8378b72
CS
2428 d++;
2429 ipath = d;
774d564b 2430 while (*d && !isSPACE(*d))
2431 d++;
2432 ipathend = d;
2433
2434#ifdef ARG_ZERO_IS_SCRIPT
2435 if (ipathend > ipath) {
2436 /*
2437 * HP-UX (at least) sets argv[0] to the script name,
2438 * which makes $^X incorrect. And Digital UNIX and Linux,
2439 * at least, set argv[0] to the basename of the Perl
2440 * interpreter. So, having found "#!", we'll set it right.
2441 */
2442 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2443 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2444 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2445 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2446 SvSETMAGIC(x);
2447 }
774d564b 2448 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2449 }
774d564b 2450#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2451
2452 /*
2453 * Look for options.
2454 */
748a9306 2455 d = instr(s,"perl -");
84e30d1a 2456 if (!d) {
748a9306 2457 d = instr(s,"perl");
84e30d1a
GS
2458#if defined(DOSISH)
2459 /* avoid getting into infinite loops when shebang
2460 * line contains "Perl" rather than "perl" */
2461 if (!d) {
2462 for (d = ipathend-4; d >= ipath; --d) {
2463 if ((*d == 'p' || *d == 'P')
2464 && !ibcmp(d, "perl", 4))
2465 {
2466 break;
2467 }
2468 }
2469 if (d < ipath)
2470 d = Nullch;
2471 }
2472#endif
2473 }
44a8e56a 2474#ifdef ALTERNATE_SHEBANG
2475 /*
2476 * If the ALTERNATE_SHEBANG on this system starts with a
2477 * character that can be part of a Perl expression, then if
2478 * we see it but not "perl", we're probably looking at the
2479 * start of Perl code, not a request to hand off to some
2480 * other interpreter. Similarly, if "perl" is there, but
2481 * not in the first 'word' of the line, we assume the line
2482 * contains the start of the Perl program.
44a8e56a 2483 */
2484 if (d && *s != '#') {
774d564b 2485 char *c = ipath;
44a8e56a 2486 while (*c && !strchr("; \t\r\n\f\v#", *c))
2487 c++;
2488 if (c < d)
2489 d = Nullch; /* "perl" not in first word; ignore */
2490 else
2491 *s = '#'; /* Don't try to parse shebang line */
2492 }
774d564b 2493#endif /* ALTERNATE_SHEBANG */
748a9306 2494 if (!d &&
44a8e56a 2495 *s == '#' &&
774d564b 2496 ipathend > ipath &&
3280af22 2497 !PL_minus_c &&
748a9306 2498 !instr(s,"indir") &&
3280af22 2499 instr(PL_origargv[0],"perl"))
748a9306 2500 {
9f68db38 2501 char **newargv;
9f68db38 2502
774d564b 2503 *ipathend = '\0';
2504 s = ipathend + 1;
3280af22 2505 while (s < PL_bufend && isSPACE(*s))
9f68db38 2506 s++;
3280af22
NIS
2507 if (s < PL_bufend) {
2508 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2509 newargv[1] = s;
3280af22 2510 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2511 s++;
2512 *s = '\0';
3280af22 2513 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2514 }
2515 else
3280af22 2516 newargv = PL_origargv;
774d564b 2517 newargv[0] = ipath;
80252599 2518 PerlProc_execv(ipath, newargv);
cea2e8a9 2519 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2520 }
748a9306 2521 if (d) {
3280af22
NIS
2522 U32 oldpdb = PL_perldb;
2523 bool oldn = PL_minus_n;
2524 bool oldp = PL_minus_p;
748a9306
LW
2525
2526 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2527 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2528
2529 if (*d++ == '-') {
8cc95fdb 2530 do {
2531 if (*d == 'M' || *d == 'm') {
2532 char *m = d;
2533 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2534 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2535 (int)(d - m), m);
2536 }
2537 d = moreswitches(d);
2538 } while (d);
84902520 2539 if (PERLDB_LINE && !oldpdb ||
3280af22 2540 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2541 /* if we have already added "LINE: while (<>) {",
2542 we must not do it again */
748a9306 2543 {
3280af22
NIS
2544 sv_setpv(PL_linestr, "");
2545 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2546 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2547 PL_preambled = FALSE;
84902520 2548 if (PERLDB_LINE)
3280af22 2549 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2550 goto retry;
2551 }
a0d0e21e 2552 }
79072805 2553 }
9f68db38 2554 }
79072805 2555 }
3280af22
NIS
2556 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2557 PL_bufptr = s;
2558 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2559 return yylex();
ae986130 2560 }
378cc40b 2561 goto retry;
4fdae800 2562 case '\r':
6a27c188 2563#ifdef PERL_STRICT_CR
cea2e8a9
GS
2564 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2565 Perl_croak(aTHX_
54310121 2566 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2567#endif
4fdae800 2568 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2569 s++;
2570 goto retry;
378cc40b 2571 case '#':
e929a76b 2572 case '\n':
3280af22
NIS
2573 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2574 d = PL_bufend;
a687059c 2575 while (s < d && *s != '\n')
378cc40b 2576 s++;
0f85fab0 2577 if (s < d)
378cc40b 2578 s++;
463ee0b2 2579 incline(s);
3280af22
NIS
2580 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2581 PL_bufptr = s;
2582 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2583 return yylex();
a687059c 2584 }
378cc40b 2585 }
a687059c 2586 else {
378cc40b 2587 *s = '\0';
3280af22 2588 PL_bufend = s;
a687059c 2589 }
378cc40b
LW
2590 goto retry;
2591 case '-':
79072805 2592 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2593 s++;
3280af22 2594 PL_bufptr = s;
748a9306
LW
2595 tmp = *s++;
2596
3280af22 2597 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2598 s++;
2599
2600 if (strnEQ(s,"=>",2)) {
3280af22 2601 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2602 OPERATOR('-'); /* unary minus */
2603 }
3280af22
NIS
2604 PL_last_uni = PL_oldbufptr;
2605 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2606 switch (tmp) {
79072805
LW
2607 case 'r': FTST(OP_FTEREAD);
2608 case 'w': FTST(OP_FTEWRITE);
2609 case 'x': FTST(OP_FTEEXEC);
2610 case 'o': FTST(OP_FTEOWNED);
2611 case 'R': FTST(OP_FTRREAD);
2612 case 'W': FTST(OP_FTRWRITE);
2613 case 'X': FTST(OP_FTREXEC);
2614 case 'O': FTST(OP_FTROWNED);
2615 case 'e': FTST(OP_FTIS);
2616 case 'z': FTST(OP_FTZERO);
2617 case 's': FTST(OP_FTSIZE);
2618 case 'f': FTST(OP_FTFILE);
2619 case 'd': FTST(OP_FTDIR);
2620 case 'l': FTST(OP_FTLINK);
2621 case 'p': FTST(OP_FTPIPE);
2622 case 'S': FTST(OP_FTSOCK);
2623 case 'u': FTST(OP_FTSUID);
2624 case 'g': FTST(OP_FTSGID);
2625 case 'k': FTST(OP_FTSVTX);
2626 case 'b': FTST(OP_FTBLK);
2627 case 'c': FTST(OP_FTCHR);
2628 case 't': FTST(OP_FTTTY);
2629 case 'T': FTST(OP_FTTEXT);
2630 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2631 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2632 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2633 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2634 default:
cea2e8a9 2635 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2636 break;
2637 }
2638 }
a687059c
LW
2639 tmp = *s++;
2640 if (*s == tmp) {
2641 s++;
3280af22 2642 if (PL_expect == XOPERATOR)
79072805
LW
2643 TERM(POSTDEC);
2644 else
2645 OPERATOR(PREDEC);
2646 }
2647 else if (*s == '>') {
2648 s++;
2649 s = skipspace(s);
834a4ddd 2650 if (isIDFIRST_lazy(s)) {
a0d0e21e 2651 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2652 TOKEN(ARROW);
79072805 2653 }
748a9306
LW
2654 else if (*s == '$')
2655 OPERATOR(ARROW);
463ee0b2 2656 else
748a9306 2657 TERM(ARROW);
a687059c 2658 }
3280af22 2659 if (PL_expect == XOPERATOR)
79072805
LW
2660 Aop(OP_SUBTRACT);
2661 else {
3280af22 2662 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2663 check_uni();
79072805 2664 OPERATOR('-'); /* unary minus */
2f3197b3 2665 }
79072805 2666
378cc40b 2667 case '+':
a687059c
LW
2668 tmp = *s++;
2669 if (*s == tmp) {
378cc40b 2670 s++;
3280af22 2671 if (PL_expect == XOPERATOR)
79072805
LW
2672 TERM(POSTINC);
2673 else
2674 OPERATOR(PREINC);
378cc40b 2675 }
3280af22 2676 if (PL_expect == XOPERATOR)
79072805
LW
2677 Aop(OP_ADD);
2678 else {
3280af22 2679 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2680 check_uni();
a687059c 2681 OPERATOR('+');
2f3197b3 2682 }
a687059c 2683
378cc40b 2684 case '*':
3280af22
NIS
2685 if (PL_expect != XOPERATOR) {
2686 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2687 PL_expect = XOPERATOR;
2688 force_ident(PL_tokenbuf, '*');
2689 if (!*PL_tokenbuf)
a0d0e21e 2690 PREREF('*');
79072805 2691 TERM('*');
a687059c 2692 }
79072805
LW
2693 s++;
2694 if (*s == '*') {
a687059c 2695 s++;
79072805 2696 PWop(OP_POW);
a687059c 2697 }
79072805
LW
2698 Mop(OP_MULTIPLY);
2699
378cc40b 2700 case '%':
3280af22 2701 if (PL_expect == XOPERATOR) {
bbce6d69 2702 ++s;
2703 Mop(OP_MODULO);
a687059c 2704 }
3280af22
NIS
2705 PL_tokenbuf[0] = '%';
2706 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2707 if (!PL_tokenbuf[1]) {
2708 if (s == PL_bufend)
bbce6d69 2709 yyerror("Final % should be \\% or %name");
2710 PREREF('%');
a687059c 2711 }
3280af22 2712 PL_pending_ident = '%';
bbce6d69 2713 TERM('%');
a687059c 2714
378cc40b 2715 case '^':
79072805 2716 s++;
a0d0e21e 2717 BOop(OP_BIT_XOR);
79072805 2718 case '[':
3280af22 2719 PL_lex_brackets++;
79072805 2720 /* FALL THROUGH */
378cc40b 2721 case '~':
378cc40b 2722 case ',':
378cc40b
LW
2723 tmp = *s++;
2724 OPERATOR(tmp);
a0d0e21e
LW
2725 case ':':
2726 if (s[1] == ':') {
2727 len = 0;
2728 goto just_a_word;
2729 }
2730 s++;
09bef843
SB
2731 switch (PL_expect) {
2732 OP *attrs;
2733 case XOPERATOR:
2734 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2735 break;
2736 PL_bufptr = s; /* update in case we back off */
2737 goto grabattrs;
2738 case XATTRBLOCK:
2739 PL_expect = XBLOCK;
2740 goto grabattrs;
2741 case XATTRTERM:
2742 PL_expect = XTERMBLOCK;
2743 grabattrs:
2744 s = skipspace(s);
2745 attrs = Nullop;
2746 while (isIDFIRST_lazy(s)) {
2747 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2748 if (*d == '(') {
2749 d = scan_str(d,TRUE,TRUE);
2750 if (!d) {
2751 if (PL_lex_stuff) {
2752 SvREFCNT_dec(PL_lex_stuff);
2753 PL_lex_stuff = Nullsv;
2754 }
2755 /* MUST advance bufptr here to avoid bogus
2756 "at end of line" context messages from yyerror().
2757 */
2758 PL_bufptr = s + len;
2759 yyerror("Unterminated attribute parameter in attribute list");
2760 if (attrs)
2761 op_free(attrs);
2762 return 0; /* EOF indicator */
2763 }
2764 }
2765 if (PL_lex_stuff) {
2766 SV *sv = newSVpvn(s, len);
2767 sv_catsv(sv, PL_lex_stuff);
2768 attrs = append_elem(OP_LIST, attrs,
2769 newSVOP(OP_CONST, 0, sv));
2770 SvREFCNT_dec(PL_lex_stuff);
2771 PL_lex_stuff = Nullsv;
2772 }
2773 else {
2774 attrs = append_elem(OP_LIST, attrs,
2775 newSVOP(OP_CONST, 0,
2776 newSVpvn(s, len)));
2777 }
2778 s = skipspace(d);
2779 while (*s == ',')
2780 s = skipspace(s+1);
2781 }
2782 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}' for vi */
2783 if (*s != ';' && *s != tmp) {
2784 char q = ((*s == '\'') ? '"' : '\'');
2785 /* If here for an expression, and parsed no attrs, back off. */
2786 if (tmp == '=' && !attrs) {
2787 s = PL_bufptr;
2788 break;
2789 }
2790 /* MUST advance bufptr here to avoid bogus "at end of line"
2791 context messages from yyerror().
2792 */
2793 PL_bufptr = s;
2794 if (!*s)
2795 yyerror("Unterminated attribute list");
2796 else
2797 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2798 q, *s, q));
2799 if (attrs)
2800 op_free(attrs);
2801 OPERATOR(':');
2802 }
2803 if (attrs) {
2804 PL_nextval[PL_nexttoke].opval = attrs;
2805 force_next(THING);
2806 }
2807 TOKEN(COLONATTR);
2808 }
a0d0e21e 2809 OPERATOR(':');
8990e307
LW
2810 case '(':
2811 s++;
3280af22
NIS
2812 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2813 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2814 else
3280af22 2815 PL_expect = XTERM;
a0d0e21e 2816 TOKEN('(');
378cc40b 2817 case ';':
57843af0
GS
2818 if (CopLINE(PL_curcop) < PL_copline)
2819 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2820 tmp = *s++;
2821 OPERATOR(tmp);
2822 case ')':
378cc40b 2823 tmp = *s++;
16d20bd9
AD
2824 s = skipspace(s);
2825 if (*s == '{')
2826 PREBLOCK(tmp);
378cc40b 2827 TERM(tmp);
79072805
LW
2828 case ']':
2829 s++;
3280af22 2830 if (PL_lex_brackets <= 0)
d98d5fff 2831 yyerror("Unmatched right square bracket");
463ee0b2 2832 else
3280af22
NIS
2833 --PL_lex_brackets;
2834 if (PL_lex_state == LEX_INTERPNORMAL) {
2835 if (PL_lex_brackets == 0) {
a0d0e21e 2836 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2837 PL_lex_state = LEX_INTERPEND;
79072805
LW
2838 }
2839 }
4633a7c4 2840 TERM(']');
79072805
LW
2841 case '{':
2842 leftbracket:
79072805 2843 s++;
3280af22
NIS
2844 if (PL_lex_brackets > 100) {
2845 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2846 if (newlb != PL_lex_brackstack) {
8990e307 2847 SAVEFREEPV(newlb);
3280af22 2848 PL_lex_brackstack = newlb;
8990e307
LW
2849 }
2850 }
3280af22 2851 switch (PL_expect) {
a0d0e21e 2852 case XTERM:
3280af22 2853 if (PL_lex_formbrack) {
a0d0e21e
LW
2854 s--;
2855 PRETERMBLOCK(DO);
2856 }
3280af22
NIS
2857 if (PL_oldoldbufptr == PL_last_lop)
2858 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2859 else
3280af22 2860 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2861 OPERATOR(HASHBRACK);
a0d0e21e 2862 case XOPERATOR:
3280af22 2863 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2864 s++;
44a8e56a 2865 d = s;
3280af22
NIS
2866 PL_tokenbuf[0] = '\0';
2867 if (d < PL_bufend && *d == '-') {
2868 PL_tokenbuf[0] = '-';
44a8e56a 2869 d++;
3280af22 2870 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2871 d++;
2872 }
834a4ddd 2873 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2874 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2875 FALSE, &len);
3280af22 2876 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2877 d++;
2878 if (*d == '}') {
3280af22 2879 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2880 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2881 if (minus)
2882 force_next('-');
748a9306
LW
2883 }
2884 }
2885 /* FALL THROUGH */
09bef843 2886 case XATTRBLOCK:
748a9306 2887 case XBLOCK:
3280af22
NIS
2888 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2889 PL_expect = XSTATE;
a0d0e21e 2890 break;
09bef843 2891 case XATTRTERM:
a0d0e21e 2892 case XTERMBLOCK:
3280af22
NIS
2893 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2894 PL_expect = XSTATE;
a0d0e21e
LW
2895 break;
2896 default: {
2897 char *t;
3280af22
NIS
2898 if (PL_oldoldbufptr == PL_last_lop)
2899 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2900 else
3280af22 2901 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2902 s = skipspace(s);
09ecc4b6 2903 if (*s == '}')
a0d0e21e 2904 OPERATOR(HASHBRACK);
b8a4b1be
GS
2905 /* This hack serves to disambiguate a pair of curlies
2906 * as being a block or an anon hash. Normally, expectation
2907 * determines that, but in cases where we're not in a
2908 * position to expect anything in particular (like inside
2909 * eval"") we have to resolve the ambiguity. This code
2910 * covers the case where the first term in the curlies is a
2911 * quoted string. Most other cases need to be explicitly
2912 * disambiguated by prepending a `+' before the opening
2913 * curly in order to force resolution as an anon hash.
2914 *
2915 * XXX should probably propagate the outer expectation
2916 * into eval"" to rely less on this hack, but that could
2917 * potentially break current behavior of eval"".
2918 * GSAR 97-07-21
2919 */
2920 t = s;
2921 if (*s == '\'' || *s == '"' || *s == '`') {
2922 /* common case: get past first string, handling escapes */
3280af22 2923 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2924 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2925 t++;
2926 t++;
a0d0e21e 2927 }
b8a4b1be 2928 else if (*s == 'q') {
3280af22 2929 if (++t < PL_bufend
b8a4b1be 2930 && (!isALNUM(*t)
3280af22 2931 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2932 && !isALNUM(*t))))
2933 {
b8a4b1be
GS
2934 char *tmps;
2935 char open, close, term;
2936 I32 brackets = 1;
2937
3280af22 2938 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2939 t++;
2940 term = *t;
2941 open = term;
2942 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2943 term = tmps[5];
2944 close = term;
2945 if (open == close)
3280af22
NIS
2946 for (t++; t < PL_bufend; t++) {
2947 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2948 t++;
6d07e5e9 2949 else if (*t == open)
b8a4b1be
GS
2950 break;
2951 }
2952 else
3280af22
NIS
2953 for (t++; t < PL_bufend; t++) {
2954 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2955 t++;
6d07e5e9 2956 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2957 break;
2958 else if (*t == open)
2959 brackets++;
2960 }
2961 }
2962 t++;
a0d0e21e 2963 }
0505442f
GS
2964 else if (isALNUM_lazy(t)) {
2965 t += UTF8SKIP(t);
2966 while (t < PL_bufend && isALNUM_lazy(t))
2967 t += UTF8SKIP(t);
a0d0e21e 2968 }
3280af22 2969 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2970 t++;
b8a4b1be
GS
2971 /* if comma follows first term, call it an anon hash */
2972 /* XXX it could be a comma expression with loop modifiers */
3280af22 2973 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2974 || (*t == '=' && t[1] == '>')))
a0d0e21e 2975 OPERATOR(HASHBRACK);
3280af22 2976 if (PL_expect == XREF)
4e4e412b 2977 PL_expect = XTERM;
a0d0e21e 2978 else {
3280af22
NIS
2979 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2980 PL_expect = XSTATE;
a0d0e21e 2981 }
8990e307 2982 }
a0d0e21e 2983 break;
463ee0b2 2984 }
57843af0 2985 yylval.ival = CopLINE(PL_curcop);
79072805 2986 if (isSPACE(*s) || *s == '#')
3280af22 2987 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2988 TOKEN('{');
378cc40b 2989 case '}':
79072805
LW
2990 rightbracket:
2991 s++;
3280af22 2992 if (PL_lex_brackets <= 0)
d98d5fff 2993 yyerror("Unmatched right curly bracket");
463ee0b2 2994 else
3280af22
NIS
2995 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2996 if (PL_lex_brackets < PL_lex_formbrack)
2997 PL_lex_formbrack = 0;
2998 if (PL_lex_state == LEX_INTERPNORMAL) {
2999 if (PL_lex_brackets == 0) {
9059aa12
LW
3000 if (PL_expect & XFAKEBRACK) {
3001 PL_expect &= XENUMMASK;
3280af22
NIS
3002 PL_lex_state = LEX_INTERPEND;
3003 PL_bufptr = s;
cea2e8a9 3004 return yylex(); /* ignore fake brackets */
79072805 3005 }
fa83b5b6 3006 if (*s == '-' && s[1] == '>')
3280af22 3007 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3008 else if (*s != '[' && *s != '{')
3280af22 3009 PL_lex_state = LEX_INTERPEND;
79072805
LW
3010 }
3011 }
9059aa12
LW
3012 if (PL_expect & XFAKEBRACK) {
3013 PL_expect &= XENUMMASK;
3280af22 3014 PL_bufptr = s;
cea2e8a9 3015 return yylex(); /* ignore fake brackets */
748a9306 3016 }
79072805
LW
3017 force_next('}');
3018 TOKEN(';');
378cc40b
LW
3019 case '&':
3020 s++;
3021 tmp = *s++;
3022 if (tmp == '&')
a0d0e21e 3023 AOPERATOR(ANDAND);
378cc40b 3024 s--;
3280af22 3025 if (PL_expect == XOPERATOR) {
834a4ddd 3026 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
57843af0 3027 CopLINE_dec(PL_curcop);
cea2e8a9 3028 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3029 CopLINE_inc(PL_curcop);
463ee0b2 3030 }
79072805 3031 BAop(OP_BIT_AND);
463ee0b2 3032 }
79072805 3033
3280af22
NIS
3034 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3035 if (*PL_tokenbuf) {
3036 PL_expect = XOPERATOR;
3037 force_ident(PL_tokenbuf, '&');
463ee0b2 3038 }
79072805
LW
3039 else
3040 PREREF('&');
c07a80fd 3041 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3042 TERM('&');
3043
378cc40b
LW
3044 case '|':
3045 s++;
3046 tmp = *s++;
3047 if (tmp == '|')
a0d0e21e 3048 AOPERATOR(OROR);
378cc40b 3049 s--;
79072805 3050 BOop(OP_BIT_OR);
378cc40b
LW
3051 case '=':
3052 s++;
3053 tmp = *s++;
3054 if (tmp == '=')
79072805
LW
3055 Eop(OP_EQ);
3056 if (tmp == '>')
3057 OPERATOR(',');
378cc40b 3058 if (tmp == '~')
79072805 3059 PMop(OP_MATCH);
599cee73 3060 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3061 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3062 s--;
3280af22
NIS
3063 if (PL_expect == XSTATE && isALPHA(tmp) &&
3064 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3065 {
3280af22
NIS
3066 if (PL_in_eval && !PL_rsfp) {
3067 d = PL_bufend;
a5f75d66
AD
3068 while (s < d) {
3069 if (*s++ == '\n') {
3070 incline(s);
3071 if (strnEQ(s,"=cut",4)) {
3072 s = strchr(s,'\n');
3073 if (s)
3074 s++;
3075 else
3076 s = d;
3077 incline(s);
3078 goto retry;
3079 }
3080 }
3081 }
3082 goto retry;
3083 }
3280af22
NIS
3084 s = PL_bufend;
3085 PL_doextract = TRUE;
a0d0e21e
LW
3086 goto retry;
3087 }
3280af22 3088 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3089 char *t;
51882d45 3090#ifdef PERL_STRICT_CR
a0d0e21e 3091 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3092#else
3093 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3094#endif
a0d0e21e
LW
3095 if (*t == '\n' || *t == '#') {
3096 s--;
3280af22 3097 PL_expect = XBLOCK;
a0d0e21e
LW
3098 goto leftbracket;
3099 }
79072805 3100 }
a0d0e21e
LW
3101 yylval.ival = 0;
3102 OPERATOR(ASSIGNOP);
378cc40b
LW
3103 case '!':
3104 s++;
3105 tmp = *s++;
3106 if (tmp == '=')
79072805 3107 Eop(OP_NE);
378cc40b 3108 if (tmp == '~')
79072805 3109 PMop(OP_NOT);
378cc40b
LW
3110 s--;
3111 OPERATOR('!');
3112 case '<':
3280af22 3113 if (PL_expect != XOPERATOR) {
93a17b20 3114 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3115 check_uni();
79072805
LW
3116 if (s[1] == '<')
3117 s = scan_heredoc(s);
3118 else
3119 s = scan_inputsymbol(s);
3120 TERM(sublex_start());
378cc40b
LW
3121 }
3122 s++;
3123 tmp = *s++;
3124 if (tmp == '<')
79072805 3125 SHop(OP_LEFT_SHIFT);
395c3793
LW
3126 if (tmp == '=') {
3127 tmp = *s++;
3128 if (tmp == '>')
79072805 3129 Eop(OP_NCMP);
395c3793 3130 s--;
79072805 3131 Rop(OP_LE);
395c3793 3132 }
378cc40b 3133 s--;
79072805 3134 Rop(OP_LT);
378cc40b
LW
3135 case '>':
3136 s++;
3137 tmp = *s++;
3138 if (tmp == '>')
79072805 3139 SHop(OP_RIGHT_SHIFT);
378cc40b 3140 if (tmp == '=')
79072805 3141 Rop(OP_GE);
378cc40b 3142 s--;
79072805 3143 Rop(OP_GT);
378cc40b
LW
3144
3145 case '$':
bbce6d69 3146 CLINE;
3147
3280af22
NIS
3148 if (PL_expect == XOPERATOR) {
3149 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3150 PL_expect = XTERM;
a0d0e21e 3151 depcom();
bbce6d69 3152 return ','; /* grandfather non-comma-format format */
a0d0e21e 3153 }
8990e307 3154 }
a0d0e21e 3155
834a4ddd 3156 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3157 PL_tokenbuf[0] = '@';
376b8730
SM
3158 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3159 sizeof PL_tokenbuf - 1, FALSE);
3160 if (PL_expect == XOPERATOR)
3161 no_op("Array length", s);
3280af22 3162 if (!PL_tokenbuf[1])
a0d0e21e 3163 PREREF(DOLSHARP);
3280af22
NIS
3164 PL_expect = XOPERATOR;
3165 PL_pending_ident = '#';
463ee0b2 3166 TOKEN(DOLSHARP);
79072805 3167 }
bbce6d69 3168
3280af22 3169 PL_tokenbuf[0] = '$';
376b8730
SM
3170 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3171 sizeof PL_tokenbuf - 1, FALSE);
3172 if (PL_expect == XOPERATOR)
3173 no_op("Scalar", s);
3280af22
NIS
3174 if (!PL_tokenbuf[1]) {
3175 if (s == PL_bufend)
bbce6d69 3176 yyerror("Final $ should be \\$ or $name");
3177 PREREF('$');
8990e307 3178 }
a0d0e21e 3179
bbce6d69 3180 /* This kludge not intended to be bulletproof. */
3280af22 3181 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3182 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3183 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 3184 yylval.opval->op_private = OPpCONST_ARYBASE;
3185 TERM(THING);
3186 }
3187
ff68c719 3188 d = s;
69d2bceb 3189 tmp = (I32)*s;
3280af22 3190 if (PL_lex_state == LEX_NORMAL)
ff68c719 3191 s = skipspace(s);
3192
3280af22 3193 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3194 char *t;
3195 if (*s == '[') {
3280af22 3196 PL_tokenbuf[0] = '@';
599cee73 3197 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3198 for(t = s + 1;
834a4ddd 3199 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3200 t++) ;
a0d0e21e 3201 if (*t++ == ',') {
3280af22
NIS
3202 PL_bufptr = skipspace(PL_bufptr);
3203 while (t < PL_bufend && *t != ']')
bbce6d69 3204 t++;
cea2e8a9 3205 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3206 "Multidimensional syntax %.*s not supported",
3207 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3208 }
3209 }
bbce6d69 3210 }
3211 else if (*s == '{') {
3280af22 3212 PL_tokenbuf[0] = '%';
599cee73 3213 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3214 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3215 {
3280af22 3216 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3217 STRLEN len;
3218 for (t++; isSPACE(*t); t++) ;
834a4ddd 3219 if (isIDFIRST_lazy(t)) {
8903cb82 3220 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3221 for (; isSPACE(*t); t++) ;
864dbfa3 3222 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3223 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3224 "You need to quote \"%s\"", tmpbuf);
748a9306 3225 }
93a17b20
LW
3226 }
3227 }
2f3197b3 3228 }
bbce6d69 3229
3280af22 3230 PL_expect = XOPERATOR;
69d2bceb 3231 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3232 bool islop = (PL_last_lop == PL_oldoldbufptr);
3233 if (!islop || PL_last_lop_op == OP_GREPSTART)
3234 PL_expect = XOPERATOR;
bbce6d69 3235 else if (strchr("$@\"'`q", *s))
3280af22 3236 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3237 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3238 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3239 else if (isIDFIRST_lazy(s)) {
3280af22 3240 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3241 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3242 if (tmp = keyword(tmpbuf, len)) {
3243 /* binary operators exclude handle interpretations */
3244 switch (tmp) {
3245 case -KEY_x:
3246 case -KEY_eq:
3247 case -KEY_ne:
3248 case -KEY_gt:
3249 case -KEY_lt:
3250 case -KEY_ge:
3251 case -KEY_le:
3252 case -KEY_cmp:
3253 break;
3254 default:
3280af22 3255 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3256 break;
3257 }
3258 }
68dc0745 3259 else {
3260 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3261 if (gv && GvCVu(gv))
3280af22 3262 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3263 }
93a17b20 3264 }
bbce6d69 3265 else if (isDIGIT(*s))
3280af22 3266 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3267 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3268 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3269 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3270 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3271 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3272 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3273 }
3280af22 3274 PL_pending_ident = '$';
79072805 3275 TOKEN('$');
378cc40b
LW
3276
3277 case '@':
3280af22 3278 if (PL_expect == XOPERATOR)
bbce6d69 3279 no_op("Array", s);
3280af22
NIS
3280 PL_tokenbuf[0] = '@';
3281 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3282 if (!PL_tokenbuf[1]) {
3283 if (s == PL_bufend)
bbce6d69 3284 yyerror("Final @ should be \\@ or @name");
3285 PREREF('@');
3286 }
3280af22 3287 if (PL_lex_state == LEX_NORMAL)
ff68c719 3288 s = skipspace(s);
3280af22 3289 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3290 if (*s == '{')
3280af22 3291 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3292
3293 /* Warn about @ where they meant $. */
599cee73 3294 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3295 if (*s == '[' || *s == '{') {
3296 char *t = s + 1;
834a4ddd 3297 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3298 t++;
3299 if (*t == '}' || *t == ']') {
3300 t++;
3280af22 3301 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3302 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3303 "Scalar value %.*s better written as $%.*s",
3280af22 3304 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3305 }
93a17b20
LW
3306 }
3307 }
463ee0b2 3308 }
3280af22 3309 PL_pending_ident = '@';
79072805 3310 TERM('@');
378cc40b
LW
3311
3312 case '/': /* may either be division or pattern */
3313 case '?': /* may either be conditional or pattern */
3280af22 3314 if (PL_expect != XOPERATOR) {
c277df42 3315 /* Disable warning on "study /blah/" */
3280af22
NIS
3316 if (PL_oldoldbufptr == PL_last_uni
3317 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3318 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3319 check_uni();
8782bef2 3320 s = scan_pat(s,OP_MATCH);
79072805 3321 TERM(sublex_start());
378cc40b
LW
3322 }
3323 tmp = *s++;
a687059c 3324 if (tmp == '/')
79072805 3325 Mop(OP_DIVIDE);
378cc40b
LW
3326 OPERATOR(tmp);
3327
3328 case '.':
51882d45
GS
3329 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3330#ifdef PERL_STRICT_CR
3331 && s[1] == '\n'
3332#else
3333 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3334#endif
3335 && (s == PL_linestart || s[-1] == '\n') )
3336 {
3280af22
NIS
3337 PL_lex_formbrack = 0;
3338 PL_expect = XSTATE;
79072805
LW
3339 goto rightbracket;
3340 }
3280af22 3341 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3342 tmp = *s++;
a687059c
LW
3343 if (*s == tmp) {
3344 s++;
2f3197b3
LW
3345 if (*s == tmp) {
3346 s++;
79072805 3347 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3348 }
3349 else
79072805 3350 yylval.ival = 0;
378cc40b 3351 OPERATOR(DOTDOT);
a687059c 3352 }
3280af22 3353 if (PL_expect != XOPERATOR)
2f3197b3 3354 check_uni();
79072805 3355 Aop(OP_CONCAT);
378cc40b
LW
3356 }
3357 /* FALL THROUGH */
3358 case '0': case '1': case '2': case '3': case '4':
3359 case '5': case '6': case '7': case '8': case '9':
79072805 3360 s = scan_num(s);
3280af22 3361 if (PL_expect == XOPERATOR)
8990e307 3362 no_op("Number",s);
79072805
LW
3363 TERM(THING);
3364
3365 case '\'':
09bef843 3366 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3367 if (PL_expect == XOPERATOR) {
3368 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3369 PL_expect = XTERM;
a0d0e21e
LW
3370 depcom();
3371 return ','; /* grandfather non-comma-format format */
3372 }
463ee0b2 3373 else
8990e307 3374 no_op("String",s);
463ee0b2 3375 }
79072805 3376 if (!s)
85e6fe83 3377 missingterm((char*)0);
79072805
LW
3378 yylval.ival = OP_CONST;
3379 TERM(sublex_start());
3380
3381 case '"':
09bef843 3382 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3383 if (PL_expect == XOPERATOR) {
3384 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3385 PL_expect = XTERM;
a0d0e21e
LW
3386 depcom();
3387 return ','; /* grandfather non-comma-format format */
3388 }
463ee0b2 3389 else
8990e307 3390 no_op("String",s);
463ee0b2 3391 }
79072805 3392 if (!s)
85e6fe83 3393 missingterm((char*)0);
4633a7c4 3394 yylval.ival = OP_CONST;
3280af22 3395 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3396 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3397 yylval.ival = OP_STRINGIFY;
3398 break;
3399 }
3400 }
79072805
LW
3401 TERM(sublex_start());
3402
3403 case '`':
09bef843 3404 s = scan_str(s,FALSE,FALSE);
3280af22 3405 if (PL_expect == XOPERATOR)
8990e307 3406 no_op("Backticks",s);
79072805 3407 if (!s)
85e6fe83 3408 missingterm((char*)0);
79072805
LW
3409 yylval.ival = OP_BACKTICK;
3410 set_csh();
3411 TERM(sublex_start());
3412
3413 case '\\':
3414 s++;
599cee73 3415 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3416 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3417 *s, *s);
3280af22 3418 if (PL_expect == XOPERATOR)
8990e307 3419 no_op("Backslash",s);
79072805
LW
3420 OPERATOR(REFGEN);
3421
a7cb1f99
GS
3422 case 'v':
3423 if (isDIGIT(s[1]) && PL_expect == XTERM) {
3424 char *start = s;
3425 start++;
3426 start++;
3427 while (isDIGIT(*start))
3428 start++;
3429 if (*start == '.' && isDIGIT(start[1])) {
3430 s = scan_num(s);
3431 TERM(THING);
3432 }
3433 }
3434 goto keylookup;
79072805 3435 case 'x':
3280af22 3436 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3437 s++;
3438 Mop(OP_REPEAT);
2f3197b3 3439 }
79072805
LW
3440 goto keylookup;
3441
378cc40b 3442 case '_':
79072805
LW
3443 case 'a': case 'A':
3444 case 'b': case 'B':
3445 case 'c': case 'C':
3446 case 'd': case 'D':
3447 case 'e': case 'E':
3448 case 'f': case 'F':
3449 case 'g': case 'G':
3450 case 'h': case 'H':
3451 case 'i': case 'I':
3452 case 'j': case 'J':
3453 case 'k': case 'K':
3454 case 'l': case 'L':
3455 case 'm': case 'M':
3456 case 'n': case 'N':
3457 case 'o': case 'O':
3458 case 'p': case 'P':
3459 case 'q': case 'Q':
3460 case 'r': case 'R':
3461 case 's': case 'S':
3462 case 't': case 'T':
3463 case 'u': case 'U':
a7cb1f99 3464 case 'V':
79072805
LW
3465 case 'w': case 'W':
3466 case 'X':
3467 case 'y': case 'Y':
3468 case 'z': case 'Z':
3469
49dc05e3 3470 keylookup: {
2d8e6c8d 3471 STRLEN n_a;
161b471a
NIS
3472 gv = Nullgv;
3473 gvp = 0;
49dc05e3 3474
3280af22
NIS
3475 PL_bufptr = s;
3476 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3477
3478 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3479 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3480 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3481 (PL_tokenbuf[0] == 'q' &&
3482 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3483
3484 /* x::* is just a word, unless x is "CORE" */
3280af22 3485 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3486 goto just_a_word;
3487
3643fb5f 3488 d = s;
3280af22 3489 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3490 d++; /* no comments skipped here, or s### is misparsed */
3491
3492 /* Is this a label? */
3280af22
NIS
3493 if (!tmp && PL_expect == XSTATE
3494 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3495 s = d + 1;
3280af22 3496 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3497 CLINE;
3498 TOKEN(LABEL);
3643fb5f
CS
3499 }
3500
3501 /* Check for keywords */
3280af22 3502 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3503
3504 /* Is this a word before a => operator? */
748a9306
LW
3505 if (strnEQ(d,"=>",2)) {
3506 CLINE;
3280af22 3507 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3508 yylval.opval->op_private = OPpCONST_BARE;
3509 TERM(WORD);
3510 }
3511
a0d0e21e 3512 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3513 GV *ogv = Nullgv; /* override (winner) */
3514 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3515 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3516 CV *cv;
3280af22 3517 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3518 (cv = GvCVu(gv)))
3519 {
3520 if (GvIMPORTED_CV(gv))
3521 ogv = gv;
3522 else if (! CvMETHOD(cv))
3523 hgv = gv;
3524 }
3525 if (!ogv &&
3280af22
NIS
3526 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3527 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3528 GvCVu(gv) && GvIMPORTED_CV(gv))
3529 {
3530 ogv = gv;
3531 }
3532 }
3533 if (ogv) {
3534 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3535 }
3536 else if (gv && !gvp
3537 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3538 && GvCVu(gv)
3280af22 3539 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3540 {
3541 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3542 }
56f7f34b
CS
3543 else { /* no override */
3544 tmp = -tmp;
3545 gv = Nullgv;
3546 gvp = 0;
4944e2f7
GS
3547 if (ckWARN(WARN_AMBIGUOUS) && hgv
3548 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3549 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3550 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3551 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3552 }
a0d0e21e
LW
3553 }
3554
3555 reserved_word:
3556 switch (tmp) {
79072805
LW
3557
3558 default: /* not a keyword */
93a17b20 3559 just_a_word: {
96e4d5b1 3560 SV *sv;
3280af22 3561 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3562
3563 /* Get the rest if it looks like a package qualifier */
3564
a0d0e21e 3565 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3566 STRLEN morelen;
3280af22 3567 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3568 TRUE, &morelen);
3569 if (!morelen)
cea2e8a9 3570 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3571 *s == '\'' ? "'" : "::");
c3e0f903 3572 len += morelen;
a0d0e21e 3573 }
8990e307 3574
3280af22
NIS
3575 if (PL_expect == XOPERATOR) {
3576 if (PL_bufptr == PL_linestart) {
57843af0 3577 CopLINE_dec(PL_curcop);
cea2e8a9 3578 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3579 CopLINE_inc(PL_curcop);
463ee0b2
LW
3580 }
3581 else
54310121 3582 no_op("Bareword",s);
463ee0b2 3583 }
8990e307 3584
c3e0f903
GS
3585 /* Look for a subroutine with this name in current package,
3586 unless name is "Foo::", in which case Foo is a bearword
3587 (and a package name). */
3588
3589 if (len > 2 &&
3280af22 3590 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3591 {
599cee73 3592 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3593 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3594 "Bareword \"%s\" refers to nonexistent package",
3280af22 3595 PL_tokenbuf);
c3e0f903 3596 len -= 2;
3280af22 3597 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3598 gv = Nullgv;
3599 gvp = 0;
3600 }
3601 else {
3602 len = 0;
3603 if (!gv)
3280af