This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
make Perl's malloc work for allocations over 2GB (from Ilya
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a 47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff 49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
a790bc05 61/* XXX If this causes problems, set i_unistd=undef in the hint file. */
62#ifdef I_UNISTD
63# include <unistd.h> /* Needed for execv() */
64#endif
65
66
79072805
LW
67#ifdef ff_next
68#undef ff_next
d48672a2
LW
69#endif
70
a1a0e61e
TD
71#ifdef USE_PURE_BISON
72YYSTYPE* yylval_pointer = NULL;
73int* yychar_pointer = NULL;
22c35a8c
GS
74# undef yylval
75# undef yychar
e4bfbdd4
JH
76# define yylval (*yylval_pointer)
77# define yychar (*yychar_pointer)
78# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
79# undef yylex
80# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
81#endif
82
79072805 83#include "keywords.h"
fe14fcc3 84
ffb4593c
NT
85/* CLINE is a macro that ensures PL_copline has a sane value */
86
ae986130
LW
87#ifdef CLINE
88#undef CLINE
89#endif
57843af0 90#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 91
ffb4593c
NT
92/*
93 * Convenience functions to return different tokens and prime the
9cbb5ea2 94 * lexer for the next token. They all take an argument.
ffb4593c
NT
95 *
96 * TOKEN : generic token (used for '(', DOLSHARP, etc)
97 * OPERATOR : generic operator
98 * AOPERATOR : assignment operator
99 * PREBLOCK : beginning the block after an if, while, foreach, ...
100 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
101 * PREREF : *EXPR where EXPR is not a simple identifier
102 * TERM : expression term
103 * LOOPX : loop exiting command (goto, last, dump, etc)
104 * FTST : file test operator
105 * FUN0 : zero-argument function
2d2e263d 106 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
107 * BOop : bitwise or or xor
108 * BAop : bitwise and
109 * SHop : shift operator
110 * PWop : power operator
9cbb5ea2 111 * PMop : pattern-matching operator
ffb4593c
NT
112 * Aop : addition-level operator
113 * Mop : multiplication-level operator
114 * Eop : equality-testing operator
115 * Rop : relational operator <= != gt
116 *
117 * Also see LOP and lop() below.
118 */
119
3280af22
NIS
120#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
121#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
122#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
123#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
124#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
125#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
126#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
127#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
128#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
129#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
130#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
131#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
132#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
133#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
134#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
135#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
136#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
137#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
138#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
139#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 140
a687059c
LW
141/* This bit of chicanery makes a unary function followed by
142 * a parenthesis into a function with one argument, highest precedence.
143 */
2f3197b3 144#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
145 PL_expect = XTERM, \
146 PL_bufptr = s, \
147 PL_last_uni = PL_oldbufptr, \
148 PL_last_lop_op = f, \
a687059c
LW
149 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
150
79072805 151#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_bufptr = s, \
153 PL_last_uni = PL_oldbufptr, \
79072805
LW
154 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
155
9f68db38 156/* grandfather return to old style */
3280af22 157#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 158
ffb4593c
NT
159/*
160 * S_ao
161 *
162 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
163 * into an OP_ANDASSIGN or OP_ORASSIGN
164 */
165
76e3520e 166STATIC int
cea2e8a9 167S_ao(pTHX_ int toketype)
a0d0e21e 168{
3280af22
NIS
169 if (*PL_bufptr == '=') {
170 PL_bufptr++;
a0d0e21e
LW
171 if (toketype == ANDAND)
172 yylval.ival = OP_ANDASSIGN;
173 else if (toketype == OROR)
174 yylval.ival = OP_ORASSIGN;
175 toketype = ASSIGNOP;
176 }
177 return toketype;
178}
179
ffb4593c
NT
180/*
181 * S_no_op
182 * When Perl expects an operator and finds something else, no_op
183 * prints the warning. It always prints "<something> found where
184 * operator expected. It prints "Missing semicolon on previous line?"
185 * if the surprise occurs at the start of the line. "do you need to
186 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
187 * where the compiler doesn't know if foo is a method call or a function.
188 * It prints "Missing operator before end of line" if there's nothing
189 * after the missing operator, or "... before <...>" if there is something
190 * after the missing operator.
191 */
192
76e3520e 193STATIC void
cea2e8a9 194S_no_op(pTHX_ char *what, char *s)
463ee0b2 195{
3280af22
NIS
196 char *oldbp = PL_bufptr;
197 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 198
1189a94a
GS
199 if (!s)
200 s = oldbp;
201 else {
202 assert(s >= oldbp);
203 PL_bufptr = s;
204 }
cea2e8a9 205 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 206 if (is_first)
cea2e8a9 207 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 208 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 209 char *t;
7e2040f0 210 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 211 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 212 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 213 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
214 }
215 else
cea2e8a9 216 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 217 PL_bufptr = oldbp;
8990e307
LW
218}
219
ffb4593c
NT
220/*
221 * S_missingterm
222 * Complain about missing quote/regexp/heredoc terminator.
223 * If it's called with (char *)NULL then it cauterizes the line buffer.
224 * If we're in a delimited string and the delimiter is a control
225 * character, it's reformatted into a two-char sequence like ^C.
226 * This is fatal.
227 */
228
76e3520e 229STATIC void
cea2e8a9 230S_missingterm(pTHX_ char *s)
8990e307
LW
231{
232 char tmpbuf[3];
233 char q;
234 if (s) {
235 char *nl = strrchr(s,'\n');
d2719217 236 if (nl)
8990e307
LW
237 *nl = '\0';
238 }
9d116dd7
JH
239 else if (
240#ifdef EBCDIC
241 iscntrl(PL_multi_close)
242#else
243 PL_multi_close < 32 || PL_multi_close == 127
244#endif
245 ) {
8990e307 246 *tmpbuf = '^';
3280af22 247 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
248 s = "\\n";
249 tmpbuf[2] = '\0';
250 s = tmpbuf;
251 }
252 else {
3280af22 253 *tmpbuf = PL_multi_close;
8990e307
LW
254 tmpbuf[1] = '\0';
255 s = tmpbuf;
256 }
257 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 258 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 259}
79072805 260
ffb4593c
NT
261/*
262 * Perl_deprecate
ffb4593c
NT
263 */
264
79072805 265void
864dbfa3 266Perl_deprecate(pTHX_ char *s)
a0d0e21e 267{
d008e5eb 268 dTHR;
599cee73 269 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 270 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
271}
272
ffb4593c
NT
273/*
274 * depcom
9cbb5ea2 275 * Deprecate a comma-less variable list.
ffb4593c
NT
276 */
277
76e3520e 278STATIC void
cea2e8a9 279S_depcom(pTHX)
a0d0e21e
LW
280{
281 deprecate("comma-less variable list");
282}
283
ffb4593c 284/*
9cbb5ea2
GS
285 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
286 * utf16-to-utf8-reversed.
ffb4593c
NT
287 */
288
c39cd008
GS
289#ifdef PERL_CR_FILTER
290static void
291strip_return(SV *sv)
292{
293 register char *s = SvPVX(sv);
294 register char *e = s + SvCUR(sv);
295 /* outer loop optimized to do nothing if there are no CR-LFs */
296 while (s < e) {
297 if (*s++ == '\r' && *s == '\n') {
298 /* hit a CR-LF, need to copy the rest */
299 register char *d = s - 1;
300 *d++ = *s++;
301 while (s < e) {
302 if (*s == '\r' && s[1] == '\n')
303 s++;
304 *d++ = *s++;
305 }
306 SvCUR(sv) -= s - d;
307 return;
308 }
309 }
310}
a868473f 311
76e3520e 312STATIC I32
c39cd008 313S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 314{
c39cd008
GS
315 I32 count = FILTER_READ(idx+1, sv, maxlen);
316 if (count > 0 && !maxlen)
317 strip_return(sv);
318 return count;
a868473f
NIS
319}
320#endif
321
a0ed51b3 322STATIC I32
cea2e8a9 323S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
324{
325 I32 count = FILTER_READ(idx+1, sv, maxlen);
326 if (count) {
dfe13c55
GS
327 U8* tmps;
328 U8* tend;
329 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 330 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 331 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
332
333 }
334 return count;
335}
336
337STATIC I32
cea2e8a9 338S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
339{
340 I32 count = FILTER_READ(idx+1, sv, maxlen);
341 if (count) {
dfe13c55
GS
342 U8* tmps;
343 U8* tend;
344 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 345 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 346 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
347
348 }
349 return count;
350}
a868473f 351
ffb4593c
NT
352/*
353 * Perl_lex_start
9cbb5ea2
GS
354 * Initialize variables. Uses the Perl save_stack to save its state (for
355 * recursive calls to the parser).
ffb4593c
NT
356 */
357
a0d0e21e 358void
864dbfa3 359Perl_lex_start(pTHX_ SV *line)
79072805 360{
0f15f207 361 dTHR;
8990e307
LW
362 char *s;
363 STRLEN len;
364
3280af22
NIS
365 SAVEI32(PL_lex_dojoin);
366 SAVEI32(PL_lex_brackets);
3280af22
NIS
367 SAVEI32(PL_lex_casemods);
368 SAVEI32(PL_lex_starts);
369 SAVEI32(PL_lex_state);
7766f137 370 SAVEVPTR(PL_lex_inpat);
3280af22 371 SAVEI32(PL_lex_inwhat);
18b09519
GS
372 if (PL_lex_state == LEX_KNOWNEXT) {
373 I32 toke = PL_nexttoke;
374 while (--toke >= 0) {
375 SAVEI32(PL_nexttype[toke]);
376 SAVEVPTR(PL_nextval[toke]);
377 }
378 SAVEI32(PL_nexttoke);
379 PL_nexttoke = 0;
380 }
57843af0 381 SAVECOPLINE(PL_curcop);
3280af22
NIS
382 SAVEPPTR(PL_bufptr);
383 SAVEPPTR(PL_bufend);
384 SAVEPPTR(PL_oldbufptr);
385 SAVEPPTR(PL_oldoldbufptr);
386 SAVEPPTR(PL_linestart);
387 SAVESPTR(PL_linestr);
388 SAVEPPTR(PL_lex_brackstack);
389 SAVEPPTR(PL_lex_casestack);
c76ac1ee 390 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
391 SAVESPTR(PL_lex_stuff);
392 SAVEI32(PL_lex_defer);
09bef843 393 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 394 SAVESPTR(PL_lex_repl);
bebdddfc
GS
395 SAVEINT(PL_expect);
396 SAVEINT(PL_lex_expect);
3280af22
NIS
397
398 PL_lex_state = LEX_NORMAL;
399 PL_lex_defer = 0;
400 PL_expect = XSTATE;
401 PL_lex_brackets = 0;
3280af22
NIS
402 New(899, PL_lex_brackstack, 120, char);
403 New(899, PL_lex_casestack, 12, char);
404 SAVEFREEPV(PL_lex_brackstack);
405 SAVEFREEPV(PL_lex_casestack);
406 PL_lex_casemods = 0;
407 *PL_lex_casestack = '\0';
408 PL_lex_dojoin = 0;
409 PL_lex_starts = 0;
410 PL_lex_stuff = Nullsv;
411 PL_lex_repl = Nullsv;
412 PL_lex_inpat = 0;
413 PL_lex_inwhat = 0;
09bef843 414 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
415 PL_linestr = line;
416 if (SvREADONLY(PL_linestr))
417 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
418 s = SvPV(PL_linestr, len);
8990e307 419 if (len && s[len-1] != ';') {
3280af22
NIS
420 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
421 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
422 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 423 }
3280af22
NIS
424 SvTEMP_off(PL_linestr);
425 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
426 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
427 SvREFCNT_dec(PL_rs);
79cb57f6 428 PL_rs = newSVpvn("\n", 1);
3280af22 429 PL_rsfp = 0;
79072805 430}
a687059c 431
ffb4593c
NT
432/*
433 * Perl_lex_end
9cbb5ea2
GS
434 * Finalizer for lexing operations. Must be called when the parser is
435 * done with the lexer.
ffb4593c
NT
436 */
437
463ee0b2 438void
864dbfa3 439Perl_lex_end(pTHX)
463ee0b2 440{
3280af22 441 PL_doextract = FALSE;
463ee0b2
LW
442}
443
ffb4593c
NT
444/*
445 * S_incline
446 * This subroutine has nothing to do with tilting, whether at windmills
447 * or pinball tables. Its name is short for "increment line". It
57843af0 448 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 449 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
450 * # line 500 "foo.pm"
451 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
452 */
453
76e3520e 454STATIC void
cea2e8a9 455S_incline(pTHX_ char *s)
463ee0b2 456{
0f15f207 457 dTHR;
463ee0b2
LW
458 char *t;
459 char *n;
73659bf1 460 char *e;
463ee0b2 461 char ch;
463ee0b2 462
57843af0 463 CopLINE_inc(PL_curcop);
463ee0b2
LW
464 if (*s++ != '#')
465 return;
466 while (*s == ' ' || *s == '\t') s++;
73659bf1
GS
467 if (strnEQ(s, "line", 4))
468 s += 4;
469 else
470 return;
471 if (*s == ' ' || *s == '\t')
472 s++;
473 else
474 return;
475 while (*s == ' ' || *s == '\t') s++;
463ee0b2
LW
476 if (!isDIGIT(*s))
477 return;
478 n = s;
479 while (isDIGIT(*s))
480 s++;
481 while (*s == ' ' || *s == '\t')
482 s++;
73659bf1 483 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 484 s++;
73659bf1
GS
485 e = t + 1;
486 }
463ee0b2 487 else {
463ee0b2 488 for (t = s; !isSPACE(*t); t++) ;
73659bf1 489 e = t;
463ee0b2 490 }
73659bf1
GS
491 while (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\f')
492 e++;
493 if (*e != '\n' && *e != '\0')
494 return; /* false alarm */
495
463ee0b2
LW
496 ch = *t;
497 *t = '\0';
498 if (t - s > 0)
57843af0 499 CopFILE_set(PL_curcop, s);
463ee0b2 500 *t = ch;
57843af0 501 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
502}
503
ffb4593c
NT
504/*
505 * S_skipspace
506 * Called to gobble the appropriate amount and type of whitespace.
507 * Skips comments as well.
508 */
509
76e3520e 510STATIC char *
cea2e8a9 511S_skipspace(pTHX_ register char *s)
a687059c 512{
11343788 513 dTHR;
3280af22
NIS
514 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
515 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
516 s++;
517 return s;
518 }
519 for (;;) {
fd049845 520 STRLEN prevlen;
09bef843
SB
521 SSize_t oldprevlen, oldoldprevlen;
522 SSize_t oldloplen, oldunilen;
60e6418e
GS
523 while (s < PL_bufend && isSPACE(*s)) {
524 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
525 incline(s);
526 }
ffb4593c
NT
527
528 /* comment */
3280af22
NIS
529 if (s < PL_bufend && *s == '#') {
530 while (s < PL_bufend && *s != '\n')
463ee0b2 531 s++;
60e6418e 532 if (s < PL_bufend) {
463ee0b2 533 s++;
60e6418e
GS
534 if (PL_in_eval && !PL_rsfp) {
535 incline(s);
536 continue;
537 }
538 }
463ee0b2 539 }
ffb4593c
NT
540
541 /* only continue to recharge the buffer if we're at the end
542 * of the buffer, we're not reading from a source filter, and
543 * we're in normal lexing mode
544 */
09bef843
SB
545 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
546 PL_lex_state == LEX_FORMLINE)
463ee0b2 547 return s;
ffb4593c
NT
548
549 /* try to recharge the buffer */
9cbb5ea2
GS
550 if ((s = filter_gets(PL_linestr, PL_rsfp,
551 (prevlen = SvCUR(PL_linestr)))) == Nullch)
552 {
553 /* end of file. Add on the -p or -n magic */
3280af22
NIS
554 if (PL_minus_n || PL_minus_p) {
555 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
556 ";}continue{print or die qq(-p destination: $!\\n)" :
557 "");
3280af22
NIS
558 sv_catpv(PL_linestr,";}");
559 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
560 }
561 else
3280af22 562 sv_setpv(PL_linestr,";");
ffb4593c
NT
563
564 /* reset variables for next time we lex */
9cbb5ea2
GS
565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
566 = SvPVX(PL_linestr);
3280af22 567 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
568
569 /* Close the filehandle. Could be from -P preprocessor,
570 * STDIN, or a regular file. If we were reading code from
571 * STDIN (because the commandline held no -e or filename)
572 * then we don't close it, we reset it so the code can
573 * read from STDIN too.
574 */
575
3280af22
NIS
576 if (PL_preprocess && !PL_in_eval)
577 (void)PerlProc_pclose(PL_rsfp);
578 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
579 PerlIO_clearerr(PL_rsfp);
8990e307 580 else
3280af22
NIS
581 (void)PerlIO_close(PL_rsfp);
582 PL_rsfp = Nullfp;
463ee0b2
LW
583 return s;
584 }
ffb4593c
NT
585
586 /* not at end of file, so we only read another line */
09bef843
SB
587 /* make corresponding updates to old pointers, for yyerror() */
588 oldprevlen = PL_oldbufptr - PL_bufend;
589 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
590 if (PL_last_uni)
591 oldunilen = PL_last_uni - PL_bufend;
592 if (PL_last_lop)
593 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
594 PL_linestart = PL_bufptr = s + prevlen;
595 PL_bufend = s + SvCUR(PL_linestr);
596 s = PL_bufptr;
09bef843
SB
597 PL_oldbufptr = s + oldprevlen;
598 PL_oldoldbufptr = s + oldoldprevlen;
599 if (PL_last_uni)
600 PL_last_uni = s + oldunilen;
601 if (PL_last_lop)
602 PL_last_lop = s + oldloplen;
a0d0e21e 603 incline(s);
ffb4593c
NT
604
605 /* debugger active and we're not compiling the debugger code,
606 * so store the line into the debugger's array of lines
607 */
3280af22 608 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
609 SV *sv = NEWSV(85,0);
610
611 sv_upgrade(sv, SVt_PVMG);
3280af22 612 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 613 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 614 }
463ee0b2 615 }
a687059c 616}
378cc40b 617
ffb4593c
NT
618/*
619 * S_check_uni
620 * Check the unary operators to ensure there's no ambiguity in how they're
621 * used. An ambiguous piece of code would be:
622 * rand + 5
623 * This doesn't mean rand() + 5. Because rand() is a unary operator,
624 * the +5 is its argument.
625 */
626
76e3520e 627STATIC void
cea2e8a9 628S_check_uni(pTHX)
ba106d47 629{
2f3197b3 630 char *s;
a0d0e21e 631 char *t;
0453d815 632 dTHR;
2f3197b3 633
3280af22 634 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 635 return;
3280af22
NIS
636 while (isSPACE(*PL_last_uni))
637 PL_last_uni++;
7e2040f0 638 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 639 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 640 return;
0453d815 641 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 642 char ch = *s;
0453d815
PM
643 *s = '\0';
644 Perl_warner(aTHX_ WARN_AMBIGUOUS,
645 "Warning: Use of \"%s\" without parens is ambiguous",
646 PL_last_uni);
647 *s = ch;
648 }
2f3197b3
LW
649}
650
ffb4593c
NT
651/* workaround to replace the UNI() macro with a function. Only the
652 * hints/uts.sh file mentions this. Other comments elsewhere in the
653 * source indicate Microport Unix might need it too.
654 */
655
ffed7fef
LW
656#ifdef CRIPPLED_CC
657
658#undef UNI
ffed7fef 659#define UNI(f) return uni(f,s)
ffed7fef 660
76e3520e 661STATIC int
cea2e8a9 662S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
663{
664 yylval.ival = f;
3280af22
NIS
665 PL_expect = XTERM;
666 PL_bufptr = s;
8f872242
NIS
667 PL_last_uni = PL_oldbufptr;
668 PL_last_lop_op = f;
ffed7fef
LW
669 if (*s == '(')
670 return FUNC1;
671 s = skipspace(s);
672 if (*s == '(')
673 return FUNC1;
674 else
675 return UNIOP;
676}
677
a0d0e21e
LW
678#endif /* CRIPPLED_CC */
679
ffb4593c
NT
680/*
681 * LOP : macro to build a list operator. Its behaviour has been replaced
682 * with a subroutine, S_lop() for which LOP is just another name.
683 */
684
a0d0e21e
LW
685#define LOP(f,x) return lop(f,x,s)
686
ffb4593c
NT
687/*
688 * S_lop
689 * Build a list operator (or something that might be one). The rules:
690 * - if we have a next token, then it's a list operator [why?]
691 * - if the next thing is an opening paren, then it's a function
692 * - else it's a list operator
693 */
694
76e3520e 695STATIC I32
a0be28da 696S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 697{
0f15f207 698 dTHR;
79072805 699 yylval.ival = f;
35c8bce7 700 CLINE;
3280af22
NIS
701 PL_expect = x;
702 PL_bufptr = s;
703 PL_last_lop = PL_oldbufptr;
704 PL_last_lop_op = f;
705 if (PL_nexttoke)
a0d0e21e 706 return LSTOP;
79072805
LW
707 if (*s == '(')
708 return FUNC;
709 s = skipspace(s);
710 if (*s == '(')
711 return FUNC;
712 else
713 return LSTOP;
714}
715
ffb4593c
NT
716/*
717 * S_force_next
9cbb5ea2 718 * When the lexer realizes it knows the next token (for instance,
ffb4593c 719 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
720 * to know what token to return the next time the lexer is called. Caller
721 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
722 * handles the token correctly.
ffb4593c
NT
723 */
724
76e3520e 725STATIC void
cea2e8a9 726S_force_next(pTHX_ I32 type)
79072805 727{
3280af22
NIS
728 PL_nexttype[PL_nexttoke] = type;
729 PL_nexttoke++;
730 if (PL_lex_state != LEX_KNOWNEXT) {
731 PL_lex_defer = PL_lex_state;
732 PL_lex_expect = PL_expect;
733 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
734 }
735}
736
ffb4593c
NT
737/*
738 * S_force_word
739 * When the lexer knows the next thing is a word (for instance, it has
740 * just seen -> and it knows that the next char is a word char, then
741 * it calls S_force_word to stick the next word into the PL_next lookahead.
742 *
743 * Arguments:
b1b65b59 744 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
745 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
746 * int check_keyword : if true, Perl checks to make sure the word isn't
747 * a keyword (do this if the word is a label, e.g. goto FOO)
748 * int allow_pack : if true, : characters will also be allowed (require,
749 * use, etc. do this)
9cbb5ea2 750 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
751 */
752
76e3520e 753STATIC char *
cea2e8a9 754S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 755{
463ee0b2
LW
756 register char *s;
757 STRLEN len;
758
759 start = skipspace(start);
760 s = start;
7e2040f0 761 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 762 (allow_pack && *s == ':') ||
15f0808c 763 (allow_initial_tick && *s == '\'') )
a0d0e21e 764 {
3280af22
NIS
765 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
766 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
767 return start;
768 if (token == METHOD) {
769 s = skipspace(s);
770 if (*s == '(')
3280af22 771 PL_expect = XTERM;
463ee0b2 772 else {
3280af22 773 PL_expect = XOPERATOR;
463ee0b2 774 }
79072805 775 }
3280af22
NIS
776 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
777 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
778 force_next(token);
779 }
780 return s;
781}
782
ffb4593c
NT
783/*
784 * S_force_ident
9cbb5ea2 785 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
786 * text only contains the "foo" portion. The first argument is a pointer
787 * to the "foo", and the second argument is the type symbol to prefix.
788 * Forces the next token to be a "WORD".
9cbb5ea2 789 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
790 */
791
76e3520e 792STATIC void
cea2e8a9 793S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
794{
795 if (s && *s) {
11343788 796 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 797 PL_nextval[PL_nexttoke].opval = o;
79072805 798 force_next(WORD);
748a9306 799 if (kind) {
e858de61 800 dTHR; /* just for in_eval */
11343788 801 o->op_private = OPpCONST_ENTERED;
55497cff 802 /* XXX see note in pp_entereval() for why we forgo typo
803 warnings if the symbol must be introduced in an eval.
804 GSAR 96-10-12 */
3280af22 805 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
806 kind == '$' ? SVt_PV :
807 kind == '@' ? SVt_PVAV :
808 kind == '%' ? SVt_PVHV :
809 SVt_PVGV
810 );
748a9306 811 }
79072805
LW
812 }
813}
814
ffb4593c
NT
815/*
816 * S_force_version
817 * Forces the next token to be a version number.
818 */
819
76e3520e 820STATIC char *
cea2e8a9 821S_force_version(pTHX_ char *s)
89bfa8cd 822{
823 OP *version = Nullop;
44dcb63b
GS
824 bool is_vstr = FALSE;
825 char *d;
89bfa8cd 826
827 s = skipspace(s);
828
44dcb63b
GS
829 d = s;
830 if (*d == 'v') {
831 is_vstr = TRUE;
832 d++;
833 }
834 if (isDIGIT(*d)) {
a7cb1f99 835 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 836 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
89bfa8cd 837 s = scan_num(s);
838 /* real VERSION number -- GBARR */
839 version = yylval.opval;
44dcb63b
GS
840 if (is_vstr) {
841 SV *ver = cSVOPx(version)->op_sv;
842 SvUPGRADE(ver, SVt_PVIV);
843 SvIOKp_on(ver); /* hint that it is a version */
844 }
89bfa8cd 845 }
846 }
847
848 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 849 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 850 force_next(WORD);
851
852 return (s);
853}
854
ffb4593c
NT
855/*
856 * S_tokeq
857 * Tokenize a quoted string passed in as an SV. It finds the next
858 * chunk, up to end of string or a backslash. It may make a new
859 * SV containing that chunk (if HINT_NEW_STRING is on). It also
860 * turns \\ into \.
861 */
862
76e3520e 863STATIC SV *
cea2e8a9 864S_tokeq(pTHX_ SV *sv)
79072805
LW
865{
866 register char *s;
867 register char *send;
868 register char *d;
b3ac6de7
IZ
869 STRLEN len = 0;
870 SV *pv = sv;
79072805
LW
871
872 if (!SvLEN(sv))
b3ac6de7 873 goto finish;
79072805 874
a0d0e21e 875 s = SvPV_force(sv, len);
748a9306 876 if (SvIVX(sv) == -1)
b3ac6de7 877 goto finish;
463ee0b2 878 send = s + len;
79072805
LW
879 while (s < send && *s != '\\')
880 s++;
881 if (s == send)
b3ac6de7 882 goto finish;
79072805 883 d = s;
3280af22 884 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 885 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
886 while (s < send) {
887 if (*s == '\\') {
a0d0e21e 888 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
889 s++; /* all that, just for this */
890 }
891 *d++ = *s++;
892 }
893 *d = '\0';
463ee0b2 894 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 895 finish:
3280af22 896 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 897 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
898 return sv;
899}
900
ffb4593c
NT
901/*
902 * Now come three functions related to double-quote context,
903 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
904 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
905 * interact with PL_lex_state, and create fake ( ... ) argument lists
906 * to handle functions and concatenation.
907 * They assume that whoever calls them will be setting up a fake
908 * join call, because each subthing puts a ',' after it. This lets
909 * "lower \luPpEr"
910 * become
911 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
912 *
913 * (I'm not sure whether the spurious commas at the end of lcfirst's
914 * arguments and join's arguments are created or not).
915 */
916
917/*
918 * S_sublex_start
919 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
920 *
921 * Pattern matching will set PL_lex_op to the pattern-matching op to
922 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
923 *
924 * OP_CONST and OP_READLINE are easy--just make the new op and return.
925 *
926 * Everything else becomes a FUNC.
927 *
928 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
929 * had an OP_CONST or OP_READLINE). This just sets us up for a
930 * call to S_sublex_push().
931 */
932
76e3520e 933STATIC I32
cea2e8a9 934S_sublex_start(pTHX)
79072805
LW
935{
936 register I32 op_type = yylval.ival;
79072805
LW
937
938 if (op_type == OP_NULL) {
3280af22
NIS
939 yylval.opval = PL_lex_op;
940 PL_lex_op = Nullop;
79072805
LW
941 return THING;
942 }
943 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 944 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
945
946 if (SvTYPE(sv) == SVt_PVIV) {
947 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
948 STRLEN len;
949 char *p;
950 SV *nsv;
951
952 p = SvPV(sv, len);
79cb57f6 953 nsv = newSVpvn(p, len);
b3ac6de7
IZ
954 SvREFCNT_dec(sv);
955 sv = nsv;
956 }
957 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 958 PL_lex_stuff = Nullsv;
79072805
LW
959 return THING;
960 }
961
3280af22
NIS
962 PL_sublex_info.super_state = PL_lex_state;
963 PL_sublex_info.sub_inwhat = op_type;
964 PL_sublex_info.sub_op = PL_lex_op;
965 PL_lex_state = LEX_INTERPPUSH;
55497cff 966
3280af22
NIS
967 PL_expect = XTERM;
968 if (PL_lex_op) {
969 yylval.opval = PL_lex_op;
970 PL_lex_op = Nullop;
55497cff 971 return PMFUNC;
972 }
973 else
974 return FUNC;
975}
976
ffb4593c
NT
977/*
978 * S_sublex_push
979 * Create a new scope to save the lexing state. The scope will be
980 * ended in S_sublex_done. Returns a '(', starting the function arguments
981 * to the uc, lc, etc. found before.
982 * Sets PL_lex_state to LEX_INTERPCONCAT.
983 */
984
76e3520e 985STATIC I32
cea2e8a9 986S_sublex_push(pTHX)
55497cff 987{
0f15f207 988 dTHR;
f46d017c 989 ENTER;
55497cff 990
3280af22
NIS
991 PL_lex_state = PL_sublex_info.super_state;
992 SAVEI32(PL_lex_dojoin);
993 SAVEI32(PL_lex_brackets);
3280af22
NIS
994 SAVEI32(PL_lex_casemods);
995 SAVEI32(PL_lex_starts);
996 SAVEI32(PL_lex_state);
7766f137 997 SAVEVPTR(PL_lex_inpat);
3280af22 998 SAVEI32(PL_lex_inwhat);
57843af0 999 SAVECOPLINE(PL_curcop);
3280af22
NIS
1000 SAVEPPTR(PL_bufptr);
1001 SAVEPPTR(PL_oldbufptr);
1002 SAVEPPTR(PL_oldoldbufptr);
1003 SAVEPPTR(PL_linestart);
1004 SAVESPTR(PL_linestr);
1005 SAVEPPTR(PL_lex_brackstack);
1006 SAVEPPTR(PL_lex_casestack);
1007
1008 PL_linestr = PL_lex_stuff;
1009 PL_lex_stuff = Nullsv;
1010
9cbb5ea2
GS
1011 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1012 = SvPVX(PL_linestr);
3280af22
NIS
1013 PL_bufend += SvCUR(PL_linestr);
1014 SAVEFREESV(PL_linestr);
1015
1016 PL_lex_dojoin = FALSE;
1017 PL_lex_brackets = 0;
3280af22
NIS
1018 New(899, PL_lex_brackstack, 120, char);
1019 New(899, PL_lex_casestack, 12, char);
1020 SAVEFREEPV(PL_lex_brackstack);
1021 SAVEFREEPV(PL_lex_casestack);
1022 PL_lex_casemods = 0;
1023 *PL_lex_casestack = '\0';
1024 PL_lex_starts = 0;
1025 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1026 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1027
1028 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1029 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1030 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1031 else
3280af22 1032 PL_lex_inpat = Nullop;
79072805 1033
55497cff 1034 return '(';
79072805
LW
1035}
1036
ffb4593c
NT
1037/*
1038 * S_sublex_done
1039 * Restores lexer state after a S_sublex_push.
1040 */
1041
76e3520e 1042STATIC I32
cea2e8a9 1043S_sublex_done(pTHX)
79072805 1044{
3280af22
NIS
1045 if (!PL_lex_starts++) {
1046 PL_expect = XOPERATOR;
79cb57f6 1047 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1048 return THING;
1049 }
1050
3280af22
NIS
1051 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1052 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1053 return yylex();
79072805
LW
1054 }
1055
ffb4593c 1056 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1057 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1058 PL_linestr = PL_lex_repl;
1059 PL_lex_inpat = 0;
1060 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1061 PL_bufend += SvCUR(PL_linestr);
1062 SAVEFREESV(PL_linestr);
1063 PL_lex_dojoin = FALSE;
1064 PL_lex_brackets = 0;
3280af22
NIS
1065 PL_lex_casemods = 0;
1066 *PL_lex_casestack = '\0';
1067 PL_lex_starts = 0;
25da4f38 1068 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1069 PL_lex_state = LEX_INTERPNORMAL;
1070 PL_lex_starts++;
e9fa98b2
HS
1071 /* we don't clear PL_lex_repl here, so that we can check later
1072 whether this is an evalled subst; that means we rely on the
1073 logic to ensure sublex_done() is called again only via the
1074 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1075 }
e9fa98b2 1076 else {
3280af22 1077 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1078 PL_lex_repl = Nullsv;
1079 }
79072805 1080 return ',';
ffed7fef
LW
1081 }
1082 else {
f46d017c 1083 LEAVE;
3280af22
NIS
1084 PL_bufend = SvPVX(PL_linestr);
1085 PL_bufend += SvCUR(PL_linestr);
1086 PL_expect = XOPERATOR;
09bef843 1087 PL_sublex_info.sub_inwhat = 0;
79072805 1088 return ')';
ffed7fef
LW
1089 }
1090}
1091
02aa26ce
NT
1092/*
1093 scan_const
1094
1095 Extracts a pattern, double-quoted string, or transliteration. This
1096 is terrifying code.
1097
3280af22
NIS
1098 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1099 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1100 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1101
9b599b2a
GS
1102 Returns a pointer to the character scanned up to. Iff this is
1103 advanced from the start pointer supplied (ie if anything was
1104 successfully parsed), will leave an OP for the substring scanned
1105 in yylval. Caller must intuit reason for not parsing further
1106 by looking at the next characters herself.
1107
02aa26ce
NT
1108 In patterns:
1109 backslashes:
1110 double-quoted style: \r and \n
1111 regexp special ones: \D \s
1112 constants: \x3
1113 backrefs: \1 (deprecated in substitution replacements)
1114 case and quoting: \U \Q \E
1115 stops on @ and $, but not for $ as tail anchor
1116
1117 In transliterations:
1118 characters are VERY literal, except for - not at the start or end
1119 of the string, which indicates a range. scan_const expands the
1120 range to the full set of intermediate characters.
1121
1122 In double-quoted strings:
1123 backslashes:
1124 double-quoted style: \r and \n
1125 constants: \x3
1126 backrefs: \1 (deprecated)
1127 case and quoting: \U \Q \E
1128 stops on @ and $
1129
1130 scan_const does *not* construct ops to handle interpolated strings.
1131 It stops processing as soon as it finds an embedded $ or @ variable
1132 and leaves it to the caller to work out what's going on.
1133
1134 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1135
1136 $ in pattern could be $foo or could be tail anchor. Assumption:
1137 it's a tail anchor if $ is the last thing in the string, or if it's
1138 followed by one of ")| \n\t"
1139
1140 \1 (backreferences) are turned into $1
1141
1142 The structure of the code is
1143 while (there's a character to process) {
1144 handle transliteration ranges
1145 skip regexp comments
1146 skip # initiated comments in //x patterns
1147 check for embedded @foo
1148 check for embedded scalars
1149 if (backslash) {
1150 leave intact backslashes from leave (below)
1151 deprecate \1 in strings and sub replacements
1152 handle string-changing backslashes \l \U \Q \E, etc.
1153 switch (what was escaped) {
1154 handle - in a transliteration (becomes a literal -)
1155 handle \132 octal characters
1156 handle 0x15 hex characters
1157 handle \cV (control V)
1158 handle printf backslashes (\f, \r, \n, etc)
1159 } (end switch)
1160 } (end if backslash)
1161 } (end while character to read)
1162
1163*/
1164
76e3520e 1165STATIC char *
cea2e8a9 1166S_scan_const(pTHX_ char *start)
79072805 1167{
3280af22 1168 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1169 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1170 register char *s = start; /* start of the constant */
1171 register char *d = SvPVX(sv); /* destination for copies */
1172 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1173 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1174 I32 len; /* ? */
012bcf8d
GS
1175 UV uv;
1176
ac2262e3 1177 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1178 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1179 : UTF;
ac2262e3 1180 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1181 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1182 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1183 : UTF;
dff6d3cd 1184 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1185 PL_lex_inpat
4a2d328f 1186 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1187 : "";
79072805
LW
1188
1189 while (s < send || dorange) {
02aa26ce 1190 /* get transliterations out of the way (they're most literal) */
3280af22 1191 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1192 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1193 if (dorange) {
02aa26ce 1194 I32 i; /* current expanded character */
8ada0baa 1195 I32 min; /* first character in range */
02aa26ce
NT
1196 I32 max; /* last character in range */
1197
1198 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1199 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1200 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1201 d -= 2; /* eat the first char and the - */
1202
8ada0baa
JH
1203 min = (U8)*d; /* first char in range */
1204 max = (U8)d[1]; /* last char in range */
1205
1206#ifndef ASCIIish
1207 if ((isLOWER(min) && isLOWER(max)) ||
1208 (isUPPER(min) && isUPPER(max))) {
1209 if (isLOWER(min)) {
1210 for (i = min; i <= max; i++)
1211 if (isLOWER(i))
1212 *d++ = i;
1213 } else {
1214 for (i = min; i <= max; i++)
1215 if (isUPPER(i))
1216 *d++ = i;
1217 }
1218 }
1219 else
1220#endif
1221 for (i = min; i <= max; i++)
1222 *d++ = i;
02aa26ce
NT
1223
1224 /* mark the range as done, and continue */
79072805
LW
1225 dorange = FALSE;
1226 continue;
1227 }
02aa26ce
NT
1228
1229 /* range begins (ignore - as first or last char) */
79072805 1230 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1231 if (utf) {
a176fa2a 1232 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1233 s++;
1234 continue;
1235 }
79072805
LW
1236 dorange = TRUE;
1237 s++;
1238 }
1239 }
02aa26ce
NT
1240
1241 /* if we get here, we're not doing a transliteration */
1242
0f5d15d6
IZ
1243 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1244 except for the last char, which will be done separately. */
3280af22 1245 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1246 if (s[2] == '#') {
1247 while (s < send && *s != ')')
1248 *d++ = *s++;
0f5d15d6
IZ
1249 } else if (s[2] == '{'
1250 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1251 I32 count = 1;
0f5d15d6 1252 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1253 char c;
1254
d9f97599
GS
1255 while (count && (c = *regparse)) {
1256 if (c == '\\' && regparse[1])
1257 regparse++;
cc6b7395
IZ
1258 else if (c == '{')
1259 count++;
1260 else if (c == '}')
1261 count--;
d9f97599 1262 regparse++;
cc6b7395 1263 }
5bdf89e7
IZ
1264 if (*regparse != ')') {
1265 regparse--; /* Leave one char for continuation. */
cc6b7395 1266 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1267 }
0f5d15d6 1268 while (s < regparse)
cc6b7395
IZ
1269 *d++ = *s++;
1270 }
748a9306 1271 }
02aa26ce
NT
1272
1273 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1274 else if (*s == '#' && PL_lex_inpat &&
1275 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1276 while (s+1 < send && *s != '\n')
1277 *d++ = *s++;
1278 }
02aa26ce
NT
1279
1280 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1281 else if (*s == '@' && s[1]
1282 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1283 break;
02aa26ce
NT
1284
1285 /* check for embedded scalars. only stop if we're sure it's a
1286 variable.
1287 */
79072805 1288 else if (*s == '$') {
3280af22 1289 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1290 break;
c277df42 1291 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1292 break; /* in regexp, $ might be tail anchor */
1293 }
02aa26ce 1294
a0ed51b3
LW
1295 /* (now in tr/// code again) */
1296
d008e5eb 1297 if (*s & 0x80 && thisutf) {
012bcf8d
GS
1298 (void)utf8_to_uv((U8*)s, &len);
1299 if (len == 1) {
1300 /* illegal UTF8, make it valid */
012bcf8d 1301 char *old_pvx = SvPVX(sv);
bee7c419
GS
1302 /* need space for one extra char (NOTE: SvCUR() not set here) */
1303 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1304 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1305 }
1306 else {
1307 while (len--)
1308 *d++ = *s++;
1309 }
1310 has_utf = TRUE;
1311 continue;
a0ed51b3
LW
1312 }
1313
02aa26ce 1314 /* backslashes */
79072805
LW
1315 if (*s == '\\' && s+1 < send) {
1316 s++;
02aa26ce
NT
1317
1318 /* some backslashes we leave behind */
c9f97d15 1319 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1320 *d++ = '\\';
1321 *d++ = *s++;
1322 continue;
1323 }
02aa26ce
NT
1324
1325 /* deprecate \1 in strings and substitution replacements */
3280af22 1326 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1327 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1328 {
d008e5eb 1329 dTHR; /* only for ckWARN */
599cee73 1330 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1331 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1332 *--s = '$';
1333 break;
1334 }
02aa26ce
NT
1335
1336 /* string-change backslash escapes */
3280af22 1337 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1338 --s;
1339 break;
1340 }
02aa26ce
NT
1341
1342 /* if we get here, it's either a quoted -, or a digit */
79072805 1343 switch (*s) {
02aa26ce
NT
1344
1345 /* quoted - in transliterations */
79072805 1346 case '-':
3280af22 1347 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1348 *d++ = *s++;
1349 continue;
1350 }
1351 /* FALL THROUGH */
1352 default:
11b8faa4
JH
1353 {
1354 dTHR;
e476b1b5
GS
1355 if (ckWARN(WARN_MISC) && isALPHA(*s))
1356 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1357 "Unrecognized escape \\%c passed through",
1358 *s);
1359 /* default action is to copy the quoted character */
1360 *d++ = *s++;
1361 continue;
1362 }
02aa26ce
NT
1363
1364 /* \132 indicates an octal constant */
79072805
LW
1365 case '0': case '1': case '2': case '3':
1366 case '4': case '5': case '6': case '7':
012bcf8d 1367 uv = (UV)scan_oct(s, 3, &len);
79072805 1368 s += len;
012bcf8d 1369 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1370
1371 /* \x24 indicates a hex constant */
79072805 1372 case 'x':
a0ed51b3
LW
1373 ++s;
1374 if (*s == '{') {
1375 char* e = strchr(s, '}');
adaeee49 1376 if (!e) {
a0ed51b3 1377 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1378 e = s;
1379 }
012bcf8d
GS
1380 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1381 s = e + 1;
a0ed51b3
LW
1382 }
1383 else {
012bcf8d
GS
1384 uv = (UV)scan_hex(s, 2, &len);
1385 s += len;
1386 }
1387
1388 NUM_ESCAPE_INSERT:
1389 /* Insert oct or hex escaped character.
1390 * There will always enough room in sv since such escapes will
1391 * be longer than any utf8 sequence they can end up as
1392 */
1393 if (uv > 127) {
1394 if (!thisutf && !has_utf && uv > 255) {
1395 /* might need to recode whatever we have accumulated so far
1396 * if it contains any hibit chars
1397 */
1398 int hicount = 0;
1399 char *c;
1400 for (c = SvPVX(sv); c < d; c++) {
1401 if (*c & 0x80)
1402 hicount++;
1403 }
1404 if (hicount) {
1405 char *old_pvx = SvPVX(sv);
1406 char *src, *dst;
1407 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1408
1409 src = d - 1;
1410 d += hicount;
1411 dst = d - 1;
1412
1413 while (src < dst) {
1414 if (*src & 0x80) {
1415 dst--;
1416 uv_to_utf8((U8*)dst, (U8)*src--);
1417 dst--;
1418 }
1419 else {
1420 *dst-- = *src--;
1421 }
1422 }
1423 }
1424 }
1425
1426 if (thisutf || uv > 255) {
1427 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1428 has_utf = TRUE;
012bcf8d 1429 }
a0ed51b3 1430 else {
012bcf8d 1431 *d++ = (char)uv;
a0ed51b3 1432 }
012bcf8d
GS
1433 }
1434 else {
1435 *d++ = (char)uv;
a0ed51b3 1436 }
79072805 1437 continue;
02aa26ce 1438
4a2d328f
IZ
1439 /* \N{latin small letter a} is a named character */
1440 case 'N':
423cee85
JH
1441 ++s;
1442 if (*s == '{') {
1443 char* e = strchr(s, '}');
1444 HV *hv;
1445 SV **svp;
1446 SV *res, *cv;
1447 STRLEN len;
1448 char *str;
1449 char *why = Nullch;
1450
1451 if (!e) {
5777a3f7 1452 yyerror("Missing right brace on \\N{}");
423cee85
JH
1453 e = s - 1;
1454 goto cont_scan;
1455 }
1456 res = newSVpvn(s + 1, e - s - 1);
1457 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1458 res, Nullsv, "\\N{...}" );
423cee85
JH
1459 str = SvPV(res,len);
1460 if (len > e - s + 4) {
1461 char *odest = SvPVX(sv);
1462
1463 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1464 d = SvPVX(sv) + (d - odest);
1465 }
1466 Copy(str, d, len, char);
1467 d += len;
1468 SvREFCNT_dec(res);
1469 cont_scan:
1470 s = e + 1;
1471 }
1472 else
5777a3f7 1473 yyerror("Missing braces on \\N{}");
423cee85
JH
1474 continue;
1475
02aa26ce 1476 /* \c is a control character */
79072805
LW
1477 case 'c':
1478 s++;
9d116dd7
JH
1479#ifdef EBCDIC
1480 *d = *s++;
1481 if (isLOWER(*d))
1482 *d = toUPPER(*d);
1483 *d++ = toCTRL(*d);
1484#else
bbce6d69 1485 len = *s++;
1486 *d++ = toCTRL(len);
9d116dd7 1487#endif
79072805 1488 continue;
02aa26ce
NT
1489
1490 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1491 case 'b':
1492 *d++ = '\b';
1493 break;
1494 case 'n':
1495 *d++ = '\n';
1496 break;
1497 case 'r':
1498 *d++ = '\r';
1499 break;
1500 case 'f':
1501 *d++ = '\f';
1502 break;
1503 case 't':
1504 *d++ = '\t';
1505 break;
34a3fe2a
PP
1506#ifdef EBCDIC
1507 case 'e':
1508 *d++ = '\047'; /* CP 1047 */
1509 break;
1510 case 'a':
1511 *d++ = '\057'; /* CP 1047 */
1512 break;
1513#else
79072805
LW
1514 case 'e':
1515 *d++ = '\033';
1516 break;
1517 case 'a':
1518 *d++ = '\007';
1519 break;
34a3fe2a 1520#endif
02aa26ce
NT
1521 } /* end switch */
1522
79072805
LW
1523 s++;
1524 continue;
02aa26ce
NT
1525 } /* end if (backslash) */
1526
79072805 1527 *d++ = *s++;
02aa26ce
NT
1528 } /* while loop to process each character */
1529
1530 /* terminate the string and set up the sv */
79072805 1531 *d = '\0';
463ee0b2 1532 SvCUR_set(sv, d - SvPVX(sv));
79072805 1533 SvPOK_on(sv);
7e2040f0
GS
1534 if (has_utf)
1535 SvUTF8_on(sv);
79072805 1536
02aa26ce 1537 /* shrink the sv if we allocated more than we used */
79072805
LW
1538 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1539 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1540 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1541 }
02aa26ce 1542
9b599b2a 1543 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1544 if (s > PL_bufptr) {
1545 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1546 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1547 sv, Nullsv,
3280af22 1548 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1549 ? "tr"
3280af22 1550 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1551 ? "s"
1552 : "qq")));
79072805 1553 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1554 } else
8990e307 1555 SvREFCNT_dec(sv);
79072805
LW
1556 return s;
1557}
1558
ffb4593c
NT
1559/* S_intuit_more
1560 * Returns TRUE if there's more to the expression (e.g., a subscript),
1561 * FALSE otherwise.
ffb4593c
NT
1562 *
1563 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1564 *
1565 * ->[ and ->{ return TRUE
1566 * { and [ outside a pattern are always subscripts, so return TRUE
1567 * if we're outside a pattern and it's not { or [, then return FALSE
1568 * if we're in a pattern and the first char is a {
1569 * {4,5} (any digits around the comma) returns FALSE
1570 * if we're in a pattern and the first char is a [
1571 * [] returns FALSE
1572 * [SOMETHING] has a funky algorithm to decide whether it's a
1573 * character class or not. It has to deal with things like
1574 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1575 * anything else returns TRUE
1576 */
1577
9cbb5ea2
GS
1578/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1579
76e3520e 1580STATIC int
cea2e8a9 1581S_intuit_more(pTHX_ register char *s)
79072805 1582{
3280af22 1583 if (PL_lex_brackets)
79072805
LW
1584 return TRUE;
1585 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1586 return TRUE;
1587 if (*s != '{' && *s != '[')
1588 return FALSE;
3280af22 1589 if (!PL_lex_inpat)
79072805
LW
1590 return TRUE;
1591
1592 /* In a pattern, so maybe we have {n,m}. */
1593 if (*s == '{') {
1594 s++;
1595 if (!isDIGIT(*s))
1596 return TRUE;
1597 while (isDIGIT(*s))
1598 s++;
1599 if (*s == ',')
1600 s++;
1601 while (isDIGIT(*s))
1602 s++;
1603 if (*s == '}')
1604 return FALSE;
1605 return TRUE;
1606
1607 }
1608
1609 /* On the other hand, maybe we have a character class */
1610
1611 s++;
1612 if (*s == ']' || *s == '^')
1613 return FALSE;
1614 else {
ffb4593c 1615 /* this is terrifying, and it works */
79072805
LW
1616 int weight = 2; /* let's weigh the evidence */
1617 char seen[256];
f27ffc4a 1618 unsigned char un_char = 255, last_un_char;
93a17b20 1619 char *send = strchr(s,']');
3280af22 1620 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1621
1622 if (!send) /* has to be an expression */
1623 return TRUE;
1624
1625 Zero(seen,256,char);
1626 if (*s == '$')
1627 weight -= 3;
1628 else if (isDIGIT(*s)) {
1629 if (s[1] != ']') {
1630 if (isDIGIT(s[1]) && s[2] == ']')
1631 weight -= 10;
1632 }
1633 else
1634 weight -= 100;
1635 }
1636 for (; s < send; s++) {
1637 last_un_char = un_char;
1638 un_char = (unsigned char)*s;
1639 switch (*s) {
1640 case '@':
1641 case '&':
1642 case '$':
1643 weight -= seen[un_char] * 10;
7e2040f0 1644 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1645 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1646 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1647 weight -= 100;
1648 else
1649 weight -= 10;
1650 }
1651 else if (*s == '$' && s[1] &&
93a17b20
LW
1652 strchr("[#!%*<>()-=",s[1])) {
1653 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1654 weight -= 10;
1655 else
1656 weight -= 1;
1657 }
1658 break;
1659 case '\\':
1660 un_char = 254;
1661 if (s[1]) {
93a17b20 1662 if (strchr("wds]",s[1]))
79072805
LW
1663 weight += 100;
1664 else if (seen['\''] || seen['"'])
1665 weight += 1;
93a17b20 1666 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1667 weight += 40;
1668 else if (isDIGIT(s[1])) {
1669 weight += 40;
1670 while (s[1] && isDIGIT(s[1]))
1671 s++;
1672 }
1673 }
1674 else
1675 weight += 100;
1676 break;
1677 case '-':
1678 if (s[1] == '\\')
1679 weight += 50;
93a17b20 1680 if (strchr("aA01! ",last_un_char))
79072805 1681 weight += 30;
93a17b20 1682 if (strchr("zZ79~",s[1]))
79072805 1683 weight += 30;
f27ffc4a
GS
1684 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1685 weight -= 5; /* cope with negative subscript */
79072805
LW
1686 break;
1687 default:
93a17b20 1688 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1689 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1690 char *d = tmpbuf;
1691 while (isALPHA(*s))
1692 *d++ = *s++;
1693 *d = '\0';
1694 if (keyword(tmpbuf, d - tmpbuf))
1695 weight -= 150;
1696 }
1697 if (un_char == last_un_char + 1)
1698 weight += 5;
1699 weight -= seen[un_char];
1700 break;
1701 }
1702 seen[un_char]++;
1703 }
1704 if (weight >= 0) /* probably a character class */
1705 return FALSE;
1706 }
1707
1708 return TRUE;
1709}
ffed7fef 1710
ffb4593c
NT
1711/*
1712 * S_intuit_method
1713 *
1714 * Does all the checking to disambiguate
1715 * foo bar
1716 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1717 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1718 *
1719 * First argument is the stuff after the first token, e.g. "bar".
1720 *
1721 * Not a method if bar is a filehandle.
1722 * Not a method if foo is a subroutine prototyped to take a filehandle.
1723 * Not a method if it's really "Foo $bar"
1724 * Method if it's "foo $bar"
1725 * Not a method if it's really "print foo $bar"
1726 * Method if it's really "foo package::" (interpreted as package->foo)
1727 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1728 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1729 * =>
1730 */
1731
76e3520e 1732STATIC int
cea2e8a9 1733S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1734{
1735 char *s = start + (*start == '$');
3280af22 1736 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1737 STRLEN len;
1738 GV* indirgv;
1739
1740 if (gv) {
b6c543e3 1741 CV *cv;
a0d0e21e
LW
1742 if (GvIO(gv))
1743 return 0;
b6c543e3
IZ
1744 if ((cv = GvCVu(gv))) {
1745 char *proto = SvPVX(cv);
1746 if (proto) {
1747 if (*proto == ';')
1748 proto++;
1749 if (*proto == '*')
1750 return 0;
1751 }
1752 } else
a0d0e21e
LW
1753 gv = 0;
1754 }
8903cb82 1755 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1756 /* start is the beginning of the possible filehandle/object,
1757 * and s is the end of it
1758 * tmpbuf is a copy of it
1759 */
1760
a0d0e21e 1761 if (*start == '$') {
3280af22 1762 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1763 return 0;
1764 s = skipspace(s);
3280af22
NIS
1765 PL_bufptr = start;
1766 PL_expect = XREF;
a0d0e21e
LW
1767 return *s == '(' ? FUNCMETH : METHOD;
1768 }
1769 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1770 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1771 len -= 2;
1772 tmpbuf[len] = '\0';
1773 goto bare_package;
1774 }
1775 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1776 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1777 return 0;
1778 /* filehandle or package name makes it a method */
89bfa8cd 1779 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1780 s = skipspace(s);
3280af22 1781 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1782 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1783 bare_package:
3280af22 1784 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1785 newSVpvn(tmpbuf,len));
3280af22
NIS
1786 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1787 PL_expect = XTERM;
a0d0e21e 1788 force_next(WORD);
3280af22 1789 PL_bufptr = s;
a0d0e21e
LW
1790 return *s == '(' ? FUNCMETH : METHOD;
1791 }
1792 }
1793 return 0;
1794}
1795
ffb4593c
NT
1796/*
1797 * S_incl_perldb
1798 * Return a string of Perl code to load the debugger. If PERL5DB
1799 * is set, it will return the contents of that, otherwise a
1800 * compile-time require of perl5db.pl.
1801 */
1802
76e3520e 1803STATIC char*
cea2e8a9 1804S_incl_perldb(pTHX)
a0d0e21e 1805{
3280af22 1806 if (PL_perldb) {
76e3520e 1807 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1808
1809 if (pdb)
1810 return pdb;
61bb5906 1811 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1812 return "BEGIN { require 'perl5db.pl' }";
1813 }
1814 return "";
1815}
1816
1817
16d20bd9
AD
1818/* Encoded script support. filter_add() effectively inserts a
1819 * 'pre-processing' function into the current source input stream.
1820 * Note that the filter function only applies to the current source file
1821 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1822 *
1823 * The datasv parameter (which may be NULL) can be used to pass
1824 * private data to this instance of the filter. The filter function
1825 * can recover the SV using the FILTER_DATA macro and use it to
1826 * store private buffers and state information.
1827 *
1828 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1829 * and the IoDIRP field is used to store the function pointer,
1830 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1831 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1832 * private use must be set using malloc'd pointers.
1833 */
16d20bd9
AD
1834
1835SV *
864dbfa3 1836Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1837{
f4c556ac
GS
1838 if (!funcp)
1839 return Nullsv;
1840
3280af22
NIS
1841 if (!PL_rsfp_filters)
1842 PL_rsfp_filters = newAV();
16d20bd9 1843 if (!datasv)
8c52afec 1844 datasv = NEWSV(255,0);
16d20bd9 1845 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1846 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1847 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1848 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1849 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1850 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1851 av_unshift(PL_rsfp_filters, 1);
1852 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1853 return(datasv);
1854}
1855
1856
1857/* Delete most recently added instance of this filter function. */
a0d0e21e 1858void
864dbfa3 1859Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1860{
e0c19803 1861 SV *datasv;
f4c556ac 1862 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1863 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1864 return;
1865 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1866 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1867 if (IoDIRP(datasv) == (DIR*)funcp) {
1868 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1869 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1870 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1871
16d20bd9
AD
1872 return;
1873 }
1874 /* we need to search for the correct entry and clear it */
cea2e8a9 1875 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1876}
1877
1878
1879/* Invoke the n'th filter function for the current rsfp. */
1880I32
864dbfa3 1881Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1882
1883
1884 /* 0 = read one text line */
a0d0e21e 1885{
16d20bd9
AD
1886 filter_t funcp;
1887 SV *datasv = NULL;
e50aee73 1888
3280af22 1889 if (!PL_rsfp_filters)
16d20bd9 1890 return -1;
3280af22 1891 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1892 /* Provide a default input filter to make life easy. */
1893 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1894 DEBUG_P(PerlIO_printf(Perl_debug_log,
1895 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1896 if (maxlen) {
1897 /* Want a block */
1898 int len ;
1899 int old_len = SvCUR(buf_sv) ;
1900
1901 /* ensure buf_sv is large enough */
1902 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1903 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1904 if (PerlIO_error(PL_rsfp))
37120919
AD
1905 return -1; /* error */
1906 else
1907 return 0 ; /* end of file */
1908 }
16d20bd9
AD
1909 SvCUR_set(buf_sv, old_len + len) ;
1910 } else {
1911 /* Want a line */
3280af22
NIS
1912 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1913 if (PerlIO_error(PL_rsfp))
37120919
AD
1914 return -1; /* error */
1915 else
1916 return 0 ; /* end of file */
1917 }
16d20bd9
AD
1918 }
1919 return SvCUR(buf_sv);
1920 }
1921 /* Skip this filter slot if filter has been deleted */
3280af22 1922 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1923 DEBUG_P(PerlIO_printf(Perl_debug_log,
1924 "filter_read %d: skipped (filter deleted)\n",
1925 idx));
16d20bd9
AD
1926 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1927 }
1928 /* Get function pointer hidden within datasv */
1929 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1930 DEBUG_P(PerlIO_printf(Perl_debug_log,
1931 "filter_read %d: via function %p (%s)\n",
1932 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1933 /* Call function. The function is expected to */
1934 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1935 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1936 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1937}
1938
76e3520e 1939STATIC char *
cea2e8a9 1940S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1941{
c39cd008 1942#ifdef PERL_CR_FILTER
3280af22 1943 if (!PL_rsfp_filters) {
c39cd008 1944 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1945 }
1946#endif
3280af22 1947 if (PL_rsfp_filters) {
16d20bd9 1948
55497cff 1949 if (!append)
1950 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1951 if (FILTER_READ(0, sv, 0) > 0)
1952 return ( SvPVX(sv) ) ;
1953 else
1954 return Nullch ;
1955 }
9d116dd7 1956 else
fd049845 1957 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1958}
1959
1960
748a9306
LW
1961#ifdef DEBUGGING
1962 static char* exp_name[] =
09bef843
SB
1963 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1964 "ATTRTERM", "TERMBLOCK"
1965 };
748a9306 1966#endif
463ee0b2 1967
02aa26ce
NT
1968/*
1969 yylex
1970
1971 Works out what to call the token just pulled out of the input
1972 stream. The yacc parser takes care of taking the ops we return and
1973 stitching them into a tree.
1974
1975 Returns:
1976 PRIVATEREF
1977
1978 Structure:
1979 if read an identifier
1980 if we're in a my declaration
1981 croak if they tried to say my($foo::bar)
1982 build the ops for a my() declaration
1983 if it's an access to a my() variable
1984 are we in a sort block?
1985 croak if my($a); $a <=> $b
1986 build ops for access to a my() variable
1987 if in a dq string, and they've said @foo and we can't find @foo
1988 croak
1989 build ops for a bareword
1990 if we already built the token before, use it.
1991*/
1992
864dbfa3
GS
1993int
1994#ifdef USE_PURE_BISON
cea2e8a9 1995Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1996#else
cea2e8a9 1997Perl_yylex(pTHX)
864dbfa3 1998#endif
378cc40b 1999{
11343788 2000 dTHR;
79072805 2001 register char *s;
378cc40b 2002 register char *d;
79072805 2003 register I32 tmp;
463ee0b2 2004 STRLEN len;
161b471a
NIS
2005 GV *gv = Nullgv;
2006 GV **gvp = 0;
a687059c 2007
a1a0e61e
TD
2008#ifdef USE_PURE_BISON
2009 yylval_pointer = lvalp;
2010 yychar_pointer = lcharp;
2011#endif
2012
02aa26ce 2013 /* check if there's an identifier for us to look at */
3280af22 2014 if (PL_pending_ident) {
02aa26ce 2015 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2016 char pit = PL_pending_ident;
2017 PL_pending_ident = 0;
bbce6d69 2018
02aa26ce
NT
2019 /* if we're in a my(), we can't allow dynamics here.
2020 $foo'bar has already been turned into $foo::bar, so
2021 just check for colons.
2022
2023 if it's a legal name, the OP is a PADANY.
2024 */
3280af22 2025 if (PL_in_my) {
77ca0c92 2026 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2027 if (strchr(PL_tokenbuf,':'))
2028 yyerror(Perl_form(aTHX_ "No package name allowed for "
2029 "variable %s in \"our\"",
2030 PL_tokenbuf));
77ca0c92
LW
2031 tmp = pad_allocmy(PL_tokenbuf);
2032 }
2033 else {
2034 if (strchr(PL_tokenbuf,':'))
2035 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2036
77ca0c92
LW
2037 yylval.opval = newOP(OP_PADANY, 0);
2038 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2039 return PRIVATEREF;
2040 }
bbce6d69 2041 }
2042
02aa26ce
NT
2043 /*
2044 build the ops for accesses to a my() variable.
2045
2046 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2047 then used in a comparison. This catches most, but not
2048 all cases. For instance, it catches
2049 sort { my($a); $a <=> $b }
2050 but not
2051 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2052 (although why you'd do that is anyone's guess).
2053 */
2054
3280af22 2055 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2056#ifdef USE_THREADS
54b9620d 2057 /* Check for single character per-thread SVs */
3280af22
NIS
2058 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2059 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2060 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2061 {
2faa37cc 2062 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2063 yylval.opval->op_targ = tmp;
2064 return PRIVATEREF;
2065 }
2066#endif /* USE_THREADS */
3280af22 2067 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2068 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2069 /* might be an "our" variable" */
f472eb5c 2070 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2071 /* build ops for a bareword */
f472eb5c
GS
2072 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2073 sv_catpvn(sym, "::", 2);
2074 sv_catpv(sym, PL_tokenbuf+1);
2075 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2076 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2077 gv_fetchpv(SvPVX(sym),
77ca0c92 2078 (PL_in_eval
f472eb5c
GS
2079 ? (GV_ADDMULTI | GV_ADDINEVAL)
2080 : TRUE
77ca0c92
LW
2081 ),
2082 ((PL_tokenbuf[0] == '$') ? SVt_PV
2083 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2084 : SVt_PVHV));
2085 return WORD;
2086 }
2087
02aa26ce 2088 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2089 if (PL_last_lop_op == OP_SORT &&
2090 PL_tokenbuf[0] == '$' &&
2091 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2092 && !PL_tokenbuf[2])
bbce6d69 2093 {
3280af22
NIS
2094 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2095 d < PL_bufend && *d != '\n';
a863c7d1
MB
2096 d++)
2097 {
2098 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2099 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2100 PL_tokenbuf);
a863c7d1 2101 }
bbce6d69 2102 }
2103 }
bbce6d69 2104
a863c7d1
MB
2105 yylval.opval = newOP(OP_PADANY, 0);
2106 yylval.opval->op_targ = tmp;
2107 return PRIVATEREF;
2108 }
bbce6d69 2109 }
2110
02aa26ce
NT
2111 /*
2112 Whine if they've said @foo in a doublequoted string,
2113 and @foo isn't a variable we can find in the symbol
2114 table.
2115 */
3280af22
NIS
2116 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2117 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2118 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2119 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2120 PL_tokenbuf, PL_tokenbuf));
bbce6d69 2121 }
2122
02aa26ce 2123 /* build ops for a bareword */
3280af22 2124 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2125 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2126 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2127 ((PL_tokenbuf[0] == '$') ? SVt_PV
2128 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2129 : SVt_PVHV));
2130 return WORD;
2131 }
2132
02aa26ce
NT
2133 /* no identifier pending identification */
2134
3280af22 2135 switch (PL_lex_state) {
79072805
LW
2136#ifdef COMMENTARY
2137 case LEX_NORMAL: /* Some compilers will produce faster */
2138 case LEX_INTERPNORMAL: /* code if we comment these out. */
2139 break;
2140#endif
2141
09bef843 2142 /* when we've already built the next token, just pull it out of the queue */
79072805 2143 case LEX_KNOWNEXT:
3280af22
NIS
2144 PL_nexttoke--;
2145 yylval = PL_nextval[PL_nexttoke];
2146 if (!PL_nexttoke) {
2147 PL_lex_state = PL_lex_defer;
2148 PL_expect = PL_lex_expect;
2149 PL_lex_defer = LEX_NORMAL;
463ee0b2 2150 }
3280af22 2151 return(PL_nexttype[PL_nexttoke]);
79072805 2152
02aa26ce 2153 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2154 when we get here, PL_bufptr is at the \
02aa26ce 2155 */
79072805
LW
2156 case LEX_INTERPCASEMOD:
2157#ifdef DEBUGGING
3280af22 2158 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2159 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2160#endif
02aa26ce 2161 /* handle \E or end of string */
3280af22 2162 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2163 char oldmod;
02aa26ce
NT
2164
2165 /* if at a \E */
3280af22
NIS
2166 if (PL_lex_casemods) {
2167 oldmod = PL_lex_casestack[--PL_lex_casemods];
2168 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2169
3280af22
NIS
2170 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2171 PL_bufptr += 2;
2172 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2173 }
79072805
LW
2174 return ')';
2175 }
3280af22
NIS
2176 if (PL_bufptr != PL_bufend)
2177 PL_bufptr += 2;
2178 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2179 return yylex();
79072805
LW
2180 }
2181 else {
3280af22 2182 s = PL_bufptr + 1;
79072805
LW
2183 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2184 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2185 if (strchr("LU", *s) &&
3280af22 2186 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2187 {
3280af22 2188 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2189 return ')';
2190 }
3280af22
NIS
2191 if (PL_lex_casemods > 10) {
2192 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2193 if (newlb != PL_lex_casestack) {
a0d0e21e 2194 SAVEFREEPV(newlb);
3280af22 2195 PL_lex_casestack = newlb;
a0d0e21e
LW
2196 }
2197 }
3280af22
NIS
2198 PL_lex_casestack[PL_lex_casemods++] = *s;
2199 PL_lex_casestack[PL_lex_casemods] = '\0';
2200 PL_lex_state = LEX_INTERPCONCAT;
2201 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2202 force_next('(');
2203 if (*s == 'l')
3280af22 2204 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2205 else if (*s == 'u')
3280af22 2206 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2207 else if (*s == 'L')
3280af22 2208 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2209 else if (*s == 'U')
3280af22 2210 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2211 else if (*s == 'Q')
3280af22 2212 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2213 else
cea2e8a9 2214 Perl_croak(aTHX_ "panic: yylex");
3280af22 2215 PL_bufptr = s + 1;
79072805 2216 force_next(FUNC);
3280af22
NIS
2217 if (PL_lex_starts) {
2218 s = PL_bufptr;
2219 PL_lex_starts = 0;
79072805
LW
2220 Aop(OP_CONCAT);
2221 }
2222 else
cea2e8a9 2223 return yylex();
79072805
LW
2224 }
2225
55497cff 2226 case LEX_INTERPPUSH:
2227 return sublex_push();
2228
79072805 2229 case LEX_INTERPSTART:
3280af22 2230 if (PL_bufptr == PL_bufend)
79072805 2231 return sublex_done();
3280af22
NIS
2232 PL_expect = XTERM;
2233 PL_lex_dojoin = (*PL_bufptr == '@');
2234 PL_lex_state = LEX_INTERPNORMAL;
2235 if (PL_lex_dojoin) {
2236 PL_nextval[PL_nexttoke].ival = 0;
79072805 2237 force_next(',');
554b3eca 2238#ifdef USE_THREADS
533c011a
NIS
2239 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2240 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2241 force_next(PRIVATEREF);
2242#else
a0d0e21e 2243 force_ident("\"", '$');
554b3eca 2244#endif /* USE_THREADS */
3280af22 2245 PL_nextval[PL_nexttoke].ival = 0;
79072805 2246 force_next('$');
3280af22 2247 PL_nextval[PL_nexttoke].ival = 0;
79072805 2248 force_next('(');
3280af22 2249 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2250 force_next(FUNC);
2251 }
3280af22
NIS
2252 if (PL_lex_starts++) {
2253 s = PL_bufptr;
79072805
LW
2254 Aop(OP_CONCAT);
2255 }
cea2e8a9 2256 return yylex();
79072805
LW
2257
2258 case LEX_INTERPENDMAYBE:
3280af22
NIS
2259 if (intuit_more(PL_bufptr)) {
2260 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2261 break;
2262 }
2263 /* FALL THROUGH */
2264
2265 case LEX_INTERPEND:
3280af22
NIS
2266 if (PL_lex_dojoin) {
2267 PL_lex_dojoin = FALSE;
2268 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2269 return ')';
2270 }
43a16006 2271 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2272 && SvEVALED(PL_lex_repl))
43a16006 2273 {
e9fa98b2 2274 if (PL_bufptr != PL_bufend)
cea2e8a9 2275 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2276 PL_lex_repl = Nullsv;
2277 }
79072805
LW
2278 /* FALLTHROUGH */
2279 case LEX_INTERPCONCAT:
2280#ifdef DEBUGGING
3280af22 2281 if (PL_lex_brackets)
cea2e8a9 2282 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2283#endif
3280af22 2284 if (PL_bufptr == PL_bufend)
79072805
LW
2285 return sublex_done();
2286
3280af22
NIS
2287 if (SvIVX(PL_linestr) == '\'') {
2288 SV *sv = newSVsv(PL_linestr);
2289 if (!PL_lex_inpat)
76e3520e 2290 sv = tokeq(sv);
3280af22 2291 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2292 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2293 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2294 s = PL_bufend;
79072805
LW
2295 }
2296 else {
3280af22 2297 s = scan_const(PL_bufptr);
79072805 2298 if (*s == '\\')
3280af22 2299 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2300 else
3280af22 2301 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2302 }
2303
3280af22
NIS
2304 if (s != PL_bufptr) {
2305 PL_nextval[PL_nexttoke] = yylval;
2306 PL_expect = XTERM;
79072805 2307 force_next(THING);
3280af22 2308 if (PL_lex_starts++)
79072805
LW
2309 Aop(OP_CONCAT);
2310 else {
3280af22 2311 PL_bufptr = s;
cea2e8a9 2312 return yylex();
79072805
LW
2313 }
2314 }
2315
cea2e8a9 2316 return yylex();
a0d0e21e 2317 case LEX_FORMLINE:
3280af22
NIS
2318 PL_lex_state = LEX_NORMAL;
2319 s = scan_formline(PL_bufptr);
2320 if (!PL_lex_formbrack)
a0d0e21e
LW
2321 goto rightbracket;
2322 OPERATOR(';');
79072805
LW
2323 }
2324
3280af22
NIS
2325 s = PL_bufptr;
2326 PL_oldoldbufptr = PL_oldbufptr;
2327 PL_oldbufptr = s;
79072805 2328 DEBUG_p( {
bf49b057
GS
2329 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2330 exp_name[PL_expect], s);
79072805 2331 } )
463ee0b2
LW
2332
2333 retry:
378cc40b
LW
2334 switch (*s) {
2335 default:
7e2040f0 2336 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2337 goto keylookup;
cea2e8a9 2338 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2339 case 4:
2340 case 26:
2341 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2342 case 0:
3280af22
NIS
2343 if (!PL_rsfp) {
2344 PL_last_uni = 0;
2345 PL_last_lop = 0;
2346 if (PL_lex_brackets)
d98d5fff 2347 yyerror("Missing right curly or square bracket");
79072805 2348 TOKEN(0);
463ee0b2 2349 }
3280af22 2350 if (s++ < PL_bufend)
a687059c 2351 goto retry; /* ignore stray nulls */
3280af22
NIS
2352 PL_last_uni = 0;
2353 PL_last_lop = 0;
2354 if (!PL_in_eval && !PL_preambled) {
2355 PL_preambled = TRUE;
2356 sv_setpv(PL_linestr,incl_perldb());
2357 if (SvCUR(PL_linestr))
2358 sv_catpv(PL_linestr,";");
2359 if (PL_preambleav){
2360 while(AvFILLp(PL_preambleav) >= 0) {
2361 SV *tmpsv = av_shift(PL_preambleav);
2362 sv_catsv(PL_linestr, tmpsv);
2363 sv_catpv(PL_linestr, ";");
91b7def8 2364 sv_free(tmpsv);
2365 }
3280af22
NIS
2366 sv_free((SV*)PL_preambleav);
2367 PL_preambleav = NULL;
91b7def8 2368 }
3280af22
NIS
2369 if (PL_minus_n || PL_minus_p) {
2370 sv_catpv(PL_linestr, "LINE: while (<>) {");
2371 if (PL_minus_l)
2372 sv_catpv(PL_linestr,"chomp;");
2373 if (PL_minus_a) {
8fd239a7
CS
2374 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2375 if (gv)
2376 GvIMPORTED_AV_on(gv);
3280af22
NIS
2377 if (PL_minus_F) {
2378 if (strchr("/'\"", *PL_splitstr)
2379 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2380 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2381 else {
2382 char delim;
2383 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2384 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2385 delim = *s;
cea2e8a9 2386 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2387 "q" + (delim == '\''), delim);
3280af22 2388 for (s = PL_splitstr; *s; s++) {
54310121 2389 if (*s == '\\')
3280af22
NIS
2390 sv_catpvn(PL_linestr, "\\", 1);
2391 sv_catpvn(PL_linestr, s, 1);
54310121 2392 }
cea2e8a9 2393 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2394 }
2304df62
AD
2395 }
2396 else
3280af22 2397 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2398 }
79072805 2399 }
3280af22
NIS
2400 sv_catpv(PL_linestr, "\n");
2401 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2402 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2403 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2404 SV *sv = NEWSV(85,0);
2405
2406 sv_upgrade(sv, SVt_PVMG);
3280af22 2407 sv_setsv(sv,PL_linestr);
57843af0 2408 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2409 }
79072805 2410 goto retry;
a687059c 2411 }
e929a76b 2412 do {
3280af22 2413 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2414 fake_eof:
3280af22
NIS
2415 if (PL_rsfp) {
2416 if (PL_preprocess && !PL_in_eval)
2417 (void)PerlProc_pclose(PL_rsfp);
2418 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2419 PerlIO_clearerr(PL_rsfp);
395c3793 2420 else
3280af22
NIS
2421 (void)PerlIO_close(PL_rsfp);
2422 PL_rsfp = Nullfp;
4a9ae47a 2423 PL_doextract = FALSE;
395c3793 2424 }
3280af22
NIS
2425 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2426 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2427 sv_catpv(PL_linestr,";}");
2428 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2429 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2430 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2431 goto retry;
2432 }
3280af22
NIS
2433 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2434 sv_setpv(PL_linestr,"");
79072805 2435 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2436 }
3280af22 2437 if (PL_doextract) {
a0d0e21e 2438 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2439 PL_doextract = FALSE;
a0d0e21e
LW
2440
2441 /* Incest with pod. */
2442 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2443 sv_setpv(PL_linestr, "");
2444 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2445 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2446 PL_doextract = FALSE;
a0d0e21e
LW
2447 }
2448 }
463ee0b2 2449 incline(s);
3280af22
NIS
2450 } while (PL_doextract);
2451 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2452 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2453 SV *sv = NEWSV(85,0);
a687059c 2454
93a17b20 2455 sv_upgrade(sv, SVt_PVMG);
3280af22 2456 sv_setsv(sv,PL_linestr);
57843af0 2457 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2458 }
3280af22 2459 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2460 if (CopLINE(PL_curcop) == 1) {
3280af22 2461 while (s < PL_bufend && isSPACE(*s))
79072805 2462 s++;
a0d0e21e 2463 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2464 s++;
44a8e56a 2465 d = Nullch;
3280af22 2466 if (!PL_in_eval) {
44a8e56a 2467 if (*s == '#' && *(s+1) == '!')
2468 d = s + 2;
2469#ifdef ALTERNATE_SHEBANG
2470 else {
2471 static char as[] = ALTERNATE_SHEBANG;
2472 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2473 d = s + (sizeof(as) - 1);
2474 }
2475#endif /* ALTERNATE_SHEBANG */
2476 }
2477 if (d) {
b8378b72 2478 char *ipath;
774d564b 2479 char *ipathend;
b8378b72 2480
774d564b 2481 while (isSPACE(*d))
b8378b72
CS
2482 d++;
2483 ipath = d;
774d564b 2484 while (*d && !isSPACE(*d))
2485 d++;
2486 ipathend = d;
2487
2488#ifdef ARG_ZERO_IS_SCRIPT
2489 if (ipathend > ipath) {
2490 /*
2491 * HP-UX (at least) sets argv[0] to the script name,
2492 * which makes $^X incorrect. And Digital UNIX and Linux,
2493 * at least, set argv[0] to the basename of the Perl
2494 * interpreter. So, having found "#!", we'll set it right.
2495 */
2496 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2497 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2498 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2499 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2500 SvSETMAGIC(x);
2501 }
774d564b 2502 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2503 }
774d564b 2504#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2505
2506 /*
2507 * Look for options.
2508 */
748a9306 2509 d = instr(s,"perl -");
84e30d1a 2510 if (!d) {
748a9306 2511 d = instr(s,"perl");
84e30d1a
GS
2512#if defined(DOSISH)
2513 /* avoid getting into infinite loops when shebang
2514 * line contains "Perl" rather than "perl" */
2515 if (!d) {
2516 for (d = ipathend-4; d >= ipath; --d) {
2517 if ((*d == 'p' || *d == 'P')
2518 && !ibcmp(d, "perl", 4))
2519 {
2520 break;
2521 }
2522 }
2523 if (d < ipath)
2524 d = Nullch;
2525 }
2526#endif
2527 }
44a8e56a 2528#ifdef ALTERNATE_SHEBANG
2529 /*
2530 * If the ALTERNATE_SHEBANG on this system starts with a
2531 * character that can be part of a Perl expression, then if
2532 * we see it but not "perl", we're probably looking at the
2533 * start of Perl code, not a request to hand off to some
2534 * other interpreter. Similarly, if "perl" is there, but
2535 * not in the first 'word' of the line, we assume the line
2536 * contains the start of the Perl program.
44a8e56a 2537 */
2538 if (d && *s != '#') {
774d564b 2539 char *c = ipath;
44a8e56a 2540 while (*c && !strchr("; \t\r\n\f\v#", *c))
2541 c++;
2542 if (c < d)
2543 d = Nullch; /* "perl" not in first word; ignore */
2544 else
2545 *s = '#'; /* Don't try to parse shebang line */
2546 }
774d564b 2547#endif /* ALTERNATE_SHEBANG */
748a9306 2548 if (!d &&
44a8e56a 2549 *s == '#' &&
774d564b 2550 ipathend > ipath &&
3280af22 2551 !PL_minus_c &&
748a9306 2552 !instr(s,"indir") &&
3280af22 2553 instr(PL_origargv[0],"perl"))
748a9306 2554 {
9f68db38 2555 char **newargv;
9f68db38 2556
774d564b 2557 *ipathend = '\0';
2558 s = ipathend + 1;
3280af22 2559 while (s < PL_bufend && isSPACE(*s))
9f68db38 2560 s++;
3280af22
NIS
2561 if (s < PL_bufend) {
2562 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2563 newargv[1] = s;
3280af22 2564 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2565 s++;
2566 *s = '\0';
3280af22 2567 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2568 }
2569 else
3280af22 2570 newargv = PL_origargv;
774d564b 2571 newargv[0] = ipath;
80252599 2572 PerlProc_execv(ipath, newargv);
cea2e8a9 2573 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2574 }
748a9306 2575 if (d) {
3280af22
NIS
2576 U32 oldpdb = PL_perldb;
2577 bool oldn = PL_minus_n;
2578 bool oldp = PL_minus_p;
748a9306
LW
2579
2580 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2581 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2582
2583 if (*d++ == '-') {
8cc95fdb 2584 do {
2585 if (*d == 'M' || *d == 'm') {
2586 char *m = d;
2587 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2588 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2589 (int)(d - m), m);
2590 }
2591 d = moreswitches(d);
2592 } while (d);
84902520 2593 if (PERLDB_LINE && !oldpdb ||
3280af22 2594 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2595 /* if we have already added "LINE: while (<>) {",
2596 we must not do it again */
748a9306 2597 {
3280af22
NIS
2598 sv_setpv(PL_linestr, "");
2599 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2600 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2601 PL_preambled = FALSE;
84902520 2602 if (PERLDB_LINE)
3280af22 2603 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2604 goto retry;
2605 }
a0d0e21e 2606 }
79072805 2607 }
9f68db38 2608 }
79072805 2609 }
3280af22
NIS
2610 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2611 PL_bufptr = s;
2612 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2613 return yylex();
ae986130 2614 }
378cc40b 2615 goto retry;
4fdae800 2616 case '\r':
6a27c188 2617#ifdef PERL_STRICT_CR
cea2e8a9
GS
2618 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2619 Perl_croak(aTHX_
54310121 2620 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2621#endif
4fdae800 2622 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2623 s++;
2624 goto retry;
378cc40b 2625 case '#':
e929a76b 2626 case '\n':
3280af22
NIS
2627 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2628 d = PL_bufend;
a687059c 2629 while (s < d && *s != '\n')
378cc40b 2630 s++;
0f85fab0 2631 if (s < d)
378cc40b 2632 s++;
463ee0b2 2633 incline(s);
3280af22
NIS
2634 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2635 PL_bufptr = s;
2636 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2637 return yylex();
a687059c 2638 }
378cc40b 2639 }
a687059c 2640 else {
378cc40b 2641 *s = '\0';
3280af22 2642 PL_bufend = s;
a687059c 2643 }
378cc40b
LW
2644 goto retry;
2645 case '-':
79072805 2646 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2647 s++;
3280af22 2648 PL_bufptr = s;
748a9306
LW
2649 tmp = *s++;
2650
3280af22 2651 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2652 s++;
2653
2654 if (strnEQ(s,"=>",2)) {
3280af22 2655 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2656 OPERATOR('-'); /* unary minus */
2657 }
3280af22
NIS
2658 PL_last_uni = PL_oldbufptr;
2659 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2660 switch (tmp) {
79072805
LW
2661 case 'r': FTST(OP_FTEREAD);
2662 case 'w': FTST(OP_FTEWRITE);
2663 case 'x': FTST(OP_FTEEXEC);
2664 case 'o': FTST(OP_FTEOWNED);
2665 case 'R': FTST(OP_FTRREAD);
2666 case 'W': FTST(OP_FTRWRITE);
2667 case 'X': FTST(OP_FTREXEC);
2668 case 'O': FTST(OP_FTROWNED);
2669 case 'e': FTST(OP_FTIS);
2670 case 'z': FTST(OP_FTZERO);
2671 case 's': FTST(OP_FTSIZE);
2672 case 'f': FTST(OP_FTFILE);
2673 case 'd': FTST(OP_FTDIR);
2674 case 'l': FTST(OP_FTLINK);
2675 case 'p': FTST(OP_FTPIPE);
2676 case 'S': FTST(OP_FTSOCK);
2677 case 'u': FTST(OP_FTSUID);
2678 case 'g': FTST(OP_FTSGID);
2679 case 'k': FTST(OP_FTSVTX);
2680 case 'b': FTST(OP_FTBLK);
2681 case 'c': FTST(OP_FTCHR);
2682 case 't': FTST(OP_FTTTY);
2683 case 'T': FTST(OP_FTTEXT);
2684 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2685 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2686 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2687 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2688 default:
cea2e8a9 2689 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2690 break;
2691 }
2692 }
a687059c
LW
2693 tmp = *s++;
2694 if (*s == tmp) {
2695 s++;
3280af22 2696 if (PL_expect == XOPERATOR)
79072805
LW
2697 TERM(POSTDEC);
2698 else
2699 OPERATOR(PREDEC);
2700 }
2701 else if (*s == '>') {
2702 s++;
2703 s = skipspace(s);
7e2040f0 2704 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2705 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2706 TOKEN(ARROW);
79072805 2707 }
748a9306
LW
2708 else if (*s == '$')
2709 OPERATOR(ARROW);
463ee0b2 2710 else
748a9306 2711 TERM(ARROW);
a687059c 2712 }
3280af22 2713 if (PL_expect == XOPERATOR)
79072805
LW
2714 Aop(OP_SUBTRACT);
2715 else {
3280af22 2716 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2717 check_uni();
79072805 2718 OPERATOR('-'); /* unary minus */
2f3197b3 2719 }
79072805 2720
378cc40b 2721 case '+':
a687059c
LW
2722 tmp = *s++;
2723 if (*s == tmp) {
378cc40b 2724 s++;
3280af22 2725 if (PL_expect == XOPERATOR)
79072805
LW
2726 TERM(POSTINC);
2727 else
2728 OPERATOR(PREINC);
378cc40b 2729 }
3280af22 2730 if (PL_expect == XOPERATOR)
79072805
LW
2731 Aop(OP_ADD);
2732 else {
3280af22 2733 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2734 check_uni();
a687059c 2735 OPERATOR('+');
2f3197b3 2736 }
a687059c 2737
378cc40b 2738 case '*':
3280af22
NIS
2739 if (PL_expect != XOPERATOR) {
2740 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2741 PL_expect = XOPERATOR;
2742 force_ident(PL_tokenbuf, '*');
2743 if (!*PL_tokenbuf)
a0d0e21e 2744 PREREF('*');
79072805 2745 TERM('*');
a687059c 2746 }
79072805
LW
2747 s++;
2748 if (*s == '*') {
a687059c 2749 s++;
79072805 2750 PWop(OP_POW);
a687059c 2751 }
79072805
LW
2752 Mop(OP_MULTIPLY);
2753
378cc40b 2754 case '%':
3280af22 2755 if (PL_expect == XOPERATOR) {
bbce6d69 2756 ++s;
2757 Mop(OP_MODULO);
a687059c 2758 }
3280af22
NIS
2759 PL_tokenbuf[0] = '%';
2760 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2761 if (!PL_tokenbuf[1]) {
2762 if (s == PL_bufend)
bbce6d69 2763 yyerror("Final % should be \\% or %name");
2764 PREREF('%');
a687059c 2765 }
3280af22 2766 PL_pending_ident = '%';
bbce6d69 2767 TERM('%');
a687059c 2768
378cc40b 2769 case '^':
79072805 2770 s++;
a0d0e21e 2771 BOop(OP_BIT_XOR);
79072805 2772 case '[':
3280af22 2773 PL_lex_brackets++;
79072805 2774 /* FALL THROUGH */
378cc40b 2775 case '~':
378cc40b 2776 case ',':
378cc40b
LW
2777 tmp = *s++;
2778 OPERATOR(tmp);
a0d0e21e
LW
2779 case ':':
2780 if (s[1] == ':') {
2781 len = 0;
2782 goto just_a_word;
2783 }
2784 s++;
09bef843
SB
2785 switch (PL_expect) {
2786 OP *attrs;
2787 case XOPERATOR:
2788 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2789 break;
2790 PL_bufptr = s; /* update in case we back off */
2791 goto grabattrs;
2792 case XATTRBLOCK:
2793 PL_expect = XBLOCK;
2794 goto grabattrs;
2795 case XATTRTERM:
2796 PL_expect = XTERMBLOCK;
2797 grabattrs:
2798 s = skipspace(s);
2799 attrs = Nullop;
7e2040f0 2800 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2801 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2802 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2803 if (tmp < 0) tmp = -tmp;
2804 switch (tmp) {
2805 case KEY_or:
2806 case KEY_and:
2807 case KEY_for:
2808 case KEY_unless:
2809 case KEY_if:
2810 case KEY_while:
2811 case KEY_until:
2812 goto got_attrs;
2813 default:
2814 break;
2815 }
2816 }
09bef843
SB
2817 if (*d == '(') {
2818 d = scan_str(d,TRUE,TRUE);
2819 if (!d) {
2820 if (PL_lex_stuff) {
2821 SvREFCNT_dec(PL_lex_stuff);
2822 PL_lex_stuff = Nullsv;
2823 }
2824 /* MUST advance bufptr here to avoid bogus
2825 "at end of line" context messages from yyerror().
2826 */
2827 PL_bufptr = s + len;
2828 yyerror("Unterminated attribute parameter in attribute list");
2829 if (attrs)
2830 op_free(attrs);
2831 return 0; /* EOF indicator */
2832 }
2833 }
2834 if (PL_lex_stuff) {
2835 SV *sv = newSVpvn(s, len);
2836 sv_catsv(sv, PL_lex_stuff);
2837 attrs = append_elem(OP_LIST, attrs,
2838 newSVOP(OP_CONST, 0, sv));
2839 SvREFCNT_dec(PL_lex_stuff);
2840 PL_lex_stuff = Nullsv;
2841 }
2842 else {
2843 attrs = append_elem(OP_LIST, attrs,
2844 newSVOP(OP_CONST, 0,
2845 newSVpvn(s, len)));
2846 }
2847 s = skipspace(d);
0120eecf 2848 if (*s == ':' && s[1] != ':')
09bef843 2849 s = skipspace(s+1);
0120eecf
GS
2850 else if (s == d)
2851 break; /* require real whitespace or :'s */
09bef843 2852 }
f9829d6b
GS
2853 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2854 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2855 char q = ((*s == '\'') ? '"' : '\'');
2856 /* If here for an expression, and parsed no attrs, back off. */
2857 if (tmp == '=' && !attrs) {
2858 s = PL_bufptr;
2859 break;
2860 }
2861 /* MUST advance bufptr here to avoid bogus "at end of line"
2862 context messages from yyerror().
2863 */
2864 PL_bufptr = s;
2865 if (!*s)
2866 yyerror("Unterminated attribute list");
2867 else
2868 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2869 q, *s, q));
2870 if (attrs)
2871 op_free(attrs);
2872 OPERATOR(':');
2873 }
f9829d6b 2874 got_attrs:
09bef843
SB
2875 if (attrs) {
2876 PL_nextval[PL_nexttoke].opval = attrs;
2877 force_next(THING);
2878 }
2879 TOKEN(COLONATTR);
2880 }
a0d0e21e 2881 OPERATOR(':');
8990e307
LW
2882 case '(':
2883 s++;
3280af22
NIS
2884 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2885 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2886 else
3280af22 2887 PL_expect = XTERM;
a0d0e21e 2888 TOKEN('(');
378cc40b 2889 case ';':
57843af0
GS
2890 if (CopLINE(PL_curcop) < PL_copline)
2891 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2892 tmp = *s++;
2893 OPERATOR(tmp);
2894 case ')':
378cc40b 2895 tmp = *s++;
16d20bd9
AD
2896 s = skipspace(s);
2897 if (*s == '{')
2898 PREBLOCK(tmp);
378cc40b 2899 TERM(tmp);
79072805
LW
2900 case ']':
2901 s++;
3280af22 2902 if (PL_lex_brackets <= 0)
d98d5fff 2903 yyerror("Unmatched right square bracket");
463ee0b2 2904 else
3280af22
NIS
2905 --PL_lex_brackets;
2906 if (PL_lex_state == LEX_INTERPNORMAL) {
2907 if (PL_lex_brackets == 0) {
a0d0e21e 2908 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2909 PL_lex_state = LEX_INTERPEND;
79072805
LW
2910 }
2911 }
4633a7c4 2912 TERM(']');
79072805
LW
2913 case '{':
2914 leftbracket:
79072805 2915 s++;
3280af22
NIS
2916 if (PL_lex_brackets > 100) {
2917 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2918 if (newlb != PL_lex_brackstack) {
8990e307 2919 SAVEFREEPV(newlb);
3280af22 2920 PL_lex_brackstack = newlb;
8990e307
LW
2921 }
2922 }
3280af22 2923 switch (PL_expect) {
a0d0e21e 2924 case XTERM:
3280af22 2925 if (PL_lex_formbrack) {
a0d0e21e
LW
2926 s--;
2927 PRETERMBLOCK(DO);
2928 }
3280af22
NIS
2929 if (PL_oldoldbufptr == PL_last_lop)
2930 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2931 else
3280af22 2932 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2933 OPERATOR(HASHBRACK);
a0d0e21e 2934 case XOPERATOR:
3280af22 2935 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2936 s++;
44a8e56a 2937 d = s;
3280af22
NIS
2938 PL_tokenbuf[0] = '\0';
2939 if (d < PL_bufend && *d == '-') {
2940 PL_tokenbuf[0] = '-';
44a8e56a 2941 d++;
3280af22 2942 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2943 d++;
2944 }
7e2040f0 2945 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2946 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2947 FALSE, &len);
3280af22 2948 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2949 d++;
2950 if (*d == '}') {
3280af22 2951 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2952 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2953 if (minus)
2954 force_next('-');
748a9306
LW
2955 }
2956 }
2957 /* FALL THROUGH */
09bef843 2958 case XATTRBLOCK:
748a9306 2959 case XBLOCK:
3280af22
NIS
2960 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2961 PL_expect = XSTATE;
a0d0e21e 2962 break;
09bef843 2963 case XATTRTERM:
a0d0e21e 2964 case XTERMBLOCK:
3280af22
NIS
2965 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2966 PL_expect = XSTATE;
a0d0e21e
LW
2967 break;
2968 default: {
2969 char *t;
3280af22
NIS
2970 if (PL_oldoldbufptr == PL_last_lop)
2971 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2972 else
3280af22 2973 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2974 s = skipspace(s);
09ecc4b6 2975 if (*s == '}')
a0d0e21e 2976 OPERATOR(HASHBRACK);
b8a4b1be
GS
2977 /* This hack serves to disambiguate a pair of curlies
2978 * as being a block or an anon hash. Normally, expectation
2979 * determines that, but in cases where we're not in a
2980 * position to expect anything in particular (like inside
2981 * eval"") we have to resolve the ambiguity. This code
2982 * covers the case where the first term in the curlies is a
2983 * quoted string. Most other cases need to be explicitly
2984 * disambiguated by prepending a `+' before the opening
2985 * curly in order to force resolution as an anon hash.
2986 *
2987 * XXX should probably propagate the outer expectation
2988 * into eval"" to rely less on this hack, but that could
2989 * potentially break current behavior of eval"".
2990 * GSAR 97-07-21
2991 */
2992 t = s;
2993 if (*s == '\'' || *s == '"' || *s == '`') {
2994 /* common case: get past first string, handling escapes */
3280af22 2995 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2996 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2997 t++;
2998 t++;
a0d0e21e 2999 }
b8a4b1be 3000 else if (*s == 'q') {
3280af22 3001 if (++t < PL_bufend
b8a4b1be 3002 && (!isALNUM(*t)
3280af22 3003 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3004 && !isALNUM(*t))))
3005 {
b8a4b1be
GS
3006 char *tmps;
3007 char open, close, term;
3008 I32 brackets = 1;
3009
3280af22 3010 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3011 t++;
3012 term = *t;
3013 open = term;
3014 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3015 term = tmps[5];
3016 close = term;
3017 if (open == close)
3280af22
NIS
3018 for (t++; t < PL_bufend; t++) {
3019 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3020 t++;
6d07e5e9 3021 else if (*t == open)
b8a4b1be
GS
3022 break;
3023 }
3024 else
3280af22
NIS
3025 for (t++; t < PL_bufend; t++) {
3026 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3027 t++;
6d07e5e9 3028 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3029 break;
3030 else if (*t == open)
3031 brackets++;
3032 }
3033 }
3034 t++;
a0d0e21e 3035 }
7e2040f0 3036 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3037 t += UTF8SKIP(t);
7e2040f0 3038 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3039 t += UTF8SKIP(t);
a0d0e21e 3040 }
3280af22 3041 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3042 t++;
b8a4b1be
GS
3043 /* if comma follows first term, call it an anon hash */
3044 /* XXX it could be a comma expression with loop modifiers */
3280af22 3045 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3046 || (*t == '=' && t[1] == '>')))
a0d0e21e 3047 OPERATOR(HASHBRACK);
3280af22 3048 if (PL_expect == XREF)
4e4e412b 3049 PL_expect = XTERM;
a0d0e21e 3050 else {
3280af22
NIS
3051 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3052 PL_expect = XSTATE;
a0d0e21e 3053 }
8990e307 3054 }
a0d0e21e 3055 break;
463ee0b2 3056 }
57843af0 3057 yylval.ival = CopLINE(PL_curcop);
79072805 3058 if (isSPACE(*s) || *s == '#')
3280af22 3059 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3060 TOKEN('{');
378cc40b 3061 case '}':
79072805
LW
3062 rightbracket:
3063 s++;
3280af22 3064 if (PL_lex_brackets <= 0)
d98d5fff 3065 yyerror("Unmatched right curly bracket");
463ee0b2 3066 else
3280af22
NIS
3067 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3068 if (PL_lex_brackets < PL_lex_formbrack)
3069 PL_lex_formbrack = 0;
3070 if (PL_lex_state == LEX_INTERPNORMAL) {
3071 if (PL_lex_brackets == 0) {
9059aa12
LW
3072 if (PL_expect & XFAKEBRACK) {
3073 PL_expect &= XENUMMASK;
3280af22
NIS
3074 PL_lex_state = LEX_INTERPEND;
3075 PL_bufptr = s;
cea2e8a9 3076 return yylex(); /* ignore fake brackets */
79072805 3077 }
fa83b5b6 3078 if (*s == '-' && s[1] == '>')
3280af22 3079 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3080 else if (*s != '[' && *s != '{')
3280af22 3081 PL_lex_state = LEX_INTERPEND;
79072805
LW
3082 }
3083 }
9059aa12
LW
3084 if (PL_expect & XFAKEBRACK) {
3085 PL_expect &= XENUMMASK;
3280af22 3086 PL_bufptr = s;
cea2e8a9 3087 return yylex(); /* ignore fake brackets */
748a9306 3088 }
79072805
LW
3089 force_next('}');
3090 TOKEN(';');
378cc40b
LW
3091 case '&':
3092 s++;
3093 tmp = *s++;
3094 if (tmp == '&')
a0d0e21e 3095 AOPERATOR(ANDAND);
378cc40b 3096 s--;
3280af22 3097 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3098 if (ckWARN(WARN_SEMICOLON)
3099 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3100 {
57843af0 3101 CopLINE_dec(PL_curcop);
cea2e8a9 3102 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3103 CopLINE_inc(PL_curcop);
463ee0b2 3104 }
79072805 3105 BAop(OP_BIT_AND);
463ee0b2 3106 }
79072805 3107
3280af22
NIS
3108 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3109 if (*PL_tokenbuf) {
3110 PL_expect = XOPERATOR;
3111 force_ident(PL_tokenbuf, '&');
463ee0b2 3112 }
79072805
LW
3113 else
3114 PREREF('&');
c07a80fd 3115 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3116 TERM('&');
3117
378cc40b
LW
3118 case '|':
3119 s++;
3120 tmp = *s++;
3121 if (tmp == '|')
a0d0e21e 3122 AOPERATOR(OROR);
378cc40b 3123 s--;
79072805 3124 BOop(OP_BIT_OR);
378cc40b
LW
3125 case '=':
3126 s++;
3127 tmp = *s++;
3128 if (tmp == '=')
79072805
LW
3129 Eop(OP_EQ);
3130 if (tmp == '>')
3131 OPERATOR(',');
378cc40b 3132 if (tmp == '~')
79072805 3133 PMop(OP_MATCH);
599cee73 3134 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3135 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3136 s--;
3280af22
NIS
3137 if (PL_expect == XSTATE && isALPHA(tmp) &&
3138 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3139 {
3280af22
NIS
3140 if (PL_in_eval && !PL_rsfp) {
3141 d = PL_bufend;
a5f75d66
AD
3142 while (s < d) {
3143 if (*s++ == '\n') {
3144 incline(s);
3145 if (strnEQ(s,"=cut",4)) {
3146 s = strchr(s,'\n');
3147 if (s)
3148 s++;
3149 else
3150 s = d;
3151 incline(s);
3152 goto retry;
3153 }
3154 }
3155 }
3156 goto retry;
3157 }
3280af22
NIS
3158 s = PL_bufend;
3159 PL_doextract = TRUE;
a0d0e21e
LW
3160 goto retry;
3161 }
3280af22 3162 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3163 char *t;
51882d45 3164#ifdef PERL_STRICT_CR
a0d0e21e 3165 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3166#else
3167 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3168#endif
a0d0e21e
LW
3169 if (*t == '\n' || *t == '#') {
3170 s--;
3280af22 3171 PL_expect = XBLOCK;
a0d0e21e
LW
3172 goto leftbracket;
3173 }
79072805 3174 }
a0d0e21e
LW
3175 yylval.ival = 0;
3176 OPERATOR(ASSIGNOP);
378cc40b
LW
3177 case '!':
3178 s++;
3179 tmp = *s++;
3180 if (tmp == '=')
79072805 3181 Eop(OP_NE);
378cc40b 3182 if (tmp == '~')
79072805 3183 PMop(OP_NOT);
378cc40b
LW
3184 s--;
3185 OPERATOR('!');
3186 case '<':
3280af22 3187 if (PL_expect != XOPERATOR) {
93a17b20 3188 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3189 check_uni();
79072805
LW
3190 if (s[1] == '<')
3191 s = scan_heredoc(s);
3192 else
3193 s = scan_inputsymbol(s);
3194 TERM(sublex_start());
378cc40b
LW
3195 }
3196 s++;
3197 tmp = *s++;
3198 if (tmp == '<')
79072805 3199 SHop(OP_LEFT_SHIFT);
395c3793
LW
3200 if (tmp == '=') {
3201 tmp = *s++;
3202 if (tmp == '>')
79072805 3203 Eop(OP_NCMP);
395c3793 3204 s--;
79072805 3205 Rop(OP_LE);
395c3793 3206 }
378cc40b 3207 s--;
79072805 3208 Rop(OP_LT);
378cc40b
LW
3209 case '>':
3210 s++;
3211 tmp = *s++;
3212 if (tmp == '>')
79072805 3213 SHop(OP_RIGHT_SHIFT);
378cc40b 3214 if (tmp == '=')
79072805 3215 Rop(OP_GE);
378cc40b 3216 s--;
79072805 3217 Rop(OP_GT);
378cc40b
LW
3218
3219 case '$':
bbce6d69 3220 CLINE;
3221
3280af22
NIS
3222 if (PL_expect == XOPERATOR) {
3223 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3224 PL_expect = XTERM;
a0d0e21e 3225 depcom();
bbce6d69 3226 return ','; /* grandfather non-comma-format format */
a0d0e21e 3227 }
8990e307 3228 }
a0d0e21e 3229
7e2040f0 3230 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3231 PL_tokenbuf[0] = '@';
376b8730
SM
3232 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3233 sizeof PL_tokenbuf - 1, FALSE);
3234 if (PL_expect == XOPERATOR)
3235 no_op("Array length", s);
3280af22 3236 if (!PL_tokenbuf[1])
a0d0e21e 3237 PREREF(DOLSHARP);
3280af22
NIS
3238 PL_expect = XOPERATOR;
3239 PL_pending_ident = '#';
463ee0b2 3240 TOKEN(DOLSHARP);
79072805 3241 }
bbce6d69 3242
3280af22 3243 PL_tokenbuf[0] = '$';
376b8730
SM
3244 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3245 sizeof PL_tokenbuf - 1, FALSE);
3246 if (PL_expect == XOPERATOR)
3247 no_op("Scalar", s);
3280af22
NIS
3248 if (!PL_tokenbuf[1]) {
3249 if (s == PL_bufend)
bbce6d69 3250 yyerror("Final $ should be \\$ or $name");
3251 PREREF('$');
8990e307 3252 }
a0d0e21e 3253
bbce6d69 3254 /* This kludge not intended to be bulletproof. */
3280af22 3255 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3256 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3257 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 3258 yylval.opval->op_private = OPpCONST_ARYBASE;
3259 TERM(THING);
3260 }
3261
ff68c719 3262 d = s;
69d2bceb 3263 tmp = (I32)*s;
3280af22 3264 if (PL_lex_state == LEX_NORMAL)
ff68c719 3265 s = skipspace(s);
3266
3280af22 3267 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3268 char *t;
3269 if (*s == '[') {
3280af22 3270 PL_tokenbuf[0] = '@';
599cee73 3271 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3272 for(t = s + 1;
7e2040f0 3273 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3274 t++) ;
a0d0e21e 3275 if (*t++ == ',') {
3280af22
NIS
3276 PL_bufptr = skipspace(PL_bufptr);
3277 while (t < PL_bufend && *t != ']')
bbce6d69 3278 t++;
cea2e8a9 3279 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3280 "Multidimensional syntax %.*s not supported",
3281 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3282 }
3283 }
bbce6d69 3284 }
3285 else if (*s == '{') {
3280af22 3286 PL_tokenbuf[0] = '%';
599cee73 3287 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3288 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3289 {
3280af22 3290 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3291 STRLEN len;
3292 for (t++; isSPACE(*t); t++) ;
7e2040f0 3293 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3294 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3295 for (; isSPACE(*t); t++) ;
864dbfa3 3296 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3297 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3298 "You need to quote \"%s\"", tmpbuf);
748a9306 3299 }
93a17b20
LW
3300 }
3301 }
2f3197b3 3302 }
bbce6d69 3303
3280af22 3304 PL_expect = XOPERATOR;
69d2bceb 3305 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3306 bool islop = (PL_last_lop == PL_oldoldbufptr);
3307 if (!islop || PL_last_lop_op == OP_GREPSTART)
3308 PL_expect = XOPERATOR;
bbce6d69 3309 else if (strchr("$@\"'`q", *s))
3280af22 3310 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3311 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3312 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3313 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3314 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3315 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3316 if (tmp = keyword(tmpbuf, len)) {
3317 /* binary operators exclude handle interpretations */
3318 switch (tmp) {
3319 case -KEY_x:
3320 case -KEY_eq:
3321 case -KEY_ne:
3322 case -KEY_gt:
3323 case -KEY_lt:
3324 case -KEY_ge:
3325 case -KEY_le:
3326 case -KEY_cmp:
3327 break;
3328 default:
3280af22 3329 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3330 break;
3331 }
3332 }
68dc0745 3333 else {
3334 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3335 if (gv && GvCVu(gv))
3280af22 3336 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3337 }
93a17b20 3338 }
bbce6d69 3339 else if (isDIGIT(*s))
3280af22 3340 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3341 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3342 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3343 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3344 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3345 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3346 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3347 }
3280af22 3348 PL_pending_ident = '$';
79072805 3349 TOKEN('$');
378cc40b
LW
3350
3351 case '@':
3280af22 3352 if (PL_expect == XOPERATOR)
bbce6d69 3353 no_op("Array", s);
3280af22
NIS
3354 PL_tokenbuf[0] = '@';
3355 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3356 if (!PL_tokenbuf[1]) {
3357 if (s == PL_bufend)
bbce6d69 3358 yyerror("Final @ should be \\@ or @name");
3359 PREREF('@');
3360 }
3280af22 3361 if (PL_lex_state == LEX_NORMAL)
ff68c719 3362 s = skipspace(s);
3280af22 3363 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3364 if (*s == '{')
3280af22 3365 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3366
3367 /* Warn about @ where they meant $. */
599cee73 3368 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3369 if (*s == '[' || *s == '{') {
3370 char *t = s + 1;
7e2040f0 3371 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3372 t++;
3373 if (*t == '}' || *t == ']') {
3374 t++;
3280af22 3375 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3376 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3377 "Scalar value %.*s better written as $%.*s",
3280af22 3378 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3379 }
93a17b20
LW
3380 }
3381 }
463ee0b2 3382 }
3280af22 3383 PL_pending_ident = '@';
79072805 3384 TERM('@');
378cc40b
LW
3385
3386 case '/': /* may either be division or pattern */
3387 case '?': /* may either be conditional or pattern */
3280af22 3388 if (PL_expect != XOPERATOR) {
c277df42 3389 /* Disable warning on "study /blah/" */
3280af22
NIS
3390 if (PL_oldoldbufptr == PL_last_uni
3391 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3392 || memNE(PL_last_uni, "study", 5)
3393 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3394 check_uni();
8782bef2 3395 s = scan_pat(s,OP_MATCH);
79072805 3396 TERM(sublex_start());
378cc40b
LW
3397 }
3398 tmp = *s++;
a687059c 3399 if (tmp == '/')
79072805 3400 Mop(OP_DIVIDE);
378cc40b
LW
3401 OPERATOR(tmp);
3402
3403 case '.':
51882d45
GS
3404 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3405#ifdef PERL_STRICT_CR
3406 && s[1] == '\n'
3407#else
3408 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3409#endif
3410 && (s == PL_linestart || s[-1] == '\n') )
3411 {
3280af22
NIS
3412 PL_lex_formbrack = 0;
3413 PL_expect = XSTATE;
79072805
LW
3414 goto rightbracket;
3415 }
3280af22 3416 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3417 tmp = *s++;
a687059c
LW
3418 if (*s == tmp) {
3419 s++;
2f3197b3
LW
3420 if (*s == tmp) {
3421 s++;
79072805 3422 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3423 }
3424 else
79072805 3425 yylval.ival = 0;
378cc40b 3426 OPERATOR(DOTDOT);
a687059c 3427 }
3280af22 3428 if (PL_expect != XOPERATOR)
2f3197b3 3429 check_uni();
79072805 3430 Aop(OP_CONCAT);
378cc40b
LW
3431 }
3432 /* FALL THROUGH */
3433 case '0': case '1': case '2': case '3': case '4':
3434 case '5': case '6': case '7': case '8': case '9':
79072805 3435 s = scan_num(s);
3280af22 3436 if (PL_expect == XOPERATOR)
8990e307 3437 no_op("Number",s);
79072805
LW
3438 TERM(THING);
3439
3440 case '\'':
09bef843 3441 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3442 if (PL_expect == XOPERATOR) {
3443 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3444 PL_expect = XTERM;
a0d0e21e
LW
3445 depcom();
3446 return ','; /* grandfather non-comma-format format */
3447 }
463ee0b2 3448 else
8990e307 3449 no_op("String",s);
463ee0b2 3450 }
79072805 3451 if (!s)
85e6fe83 3452 missingterm((char*)0);
79072805
LW
3453 yylval.ival = OP_CONST;
3454 TERM(sublex_start());
3455
3456 case '"':
09bef843 3457 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3458 if (PL_expect == XOPERATOR) {
3459 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3460 PL_expect = XTERM;
a0d0e21e
LW
3461 depcom();
3462 return ','; /* grandfather non-comma-format format */
3463 }
463ee0b2 3464 else
8990e307 3465 no_op("String",s);
463ee0b2 3466 }
79072805 3467 if (!s)
85e6fe83 3468 missingterm((char*)0);
4633a7c4 3469 yylval.ival = OP_CONST;
3280af22 3470 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3471 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3472 yylval.ival = OP_STRINGIFY;
3473 break;
3474 }
3475 }
79072805
LW
3476 TERM(sublex_start());
3477
3478 case '`':
09bef843 3479 s = scan_str(s,FALSE,FALSE);
3280af22 3480 if (PL_expect == XOPERATOR)
8990e307 3481 no_op("Backticks",s);
79072805 3482 if (!s)
85e6fe83 3483 missingterm((char*)0);
79072805
LW
3484 yylval.ival = OP_BACKTICK;
3485 set_csh();
3486 TERM(sublex_start());
3487
3488 case '\\':
3489 s++;
599cee73 3490 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3491 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3492 *s, *s);
3280af22 3493 if (PL_expect == XOPERATOR)
8990e307 3494 no_op("Backslash",s);
79072805
LW
3495 OPERATOR(REFGEN);
3496
a7cb1f99 3497 case 'v':
e526c9e6 3498 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3499 char *start = s;
3500 start++;
3501 start++;
3502 while (isDIGIT(*start))
3503 start++;
3504 if (*start == '.' && isDIGIT(start[1])) {
3505 s = scan_num(s);
3506 TERM(THING);
3507 }
e526c9e6
GS
3508 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3509 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
3510 char c = *start;
3511 GV *gv;
3512 *start = '\0';
3513 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3514 *start = c;
3515 if (!gv) {
3516 s = scan_num(s);
3517 TERM(THING);
3518 }
3519 }
a7cb1f99
GS
3520 }
3521 goto keylookup;
79072805 3522 case 'x':
3280af22 3523 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3524 s++;
3525 Mop(OP_REPEAT);
2f3197b3 3526 }
79072805
LW
3527 goto keylookup;
3528
378cc40b 3529 case '_':
79072805
LW
3530 case 'a': case 'A':
3531 case 'b': case 'B':
3532 case 'c': case 'C':
3533 case 'd': case 'D':
3534 case 'e': case 'E':
3535 case 'f': case 'F':
3536 case 'g': case 'G':
3537 case 'h': case 'H':
3538 case 'i': case 'I':
3539 case 'j': case 'J':
3540 case 'k': case 'K':
3541 case 'l': case 'L':
3542 case 'm': case 'M':
3543 case 'n': case 'N':
3544 case 'o': case 'O':
3545 case 'p': case 'P':
3546 case 'q': case 'Q':
3547 case 'r': case 'R':
3548 case 's': case 'S':
3549 case 't': case 'T':
3550 case 'u': case 'U':
a7cb1f99 3551 case 'V':
79072805
LW
3552 case 'w': case 'W':
3553 case 'X':
3554 case 'y': case 'Y':
3555 case 'z': case 'Z':
3556
49dc05e3 3557 keylookup: {
2d8e6c8d 3558 STRLEN n_a;
161b471a
NIS
3559 gv = Nullgv;
3560 gvp = 0;
49dc05e3 3561
3280af22
NIS
3562 PL_bufptr = s;
3563 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3564
3565 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3566 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3567 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3568 (PL_tokenbuf[0] == 'q' &&
3569 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3570
3571 /* x::* is just a word, unless x is "CORE" */
3280af22 3572 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3573 goto just_a_word;
3574
3643fb5f 3575 d = s;
3280af22 3576 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3577 d++; /* no comments skipped here, or s### is misparsed */
3578
3579 /* Is this a label? */
3280af22
NIS
3580 if (!tmp && PL_expect == XSTATE
3581 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3582 s = d + 1;
3280af22 3583 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3584 CLINE;
3585 TOKEN(LABEL);
3643fb5f
CS
3586 }
3587
3588 /* Check for keywords */
3280af22 3589 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3590
3591 /* Is this a word before a => operator? */
748a9306
LW
3592 if (strnEQ(d,"=>",2)) {
3593 CLINE;
3280af22 3594 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3595 yylval.opval->op_private = OPpCONST_BARE;
3596 TERM(WORD);
3597 }
3598
a0d0e21e 3599 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3600 GV *ogv = Nullgv; /* override (winner) */
3601 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3602 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {