This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix missing perl_alloc() declaration under PERL_IMPLICIT_SYS
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a
PP
47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff
PP
49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
a790bc05
PP
61/* XXX If this causes problems, set i_unistd=undef in the hint file. */
62#ifdef I_UNISTD
63# include <unistd.h> /* Needed for execv() */
64#endif
65
66
79072805
LW
67#ifdef ff_next
68#undef ff_next
d48672a2
LW
69#endif
70
a1a0e61e
TD
71#ifdef USE_PURE_BISON
72YYSTYPE* yylval_pointer = NULL;
73int* yychar_pointer = NULL;
22c35a8c
GS
74# undef yylval
75# undef yychar
e4bfbdd4
JH
76# define yylval (*yylval_pointer)
77# define yychar (*yychar_pointer)
78# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
79# undef yylex
80# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
81#endif
82
79072805 83#include "keywords.h"
fe14fcc3 84
ffb4593c
NT
85/* CLINE is a macro that ensures PL_copline has a sane value */
86
ae986130
LW
87#ifdef CLINE
88#undef CLINE
89#endif
57843af0 90#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 91
ffb4593c
NT
92/*
93 * Convenience functions to return different tokens and prime the
9cbb5ea2 94 * lexer for the next token. They all take an argument.
ffb4593c
NT
95 *
96 * TOKEN : generic token (used for '(', DOLSHARP, etc)
97 * OPERATOR : generic operator
98 * AOPERATOR : assignment operator
99 * PREBLOCK : beginning the block after an if, while, foreach, ...
100 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
101 * PREREF : *EXPR where EXPR is not a simple identifier
102 * TERM : expression term
103 * LOOPX : loop exiting command (goto, last, dump, etc)
104 * FTST : file test operator
105 * FUN0 : zero-argument function
2d2e263d 106 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
107 * BOop : bitwise or or xor
108 * BAop : bitwise and
109 * SHop : shift operator
110 * PWop : power operator
9cbb5ea2 111 * PMop : pattern-matching operator
ffb4593c
NT
112 * Aop : addition-level operator
113 * Mop : multiplication-level operator
114 * Eop : equality-testing operator
115 * Rop : relational operator <= != gt
116 *
117 * Also see LOP and lop() below.
118 */
119
3280af22
NIS
120#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
121#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
122#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
123#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
124#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
125#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
126#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
127#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
128#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
129#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
130#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
131#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
132#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
133#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
134#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
135#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
136#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
137#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
138#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
139#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 140
a687059c
LW
141/* This bit of chicanery makes a unary function followed by
142 * a parenthesis into a function with one argument, highest precedence.
143 */
2f3197b3 144#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
145 PL_expect = XTERM, \
146 PL_bufptr = s, \
147 PL_last_uni = PL_oldbufptr, \
148 PL_last_lop_op = f, \
a687059c
LW
149 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
150
79072805 151#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_bufptr = s, \
153 PL_last_uni = PL_oldbufptr, \
79072805
LW
154 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
155
9f68db38 156/* grandfather return to old style */
3280af22 157#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 158
ffb4593c
NT
159/*
160 * S_ao
161 *
162 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
163 * into an OP_ANDASSIGN or OP_ORASSIGN
164 */
165
76e3520e 166STATIC int
cea2e8a9 167S_ao(pTHX_ int toketype)
a0d0e21e 168{
3280af22
NIS
169 if (*PL_bufptr == '=') {
170 PL_bufptr++;
a0d0e21e
LW
171 if (toketype == ANDAND)
172 yylval.ival = OP_ANDASSIGN;
173 else if (toketype == OROR)
174 yylval.ival = OP_ORASSIGN;
175 toketype = ASSIGNOP;
176 }
177 return toketype;
178}
179
ffb4593c
NT
180/*
181 * S_no_op
182 * When Perl expects an operator and finds something else, no_op
183 * prints the warning. It always prints "<something> found where
184 * operator expected. It prints "Missing semicolon on previous line?"
185 * if the surprise occurs at the start of the line. "do you need to
186 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
187 * where the compiler doesn't know if foo is a method call or a function.
188 * It prints "Missing operator before end of line" if there's nothing
189 * after the missing operator, or "... before <...>" if there is something
190 * after the missing operator.
191 */
192
76e3520e 193STATIC void
cea2e8a9 194S_no_op(pTHX_ char *what, char *s)
463ee0b2 195{
3280af22
NIS
196 char *oldbp = PL_bufptr;
197 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 198
1189a94a
GS
199 if (!s)
200 s = oldbp;
201 else {
202 assert(s >= oldbp);
203 PL_bufptr = s;
204 }
cea2e8a9 205 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 206 if (is_first)
cea2e8a9 207 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 208 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 209 char *t;
7e2040f0 210 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 211 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 212 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 213 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
214 }
215 else
cea2e8a9 216 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 217 PL_bufptr = oldbp;
8990e307
LW
218}
219
ffb4593c
NT
220/*
221 * S_missingterm
222 * Complain about missing quote/regexp/heredoc terminator.
223 * If it's called with (char *)NULL then it cauterizes the line buffer.
224 * If we're in a delimited string and the delimiter is a control
225 * character, it's reformatted into a two-char sequence like ^C.
226 * This is fatal.
227 */
228
76e3520e 229STATIC void
cea2e8a9 230S_missingterm(pTHX_ char *s)
8990e307
LW
231{
232 char tmpbuf[3];
233 char q;
234 if (s) {
235 char *nl = strrchr(s,'\n');
d2719217 236 if (nl)
8990e307
LW
237 *nl = '\0';
238 }
9d116dd7
JH
239 else if (
240#ifdef EBCDIC
241 iscntrl(PL_multi_close)
242#else
243 PL_multi_close < 32 || PL_multi_close == 127
244#endif
245 ) {
8990e307 246 *tmpbuf = '^';
3280af22 247 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
248 s = "\\n";
249 tmpbuf[2] = '\0';
250 s = tmpbuf;
251 }
252 else {
3280af22 253 *tmpbuf = PL_multi_close;
8990e307
LW
254 tmpbuf[1] = '\0';
255 s = tmpbuf;
256 }
257 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 258 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 259}
79072805 260
ffb4593c
NT
261/*
262 * Perl_deprecate
ffb4593c
NT
263 */
264
79072805 265void
864dbfa3 266Perl_deprecate(pTHX_ char *s)
a0d0e21e 267{
d008e5eb 268 dTHR;
599cee73 269 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 270 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
271}
272
ffb4593c
NT
273/*
274 * depcom
9cbb5ea2 275 * Deprecate a comma-less variable list.
ffb4593c
NT
276 */
277
76e3520e 278STATIC void
cea2e8a9 279S_depcom(pTHX)
a0d0e21e
LW
280{
281 deprecate("comma-less variable list");
282}
283
ffb4593c 284/*
9cbb5ea2
GS
285 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
286 * utf16-to-utf8-reversed.
ffb4593c
NT
287 */
288
c39cd008
GS
289#ifdef PERL_CR_FILTER
290static void
291strip_return(SV *sv)
292{
293 register char *s = SvPVX(sv);
294 register char *e = s + SvCUR(sv);
295 /* outer loop optimized to do nothing if there are no CR-LFs */
296 while (s < e) {
297 if (*s++ == '\r' && *s == '\n') {
298 /* hit a CR-LF, need to copy the rest */
299 register char *d = s - 1;
300 *d++ = *s++;
301 while (s < e) {
302 if (*s == '\r' && s[1] == '\n')
303 s++;
304 *d++ = *s++;
305 }
306 SvCUR(sv) -= s - d;
307 return;
308 }
309 }
310}
a868473f 311
76e3520e 312STATIC I32
c39cd008 313S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 314{
c39cd008
GS
315 I32 count = FILTER_READ(idx+1, sv, maxlen);
316 if (count > 0 && !maxlen)
317 strip_return(sv);
318 return count;
a868473f
NIS
319}
320#endif
321
a0ed51b3 322STATIC I32
cea2e8a9 323S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
324{
325 I32 count = FILTER_READ(idx+1, sv, maxlen);
326 if (count) {
dfe13c55
GS
327 U8* tmps;
328 U8* tend;
329 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 330 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 331 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
332
333 }
334 return count;
335}
336
337STATIC I32
cea2e8a9 338S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
339{
340 I32 count = FILTER_READ(idx+1, sv, maxlen);
341 if (count) {
dfe13c55
GS
342 U8* tmps;
343 U8* tend;
344 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 345 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 346 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
347
348 }
349 return count;
350}
a868473f 351
ffb4593c
NT
352/*
353 * Perl_lex_start
9cbb5ea2
GS
354 * Initialize variables. Uses the Perl save_stack to save its state (for
355 * recursive calls to the parser).
ffb4593c
NT
356 */
357
a0d0e21e 358void
864dbfa3 359Perl_lex_start(pTHX_ SV *line)
79072805 360{
0f15f207 361 dTHR;
8990e307
LW
362 char *s;
363 STRLEN len;
364
3280af22
NIS
365 SAVEI32(PL_lex_dojoin);
366 SAVEI32(PL_lex_brackets);
3280af22
NIS
367 SAVEI32(PL_lex_casemods);
368 SAVEI32(PL_lex_starts);
369 SAVEI32(PL_lex_state);
7766f137 370 SAVEVPTR(PL_lex_inpat);
3280af22 371 SAVEI32(PL_lex_inwhat);
18b09519
GS
372 if (PL_lex_state == LEX_KNOWNEXT) {
373 I32 toke = PL_nexttoke;
374 while (--toke >= 0) {
375 SAVEI32(PL_nexttype[toke]);
376 SAVEVPTR(PL_nextval[toke]);
377 }
378 SAVEI32(PL_nexttoke);
379 PL_nexttoke = 0;
380 }
57843af0 381 SAVECOPLINE(PL_curcop);
3280af22
NIS
382 SAVEPPTR(PL_bufptr);
383 SAVEPPTR(PL_bufend);
384 SAVEPPTR(PL_oldbufptr);
385 SAVEPPTR(PL_oldoldbufptr);
386 SAVEPPTR(PL_linestart);
387 SAVESPTR(PL_linestr);
388 SAVEPPTR(PL_lex_brackstack);
389 SAVEPPTR(PL_lex_casestack);
c76ac1ee 390 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
391 SAVESPTR(PL_lex_stuff);
392 SAVEI32(PL_lex_defer);
09bef843 393 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 394 SAVESPTR(PL_lex_repl);
bebdddfc
GS
395 SAVEINT(PL_expect);
396 SAVEINT(PL_lex_expect);
3280af22
NIS
397
398 PL_lex_state = LEX_NORMAL;
399 PL_lex_defer = 0;
400 PL_expect = XSTATE;
401 PL_lex_brackets = 0;
3280af22
NIS
402 New(899, PL_lex_brackstack, 120, char);
403 New(899, PL_lex_casestack, 12, char);
404 SAVEFREEPV(PL_lex_brackstack);
405 SAVEFREEPV(PL_lex_casestack);
406 PL_lex_casemods = 0;
407 *PL_lex_casestack = '\0';
408 PL_lex_dojoin = 0;
409 PL_lex_starts = 0;
410 PL_lex_stuff = Nullsv;
411 PL_lex_repl = Nullsv;
412 PL_lex_inpat = 0;
413 PL_lex_inwhat = 0;
09bef843 414 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
415 PL_linestr = line;
416 if (SvREADONLY(PL_linestr))
417 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
418 s = SvPV(PL_linestr, len);
8990e307 419 if (len && s[len-1] != ';') {
3280af22
NIS
420 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
421 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
422 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 423 }
3280af22
NIS
424 SvTEMP_off(PL_linestr);
425 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
426 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
427 SvREFCNT_dec(PL_rs);
79cb57f6 428 PL_rs = newSVpvn("\n", 1);
3280af22 429 PL_rsfp = 0;
79072805 430}
a687059c 431
ffb4593c
NT
432/*
433 * Perl_lex_end
9cbb5ea2
GS
434 * Finalizer for lexing operations. Must be called when the parser is
435 * done with the lexer.
ffb4593c
NT
436 */
437
463ee0b2 438void
864dbfa3 439Perl_lex_end(pTHX)
463ee0b2 440{
3280af22 441 PL_doextract = FALSE;
463ee0b2
LW
442}
443
ffb4593c
NT
444/*
445 * S_incline
446 * This subroutine has nothing to do with tilting, whether at windmills
447 * or pinball tables. Its name is short for "increment line". It
57843af0 448 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 449 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
450 * # line 500 "foo.pm"
451 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
452 */
453
76e3520e 454STATIC void
cea2e8a9 455S_incline(pTHX_ char *s)
463ee0b2 456{
0f15f207 457 dTHR;
463ee0b2
LW
458 char *t;
459 char *n;
73659bf1 460 char *e;
463ee0b2 461 char ch;
463ee0b2 462
57843af0 463 CopLINE_inc(PL_curcop);
463ee0b2
LW
464 if (*s++ != '#')
465 return;
466 while (*s == ' ' || *s == '\t') s++;
73659bf1
GS
467 if (strnEQ(s, "line", 4))
468 s += 4;
469 else
470 return;
471 if (*s == ' ' || *s == '\t')
472 s++;
473 else
474 return;
475 while (*s == ' ' || *s == '\t') s++;
463ee0b2
LW
476 if (!isDIGIT(*s))
477 return;
478 n = s;
479 while (isDIGIT(*s))
480 s++;
481 while (*s == ' ' || *s == '\t')
482 s++;
73659bf1 483 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 484 s++;
73659bf1
GS
485 e = t + 1;
486 }
463ee0b2 487 else {
463ee0b2 488 for (t = s; !isSPACE(*t); t++) ;
73659bf1 489 e = t;
463ee0b2 490 }
73659bf1
GS
491 while (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\f')
492 e++;
493 if (*e != '\n' && *e != '\0')
494 return; /* false alarm */
495
463ee0b2
LW
496 ch = *t;
497 *t = '\0';
498 if (t - s > 0)
57843af0 499 CopFILE_set(PL_curcop, s);
463ee0b2 500 *t = ch;
57843af0 501 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
502}
503
ffb4593c
NT
504/*
505 * S_skipspace
506 * Called to gobble the appropriate amount and type of whitespace.
507 * Skips comments as well.
508 */
509
76e3520e 510STATIC char *
cea2e8a9 511S_skipspace(pTHX_ register char *s)
a687059c 512{
11343788 513 dTHR;
3280af22
NIS
514 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
515 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
516 s++;
517 return s;
518 }
519 for (;;) {
fd049845 520 STRLEN prevlen;
09bef843
SB
521 SSize_t oldprevlen, oldoldprevlen;
522 SSize_t oldloplen, oldunilen;
60e6418e
GS
523 while (s < PL_bufend && isSPACE(*s)) {
524 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
525 incline(s);
526 }
ffb4593c
NT
527
528 /* comment */
3280af22
NIS
529 if (s < PL_bufend && *s == '#') {
530 while (s < PL_bufend && *s != '\n')
463ee0b2 531 s++;
60e6418e 532 if (s < PL_bufend) {
463ee0b2 533 s++;
60e6418e
GS
534 if (PL_in_eval && !PL_rsfp) {
535 incline(s);
536 continue;
537 }
538 }
463ee0b2 539 }
ffb4593c
NT
540
541 /* only continue to recharge the buffer if we're at the end
542 * of the buffer, we're not reading from a source filter, and
543 * we're in normal lexing mode
544 */
09bef843
SB
545 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
546 PL_lex_state == LEX_FORMLINE)
463ee0b2 547 return s;
ffb4593c
NT
548
549 /* try to recharge the buffer */
9cbb5ea2
GS
550 if ((s = filter_gets(PL_linestr, PL_rsfp,
551 (prevlen = SvCUR(PL_linestr)))) == Nullch)
552 {
553 /* end of file. Add on the -p or -n magic */
3280af22
NIS
554 if (PL_minus_n || PL_minus_p) {
555 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
556 ";}continue{print or die qq(-p destination: $!\\n)" :
557 "");
3280af22
NIS
558 sv_catpv(PL_linestr,";}");
559 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
560 }
561 else
3280af22 562 sv_setpv(PL_linestr,";");
ffb4593c
NT
563
564 /* reset variables for next time we lex */
9cbb5ea2
GS
565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
566 = SvPVX(PL_linestr);
3280af22 567 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
568
569 /* Close the filehandle. Could be from -P preprocessor,
570 * STDIN, or a regular file. If we were reading code from
571 * STDIN (because the commandline held no -e or filename)
572 * then we don't close it, we reset it so the code can
573 * read from STDIN too.
574 */
575
3280af22
NIS
576 if (PL_preprocess && !PL_in_eval)
577 (void)PerlProc_pclose(PL_rsfp);
578 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
579 PerlIO_clearerr(PL_rsfp);
8990e307 580 else
3280af22
NIS
581 (void)PerlIO_close(PL_rsfp);
582 PL_rsfp = Nullfp;
463ee0b2
LW
583 return s;
584 }
ffb4593c
NT
585
586 /* not at end of file, so we only read another line */
09bef843
SB
587 /* make corresponding updates to old pointers, for yyerror() */
588 oldprevlen = PL_oldbufptr - PL_bufend;
589 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
590 if (PL_last_uni)
591 oldunilen = PL_last_uni - PL_bufend;
592 if (PL_last_lop)
593 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
594 PL_linestart = PL_bufptr = s + prevlen;
595 PL_bufend = s + SvCUR(PL_linestr);
596 s = PL_bufptr;
09bef843
SB
597 PL_oldbufptr = s + oldprevlen;
598 PL_oldoldbufptr = s + oldoldprevlen;
599 if (PL_last_uni)
600 PL_last_uni = s + oldunilen;
601 if (PL_last_lop)
602 PL_last_lop = s + oldloplen;
a0d0e21e 603 incline(s);
ffb4593c
NT
604
605 /* debugger active and we're not compiling the debugger code,
606 * so store the line into the debugger's array of lines
607 */
3280af22 608 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
609 SV *sv = NEWSV(85,0);
610
611 sv_upgrade(sv, SVt_PVMG);
3280af22 612 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 613 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 614 }
463ee0b2 615 }
a687059c 616}
378cc40b 617
ffb4593c
NT
618/*
619 * S_check_uni
620 * Check the unary operators to ensure there's no ambiguity in how they're
621 * used. An ambiguous piece of code would be:
622 * rand + 5
623 * This doesn't mean rand() + 5. Because rand() is a unary operator,
624 * the +5 is its argument.
625 */
626
76e3520e 627STATIC void
cea2e8a9 628S_check_uni(pTHX)
ba106d47 629{
2f3197b3 630 char *s;
a0d0e21e 631 char *t;
0453d815 632 dTHR;
2f3197b3 633
3280af22 634 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 635 return;
3280af22
NIS
636 while (isSPACE(*PL_last_uni))
637 PL_last_uni++;
7e2040f0 638 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 639 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 640 return;
0453d815 641 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 642 char ch = *s;
0453d815
PM
643 *s = '\0';
644 Perl_warner(aTHX_ WARN_AMBIGUOUS,
645 "Warning: Use of \"%s\" without parens is ambiguous",
646 PL_last_uni);
647 *s = ch;
648 }
2f3197b3
LW
649}
650
ffb4593c
NT
651/* workaround to replace the UNI() macro with a function. Only the
652 * hints/uts.sh file mentions this. Other comments elsewhere in the
653 * source indicate Microport Unix might need it too.
654 */
655
ffed7fef
LW
656#ifdef CRIPPLED_CC
657
658#undef UNI
ffed7fef 659#define UNI(f) return uni(f,s)
ffed7fef 660
76e3520e 661STATIC int
cea2e8a9 662S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
663{
664 yylval.ival = f;
3280af22
NIS
665 PL_expect = XTERM;
666 PL_bufptr = s;
8f872242
NIS
667 PL_last_uni = PL_oldbufptr;
668 PL_last_lop_op = f;
ffed7fef
LW
669 if (*s == '(')
670 return FUNC1;
671 s = skipspace(s);
672 if (*s == '(')
673 return FUNC1;
674 else
675 return UNIOP;
676}
677
a0d0e21e
LW
678#endif /* CRIPPLED_CC */
679
ffb4593c
NT
680/*
681 * LOP : macro to build a list operator. Its behaviour has been replaced
682 * with a subroutine, S_lop() for which LOP is just another name.
683 */
684
a0d0e21e
LW
685#define LOP(f,x) return lop(f,x,s)
686
ffb4593c
NT
687/*
688 * S_lop
689 * Build a list operator (or something that might be one). The rules:
690 * - if we have a next token, then it's a list operator [why?]
691 * - if the next thing is an opening paren, then it's a function
692 * - else it's a list operator
693 */
694
76e3520e 695STATIC I32
a0be28da 696S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 697{
0f15f207 698 dTHR;
79072805 699 yylval.ival = f;
35c8bce7 700 CLINE;
3280af22
NIS
701 PL_expect = x;
702 PL_bufptr = s;
703 PL_last_lop = PL_oldbufptr;
704 PL_last_lop_op = f;
705 if (PL_nexttoke)
a0d0e21e 706 return LSTOP;
79072805
LW
707 if (*s == '(')
708 return FUNC;
709 s = skipspace(s);
710 if (*s == '(')
711 return FUNC;
712 else
713 return LSTOP;
714}
715
ffb4593c
NT
716/*
717 * S_force_next
9cbb5ea2 718 * When the lexer realizes it knows the next token (for instance,
ffb4593c 719 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
720 * to know what token to return the next time the lexer is called. Caller
721 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
722 * handles the token correctly.
ffb4593c
NT
723 */
724
76e3520e 725STATIC void
cea2e8a9 726S_force_next(pTHX_ I32 type)
79072805 727{
3280af22
NIS
728 PL_nexttype[PL_nexttoke] = type;
729 PL_nexttoke++;
730 if (PL_lex_state != LEX_KNOWNEXT) {
731 PL_lex_defer = PL_lex_state;
732 PL_lex_expect = PL_expect;
733 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
734 }
735}
736
ffb4593c
NT
737/*
738 * S_force_word
739 * When the lexer knows the next thing is a word (for instance, it has
740 * just seen -> and it knows that the next char is a word char, then
741 * it calls S_force_word to stick the next word into the PL_next lookahead.
742 *
743 * Arguments:
b1b65b59 744 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
745 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
746 * int check_keyword : if true, Perl checks to make sure the word isn't
747 * a keyword (do this if the word is a label, e.g. goto FOO)
748 * int allow_pack : if true, : characters will also be allowed (require,
749 * use, etc. do this)
9cbb5ea2 750 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
751 */
752
76e3520e 753STATIC char *
cea2e8a9 754S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 755{
463ee0b2
LW
756 register char *s;
757 STRLEN len;
758
759 start = skipspace(start);
760 s = start;
7e2040f0 761 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 762 (allow_pack && *s == ':') ||
15f0808c 763 (allow_initial_tick && *s == '\'') )
a0d0e21e 764 {
3280af22
NIS
765 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
766 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
767 return start;
768 if (token == METHOD) {
769 s = skipspace(s);
770 if (*s == '(')
3280af22 771 PL_expect = XTERM;
463ee0b2 772 else {
3280af22 773 PL_expect = XOPERATOR;
463ee0b2 774 }
79072805 775 }
3280af22
NIS
776 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
777 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
778 force_next(token);
779 }
780 return s;
781}
782
ffb4593c
NT
783/*
784 * S_force_ident
9cbb5ea2 785 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
786 * text only contains the "foo" portion. The first argument is a pointer
787 * to the "foo", and the second argument is the type symbol to prefix.
788 * Forces the next token to be a "WORD".
9cbb5ea2 789 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
790 */
791
76e3520e 792STATIC void
cea2e8a9 793S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
794{
795 if (s && *s) {
11343788 796 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 797 PL_nextval[PL_nexttoke].opval = o;
79072805 798 force_next(WORD);
748a9306 799 if (kind) {
e858de61 800 dTHR; /* just for in_eval */
11343788 801 o->op_private = OPpCONST_ENTERED;
55497cff
PP
802 /* XXX see note in pp_entereval() for why we forgo typo
803 warnings if the symbol must be introduced in an eval.
804 GSAR 96-10-12 */
3280af22 805 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
806 kind == '$' ? SVt_PV :
807 kind == '@' ? SVt_PVAV :
808 kind == '%' ? SVt_PVHV :
809 SVt_PVGV
810 );
748a9306 811 }
79072805
LW
812 }
813}
814
ffb4593c
NT
815/*
816 * S_force_version
817 * Forces the next token to be a version number.
818 */
819
76e3520e 820STATIC char *
cea2e8a9 821S_force_version(pTHX_ char *s)
89bfa8cd
PP
822{
823 OP *version = Nullop;
44dcb63b 824 char *d;
89bfa8cd
PP
825
826 s = skipspace(s);
827
44dcb63b 828 d = s;
dd629d5b 829 if (*d == 'v')
44dcb63b 830 d++;
44dcb63b 831 if (isDIGIT(*d)) {
a7cb1f99 832 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 833 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 834 SV *ver;
89bfa8cd
PP
835 s = scan_num(s);
836 /* real VERSION number -- GBARR */
837 version = yylval.opval;
dd629d5b
GS
838 ver = cSVOPx(version)->op_sv;
839 if (SvPOK(ver) && !SvNIOK(ver)) {
44dcb63b
GS
840 SvUPGRADE(ver, SVt_PVIV);
841 SvIOKp_on(ver); /* hint that it is a version */
842 }
89bfa8cd
PP
843 }
844 }
845
846 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 847 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
848 force_next(WORD);
849
850 return (s);
851}
852
ffb4593c
NT
853/*
854 * S_tokeq
855 * Tokenize a quoted string passed in as an SV. It finds the next
856 * chunk, up to end of string or a backslash. It may make a new
857 * SV containing that chunk (if HINT_NEW_STRING is on). It also
858 * turns \\ into \.
859 */
860
76e3520e 861STATIC SV *
cea2e8a9 862S_tokeq(pTHX_ SV *sv)
79072805
LW
863{
864 register char *s;
865 register char *send;
866 register char *d;
b3ac6de7
IZ
867 STRLEN len = 0;
868 SV *pv = sv;
79072805
LW
869
870 if (!SvLEN(sv))
b3ac6de7 871 goto finish;
79072805 872
a0d0e21e 873 s = SvPV_force(sv, len);
748a9306 874 if (SvIVX(sv) == -1)
b3ac6de7 875 goto finish;
463ee0b2 876 send = s + len;
79072805
LW
877 while (s < send && *s != '\\')
878 s++;
879 if (s == send)
b3ac6de7 880 goto finish;
79072805 881 d = s;
3280af22 882 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 883 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
884 while (s < send) {
885 if (*s == '\\') {
a0d0e21e 886 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
887 s++; /* all that, just for this */
888 }
889 *d++ = *s++;
890 }
891 *d = '\0';
463ee0b2 892 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 893 finish:
3280af22 894 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 895 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
896 return sv;
897}
898
ffb4593c
NT
899/*
900 * Now come three functions related to double-quote context,
901 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
902 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
903 * interact with PL_lex_state, and create fake ( ... ) argument lists
904 * to handle functions and concatenation.
905 * They assume that whoever calls them will be setting up a fake
906 * join call, because each subthing puts a ',' after it. This lets
907 * "lower \luPpEr"
908 * become
909 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
910 *
911 * (I'm not sure whether the spurious commas at the end of lcfirst's
912 * arguments and join's arguments are created or not).
913 */
914
915/*
916 * S_sublex_start
917 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
918 *
919 * Pattern matching will set PL_lex_op to the pattern-matching op to
920 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
921 *
922 * OP_CONST and OP_READLINE are easy--just make the new op and return.
923 *
924 * Everything else becomes a FUNC.
925 *
926 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
927 * had an OP_CONST or OP_READLINE). This just sets us up for a
928 * call to S_sublex_push().
929 */
930
76e3520e 931STATIC I32
cea2e8a9 932S_sublex_start(pTHX)
79072805
LW
933{
934 register I32 op_type = yylval.ival;
79072805
LW
935
936 if (op_type == OP_NULL) {
3280af22
NIS
937 yylval.opval = PL_lex_op;
938 PL_lex_op = Nullop;
79072805
LW
939 return THING;
940 }
941 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 942 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
943
944 if (SvTYPE(sv) == SVt_PVIV) {
945 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
946 STRLEN len;
947 char *p;
948 SV *nsv;
949
950 p = SvPV(sv, len);
79cb57f6 951 nsv = newSVpvn(p, len);
b3ac6de7
IZ
952 SvREFCNT_dec(sv);
953 sv = nsv;
954 }
955 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 956 PL_lex_stuff = Nullsv;
79072805
LW
957 return THING;
958 }
959
3280af22
NIS
960 PL_sublex_info.super_state = PL_lex_state;
961 PL_sublex_info.sub_inwhat = op_type;
962 PL_sublex_info.sub_op = PL_lex_op;
963 PL_lex_state = LEX_INTERPPUSH;
55497cff 964
3280af22
NIS
965 PL_expect = XTERM;
966 if (PL_lex_op) {
967 yylval.opval = PL_lex_op;
968 PL_lex_op = Nullop;
55497cff
PP
969 return PMFUNC;
970 }
971 else
972 return FUNC;
973}
974
ffb4593c
NT
975/*
976 * S_sublex_push
977 * Create a new scope to save the lexing state. The scope will be
978 * ended in S_sublex_done. Returns a '(', starting the function arguments
979 * to the uc, lc, etc. found before.
980 * Sets PL_lex_state to LEX_INTERPCONCAT.
981 */
982
76e3520e 983STATIC I32
cea2e8a9 984S_sublex_push(pTHX)
55497cff 985{
0f15f207 986 dTHR;
f46d017c 987 ENTER;
55497cff 988
3280af22
NIS
989 PL_lex_state = PL_sublex_info.super_state;
990 SAVEI32(PL_lex_dojoin);
991 SAVEI32(PL_lex_brackets);
3280af22
NIS
992 SAVEI32(PL_lex_casemods);
993 SAVEI32(PL_lex_starts);
994 SAVEI32(PL_lex_state);
7766f137 995 SAVEVPTR(PL_lex_inpat);
3280af22 996 SAVEI32(PL_lex_inwhat);
57843af0 997 SAVECOPLINE(PL_curcop);
3280af22
NIS
998 SAVEPPTR(PL_bufptr);
999 SAVEPPTR(PL_oldbufptr);
1000 SAVEPPTR(PL_oldoldbufptr);
1001 SAVEPPTR(PL_linestart);
1002 SAVESPTR(PL_linestr);
1003 SAVEPPTR(PL_lex_brackstack);
1004 SAVEPPTR(PL_lex_casestack);
1005
1006 PL_linestr = PL_lex_stuff;
1007 PL_lex_stuff = Nullsv;
1008
9cbb5ea2
GS
1009 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1010 = SvPVX(PL_linestr);
3280af22
NIS
1011 PL_bufend += SvCUR(PL_linestr);
1012 SAVEFREESV(PL_linestr);
1013
1014 PL_lex_dojoin = FALSE;
1015 PL_lex_brackets = 0;
3280af22
NIS
1016 New(899, PL_lex_brackstack, 120, char);
1017 New(899, PL_lex_casestack, 12, char);
1018 SAVEFREEPV(PL_lex_brackstack);
1019 SAVEFREEPV(PL_lex_casestack);
1020 PL_lex_casemods = 0;
1021 *PL_lex_casestack = '\0';
1022 PL_lex_starts = 0;
1023 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1024 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1025
1026 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1027 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1028 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1029 else
3280af22 1030 PL_lex_inpat = Nullop;
79072805 1031
55497cff 1032 return '(';
79072805
LW
1033}
1034
ffb4593c
NT
1035/*
1036 * S_sublex_done
1037 * Restores lexer state after a S_sublex_push.
1038 */
1039
76e3520e 1040STATIC I32
cea2e8a9 1041S_sublex_done(pTHX)
79072805 1042{
3280af22
NIS
1043 if (!PL_lex_starts++) {
1044 PL_expect = XOPERATOR;
79cb57f6 1045 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1046 return THING;
1047 }
1048
3280af22
NIS
1049 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1050 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1051 return yylex();
79072805
LW
1052 }
1053
ffb4593c 1054 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1055 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1056 PL_linestr = PL_lex_repl;
1057 PL_lex_inpat = 0;
1058 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1059 PL_bufend += SvCUR(PL_linestr);
1060 SAVEFREESV(PL_linestr);
1061 PL_lex_dojoin = FALSE;
1062 PL_lex_brackets = 0;
3280af22
NIS
1063 PL_lex_casemods = 0;
1064 *PL_lex_casestack = '\0';
1065 PL_lex_starts = 0;
25da4f38 1066 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1067 PL_lex_state = LEX_INTERPNORMAL;
1068 PL_lex_starts++;
e9fa98b2
HS
1069 /* we don't clear PL_lex_repl here, so that we can check later
1070 whether this is an evalled subst; that means we rely on the
1071 logic to ensure sublex_done() is called again only via the
1072 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1073 }
e9fa98b2 1074 else {
3280af22 1075 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1076 PL_lex_repl = Nullsv;
1077 }
79072805 1078 return ',';
ffed7fef
LW
1079 }
1080 else {
f46d017c 1081 LEAVE;
3280af22
NIS
1082 PL_bufend = SvPVX(PL_linestr);
1083 PL_bufend += SvCUR(PL_linestr);
1084 PL_expect = XOPERATOR;
09bef843 1085 PL_sublex_info.sub_inwhat = 0;
79072805 1086 return ')';
ffed7fef
LW
1087 }
1088}
1089
02aa26ce
NT
1090/*
1091 scan_const
1092
1093 Extracts a pattern, double-quoted string, or transliteration. This
1094 is terrifying code.
1095
3280af22
NIS
1096 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1097 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1098 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1099
9b599b2a
GS
1100 Returns a pointer to the character scanned up to. Iff this is
1101 advanced from the start pointer supplied (ie if anything was
1102 successfully parsed), will leave an OP for the substring scanned
1103 in yylval. Caller must intuit reason for not parsing further
1104 by looking at the next characters herself.
1105
02aa26ce
NT
1106 In patterns:
1107 backslashes:
1108 double-quoted style: \r and \n
1109 regexp special ones: \D \s
1110 constants: \x3
1111 backrefs: \1 (deprecated in substitution replacements)
1112 case and quoting: \U \Q \E
1113 stops on @ and $, but not for $ as tail anchor
1114
1115 In transliterations:
1116 characters are VERY literal, except for - not at the start or end
1117 of the string, which indicates a range. scan_const expands the
1118 range to the full set of intermediate characters.
1119
1120 In double-quoted strings:
1121 backslashes:
1122 double-quoted style: \r and \n
1123 constants: \x3
1124 backrefs: \1 (deprecated)
1125 case and quoting: \U \Q \E
1126 stops on @ and $
1127
1128 scan_const does *not* construct ops to handle interpolated strings.
1129 It stops processing as soon as it finds an embedded $ or @ variable
1130 and leaves it to the caller to work out what's going on.
1131
1132 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1133
1134 $ in pattern could be $foo or could be tail anchor. Assumption:
1135 it's a tail anchor if $ is the last thing in the string, or if it's
1136 followed by one of ")| \n\t"
1137
1138 \1 (backreferences) are turned into $1
1139
1140 The structure of the code is
1141 while (there's a character to process) {
1142 handle transliteration ranges
1143 skip regexp comments
1144 skip # initiated comments in //x patterns
1145 check for embedded @foo
1146 check for embedded scalars
1147 if (backslash) {
1148 leave intact backslashes from leave (below)
1149 deprecate \1 in strings and sub replacements
1150 handle string-changing backslashes \l \U \Q \E, etc.
1151 switch (what was escaped) {
1152 handle - in a transliteration (becomes a literal -)
1153 handle \132 octal characters
1154 handle 0x15 hex characters
1155 handle \cV (control V)
1156 handle printf backslashes (\f, \r, \n, etc)
1157 } (end switch)
1158 } (end if backslash)
1159 } (end while character to read)
1160
1161*/
1162
76e3520e 1163STATIC char *
cea2e8a9 1164S_scan_const(pTHX_ char *start)
79072805 1165{
3280af22 1166 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1167 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1168 register char *s = start; /* start of the constant */
1169 register char *d = SvPVX(sv); /* destination for copies */
1170 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1171 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1172 I32 len; /* ? */
012bcf8d
GS
1173 UV uv;
1174
ac2262e3 1175 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1176 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1177 : UTF;
ac2262e3 1178 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1179 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1180 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1181 : UTF;
dff6d3cd 1182 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1183 PL_lex_inpat
4a2d328f 1184 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1185 : "";
79072805
LW
1186
1187 while (s < send || dorange) {
02aa26ce 1188 /* get transliterations out of the way (they're most literal) */
3280af22 1189 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1190 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1191 if (dorange) {
02aa26ce 1192 I32 i; /* current expanded character */
8ada0baa 1193 I32 min; /* first character in range */
02aa26ce
NT
1194 I32 max; /* last character in range */
1195
1196 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1197 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1198 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1199 d -= 2; /* eat the first char and the - */
1200
8ada0baa
JH
1201 min = (U8)*d; /* first char in range */
1202 max = (U8)d[1]; /* last char in range */
1203
1204#ifndef ASCIIish
1205 if ((isLOWER(min) && isLOWER(max)) ||
1206 (isUPPER(min) && isUPPER(max))) {
1207 if (isLOWER(min)) {
1208 for (i = min; i <= max; i++)
1209 if (isLOWER(i))
1210 *d++ = i;
1211 } else {
1212 for (i = min; i <= max; i++)
1213 if (isUPPER(i))
1214 *d++ = i;
1215 }
1216 }
1217 else
1218#endif
1219 for (i = min; i <= max; i++)
1220 *d++ = i;
02aa26ce
NT
1221
1222 /* mark the range as done, and continue */
79072805
LW
1223 dorange = FALSE;
1224 continue;
1225 }
02aa26ce
NT
1226
1227 /* range begins (ignore - as first or last char) */
79072805 1228 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1229 if (utf) {
a176fa2a 1230 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1231 s++;
1232 continue;
1233 }
79072805
LW
1234 dorange = TRUE;
1235 s++;
1236 }
1237 }
02aa26ce
NT
1238
1239 /* if we get here, we're not doing a transliteration */
1240
0f5d15d6
IZ
1241 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1242 except for the last char, which will be done separately. */
3280af22 1243 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1244 if (s[2] == '#') {
1245 while (s < send && *s != ')')
1246 *d++ = *s++;
0f5d15d6
IZ
1247 } else if (s[2] == '{'
1248 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1249 I32 count = 1;
0f5d15d6 1250 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1251 char c;
1252
d9f97599
GS
1253 while (count && (c = *regparse)) {
1254 if (c == '\\' && regparse[1])
1255 regparse++;
cc6b7395
IZ
1256 else if (c == '{')
1257 count++;
1258 else if (c == '}')
1259 count--;
d9f97599 1260 regparse++;
cc6b7395 1261 }
5bdf89e7
IZ
1262 if (*regparse != ')') {
1263 regparse--; /* Leave one char for continuation. */
cc6b7395 1264 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1265 }
0f5d15d6 1266 while (s < regparse)
cc6b7395
IZ
1267 *d++ = *s++;
1268 }
748a9306 1269 }
02aa26ce
NT
1270
1271 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1272 else if (*s == '#' && PL_lex_inpat &&
1273 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1274 while (s+1 < send && *s != '\n')
1275 *d++ = *s++;
1276 }
02aa26ce
NT
1277
1278 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1279 else if (*s == '@' && s[1]
1280 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1281 break;
02aa26ce
NT
1282
1283 /* check for embedded scalars. only stop if we're sure it's a
1284 variable.
1285 */
79072805 1286 else if (*s == '$') {
3280af22 1287 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1288 break;
c277df42 1289 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1290 break; /* in regexp, $ might be tail anchor */
1291 }
02aa26ce 1292
a0ed51b3
LW
1293 /* (now in tr/// code again) */
1294
d008e5eb 1295 if (*s & 0x80 && thisutf) {
012bcf8d
GS
1296 (void)utf8_to_uv((U8*)s, &len);
1297 if (len == 1) {
1298 /* illegal UTF8, make it valid */
012bcf8d 1299 char *old_pvx = SvPVX(sv);
bee7c419
GS
1300 /* need space for one extra char (NOTE: SvCUR() not set here) */
1301 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1302 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1303 }
1304 else {
1305 while (len--)
1306 *d++ = *s++;
1307 }
1308 has_utf = TRUE;
1309 continue;
a0ed51b3
LW
1310 }
1311
02aa26ce 1312 /* backslashes */
79072805
LW
1313 if (*s == '\\' && s+1 < send) {
1314 s++;
02aa26ce
NT
1315
1316 /* some backslashes we leave behind */
c9f97d15 1317 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1318 *d++ = '\\';
1319 *d++ = *s++;
1320 continue;
1321 }
02aa26ce
NT
1322
1323 /* deprecate \1 in strings and substitution replacements */
3280af22 1324 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1325 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1326 {
d008e5eb 1327 dTHR; /* only for ckWARN */
599cee73 1328 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1329 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1330 *--s = '$';
1331 break;
1332 }
02aa26ce
NT
1333
1334 /* string-change backslash escapes */
3280af22 1335 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1336 --s;
1337 break;
1338 }
02aa26ce
NT
1339
1340 /* if we get here, it's either a quoted -, or a digit */
79072805 1341 switch (*s) {
02aa26ce
NT
1342
1343 /* quoted - in transliterations */
79072805 1344 case '-':
3280af22 1345 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1346 *d++ = *s++;
1347 continue;
1348 }
1349 /* FALL THROUGH */
1350 default:
11b8faa4
JH
1351 {
1352 dTHR;
e476b1b5
GS
1353 if (ckWARN(WARN_MISC) && isALPHA(*s))
1354 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1355 "Unrecognized escape \\%c passed through",
1356 *s);
1357 /* default action is to copy the quoted character */
1358 *d++ = *s++;
1359 continue;
1360 }
02aa26ce
NT
1361
1362 /* \132 indicates an octal constant */
79072805
LW
1363 case '0': case '1': case '2': case '3':
1364 case '4': case '5': case '6': case '7':
012bcf8d 1365 uv = (UV)scan_oct(s, 3, &len);
79072805 1366 s += len;
012bcf8d 1367 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1368
1369 /* \x24 indicates a hex constant */
79072805 1370 case 'x':
a0ed51b3
LW
1371 ++s;
1372 if (*s == '{') {
1373 char* e = strchr(s, '}');
adaeee49 1374 if (!e) {
a0ed51b3 1375 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1376 e = s;
1377 }
012bcf8d
GS
1378 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1379 s = e + 1;
a0ed51b3
LW
1380 }
1381 else {
012bcf8d
GS
1382 uv = (UV)scan_hex(s, 2, &len);
1383 s += len;
1384 }
1385
1386 NUM_ESCAPE_INSERT:
1387 /* Insert oct or hex escaped character.
1388 * There will always enough room in sv since such escapes will
1389 * be longer than any utf8 sequence they can end up as
1390 */
1391 if (uv > 127) {
1392 if (!thisutf && !has_utf && uv > 255) {
1393 /* might need to recode whatever we have accumulated so far
1394 * if it contains any hibit chars
1395 */
1396 int hicount = 0;
1397 char *c;
1398 for (c = SvPVX(sv); c < d; c++) {
1399 if (*c & 0x80)
1400 hicount++;
1401 }
1402 if (hicount) {
1403 char *old_pvx = SvPVX(sv);
1404 char *src, *dst;
1405 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1406
1407 src = d - 1;
1408 d += hicount;
1409 dst = d - 1;
1410
1411 while (src < dst) {
1412 if (*src & 0x80) {
1413 dst--;
1414 uv_to_utf8((U8*)dst, (U8)*src--);
1415 dst--;
1416 }
1417 else {
1418 *dst-- = *src--;
1419 }
1420 }
1421 }
1422 }
1423
1424 if (thisutf || uv > 255) {
1425 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1426 has_utf = TRUE;
012bcf8d 1427 }
a0ed51b3 1428 else {
012bcf8d 1429 *d++ = (char)uv;
a0ed51b3 1430 }
012bcf8d
GS
1431 }
1432 else {
1433 *d++ = (char)uv;
a0ed51b3 1434 }
79072805 1435 continue;
02aa26ce 1436
4a2d328f
IZ
1437 /* \N{latin small letter a} is a named character */
1438 case 'N':
423cee85
JH
1439 ++s;
1440 if (*s == '{') {
1441 char* e = strchr(s, '}');
1442 HV *hv;
1443 SV **svp;
1444 SV *res, *cv;
1445 STRLEN len;
1446 char *str;
1447 char *why = Nullch;
1448
1449 if (!e) {
5777a3f7 1450 yyerror("Missing right brace on \\N{}");
423cee85
JH
1451 e = s - 1;
1452 goto cont_scan;
1453 }
1454 res = newSVpvn(s + 1, e - s - 1);
1455 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1456 res, Nullsv, "\\N{...}" );
423cee85
JH
1457 str = SvPV(res,len);
1458 if (len > e - s + 4) {
1459 char *odest = SvPVX(sv);
1460
1461 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1462 d = SvPVX(sv) + (d - odest);
1463 }
1464 Copy(str, d, len, char);
1465 d += len;
1466 SvREFCNT_dec(res);
1467 cont_scan:
1468 s = e + 1;
1469 }
1470 else
5777a3f7 1471 yyerror("Missing braces on \\N{}");
423cee85
JH
1472 continue;
1473
02aa26ce 1474 /* \c is a control character */
79072805
LW
1475 case 'c':
1476 s++;
9d116dd7
JH
1477#ifdef EBCDIC
1478 *d = *s++;
1479 if (isLOWER(*d))
1480 *d = toUPPER(*d);
1481 *d++ = toCTRL(*d);
1482#else
bbce6d69
PP
1483 len = *s++;
1484 *d++ = toCTRL(len);
9d116dd7 1485#endif
79072805 1486 continue;
02aa26ce
NT
1487
1488 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1489 case 'b':
1490 *d++ = '\b';
1491 break;
1492 case 'n':
1493 *d++ = '\n';
1494 break;
1495 case 'r':
1496 *d++ = '\r';
1497 break;
1498 case 'f':
1499 *d++ = '\f';
1500 break;
1501 case 't':
1502 *d++ = '\t';
1503 break;
34a3fe2a
PP
1504#ifdef EBCDIC
1505 case 'e':
1506 *d++ = '\047'; /* CP 1047 */
1507 break;
1508 case 'a':
1509 *d++ = '\057'; /* CP 1047 */
1510 break;
1511#else
79072805
LW
1512 case 'e':
1513 *d++ = '\033';
1514 break;
1515 case 'a':
1516 *d++ = '\007';
1517 break;
34a3fe2a 1518#endif
02aa26ce
NT
1519 } /* end switch */
1520
79072805
LW
1521 s++;
1522 continue;
02aa26ce
NT
1523 } /* end if (backslash) */
1524
79072805 1525 *d++ = *s++;
02aa26ce
NT
1526 } /* while loop to process each character */
1527
1528 /* terminate the string and set up the sv */
79072805 1529 *d = '\0';
463ee0b2 1530 SvCUR_set(sv, d - SvPVX(sv));
79072805 1531 SvPOK_on(sv);
7e2040f0
GS
1532 if (has_utf)
1533 SvUTF8_on(sv);
79072805 1534
02aa26ce 1535 /* shrink the sv if we allocated more than we used */
79072805
LW
1536 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1537 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1538 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1539 }
02aa26ce 1540
9b599b2a 1541 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1542 if (s > PL_bufptr) {
1543 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1544 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1545 sv, Nullsv,
3280af22 1546 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1547 ? "tr"
3280af22 1548 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1549 ? "s"
1550 : "qq")));
79072805 1551 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1552 } else
8990e307 1553 SvREFCNT_dec(sv);
79072805
LW
1554 return s;
1555}
1556
ffb4593c
NT
1557/* S_intuit_more
1558 * Returns TRUE if there's more to the expression (e.g., a subscript),
1559 * FALSE otherwise.
ffb4593c
NT
1560 *
1561 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1562 *
1563 * ->[ and ->{ return TRUE
1564 * { and [ outside a pattern are always subscripts, so return TRUE
1565 * if we're outside a pattern and it's not { or [, then return FALSE
1566 * if we're in a pattern and the first char is a {
1567 * {4,5} (any digits around the comma) returns FALSE
1568 * if we're in a pattern and the first char is a [
1569 * [] returns FALSE
1570 * [SOMETHING] has a funky algorithm to decide whether it's a
1571 * character class or not. It has to deal with things like
1572 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1573 * anything else returns TRUE
1574 */
1575
9cbb5ea2
GS
1576/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1577
76e3520e 1578STATIC int
cea2e8a9 1579S_intuit_more(pTHX_ register char *s)
79072805 1580{
3280af22 1581 if (PL_lex_brackets)
79072805
LW
1582 return TRUE;
1583 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1584 return TRUE;
1585 if (*s != '{' && *s != '[')
1586 return FALSE;
3280af22 1587 if (!PL_lex_inpat)
79072805
LW
1588 return TRUE;
1589
1590 /* In a pattern, so maybe we have {n,m}. */
1591 if (*s == '{') {
1592 s++;
1593 if (!isDIGIT(*s))
1594 return TRUE;
1595 while (isDIGIT(*s))
1596 s++;
1597 if (*s == ',')
1598 s++;
1599 while (isDIGIT(*s))
1600 s++;
1601 if (*s == '}')
1602 return FALSE;
1603 return TRUE;
1604
1605 }
1606
1607 /* On the other hand, maybe we have a character class */
1608
1609 s++;
1610 if (*s == ']' || *s == '^')
1611 return FALSE;
1612 else {
ffb4593c 1613 /* this is terrifying, and it works */
79072805
LW
1614 int weight = 2; /* let's weigh the evidence */
1615 char seen[256];
f27ffc4a 1616 unsigned char un_char = 255, last_un_char;
93a17b20 1617 char *send = strchr(s,']');
3280af22 1618 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1619
1620 if (!send) /* has to be an expression */
1621 return TRUE;
1622
1623 Zero(seen,256,char);
1624 if (*s == '$')
1625 weight -= 3;
1626 else if (isDIGIT(*s)) {
1627 if (s[1] != ']') {
1628 if (isDIGIT(s[1]) && s[2] == ']')
1629 weight -= 10;
1630 }
1631 else
1632 weight -= 100;
1633 }
1634 for (; s < send; s++) {
1635 last_un_char = un_char;
1636 un_char = (unsigned char)*s;
1637 switch (*s) {
1638 case '@':
1639 case '&':
1640 case '$':
1641 weight -= seen[un_char] * 10;
7e2040f0 1642 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1643 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1644 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1645 weight -= 100;
1646 else
1647 weight -= 10;
1648 }
1649 else if (*s == '$' && s[1] &&
93a17b20
LW
1650 strchr("[#!%*<>()-=",s[1])) {
1651 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1652 weight -= 10;
1653 else
1654 weight -= 1;
1655 }
1656 break;
1657 case '\\':
1658 un_char = 254;
1659 if (s[1]) {
93a17b20 1660 if (strchr("wds]",s[1]))
79072805
LW
1661 weight += 100;
1662 else if (seen['\''] || seen['"'])
1663 weight += 1;
93a17b20 1664 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1665 weight += 40;
1666 else if (isDIGIT(s[1])) {
1667 weight += 40;
1668 while (s[1] && isDIGIT(s[1]))
1669 s++;
1670 }
1671 }
1672 else
1673 weight += 100;
1674 break;
1675 case '-':
1676 if (s[1] == '\\')
1677 weight += 50;
93a17b20 1678 if (strchr("aA01! ",last_un_char))
79072805 1679 weight += 30;
93a17b20 1680 if (strchr("zZ79~",s[1]))
79072805 1681 weight += 30;
f27ffc4a
GS
1682 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1683 weight -= 5; /* cope with negative subscript */
79072805
LW
1684 break;
1685 default:
93a17b20 1686 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1687 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1688 char *d = tmpbuf;
1689 while (isALPHA(*s))
1690 *d++ = *s++;
1691 *d = '\0';
1692 if (keyword(tmpbuf, d - tmpbuf))
1693 weight -= 150;
1694 }
1695 if (un_char == last_un_char + 1)
1696 weight += 5;
1697 weight -= seen[un_char];
1698 break;
1699 }
1700 seen[un_char]++;
1701 }
1702 if (weight >= 0) /* probably a character class */
1703 return FALSE;
1704 }
1705
1706 return TRUE;
1707}
ffed7fef 1708
ffb4593c
NT
1709/*
1710 * S_intuit_method
1711 *
1712 * Does all the checking to disambiguate
1713 * foo bar
1714 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1715 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1716 *
1717 * First argument is the stuff after the first token, e.g. "bar".
1718 *
1719 * Not a method if bar is a filehandle.
1720 * Not a method if foo is a subroutine prototyped to take a filehandle.
1721 * Not a method if it's really "Foo $bar"
1722 * Method if it's "foo $bar"
1723 * Not a method if it's really "print foo $bar"
1724 * Method if it's really "foo package::" (interpreted as package->foo)
1725 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1726 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1727 * =>
1728 */
1729
76e3520e 1730STATIC int
cea2e8a9 1731S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1732{
1733 char *s = start + (*start == '$');
3280af22 1734 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1735 STRLEN len;
1736 GV* indirgv;
1737
1738 if (gv) {
b6c543e3 1739 CV *cv;
a0d0e21e
LW
1740 if (GvIO(gv))
1741 return 0;
b6c543e3
IZ
1742 if ((cv = GvCVu(gv))) {
1743 char *proto = SvPVX(cv);
1744 if (proto) {
1745 if (*proto == ';')
1746 proto++;
1747 if (*proto == '*')
1748 return 0;
1749 }
1750 } else
a0d0e21e
LW
1751 gv = 0;
1752 }
8903cb82 1753 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1754 /* start is the beginning of the possible filehandle/object,
1755 * and s is the end of it
1756 * tmpbuf is a copy of it
1757 */
1758
a0d0e21e 1759 if (*start == '$') {
3280af22 1760 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1761 return 0;
1762 s = skipspace(s);
3280af22
NIS
1763 PL_bufptr = start;
1764 PL_expect = XREF;
a0d0e21e
LW
1765 return *s == '(' ? FUNCMETH : METHOD;
1766 }
1767 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1768 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1769 len -= 2;
1770 tmpbuf[len] = '\0';
1771 goto bare_package;
1772 }
1773 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1774 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1775 return 0;
1776 /* filehandle or package name makes it a method */
89bfa8cd 1777 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1778 s = skipspace(s);
3280af22 1779 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1780 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1781 bare_package:
3280af22 1782 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1783 newSVpvn(tmpbuf,len));
3280af22
NIS
1784 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1785 PL_expect = XTERM;
a0d0e21e 1786 force_next(WORD);
3280af22 1787 PL_bufptr = s;
a0d0e21e
LW
1788 return *s == '(' ? FUNCMETH : METHOD;
1789 }
1790 }
1791 return 0;
1792}
1793
ffb4593c
NT
1794/*
1795 * S_incl_perldb
1796 * Return a string of Perl code to load the debugger. If PERL5DB
1797 * is set, it will return the contents of that, otherwise a
1798 * compile-time require of perl5db.pl.
1799 */
1800
76e3520e 1801STATIC char*
cea2e8a9 1802S_incl_perldb(pTHX)
a0d0e21e 1803{
3280af22 1804 if (PL_perldb) {
76e3520e 1805 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1806
1807 if (pdb)
1808 return pdb;
61bb5906 1809 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1810 return "BEGIN { require 'perl5db.pl' }";
1811 }
1812 return "";
1813}
1814
1815
16d20bd9
AD
1816/* Encoded script support. filter_add() effectively inserts a
1817 * 'pre-processing' function into the current source input stream.
1818 * Note that the filter function only applies to the current source file
1819 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1820 *
1821 * The datasv parameter (which may be NULL) can be used to pass
1822 * private data to this instance of the filter. The filter function
1823 * can recover the SV using the FILTER_DATA macro and use it to
1824 * store private buffers and state information.
1825 *
1826 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1827 * and the IoDIRP field is used to store the function pointer,
1828 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1829 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1830 * private use must be set using malloc'd pointers.
1831 */
16d20bd9
AD
1832
1833SV *
864dbfa3 1834Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1835{
f4c556ac
GS
1836 if (!funcp)
1837 return Nullsv;
1838
3280af22
NIS
1839 if (!PL_rsfp_filters)
1840 PL_rsfp_filters = newAV();
16d20bd9 1841 if (!datasv)
8c52afec 1842 datasv = NEWSV(255,0);
16d20bd9 1843 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1844 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1845 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1846 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1847 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1848 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1849 av_unshift(PL_rsfp_filters, 1);
1850 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1851 return(datasv);
1852}
1853
1854
1855/* Delete most recently added instance of this filter function. */
a0d0e21e 1856void
864dbfa3 1857Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1858{
e0c19803 1859 SV *datasv;
f4c556ac 1860 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1861 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1862 return;
1863 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1864 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1865 if (IoDIRP(datasv) == (DIR*)funcp) {
1866 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1867 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1868 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1869
16d20bd9
AD
1870 return;
1871 }
1872 /* we need to search for the correct entry and clear it */
cea2e8a9 1873 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1874}
1875
1876
1877/* Invoke the n'th filter function for the current rsfp. */
1878I32
864dbfa3 1879Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1880
1881
1882 /* 0 = read one text line */
a0d0e21e 1883{
16d20bd9
AD
1884 filter_t funcp;
1885 SV *datasv = NULL;
e50aee73 1886
3280af22 1887 if (!PL_rsfp_filters)
16d20bd9 1888 return -1;
3280af22 1889 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1890 /* Provide a default input filter to make life easy. */
1891 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1892 DEBUG_P(PerlIO_printf(Perl_debug_log,
1893 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1894 if (maxlen) {
1895 /* Want a block */
1896 int len ;
1897 int old_len = SvCUR(buf_sv) ;
1898
1899 /* ensure buf_sv is large enough */
1900 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1901 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1902 if (PerlIO_error(PL_rsfp))
37120919
AD
1903 return -1; /* error */
1904 else
1905 return 0 ; /* end of file */
1906 }
16d20bd9
AD
1907 SvCUR_set(buf_sv, old_len + len) ;
1908 } else {
1909 /* Want a line */
3280af22
NIS
1910 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1911 if (PerlIO_error(PL_rsfp))
37120919
AD
1912 return -1; /* error */
1913 else
1914 return 0 ; /* end of file */
1915 }
16d20bd9
AD
1916 }
1917 return SvCUR(buf_sv);
1918 }
1919 /* Skip this filter slot if filter has been deleted */
3280af22 1920 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1921 DEBUG_P(PerlIO_printf(Perl_debug_log,
1922 "filter_read %d: skipped (filter deleted)\n",
1923 idx));
16d20bd9
AD
1924 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1925 }
1926 /* Get function pointer hidden within datasv */
1927 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1928 DEBUG_P(PerlIO_printf(Perl_debug_log,
1929 "filter_read %d: via function %p (%s)\n",
1930 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1931 /* Call function. The function is expected to */
1932 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1933 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1934 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1935}
1936
76e3520e 1937STATIC char *
cea2e8a9 1938S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1939{
c39cd008 1940#ifdef PERL_CR_FILTER
3280af22 1941 if (!PL_rsfp_filters) {
c39cd008 1942 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1943 }
1944#endif
3280af22 1945 if (PL_rsfp_filters) {
16d20bd9 1946
55497cff
PP
1947 if (!append)
1948 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1949 if (FILTER_READ(0, sv, 0) > 0)
1950 return ( SvPVX(sv) ) ;
1951 else
1952 return Nullch ;
1953 }
9d116dd7 1954 else
fd049845 1955 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1956}
1957
1958
748a9306
LW
1959#ifdef DEBUGGING
1960 static char* exp_name[] =
09bef843
SB
1961 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1962 "ATTRTERM", "TERMBLOCK"
1963 };
748a9306 1964#endif
463ee0b2 1965
02aa26ce
NT
1966/*
1967 yylex
1968
1969 Works out what to call the token just pulled out of the input
1970 stream. The yacc parser takes care of taking the ops we return and
1971 stitching them into a tree.
1972
1973 Returns:
1974 PRIVATEREF
1975
1976 Structure:
1977 if read an identifier
1978 if we're in a my declaration
1979 croak if they tried to say my($foo::bar)
1980 build the ops for a my() declaration
1981 if it's an access to a my() variable
1982 are we in a sort block?
1983 croak if my($a); $a <=> $b
1984 build ops for access to a my() variable
1985 if in a dq string, and they've said @foo and we can't find @foo
1986 croak
1987 build ops for a bareword
1988 if we already built the token before, use it.
1989*/
1990
864dbfa3
GS
1991int
1992#ifdef USE_PURE_BISON
cea2e8a9 1993Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1994#else
cea2e8a9 1995Perl_yylex(pTHX)
864dbfa3 1996#endif
378cc40b 1997{
11343788 1998 dTHR;
79072805 1999 register char *s;
378cc40b 2000 register char *d;
79072805 2001 register I32 tmp;
463ee0b2 2002 STRLEN len;
161b471a
NIS
2003 GV *gv = Nullgv;
2004 GV **gvp = 0;
a687059c 2005
a1a0e61e
TD
2006#ifdef USE_PURE_BISON
2007 yylval_pointer = lvalp;
2008 yychar_pointer = lcharp;
2009#endif
2010
02aa26ce 2011 /* check if there's an identifier for us to look at */
3280af22 2012 if (PL_pending_ident) {
02aa26ce 2013 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2014 char pit = PL_pending_ident;
2015 PL_pending_ident = 0;
bbce6d69 2016
02aa26ce
NT
2017 /* if we're in a my(), we can't allow dynamics here.
2018 $foo'bar has already been turned into $foo::bar, so
2019 just check for colons.
2020
2021 if it's a legal name, the OP is a PADANY.
2022 */
3280af22 2023 if (PL_in_my) {
77ca0c92 2024 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2025 if (strchr(PL_tokenbuf,':'))
2026 yyerror(Perl_form(aTHX_ "No package name allowed for "
2027 "variable %s in \"our\"",
2028 PL_tokenbuf));
77ca0c92
LW
2029 tmp = pad_allocmy(PL_tokenbuf);
2030 }
2031 else {
2032 if (strchr(PL_tokenbuf,':'))
2033 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2034
77ca0c92
LW
2035 yylval.opval = newOP(OP_PADANY, 0);
2036 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2037 return PRIVATEREF;
2038 }
bbce6d69
PP
2039 }
2040
02aa26ce
NT
2041 /*
2042 build the ops for accesses to a my() variable.
2043
2044 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2045 then used in a comparison. This catches most, but not
2046 all cases. For instance, it catches
2047 sort { my($a); $a <=> $b }
2048 but not
2049 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2050 (although why you'd do that is anyone's guess).
2051 */
2052
3280af22 2053 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2054#ifdef USE_THREADS
54b9620d 2055 /* Check for single character per-thread SVs */
3280af22
NIS
2056 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2057 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2058 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2059 {
2faa37cc 2060 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2061 yylval.opval->op_targ = tmp;
2062 return PRIVATEREF;
2063 }
2064#endif /* USE_THREADS */
3280af22 2065 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2066 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2067 /* might be an "our" variable" */
f472eb5c 2068 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2069 /* build ops for a bareword */
f472eb5c
GS
2070 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2071 sv_catpvn(sym, "::", 2);
2072 sv_catpv(sym, PL_tokenbuf+1);
2073 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2074 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2075 gv_fetchpv(SvPVX(sym),
77ca0c92 2076 (PL_in_eval
f472eb5c
GS
2077 ? (GV_ADDMULTI | GV_ADDINEVAL)
2078 : TRUE
77ca0c92
LW
2079 ),
2080 ((PL_tokenbuf[0] == '$') ? SVt_PV
2081 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2082 : SVt_PVHV));
2083 return WORD;
2084 }
2085
02aa26ce 2086 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2087 if (PL_last_lop_op == OP_SORT &&
2088 PL_tokenbuf[0] == '$' &&
2089 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2090 && !PL_tokenbuf[2])
bbce6d69 2091 {
3280af22
NIS
2092 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2093 d < PL_bufend && *d != '\n';
a863c7d1
MB
2094 d++)
2095 {
2096 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2097 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2098 PL_tokenbuf);
a863c7d1 2099 }
bbce6d69
PP
2100 }
2101 }
bbce6d69 2102
a863c7d1
MB
2103 yylval.opval = newOP(OP_PADANY, 0);
2104 yylval.opval->op_targ = tmp;
2105 return PRIVATEREF;
2106 }
bbce6d69
PP
2107 }
2108
02aa26ce
NT
2109 /*
2110 Whine if they've said @foo in a doublequoted string,
2111 and @foo isn't a variable we can find in the symbol
2112 table.
2113 */
3280af22
NIS
2114 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2115 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2116 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2117 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2118 PL_tokenbuf, PL_tokenbuf));
bbce6d69
PP
2119 }
2120
02aa26ce 2121 /* build ops for a bareword */
3280af22 2122 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2123 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2124 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2125 ((PL_tokenbuf[0] == '$') ? SVt_PV
2126 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2127 : SVt_PVHV));
2128 return WORD;
2129 }
2130
02aa26ce
NT
2131 /* no identifier pending identification */
2132
3280af22 2133 switch (PL_lex_state) {
79072805
LW
2134#ifdef COMMENTARY
2135 case LEX_NORMAL: /* Some compilers will produce faster */
2136 case LEX_INTERPNORMAL: /* code if we comment these out. */
2137 break;
2138#endif
2139
09bef843 2140 /* when we've already built the next token, just pull it out of the queue */
79072805 2141 case LEX_KNOWNEXT:
3280af22
NIS
2142 PL_nexttoke--;
2143 yylval = PL_nextval[PL_nexttoke];
2144 if (!PL_nexttoke) {
2145 PL_lex_state = PL_lex_defer;
2146 PL_expect = PL_lex_expect;
2147 PL_lex_defer = LEX_NORMAL;
463ee0b2 2148 }
3280af22 2149 return(PL_nexttype[PL_nexttoke]);
79072805 2150
02aa26ce 2151 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2152 when we get here, PL_bufptr is at the \
02aa26ce 2153 */
79072805
LW
2154 case LEX_INTERPCASEMOD:
2155#ifdef DEBUGGING
3280af22 2156 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2157 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2158#endif
02aa26ce 2159 /* handle \E or end of string */
3280af22 2160 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2161 char oldmod;
02aa26ce
NT
2162
2163 /* if at a \E */
3280af22
NIS
2164 if (PL_lex_casemods) {
2165 oldmod = PL_lex_casestack[--PL_lex_casemods];
2166 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2167
3280af22
NIS
2168 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2169 PL_bufptr += 2;
2170 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2171 }
79072805
LW
2172 return ')';
2173 }
3280af22
NIS
2174 if (PL_bufptr != PL_bufend)
2175 PL_bufptr += 2;
2176 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2177 return yylex();
79072805
LW
2178 }
2179 else {
3280af22 2180 s = PL_bufptr + 1;
79072805
LW
2181 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2182 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2183 if (strchr("LU", *s) &&
3280af22 2184 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2185 {
3280af22 2186 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2187 return ')';
2188 }
3280af22
NIS
2189 if (PL_lex_casemods > 10) {
2190 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2191 if (newlb != PL_lex_casestack) {
a0d0e21e 2192 SAVEFREEPV(newlb);
3280af22 2193 PL_lex_casestack = newlb;
a0d0e21e
LW
2194 }
2195 }
3280af22
NIS
2196 PL_lex_casestack[PL_lex_casemods++] = *s;
2197 PL_lex_casestack[PL_lex_casemods] = '\0';
2198 PL_lex_state = LEX_INTERPCONCAT;
2199 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2200 force_next('(');
2201 if (*s == 'l')
3280af22 2202 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2203 else if (*s == 'u')
3280af22 2204 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2205 else if (*s == 'L')
3280af22 2206 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2207 else if (*s == 'U')
3280af22 2208 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2209 else if (*s == 'Q')
3280af22 2210 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2211 else
cea2e8a9 2212 Perl_croak(aTHX_ "panic: yylex");
3280af22 2213 PL_bufptr = s + 1;
79072805 2214 force_next(FUNC);
3280af22
NIS
2215 if (PL_lex_starts) {
2216 s = PL_bufptr;
2217 PL_lex_starts = 0;
79072805
LW
2218 Aop(OP_CONCAT);
2219 }
2220 else
cea2e8a9 2221 return yylex();
79072805
LW
2222 }
2223
55497cff
PP
2224 case LEX_INTERPPUSH:
2225 return sublex_push();
2226
79072805 2227 case LEX_INTERPSTART:
3280af22 2228 if (PL_bufptr == PL_bufend)
79072805 2229 return sublex_done();
3280af22
NIS
2230 PL_expect = XTERM;
2231 PL_lex_dojoin = (*PL_bufptr == '@');
2232 PL_lex_state = LEX_INTERPNORMAL;
2233 if (PL_lex_dojoin) {
2234 PL_nextval[PL_nexttoke].ival = 0;
79072805 2235 force_next(',');
554b3eca 2236#ifdef USE_THREADS
533c011a
NIS
2237 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2238 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2239 force_next(PRIVATEREF);
2240#else
a0d0e21e 2241 force_ident("\"", '$');
554b3eca 2242#endif /* USE_THREADS */
3280af22 2243 PL_nextval[PL_nexttoke].ival = 0;
79072805 2244 force_next('$');
3280af22 2245 PL_nextval[PL_nexttoke].ival = 0;
79072805 2246 force_next('(');
3280af22 2247 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2248 force_next(FUNC);
2249 }
3280af22
NIS
2250 if (PL_lex_starts++) {
2251 s = PL_bufptr;
79072805
LW
2252 Aop(OP_CONCAT);
2253 }
cea2e8a9 2254 return yylex();
79072805
LW
2255
2256 case LEX_INTERPENDMAYBE:
3280af22
NIS
2257 if (intuit_more(PL_bufptr)) {
2258 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2259 break;
2260 }
2261 /* FALL THROUGH */
2262
2263 case LEX_INTERPEND:
3280af22
NIS
2264 if (PL_lex_dojoin) {
2265 PL_lex_dojoin = FALSE;
2266 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2267 return ')';
2268 }
43a16006 2269 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2270 && SvEVALED(PL_lex_repl))
43a16006 2271 {
e9fa98b2 2272 if (PL_bufptr != PL_bufend)
cea2e8a9 2273 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2274 PL_lex_repl = Nullsv;
2275 }
79072805
LW
2276 /* FALLTHROUGH */
2277 case LEX_INTERPCONCAT:
2278#ifdef DEBUGGING
3280af22 2279 if (PL_lex_brackets)
cea2e8a9 2280 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2281#endif
3280af22 2282 if (PL_bufptr == PL_bufend)
79072805
LW
2283 return sublex_done();
2284
3280af22
NIS
2285 if (SvIVX(PL_linestr) == '\'') {
2286 SV *sv = newSVsv(PL_linestr);
2287 if (!PL_lex_inpat)
76e3520e 2288 sv = tokeq(sv);
3280af22 2289 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2290 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2291 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2292 s = PL_bufend;
79072805
LW
2293 }
2294 else {
3280af22 2295 s = scan_const(PL_bufptr);
79072805 2296 if (*s == '\\')
3280af22 2297 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2298 else
3280af22 2299 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2300 }
2301
3280af22
NIS
2302 if (s != PL_bufptr) {
2303 PL_nextval[PL_nexttoke] = yylval;
2304 PL_expect = XTERM;
79072805 2305 force_next(THING);
3280af22 2306 if (PL_lex_starts++)
79072805
LW
2307 Aop(OP_CONCAT);
2308 else {
3280af22 2309 PL_bufptr = s;
cea2e8a9 2310 return yylex();
79072805
LW
2311 }
2312 }
2313
cea2e8a9 2314 return yylex();
a0d0e21e 2315 case LEX_FORMLINE:
3280af22
NIS
2316 PL_lex_state = LEX_NORMAL;
2317 s = scan_formline(PL_bufptr);
2318 if (!PL_lex_formbrack)
a0d0e21e
LW
2319 goto rightbracket;
2320 OPERATOR(';');
79072805
LW
2321 }
2322
3280af22
NIS
2323 s = PL_bufptr;
2324 PL_oldoldbufptr = PL_oldbufptr;
2325 PL_oldbufptr = s;
79072805 2326 DEBUG_p( {
bf49b057
GS
2327 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2328 exp_name[PL_expect], s);
79072805 2329 } )
463ee0b2
LW
2330
2331 retry:
378cc40b
LW
2332 switch (*s) {
2333 default:
7e2040f0 2334 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2335 goto keylookup;
cea2e8a9 2336 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2337 case 4:
2338 case 26:
2339 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2340 case 0:
3280af22
NIS
2341 if (!PL_rsfp) {
2342 PL_last_uni = 0;
2343 PL_last_lop = 0;
2344 if (PL_lex_brackets)
d98d5fff 2345 yyerror("Missing right curly or square bracket");
79072805 2346 TOKEN(0);
463ee0b2 2347 }
3280af22 2348 if (s++ < PL_bufend)
a687059c 2349 goto retry; /* ignore stray nulls */
3280af22
NIS
2350 PL_last_uni = 0;
2351 PL_last_lop = 0;
2352 if (!PL_in_eval && !PL_preambled) {
2353 PL_preambled = TRUE;
2354 sv_setpv(PL_linestr,incl_perldb());
2355 if (SvCUR(PL_linestr))
2356 sv_catpv(PL_linestr,";");
2357 if (PL_preambleav){
2358 while(AvFILLp(PL_preambleav) >= 0) {
2359 SV *tmpsv = av_shift(PL_preambleav);
2360 sv_catsv(PL_linestr, tmpsv);
2361 sv_catpv(PL_linestr, ";");
91b7def8
PP
2362 sv_free(tmpsv);
2363 }
3280af22
NIS
2364 sv_free((SV*)PL_preambleav);
2365 PL_preambleav = NULL;
91b7def8 2366 }
3280af22
NIS
2367 if (PL_minus_n || PL_minus_p) {
2368 sv_catpv(PL_linestr, "LINE: while (<>) {");
2369 if (PL_minus_l)
2370 sv_catpv(PL_linestr,"chomp;");
2371 if (PL_minus_a) {
8fd239a7
CS
2372 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2373 if (gv)
2374 GvIMPORTED_AV_on(gv);
3280af22
NIS
2375 if (PL_minus_F) {
2376 if (strchr("/'\"", *PL_splitstr)
2377 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2378 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2379 else {
2380 char delim;
2381 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2382 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2383 delim = *s;
cea2e8a9 2384 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2385 "q" + (delim == '\''), delim);
3280af22 2386 for (s = PL_splitstr; *s; s++) {
54310121 2387 if (*s == '\\')
3280af22
NIS
2388 sv_catpvn(PL_linestr, "\\", 1);
2389 sv_catpvn(PL_linestr, s, 1);
54310121 2390 }
cea2e8a9 2391 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2392 }
2304df62
AD
2393 }
2394 else
3280af22 2395 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2396 }
79072805 2397 }
3280af22
NIS
2398 sv_catpv(PL_linestr, "\n");
2399 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2400 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2401 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2402 SV *sv = NEWSV(85,0);
2403
2404 sv_upgrade(sv, SVt_PVMG);
3280af22 2405 sv_setsv(sv,PL_linestr);
57843af0 2406 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2407 }
79072805 2408 goto retry;
a687059c 2409 }
e929a76b 2410 do {
3280af22 2411 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2412 fake_eof:
3280af22
NIS
2413 if (PL_rsfp) {
2414 if (PL_preprocess && !PL_in_eval)
2415 (void)PerlProc_pclose(PL_rsfp);
2416 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2417 PerlIO_clearerr(PL_rsfp);
395c3793 2418 else
3280af22
NIS
2419 (void)PerlIO_close(PL_rsfp);
2420 PL_rsfp = Nullfp;
4a9ae47a 2421 PL_doextract = FALSE;
395c3793 2422 }
3280af22
NIS
2423 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2424 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2425 sv_catpv(PL_linestr,";}");
2426 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2427 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2428 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2429 goto retry;
2430 }
3280af22
NIS
2431 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2432 sv_setpv(PL_linestr,"");
79072805 2433 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2434 }
3280af22 2435 if (PL_doextract) {
a0d0e21e 2436 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2437 PL_doextract = FALSE;
a0d0e21e
LW
2438
2439 /* Incest with pod. */
2440 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2441 sv_setpv(PL_linestr, "");
2442 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2443 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2444 PL_doextract = FALSE;
a0d0e21e
LW
2445 }
2446 }
463ee0b2 2447 incline(s);
3280af22
NIS
2448 } while (PL_doextract);
2449 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2450 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2451 SV *sv = NEWSV(85,0);
a687059c 2452
93a17b20 2453 sv_upgrade(sv, SVt_PVMG);
3280af22 2454 sv_setsv(sv,PL_linestr);
57843af0 2455 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2456 }
3280af22 2457 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2458 if (CopLINE(PL_curcop) == 1) {
3280af22 2459 while (s < PL_bufend && isSPACE(*s))
79072805 2460 s++;
a0d0e21e 2461 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2462 s++;
44a8e56a 2463 d = Nullch;
3280af22 2464 if (!PL_in_eval) {
44a8e56a
PP
2465 if (*s == '#' && *(s+1) == '!')
2466 d = s + 2;
2467#ifdef ALTERNATE_SHEBANG
2468 else {
2469 static char as[] = ALTERNATE_SHEBANG;
2470 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2471 d = s + (sizeof(as) - 1);
2472 }
2473#endif /* ALTERNATE_SHEBANG */
2474 }
2475 if (d) {
b8378b72 2476 char *ipath;
774d564b 2477 char *ipathend;
b8378b72 2478
774d564b 2479 while (isSPACE(*d))
b8378b72
CS
2480 d++;
2481 ipath = d;
774d564b
PP
2482 while (*d && !isSPACE(*d))
2483 d++;
2484 ipathend = d;
2485
2486#ifdef ARG_ZERO_IS_SCRIPT
2487 if (ipathend > ipath) {
2488 /*
2489 * HP-UX (at least) sets argv[0] to the script name,
2490 * which makes $^X incorrect. And Digital UNIX and Linux,
2491 * at least, set argv[0] to the basename of the Perl
2492 * interpreter. So, having found "#!", we'll set it right.
2493 */
2494 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2495 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2496 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2497 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2498 SvSETMAGIC(x);
2499 }
774d564b 2500 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2501 }
774d564b 2502#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2503
2504 /*
2505 * Look for options.
2506 */
748a9306 2507 d = instr(s,"perl -");
84e30d1a 2508 if (!d) {
748a9306 2509 d = instr(s,"perl");
84e30d1a
GS
2510#if defined(DOSISH)
2511 /* avoid getting into infinite loops when shebang
2512 * line contains "Perl" rather than "perl" */
2513 if (!d) {
2514 for (d = ipathend-4; d >= ipath; --d) {
2515 if ((*d == 'p' || *d == 'P')
2516 && !ibcmp(d, "perl", 4))
2517 {
2518 break;
2519 }
2520 }
2521 if (d < ipath)
2522 d = Nullch;
2523 }
2524#endif
2525 }
44a8e56a
PP
2526#ifdef ALTERNATE_SHEBANG
2527 /*
2528 * If the ALTERNATE_SHEBANG on this system starts with a
2529 * character that can be part of a Perl expression, then if
2530 * we see it but not "perl", we're probably looking at the
2531 * start of Perl code, not a request to hand off to some
2532 * other interpreter. Similarly, if "perl" is there, but
2533 * not in the first 'word' of the line, we assume the line
2534 * contains the start of the Perl program.
44a8e56a
PP
2535 */
2536 if (d && *s != '#') {
774d564b 2537 char *c = ipath;
44a8e56a
PP
2538 while (*c && !strchr("; \t\r\n\f\v#", *c))
2539 c++;
2540 if (c < d)
2541 d = Nullch; /* "perl" not in first word; ignore */
2542 else
2543 *s = '#'; /* Don't try to parse shebang line */
2544 }
774d564b 2545#endif /* ALTERNATE_SHEBANG */
748a9306 2546 if (!d &&
44a8e56a 2547 *s == '#' &&
774d564b 2548 ipathend > ipath &&
3280af22 2549 !PL_minus_c &&
748a9306 2550 !instr(s,"indir") &&
3280af22 2551 instr(PL_origargv[0],"perl"))
748a9306 2552 {
9f68db38 2553 char **newargv;
9f68db38 2554
774d564b
PP
2555 *ipathend = '\0';
2556 s = ipathend + 1;
3280af22 2557 while (s < PL_bufend && isSPACE(*s))
9f68db38 2558 s++;
3280af22
NIS
2559 if (s < PL_bufend) {
2560 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2561 newargv[1] = s;
3280af22 2562 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2563 s++;
2564 *s = '\0';
3280af22 2565 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2566 }
2567 else
3280af22 2568 newargv = PL_origargv;
774d564b 2569 newargv[0] = ipath;
80252599 2570 PerlProc_execv(ipath, newargv);
cea2e8a9 2571 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2572 }
748a9306 2573 if (d) {
3280af22
NIS
2574 U32 oldpdb = PL_perldb;
2575 bool oldn = PL_minus_n;
2576 bool oldp = PL_minus_p;
748a9306
LW
2577
2578 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2579 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2580
2581 if (*d++ == '-') {
8cc95fdb
PP
2582 do {
2583 if (*d == 'M' || *d == 'm') {
2584 char *m = d;
2585 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2586 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2587 (int)(d - m), m);
2588 }
2589 d = moreswitches(d);
2590 } while (d);
84902520 2591 if (PERLDB_LINE && !oldpdb ||
3280af22 2592 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b
PP
2593 /* if we have already added "LINE: while (<>) {",
2594 we must not do it again */
748a9306 2595 {
3280af22
NIS
2596 sv_setpv(PL_linestr, "");
2597 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2598 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2599 PL_preambled = FALSE;
84902520 2600 if (PERLDB_LINE)
3280af22 2601 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2602 goto retry;
2603 }
a0d0e21e 2604 }
79072805 2605 }
9f68db38 2606 }
79072805 2607 }
3280af22
NIS
2608 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2609 PL_bufptr = s;
2610 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2611 return yylex();
ae986130 2612 }
378cc40b 2613 goto retry;
4fdae800 2614 case '\r':
6a27c188 2615#ifdef PERL_STRICT_CR
cea2e8a9
GS
2616 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2617 Perl_croak(aTHX_
54310121 2618 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2619#endif
4fdae800 2620 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2621 s++;
2622 goto retry;
378cc40b 2623 case '#':
e929a76b 2624 case '\n':
3280af22
NIS
2625 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2626 d = PL_bufend;
a687059c 2627 while (s < d && *s != '\n')
378cc40b 2628 s++;
0f85fab0 2629 if (s < d)
378cc40b 2630 s++;
463ee0b2 2631 incline(s);
3280af22
NIS
2632 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2633 PL_bufptr = s;
2634 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2635 return yylex();
a687059c 2636 }
378cc40b 2637 }
a687059c 2638 else {
378cc40b 2639 *s = '\0';
3280af22 2640 PL_bufend = s;
a687059c 2641 }
378cc40b
LW
2642 goto retry;
2643 case '-':
79072805 2644 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2645 s++;
3280af22 2646 PL_bufptr = s;
748a9306
LW
2647 tmp = *s++;
2648
3280af22 2649 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2650 s++;
2651
2652 if (strnEQ(s,"=>",2)) {
3280af22 2653 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2654 OPERATOR('-'); /* unary minus */
2655 }
3280af22
NIS
2656 PL_last_uni = PL_oldbufptr;
2657 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2658 switch (tmp) {
79072805
LW
2659 case 'r': FTST(OP_FTEREAD);
2660 case 'w': FTST(OP_FTEWRITE);
2661 case 'x': FTST(OP_FTEEXEC);
2662 case 'o': FTST(OP_FTEOWNED);
2663 case 'R': FTST(OP_FTRREAD);
2664 case 'W': FTST(OP_FTRWRITE);
2665 case 'X': FTST(OP_FTREXEC);
2666 case 'O': FTST(OP_FTROWNED);
2667 case 'e': FTST(OP_FTIS);
2668 case 'z': FTST(OP_FTZERO);
2669 case 's': FTST(OP_FTSIZE);
2670 case 'f': FTST(OP_FTFILE);
2671 case 'd': FTST(OP_FTDIR);
2672 case 'l': FTST(OP_FTLINK);
2673 case 'p': FTST(OP_FTPIPE);
2674 case 'S': FTST(OP_FTSOCK);
2675 case 'u': FTST(OP_FTSUID);
2676 case 'g': FTST(OP_FTSGID);
2677 case 'k': FTST(OP_FTSVTX);
2678 case 'b': FTST(OP_FTBLK);
2679 case 'c': FTST(OP_FTCHR);
2680 case 't': FTST(OP_FTTTY);
2681 case 'T': FTST(OP_FTTEXT);
2682 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2683 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2684 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2685 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2686 default:
cea2e8a9 2687 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2688 break;
2689 }
2690 }
a687059c
LW
2691 tmp = *s++;
2692 if (*s == tmp) {
2693 s++;
3280af22 2694 if (PL_expect == XOPERATOR)
79072805
LW
2695 TERM(POSTDEC);
2696 else
2697 OPERATOR(PREDEC);
2698 }
2699 else if (*s == '>') {
2700 s++;
2701 s = skipspace(s);
7e2040f0 2702 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2703 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2704 TOKEN(ARROW);
79072805 2705 }
748a9306
LW
2706 else if (*s == '$')
2707 OPERATOR(ARROW);
463ee0b2 2708 else
748a9306 2709 TERM(ARROW);
a687059c 2710 }
3280af22 2711 if (PL_expect == XOPERATOR)
79072805
LW
2712 Aop(OP_SUBTRACT);
2713 else {
3280af22 2714 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2715 check_uni();
79072805 2716 OPERATOR('-'); /* unary minus */
2f3197b3 2717 }
79072805 2718
378cc40b 2719 case '+':
a687059c
LW
2720 tmp = *s++;
2721 if (*s == tmp) {
378cc40b 2722 s++;
3280af22 2723 if (PL_expect == XOPERATOR)
79072805
LW
2724 TERM(POSTINC);
2725 else
2726 OPERATOR(PREINC);
378cc40b 2727 }
3280af22 2728 if (PL_expect == XOPERATOR)
79072805
LW
2729 Aop(OP_ADD);
2730 else {
3280af22 2731 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2732 check_uni();
a687059c 2733 OPERATOR('+');
2f3197b3 2734 }
a687059c 2735
378cc40b 2736 case '*':
3280af22
NIS
2737 if (PL_expect != XOPERATOR) {
2738 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2739 PL_expect = XOPERATOR;
2740 force_ident(PL_tokenbuf, '*');
2741 if (!*PL_tokenbuf)
a0d0e21e 2742 PREREF('*');
79072805 2743 TERM('*');
a687059c 2744 }
79072805
LW
2745 s++;
2746 if (*s == '*') {
a687059c 2747 s++;
79072805 2748 PWop(OP_POW);
a687059c 2749 }
79072805
LW
2750 Mop(OP_MULTIPLY);
2751
378cc40b 2752 case '%':
3280af22 2753 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2754 ++s;
2755 Mop(OP_MODULO);
a687059c 2756 }
3280af22
NIS
2757 PL_tokenbuf[0] = '%';
2758 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2759 if (!PL_tokenbuf[1]) {
2760 if (s == PL_bufend)
bbce6d69
PP
2761 yyerror("Final % should be \\% or %name");
2762 PREREF('%');
a687059c 2763 }
3280af22 2764 PL_pending_ident = '%';
bbce6d69 2765 TERM('%');
a687059c 2766
378cc40b 2767 case '^':
79072805 2768 s++;
a0d0e21e 2769 BOop(OP_BIT_XOR);
79072805 2770 case '[':
3280af22 2771 PL_lex_brackets++;
79072805 2772 /* FALL THROUGH */
378cc40b 2773 case '~':
378cc40b 2774 case ',':
378cc40b
LW
2775 tmp = *s++;
2776 OPERATOR(tmp);
a0d0e21e
LW
2777 case ':':
2778 if (s[1] == ':') {
2779 len = 0;
2780 goto just_a_word;
2781 }
2782 s++;
09bef843
SB
2783 switch (PL_expect) {
2784 OP *attrs;
2785 case XOPERATOR:
2786 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2787 break;
2788 PL_bufptr = s; /* update in case we back off */
2789 goto grabattrs;
2790 case XATTRBLOCK:
2791 PL_expect = XBLOCK;
2792 goto grabattrs;
2793 case XATTRTERM:
2794 PL_expect = XTERMBLOCK;
2795 grabattrs:
2796 s = skipspace(s);
2797 attrs = Nullop;
7e2040f0 2798 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2799 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2800 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2801 if (tmp < 0) tmp = -tmp;
2802 switch (tmp) {
2803 case KEY_or:
2804 case KEY_and:
2805 case KEY_for:
2806 case KEY_unless:
2807 case KEY_if:
2808 case KEY_while:
2809 case KEY_until:
2810 goto got_attrs;
2811 default:
2812 break;
2813 }
2814 }
09bef843
SB
2815 if (*d == '(') {
2816 d = scan_str(d,TRUE,TRUE);
2817 if (!d) {
2818 if (PL_lex_stuff) {
2819 SvREFCNT_dec(PL_lex_stuff);
2820 PL_lex_stuff = Nullsv;
2821 }
2822 /* MUST advance bufptr here to avoid bogus
2823 "at end of line" context messages from yyerror().
2824 */
2825 PL_bufptr = s + len;
2826 yyerror("Unterminated attribute parameter in attribute list");
2827 if (attrs)
2828 op_free(attrs);
2829 return 0; /* EOF indicator */
2830 }
2831 }
2832 if (PL_lex_stuff) {
2833 SV *sv = newSVpvn(s, len);
2834 sv_catsv(sv, PL_lex_stuff);
2835 attrs = append_elem(OP_LIST, attrs,
2836 newSVOP(OP_CONST, 0, sv));
2837 SvREFCNT_dec(PL_lex_stuff);
2838 PL_lex_stuff = Nullsv;
2839 }
2840 else {
2841 attrs = append_elem(OP_LIST, attrs,
2842 newSVOP(OP_CONST, 0,
2843 newSVpvn(s, len)));
2844 }
2845 s = skipspace(d);
0120eecf 2846 if (*s == ':' && s[1] != ':')
09bef843 2847 s = skipspace(s+1);
0120eecf
GS
2848 else if (s == d)
2849 break; /* require real whitespace or :'s */
09bef843 2850 }
f9829d6b
GS
2851 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2852 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2853 char q = ((*s == '\'') ? '"' : '\'');
2854 /* If here for an expression, and parsed no attrs, back off. */
2855 if (tmp == '=' && !attrs) {
2856 s = PL_bufptr;
2857 break;
2858 }
2859 /* MUST advance bufptr here to avoid bogus "at end of line"
2860 context messages from yyerror().
2861 */
2862 PL_bufptr = s;
2863 if (!*s)
2864 yyerror("Unterminated attribute list");
2865 else
2866 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2867 q, *s, q));
2868 if (attrs)
2869 op_free(attrs);
2870 OPERATOR(':');
2871 }
f9829d6b 2872 got_attrs:
09bef843
SB
2873 if (attrs) {
2874 PL_nextval[PL_nexttoke].opval = attrs;
2875 force_next(THING);
2876 }
2877 TOKEN(COLONATTR);
2878 }
a0d0e21e 2879 OPERATOR(':');
8990e307
LW
2880 case '(':
2881 s++;
3280af22
NIS
2882 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2883 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2884 else
3280af22 2885 PL_expect = XTERM;
a0d0e21e 2886 TOKEN('(');
378cc40b 2887 case ';':
57843af0
GS
2888 if (CopLINE(PL_curcop) < PL_copline)
2889 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2890 tmp = *s++;
2891 OPERATOR(tmp);
2892 case ')':
378cc40b 2893 tmp = *s++;
16d20bd9
AD
2894 s = skipspace(s);
2895 if (*s == '{')
2896 PREBLOCK(tmp);
378cc40b 2897 TERM(tmp);
79072805
LW
2898 case ']':
2899 s++;
3280af22 2900 if (PL_lex_brackets <= 0)
d98d5fff 2901 yyerror("Unmatched right square bracket");
463ee0b2 2902 else
3280af22
NIS
2903 --PL_lex_brackets;
2904 if (PL_lex_state == LEX_INTERPNORMAL) {
2905 if (PL_lex_brackets == 0) {
a0d0e21e 2906 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2907 PL_lex_state = LEX_INTERPEND;
79072805
LW
2908 }
2909 }
4633a7c4 2910 TERM(']');
79072805
LW
2911 case '{':
2912 leftbracket:
79072805 2913 s++;
3280af22
NIS
2914 if (PL_lex_brackets > 100) {
2915 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2916 if (newlb != PL_lex_brackstack) {
8990e307 2917 SAVEFREEPV(newlb);
3280af22 2918 PL_lex_brackstack = newlb;
8990e307
LW
2919 }
2920 }
3280af22 2921 switch (PL_expect) {
a0d0e21e 2922 case XTERM:
3280af22 2923 if (PL_lex_formbrack) {
a0d0e21e
LW
2924 s--;
2925 PRETERMBLOCK(DO);
2926 }
3280af22
NIS
2927 if (PL_oldoldbufptr == PL_last_lop)
2928 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2929 else
3280af22 2930 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2931 OPERATOR(HASHBRACK);
a0d0e21e 2932 case XOPERATOR:
3280af22 2933 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2934 s++;
44a8e56a 2935 d = s;
3280af22
NIS
2936 PL_tokenbuf[0] = '\0';
2937 if (d < PL_bufend && *d == '-') {
2938 PL_tokenbuf[0] = '-';
44a8e56a 2939 d++;
3280af22 2940 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a
PP
2941 d++;
2942 }
7e2040f0 2943 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2944 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2945 FALSE, &len);
3280af22 2946 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2947 d++;
2948 if (*d == '}') {
3280af22 2949 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
2950 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2951 if (minus)
2952 force_next('-');
748a9306
LW
2953 }
2954 }
2955 /* FALL THROUGH */
09bef843 2956 case XATTRBLOCK:
748a9306 2957 case XBLOCK:
3280af22
NIS
2958 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2959 PL_expect = XSTATE;
a0d0e21e 2960 break;
09bef843 2961 case XATTRTERM:
a0d0e21e 2962 case XTERMBLOCK:
3280af22
NIS
2963 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2964 PL_expect = XSTATE;
a0d0e21e
LW
2965 break;
2966 default: {
2967 char *t;
3280af22
NIS
2968 if (PL_oldoldbufptr == PL_last_lop)
2969 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2970 else
3280af22 2971 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2972 s = skipspace(s);
09ecc4b6 2973 if (*s == '}')
a0d0e21e 2974 OPERATOR(HASHBRACK);
b8a4b1be
GS
2975 /* This hack serves to disambiguate a pair of curlies
2976 * as being a block or an anon hash. Normally, expectation
2977 * determines that, but in cases where we're not in a
2978 * position to expect anything in particular (like inside
2979 * eval"") we have to resolve the ambiguity. This code
2980 * covers the case where the first term in the curlies is a
2981 * quoted string. Most other cases need to be explicitly
2982 * disambiguated by prepending a `+' before the opening
2983 * curly in order to force resolution as an anon hash.
2984 *
2985 * XXX should probably propagate the outer expectation
2986 * into eval"" to rely less on this hack, but that could
2987 * potentially break current behavior of eval"".
2988 * GSAR 97-07-21
2989 */
2990 t = s;
2991 if (*s == '\'' || *s == '"' || *s == '`') {
2992 /* common case: get past first string, handling escapes */
3280af22 2993 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2994 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2995 t++;
2996 t++;
a0d0e21e 2997 }
b8a4b1be 2998 else if (*s == 'q') {
3280af22 2999 if (++t < PL_bufend
b8a4b1be 3000 && (!isALNUM(*t)
3280af22 3001 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3002 && !isALNUM(*t))))
3003 {
b8a4b1be
GS
3004 char *tmps;
3005 char open, close, term;
3006 I32 brackets = 1;
3007
3280af22 3008 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3009 t++;
3010 term = *t;
3011 open = term;
3012 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3013 term = tmps[5];
3014 close = term;
3015 if (open == close)
3280af22
NIS
3016 for (t++; t < PL_bufend; t++) {
3017 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3018 t++;
6d07e5e9 3019 else if (*t == open)
b8a4b1be
GS
3020 break;
3021 }
3022 else
3280af22
NIS
3023 for (t++; t < PL_bufend; t++) {
3024 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3025 t++;
6d07e5e9 3026 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3027 break;
3028 else if (*t == open)
3029 brackets++;
3030 }
3031 }
3032 t++;
a0d0e21e 3033 }
7e2040f0 3034 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3035 t += UTF8SKIP(t);
7e2040f0 3036 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3037 t += UTF8SKIP(t);
a0d0e21e 3038 }
3280af22 3039 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3040 t++;
b8a4b1be
GS
3041 /* if comma follows first term, call it an anon hash */
3042 /* XXX it could be a comma expression with loop modifiers */
3280af22 3043 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3044 || (*t == '=' && t[1] == '>')))
a0d0e21e 3045 OPERATOR(HASHBRACK);
3280af22 3046 if (PL_expect == XREF)
4e4e412b 3047 PL_expect = XTERM;
a0d0e21e 3048 else {
3280af22
NIS
3049 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3050 PL_expect = XSTATE;
a0d0e21e 3051 }
8990e307 3052 }
a0d0e21e 3053 break;
463ee0b2 3054 }
57843af0 3055 yylval.ival = CopLINE(PL_curcop);
79072805 3056 if (isSPACE(*s) || *s == '#')
3280af22 3057 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3058 TOKEN('{');
378cc40b 3059 case '}':
79072805
LW
3060 rightbracket:
3061 s++;
3280af22 3062 if (PL_lex_brackets <= 0)
d98d5fff 3063 yyerror("Unmatched right curly bracket");
463ee0b2 3064 else
3280af22
NIS
3065 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3066 if (PL_lex_brackets < PL_lex_formbrack)
3067 PL_lex_formbrack = 0;
3068 if (PL_lex_state == LEX_INTERPNORMAL) {
3069 if (PL_lex_brackets == 0) {
9059aa12
LW
3070 if (PL_expect & XFAKEBRACK) {
3071 PL_expect &= XENUMMASK;
3280af22
NIS
3072 PL_lex_state = LEX_INTERPEND;
3073 PL_bufptr = s;
cea2e8a9 3074 return yylex(); /* ignore fake brackets */
79072805 3075 }
fa83b5b6 3076 if (*s == '-' && s[1] == '>')
3280af22 3077 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3078 else if (*s != '[' && *s != '{')
3280af22 3079 PL_lex_state = LEX_INTERPEND;
79072805
LW
3080 }
3081 }
9059aa12
LW
3082 if (PL_expect & XFAKEBRACK) {
3083 PL_expect &= XENUMMASK;
3280af22 3084 PL_bufptr = s;
cea2e8a9 3085 return yylex(); /* ignore fake brackets */
748a9306 3086 }
79072805
LW
3087 force_next('}');
3088 TOKEN(';');
378cc40b
LW
3089 case '&':
3090 s++;
3091 tmp = *s++;
3092 if (tmp == '&')
a0d0e21e 3093 AOPERATOR(ANDAND);
378cc40b 3094 s--;
3280af22 3095 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3096 if (ckWARN(WARN_SEMICOLON)
3097 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3098 {
57843af0 3099 CopLINE_dec(PL_curcop);
cea2e8a9 3100 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3101 CopLINE_inc(PL_curcop);
463ee0b2 3102 }
79072805 3103 BAop(OP_BIT_AND);
463ee0b2 3104 }
79072805 3105
3280af22
NIS
3106 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3107 if (*PL_tokenbuf) {
3108 PL_expect = XOPERATOR;
3109 force_ident(PL_tokenbuf, '&');
463ee0b2 3110 }
79072805
LW
3111 else
3112 PREREF('&');
c07a80fd 3113 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3114 TERM('&');
3115
378cc40b
LW
3116 case '|':
3117 s++;
3118 tmp = *s++;
3119 if (tmp == '|')
a0d0e21e 3120 AOPERATOR(OROR);
378cc40b 3121 s--;
79072805 3122 BOop(OP_BIT_OR);
378cc40b
LW
3123 case '=':
3124 s++;
3125 tmp = *s++;
3126 if (tmp == '=')
79072805
LW
3127 Eop(OP_EQ);
3128 if (tmp == '>')
3129 OPERATOR(',');
378cc40b 3130 if (tmp == '~')
79072805 3131 PMop(OP_MATCH);
599cee73 3132 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3133 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3134 s--;
3280af22
NIS
3135 if (PL_expect == XSTATE && isALPHA(tmp) &&
3136 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3137 {
3280af22
NIS
3138 if (PL_in_eval && !PL_rsfp) {
3139 d = PL_bufend;
a5f75d66
AD
3140 while (s < d) {
3141 if (*s++ == '\n') {
3142 incline(s);
3143 if (strnEQ(s,"=cut",4)) {
3144 s = strchr(s,'\n');
3145 if (s)
3146 s++;
3147 else
3148 s = d;
3149 incline(s);
3150 goto retry;
3151 }
3152 }
3153 }
3154 goto retry;
3155 }
3280af22
NIS
3156 s = PL_bufend;
3157 PL_doextract = TRUE;
a0d0e21e
LW
3158 goto retry;
3159 }
3280af22 3160 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3161 char *t;
51882d45 3162#ifdef PERL_STRICT_CR
a0d0e21e 3163 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3164#else
3165 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3166#endif
a0d0e21e
LW
3167 if (*t == '\n' || *t == '#') {
3168 s--;
3280af22 3169 PL_expect = XBLOCK;
a0d0e21e
LW
3170 goto leftbracket;
3171 }
79072805 3172 }
a0d0e21e
LW
3173 yylval.ival = 0;
3174 OPERATOR(ASSIGNOP);
378cc40b
LW
3175 case '!':
3176 s++;
3177 tmp = *s++;
3178 if (tmp == '=')
79072805 3179 Eop(OP_NE);
378cc40b 3180 if (tmp == '~')
79072805 3181 PMop(OP_NOT);
378cc40b
LW
3182 s--;
3183 OPERATOR('!');
3184 case '<':
3280af22 3185 if (PL_expect != XOPERATOR) {
93a17b20 3186 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3187 check_uni();
79072805
LW
3188 if (s[1] == '<')
3189 s = scan_heredoc(s);
3190 else
3191 s = scan_inputsymbol(s);
3192 TERM(sublex_start());
378cc40b
LW
3193 }
3194 s++;
3195 tmp = *s++;
3196 if (tmp == '<')
79072805 3197 SHop(OP_LEFT_SHIFT);
395c3793
LW
3198 if (tmp == '=') {
3199 tmp = *s++;
3200 if (tmp == '>')
79072805 3201 Eop(OP_NCMP);
395c3793 3202 s--;
79072805 3203 Rop(OP_LE);
395c3793 3204 }
378cc40b 3205 s--;
79072805 3206 Rop(OP_LT);
378cc40b
LW
3207 case '>':
3208 s++;
3209 tmp = *s++;
3210 if (tmp == '>')
79072805 3211 SHop(OP_RIGHT_SHIFT);
378cc40b 3212 if (tmp == '=')
79072805 3213 Rop(OP_GE);
378cc40b 3214 s--;
79072805 3215 Rop(OP_GT);
378cc40b
LW
3216
3217 case '$':
bbce6d69
PP
3218 CLINE;
3219
3280af22
NIS
3220 if (PL_expect == XOPERATOR) {
3221 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3222 PL_expect = XTERM;
a0d0e21e 3223 depcom();
bbce6d69 3224 return ','; /* grandfather non-comma-format format */
a0d0e21e 3225 }
8990e307 3226 }
a0d0e21e 3227
7e2040f0 3228 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3229 PL_tokenbuf[0] = '@';
376b8730
SM
3230 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3231 sizeof PL_tokenbuf - 1, FALSE);
3232 if (PL_expect == XOPERATOR)
3233 no_op("Array length", s);
3280af22 3234 if (!PL_tokenbuf[1])
a0d0e21e 3235 PREREF(DOLSHARP);
3280af22
NIS
3236 PL_expect = XOPERATOR;
3237 PL_pending_ident = '#';
463ee0b2 3238 TOKEN(DOLSHARP);
79072805 3239 }
bbce6d69 3240
3280af22 3241 PL_tokenbuf[0] = '$';
376b8730
SM
3242 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3243 sizeof PL_tokenbuf - 1, FALSE);
3244 if (PL_expect == XOPERATOR)
3245 no_op("Scalar", s);
3280af22
NIS
3246 if (!PL_tokenbuf[1]) {
3247 if (s == PL_bufend)
bbce6d69
PP
3248 yyerror("Final $ should be \\$ or $name");
3249 PREREF('$');
8990e307 3250 }
a0d0e21e 3251
bbce6d69 3252 /* This kludge not intended to be bulletproof. */
3280af22 3253 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3254 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3255 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69
PP
3256 yylval.opval->op_private = OPpCONST_ARYBASE;
3257 TERM(THING);
3258 }
3259
ff68c719 3260 d = s;
69d2bceb 3261 tmp = (I32)*s;
3280af22 3262 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3263 s = skipspace(s);
3264
3280af22 3265 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3266 char *t;
3267 if (*s == '[') {
3280af22 3268 PL_tokenbuf[0] = '@';
599cee73 3269 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3270 for(t = s + 1;
7e2040f0 3271 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3272 t++) ;
a0d0e21e 3273 if (*t++ == ',') {
3280af22
NIS
3274 PL_bufptr = skipspace(PL_bufptr);
3275 while (t < PL_bufend && *t != ']')
bbce6d69 3276 t++;
cea2e8a9 3277 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3278 "Multidimensional syntax %.*s not supported",
3279 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3280 }
3281 }
bbce6d69
PP
3282 }
3283 else if (*s == '{') {
3280af22 3284 PL_tokenbuf[0] = '%';
599cee73 3285 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3286 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3287 {
3280af22 3288 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3289 STRLEN len;
3290 for (t++; isSPACE(*t); t++) ;
7e2040f0 3291 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3292 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3293 for (; isSPACE(*t); t++) ;
864dbfa3 3294 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3295 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3296 "You need to quote \"%s\"", tmpbuf);
748a9306 3297 }
93a17b20
LW
3298 }
3299 }
2f3197b3 3300 }
bbce6d69 3301
3280af22 3302 PL_expect = XOPERATOR;
69d2bceb 3303 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3304 bool islop = (PL_last_lop == PL_oldoldbufptr);
3305 if (!islop || PL_last_lop_op == OP_GREPSTART)
3306 PL_expect = XOPERATOR;
bbce6d69 3307 else if (strchr("$@\"'`q", *s))
3280af22 3308 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3309 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3310 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3311 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3312 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3313 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3314 if (tmp = keyword(tmpbuf, len)) {
3315 /* binary operators exclude handle interpretations */
3316 switch (tmp) {
3317 case -KEY_x:
3318 case -KEY_eq:
3319 case -KEY_ne:
3320 case -KEY_gt:
3321 case -KEY_lt:
3322 case -KEY_ge:
3323 case -KEY_le:
3324 case -KEY_cmp:
3325 break;
3326 default:
3280af22 3327 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3328 break;
3329 }
3330 }
68dc0745
PP
3331 else {
3332 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3333 if (gv && GvCVu(gv))
3280af22 3334 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3335 }
93a17b20 3336 }
bbce6d69 3337 else if (isDIGIT(*s))
3280af22 3338 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3339 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3340 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3341 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3342 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3343 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3344 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3345 }
3280af22 3346 PL_pending_ident = '$';
79072805 3347 TOKEN('$');
378cc40b
LW
3348
3349 case '@':
3280af22 3350 if (PL_expect == XOPERATOR)
bbce6d69 3351 no_op("Array", s);
3280af22
NIS
3352 PL_tokenbuf[0] = '@';
3353 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3354 if (!PL_tokenbuf[1]) {
3355 if (s == PL_bufend)
bbce6d69
PP
3356 yyerror("Final @ should be \\@ or @name");
3357 PREREF('@');
3358 }
3280af22 3359 if (PL_lex_state == LEX_NORMAL)
ff68c719 3360 s = skipspace(s);
3280af22 3361 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3362 if (*s == '{')
3280af22 3363 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3364
3365 /* Warn about @ where they meant $. */
599cee73 3366 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3367 if (*s == '[' || *s == '{') {
3368 char *t = s + 1;
7e2040f0 3369 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3370 t++;
3371 if (*t == '}' || *t == ']') {
3372 t++;
3280af22 3373 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3374 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3375 "Scalar value %.*s better written as $%.*s",
3280af22 3376 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3377 }
93a17b20
LW
3378 }
3379 }
463ee0b2 3380 }
3280af22 3381 PL_pending_ident = '@';
79072805 3382 TERM('@');
378cc40b
LW
3383
3384 case '/': /* may either be division or pattern */
3385 case '?': /* may either be conditional or pattern */
3280af22 3386 if (PL_expect != XOPERATOR) {
c277df42 3387 /* Disable warning on "study /blah/" */
3280af22
NIS
3388 if (PL_oldoldbufptr == PL_last_uni
3389 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3390 || memNE(PL_last_uni, "study", 5)
3391 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3392 check_uni();
8782bef2 3393 s = scan_pat(s,OP_MATCH);
79072805 3394 TERM(sublex_start());
378cc40b
LW
3395 }
3396 tmp = *s++;
a687059c 3397 if (tmp == '/')
79072805 3398 Mop(OP_DIVIDE);
378cc40b
LW
3399 OPERATOR(tmp);
3400
3401 case '.':
51882d45
GS
3402 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3403#ifdef PERL_STRICT_CR
3404 && s[1] == '\n'
3405#else
3406 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3407#endif
3408 && (s == PL_linestart || s[-1] == '\n') )
3409 {
3280af22
NIS
3410 PL_lex_formbrack = 0;
3411 PL_expect = XSTATE;
79072805
LW
3412 goto rightbracket;
3413 }
3280af22 3414 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3415 tmp = *s++;
a687059c
LW
3416 if (*s == tmp) {
3417 s++;
2f3197b3
LW
3418 if (*s == tmp) {
3419 s++;
79072805 3420 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3421 }
3422 else
79072805 3423 yylval.ival = 0;
378cc40b 3424 OPERATOR(DOTDOT);
a687059c 3425 }
3280af22 3426 if (PL_expect != XOPERATOR)
2f3197b3 3427 check_uni();
79072805 3428 Aop(OP_CONCAT);
378cc40b
LW
3429 }
3430 /* FALL THROUGH */
3431 case '0': case '1': case '2': case '3': case '4':
3432 case '5': case '6': case '7': case '8': case '9':
79072805 3433 s = scan_num(s);
3280af22 3434 if (PL_expect == XOPERATOR)
8990e307 3435 no_op("Number",s);
79072805
LW
3436 TERM(THING);
3437
3438 case '\'':
09bef843 3439 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3440 if (PL_expect == XOPERATOR) {
3441 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3442 PL_expect = XTERM;
a0d0e21e
LW
3443 depcom();
3444 return ','; /* grandfather non-comma-format format */
3445 }
463ee0b2 3446 else
8990e307 3447 no_op("String",s);
463ee0b2 3448 }
79072805 3449 if (!s)
85e6fe83 3450 missingterm((char*)0);
79072805
LW
3451 yylval.ival = OP_CONST;
3452 TERM(sublex_start());
3453
3454 case '"':
09bef843 3455 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3456 if (PL_expect == XOPERATOR) {
3457 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3458 PL_expect = XTERM;
a0d0e21e
LW
3459 depcom();
3460 return ','; /* grandfather non-comma-format format */
3461 }
463ee0b2 3462 else
8990e307 3463 no_op("String",s);
463ee0b2 3464 }
79072805 3465 if (!s)
85e6fe83 3466 missingterm((char*)0);
4633a7c4 3467 yylval.ival = OP_CONST;
3280af22 3468 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3469 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3470 yylval.ival = OP_STRINGIFY;
3471 break;
3472 }
3473 }
79072805
LW
3474 TERM(sublex_start());
3475
3476 case '`':
09bef843 3477 s = scan_str(s,FALSE,FALSE);
3280af22 3478 if (PL_expect == XOPERATOR)
8990e307 3479 no_op("Backticks",s);
79072805 3480 if (!s)
85e6fe83 3481 missingterm((char*)0);
79072805
LW
3482 yylval.ival = OP_BACKTICK;
3483 set_csh();
3484 TERM(sublex_start());
3485
3486 case '\\':
3487 s++;
599cee73 3488 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3489 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3490 *s, *s);
3280af22 3491 if (PL_expect == XOPERATOR)
8990e307 3492 no_op("Backslash",s);
79072805
LW
3493 OPERATOR(REFGEN);
3494
a7cb1f99 3495 case 'v':
e526c9e6 3496 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3497 char *start = s;
3498 start++;
3499 start++;
dd629d5b 3500 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3501 start++;
3502 if (*start == '.' && isDIGIT(start[1])) {
3503 s = scan_num(s);
3504 TERM(THING);
3505 }
e526c9e6
GS
3506 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3507 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
3508 char c = *start;
3509 GV *gv;
3510 *start = '\0';
3511 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3512 *start = c;
3513 if (!gv) {
3514 s = scan_num(s);
3515 TERM(THING);
3516 }
3517 }
a7cb1f99
GS
3518 }
3519 goto keylookup;
79072805 3520 case 'x':
3280af22 3521 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3522 s++;
3523 Mop(OP_REPEAT);
2f3197b3 3524 }
79072805
LW
3525 goto keylookup;
3526
378cc40b 3527 case '_':
79072805
LW
3528 case 'a': case 'A':
3529 case 'b': case 'B':
3530 case 'c': case 'C':
3531 case 'd': case 'D':
3532 case 'e': case 'E':
3533 case 'f': case 'F':
3534 case 'g': case 'G':
3535 case 'h': case 'H':
3536 case 'i': case 'I':
3537 case 'j': case 'J':
3538 case 'k': case 'K':
3539 case 'l': case 'L':
3540 case 'm': case 'M':
3541 case 'n': case 'N':
3542 case 'o': case 'O':
3543 case 'p': case 'P':
3544 case 'q': case 'Q':
3545 case 'r': case 'R':
3546 case 's': case 'S':
3547 case 't': case 'T':
3548 case 'u': case 'U':
a7cb1f99 3549 case 'V':
79072805
LW
3550 case 'w': case 'W':
3551 case 'X':
3552 case 'y': case 'Y':
3553 case 'z': case 'Z':
3554
49dc05e3 3555 keylookup: {
2d8e6c8d 3556 STRLEN n_a;
161b471a
NIS
3557 gv = Nullgv;
3558 gvp = 0;
49dc05e3 3559
3280af22
NIS
3560 PL_bufptr = s;
3561 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01
PP
3562
3563 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3564 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3565 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3566 (PL_tokenbuf[0] == 'q' &&
3567 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01
PP
3568
3569 /* x::* is just a word, unless x is "CORE" */
3280af22 3570 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3571 goto just_a_word;
3572
3643fb5f 3573 d = s;
3280af22 3574 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3575 d++; /* no comments skipped here, or s### is misparsed */
3576
3577 /* Is this a label? */
3280af22
NIS
3578 if (!tmp && PL_expect == XSTATE
3579 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3580 s = d + 1;
3280af22 3581 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01
PP
3582 CLINE;
3583 TOKEN(LABEL);
3643fb5f
CS
3584 }
3585
3586 /* Check for keywords */
3280af22 3587 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3588
3589 /* Is this a word before a => operator? */
748a9306
LW
3590 if (strnEQ(d,"=>",2)) {
3591 CLINE;
3280af22 3592 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3593 yylval.opval->op_private = OPpCONST_BARE;
3594 TERM(WORD);