This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
allow C<print v10>, $h{v13.10} etc.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
9059aa12
LW
32#define XFAKEBRACK 128
33#define XENUMMASK 127
34
7e2040f0 35/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
36#define UTF (PL_hints & HINT_UTF8)
37
2b92dfce
GS
38/* In variables name $^X, these are the legal values for X.
39 * 1999-02-27 mjd-perl-patch@plover.com */
40#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
41
ffb4593c
NT
42/* LEX_* are values for PL_lex_state, the state of the lexer.
43 * They are arranged oddly so that the guard on the switch statement
79072805
LW
44 * can get by with a single comparison (if the compiler is smart enough).
45 */
46
fb73857a 47/* #define LEX_NOTPARSING 11 is done in perl.h. */
48
55497cff 49#define LEX_NORMAL 10
50#define LEX_INTERPNORMAL 9
51#define LEX_INTERPCASEMOD 8
52#define LEX_INTERPPUSH 7
53#define LEX_INTERPSTART 6
54#define LEX_INTERPEND 5
55#define LEX_INTERPENDMAYBE 4
56#define LEX_INTERPCONCAT 3
57#define LEX_INTERPCONST 2
58#define LEX_FORMLINE 1
59#define LEX_KNOWNEXT 0
79072805 60
a790bc05 61/* XXX If this causes problems, set i_unistd=undef in the hint file. */
62#ifdef I_UNISTD
63# include <unistd.h> /* Needed for execv() */
64#endif
65
66
79072805
LW
67#ifdef ff_next
68#undef ff_next
d48672a2
LW
69#endif
70
a1a0e61e
TD
71#ifdef USE_PURE_BISON
72YYSTYPE* yylval_pointer = NULL;
73int* yychar_pointer = NULL;
22c35a8c
GS
74# undef yylval
75# undef yychar
e4bfbdd4
JH
76# define yylval (*yylval_pointer)
77# define yychar (*yychar_pointer)
78# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
79# undef yylex
80# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
81#endif
82
79072805 83#include "keywords.h"
fe14fcc3 84
ffb4593c
NT
85/* CLINE is a macro that ensures PL_copline has a sane value */
86
ae986130
LW
87#ifdef CLINE
88#undef CLINE
89#endif
57843af0 90#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 91
ffb4593c
NT
92/*
93 * Convenience functions to return different tokens and prime the
9cbb5ea2 94 * lexer for the next token. They all take an argument.
ffb4593c
NT
95 *
96 * TOKEN : generic token (used for '(', DOLSHARP, etc)
97 * OPERATOR : generic operator
98 * AOPERATOR : assignment operator
99 * PREBLOCK : beginning the block after an if, while, foreach, ...
100 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
101 * PREREF : *EXPR where EXPR is not a simple identifier
102 * TERM : expression term
103 * LOOPX : loop exiting command (goto, last, dump, etc)
104 * FTST : file test operator
105 * FUN0 : zero-argument function
2d2e263d 106 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
107 * BOop : bitwise or or xor
108 * BAop : bitwise and
109 * SHop : shift operator
110 * PWop : power operator
9cbb5ea2 111 * PMop : pattern-matching operator
ffb4593c
NT
112 * Aop : addition-level operator
113 * Mop : multiplication-level operator
114 * Eop : equality-testing operator
115 * Rop : relational operator <= != gt
116 *
117 * Also see LOP and lop() below.
118 */
119
3280af22
NIS
120#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
121#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
122#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
123#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
124#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
125#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
126#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
127#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
128#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
129#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
130#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
131#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
132#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
133#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
134#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
135#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
136#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
137#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
138#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
139#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 140
a687059c
LW
141/* This bit of chicanery makes a unary function followed by
142 * a parenthesis into a function with one argument, highest precedence.
143 */
2f3197b3 144#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
145 PL_expect = XTERM, \
146 PL_bufptr = s, \
147 PL_last_uni = PL_oldbufptr, \
148 PL_last_lop_op = f, \
a687059c
LW
149 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
150
79072805 151#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
152 PL_bufptr = s, \
153 PL_last_uni = PL_oldbufptr, \
79072805
LW
154 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
155
9f68db38 156/* grandfather return to old style */
3280af22 157#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 158
ffb4593c
NT
159/*
160 * S_ao
161 *
162 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
163 * into an OP_ANDASSIGN or OP_ORASSIGN
164 */
165
76e3520e 166STATIC int
cea2e8a9 167S_ao(pTHX_ int toketype)
a0d0e21e 168{
3280af22
NIS
169 if (*PL_bufptr == '=') {
170 PL_bufptr++;
a0d0e21e
LW
171 if (toketype == ANDAND)
172 yylval.ival = OP_ANDASSIGN;
173 else if (toketype == OROR)
174 yylval.ival = OP_ORASSIGN;
175 toketype = ASSIGNOP;
176 }
177 return toketype;
178}
179
ffb4593c
NT
180/*
181 * S_no_op
182 * When Perl expects an operator and finds something else, no_op
183 * prints the warning. It always prints "<something> found where
184 * operator expected. It prints "Missing semicolon on previous line?"
185 * if the surprise occurs at the start of the line. "do you need to
186 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
187 * where the compiler doesn't know if foo is a method call or a function.
188 * It prints "Missing operator before end of line" if there's nothing
189 * after the missing operator, or "... before <...>" if there is something
190 * after the missing operator.
191 */
192
76e3520e 193STATIC void
cea2e8a9 194S_no_op(pTHX_ char *what, char *s)
463ee0b2 195{
3280af22
NIS
196 char *oldbp = PL_bufptr;
197 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 198
1189a94a
GS
199 if (!s)
200 s = oldbp;
201 else {
202 assert(s >= oldbp);
203 PL_bufptr = s;
204 }
cea2e8a9 205 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 206 if (is_first)
cea2e8a9 207 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 208 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 209 char *t;
7e2040f0 210 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 211 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 212 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 213 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
214 }
215 else
cea2e8a9 216 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 217 PL_bufptr = oldbp;
8990e307
LW
218}
219
ffb4593c
NT
220/*
221 * S_missingterm
222 * Complain about missing quote/regexp/heredoc terminator.
223 * If it's called with (char *)NULL then it cauterizes the line buffer.
224 * If we're in a delimited string and the delimiter is a control
225 * character, it's reformatted into a two-char sequence like ^C.
226 * This is fatal.
227 */
228
76e3520e 229STATIC void
cea2e8a9 230S_missingterm(pTHX_ char *s)
8990e307
LW
231{
232 char tmpbuf[3];
233 char q;
234 if (s) {
235 char *nl = strrchr(s,'\n');
d2719217 236 if (nl)
8990e307
LW
237 *nl = '\0';
238 }
9d116dd7
JH
239 else if (
240#ifdef EBCDIC
241 iscntrl(PL_multi_close)
242#else
243 PL_multi_close < 32 || PL_multi_close == 127
244#endif
245 ) {
8990e307 246 *tmpbuf = '^';
3280af22 247 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
248 s = "\\n";
249 tmpbuf[2] = '\0';
250 s = tmpbuf;
251 }
252 else {
3280af22 253 *tmpbuf = PL_multi_close;
8990e307
LW
254 tmpbuf[1] = '\0';
255 s = tmpbuf;
256 }
257 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 258 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 259}
79072805 260
ffb4593c
NT
261/*
262 * Perl_deprecate
ffb4593c
NT
263 */
264
79072805 265void
864dbfa3 266Perl_deprecate(pTHX_ char *s)
a0d0e21e 267{
d008e5eb 268 dTHR;
599cee73 269 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 270 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
271}
272
ffb4593c
NT
273/*
274 * depcom
9cbb5ea2 275 * Deprecate a comma-less variable list.
ffb4593c
NT
276 */
277
76e3520e 278STATIC void
cea2e8a9 279S_depcom(pTHX)
a0d0e21e
LW
280{
281 deprecate("comma-less variable list");
282}
283
ffb4593c 284/*
9cbb5ea2
GS
285 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
286 * utf16-to-utf8-reversed.
ffb4593c
NT
287 */
288
c39cd008
GS
289#ifdef PERL_CR_FILTER
290static void
291strip_return(SV *sv)
292{
293 register char *s = SvPVX(sv);
294 register char *e = s + SvCUR(sv);
295 /* outer loop optimized to do nothing if there are no CR-LFs */
296 while (s < e) {
297 if (*s++ == '\r' && *s == '\n') {
298 /* hit a CR-LF, need to copy the rest */
299 register char *d = s - 1;
300 *d++ = *s++;
301 while (s < e) {
302 if (*s == '\r' && s[1] == '\n')
303 s++;
304 *d++ = *s++;
305 }
306 SvCUR(sv) -= s - d;
307 return;
308 }
309 }
310}
a868473f 311
76e3520e 312STATIC I32
c39cd008 313S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 314{
c39cd008
GS
315 I32 count = FILTER_READ(idx+1, sv, maxlen);
316 if (count > 0 && !maxlen)
317 strip_return(sv);
318 return count;
a868473f
NIS
319}
320#endif
321
a0ed51b3 322STATIC I32
cea2e8a9 323S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
324{
325 I32 count = FILTER_READ(idx+1, sv, maxlen);
326 if (count) {
dfe13c55
GS
327 U8* tmps;
328 U8* tend;
329 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 330 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 331 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
332
333 }
334 return count;
335}
336
337STATIC I32
cea2e8a9 338S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
339{
340 I32 count = FILTER_READ(idx+1, sv, maxlen);
341 if (count) {
dfe13c55
GS
342 U8* tmps;
343 U8* tend;
344 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 345 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 346 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
347
348 }
349 return count;
350}
a868473f 351
ffb4593c
NT
352/*
353 * Perl_lex_start
9cbb5ea2
GS
354 * Initialize variables. Uses the Perl save_stack to save its state (for
355 * recursive calls to the parser).
ffb4593c
NT
356 */
357
a0d0e21e 358void
864dbfa3 359Perl_lex_start(pTHX_ SV *line)
79072805 360{
0f15f207 361 dTHR;
8990e307
LW
362 char *s;
363 STRLEN len;
364
3280af22
NIS
365 SAVEI32(PL_lex_dojoin);
366 SAVEI32(PL_lex_brackets);
3280af22
NIS
367 SAVEI32(PL_lex_casemods);
368 SAVEI32(PL_lex_starts);
369 SAVEI32(PL_lex_state);
7766f137 370 SAVEVPTR(PL_lex_inpat);
3280af22 371 SAVEI32(PL_lex_inwhat);
18b09519
GS
372 if (PL_lex_state == LEX_KNOWNEXT) {
373 I32 toke = PL_nexttoke;
374 while (--toke >= 0) {
375 SAVEI32(PL_nexttype[toke]);
376 SAVEVPTR(PL_nextval[toke]);
377 }
378 SAVEI32(PL_nexttoke);
379 PL_nexttoke = 0;
380 }
57843af0 381 SAVECOPLINE(PL_curcop);
3280af22
NIS
382 SAVEPPTR(PL_bufptr);
383 SAVEPPTR(PL_bufend);
384 SAVEPPTR(PL_oldbufptr);
385 SAVEPPTR(PL_oldoldbufptr);
386 SAVEPPTR(PL_linestart);
387 SAVESPTR(PL_linestr);
388 SAVEPPTR(PL_lex_brackstack);
389 SAVEPPTR(PL_lex_casestack);
c76ac1ee 390 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
391 SAVESPTR(PL_lex_stuff);
392 SAVEI32(PL_lex_defer);
09bef843 393 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 394 SAVESPTR(PL_lex_repl);
bebdddfc
GS
395 SAVEINT(PL_expect);
396 SAVEINT(PL_lex_expect);
3280af22
NIS
397
398 PL_lex_state = LEX_NORMAL;
399 PL_lex_defer = 0;
400 PL_expect = XSTATE;
401 PL_lex_brackets = 0;
3280af22
NIS
402 New(899, PL_lex_brackstack, 120, char);
403 New(899, PL_lex_casestack, 12, char);
404 SAVEFREEPV(PL_lex_brackstack);
405 SAVEFREEPV(PL_lex_casestack);
406 PL_lex_casemods = 0;
407 *PL_lex_casestack = '\0';
408 PL_lex_dojoin = 0;
409 PL_lex_starts = 0;
410 PL_lex_stuff = Nullsv;
411 PL_lex_repl = Nullsv;
412 PL_lex_inpat = 0;
413 PL_lex_inwhat = 0;
09bef843 414 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
415 PL_linestr = line;
416 if (SvREADONLY(PL_linestr))
417 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
418 s = SvPV(PL_linestr, len);
8990e307 419 if (len && s[len-1] != ';') {
3280af22
NIS
420 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
421 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
422 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 423 }
3280af22
NIS
424 SvTEMP_off(PL_linestr);
425 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
426 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
427 SvREFCNT_dec(PL_rs);
79cb57f6 428 PL_rs = newSVpvn("\n", 1);
3280af22 429 PL_rsfp = 0;
79072805 430}
a687059c 431
ffb4593c
NT
432/*
433 * Perl_lex_end
9cbb5ea2
GS
434 * Finalizer for lexing operations. Must be called when the parser is
435 * done with the lexer.
ffb4593c
NT
436 */
437
463ee0b2 438void
864dbfa3 439Perl_lex_end(pTHX)
463ee0b2 440{
3280af22 441 PL_doextract = FALSE;
463ee0b2
LW
442}
443
ffb4593c
NT
444/*
445 * S_incline
446 * This subroutine has nothing to do with tilting, whether at windmills
447 * or pinball tables. Its name is short for "increment line". It
57843af0 448 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 449 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
450 * # line 500 "foo.pm"
451 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
452 */
453
76e3520e 454STATIC void
cea2e8a9 455S_incline(pTHX_ char *s)
463ee0b2 456{
0f15f207 457 dTHR;
463ee0b2
LW
458 char *t;
459 char *n;
73659bf1 460 char *e;
463ee0b2 461 char ch;
463ee0b2 462
57843af0 463 CopLINE_inc(PL_curcop);
463ee0b2
LW
464 if (*s++ != '#')
465 return;
466 while (*s == ' ' || *s == '\t') s++;
73659bf1
GS
467 if (strnEQ(s, "line", 4))
468 s += 4;
469 else
470 return;
471 if (*s == ' ' || *s == '\t')
472 s++;
473 else
474 return;
475 while (*s == ' ' || *s == '\t') s++;
463ee0b2
LW
476 if (!isDIGIT(*s))
477 return;
478 n = s;
479 while (isDIGIT(*s))
480 s++;
481 while (*s == ' ' || *s == '\t')
482 s++;
73659bf1 483 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 484 s++;
73659bf1
GS
485 e = t + 1;
486 }
463ee0b2 487 else {
463ee0b2 488 for (t = s; !isSPACE(*t); t++) ;
73659bf1 489 e = t;
463ee0b2 490 }
73659bf1
GS
491 while (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\f')
492 e++;
493 if (*e != '\n' && *e != '\0')
494 return; /* false alarm */
495
463ee0b2
LW
496 ch = *t;
497 *t = '\0';
498 if (t - s > 0)
57843af0 499 CopFILE_set(PL_curcop, s);
463ee0b2 500 *t = ch;
57843af0 501 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
502}
503
ffb4593c
NT
504/*
505 * S_skipspace
506 * Called to gobble the appropriate amount and type of whitespace.
507 * Skips comments as well.
508 */
509
76e3520e 510STATIC char *
cea2e8a9 511S_skipspace(pTHX_ register char *s)
a687059c 512{
11343788 513 dTHR;
3280af22
NIS
514 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
515 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
516 s++;
517 return s;
518 }
519 for (;;) {
fd049845 520 STRLEN prevlen;
09bef843
SB
521 SSize_t oldprevlen, oldoldprevlen;
522 SSize_t oldloplen, oldunilen;
60e6418e
GS
523 while (s < PL_bufend && isSPACE(*s)) {
524 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
525 incline(s);
526 }
ffb4593c
NT
527
528 /* comment */
3280af22
NIS
529 if (s < PL_bufend && *s == '#') {
530 while (s < PL_bufend && *s != '\n')
463ee0b2 531 s++;
60e6418e 532 if (s < PL_bufend) {
463ee0b2 533 s++;
60e6418e
GS
534 if (PL_in_eval && !PL_rsfp) {
535 incline(s);
536 continue;
537 }
538 }
463ee0b2 539 }
ffb4593c
NT
540
541 /* only continue to recharge the buffer if we're at the end
542 * of the buffer, we're not reading from a source filter, and
543 * we're in normal lexing mode
544 */
09bef843
SB
545 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
546 PL_lex_state == LEX_FORMLINE)
463ee0b2 547 return s;
ffb4593c
NT
548
549 /* try to recharge the buffer */
9cbb5ea2
GS
550 if ((s = filter_gets(PL_linestr, PL_rsfp,
551 (prevlen = SvCUR(PL_linestr)))) == Nullch)
552 {
553 /* end of file. Add on the -p or -n magic */
3280af22
NIS
554 if (PL_minus_n || PL_minus_p) {
555 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
556 ";}continue{print or die qq(-p destination: $!\\n)" :
557 "");
3280af22
NIS
558 sv_catpv(PL_linestr,";}");
559 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
560 }
561 else
3280af22 562 sv_setpv(PL_linestr,";");
ffb4593c
NT
563
564 /* reset variables for next time we lex */
9cbb5ea2
GS
565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
566 = SvPVX(PL_linestr);
3280af22 567 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
568
569 /* Close the filehandle. Could be from -P preprocessor,
570 * STDIN, or a regular file. If we were reading code from
571 * STDIN (because the commandline held no -e or filename)
572 * then we don't close it, we reset it so the code can
573 * read from STDIN too.
574 */
575
3280af22
NIS
576 if (PL_preprocess && !PL_in_eval)
577 (void)PerlProc_pclose(PL_rsfp);
578 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
579 PerlIO_clearerr(PL_rsfp);
8990e307 580 else
3280af22
NIS
581 (void)PerlIO_close(PL_rsfp);
582 PL_rsfp = Nullfp;
463ee0b2
LW
583 return s;
584 }
ffb4593c
NT
585
586 /* not at end of file, so we only read another line */
09bef843
SB
587 /* make corresponding updates to old pointers, for yyerror() */
588 oldprevlen = PL_oldbufptr - PL_bufend;
589 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
590 if (PL_last_uni)
591 oldunilen = PL_last_uni - PL_bufend;
592 if (PL_last_lop)
593 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
594 PL_linestart = PL_bufptr = s + prevlen;
595 PL_bufend = s + SvCUR(PL_linestr);
596 s = PL_bufptr;
09bef843
SB
597 PL_oldbufptr = s + oldprevlen;
598 PL_oldoldbufptr = s + oldoldprevlen;
599 if (PL_last_uni)
600 PL_last_uni = s + oldunilen;
601 if (PL_last_lop)
602 PL_last_lop = s + oldloplen;
a0d0e21e 603 incline(s);
ffb4593c
NT
604
605 /* debugger active and we're not compiling the debugger code,
606 * so store the line into the debugger's array of lines
607 */
3280af22 608 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
609 SV *sv = NEWSV(85,0);
610
611 sv_upgrade(sv, SVt_PVMG);
3280af22 612 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 613 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 614 }
463ee0b2 615 }
a687059c 616}
378cc40b 617
ffb4593c
NT
618/*
619 * S_check_uni
620 * Check the unary operators to ensure there's no ambiguity in how they're
621 * used. An ambiguous piece of code would be:
622 * rand + 5
623 * This doesn't mean rand() + 5. Because rand() is a unary operator,
624 * the +5 is its argument.
625 */
626
76e3520e 627STATIC void
cea2e8a9 628S_check_uni(pTHX)
ba106d47 629{
2f3197b3 630 char *s;
a0d0e21e 631 char *t;
0453d815 632 dTHR;
2f3197b3 633
3280af22 634 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 635 return;
3280af22
NIS
636 while (isSPACE(*PL_last_uni))
637 PL_last_uni++;
7e2040f0 638 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 639 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 640 return;
0453d815 641 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 642 char ch = *s;
0453d815
PM
643 *s = '\0';
644 Perl_warner(aTHX_ WARN_AMBIGUOUS,
645 "Warning: Use of \"%s\" without parens is ambiguous",
646 PL_last_uni);
647 *s = ch;
648 }
2f3197b3
LW
649}
650
ffb4593c
NT
651/* workaround to replace the UNI() macro with a function. Only the
652 * hints/uts.sh file mentions this. Other comments elsewhere in the
653 * source indicate Microport Unix might need it too.
654 */
655
ffed7fef
LW
656#ifdef CRIPPLED_CC
657
658#undef UNI
ffed7fef 659#define UNI(f) return uni(f,s)
ffed7fef 660
76e3520e 661STATIC int
cea2e8a9 662S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
663{
664 yylval.ival = f;
3280af22
NIS
665 PL_expect = XTERM;
666 PL_bufptr = s;
8f872242
NIS
667 PL_last_uni = PL_oldbufptr;
668 PL_last_lop_op = f;
ffed7fef
LW
669 if (*s == '(')
670 return FUNC1;
671 s = skipspace(s);
672 if (*s == '(')
673 return FUNC1;
674 else
675 return UNIOP;
676}
677
a0d0e21e
LW
678#endif /* CRIPPLED_CC */
679
ffb4593c
NT
680/*
681 * LOP : macro to build a list operator. Its behaviour has been replaced
682 * with a subroutine, S_lop() for which LOP is just another name.
683 */
684
a0d0e21e
LW
685#define LOP(f,x) return lop(f,x,s)
686
ffb4593c
NT
687/*
688 * S_lop
689 * Build a list operator (or something that might be one). The rules:
690 * - if we have a next token, then it's a list operator [why?]
691 * - if the next thing is an opening paren, then it's a function
692 * - else it's a list operator
693 */
694
76e3520e 695STATIC I32
a0be28da 696S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 697{
0f15f207 698 dTHR;
79072805 699 yylval.ival = f;
35c8bce7 700 CLINE;
3280af22
NIS
701 PL_expect = x;
702 PL_bufptr = s;
703 PL_last_lop = PL_oldbufptr;
704 PL_last_lop_op = f;
705 if (PL_nexttoke)
a0d0e21e 706 return LSTOP;
79072805
LW
707 if (*s == '(')
708 return FUNC;
709 s = skipspace(s);
710 if (*s == '(')
711 return FUNC;
712 else
713 return LSTOP;
714}
715
ffb4593c
NT
716/*
717 * S_force_next
9cbb5ea2 718 * When the lexer realizes it knows the next token (for instance,
ffb4593c 719 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
720 * to know what token to return the next time the lexer is called. Caller
721 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
722 * handles the token correctly.
ffb4593c
NT
723 */
724
76e3520e 725STATIC void
cea2e8a9 726S_force_next(pTHX_ I32 type)
79072805 727{
3280af22
NIS
728 PL_nexttype[PL_nexttoke] = type;
729 PL_nexttoke++;
730 if (PL_lex_state != LEX_KNOWNEXT) {
731 PL_lex_defer = PL_lex_state;
732 PL_lex_expect = PL_expect;
733 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
734 }
735}
736
ffb4593c
NT
737/*
738 * S_force_word
739 * When the lexer knows the next thing is a word (for instance, it has
740 * just seen -> and it knows that the next char is a word char, then
741 * it calls S_force_word to stick the next word into the PL_next lookahead.
742 *
743 * Arguments:
b1b65b59 744 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
745 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
746 * int check_keyword : if true, Perl checks to make sure the word isn't
747 * a keyword (do this if the word is a label, e.g. goto FOO)
748 * int allow_pack : if true, : characters will also be allowed (require,
749 * use, etc. do this)
9cbb5ea2 750 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
751 */
752
76e3520e 753STATIC char *
cea2e8a9 754S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 755{
463ee0b2
LW
756 register char *s;
757 STRLEN len;
758
759 start = skipspace(start);
760 s = start;
7e2040f0 761 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 762 (allow_pack && *s == ':') ||
15f0808c 763 (allow_initial_tick && *s == '\'') )
a0d0e21e 764 {
3280af22
NIS
765 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
766 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
767 return start;
768 if (token == METHOD) {
769 s = skipspace(s);
770 if (*s == '(')
3280af22 771 PL_expect = XTERM;
463ee0b2 772 else {
3280af22 773 PL_expect = XOPERATOR;
463ee0b2 774 }
79072805 775 }
3280af22
NIS
776 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
777 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
778 force_next(token);
779 }
780 return s;
781}
782
ffb4593c
NT
783/*
784 * S_force_ident
9cbb5ea2 785 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
786 * text only contains the "foo" portion. The first argument is a pointer
787 * to the "foo", and the second argument is the type symbol to prefix.
788 * Forces the next token to be a "WORD".
9cbb5ea2 789 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
790 */
791
76e3520e 792STATIC void
cea2e8a9 793S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
794{
795 if (s && *s) {
11343788 796 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 797 PL_nextval[PL_nexttoke].opval = o;
79072805 798 force_next(WORD);
748a9306 799 if (kind) {
e858de61 800 dTHR; /* just for in_eval */
11343788 801 o->op_private = OPpCONST_ENTERED;
55497cff 802 /* XXX see note in pp_entereval() for why we forgo typo
803 warnings if the symbol must be introduced in an eval.
804 GSAR 96-10-12 */
3280af22 805 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
806 kind == '$' ? SVt_PV :
807 kind == '@' ? SVt_PVAV :
808 kind == '%' ? SVt_PVHV :
809 SVt_PVGV
810 );
748a9306 811 }
79072805
LW
812 }
813}
814
ffb4593c
NT
815/*
816 * S_force_version
817 * Forces the next token to be a version number.
818 */
819
76e3520e 820STATIC char *
cea2e8a9 821S_force_version(pTHX_ char *s)
89bfa8cd 822{
823 OP *version = Nullop;
44dcb63b
GS
824 bool is_vstr = FALSE;
825 char *d;
89bfa8cd 826
827 s = skipspace(s);
828
44dcb63b
GS
829 d = s;
830 if (*d == 'v') {
831 is_vstr = TRUE;
832 d++;
833 }
834 if (isDIGIT(*d)) {
a7cb1f99 835 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 836 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
89bfa8cd 837 s = scan_num(s);
838 /* real VERSION number -- GBARR */
839 version = yylval.opval;
44dcb63b
GS
840 if (is_vstr) {
841 SV *ver = cSVOPx(version)->op_sv;
842 SvUPGRADE(ver, SVt_PVIV);
843 SvIOKp_on(ver); /* hint that it is a version */
844 }
89bfa8cd 845 }
846 }
847
848 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 849 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 850 force_next(WORD);
851
852 return (s);
853}
854
ffb4593c
NT
855/*
856 * S_tokeq
857 * Tokenize a quoted string passed in as an SV. It finds the next
858 * chunk, up to end of string or a backslash. It may make a new
859 * SV containing that chunk (if HINT_NEW_STRING is on). It also
860 * turns \\ into \.
861 */
862
76e3520e 863STATIC SV *
cea2e8a9 864S_tokeq(pTHX_ SV *sv)
79072805
LW
865{
866 register char *s;
867 register char *send;
868 register char *d;
b3ac6de7
IZ
869 STRLEN len = 0;
870 SV *pv = sv;
79072805
LW
871
872 if (!SvLEN(sv))
b3ac6de7 873 goto finish;
79072805 874
a0d0e21e 875 s = SvPV_force(sv, len);
748a9306 876 if (SvIVX(sv) == -1)
b3ac6de7 877 goto finish;
463ee0b2 878 send = s + len;
79072805
LW
879 while (s < send && *s != '\\')
880 s++;
881 if (s == send)
b3ac6de7 882 goto finish;
79072805 883 d = s;
3280af22 884 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 885 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
886 while (s < send) {
887 if (*s == '\\') {
a0d0e21e 888 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
889 s++; /* all that, just for this */
890 }
891 *d++ = *s++;
892 }
893 *d = '\0';
463ee0b2 894 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 895 finish:
3280af22 896 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 897 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
898 return sv;
899}
900
ffb4593c
NT
901/*
902 * Now come three functions related to double-quote context,
903 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
904 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
905 * interact with PL_lex_state, and create fake ( ... ) argument lists
906 * to handle functions and concatenation.
907 * They assume that whoever calls them will be setting up a fake
908 * join call, because each subthing puts a ',' after it. This lets
909 * "lower \luPpEr"
910 * become
911 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
912 *
913 * (I'm not sure whether the spurious commas at the end of lcfirst's
914 * arguments and join's arguments are created or not).
915 */
916
917/*
918 * S_sublex_start
919 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
920 *
921 * Pattern matching will set PL_lex_op to the pattern-matching op to
922 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
923 *
924 * OP_CONST and OP_READLINE are easy--just make the new op and return.
925 *
926 * Everything else becomes a FUNC.
927 *
928 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
929 * had an OP_CONST or OP_READLINE). This just sets us up for a
930 * call to S_sublex_push().
931 */
932
76e3520e 933STATIC I32
cea2e8a9 934S_sublex_start(pTHX)
79072805
LW
935{
936 register I32 op_type = yylval.ival;
79072805
LW
937
938 if (op_type == OP_NULL) {
3280af22
NIS
939 yylval.opval = PL_lex_op;
940 PL_lex_op = Nullop;
79072805
LW
941 return THING;
942 }
943 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 944 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
945
946 if (SvTYPE(sv) == SVt_PVIV) {
947 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
948 STRLEN len;
949 char *p;
950 SV *nsv;
951
952 p = SvPV(sv, len);
79cb57f6 953 nsv = newSVpvn(p, len);
b3ac6de7
IZ
954 SvREFCNT_dec(sv);
955 sv = nsv;
956 }
957 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 958 PL_lex_stuff = Nullsv;
79072805
LW
959 return THING;
960 }
961
3280af22
NIS
962 PL_sublex_info.super_state = PL_lex_state;
963 PL_sublex_info.sub_inwhat = op_type;
964 PL_sublex_info.sub_op = PL_lex_op;
965 PL_lex_state = LEX_INTERPPUSH;
55497cff 966
3280af22
NIS
967 PL_expect = XTERM;
968 if (PL_lex_op) {
969 yylval.opval = PL_lex_op;
970 PL_lex_op = Nullop;
55497cff 971 return PMFUNC;
972 }
973 else
974 return FUNC;
975}
976
ffb4593c
NT
977/*
978 * S_sublex_push
979 * Create a new scope to save the lexing state. The scope will be
980 * ended in S_sublex_done. Returns a '(', starting the function arguments
981 * to the uc, lc, etc. found before.
982 * Sets PL_lex_state to LEX_INTERPCONCAT.
983 */
984
76e3520e 985STATIC I32
cea2e8a9 986S_sublex_push(pTHX)
55497cff 987{
0f15f207 988 dTHR;
f46d017c 989 ENTER;
55497cff 990
3280af22
NIS
991 PL_lex_state = PL_sublex_info.super_state;
992 SAVEI32(PL_lex_dojoin);
993 SAVEI32(PL_lex_brackets);
3280af22
NIS
994 SAVEI32(PL_lex_casemods);
995 SAVEI32(PL_lex_starts);
996 SAVEI32(PL_lex_state);
7766f137 997 SAVEVPTR(PL_lex_inpat);
3280af22 998 SAVEI32(PL_lex_inwhat);
57843af0 999 SAVECOPLINE(PL_curcop);
3280af22
NIS
1000 SAVEPPTR(PL_bufptr);
1001 SAVEPPTR(PL_oldbufptr);
1002 SAVEPPTR(PL_oldoldbufptr);
1003 SAVEPPTR(PL_linestart);
1004 SAVESPTR(PL_linestr);
1005 SAVEPPTR(PL_lex_brackstack);
1006 SAVEPPTR(PL_lex_casestack);
1007
1008 PL_linestr = PL_lex_stuff;
1009 PL_lex_stuff = Nullsv;
1010
9cbb5ea2
GS
1011 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1012 = SvPVX(PL_linestr);
3280af22
NIS
1013 PL_bufend += SvCUR(PL_linestr);
1014 SAVEFREESV(PL_linestr);
1015
1016 PL_lex_dojoin = FALSE;
1017 PL_lex_brackets = 0;
3280af22
NIS
1018 New(899, PL_lex_brackstack, 120, char);
1019 New(899, PL_lex_casestack, 12, char);
1020 SAVEFREEPV(PL_lex_brackstack);
1021 SAVEFREEPV(PL_lex_casestack);
1022 PL_lex_casemods = 0;
1023 *PL_lex_casestack = '\0';
1024 PL_lex_starts = 0;
1025 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1026 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1027
1028 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1029 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1030 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1031 else
3280af22 1032 PL_lex_inpat = Nullop;
79072805 1033
55497cff 1034 return '(';
79072805
LW
1035}
1036
ffb4593c
NT
1037/*
1038 * S_sublex_done
1039 * Restores lexer state after a S_sublex_push.
1040 */
1041
76e3520e 1042STATIC I32
cea2e8a9 1043S_sublex_done(pTHX)
79072805 1044{
3280af22
NIS
1045 if (!PL_lex_starts++) {
1046 PL_expect = XOPERATOR;
79cb57f6 1047 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1048 return THING;
1049 }
1050
3280af22
NIS
1051 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1052 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1053 return yylex();
79072805
LW
1054 }
1055
ffb4593c 1056 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1057 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1058 PL_linestr = PL_lex_repl;
1059 PL_lex_inpat = 0;
1060 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1061 PL_bufend += SvCUR(PL_linestr);
1062 SAVEFREESV(PL_linestr);
1063 PL_lex_dojoin = FALSE;
1064 PL_lex_brackets = 0;
3280af22
NIS
1065 PL_lex_casemods = 0;
1066 *PL_lex_casestack = '\0';
1067 PL_lex_starts = 0;
25da4f38 1068 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1069 PL_lex_state = LEX_INTERPNORMAL;
1070 PL_lex_starts++;
e9fa98b2
HS
1071 /* we don't clear PL_lex_repl here, so that we can check later
1072 whether this is an evalled subst; that means we rely on the
1073 logic to ensure sublex_done() is called again only via the
1074 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1075 }
e9fa98b2 1076 else {
3280af22 1077 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1078 PL_lex_repl = Nullsv;
1079 }
79072805 1080 return ',';
ffed7fef
LW
1081 }
1082 else {
f46d017c 1083 LEAVE;
3280af22
NIS
1084 PL_bufend = SvPVX(PL_linestr);
1085 PL_bufend += SvCUR(PL_linestr);
1086 PL_expect = XOPERATOR;
09bef843 1087 PL_sublex_info.sub_inwhat = 0;
79072805 1088 return ')';
ffed7fef
LW
1089 }
1090}
1091
02aa26ce
NT
1092/*
1093 scan_const
1094
1095 Extracts a pattern, double-quoted string, or transliteration. This
1096 is terrifying code.
1097
3280af22
NIS
1098 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1099 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1100 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1101
9b599b2a
GS
1102 Returns a pointer to the character scanned up to. Iff this is
1103 advanced from the start pointer supplied (ie if anything was
1104 successfully parsed), will leave an OP for the substring scanned
1105 in yylval. Caller must intuit reason for not parsing further
1106 by looking at the next characters herself.
1107
02aa26ce
NT
1108 In patterns:
1109 backslashes:
1110 double-quoted style: \r and \n
1111 regexp special ones: \D \s
1112 constants: \x3
1113 backrefs: \1 (deprecated in substitution replacements)
1114 case and quoting: \U \Q \E
1115 stops on @ and $, but not for $ as tail anchor
1116
1117 In transliterations:
1118 characters are VERY literal, except for - not at the start or end
1119 of the string, which indicates a range. scan_const expands the
1120 range to the full set of intermediate characters.
1121
1122 In double-quoted strings:
1123 backslashes:
1124 double-quoted style: \r and \n
1125 constants: \x3
1126 backrefs: \1 (deprecated)
1127 case and quoting: \U \Q \E
1128 stops on @ and $
1129
1130 scan_const does *not* construct ops to handle interpolated strings.
1131 It stops processing as soon as it finds an embedded $ or @ variable
1132 and leaves it to the caller to work out what's going on.
1133
1134 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1135
1136 $ in pattern could be $foo or could be tail anchor. Assumption:
1137 it's a tail anchor if $ is the last thing in the string, or if it's
1138 followed by one of ")| \n\t"
1139
1140 \1 (backreferences) are turned into $1
1141
1142 The structure of the code is
1143 while (there's a character to process) {
1144 handle transliteration ranges
1145 skip regexp comments
1146 skip # initiated comments in //x patterns
1147 check for embedded @foo
1148 check for embedded scalars
1149 if (backslash) {
1150 leave intact backslashes from leave (below)
1151 deprecate \1 in strings and sub replacements
1152 handle string-changing backslashes \l \U \Q \E, etc.
1153 switch (what was escaped) {
1154 handle - in a transliteration (becomes a literal -)
1155 handle \132 octal characters
1156 handle 0x15 hex characters
1157 handle \cV (control V)
1158 handle printf backslashes (\f, \r, \n, etc)
1159 } (end switch)
1160 } (end if backslash)
1161 } (end while character to read)
1162
1163*/
1164
76e3520e 1165STATIC char *
cea2e8a9 1166S_scan_const(pTHX_ char *start)
79072805 1167{
3280af22 1168 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1169 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1170 register char *s = start; /* start of the constant */
1171 register char *d = SvPVX(sv); /* destination for copies */
1172 bool dorange = FALSE; /* are we in a translit range? */
7e2040f0 1173 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1174 I32 len; /* ? */
ac2262e3 1175 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1176 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1177 : UTF;
ac2262e3 1178 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1179 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1180 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1181 : UTF;
dff6d3cd 1182 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1183 PL_lex_inpat
4a2d328f 1184 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1185 : "";
79072805
LW
1186
1187 while (s < send || dorange) {
02aa26ce 1188 /* get transliterations out of the way (they're most literal) */
3280af22 1189 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1190 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1191 if (dorange) {
02aa26ce 1192 I32 i; /* current expanded character */
8ada0baa 1193 I32 min; /* first character in range */
02aa26ce
NT
1194 I32 max; /* last character in range */
1195
1196 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1197 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1198 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1199 d -= 2; /* eat the first char and the - */
1200
8ada0baa
JH
1201 min = (U8)*d; /* first char in range */
1202 max = (U8)d[1]; /* last char in range */
1203
1204#ifndef ASCIIish
1205 if ((isLOWER(min) && isLOWER(max)) ||
1206 (isUPPER(min) && isUPPER(max))) {
1207 if (isLOWER(min)) {
1208 for (i = min; i <= max; i++)
1209 if (isLOWER(i))
1210 *d++ = i;
1211 } else {
1212 for (i = min; i <= max; i++)
1213 if (isUPPER(i))
1214 *d++ = i;
1215 }
1216 }
1217 else
1218#endif
1219 for (i = min; i <= max; i++)
1220 *d++ = i;
02aa26ce
NT
1221
1222 /* mark the range as done, and continue */
79072805
LW
1223 dorange = FALSE;
1224 continue;
1225 }
02aa26ce
NT
1226
1227 /* range begins (ignore - as first or last char) */
79072805 1228 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1229 if (utf) {
a176fa2a 1230 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1231 s++;
1232 continue;
1233 }
79072805
LW
1234 dorange = TRUE;
1235 s++;
1236 }
1237 }
02aa26ce
NT
1238
1239 /* if we get here, we're not doing a transliteration */
1240
0f5d15d6
IZ
1241 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1242 except for the last char, which will be done separately. */
3280af22 1243 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1244 if (s[2] == '#') {
1245 while (s < send && *s != ')')
1246 *d++ = *s++;
0f5d15d6
IZ
1247 } else if (s[2] == '{'
1248 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1249 I32 count = 1;
0f5d15d6 1250 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1251 char c;
1252
d9f97599
GS
1253 while (count && (c = *regparse)) {
1254 if (c == '\\' && regparse[1])
1255 regparse++;
cc6b7395
IZ
1256 else if (c == '{')
1257 count++;
1258 else if (c == '}')
1259 count--;
d9f97599 1260 regparse++;
cc6b7395 1261 }
5bdf89e7
IZ
1262 if (*regparse != ')') {
1263 regparse--; /* Leave one char for continuation. */
cc6b7395 1264 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1265 }
0f5d15d6 1266 while (s < regparse)
cc6b7395
IZ
1267 *d++ = *s++;
1268 }
748a9306 1269 }
02aa26ce
NT
1270
1271 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1272 else if (*s == '#' && PL_lex_inpat &&
1273 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1274 while (s+1 < send && *s != '\n')
1275 *d++ = *s++;
1276 }
02aa26ce
NT
1277
1278 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1279 else if (*s == '@' && s[1]
1280 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1281 break;
02aa26ce
NT
1282
1283 /* check for embedded scalars. only stop if we're sure it's a
1284 variable.
1285 */
79072805 1286 else if (*s == '$') {
3280af22 1287 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1288 break;
c277df42 1289 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1290 break; /* in regexp, $ might be tail anchor */
1291 }
02aa26ce 1292
a0ed51b3
LW
1293 /* (now in tr/// code again) */
1294
d008e5eb
GS
1295 if (*s & 0x80 && thisutf) {
1296 dTHR; /* only for ckWARN */
1297 if (ckWARN(WARN_UTF8)) {
dfe13c55 1298 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb 1299 if (len) {
b1c7b182 1300 has_utf = TRUE;
d008e5eb
GS
1301 while (len--)
1302 *d++ = *s++;
1303 continue;
1304 }
a0ed51b3 1305 }
b1c7b182
GS
1306 else
1307 has_utf = TRUE; /* assume valid utf8 */
a0ed51b3
LW
1308 }
1309
02aa26ce 1310 /* backslashes */
79072805
LW
1311 if (*s == '\\' && s+1 < send) {
1312 s++;
02aa26ce
NT
1313
1314 /* some backslashes we leave behind */
c9f97d15 1315 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1316 *d++ = '\\';
1317 *d++ = *s++;
1318 continue;
1319 }
02aa26ce
NT
1320
1321 /* deprecate \1 in strings and substitution replacements */
3280af22 1322 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1323 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1324 {
d008e5eb 1325 dTHR; /* only for ckWARN */
599cee73 1326 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1327 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1328 *--s = '$';
1329 break;
1330 }
02aa26ce
NT
1331
1332 /* string-change backslash escapes */
3280af22 1333 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1334 --s;
1335 break;
1336 }
02aa26ce
NT
1337
1338 /* if we get here, it's either a quoted -, or a digit */
79072805 1339 switch (*s) {
02aa26ce
NT
1340
1341 /* quoted - in transliterations */
79072805 1342 case '-':
3280af22 1343 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1344 *d++ = *s++;
1345 continue;
1346 }
1347 /* FALL THROUGH */
1348 default:
11b8faa4
JH
1349 {
1350 dTHR;
e476b1b5
GS
1351 if (ckWARN(WARN_MISC) && isALPHA(*s))
1352 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1353 "Unrecognized escape \\%c passed through",
1354 *s);
1355 /* default action is to copy the quoted character */
1356 *d++ = *s++;
1357 continue;
1358 }
02aa26ce
NT
1359
1360 /* \132 indicates an octal constant */
79072805
LW
1361 case '0': case '1': case '2': case '3':
1362 case '4': case '5': case '6': case '7':
dff6d3cd 1363 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1364 s += len;
1365 continue;
02aa26ce
NT
1366
1367 /* \x24 indicates a hex constant */
79072805 1368 case 'x':
a0ed51b3
LW
1369 ++s;
1370 if (*s == '{') {
1371 char* e = strchr(s, '}');
234a4bc6 1372 UV uv;
a0ed51b3 1373
adaeee49 1374 if (!e) {
a0ed51b3 1375 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1376 e = s;
1377 }
a0ed51b3 1378 /* note: utf always shorter than hex */
234a4bc6
GS
1379 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1380 if (uv > 127) {
1381 d = (char*)uv_to_utf8((U8*)d, uv);
1382 has_utf = TRUE;
1383 }
1384 else
1385 *d++ = (char)uv;
a0ed51b3 1386 s = e + 1;
a0ed51b3
LW
1387 }
1388 else {
234a4bc6 1389 /* XXX collapse this branch into the one above */
a0ed51b3
LW
1390 UV uv = (UV)scan_hex(s, 2, &len);
1391 if (utf && PL_lex_inwhat == OP_TRANS &&
1392 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1393 {
dfe13c55 1394 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
7e2040f0 1395 has_utf = TRUE;
a0ed51b3
LW
1396 }
1397 else {
d008e5eb
GS
1398 if (uv >= 127 && UTF) {
1399 dTHR;
1400 if (ckWARN(WARN_UTF8))
cea2e8a9 1401 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb 1402 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
894356b3 1403 (int)len,s,(int)len,s);
d008e5eb 1404 }
a0ed51b3
LW
1405 *d++ = (char)uv;
1406 }
1407 s += len;
1408 }
79072805 1409 continue;
02aa26ce 1410
4a2d328f
IZ
1411 /* \N{latin small letter a} is a named character */
1412 case 'N':
423cee85
JH
1413 ++s;
1414 if (*s == '{') {
1415 char* e = strchr(s, '}');
1416 HV *hv;
1417 SV **svp;
1418 SV *res, *cv;
1419 STRLEN len;
1420 char *str;
1421 char *why = Nullch;
1422
1423 if (!e) {
5777a3f7 1424 yyerror("Missing right brace on \\N{}");
423cee85
JH
1425 e = s - 1;
1426 goto cont_scan;
1427 }
1428 res = newSVpvn(s + 1, e - s - 1);
1429 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1430 res, Nullsv, "\\N{...}" );
423cee85
JH
1431 str = SvPV(res,len);
1432 if (len > e - s + 4) {
1433 char *odest = SvPVX(sv);
1434
1435 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1436 d = SvPVX(sv) + (d - odest);
1437 }
1438 Copy(str, d, len, char);
1439 d += len;
1440 SvREFCNT_dec(res);
1441 cont_scan:
1442 s = e + 1;
1443 }
1444 else
5777a3f7 1445 yyerror("Missing braces on \\N{}");
423cee85
JH
1446 continue;
1447
02aa26ce 1448 /* \c is a control character */
79072805
LW
1449 case 'c':
1450 s++;
9d116dd7
JH
1451#ifdef EBCDIC
1452 *d = *s++;
1453 if (isLOWER(*d))
1454 *d = toUPPER(*d);
1455 *d++ = toCTRL(*d);
1456#else
bbce6d69 1457 len = *s++;
1458 *d++ = toCTRL(len);
9d116dd7 1459#endif
79072805 1460 continue;
02aa26ce
NT
1461
1462 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1463 case 'b':
1464 *d++ = '\b';
1465 break;
1466 case 'n':
1467 *d++ = '\n';
1468 break;
1469 case 'r':
1470 *d++ = '\r';
1471 break;
1472 case 'f':
1473 *d++ = '\f';
1474 break;
1475 case 't':
1476 *d++ = '\t';
1477 break;
34a3fe2a
PP
1478#ifdef EBCDIC
1479 case 'e':
1480 *d++ = '\047'; /* CP 1047 */
1481 break;
1482 case 'a':
1483 *d++ = '\057'; /* CP 1047 */
1484 break;
1485#else
79072805
LW
1486 case 'e':
1487 *d++ = '\033';
1488 break;
1489 case 'a':
1490 *d++ = '\007';
1491 break;
34a3fe2a 1492#endif
02aa26ce
NT
1493 } /* end switch */
1494
79072805
LW
1495 s++;
1496 continue;
02aa26ce
NT
1497 } /* end if (backslash) */
1498
79072805 1499 *d++ = *s++;
02aa26ce
NT
1500 } /* while loop to process each character */
1501
1502 /* terminate the string and set up the sv */
79072805 1503 *d = '\0';
463ee0b2 1504 SvCUR_set(sv, d - SvPVX(sv));
79072805 1505 SvPOK_on(sv);
7e2040f0
GS
1506 if (has_utf)
1507 SvUTF8_on(sv);
79072805 1508
02aa26ce 1509 /* shrink the sv if we allocated more than we used */
79072805
LW
1510 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1511 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1512 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1513 }
02aa26ce 1514
9b599b2a 1515 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1516 if (s > PL_bufptr) {
1517 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1518 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1519 sv, Nullsv,
3280af22 1520 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1521 ? "tr"
3280af22 1522 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1523 ? "s"
1524 : "qq")));
79072805 1525 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1526 } else
8990e307 1527 SvREFCNT_dec(sv);
79072805
LW
1528 return s;
1529}
1530
ffb4593c
NT
1531/* S_intuit_more
1532 * Returns TRUE if there's more to the expression (e.g., a subscript),
1533 * FALSE otherwise.
ffb4593c
NT
1534 *
1535 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1536 *
1537 * ->[ and ->{ return TRUE
1538 * { and [ outside a pattern are always subscripts, so return TRUE
1539 * if we're outside a pattern and it's not { or [, then return FALSE
1540 * if we're in a pattern and the first char is a {
1541 * {4,5} (any digits around the comma) returns FALSE
1542 * if we're in a pattern and the first char is a [
1543 * [] returns FALSE
1544 * [SOMETHING] has a funky algorithm to decide whether it's a
1545 * character class or not. It has to deal with things like
1546 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1547 * anything else returns TRUE
1548 */
1549
9cbb5ea2
GS
1550/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1551
76e3520e 1552STATIC int
cea2e8a9 1553S_intuit_more(pTHX_ register char *s)
79072805 1554{
3280af22 1555 if (PL_lex_brackets)
79072805
LW
1556 return TRUE;
1557 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1558 return TRUE;
1559 if (*s != '{' && *s != '[')
1560 return FALSE;
3280af22 1561 if (!PL_lex_inpat)
79072805
LW
1562 return TRUE;
1563
1564 /* In a pattern, so maybe we have {n,m}. */
1565 if (*s == '{') {
1566 s++;
1567 if (!isDIGIT(*s))
1568 return TRUE;
1569 while (isDIGIT(*s))
1570 s++;
1571 if (*s == ',')
1572 s++;
1573 while (isDIGIT(*s))
1574 s++;
1575 if (*s == '}')
1576 return FALSE;
1577 return TRUE;
1578
1579 }
1580
1581 /* On the other hand, maybe we have a character class */
1582
1583 s++;
1584 if (*s == ']' || *s == '^')
1585 return FALSE;
1586 else {
ffb4593c 1587 /* this is terrifying, and it works */
79072805
LW
1588 int weight = 2; /* let's weigh the evidence */
1589 char seen[256];
f27ffc4a 1590 unsigned char un_char = 255, last_un_char;
93a17b20 1591 char *send = strchr(s,']');
3280af22 1592 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1593
1594 if (!send) /* has to be an expression */
1595 return TRUE;
1596
1597 Zero(seen,256,char);
1598 if (*s == '$')
1599 weight -= 3;
1600 else if (isDIGIT(*s)) {
1601 if (s[1] != ']') {
1602 if (isDIGIT(s[1]) && s[2] == ']')
1603 weight -= 10;
1604 }
1605 else
1606 weight -= 100;
1607 }
1608 for (; s < send; s++) {
1609 last_un_char = un_char;
1610 un_char = (unsigned char)*s;
1611 switch (*s) {
1612 case '@':
1613 case '&':
1614 case '$':
1615 weight -= seen[un_char] * 10;
7e2040f0 1616 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1617 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1618 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1619 weight -= 100;
1620 else
1621 weight -= 10;
1622 }
1623 else if (*s == '$' && s[1] &&
93a17b20
LW
1624 strchr("[#!%*<>()-=",s[1])) {
1625 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1626 weight -= 10;
1627 else
1628 weight -= 1;
1629 }
1630 break;
1631 case '\\':
1632 un_char = 254;
1633 if (s[1]) {
93a17b20 1634 if (strchr("wds]",s[1]))
79072805
LW
1635 weight += 100;
1636 else if (seen['\''] || seen['"'])
1637 weight += 1;
93a17b20 1638 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1639 weight += 40;
1640 else if (isDIGIT(s[1])) {
1641 weight += 40;
1642 while (s[1] && isDIGIT(s[1]))
1643 s++;
1644 }
1645 }
1646 else
1647 weight += 100;
1648 break;
1649 case '-':
1650 if (s[1] == '\\')
1651 weight += 50;
93a17b20 1652 if (strchr("aA01! ",last_un_char))
79072805 1653 weight += 30;
93a17b20 1654 if (strchr("zZ79~",s[1]))
79072805 1655 weight += 30;
f27ffc4a
GS
1656 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1657 weight -= 5; /* cope with negative subscript */
79072805
LW
1658 break;
1659 default:
93a17b20 1660 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1661 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1662 char *d = tmpbuf;
1663 while (isALPHA(*s))
1664 *d++ = *s++;
1665 *d = '\0';
1666 if (keyword(tmpbuf, d - tmpbuf))
1667 weight -= 150;
1668 }
1669 if (un_char == last_un_char + 1)
1670 weight += 5;
1671 weight -= seen[un_char];
1672 break;
1673 }
1674 seen[un_char]++;
1675 }
1676 if (weight >= 0) /* probably a character class */
1677 return FALSE;
1678 }
1679
1680 return TRUE;
1681}
ffed7fef 1682
ffb4593c
NT
1683/*
1684 * S_intuit_method
1685 *
1686 * Does all the checking to disambiguate
1687 * foo bar
1688 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1689 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1690 *
1691 * First argument is the stuff after the first token, e.g. "bar".
1692 *
1693 * Not a method if bar is a filehandle.
1694 * Not a method if foo is a subroutine prototyped to take a filehandle.
1695 * Not a method if it's really "Foo $bar"
1696 * Method if it's "foo $bar"
1697 * Not a method if it's really "print foo $bar"
1698 * Method if it's really "foo package::" (interpreted as package->foo)
1699 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1700 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1701 * =>
1702 */
1703
76e3520e 1704STATIC int
cea2e8a9 1705S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1706{
1707 char *s = start + (*start == '$');
3280af22 1708 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1709 STRLEN len;
1710 GV* indirgv;
1711
1712 if (gv) {
b6c543e3 1713 CV *cv;
a0d0e21e
LW
1714 if (GvIO(gv))
1715 return 0;
b6c543e3
IZ
1716 if ((cv = GvCVu(gv))) {
1717 char *proto = SvPVX(cv);
1718 if (proto) {
1719 if (*proto == ';')
1720 proto++;
1721 if (*proto == '*')
1722 return 0;
1723 }
1724 } else
a0d0e21e
LW
1725 gv = 0;
1726 }
8903cb82 1727 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1728 /* start is the beginning of the possible filehandle/object,
1729 * and s is the end of it
1730 * tmpbuf is a copy of it
1731 */
1732
a0d0e21e 1733 if (*start == '$') {
3280af22 1734 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1735 return 0;
1736 s = skipspace(s);
3280af22
NIS
1737 PL_bufptr = start;
1738 PL_expect = XREF;
a0d0e21e
LW
1739 return *s == '(' ? FUNCMETH : METHOD;
1740 }
1741 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1742 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1743 len -= 2;
1744 tmpbuf[len] = '\0';
1745 goto bare_package;
1746 }
1747 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1748 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1749 return 0;
1750 /* filehandle or package name makes it a method */
89bfa8cd 1751 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1752 s = skipspace(s);
3280af22 1753 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1754 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1755 bare_package:
3280af22 1756 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1757 newSVpvn(tmpbuf,len));
3280af22
NIS
1758 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1759 PL_expect = XTERM;
a0d0e21e 1760 force_next(WORD);
3280af22 1761 PL_bufptr = s;
a0d0e21e
LW
1762 return *s == '(' ? FUNCMETH : METHOD;
1763 }
1764 }
1765 return 0;
1766}
1767
ffb4593c
NT
1768/*
1769 * S_incl_perldb
1770 * Return a string of Perl code to load the debugger. If PERL5DB
1771 * is set, it will return the contents of that, otherwise a
1772 * compile-time require of perl5db.pl.
1773 */
1774
76e3520e 1775STATIC char*
cea2e8a9 1776S_incl_perldb(pTHX)
a0d0e21e 1777{
3280af22 1778 if (PL_perldb) {
76e3520e 1779 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1780
1781 if (pdb)
1782 return pdb;
61bb5906 1783 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1784 return "BEGIN { require 'perl5db.pl' }";
1785 }
1786 return "";
1787}
1788
1789
16d20bd9
AD
1790/* Encoded script support. filter_add() effectively inserts a
1791 * 'pre-processing' function into the current source input stream.
1792 * Note that the filter function only applies to the current source file
1793 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1794 *
1795 * The datasv parameter (which may be NULL) can be used to pass
1796 * private data to this instance of the filter. The filter function
1797 * can recover the SV using the FILTER_DATA macro and use it to
1798 * store private buffers and state information.
1799 *
1800 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1801 * and the IoDIRP field is used to store the function pointer,
1802 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1803 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1804 * private use must be set using malloc'd pointers.
1805 */
16d20bd9
AD
1806
1807SV *
864dbfa3 1808Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1809{
f4c556ac
GS
1810 if (!funcp)
1811 return Nullsv;
1812
3280af22
NIS
1813 if (!PL_rsfp_filters)
1814 PL_rsfp_filters = newAV();
16d20bd9 1815 if (!datasv)
8c52afec 1816 datasv = NEWSV(255,0);
16d20bd9 1817 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1818 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1819 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1820 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1821 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1822 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1823 av_unshift(PL_rsfp_filters, 1);
1824 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1825 return(datasv);
1826}
1827
1828
1829/* Delete most recently added instance of this filter function. */
a0d0e21e 1830void
864dbfa3 1831Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1832{
e0c19803 1833 SV *datasv;
f4c556ac 1834 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1835 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1836 return;
1837 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1838 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1839 if (IoDIRP(datasv) == (DIR*)funcp) {
1840 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1841 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1842 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1843
16d20bd9
AD
1844 return;
1845 }
1846 /* we need to search for the correct entry and clear it */
cea2e8a9 1847 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1848}
1849
1850
1851/* Invoke the n'th filter function for the current rsfp. */
1852I32
864dbfa3 1853Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1854
1855
1856 /* 0 = read one text line */
a0d0e21e 1857{
16d20bd9
AD
1858 filter_t funcp;
1859 SV *datasv = NULL;
e50aee73 1860
3280af22 1861 if (!PL_rsfp_filters)
16d20bd9 1862 return -1;
3280af22 1863 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1864 /* Provide a default input filter to make life easy. */
1865 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1866 DEBUG_P(PerlIO_printf(Perl_debug_log,
1867 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1868 if (maxlen) {
1869 /* Want a block */
1870 int len ;
1871 int old_len = SvCUR(buf_sv) ;
1872
1873 /* ensure buf_sv is large enough */
1874 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1875 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1876 if (PerlIO_error(PL_rsfp))
37120919
AD
1877 return -1; /* error */
1878 else
1879 return 0 ; /* end of file */
1880 }
16d20bd9
AD
1881 SvCUR_set(buf_sv, old_len + len) ;
1882 } else {
1883 /* Want a line */
3280af22
NIS
1884 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1885 if (PerlIO_error(PL_rsfp))
37120919
AD
1886 return -1; /* error */
1887 else
1888 return 0 ; /* end of file */
1889 }
16d20bd9
AD
1890 }
1891 return SvCUR(buf_sv);
1892 }
1893 /* Skip this filter slot if filter has been deleted */
3280af22 1894 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1895 DEBUG_P(PerlIO_printf(Perl_debug_log,
1896 "filter_read %d: skipped (filter deleted)\n",
1897 idx));
16d20bd9
AD
1898 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1899 }
1900 /* Get function pointer hidden within datasv */
1901 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1902 DEBUG_P(PerlIO_printf(Perl_debug_log,
1903 "filter_read %d: via function %p (%s)\n",
1904 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1905 /* Call function. The function is expected to */
1906 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1907 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1908 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1909}
1910
76e3520e 1911STATIC char *
cea2e8a9 1912S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1913{
c39cd008 1914#ifdef PERL_CR_FILTER
3280af22 1915 if (!PL_rsfp_filters) {
c39cd008 1916 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1917 }
1918#endif
3280af22 1919 if (PL_rsfp_filters) {
16d20bd9 1920
55497cff 1921 if (!append)
1922 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1923 if (FILTER_READ(0, sv, 0) > 0)
1924 return ( SvPVX(sv) ) ;
1925 else
1926 return Nullch ;
1927 }
9d116dd7 1928 else
fd049845 1929 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1930}
1931
1932
748a9306
LW
1933#ifdef DEBUGGING
1934 static char* exp_name[] =
09bef843
SB
1935 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1936 "ATTRTERM", "TERMBLOCK"
1937 };
748a9306 1938#endif
463ee0b2 1939
02aa26ce
NT
1940/*
1941 yylex
1942
1943 Works out what to call the token just pulled out of the input
1944 stream. The yacc parser takes care of taking the ops we return and
1945 stitching them into a tree.
1946
1947 Returns:
1948 PRIVATEREF
1949
1950 Structure:
1951 if read an identifier
1952 if we're in a my declaration
1953 croak if they tried to say my($foo::bar)
1954 build the ops for a my() declaration
1955 if it's an access to a my() variable
1956 are we in a sort block?
1957 croak if my($a); $a <=> $b
1958 build ops for access to a my() variable
1959 if in a dq string, and they've said @foo and we can't find @foo
1960 croak
1961 build ops for a bareword
1962 if we already built the token before, use it.
1963*/
1964
864dbfa3
GS
1965int
1966#ifdef USE_PURE_BISON
cea2e8a9 1967Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1968#else
cea2e8a9 1969Perl_yylex(pTHX)
864dbfa3 1970#endif
378cc40b 1971{
11343788 1972 dTHR;
79072805 1973 register char *s;
378cc40b 1974 register char *d;
79072805 1975 register I32 tmp;
463ee0b2 1976 STRLEN len;
161b471a
NIS
1977 GV *gv = Nullgv;
1978 GV **gvp = 0;
a687059c 1979
a1a0e61e
TD
1980#ifdef USE_PURE_BISON
1981 yylval_pointer = lvalp;
1982 yychar_pointer = lcharp;
1983#endif
1984
02aa26ce 1985 /* check if there's an identifier for us to look at */
3280af22 1986 if (PL_pending_ident) {
02aa26ce 1987 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1988 char pit = PL_pending_ident;
1989 PL_pending_ident = 0;
bbce6d69 1990
02aa26ce
NT
1991 /* if we're in a my(), we can't allow dynamics here.
1992 $foo'bar has already been turned into $foo::bar, so
1993 just check for colons.
1994
1995 if it's a legal name, the OP is a PADANY.
1996 */
3280af22 1997 if (PL_in_my) {
77ca0c92 1998 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
1999 if (strchr(PL_tokenbuf,':'))
2000 yyerror(Perl_form(aTHX_ "No package name allowed for "
2001 "variable %s in \"our\"",
2002 PL_tokenbuf));
77ca0c92
LW
2003 tmp = pad_allocmy(PL_tokenbuf);
2004 }
2005 else {
2006 if (strchr(PL_tokenbuf,':'))
2007 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2008
77ca0c92
LW
2009 yylval.opval = newOP(OP_PADANY, 0);
2010 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2011 return PRIVATEREF;
2012 }
bbce6d69 2013 }
2014
02aa26ce
NT
2015 /*
2016 build the ops for accesses to a my() variable.
2017
2018 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2019 then used in a comparison. This catches most, but not
2020 all cases. For instance, it catches
2021 sort { my($a); $a <=> $b }
2022 but not
2023 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2024 (although why you'd do that is anyone's guess).
2025 */
2026
3280af22 2027 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2028#ifdef USE_THREADS
54b9620d 2029 /* Check for single character per-thread SVs */
3280af22
NIS
2030 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2031 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2032 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2033 {
2faa37cc 2034 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2035 yylval.opval->op_targ = tmp;
2036 return PRIVATEREF;
2037 }
2038#endif /* USE_THREADS */
3280af22 2039 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2040 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2041 /* might be an "our" variable" */
f472eb5c 2042 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2043 /* build ops for a bareword */
f472eb5c
GS
2044 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2045 sv_catpvn(sym, "::", 2);
2046 sv_catpv(sym, PL_tokenbuf+1);
2047 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2048 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2049 gv_fetchpv(SvPVX(sym),
77ca0c92 2050 (PL_in_eval
f472eb5c
GS
2051 ? (GV_ADDMULTI | GV_ADDINEVAL)
2052 : TRUE
77ca0c92
LW
2053 ),
2054 ((PL_tokenbuf[0] == '$') ? SVt_PV
2055 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2056 : SVt_PVHV));
2057 return WORD;
2058 }
2059
02aa26ce 2060 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2061 if (PL_last_lop_op == OP_SORT &&
2062 PL_tokenbuf[0] == '$' &&
2063 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2064 && !PL_tokenbuf[2])
bbce6d69 2065 {
3280af22
NIS
2066 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2067 d < PL_bufend && *d != '\n';
a863c7d1
MB
2068 d++)
2069 {
2070 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2071 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2072 PL_tokenbuf);
a863c7d1 2073 }
bbce6d69 2074 }
2075 }
bbce6d69 2076
a863c7d1
MB
2077 yylval.opval = newOP(OP_PADANY, 0);
2078 yylval.opval->op_targ = tmp;
2079 return PRIVATEREF;
2080 }
bbce6d69 2081 }
2082
02aa26ce
NT
2083 /*
2084 Whine if they've said @foo in a doublequoted string,
2085 and @foo isn't a variable we can find in the symbol
2086 table.
2087 */
3280af22
NIS
2088 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2089 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2090 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2091 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2092 PL_tokenbuf, PL_tokenbuf));
bbce6d69 2093 }
2094
02aa26ce 2095 /* build ops for a bareword */
3280af22 2096 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2097 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2098 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2099 ((PL_tokenbuf[0] == '$') ? SVt_PV
2100 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2101 : SVt_PVHV));
2102 return WORD;
2103 }
2104
02aa26ce
NT
2105 /* no identifier pending identification */
2106
3280af22 2107 switch (PL_lex_state) {
79072805
LW
2108#ifdef COMMENTARY
2109 case LEX_NORMAL: /* Some compilers will produce faster */
2110 case LEX_INTERPNORMAL: /* code if we comment these out. */
2111 break;
2112#endif
2113
09bef843 2114 /* when we've already built the next token, just pull it out of the queue */
79072805 2115 case LEX_KNOWNEXT:
3280af22
NIS
2116 PL_nexttoke--;
2117 yylval = PL_nextval[PL_nexttoke];
2118 if (!PL_nexttoke) {
2119 PL_lex_state = PL_lex_defer;
2120 PL_expect = PL_lex_expect;
2121 PL_lex_defer = LEX_NORMAL;
463ee0b2 2122 }
3280af22 2123 return(PL_nexttype[PL_nexttoke]);
79072805 2124
02aa26ce 2125 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2126 when we get here, PL_bufptr is at the \
02aa26ce 2127 */
79072805
LW
2128 case LEX_INTERPCASEMOD:
2129#ifdef DEBUGGING
3280af22 2130 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2131 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2132#endif
02aa26ce 2133 /* handle \E or end of string */
3280af22 2134 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2135 char oldmod;
02aa26ce
NT
2136
2137 /* if at a \E */
3280af22
NIS
2138 if (PL_lex_casemods) {
2139 oldmod = PL_lex_casestack[--PL_lex_casemods];
2140 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2141
3280af22
NIS
2142 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2143 PL_bufptr += 2;
2144 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2145 }
79072805
LW
2146 return ')';
2147 }
3280af22
NIS
2148 if (PL_bufptr != PL_bufend)
2149 PL_bufptr += 2;
2150 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2151 return yylex();
79072805
LW
2152 }
2153 else {
3280af22 2154 s = PL_bufptr + 1;
79072805
LW
2155 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2156 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2157 if (strchr("LU", *s) &&
3280af22 2158 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2159 {
3280af22 2160 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2161 return ')';
2162 }
3280af22
NIS
2163 if (PL_lex_casemods > 10) {
2164 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2165 if (newlb != PL_lex_casestack) {
a0d0e21e 2166 SAVEFREEPV(newlb);
3280af22 2167 PL_lex_casestack = newlb;
a0d0e21e
LW
2168 }
2169 }
3280af22
NIS
2170 PL_lex_casestack[PL_lex_casemods++] = *s;
2171 PL_lex_casestack[PL_lex_casemods] = '\0';
2172 PL_lex_state = LEX_INTERPCONCAT;
2173 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2174 force_next('(');
2175 if (*s == 'l')
3280af22 2176 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2177 else if (*s == 'u')
3280af22 2178 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2179 else if (*s == 'L')
3280af22 2180 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2181 else if (*s == 'U')
3280af22 2182 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2183 else if (*s == 'Q')
3280af22 2184 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2185 else
cea2e8a9 2186 Perl_croak(aTHX_ "panic: yylex");
3280af22 2187 PL_bufptr = s + 1;
79072805 2188 force_next(FUNC);
3280af22
NIS
2189 if (PL_lex_starts) {
2190 s = PL_bufptr;
2191 PL_lex_starts = 0;
79072805
LW
2192 Aop(OP_CONCAT);
2193 }
2194 else
cea2e8a9 2195 return yylex();
79072805
LW
2196 }
2197
55497cff 2198 case LEX_INTERPPUSH:
2199 return sublex_push();
2200
79072805 2201 case LEX_INTERPSTART:
3280af22 2202 if (PL_bufptr == PL_bufend)
79072805 2203 return sublex_done();
3280af22
NIS
2204 PL_expect = XTERM;
2205 PL_lex_dojoin = (*PL_bufptr == '@');
2206 PL_lex_state = LEX_INTERPNORMAL;
2207 if (PL_lex_dojoin) {
2208 PL_nextval[PL_nexttoke].ival = 0;
79072805 2209 force_next(',');
554b3eca 2210#ifdef USE_THREADS
533c011a
NIS
2211 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2212 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2213 force_next(PRIVATEREF);
2214#else
a0d0e21e 2215 force_ident("\"", '$');
554b3eca 2216#endif /* USE_THREADS */
3280af22 2217 PL_nextval[PL_nexttoke].ival = 0;
79072805 2218 force_next('$');
3280af22 2219 PL_nextval[PL_nexttoke].ival = 0;
79072805 2220 force_next('(');
3280af22 2221 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2222 force_next(FUNC);
2223 }
3280af22
NIS
2224 if (PL_lex_starts++) {
2225 s = PL_bufptr;
79072805
LW
2226 Aop(OP_CONCAT);
2227 }
cea2e8a9 2228 return yylex();
79072805
LW
2229
2230 case LEX_INTERPENDMAYBE:
3280af22
NIS
2231 if (intuit_more(PL_bufptr)) {
2232 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2233 break;
2234 }
2235 /* FALL THROUGH */
2236
2237 case LEX_INTERPEND:
3280af22
NIS
2238 if (PL_lex_dojoin) {
2239 PL_lex_dojoin = FALSE;
2240 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2241 return ')';
2242 }
43a16006 2243 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2244 && SvEVALED(PL_lex_repl))
43a16006 2245 {
e9fa98b2 2246 if (PL_bufptr != PL_bufend)
cea2e8a9 2247 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2248 PL_lex_repl = Nullsv;
2249 }
79072805
LW
2250 /* FALLTHROUGH */
2251 case LEX_INTERPCONCAT:
2252#ifdef DEBUGGING
3280af22 2253 if (PL_lex_brackets)
cea2e8a9 2254 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2255#endif
3280af22 2256 if (PL_bufptr == PL_bufend)
79072805
LW
2257 return sublex_done();
2258
3280af22
NIS
2259 if (SvIVX(PL_linestr) == '\'') {
2260 SV *sv = newSVsv(PL_linestr);
2261 if (!PL_lex_inpat)
76e3520e 2262 sv = tokeq(sv);
3280af22 2263 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2264 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2265 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2266 s = PL_bufend;
79072805
LW
2267 }
2268 else {
3280af22 2269 s = scan_const(PL_bufptr);
79072805 2270 if (*s == '\\')
3280af22 2271 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2272 else
3280af22 2273 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2274 }
2275
3280af22
NIS
2276 if (s != PL_bufptr) {
2277 PL_nextval[PL_nexttoke] = yylval;
2278 PL_expect = XTERM;
79072805 2279 force_next(THING);
3280af22 2280 if (PL_lex_starts++)
79072805
LW
2281 Aop(OP_CONCAT);
2282 else {
3280af22 2283 PL_bufptr = s;
cea2e8a9 2284 return yylex();
79072805
LW
2285 }
2286 }
2287
cea2e8a9 2288 return yylex();
a0d0e21e 2289 case LEX_FORMLINE:
3280af22
NIS
2290 PL_lex_state = LEX_NORMAL;
2291 s = scan_formline(PL_bufptr);
2292 if (!PL_lex_formbrack)
a0d0e21e
LW
2293 goto rightbracket;
2294 OPERATOR(';');
79072805
LW
2295 }
2296
3280af22
NIS
2297 s = PL_bufptr;
2298 PL_oldoldbufptr = PL_oldbufptr;
2299 PL_oldbufptr = s;
79072805 2300 DEBUG_p( {
bf49b057
GS
2301 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2302 exp_name[PL_expect], s);
79072805 2303 } )
463ee0b2
LW
2304
2305 retry:
378cc40b
LW
2306 switch (*s) {
2307 default:
7e2040f0 2308 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2309 goto keylookup;
cea2e8a9 2310 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2311 case 4:
2312 case 26:
2313 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2314 case 0:
3280af22
NIS
2315 if (!PL_rsfp) {
2316 PL_last_uni = 0;
2317 PL_last_lop = 0;
2318 if (PL_lex_brackets)
d98d5fff 2319 yyerror("Missing right curly or square bracket");
79072805 2320 TOKEN(0);
463ee0b2 2321 }
3280af22 2322 if (s++ < PL_bufend)
a687059c 2323 goto retry; /* ignore stray nulls */
3280af22
NIS
2324 PL_last_uni = 0;
2325 PL_last_lop = 0;
2326 if (!PL_in_eval && !PL_preambled) {
2327 PL_preambled = TRUE;
2328 sv_setpv(PL_linestr,incl_perldb());
2329 if (SvCUR(PL_linestr))
2330 sv_catpv(PL_linestr,";");
2331 if (PL_preambleav){
2332 while(AvFILLp(PL_preambleav) >= 0) {
2333 SV *tmpsv = av_shift(PL_preambleav);
2334 sv_catsv(PL_linestr, tmpsv);
2335 sv_catpv(PL_linestr, ";");
91b7def8 2336 sv_free(tmpsv);
2337 }
3280af22
NIS
2338 sv_free((SV*)PL_preambleav);
2339 PL_preambleav = NULL;
91b7def8 2340 }
3280af22
NIS
2341 if (PL_minus_n || PL_minus_p) {
2342 sv_catpv(PL_linestr, "LINE: while (<>) {");
2343 if (PL_minus_l)
2344 sv_catpv(PL_linestr,"chomp;");
2345 if (PL_minus_a) {
8fd239a7
CS
2346 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2347 if (gv)
2348 GvIMPORTED_AV_on(gv);
3280af22
NIS
2349 if (PL_minus_F) {
2350 if (strchr("/'\"", *PL_splitstr)
2351 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2352 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2353 else {
2354 char delim;
2355 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2356 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2357 delim = *s;
cea2e8a9 2358 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2359 "q" + (delim == '\''), delim);
3280af22 2360 for (s = PL_splitstr; *s; s++) {
54310121 2361 if (*s == '\\')
3280af22
NIS
2362 sv_catpvn(PL_linestr, "\\", 1);
2363 sv_catpvn(PL_linestr, s, 1);
54310121 2364 }
cea2e8a9 2365 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2366 }
2304df62
AD
2367 }
2368 else
3280af22 2369 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2370 }
79072805 2371 }
3280af22
NIS
2372 sv_catpv(PL_linestr, "\n");
2373 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2374 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2375 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2376 SV *sv = NEWSV(85,0);
2377
2378 sv_upgrade(sv, SVt_PVMG);
3280af22 2379 sv_setsv(sv,PL_linestr);
57843af0 2380 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2381 }
79072805 2382 goto retry;
a687059c 2383 }
e929a76b 2384 do {
3280af22 2385 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2386 fake_eof:
3280af22
NIS
2387 if (PL_rsfp) {
2388 if (PL_preprocess && !PL_in_eval)
2389 (void)PerlProc_pclose(PL_rsfp);
2390 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2391 PerlIO_clearerr(PL_rsfp);
395c3793 2392 else
3280af22
NIS
2393 (void)PerlIO_close(PL_rsfp);
2394 PL_rsfp = Nullfp;
4a9ae47a 2395 PL_doextract = FALSE;
395c3793 2396 }
3280af22
NIS
2397 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2398 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2399 sv_catpv(PL_linestr,";}");
2400 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2401 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2402 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2403 goto retry;
2404 }
3280af22
NIS
2405 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2406 sv_setpv(PL_linestr,"");
79072805 2407 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2408 }
3280af22 2409 if (PL_doextract) {
a0d0e21e 2410 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2411 PL_doextract = FALSE;
a0d0e21e
LW
2412
2413 /* Incest with pod. */
2414 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2415 sv_setpv(PL_linestr, "");
2416 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2417 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2418 PL_doextract = FALSE;
a0d0e21e
LW
2419 }
2420 }
463ee0b2 2421 incline(s);
3280af22
NIS
2422 } while (PL_doextract);
2423 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2424 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2425 SV *sv = NEWSV(85,0);
a687059c 2426
93a17b20 2427 sv_upgrade(sv, SVt_PVMG);
3280af22 2428 sv_setsv(sv,PL_linestr);
57843af0 2429 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2430 }
3280af22 2431 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2432 if (CopLINE(PL_curcop) == 1) {
3280af22 2433 while (s < PL_bufend && isSPACE(*s))
79072805 2434 s++;
a0d0e21e 2435 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2436 s++;
44a8e56a 2437 d = Nullch;
3280af22 2438 if (!PL_in_eval) {
44a8e56a 2439 if (*s == '#' && *(s+1) == '!')
2440 d = s + 2;
2441#ifdef ALTERNATE_SHEBANG
2442 else {
2443 static char as[] = ALTERNATE_SHEBANG;
2444 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2445 d = s + (sizeof(as) - 1);
2446 }
2447#endif /* ALTERNATE_SHEBANG */
2448 }
2449 if (d) {
b8378b72 2450 char *ipath;
774d564b 2451 char *ipathend;
b8378b72 2452
774d564b 2453 while (isSPACE(*d))
b8378b72
CS
2454 d++;
2455 ipath = d;
774d564b 2456 while (*d && !isSPACE(*d))
2457 d++;
2458 ipathend = d;
2459
2460#ifdef ARG_ZERO_IS_SCRIPT
2461 if (ipathend > ipath) {
2462 /*
2463 * HP-UX (at least) sets argv[0] to the script name,
2464 * which makes $^X incorrect. And Digital UNIX and Linux,
2465 * at least, set argv[0] to the basename of the Perl
2466 * interpreter. So, having found "#!", we'll set it right.
2467 */
2468 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2469 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2470 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2471 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2472 SvSETMAGIC(x);
2473 }
774d564b 2474 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2475 }
774d564b 2476#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2477
2478 /*
2479 * Look for options.
2480 */
748a9306 2481 d = instr(s,"perl -");
84e30d1a 2482 if (!d) {
748a9306 2483 d = instr(s,"perl");
84e30d1a
GS
2484#if defined(DOSISH)
2485 /* avoid getting into infinite loops when shebang
2486 * line contains "Perl" rather than "perl" */
2487 if (!d) {
2488 for (d = ipathend-4; d >= ipath; --d) {
2489 if ((*d == 'p' || *d == 'P')
2490 && !ibcmp(d, "perl", 4))
2491 {
2492 break;
2493 }
2494 }
2495 if (d < ipath)
2496 d = Nullch;
2497 }
2498#endif
2499 }
44a8e56a 2500#ifdef ALTERNATE_SHEBANG
2501 /*
2502 * If the ALTERNATE_SHEBANG on this system starts with a
2503 * character that can be part of a Perl expression, then if
2504 * we see it but not "perl", we're probably looking at the
2505 * start of Perl code, not a request to hand off to some
2506 * other interpreter. Similarly, if "perl" is there, but
2507 * not in the first 'word' of the line, we assume the line
2508 * contains the start of the Perl program.
44a8e56a 2509 */
2510 if (d && *s != '#') {
774d564b 2511 char *c = ipath;
44a8e56a 2512 while (*c && !strchr("; \t\r\n\f\v#", *c))
2513 c++;
2514 if (c < d)
2515 d = Nullch; /* "perl" not in first word; ignore */
2516 else
2517 *s = '#'; /* Don't try to parse shebang line */
2518 }
774d564b 2519#endif /* ALTERNATE_SHEBANG */
748a9306 2520 if (!d &&
44a8e56a 2521 *s == '#' &&
774d564b 2522 ipathend > ipath &&
3280af22 2523 !PL_minus_c &&
748a9306 2524 !instr(s,"indir") &&
3280af22 2525 instr(PL_origargv[0],"perl"))
748a9306 2526 {
9f68db38 2527 char **newargv;
9f68db38 2528
774d564b 2529 *ipathend = '\0';
2530 s = ipathend + 1;
3280af22 2531 while (s < PL_bufend && isSPACE(*s))
9f68db38 2532 s++;
3280af22
NIS
2533 if (s < PL_bufend) {
2534 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2535 newargv[1] = s;
3280af22 2536 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2537 s++;
2538 *s = '\0';
3280af22 2539 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2540 }
2541 else
3280af22 2542 newargv = PL_origargv;
774d564b 2543 newargv[0] = ipath;
80252599 2544 PerlProc_execv(ipath, newargv);
cea2e8a9 2545 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2546 }
748a9306 2547 if (d) {
3280af22
NIS
2548 U32 oldpdb = PL_perldb;
2549 bool oldn = PL_minus_n;
2550 bool oldp = PL_minus_p;
748a9306
LW
2551
2552 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2553 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2554
2555 if (*d++ == '-') {
8cc95fdb 2556 do {
2557 if (*d == 'M' || *d == 'm') {
2558 char *m = d;
2559 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2560 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2561 (int)(d - m), m);
2562 }
2563 d = moreswitches(d);
2564 } while (d);
84902520 2565 if (PERLDB_LINE && !oldpdb ||
3280af22 2566 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2567 /* if we have already added "LINE: while (<>) {",
2568 we must not do it again */
748a9306 2569 {
3280af22
NIS
2570 sv_setpv(PL_linestr, "");
2571 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2572 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2573 PL_preambled = FALSE;
84902520 2574 if (PERLDB_LINE)
3280af22 2575 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2576 goto retry;
2577 }
a0d0e21e 2578 }
79072805 2579 }
9f68db38 2580 }
79072805 2581 }
3280af22
NIS
2582 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2583 PL_bufptr = s;
2584 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2585 return yylex();
ae986130 2586 }
378cc40b 2587 goto retry;
4fdae800 2588 case '\r':
6a27c188 2589#ifdef PERL_STRICT_CR
cea2e8a9
GS
2590 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2591 Perl_croak(aTHX_
54310121 2592 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2593#endif
4fdae800 2594 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2595 s++;
2596 goto retry;
378cc40b 2597 case '#':
e929a76b 2598 case '\n':
3280af22
NIS
2599 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2600 d = PL_bufend;
a687059c 2601 while (s < d && *s != '\n')
378cc40b 2602 s++;
0f85fab0 2603 if (s < d)
378cc40b 2604 s++;
463ee0b2 2605 incline(s);
3280af22
NIS
2606 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2607 PL_bufptr = s;
2608 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2609 return yylex();
a687059c 2610 }
378cc40b 2611 }
a687059c 2612 else {
378cc40b 2613 *s = '\0';
3280af22 2614 PL_bufend = s;
a687059c 2615 }
378cc40b
LW
2616 goto retry;
2617 case '-':
79072805 2618 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2619 s++;
3280af22 2620 PL_bufptr = s;
748a9306
LW
2621 tmp = *s++;
2622
3280af22 2623 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2624 s++;
2625
2626 if (strnEQ(s,"=>",2)) {
3280af22 2627 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2628 OPERATOR('-'); /* unary minus */
2629 }
3280af22
NIS
2630 PL_last_uni = PL_oldbufptr;
2631 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2632 switch (tmp) {
79072805
LW
2633 case 'r': FTST(OP_FTEREAD);
2634 case 'w': FTST(OP_FTEWRITE);
2635 case 'x': FTST(OP_FTEEXEC);
2636 case 'o': FTST(OP_FTEOWNED);
2637 case 'R': FTST(OP_FTRREAD);
2638 case 'W': FTST(OP_FTRWRITE);
2639 case 'X': FTST(OP_FTREXEC);
2640 case 'O': FTST(OP_FTROWNED);
2641 case 'e': FTST(OP_FTIS);
2642 case 'z': FTST(OP_FTZERO);
2643 case 's': FTST(OP_FTSIZE);
2644 case 'f': FTST(OP_FTFILE);
2645 case 'd': FTST(OP_FTDIR);
2646 case 'l': FTST(OP_FTLINK);
2647 case 'p': FTST(OP_FTPIPE);
2648 case 'S': FTST(OP_FTSOCK);
2649 case 'u': FTST(OP_FTSUID);
2650 case 'g': FTST(OP_FTSGID);
2651 case 'k': FTST(OP_FTSVTX);
2652 case 'b': FTST(OP_FTBLK);
2653 case 'c': FTST(OP_FTCHR);
2654 case 't': FTST(OP_FTTTY);
2655 case 'T': FTST(OP_FTTEXT);
2656 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2657 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2658 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2659 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2660 default:
cea2e8a9 2661 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2662 break;
2663 }
2664 }
a687059c
LW
2665 tmp = *s++;
2666 if (*s == tmp) {
2667 s++;
3280af22 2668 if (PL_expect == XOPERATOR)
79072805
LW
2669 TERM(POSTDEC);
2670 else
2671 OPERATOR(PREDEC);
2672 }
2673 else if (*s == '>') {
2674 s++;
2675 s = skipspace(s);
7e2040f0 2676 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2677 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2678 TOKEN(ARROW);
79072805 2679 }
748a9306
LW
2680 else if (*s == '$')
2681 OPERATOR(ARROW);
463ee0b2 2682 else
748a9306 2683 TERM(ARROW);
a687059c 2684 }
3280af22 2685 if (PL_expect == XOPERATOR)
79072805
LW
2686 Aop(OP_SUBTRACT);
2687 else {
3280af22 2688 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2689 check_uni();
79072805 2690 OPERATOR('-'); /* unary minus */
2f3197b3 2691 }
79072805 2692
378cc40b 2693 case '+':
a687059c
LW
2694 tmp = *s++;
2695 if (*s == tmp) {
378cc40b 2696 s++;
3280af22 2697 if (PL_expect == XOPERATOR)
79072805
LW
2698 TERM(POSTINC);
2699 else
2700 OPERATOR(PREINC);
378cc40b 2701 }
3280af22 2702 if (PL_expect == XOPERATOR)
79072805
LW
2703 Aop(OP_ADD);
2704 else {
3280af22 2705 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2706 check_uni();
a687059c 2707 OPERATOR('+');
2f3197b3 2708 }
a687059c 2709
378cc40b 2710 case '*':
3280af22
NIS
2711 if (PL_expect != XOPERATOR) {
2712 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2713 PL_expect = XOPERATOR;
2714 force_ident(PL_tokenbuf, '*');
2715 if (!*PL_tokenbuf)
a0d0e21e 2716 PREREF('*');
79072805 2717 TERM('*');
a687059c 2718 }
79072805
LW
2719 s++;
2720 if (*s == '*') {
a687059c 2721 s++;
79072805 2722 PWop(OP_POW);
a687059c 2723 }
79072805
LW
2724 Mop(OP_MULTIPLY);
2725
378cc40b 2726 case '%':
3280af22 2727 if (PL_expect == XOPERATOR) {
bbce6d69 2728 ++s;
2729 Mop(OP_MODULO);
a687059c 2730 }
3280af22
NIS
2731 PL_tokenbuf[0] = '%';
2732 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2733 if (!PL_tokenbuf[1]) {
2734 if (s == PL_bufend)
bbce6d69 2735 yyerror("Final % should be \\% or %name");
2736 PREREF('%');
a687059c 2737 }
3280af22 2738 PL_pending_ident = '%';
bbce6d69 2739 TERM('%');
a687059c 2740
378cc40b 2741 case '^':
79072805 2742 s++;
a0d0e21e 2743 BOop(OP_BIT_XOR);
79072805 2744 case '[':
3280af22 2745 PL_lex_brackets++;
79072805 2746 /* FALL THROUGH */
378cc40b 2747 case '~':
378cc40b 2748 case ',':
378cc40b
LW
2749 tmp = *s++;
2750 OPERATOR(tmp);
a0d0e21e
LW
2751 case ':':
2752 if (s[1] == ':') {
2753 len = 0;
2754 goto just_a_word;
2755 }
2756 s++;
09bef843
SB
2757 switch (PL_expect) {
2758 OP *attrs;
2759 case XOPERATOR:
2760 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2761 break;
2762 PL_bufptr = s; /* update in case we back off */
2763 goto grabattrs;
2764 case XATTRBLOCK:
2765 PL_expect = XBLOCK;
2766 goto grabattrs;
2767 case XATTRTERM:
2768 PL_expect = XTERMBLOCK;
2769 grabattrs:
2770 s = skipspace(s);
2771 attrs = Nullop;
7e2040f0 2772 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2773 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2774 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2775 if (tmp < 0) tmp = -tmp;
2776 switch (tmp) {
2777 case KEY_or:
2778 case KEY_and:
2779 case KEY_for:
2780 case KEY_unless:
2781 case KEY_if:
2782 case KEY_while:
2783 case KEY_until:
2784 goto got_attrs;
2785 default:
2786 break;
2787 }
2788 }
09bef843
SB
2789 if (*d == '(') {
2790 d = scan_str(d,TRUE,TRUE);
2791 if (!d) {
2792 if (PL_lex_stuff) {
2793 SvREFCNT_dec(PL_lex_stuff);
2794 PL_lex_stuff = Nullsv;
2795 }
2796 /* MUST advance bufptr here to avoid bogus
2797 "at end of line" context messages from yyerror().
2798 */
2799 PL_bufptr = s + len;
2800 yyerror("Unterminated attribute parameter in attribute list");
2801 if (attrs)
2802 op_free(attrs);
2803 return 0; /* EOF indicator */
2804 }
2805 }
2806 if (PL_lex_stuff) {
2807 SV *sv = newSVpvn(s, len);
2808 sv_catsv(sv, PL_lex_stuff);
2809 attrs = append_elem(OP_LIST, attrs,
2810 newSVOP(OP_CONST, 0, sv));
2811 SvREFCNT_dec(PL_lex_stuff);
2812 PL_lex_stuff = Nullsv;
2813 }
2814 else {
2815 attrs = append_elem(OP_LIST, attrs,
2816 newSVOP(OP_CONST, 0,
2817 newSVpvn(s, len)));
2818 }
2819 s = skipspace(d);
0120eecf 2820 if (*s == ':' && s[1] != ':')
09bef843 2821 s = skipspace(s+1);
0120eecf
GS
2822 else if (s == d)
2823 break; /* require real whitespace or :'s */
09bef843 2824 }
f9829d6b
GS
2825 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2826 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2827 char q = ((*s == '\'') ? '"' : '\'');
2828 /* If here for an expression, and parsed no attrs, back off. */
2829 if (tmp == '=' && !attrs) {
2830 s = PL_bufptr;
2831 break;
2832 }
2833 /* MUST advance bufptr here to avoid bogus "at end of line"
2834 context messages from yyerror().
2835 */
2836 PL_bufptr = s;
2837 if (!*s)
2838 yyerror("Unterminated attribute list");
2839 else
2840 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2841 q, *s, q));
2842 if (attrs)
2843 op_free(attrs);
2844 OPERATOR(':');
2845 }
f9829d6b 2846 got_attrs:
09bef843
SB
2847 if (attrs) {
2848 PL_nextval[PL_nexttoke].opval = attrs;
2849 force_next(THING);
2850 }
2851 TOKEN(COLONATTR);
2852 }
a0d0e21e 2853 OPERATOR(':');
8990e307
LW
2854 case '(':
2855 s++;
3280af22
NIS
2856 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2857 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2858 else
3280af22 2859 PL_expect = XTERM;
a0d0e21e 2860 TOKEN('(');
378cc40b 2861 case ';':
57843af0
GS
2862 if (CopLINE(PL_curcop) < PL_copline)
2863 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2864 tmp = *s++;
2865 OPERATOR(tmp);
2866 case ')':
378cc40b 2867 tmp = *s++;
16d20bd9
AD
2868 s = skipspace(s);
2869 if (*s == '{')
2870 PREBLOCK(tmp);
378cc40b 2871 TERM(tmp);
79072805
LW
2872 case ']':
2873 s++;
3280af22 2874 if (PL_lex_brackets <= 0)
d98d5fff 2875 yyerror("Unmatched right square bracket");
463ee0b2 2876 else
3280af22
NIS
2877 --PL_lex_brackets;
2878 if (PL_lex_state == LEX_INTERPNORMAL) {
2879 if (PL_lex_brackets == 0) {
a0d0e21e 2880 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2881 PL_lex_state = LEX_INTERPEND;
79072805
LW
2882 }
2883 }
4633a7c4 2884 TERM(']');
79072805
LW
2885 case '{':
2886 leftbracket:
79072805 2887 s++;
3280af22
NIS
2888 if (PL_lex_brackets > 100) {
2889 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2890 if (newlb != PL_lex_brackstack) {
8990e307 2891 SAVEFREEPV(newlb);
3280af22 2892 PL_lex_brackstack = newlb;
8990e307
LW
2893 }
2894 }
3280af22 2895 switch (PL_expect) {
a0d0e21e 2896 case XTERM:
3280af22 2897 if (PL_lex_formbrack) {
a0d0e21e
LW
2898 s--;
2899 PRETERMBLOCK(DO);
2900 }
3280af22
NIS
2901 if (PL_oldoldbufptr == PL_last_lop)
2902 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2903 else
3280af22 2904 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2905 OPERATOR(HASHBRACK);
a0d0e21e 2906 case XOPERATOR:
3280af22 2907 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2908 s++;
44a8e56a 2909 d = s;
3280af22
NIS
2910 PL_tokenbuf[0] = '\0';
2911 if (d < PL_bufend && *d == '-') {
2912 PL_tokenbuf[0] = '-';
44a8e56a 2913 d++;
3280af22 2914 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2915 d++;
2916 }
7e2040f0 2917 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 2918 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2919 FALSE, &len);
3280af22 2920 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2921 d++;
2922 if (*d == '}') {
3280af22 2923 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2924 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2925 if (minus)
2926 force_next('-');
748a9306
LW
2927 }
2928 }
2929 /* FALL THROUGH */
09bef843 2930 case XATTRBLOCK:
748a9306 2931 case XBLOCK:
3280af22
NIS
2932 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2933 PL_expect = XSTATE;
a0d0e21e 2934 break;
09bef843 2935 case XATTRTERM:
a0d0e21e 2936 case XTERMBLOCK:
3280af22
NIS
2937 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2938 PL_expect = XSTATE;
a0d0e21e
LW
2939 break;
2940 default: {
2941 char *t;
3280af22
NIS
2942 if (PL_oldoldbufptr == PL_last_lop)
2943 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2944 else
3280af22 2945 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2946 s = skipspace(s);
09ecc4b6 2947 if (*s == '}')
a0d0e21e 2948 OPERATOR(HASHBRACK);
b8a4b1be
GS
2949 /* This hack serves to disambiguate a pair of curlies
2950 * as being a block or an anon hash. Normally, expectation
2951 * determines that, but in cases where we're not in a
2952 * position to expect anything in particular (like inside
2953 * eval"") we have to resolve the ambiguity. This code
2954 * covers the case where the first term in the curlies is a
2955 * quoted string. Most other cases need to be explicitly
2956 * disambiguated by prepending a `+' before the opening
2957 * curly in order to force resolution as an anon hash.
2958 *
2959 * XXX should probably propagate the outer expectation
2960 * into eval"" to rely less on this hack, but that could
2961 * potentially break current behavior of eval"".
2962 * GSAR 97-07-21
2963 */
2964 t = s;
2965 if (*s == '\'' || *s == '"' || *s == '`') {
2966 /* common case: get past first string, handling escapes */
3280af22 2967 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2968 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2969 t++;
2970 t++;
a0d0e21e 2971 }
b8a4b1be 2972 else if (*s == 'q') {
3280af22 2973 if (++t < PL_bufend
b8a4b1be 2974 && (!isALNUM(*t)
3280af22 2975 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
2976 && !isALNUM(*t))))
2977 {
b8a4b1be
GS
2978 char *tmps;
2979 char open, close, term;
2980 I32 brackets = 1;
2981
3280af22 2982 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2983 t++;
2984 term = *t;
2985 open = term;
2986 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2987 term = tmps[5];
2988 close = term;
2989 if (open == close)
3280af22
NIS
2990 for (t++; t < PL_bufend; t++) {
2991 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2992 t++;
6d07e5e9 2993 else if (*t == open)
b8a4b1be
GS
2994 break;
2995 }
2996 else
3280af22
NIS
2997 for (t++; t < PL_bufend; t++) {
2998 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2999 t++;
6d07e5e9 3000 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3001 break;
3002 else if (*t == open)
3003 brackets++;
3004 }
3005 }
3006 t++;
a0d0e21e 3007 }
7e2040f0 3008 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3009 t += UTF8SKIP(t);
7e2040f0 3010 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3011 t += UTF8SKIP(t);
a0d0e21e 3012 }
3280af22 3013 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3014 t++;
b8a4b1be
GS
3015 /* if comma follows first term, call it an anon hash */
3016 /* XXX it could be a comma expression with loop modifiers */
3280af22 3017 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3018 || (*t == '=' && t[1] == '>')))
a0d0e21e 3019 OPERATOR(HASHBRACK);
3280af22 3020 if (PL_expect == XREF)
4e4e412b 3021 PL_expect = XTERM;
a0d0e21e 3022 else {
3280af22
NIS
3023 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3024 PL_expect = XSTATE;
a0d0e21e 3025 }
8990e307 3026 }
a0d0e21e 3027 break;
463ee0b2 3028 }
57843af0 3029 yylval.ival = CopLINE(PL_curcop);
79072805 3030 if (isSPACE(*s) || *s == '#')
3280af22 3031 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3032 TOKEN('{');
378cc40b 3033 case '}':
79072805
LW
3034 rightbracket:
3035 s++;
3280af22 3036 if (PL_lex_brackets <= 0)
d98d5fff 3037 yyerror("Unmatched right curly bracket");
463ee0b2 3038 else
3280af22
NIS
3039 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
3040 if (PL_lex_brackets < PL_lex_formbrack)
3041 PL_lex_formbrack = 0;
3042 if (PL_lex_state == LEX_INTERPNORMAL) {
3043 if (PL_lex_brackets == 0) {
9059aa12
LW
3044 if (PL_expect & XFAKEBRACK) {
3045 PL_expect &= XENUMMASK;
3280af22
NIS
3046 PL_lex_state = LEX_INTERPEND;
3047 PL_bufptr = s;
cea2e8a9 3048 return yylex(); /* ignore fake brackets */
79072805 3049 }
fa83b5b6 3050 if (*s == '-' && s[1] == '>')
3280af22 3051 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3052 else if (*s != '[' && *s != '{')
3280af22 3053 PL_lex_state = LEX_INTERPEND;
79072805
LW
3054 }
3055 }
9059aa12
LW
3056 if (PL_expect & XFAKEBRACK) {
3057 PL_expect &= XENUMMASK;
3280af22 3058 PL_bufptr = s;
cea2e8a9 3059 return yylex(); /* ignore fake brackets */
748a9306 3060 }
79072805
LW
3061 force_next('}');
3062 TOKEN(';');
378cc40b
LW
3063 case '&':
3064 s++;
3065 tmp = *s++;
3066 if (tmp == '&')
a0d0e21e 3067 AOPERATOR(ANDAND);
378cc40b 3068 s--;
3280af22 3069 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3070 if (ckWARN(WARN_SEMICOLON)
3071 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3072 {
57843af0 3073 CopLINE_dec(PL_curcop);
cea2e8a9 3074 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3075 CopLINE_inc(PL_curcop);
463ee0b2 3076 }
79072805 3077 BAop(OP_BIT_AND);
463ee0b2 3078 }
79072805 3079
3280af22
NIS
3080 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3081 if (*PL_tokenbuf) {
3082 PL_expect = XOPERATOR;
3083 force_ident(PL_tokenbuf, '&');
463ee0b2 3084 }
79072805
LW
3085 else
3086 PREREF('&');
c07a80fd 3087 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3088 TERM('&');
3089
378cc40b
LW
3090 case '|':
3091 s++;
3092 tmp = *s++;
3093 if (tmp == '|')
a0d0e21e 3094 AOPERATOR(OROR);
378cc40b 3095 s--;
79072805 3096 BOop(OP_BIT_OR);
378cc40b
LW
3097 case '=':
3098 s++;
3099 tmp = *s++;
3100 if (tmp == '=')
79072805
LW
3101 Eop(OP_EQ);
3102 if (tmp == '>')
3103 OPERATOR(',');
378cc40b 3104 if (tmp == '~')
79072805 3105 PMop(OP_MATCH);
599cee73 3106 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3107 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3108 s--;
3280af22
NIS
3109 if (PL_expect == XSTATE && isALPHA(tmp) &&
3110 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3111 {
3280af22
NIS
3112 if (PL_in_eval && !PL_rsfp) {
3113 d = PL_bufend;
a5f75d66
AD
3114 while (s < d) {
3115 if (*s++ == '\n') {
3116 incline(s);
3117 if (strnEQ(s,"=cut",4)) {
3118 s = strchr(s,'\n');
3119 if (s)
3120 s++;
3121 else
3122 s = d;
3123 incline(s);
3124 goto retry;
3125 }
3126 }
3127 }
3128 goto retry;
3129 }
3280af22
NIS
3130 s = PL_bufend;
3131 PL_doextract = TRUE;
a0d0e21e
LW
3132 goto retry;
3133 }
3280af22 3134 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3135 char *t;
51882d45 3136#ifdef PERL_STRICT_CR
a0d0e21e 3137 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3138#else
3139 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3140#endif
a0d0e21e
LW
3141 if (*t == '\n' || *t == '#') {
3142 s--;
3280af22 3143 PL_expect = XBLOCK;
a0d0e21e
LW
3144 goto leftbracket;
3145 }
79072805 3146 }
a0d0e21e
LW
3147 yylval.ival = 0;
3148 OPERATOR(ASSIGNOP);
378cc40b
LW
3149 case '!':
3150 s++;
3151 tmp = *s++;
3152 if (tmp == '=')
79072805 3153 Eop(OP_NE);
378cc40b 3154 if (tmp == '~')
79072805 3155 PMop(OP_NOT);
378cc40b
LW
3156 s--;
3157 OPERATOR('!');
3158 case '<':
3280af22 3159 if (PL_expect != XOPERATOR) {
93a17b20 3160 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3161 check_uni();
79072805
LW
3162 if (s[1] == '<')
3163 s = scan_heredoc(s);
3164 else
3165 s = scan_inputsymbol(s);
3166 TERM(sublex_start());
378cc40b
LW
3167 }
3168 s++;
3169 tmp = *s++;
3170 if (tmp == '<')
79072805 3171 SHop(OP_LEFT_SHIFT);
395c3793
LW
3172 if (tmp == '=') {
3173 tmp = *s++;
3174 if (tmp == '>')
79072805 3175 Eop(OP_NCMP);
395c3793 3176 s--;
79072805 3177 Rop(OP_LE);
395c3793 3178 }
378cc40b 3179 s--;
79072805 3180 Rop(OP_LT);
378cc40b
LW
3181 case '>':
3182 s++;
3183 tmp = *s++;
3184 if (tmp == '>')
79072805 3185 SHop(OP_RIGHT_SHIFT);
378cc40b 3186 if (tmp == '=')
79072805 3187 Rop(OP_GE);
378cc40b 3188 s--;
79072805 3189 Rop(OP_GT);
378cc40b
LW
3190
3191 case '$':
bbce6d69 3192 CLINE;
3193
3280af22
NIS
3194 if (PL_expect == XOPERATOR) {
3195 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3196 PL_expect = XTERM;
a0d0e21e 3197 depcom();
bbce6d69 3198 return ','; /* grandfather non-comma-format format */
a0d0e21e 3199 }
8990e307 3200 }
a0d0e21e 3201
7e2040f0 3202 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3203 PL_tokenbuf[0] = '@';
376b8730
SM
3204 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3205 sizeof PL_tokenbuf - 1, FALSE);
3206 if (PL_expect == XOPERATOR)
3207 no_op("Array length", s);
3280af22 3208 if (!PL_tokenbuf[1])
a0d0e21e 3209 PREREF(DOLSHARP);
3280af22
NIS
3210 PL_expect = XOPERATOR;
3211 PL_pending_ident = '#';
463ee0b2 3212 TOKEN(DOLSHARP);
79072805 3213 }
bbce6d69 3214
3280af22 3215 PL_tokenbuf[0] = '$';
376b8730
SM
3216 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3217 sizeof PL_tokenbuf - 1, FALSE);
3218 if (PL_expect == XOPERATOR)
3219 no_op("Scalar", s);
3280af22
NIS
3220 if (!PL_tokenbuf[1]) {
3221 if (s == PL_bufend)
bbce6d69 3222 yyerror("Final $ should be \\$ or $name");
3223 PREREF('$');
8990e307 3224 }
a0d0e21e 3225
bbce6d69 3226 /* This kludge not intended to be bulletproof. */
3280af22 3227 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3228 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3229 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 3230 yylval.opval->op_private = OPpCONST_ARYBASE;
3231 TERM(THING);
3232 }
3233
ff68c719 3234 d = s;
69d2bceb 3235 tmp = (I32)*s;
3280af22 3236 if (PL_lex_state == LEX_NORMAL)
ff68c719 3237 s = skipspace(s);
3238
3280af22 3239 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3240 char *t;
3241 if (*s == '[') {
3280af22 3242 PL_tokenbuf[0] = '@';
599cee73 3243 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3244 for(t = s + 1;
7e2040f0 3245 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3246 t++) ;
a0d0e21e 3247 if (*t++ == ',') {
3280af22
NIS
3248 PL_bufptr = skipspace(PL_bufptr);
3249 while (t < PL_bufend && *t != ']')
bbce6d69 3250 t++;
cea2e8a9 3251 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3252 "Multidimensional syntax %.*s not supported",
3253 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3254 }
3255 }
bbce6d69 3256 }
3257 else if (*s == '{') {
3280af22 3258 PL_tokenbuf[0] = '%';
599cee73 3259 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3260 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3261 {
3280af22 3262 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3263 STRLEN len;
3264 for (t++; isSPACE(*t); t++) ;
7e2040f0 3265 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3266 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3267 for (; isSPACE(*t); t++) ;
864dbfa3 3268 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3269 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3270 "You need to quote \"%s\"", tmpbuf);
748a9306 3271 }
93a17b20
LW
3272 }
3273 }
2f3197b3 3274 }
bbce6d69 3275
3280af22 3276 PL_expect = XOPERATOR;
69d2bceb 3277 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3278 bool islop = (PL_last_lop == PL_oldoldbufptr);
3279 if (!islop || PL_last_lop_op == OP_GREPSTART)
3280 PL_expect = XOPERATOR;
bbce6d69 3281 else if (strchr("$@\"'`q", *s))
3280af22 3282 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3283 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3284 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3285 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3286 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3287 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3288 if (tmp = keyword(tmpbuf, len)) {
3289 /* binary operators exclude handle interpretations */
3290 switch (tmp) {
3291 case -KEY_x:
3292 case -KEY_eq:
3293 case -KEY_ne:
3294 case -KEY_gt:
3295 case -KEY_lt:
3296 case -KEY_ge:
3297 case -KEY_le:
3298 case -KEY_cmp:
3299 break;
3300 default:
3280af22 3301 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3302 break;
3303 }
3304 }
68dc0745 3305 else {
3306 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3307 if (gv && GvCVu(gv))
3280af22 3308 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3309 }
93a17b20 3310 }
bbce6d69 3311 else if (isDIGIT(*s))
3280af22 3312 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3313 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3314 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3315 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3316 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3317 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3318 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3319 }
3280af22 3320 PL_pending_ident = '$';
79072805 3321 TOKEN('$');
378cc40b
LW
3322
3323 case '@':
3280af22 3324 if (PL_expect == XOPERATOR)
bbce6d69 3325 no_op("Array", s);
3280af22
NIS
3326 PL_tokenbuf[0] = '@';
3327 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3328 if (!PL_tokenbuf[1]) {
3329 if (s == PL_bufend)
bbce6d69 3330 yyerror("Final @ should be \\@ or @name");
3331 PREREF('@');
3332 }
3280af22 3333 if (PL_lex_state == LEX_NORMAL)
ff68c719 3334 s = skipspace(s);
3280af22 3335 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3336 if (*s == '{')
3280af22 3337 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3338
3339 /* Warn about @ where they meant $. */
599cee73 3340 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3341 if (*s == '[' || *s == '{') {
3342 char *t = s + 1;
7e2040f0 3343 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3344 t++;
3345 if (*t == '}' || *t == ']') {
3346 t++;
3280af22 3347 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3348 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3349 "Scalar value %.*s better written as $%.*s",
3280af22 3350 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3351 }
93a17b20
LW
3352 }
3353 }
463ee0b2 3354 }
3280af22 3355 PL_pending_ident = '@';
79072805 3356 TERM('@');
378cc40b
LW
3357
3358 case '/': /* may either be division or pattern */
3359 case '?': /* may either be conditional or pattern */
3280af22 3360 if (PL_expect != XOPERATOR) {
c277df42 3361 /* Disable warning on "study /blah/" */
3280af22
NIS
3362 if (PL_oldoldbufptr == PL_last_uni
3363 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3364 || memNE(PL_last_uni, "study", 5)
3365 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3366 check_uni();
8782bef2 3367 s = scan_pat(s,OP_MATCH);
79072805 3368 TERM(sublex_start());
378cc40b
LW
3369 }
3370 tmp = *s++;
a687059c 3371 if (tmp == '/')
79072805 3372 Mop(OP_DIVIDE);
378cc40b
LW
3373 OPERATOR(tmp);
3374
3375 case '.':
51882d45
GS
3376 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3377#ifdef PERL_STRICT_CR
3378 && s[1] == '\n'
3379#else
3380 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3381#endif
3382 && (s == PL_linestart || s[-1] == '\n') )
3383 {
3280af22
NIS
3384 PL_lex_formbrack = 0;
3385 PL_expect = XSTATE;
79072805
LW
3386 goto rightbracket;
3387 }
3280af22 3388 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3389 tmp = *s++;
a687059c
LW
3390 if (*s == tmp) {
3391 s++;
2f3197b3
LW
3392 if (*s == tmp) {
3393 s++;
79072805 3394 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3395 }
3396 else
79072805 3397 yylval.ival = 0;
378cc40b 3398 OPERATOR(DOTDOT);
a687059c 3399 }
3280af22 3400 if (PL_expect != XOPERATOR)
2f3197b3 3401 check_uni();
79072805 3402 Aop(OP_CONCAT);
378cc40b
LW
3403 }
3404 /* FALL THROUGH */
3405 case '0': case '1': case '2': case '3': case '4':
3406 case '5': case '6': case '7': case '8': case '9':
79072805 3407 s = scan_num(s);
3280af22 3408 if (PL_expect == XOPERATOR)
8990e307 3409 no_op("Number",s);
79072805
LW
3410 TERM(THING);
3411
3412 case '\'':
09bef843 3413 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3414 if (PL_expect == XOPERATOR) {
3415 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3416 PL_expect = XTERM;
a0d0e21e
LW
3417 depcom();
3418 return ','; /* grandfather non-comma-format format */
3419 }
463ee0b2 3420 else
8990e307 3421 no_op("String",s);
463ee0b2 3422 }
79072805 3423 if (!s)
85e6fe83 3424 missingterm((char*)0);
79072805
LW
3425 yylval.ival = OP_CONST;
3426 TERM(sublex_start());
3427
3428 case '"':
09bef843 3429 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3430 if (PL_expect == XOPERATOR) {
3431 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3432 PL_expect = XTERM;
a0d0e21e
LW
3433 depcom();
3434 return ','; /* grandfather non-comma-format format */
3435 }
463ee0b2 3436 else
8990e307 3437 no_op("String",s);
463ee0b2 3438 }
79072805 3439 if (!s)
85e6fe83 3440 missingterm((char*)0);
4633a7c4 3441 yylval.ival = OP_CONST;
3280af22 3442 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3443 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3444 yylval.ival = OP_STRINGIFY;
3445 break;
3446 }
3447 }
79072805
LW
3448 TERM(sublex_start());
3449
3450 case '`':
09bef843 3451 s = scan_str(s,FALSE,FALSE);
3280af22 3452 if (PL_expect == XOPERATOR)
8990e307 3453 no_op("Backticks",s);
79072805 3454 if (!s)
85e6fe83 3455 missingterm((char*)0);
79072805
LW
3456 yylval.ival = OP_BACKTICK;
3457 set_csh();
3458 TERM(sublex_start());
3459
3460 case '\\':
3461 s++;
599cee73 3462 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3463 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3464 *s, *s);
3280af22 3465 if (PL_expect == XOPERATOR)
8990e307 3466 no_op("Backslash",s);
79072805
LW
3467 OPERATOR(REFGEN);
3468
a7cb1f99 3469 case 'v':
e526c9e6 3470 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3471 char *start = s;
3472 start++;
3473 start++;
3474 while (isDIGIT(*start))
3475 start++;
3476 if (*start == '.' && isDIGIT(start[1])) {
3477 s = scan_num(s);
3478 TERM(THING);
3479 }
e526c9e6
GS
3480 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3481 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
3482 char c = *start;
3483 GV *gv;
3484 *start = '\0';
3485 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3486 *start = c;
3487 if (!gv) {
3488 s = scan_num(s);
3489 TERM(THING);
3490 }
3491 }
a7cb1f99
GS
3492 }
3493 goto keylookup;
79072805 3494 case 'x':
3280af22 3495 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3496 s++;
3497 Mop(OP_REPEAT);
2f3197b3 3498 }
79072805
LW
3499 goto keylookup;
3500
378cc40b 3501 case '_':
79072805
LW
3502 case 'a': case 'A':
3503 case 'b': case 'B':
3504 case 'c': case 'C':
3505 case 'd': case 'D':
3506 case 'e': case 'E':
3507 case 'f': case 'F':
3508 case 'g': case 'G':
3509 case 'h': case 'H':
3510 case 'i': case 'I':
3511 case 'j': case 'J':
3512 case 'k': case 'K':
3513 case 'l': case 'L':
3514 case 'm': case 'M':
3515 case 'n': case 'N':
3516 case 'o': case 'O':
3517 case 'p': case 'P':
3518 case 'q': case 'Q':
3519 case 'r': case 'R':
3520 case 's': case 'S':
3521 case 't': case 'T':
3522 case 'u': case 'U':
a7cb1f99 3523 case 'V':
79072805
LW
3524 case 'w': case 'W':
3525 case 'X':
3526 case 'y': case 'Y':
3527 case 'z': case 'Z':
3528
49dc05e3 3529 keylookup: {
2d8e6c8d 3530 STRLEN n_a;
161b471a
NIS
3531 gv = Nullgv;
3532 gvp = 0;
49dc05e3 3533
3280af22
NIS
3534 PL_bufptr = s;
3535 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3536
3537 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3538 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3539 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3540 (PL_tokenbuf[0] == 'q' &&
3541 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3542
3543 /* x::* is just a word, unless x is "CORE" */
3280af22 3544 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3545 goto just_a_word;
3546
3643fb5f 3547 d = s;
3280af22 3548 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3549 d++; /* no comments skipped here, or s### is misparsed */
3550
3551 /* Is this a label? */
3280af22
NIS
3552 if (!tmp && PL_expect == XSTATE
3553 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3554 s = d + 1;
3280af22 3555 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3556 CLINE;
3557 TOKEN(LABEL);
3643fb5f
CS
3558 }
3559
3560 /* Check for keywords */
3280af22 3561 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3562
3563 /* Is this a word before a => operator? */
748a9306
LW
3564 if (strnEQ(d,"=>",2)) {
3565 CLINE;
3280af22 3566 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3567 yylval.opval->op_private = OPpCONST_BARE;
3568 TERM(WORD);
3569 }
3570
a0d0e21e 3571 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3572 GV *ogv = Nullgv; /* override (winner) */
3573 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3574 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3575 CV *cv;
3280af22 3576 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3577 (cv = GvCVu(gv)))
3578 {
3579 if (GvIMPORTED_CV(gv))
3580 ogv = gv;
3581 else if (! CvMETHOD(cv))
3582 hgv = gv;
3583 }
3584 if (!ogv &&
3280af22
NIS
3585 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3586 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3587 GvCVu(gv) && GvIMPORTED_CV(gv))
3588 {
3589 ogv = gv;
3590 }
3591 }
3592 if (ogv) {
3593 tmp = 0; /* overridden by import or by GLOBAL */