This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
patch to fix aix hints from ortmann@vnet.ibm.com
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4eb8286e 3 * Copyright (c) 1991-1999, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
51371543 31
a0ed51b3 32#define UTF (PL_hints & HINT_UTF8)
834a4ddd
LW
33/*
34 * Note: we try to be careful never to call the isXXX_utf8() functions
35 * unless we're pretty sure we've seen the beginning of a UTF-8 character
36 * (that is, the two high bits are set). Otherwise we risk loading in the
37 * heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
38 */
39#define isIDFIRST_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
40 ? isIDFIRST(*(p)) \
41 : isIDFIRST_utf8((U8*)p))
42#define isALNUM_lazy(p) ((!UTF || (*((U8*)p) < 0xc0)) \
43 ? isALNUM(*(p)) \
44 : isALNUM_utf8((U8*)p))
a0ed51b3 45
2b92dfce
GS
46/* In variables name $^X, these are the legal values for X.
47 * 1999-02-27 mjd-perl-patch@plover.com */
48#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
49
ffb4593c
NT
50/* LEX_* are values for PL_lex_state, the state of the lexer.
51 * They are arranged oddly so that the guard on the switch statement
79072805
LW
52 * can get by with a single comparison (if the compiler is smart enough).
53 */
54
fb73857a 55/* #define LEX_NOTPARSING 11 is done in perl.h. */
56
55497cff 57#define LEX_NORMAL 10
58#define LEX_INTERPNORMAL 9
59#define LEX_INTERPCASEMOD 8
60#define LEX_INTERPPUSH 7
61#define LEX_INTERPSTART 6
62#define LEX_INTERPEND 5
63#define LEX_INTERPENDMAYBE 4
64#define LEX_INTERPCONCAT 3
65#define LEX_INTERPCONST 2
66#define LEX_FORMLINE 1
67#define LEX_KNOWNEXT 0
79072805 68
395c3793
LW
69#ifdef I_FCNTL
70#include <fcntl.h>
71#endif
fe14fcc3
LW
72#ifdef I_SYS_FILE
73#include <sys/file.h>
74#endif
395c3793 75
a790bc05 76/* XXX If this causes problems, set i_unistd=undef in the hint file. */
77#ifdef I_UNISTD
78# include <unistd.h> /* Needed for execv() */
79#endif
80
81
79072805
LW
82#ifdef ff_next
83#undef ff_next
d48672a2
LW
84#endif
85
a1a0e61e
TD
86#ifdef USE_PURE_BISON
87YYSTYPE* yylval_pointer = NULL;
88int* yychar_pointer = NULL;
22c35a8c
GS
89# undef yylval
90# undef yychar
e4bfbdd4
JH
91# define yylval (*yylval_pointer)
92# define yychar (*yychar_pointer)
93# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
94# undef yylex
95# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
3280af22
NIS
135#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
136#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
137#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
138#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
139#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
140#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
141#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
142#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
143#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
144#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
145#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
146#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
147#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
148#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
149#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
150#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
151#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
152#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
153#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
154#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 155
a687059c
LW
156/* This bit of chicanery makes a unary function followed by
157 * a parenthesis into a function with one argument, highest precedence.
158 */
2f3197b3 159#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
160 PL_expect = XTERM, \
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
163 PL_last_lop_op = f, \
a687059c
LW
164 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165
79072805 166#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
167 PL_bufptr = s, \
168 PL_last_uni = PL_oldbufptr, \
79072805
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
9f68db38 171/* grandfather return to old style */
3280af22 172#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 173
ffb4593c
NT
174/*
175 * S_ao
176 *
177 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
178 * into an OP_ANDASSIGN or OP_ORASSIGN
179 */
180
76e3520e 181STATIC int
cea2e8a9 182S_ao(pTHX_ int toketype)
a0d0e21e 183{
3280af22
NIS
184 if (*PL_bufptr == '=') {
185 PL_bufptr++;
a0d0e21e
LW
186 if (toketype == ANDAND)
187 yylval.ival = OP_ANDASSIGN;
188 else if (toketype == OROR)
189 yylval.ival = OP_ORASSIGN;
190 toketype = ASSIGNOP;
191 }
192 return toketype;
193}
194
ffb4593c
NT
195/*
196 * S_no_op
197 * When Perl expects an operator and finds something else, no_op
198 * prints the warning. It always prints "<something> found where
199 * operator expected. It prints "Missing semicolon on previous line?"
200 * if the surprise occurs at the start of the line. "do you need to
201 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
202 * where the compiler doesn't know if foo is a method call or a function.
203 * It prints "Missing operator before end of line" if there's nothing
204 * after the missing operator, or "... before <...>" if there is something
205 * after the missing operator.
206 */
207
76e3520e 208STATIC void
cea2e8a9 209S_no_op(pTHX_ char *what, char *s)
463ee0b2 210{
3280af22
NIS
211 char *oldbp = PL_bufptr;
212 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 213
1189a94a
GS
214 if (!s)
215 s = oldbp;
216 else {
217 assert(s >= oldbp);
218 PL_bufptr = s;
219 }
cea2e8a9 220 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 221 if (is_first)
cea2e8a9 222 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
834a4ddd 223 else if (PL_oldoldbufptr && isIDFIRST_lazy(PL_oldoldbufptr)) {
748a9306 224 char *t;
834a4ddd 225 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy(t) || *t == ':'); t++) ;
3280af22 226 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 227 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 228 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306
LW
229 }
230 else
cea2e8a9 231 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
3280af22 232 PL_bufptr = oldbp;
8990e307
LW
233}
234
ffb4593c
NT
235/*
236 * S_missingterm
237 * Complain about missing quote/regexp/heredoc terminator.
238 * If it's called with (char *)NULL then it cauterizes the line buffer.
239 * If we're in a delimited string and the delimiter is a control
240 * character, it's reformatted into a two-char sequence like ^C.
241 * This is fatal.
242 */
243
76e3520e 244STATIC void
cea2e8a9 245S_missingterm(pTHX_ char *s)
8990e307
LW
246{
247 char tmpbuf[3];
248 char q;
249 if (s) {
250 char *nl = strrchr(s,'\n');
d2719217 251 if (nl)
8990e307
LW
252 *nl = '\0';
253 }
9d116dd7
JH
254 else if (
255#ifdef EBCDIC
256 iscntrl(PL_multi_close)
257#else
258 PL_multi_close < 32 || PL_multi_close == 127
259#endif
260 ) {
8990e307 261 *tmpbuf = '^';
3280af22 262 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
263 s = "\\n";
264 tmpbuf[2] = '\0';
265 s = tmpbuf;
266 }
267 else {
3280af22 268 *tmpbuf = PL_multi_close;
8990e307
LW
269 tmpbuf[1] = '\0';
270 s = tmpbuf;
271 }
272 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 273 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 274}
79072805 275
ffb4593c
NT
276/*
277 * Perl_deprecate
ffb4593c
NT
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
9cbb5ea2 290 * Deprecate a comma-less variable list.
ffb4593c
NT
291 */
292
76e3520e 293STATIC void
cea2e8a9 294S_depcom(pTHX)
a0d0e21e
LW
295{
296 deprecate("comma-less variable list");
297}
298
ffb4593c 299/*
9cbb5ea2
GS
300 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
301 * utf16-to-utf8-reversed.
ffb4593c
NT
302 */
303
a868473f
NIS
304#ifdef WIN32
305
76e3520e 306STATIC I32
cea2e8a9 307S_win32_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f
NIS
308{
309 I32 count = FILTER_READ(idx+1, sv, maxlen);
310 if (count > 0 && !maxlen)
311 win32_strip_return(sv);
312 return count;
313}
314#endif
315
a0ed51b3 316STATIC I32
cea2e8a9 317S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
318{
319 I32 count = FILTER_READ(idx+1, sv, maxlen);
320 if (count) {
dfe13c55
GS
321 U8* tmps;
322 U8* tend;
323 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 324 tend = utf16_to_utf8((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 325 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
326
327 }
328 return count;
329}
330
331STATIC I32
cea2e8a9 332S_utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a0ed51b3
LW
333{
334 I32 count = FILTER_READ(idx+1, sv, maxlen);
335 if (count) {
dfe13c55
GS
336 U8* tmps;
337 U8* tend;
338 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
a0ed51b3 339 tend = utf16_to_utf8_reversed((U16*)SvPVX(sv), tmps, SvCUR(sv));
dfe13c55 340 sv_usepvn(sv, (char*)tmps, tend - tmps);
a0ed51b3
LW
341
342 }
343 return count;
344}
a868473f 345
ffb4593c
NT
346/*
347 * Perl_lex_start
9cbb5ea2
GS
348 * Initialize variables. Uses the Perl save_stack to save its state (for
349 * recursive calls to the parser).
ffb4593c
NT
350 */
351
a0d0e21e 352void
864dbfa3 353Perl_lex_start(pTHX_ SV *line)
79072805 354{
0f15f207 355 dTHR;
8990e307
LW
356 char *s;
357 STRLEN len;
358
3280af22
NIS
359 SAVEI32(PL_lex_dojoin);
360 SAVEI32(PL_lex_brackets);
361 SAVEI32(PL_lex_fakebrack);
362 SAVEI32(PL_lex_casemods);
363 SAVEI32(PL_lex_starts);
364 SAVEI32(PL_lex_state);
7766f137 365 SAVEVPTR(PL_lex_inpat);
3280af22 366 SAVEI32(PL_lex_inwhat);
57843af0 367 SAVECOPLINE(PL_curcop);
3280af22
NIS
368 SAVEPPTR(PL_bufptr);
369 SAVEPPTR(PL_bufend);
370 SAVEPPTR(PL_oldbufptr);
371 SAVEPPTR(PL_oldoldbufptr);
372 SAVEPPTR(PL_linestart);
373 SAVESPTR(PL_linestr);
374 SAVEPPTR(PL_lex_brackstack);
375 SAVEPPTR(PL_lex_casestack);
c76ac1ee 376 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
377 SAVESPTR(PL_lex_stuff);
378 SAVEI32(PL_lex_defer);
09bef843 379 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 380 SAVESPTR(PL_lex_repl);
bebdddfc
GS
381 SAVEINT(PL_expect);
382 SAVEINT(PL_lex_expect);
3280af22
NIS
383
384 PL_lex_state = LEX_NORMAL;
385 PL_lex_defer = 0;
386 PL_expect = XSTATE;
387 PL_lex_brackets = 0;
388 PL_lex_fakebrack = 0;
389 New(899, PL_lex_brackstack, 120, char);
390 New(899, PL_lex_casestack, 12, char);
391 SAVEFREEPV(PL_lex_brackstack);
392 SAVEFREEPV(PL_lex_casestack);
393 PL_lex_casemods = 0;
394 *PL_lex_casestack = '\0';
395 PL_lex_dojoin = 0;
396 PL_lex_starts = 0;
397 PL_lex_stuff = Nullsv;
398 PL_lex_repl = Nullsv;
399 PL_lex_inpat = 0;
400 PL_lex_inwhat = 0;
09bef843 401 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
402 PL_linestr = line;
403 if (SvREADONLY(PL_linestr))
404 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
405 s = SvPV(PL_linestr, len);
8990e307 406 if (len && s[len-1] != ';') {
3280af22
NIS
407 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
408 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
409 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 410 }
3280af22
NIS
411 SvTEMP_off(PL_linestr);
412 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
413 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
414 SvREFCNT_dec(PL_rs);
79cb57f6 415 PL_rs = newSVpvn("\n", 1);
3280af22 416 PL_rsfp = 0;
79072805 417}
a687059c 418
ffb4593c
NT
419/*
420 * Perl_lex_end
9cbb5ea2
GS
421 * Finalizer for lexing operations. Must be called when the parser is
422 * done with the lexer.
ffb4593c
NT
423 */
424
463ee0b2 425void
864dbfa3 426Perl_lex_end(pTHX)
463ee0b2 427{
3280af22 428 PL_doextract = FALSE;
463ee0b2
LW
429}
430
ffb4593c
NT
431/*
432 * S_incline
433 * This subroutine has nothing to do with tilting, whether at windmills
434 * or pinball tables. Its name is short for "increment line". It
57843af0 435 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 436 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
437 * # line 500 "foo.pm"
438 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
439 */
440
76e3520e 441STATIC void
cea2e8a9 442S_incline(pTHX_ char *s)
463ee0b2 443{
0f15f207 444 dTHR;
463ee0b2
LW
445 char *t;
446 char *n;
447 char ch;
448 int sawline = 0;
449
57843af0 450 CopLINE_inc(PL_curcop);
463ee0b2
LW
451 if (*s++ != '#')
452 return;
453 while (*s == ' ' || *s == '\t') s++;
454 if (strnEQ(s, "line ", 5)) {
455 s += 5;
456 sawline = 1;
457 }
458 if (!isDIGIT(*s))
459 return;
460 n = s;
461 while (isDIGIT(*s))
462 s++;
463 while (*s == ' ' || *s == '\t')
464 s++;
465 if (*s == '"' && (t = strchr(s+1, '"')))
466 s++;
467 else {
468 if (!sawline)
469 return; /* false alarm */
470 for (t = s; !isSPACE(*t); t++) ;
471 }
472 ch = *t;
473 *t = '\0';
474 if (t - s > 0)
57843af0 475 CopFILE_set(PL_curcop, s);
463ee0b2 476 else
57843af0 477 CopFILE_set(PL_curcop, PL_origfilename);
463ee0b2 478 *t = ch;
57843af0 479 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
480}
481
ffb4593c
NT
482/*
483 * S_skipspace
484 * Called to gobble the appropriate amount and type of whitespace.
485 * Skips comments as well.
486 */
487
76e3520e 488STATIC char *
cea2e8a9 489S_skipspace(pTHX_ register char *s)
a687059c 490{
11343788 491 dTHR;
3280af22
NIS
492 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
493 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
463ee0b2
LW
494 s++;
495 return s;
496 }
497 for (;;) {
fd049845 498 STRLEN prevlen;
09bef843
SB
499 SSize_t oldprevlen, oldoldprevlen;
500 SSize_t oldloplen, oldunilen;
60e6418e
GS
501 while (s < PL_bufend && isSPACE(*s)) {
502 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
503 incline(s);
504 }
ffb4593c
NT
505
506 /* comment */
3280af22
NIS
507 if (s < PL_bufend && *s == '#') {
508 while (s < PL_bufend && *s != '\n')
463ee0b2 509 s++;
60e6418e 510 if (s < PL_bufend) {
463ee0b2 511 s++;
60e6418e
GS
512 if (PL_in_eval && !PL_rsfp) {
513 incline(s);
514 continue;
515 }
516 }
463ee0b2 517 }
ffb4593c
NT
518
519 /* only continue to recharge the buffer if we're at the end
520 * of the buffer, we're not reading from a source filter, and
521 * we're in normal lexing mode
522 */
09bef843
SB
523 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
524 PL_lex_state == LEX_FORMLINE)
463ee0b2 525 return s;
ffb4593c
NT
526
527 /* try to recharge the buffer */
9cbb5ea2
GS
528 if ((s = filter_gets(PL_linestr, PL_rsfp,
529 (prevlen = SvCUR(PL_linestr)))) == Nullch)
530 {
531 /* end of file. Add on the -p or -n magic */
3280af22
NIS
532 if (PL_minus_n || PL_minus_p) {
533 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
534 ";}continue{print or die qq(-p destination: $!\\n)" :
535 "");
3280af22
NIS
536 sv_catpv(PL_linestr,";}");
537 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
538 }
539 else
3280af22 540 sv_setpv(PL_linestr,";");
ffb4593c
NT
541
542 /* reset variables for next time we lex */
9cbb5ea2
GS
543 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
544 = SvPVX(PL_linestr);
3280af22 545 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
546
547 /* Close the filehandle. Could be from -P preprocessor,
548 * STDIN, or a regular file. If we were reading code from
549 * STDIN (because the commandline held no -e or filename)
550 * then we don't close it, we reset it so the code can
551 * read from STDIN too.
552 */
553
3280af22
NIS
554 if (PL_preprocess && !PL_in_eval)
555 (void)PerlProc_pclose(PL_rsfp);
556 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
557 PerlIO_clearerr(PL_rsfp);
8990e307 558 else
3280af22
NIS
559 (void)PerlIO_close(PL_rsfp);
560 PL_rsfp = Nullfp;
463ee0b2
LW
561 return s;
562 }
ffb4593c
NT
563
564 /* not at end of file, so we only read another line */
09bef843
SB
565 /* make corresponding updates to old pointers, for yyerror() */
566 oldprevlen = PL_oldbufptr - PL_bufend;
567 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
568 if (PL_last_uni)
569 oldunilen = PL_last_uni - PL_bufend;
570 if (PL_last_lop)
571 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
572 PL_linestart = PL_bufptr = s + prevlen;
573 PL_bufend = s + SvCUR(PL_linestr);
574 s = PL_bufptr;
09bef843
SB
575 PL_oldbufptr = s + oldprevlen;
576 PL_oldoldbufptr = s + oldoldprevlen;
577 if (PL_last_uni)
578 PL_last_uni = s + oldunilen;
579 if (PL_last_lop)
580 PL_last_lop = s + oldloplen;
a0d0e21e 581 incline(s);
ffb4593c
NT
582
583 /* debugger active and we're not compiling the debugger code,
584 * so store the line into the debugger's array of lines
585 */
3280af22 586 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
587 SV *sv = NEWSV(85,0);
588
589 sv_upgrade(sv, SVt_PVMG);
3280af22 590 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 591 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 592 }
463ee0b2 593 }
a687059c 594}
378cc40b 595
ffb4593c
NT
596/*
597 * S_check_uni
598 * Check the unary operators to ensure there's no ambiguity in how they're
599 * used. An ambiguous piece of code would be:
600 * rand + 5
601 * This doesn't mean rand() + 5. Because rand() is a unary operator,
602 * the +5 is its argument.
603 */
604
76e3520e 605STATIC void
cea2e8a9 606S_check_uni(pTHX)
ba106d47 607{
2f3197b3 608 char *s;
a0d0e21e 609 char *t;
0453d815 610 dTHR;
2f3197b3 611
3280af22 612 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 613 return;
3280af22
NIS
614 while (isSPACE(*PL_last_uni))
615 PL_last_uni++;
834a4ddd 616 for (s = PL_last_uni; isALNUM_lazy(s) || *s == '-'; s++) ;
3280af22 617 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 618 return;
0453d815 619 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 620 char ch = *s;
0453d815
PM
621 *s = '\0';
622 Perl_warner(aTHX_ WARN_AMBIGUOUS,
623 "Warning: Use of \"%s\" without parens is ambiguous",
624 PL_last_uni);
625 *s = ch;
626 }
2f3197b3
LW
627}
628
ffb4593c
NT
629/* workaround to replace the UNI() macro with a function. Only the
630 * hints/uts.sh file mentions this. Other comments elsewhere in the
631 * source indicate Microport Unix might need it too.
632 */
633
ffed7fef
LW
634#ifdef CRIPPLED_CC
635
636#undef UNI
ffed7fef 637#define UNI(f) return uni(f,s)
ffed7fef 638
76e3520e 639STATIC int
cea2e8a9 640S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
641{
642 yylval.ival = f;
3280af22
NIS
643 PL_expect = XTERM;
644 PL_bufptr = s;
8f872242
NIS
645 PL_last_uni = PL_oldbufptr;
646 PL_last_lop_op = f;
ffed7fef
LW
647 if (*s == '(')
648 return FUNC1;
649 s = skipspace(s);
650 if (*s == '(')
651 return FUNC1;
652 else
653 return UNIOP;
654}
655
a0d0e21e
LW
656#endif /* CRIPPLED_CC */
657
ffb4593c
NT
658/*
659 * LOP : macro to build a list operator. Its behaviour has been replaced
660 * with a subroutine, S_lop() for which LOP is just another name.
661 */
662
a0d0e21e
LW
663#define LOP(f,x) return lop(f,x,s)
664
ffb4593c
NT
665/*
666 * S_lop
667 * Build a list operator (or something that might be one). The rules:
668 * - if we have a next token, then it's a list operator [why?]
669 * - if the next thing is an opening paren, then it's a function
670 * - else it's a list operator
671 */
672
76e3520e 673STATIC I32
cea2e8a9 674S_lop(pTHX_ I32 f, expectation x, char *s)
ffed7fef 675{
0f15f207 676 dTHR;
79072805 677 yylval.ival = f;
35c8bce7 678 CLINE;
3280af22
NIS
679 PL_expect = x;
680 PL_bufptr = s;
681 PL_last_lop = PL_oldbufptr;
682 PL_last_lop_op = f;
683 if (PL_nexttoke)
a0d0e21e 684 return LSTOP;
79072805
LW
685 if (*s == '(')
686 return FUNC;
687 s = skipspace(s);
688 if (*s == '(')
689 return FUNC;
690 else
691 return LSTOP;
692}
693
ffb4593c
NT
694/*
695 * S_force_next
9cbb5ea2 696 * When the lexer realizes it knows the next token (for instance,
ffb4593c 697 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
698 * to know what token to return the next time the lexer is called. Caller
699 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
700 * handles the token correctly.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_force_next(pTHX_ I32 type)
79072805 705{
3280af22
NIS
706 PL_nexttype[PL_nexttoke] = type;
707 PL_nexttoke++;
708 if (PL_lex_state != LEX_KNOWNEXT) {
709 PL_lex_defer = PL_lex_state;
710 PL_lex_expect = PL_expect;
711 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
712 }
713}
714
ffb4593c
NT
715/*
716 * S_force_word
717 * When the lexer knows the next thing is a word (for instance, it has
718 * just seen -> and it knows that the next char is a word char, then
719 * it calls S_force_word to stick the next word into the PL_next lookahead.
720 *
721 * Arguments:
b1b65b59 722 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
723 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
724 * int check_keyword : if true, Perl checks to make sure the word isn't
725 * a keyword (do this if the word is a label, e.g. goto FOO)
726 * int allow_pack : if true, : characters will also be allowed (require,
727 * use, etc. do this)
9cbb5ea2 728 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
729 */
730
76e3520e 731STATIC char *
cea2e8a9 732S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 733{
463ee0b2
LW
734 register char *s;
735 STRLEN len;
736
737 start = skipspace(start);
738 s = start;
834a4ddd 739 if (isIDFIRST_lazy(s) ||
a0d0e21e 740 (allow_pack && *s == ':') ||
15f0808c 741 (allow_initial_tick && *s == '\'') )
a0d0e21e 742 {
3280af22
NIS
743 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
744 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
745 return start;
746 if (token == METHOD) {
747 s = skipspace(s);
748 if (*s == '(')
3280af22 749 PL_expect = XTERM;
463ee0b2 750 else {
3280af22 751 PL_expect = XOPERATOR;
463ee0b2 752 }
79072805 753 }
3280af22
NIS
754 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
755 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
756 force_next(token);
757 }
758 return s;
759}
760
ffb4593c
NT
761/*
762 * S_force_ident
9cbb5ea2 763 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
764 * text only contains the "foo" portion. The first argument is a pointer
765 * to the "foo", and the second argument is the type symbol to prefix.
766 * Forces the next token to be a "WORD".
9cbb5ea2 767 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
768 */
769
76e3520e 770STATIC void
cea2e8a9 771S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
772{
773 if (s && *s) {
11343788 774 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 775 PL_nextval[PL_nexttoke].opval = o;
79072805 776 force_next(WORD);
748a9306 777 if (kind) {
e858de61 778 dTHR; /* just for in_eval */
11343788 779 o->op_private = OPpCONST_ENTERED;
55497cff 780 /* XXX see note in pp_entereval() for why we forgo typo
781 warnings if the symbol must be introduced in an eval.
782 GSAR 96-10-12 */
3280af22 783 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
784 kind == '$' ? SVt_PV :
785 kind == '@' ? SVt_PVAV :
786 kind == '%' ? SVt_PVHV :
787 SVt_PVGV
788 );
748a9306 789 }
79072805
LW
790 }
791}
792
ffb4593c
NT
793/*
794 * S_force_version
795 * Forces the next token to be a version number.
796 */
797
76e3520e 798STATIC char *
cea2e8a9 799S_force_version(pTHX_ char *s)
89bfa8cd 800{
801 OP *version = Nullop;
802
803 s = skipspace(s);
804
805 /* default VERSION number -- GBARR */
806
807 if(isDIGIT(*s)) {
808 char *d;
809 int c;
55497cff 810 for( d=s, c = 1; isDIGIT(*d) || *d == '_' || (*d == '.' && c--); d++);
89bfa8cd 811 if((*d == ';' || isSPACE(*d)) && *(skipspace(d)) != ',') {
812 s = scan_num(s);
813 /* real VERSION number -- GBARR */
814 version = yylval.opval;
815 }
816 }
817
818 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 819 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 820 force_next(WORD);
821
822 return (s);
823}
824
ffb4593c
NT
825/*
826 * S_tokeq
827 * Tokenize a quoted string passed in as an SV. It finds the next
828 * chunk, up to end of string or a backslash. It may make a new
829 * SV containing that chunk (if HINT_NEW_STRING is on). It also
830 * turns \\ into \.
831 */
832
76e3520e 833STATIC SV *
cea2e8a9 834S_tokeq(pTHX_ SV *sv)
79072805
LW
835{
836 register char *s;
837 register char *send;
838 register char *d;
b3ac6de7
IZ
839 STRLEN len = 0;
840 SV *pv = sv;
79072805
LW
841
842 if (!SvLEN(sv))
b3ac6de7 843 goto finish;
79072805 844
a0d0e21e 845 s = SvPV_force(sv, len);
748a9306 846 if (SvIVX(sv) == -1)
b3ac6de7 847 goto finish;
463ee0b2 848 send = s + len;
79072805
LW
849 while (s < send && *s != '\\')
850 s++;
851 if (s == send)
b3ac6de7 852 goto finish;
79072805 853 d = s;
3280af22 854 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 855 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
856 while (s < send) {
857 if (*s == '\\') {
a0d0e21e 858 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
859 s++; /* all that, just for this */
860 }
861 *d++ = *s++;
862 }
863 *d = '\0';
463ee0b2 864 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 865 finish:
3280af22 866 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 867 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
868 return sv;
869}
870
ffb4593c
NT
871/*
872 * Now come three functions related to double-quote context,
873 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
874 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
875 * interact with PL_lex_state, and create fake ( ... ) argument lists
876 * to handle functions and concatenation.
877 * They assume that whoever calls them will be setting up a fake
878 * join call, because each subthing puts a ',' after it. This lets
879 * "lower \luPpEr"
880 * become
881 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
882 *
883 * (I'm not sure whether the spurious commas at the end of lcfirst's
884 * arguments and join's arguments are created or not).
885 */
886
887/*
888 * S_sublex_start
889 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
890 *
891 * Pattern matching will set PL_lex_op to the pattern-matching op to
892 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
893 *
894 * OP_CONST and OP_READLINE are easy--just make the new op and return.
895 *
896 * Everything else becomes a FUNC.
897 *
898 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
899 * had an OP_CONST or OP_READLINE). This just sets us up for a
900 * call to S_sublex_push().
901 */
902
76e3520e 903STATIC I32
cea2e8a9 904S_sublex_start(pTHX)
79072805
LW
905{
906 register I32 op_type = yylval.ival;
79072805
LW
907
908 if (op_type == OP_NULL) {
3280af22
NIS
909 yylval.opval = PL_lex_op;
910 PL_lex_op = Nullop;
79072805
LW
911 return THING;
912 }
913 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 914 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
915
916 if (SvTYPE(sv) == SVt_PVIV) {
917 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
918 STRLEN len;
919 char *p;
920 SV *nsv;
921
922 p = SvPV(sv, len);
79cb57f6 923 nsv = newSVpvn(p, len);
b3ac6de7
IZ
924 SvREFCNT_dec(sv);
925 sv = nsv;
926 }
927 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 928 PL_lex_stuff = Nullsv;
79072805
LW
929 return THING;
930 }
931
3280af22
NIS
932 PL_sublex_info.super_state = PL_lex_state;
933 PL_sublex_info.sub_inwhat = op_type;
934 PL_sublex_info.sub_op = PL_lex_op;
935 PL_lex_state = LEX_INTERPPUSH;
55497cff 936
3280af22
NIS
937 PL_expect = XTERM;
938 if (PL_lex_op) {
939 yylval.opval = PL_lex_op;
940 PL_lex_op = Nullop;
55497cff 941 return PMFUNC;
942 }
943 else
944 return FUNC;
945}
946
ffb4593c
NT
947/*
948 * S_sublex_push
949 * Create a new scope to save the lexing state. The scope will be
950 * ended in S_sublex_done. Returns a '(', starting the function arguments
951 * to the uc, lc, etc. found before.
952 * Sets PL_lex_state to LEX_INTERPCONCAT.
953 */
954
76e3520e 955STATIC I32
cea2e8a9 956S_sublex_push(pTHX)
55497cff 957{
0f15f207 958 dTHR;
f46d017c 959 ENTER;
55497cff 960
3280af22
NIS
961 PL_lex_state = PL_sublex_info.super_state;
962 SAVEI32(PL_lex_dojoin);
963 SAVEI32(PL_lex_brackets);
964 SAVEI32(PL_lex_fakebrack);
965 SAVEI32(PL_lex_casemods);
966 SAVEI32(PL_lex_starts);
967 SAVEI32(PL_lex_state);
7766f137 968 SAVEVPTR(PL_lex_inpat);
3280af22 969 SAVEI32(PL_lex_inwhat);
57843af0 970 SAVECOPLINE(PL_curcop);
3280af22
NIS
971 SAVEPPTR(PL_bufptr);
972 SAVEPPTR(PL_oldbufptr);
973 SAVEPPTR(PL_oldoldbufptr);
974 SAVEPPTR(PL_linestart);
975 SAVESPTR(PL_linestr);
976 SAVEPPTR(PL_lex_brackstack);
977 SAVEPPTR(PL_lex_casestack);
978
979 PL_linestr = PL_lex_stuff;
980 PL_lex_stuff = Nullsv;
981
9cbb5ea2
GS
982 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
983 = SvPVX(PL_linestr);
3280af22
NIS
984 PL_bufend += SvCUR(PL_linestr);
985 SAVEFREESV(PL_linestr);
986
987 PL_lex_dojoin = FALSE;
988 PL_lex_brackets = 0;
989 PL_lex_fakebrack = 0;
990 New(899, PL_lex_brackstack, 120, char);
991 New(899, PL_lex_casestack, 12, char);
992 SAVEFREEPV(PL_lex_brackstack);
993 SAVEFREEPV(PL_lex_casestack);
994 PL_lex_casemods = 0;
995 *PL_lex_casestack = '\0';
996 PL_lex_starts = 0;
997 PL_lex_state = LEX_INTERPCONCAT;
57843af0 998 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
999
1000 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1001 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1002 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1003 else
3280af22 1004 PL_lex_inpat = Nullop;
79072805 1005
55497cff 1006 return '(';
79072805
LW
1007}
1008
ffb4593c
NT
1009/*
1010 * S_sublex_done
1011 * Restores lexer state after a S_sublex_push.
1012 */
1013
76e3520e 1014STATIC I32
cea2e8a9 1015S_sublex_done(pTHX)
79072805 1016{
3280af22
NIS
1017 if (!PL_lex_starts++) {
1018 PL_expect = XOPERATOR;
79cb57f6 1019 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1020 return THING;
1021 }
1022
3280af22
NIS
1023 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1024 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1025 return yylex();
79072805
LW
1026 }
1027
ffb4593c 1028 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1029 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1030 PL_linestr = PL_lex_repl;
1031 PL_lex_inpat = 0;
1032 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1033 PL_bufend += SvCUR(PL_linestr);
1034 SAVEFREESV(PL_linestr);
1035 PL_lex_dojoin = FALSE;
1036 PL_lex_brackets = 0;
1037 PL_lex_fakebrack = 0;
1038 PL_lex_casemods = 0;
1039 *PL_lex_casestack = '\0';
1040 PL_lex_starts = 0;
25da4f38 1041 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1042 PL_lex_state = LEX_INTERPNORMAL;
1043 PL_lex_starts++;
e9fa98b2
HS
1044 /* we don't clear PL_lex_repl here, so that we can check later
1045 whether this is an evalled subst; that means we rely on the
1046 logic to ensure sublex_done() is called again only via the
1047 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1048 }
e9fa98b2 1049 else {
3280af22 1050 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1051 PL_lex_repl = Nullsv;
1052 }
79072805 1053 return ',';
ffed7fef
LW
1054 }
1055 else {
f46d017c 1056 LEAVE;
3280af22
NIS
1057 PL_bufend = SvPVX(PL_linestr);
1058 PL_bufend += SvCUR(PL_linestr);
1059 PL_expect = XOPERATOR;
09bef843 1060 PL_sublex_info.sub_inwhat = 0;
79072805 1061 return ')';
ffed7fef
LW
1062 }
1063}
1064
02aa26ce
NT
1065/*
1066 scan_const
1067
1068 Extracts a pattern, double-quoted string, or transliteration. This
1069 is terrifying code.
1070
3280af22
NIS
1071 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1072 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1073 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1074
9b599b2a
GS
1075 Returns a pointer to the character scanned up to. Iff this is
1076 advanced from the start pointer supplied (ie if anything was
1077 successfully parsed), will leave an OP for the substring scanned
1078 in yylval. Caller must intuit reason for not parsing further
1079 by looking at the next characters herself.
1080
02aa26ce
NT
1081 In patterns:
1082 backslashes:
1083 double-quoted style: \r and \n
1084 regexp special ones: \D \s
1085 constants: \x3
1086 backrefs: \1 (deprecated in substitution replacements)
1087 case and quoting: \U \Q \E
1088 stops on @ and $, but not for $ as tail anchor
1089
1090 In transliterations:
1091 characters are VERY literal, except for - not at the start or end
1092 of the string, which indicates a range. scan_const expands the
1093 range to the full set of intermediate characters.
1094
1095 In double-quoted strings:
1096 backslashes:
1097 double-quoted style: \r and \n
1098 constants: \x3
1099 backrefs: \1 (deprecated)
1100 case and quoting: \U \Q \E
1101 stops on @ and $
1102
1103 scan_const does *not* construct ops to handle interpolated strings.
1104 It stops processing as soon as it finds an embedded $ or @ variable
1105 and leaves it to the caller to work out what's going on.
1106
1107 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1108
1109 $ in pattern could be $foo or could be tail anchor. Assumption:
1110 it's a tail anchor if $ is the last thing in the string, or if it's
1111 followed by one of ")| \n\t"
1112
1113 \1 (backreferences) are turned into $1
1114
1115 The structure of the code is
1116 while (there's a character to process) {
1117 handle transliteration ranges
1118 skip regexp comments
1119 skip # initiated comments in //x patterns
1120 check for embedded @foo
1121 check for embedded scalars
1122 if (backslash) {
1123 leave intact backslashes from leave (below)
1124 deprecate \1 in strings and sub replacements
1125 handle string-changing backslashes \l \U \Q \E, etc.
1126 switch (what was escaped) {
1127 handle - in a transliteration (becomes a literal -)
1128 handle \132 octal characters
1129 handle 0x15 hex characters
1130 handle \cV (control V)
1131 handle printf backslashes (\f, \r, \n, etc)
1132 } (end switch)
1133 } (end if backslash)
1134 } (end while character to read)
1135
1136*/
1137
76e3520e 1138STATIC char *
cea2e8a9 1139S_scan_const(pTHX_ char *start)
79072805 1140{
3280af22 1141 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1142 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1143 register char *s = start; /* start of the constant */
1144 register char *d = SvPVX(sv); /* destination for copies */
1145 bool dorange = FALSE; /* are we in a translit range? */
1146 I32 len; /* ? */
ac2262e3 1147 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1148 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1149 : UTF;
ac2262e3 1150 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1151 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1152 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1153 : UTF;
dff6d3cd 1154 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1155 PL_lex_inpat
4a2d328f 1156 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1157 : "";
79072805
LW
1158
1159 while (s < send || dorange) {
02aa26ce 1160 /* get transliterations out of the way (they're most literal) */
3280af22 1161 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1162 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1163 if (dorange) {
02aa26ce 1164 I32 i; /* current expanded character */
8ada0baa 1165 I32 min; /* first character in range */
02aa26ce
NT
1166 I32 max; /* last character in range */
1167
1168 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1169 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1170 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1171 d -= 2; /* eat the first char and the - */
1172
8ada0baa
JH
1173 min = (U8)*d; /* first char in range */
1174 max = (U8)d[1]; /* last char in range */
1175
1176#ifndef ASCIIish
1177 if ((isLOWER(min) && isLOWER(max)) ||
1178 (isUPPER(min) && isUPPER(max))) {
1179 if (isLOWER(min)) {
1180 for (i = min; i <= max; i++)
1181 if (isLOWER(i))
1182 *d++ = i;
1183 } else {
1184 for (i = min; i <= max; i++)
1185 if (isUPPER(i))
1186 *d++ = i;
1187 }
1188 }
1189 else
1190#endif
1191 for (i = min; i <= max; i++)
1192 *d++ = i;
02aa26ce
NT
1193
1194 /* mark the range as done, and continue */
79072805
LW
1195 dorange = FALSE;
1196 continue;
1197 }
02aa26ce
NT
1198
1199 /* range begins (ignore - as first or last char) */
79072805 1200 else if (*s == '-' && s+1 < send && s != start) {
a0ed51b3 1201 if (utf) {
a176fa2a 1202 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1203 s++;
1204 continue;
1205 }
79072805
LW
1206 dorange = TRUE;
1207 s++;
1208 }
1209 }
02aa26ce
NT
1210
1211 /* if we get here, we're not doing a transliteration */
1212
0f5d15d6
IZ
1213 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1214 except for the last char, which will be done separately. */
3280af22 1215 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1216 if (s[2] == '#') {
1217 while (s < send && *s != ')')
1218 *d++ = *s++;
0f5d15d6
IZ
1219 } else if (s[2] == '{'
1220 || s[2] == 'p' && s[3] == '{') { /* This should march regcomp.c */
cc6b7395 1221 I32 count = 1;
0f5d15d6 1222 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1223 char c;
1224
d9f97599
GS
1225 while (count && (c = *regparse)) {
1226 if (c == '\\' && regparse[1])
1227 regparse++;
cc6b7395
IZ
1228 else if (c == '{')
1229 count++;
1230 else if (c == '}')
1231 count--;
d9f97599 1232 regparse++;
cc6b7395 1233 }
5bdf89e7
IZ
1234 if (*regparse != ')') {
1235 regparse--; /* Leave one char for continuation. */
cc6b7395 1236 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1237 }
0f5d15d6 1238 while (s < regparse)
cc6b7395
IZ
1239 *d++ = *s++;
1240 }
748a9306 1241 }
02aa26ce
NT
1242
1243 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1244 else if (*s == '#' && PL_lex_inpat &&
1245 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1246 while (s+1 < send && *s != '\n')
1247 *d++ = *s++;
1248 }
02aa26ce
NT
1249
1250 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
834a4ddd 1251 else if (*s == '@' && s[1] && (isALNUM_lazy(s+1) || strchr(":'{$", s[1])))
79072805 1252 break;
02aa26ce
NT
1253
1254 /* check for embedded scalars. only stop if we're sure it's a
1255 variable.
1256 */
79072805 1257 else if (*s == '$') {
3280af22 1258 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1259 break;
c277df42 1260 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1261 break; /* in regexp, $ might be tail anchor */
1262 }
02aa26ce 1263
a0ed51b3
LW
1264 /* (now in tr/// code again) */
1265
d008e5eb
GS
1266 if (*s & 0x80 && thisutf) {
1267 dTHR; /* only for ckWARN */
1268 if (ckWARN(WARN_UTF8)) {
dfe13c55 1269 (void)utf8_to_uv((U8*)s, &len); /* could cvt latin-1 to utf8 here... */
d008e5eb
GS
1270 if (len) {
1271 while (len--)
1272 *d++ = *s++;
1273 continue;
1274 }
a0ed51b3
LW
1275 }
1276 }
1277
02aa26ce 1278 /* backslashes */
79072805
LW
1279 if (*s == '\\' && s+1 < send) {
1280 s++;
02aa26ce
NT
1281
1282 /* some backslashes we leave behind */
c9f97d15 1283 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1284 *d++ = '\\';
1285 *d++ = *s++;
1286 continue;
1287 }
02aa26ce
NT
1288
1289 /* deprecate \1 in strings and substitution replacements */
3280af22 1290 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1291 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1292 {
d008e5eb 1293 dTHR; /* only for ckWARN */
599cee73 1294 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1295 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1296 *--s = '$';
1297 break;
1298 }
02aa26ce
NT
1299
1300 /* string-change backslash escapes */
3280af22 1301 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1302 --s;
1303 break;
1304 }
02aa26ce
NT
1305
1306 /* if we get here, it's either a quoted -, or a digit */
79072805 1307 switch (*s) {
02aa26ce
NT
1308
1309 /* quoted - in transliterations */
79072805 1310 case '-':
3280af22 1311 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1312 *d++ = *s++;
1313 continue;
1314 }
1315 /* FALL THROUGH */
1316 default:
11b8faa4
JH
1317 {
1318 dTHR;
1319 if (ckWARN(WARN_UNSAFE) && isALPHA(*s))
cea2e8a9 1320 Perl_warner(aTHX_ WARN_UNSAFE,
11b8faa4
JH
1321 "Unrecognized escape \\%c passed through",
1322 *s);
1323 /* default action is to copy the quoted character */
1324 *d++ = *s++;
1325 continue;
1326 }
02aa26ce
NT
1327
1328 /* \132 indicates an octal constant */
79072805
LW
1329 case '0': case '1': case '2': case '3':
1330 case '4': case '5': case '6': case '7':
dff6d3cd 1331 *d++ = (char)scan_oct(s, 3, &len);
79072805
LW
1332 s += len;
1333 continue;
02aa26ce
NT
1334
1335 /* \x24 indicates a hex constant */
79072805 1336 case 'x':
a0ed51b3
LW
1337 ++s;
1338 if (*s == '{') {
1339 char* e = strchr(s, '}');
1340
adaeee49 1341 if (!e) {
a0ed51b3 1342 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1343 e = s;
1344 }
d008e5eb
GS
1345 if (!utf) {
1346 dTHR;
1347 if (ckWARN(WARN_UTF8))
cea2e8a9 1348 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1349 "Use of \\x{} without utf8 declaration");
1350 }
a0ed51b3 1351 /* note: utf always shorter than hex */
dfe13c55 1352 d = (char*)uv_to_utf8((U8*)d,
dff6d3cd 1353 (UV)scan_hex(s + 1, e - s - 1, &len));
a0ed51b3 1354 s = e + 1;
a0ed51b3
LW
1355 }
1356 else {
1357 UV uv = (UV)scan_hex(s, 2, &len);
1358 if (utf && PL_lex_inwhat == OP_TRANS &&
1359 utf != (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1360 {
dfe13c55 1361 d = (char*)uv_to_utf8((U8*)d, uv); /* doing a CU or UC */
a0ed51b3
LW
1362 }
1363 else {
d008e5eb
GS
1364 if (uv >= 127 && UTF) {
1365 dTHR;
1366 if (ckWARN(WARN_UTF8))
cea2e8a9 1367 Perl_warner(aTHX_ WARN_UTF8,
d008e5eb
GS
1368 "\\x%.*s will produce malformed UTF-8 character; use \\x{%.*s} for that",
1369 len,s,len,s);
1370 }
a0ed51b3
LW
1371 *d++ = (char)uv;
1372 }
1373 s += len;
1374 }
79072805 1375 continue;
02aa26ce 1376
4a2d328f
IZ
1377 /* \N{latin small letter a} is a named character */
1378 case 'N':
423cee85
JH
1379 ++s;
1380 if (*s == '{') {
1381 char* e = strchr(s, '}');
1382 HV *hv;
1383 SV **svp;
1384 SV *res, *cv;
1385 STRLEN len;
1386 char *str;
1387 char *why = Nullch;
1388
1389 if (!e) {
5777a3f7 1390 yyerror("Missing right brace on \\N{}");
423cee85
JH
1391 e = s - 1;
1392 goto cont_scan;
1393 }
1394 res = newSVpvn(s + 1, e - s - 1);
1395 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1396 res, Nullsv, "\\N{...}" );
423cee85
JH
1397 str = SvPV(res,len);
1398 if (len > e - s + 4) {
1399 char *odest = SvPVX(sv);
1400
1401 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1402 d = SvPVX(sv) + (d - odest);
1403 }
1404 Copy(str, d, len, char);
1405 d += len;
1406 SvREFCNT_dec(res);
1407 cont_scan:
1408 s = e + 1;
1409 }
1410 else
5777a3f7 1411 yyerror("Missing braces on \\N{}");
423cee85
JH
1412 continue;
1413
02aa26ce 1414 /* \c is a control character */
79072805
LW
1415 case 'c':
1416 s++;
9d116dd7
JH
1417#ifdef EBCDIC
1418 *d = *s++;
1419 if (isLOWER(*d))
1420 *d = toUPPER(*d);
1421 *d++ = toCTRL(*d);
1422#else
bbce6d69 1423 len = *s++;
1424 *d++ = toCTRL(len);
9d116dd7 1425#endif
79072805 1426 continue;
02aa26ce
NT
1427
1428 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1429 case 'b':
1430 *d++ = '\b';
1431 break;
1432 case 'n':
1433 *d++ = '\n';
1434 break;
1435 case 'r':
1436 *d++ = '\r';
1437 break;
1438 case 'f':
1439 *d++ = '\f';
1440 break;
1441 case 't':
1442 *d++ = '\t';
1443 break;
34a3fe2a
PP
1444#ifdef EBCDIC
1445 case 'e':
1446 *d++ = '\047'; /* CP 1047 */
1447 break;
1448 case 'a':
1449 *d++ = '\057'; /* CP 1047 */
1450 break;
1451#else
79072805
LW
1452 case 'e':
1453 *d++ = '\033';
1454 break;
1455 case 'a':
1456 *d++ = '\007';
1457 break;
34a3fe2a 1458#endif
02aa26ce
NT
1459 } /* end switch */
1460
79072805
LW
1461 s++;
1462 continue;
02aa26ce
NT
1463 } /* end if (backslash) */
1464
79072805 1465 *d++ = *s++;
02aa26ce
NT
1466 } /* while loop to process each character */
1467
1468 /* terminate the string and set up the sv */
79072805 1469 *d = '\0';
463ee0b2 1470 SvCUR_set(sv, d - SvPVX(sv));
79072805
LW
1471 SvPOK_on(sv);
1472
02aa26ce 1473 /* shrink the sv if we allocated more than we used */
79072805
LW
1474 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1475 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1476 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1477 }
02aa26ce 1478
9b599b2a 1479 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1480 if (s > PL_bufptr) {
1481 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1482 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1483 sv, Nullsv,
3280af22 1484 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1485 ? "tr"
3280af22 1486 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1487 ? "s"
1488 : "qq")));
79072805 1489 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1490 } else
8990e307 1491 SvREFCNT_dec(sv);
79072805
LW
1492 return s;
1493}
1494
ffb4593c
NT
1495/* S_intuit_more
1496 * Returns TRUE if there's more to the expression (e.g., a subscript),
1497 * FALSE otherwise.
ffb4593c
NT
1498 *
1499 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1500 *
1501 * ->[ and ->{ return TRUE
1502 * { and [ outside a pattern are always subscripts, so return TRUE
1503 * if we're outside a pattern and it's not { or [, then return FALSE
1504 * if we're in a pattern and the first char is a {
1505 * {4,5} (any digits around the comma) returns FALSE
1506 * if we're in a pattern and the first char is a [
1507 * [] returns FALSE
1508 * [SOMETHING] has a funky algorithm to decide whether it's a
1509 * character class or not. It has to deal with things like
1510 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1511 * anything else returns TRUE
1512 */
1513
9cbb5ea2
GS
1514/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1515
76e3520e 1516STATIC int
cea2e8a9 1517S_intuit_more(pTHX_ register char *s)
79072805 1518{
3280af22 1519 if (PL_lex_brackets)
79072805
LW
1520 return TRUE;
1521 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1522 return TRUE;
1523 if (*s != '{' && *s != '[')
1524 return FALSE;
3280af22 1525 if (!PL_lex_inpat)
79072805
LW
1526 return TRUE;
1527
1528 /* In a pattern, so maybe we have {n,m}. */
1529 if (*s == '{') {
1530 s++;
1531 if (!isDIGIT(*s))
1532 return TRUE;
1533 while (isDIGIT(*s))
1534 s++;
1535 if (*s == ',')
1536 s++;
1537 while (isDIGIT(*s))
1538 s++;
1539 if (*s == '}')
1540 return FALSE;
1541 return TRUE;
1542
1543 }
1544
1545 /* On the other hand, maybe we have a character class */
1546
1547 s++;
1548 if (*s == ']' || *s == '^')
1549 return FALSE;
1550 else {
ffb4593c 1551 /* this is terrifying, and it works */
79072805
LW
1552 int weight = 2; /* let's weigh the evidence */
1553 char seen[256];
f27ffc4a 1554 unsigned char un_char = 255, last_un_char;
93a17b20 1555 char *send = strchr(s,']');
3280af22 1556 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1557
1558 if (!send) /* has to be an expression */
1559 return TRUE;
1560
1561 Zero(seen,256,char);
1562 if (*s == '$')
1563 weight -= 3;
1564 else if (isDIGIT(*s)) {
1565 if (s[1] != ']') {
1566 if (isDIGIT(s[1]) && s[2] == ']')
1567 weight -= 10;
1568 }
1569 else
1570 weight -= 100;
1571 }
1572 for (; s < send; s++) {
1573 last_un_char = un_char;
1574 un_char = (unsigned char)*s;
1575 switch (*s) {
1576 case '@':
1577 case '&':
1578 case '$':
1579 weight -= seen[un_char] * 10;
834a4ddd 1580 if (isALNUM_lazy(s+1)) {
8903cb82 1581 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1582 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1583 weight -= 100;
1584 else
1585 weight -= 10;
1586 }
1587 else if (*s == '$' && s[1] &&
93a17b20
LW
1588 strchr("[#!%*<>()-=",s[1])) {
1589 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1590 weight -= 10;
1591 else
1592 weight -= 1;
1593 }
1594 break;
1595 case '\\':
1596 un_char = 254;
1597 if (s[1]) {
93a17b20 1598 if (strchr("wds]",s[1]))
79072805
LW
1599 weight += 100;
1600 else if (seen['\''] || seen['"'])
1601 weight += 1;
93a17b20 1602 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1603 weight += 40;
1604 else if (isDIGIT(s[1])) {
1605 weight += 40;
1606 while (s[1] && isDIGIT(s[1]))
1607 s++;
1608 }
1609 }
1610 else
1611 weight += 100;
1612 break;
1613 case '-':
1614 if (s[1] == '\\')
1615 weight += 50;
93a17b20 1616 if (strchr("aA01! ",last_un_char))
79072805 1617 weight += 30;
93a17b20 1618 if (strchr("zZ79~",s[1]))
79072805 1619 weight += 30;
f27ffc4a
GS
1620 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1621 weight -= 5; /* cope with negative subscript */
79072805
LW
1622 break;
1623 default:
93a17b20 1624 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1625 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1626 char *d = tmpbuf;
1627 while (isALPHA(*s))
1628 *d++ = *s++;
1629 *d = '\0';
1630 if (keyword(tmpbuf, d - tmpbuf))
1631 weight -= 150;
1632 }
1633 if (un_char == last_un_char + 1)
1634 weight += 5;
1635 weight -= seen[un_char];
1636 break;
1637 }
1638 seen[un_char]++;
1639 }
1640 if (weight >= 0) /* probably a character class */
1641 return FALSE;
1642 }
1643
1644 return TRUE;
1645}
ffed7fef 1646
ffb4593c
NT
1647/*
1648 * S_intuit_method
1649 *
1650 * Does all the checking to disambiguate
1651 * foo bar
1652 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1653 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1654 *
1655 * First argument is the stuff after the first token, e.g. "bar".
1656 *
1657 * Not a method if bar is a filehandle.
1658 * Not a method if foo is a subroutine prototyped to take a filehandle.
1659 * Not a method if it's really "Foo $bar"
1660 * Method if it's "foo $bar"
1661 * Not a method if it's really "print foo $bar"
1662 * Method if it's really "foo package::" (interpreted as package->foo)
1663 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
1664 * Not a method if bar is a filehandle or package, but is quotd with
1665 * =>
1666 */
1667
76e3520e 1668STATIC int
cea2e8a9 1669S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1670{
1671 char *s = start + (*start == '$');
3280af22 1672 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1673 STRLEN len;
1674 GV* indirgv;
1675
1676 if (gv) {
b6c543e3 1677 CV *cv;
a0d0e21e
LW
1678 if (GvIO(gv))
1679 return 0;
b6c543e3
IZ
1680 if ((cv = GvCVu(gv))) {
1681 char *proto = SvPVX(cv);
1682 if (proto) {
1683 if (*proto == ';')
1684 proto++;
1685 if (*proto == '*')
1686 return 0;
1687 }
1688 } else
a0d0e21e
LW
1689 gv = 0;
1690 }
8903cb82 1691 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1692 /* start is the beginning of the possible filehandle/object,
1693 * and s is the end of it
1694 * tmpbuf is a copy of it
1695 */
1696
a0d0e21e 1697 if (*start == '$') {
3280af22 1698 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1699 return 0;
1700 s = skipspace(s);
3280af22
NIS
1701 PL_bufptr = start;
1702 PL_expect = XREF;
a0d0e21e
LW
1703 return *s == '(' ? FUNCMETH : METHOD;
1704 }
1705 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1706 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1707 len -= 2;
1708 tmpbuf[len] = '\0';
1709 goto bare_package;
1710 }
1711 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1712 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1713 return 0;
1714 /* filehandle or package name makes it a method */
89bfa8cd 1715 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1716 s = skipspace(s);
3280af22 1717 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1718 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1719 bare_package:
3280af22 1720 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1721 newSVpvn(tmpbuf,len));
3280af22
NIS
1722 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1723 PL_expect = XTERM;
a0d0e21e 1724 force_next(WORD);
3280af22 1725 PL_bufptr = s;
a0d0e21e
LW
1726 return *s == '(' ? FUNCMETH : METHOD;
1727 }
1728 }
1729 return 0;
1730}
1731
ffb4593c
NT
1732/*
1733 * S_incl_perldb
1734 * Return a string of Perl code to load the debugger. If PERL5DB
1735 * is set, it will return the contents of that, otherwise a
1736 * compile-time require of perl5db.pl.
1737 */
1738
76e3520e 1739STATIC char*
cea2e8a9 1740S_incl_perldb(pTHX)
a0d0e21e 1741{
3280af22 1742 if (PL_perldb) {
76e3520e 1743 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1744
1745 if (pdb)
1746 return pdb;
61bb5906 1747 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1748 return "BEGIN { require 'perl5db.pl' }";
1749 }
1750 return "";
1751}
1752
1753
16d20bd9
AD
1754/* Encoded script support. filter_add() effectively inserts a
1755 * 'pre-processing' function into the current source input stream.
1756 * Note that the filter function only applies to the current source file
1757 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1758 *
1759 * The datasv parameter (which may be NULL) can be used to pass
1760 * private data to this instance of the filter. The filter function
1761 * can recover the SV using the FILTER_DATA macro and use it to
1762 * store private buffers and state information.
1763 *
1764 * The supplied datasv parameter is upgraded to a PVIO type
e0c19803
GS
1765 * and the IoDIRP field is used to store the function pointer,
1766 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1767 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1768 * private use must be set using malloc'd pointers.
1769 */
16d20bd9
AD
1770
1771SV *
864dbfa3 1772Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1773{
f4c556ac
GS
1774 if (!funcp)
1775 return Nullsv;
1776
3280af22
NIS
1777 if (!PL_rsfp_filters)
1778 PL_rsfp_filters = newAV();
16d20bd9 1779 if (!datasv)
8c52afec 1780 datasv = NEWSV(255,0);
16d20bd9 1781 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1782 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
16d20bd9 1783 IoDIRP(datasv) = (DIR*)funcp; /* stash funcp into spare field */
e0c19803 1784 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1785 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1786 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1787 av_unshift(PL_rsfp_filters, 1);
1788 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1789 return(datasv);
1790}
1791
1792
1793/* Delete most recently added instance of this filter function. */
a0d0e21e 1794void
864dbfa3 1795Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1796{
e0c19803 1797 SV *datasv;
f4c556ac 1798 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1799 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1800 return;
1801 /* if filter is on top of stack (usual case) just pop it off */
e0c19803
GS
1802 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
1803 if (IoDIRP(datasv) == (DIR*)funcp) {
1804 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
1805 IoDIRP(datasv) = (DIR*)NULL;
3280af22 1806 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1807
16d20bd9
AD
1808 return;
1809 }
1810 /* we need to search for the correct entry and clear it */
cea2e8a9 1811 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1812}
1813
1814
1815/* Invoke the n'th filter function for the current rsfp. */
1816I32
864dbfa3 1817Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1818
1819
1820 /* 0 = read one text line */
a0d0e21e 1821{
16d20bd9
AD
1822 filter_t funcp;
1823 SV *datasv = NULL;
e50aee73 1824
3280af22 1825 if (!PL_rsfp_filters)
16d20bd9 1826 return -1;
3280af22 1827 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1828 /* Provide a default input filter to make life easy. */
1829 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1830 DEBUG_P(PerlIO_printf(Perl_debug_log,
1831 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1832 if (maxlen) {
1833 /* Want a block */
1834 int len ;
1835 int old_len = SvCUR(buf_sv) ;
1836
1837 /* ensure buf_sv is large enough */
1838 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1839 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1840 if (PerlIO_error(PL_rsfp))
37120919
AD
1841 return -1; /* error */
1842 else
1843 return 0 ; /* end of file */
1844 }
16d20bd9
AD
1845 SvCUR_set(buf_sv, old_len + len) ;
1846 } else {
1847 /* Want a line */
3280af22
NIS
1848 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1849 if (PerlIO_error(PL_rsfp))
37120919
AD
1850 return -1; /* error */
1851 else
1852 return 0 ; /* end of file */
1853 }
16d20bd9
AD
1854 }
1855 return SvCUR(buf_sv);
1856 }
1857 /* Skip this filter slot if filter has been deleted */
3280af22 1858 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1859 DEBUG_P(PerlIO_printf(Perl_debug_log,
1860 "filter_read %d: skipped (filter deleted)\n",
1861 idx));
16d20bd9
AD
1862 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1863 }
1864 /* Get function pointer hidden within datasv */
1865 funcp = (filter_t)IoDIRP(datasv);
f4c556ac
GS
1866 DEBUG_P(PerlIO_printf(Perl_debug_log,
1867 "filter_read %d: via function %p (%s)\n",
1868 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1869 /* Call function. The function is expected to */
1870 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1871 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1872 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1873}
1874
76e3520e 1875STATIC char *
cea2e8a9 1876S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1877{
a868473f 1878#ifdef WIN32FILTER
3280af22 1879 if (!PL_rsfp_filters) {
a868473f
NIS
1880 filter_add(win32_textfilter,NULL);
1881 }
1882#endif
3280af22 1883 if (PL_rsfp_filters) {
16d20bd9 1884
55497cff 1885 if (!append)
1886 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1887 if (FILTER_READ(0, sv, 0) > 0)
1888 return ( SvPVX(sv) ) ;
1889 else
1890 return Nullch ;
1891 }
9d116dd7 1892 else
fd049845 1893 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1894}
1895
1896
748a9306
LW
1897#ifdef DEBUGGING
1898 static char* exp_name[] =
09bef843
SB
1899 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
1900 "ATTRTERM", "TERMBLOCK"
1901 };
748a9306 1902#endif
463ee0b2 1903
02aa26ce
NT
1904/*
1905 yylex
1906
1907 Works out what to call the token just pulled out of the input
1908 stream. The yacc parser takes care of taking the ops we return and
1909 stitching them into a tree.
1910
1911 Returns:
1912 PRIVATEREF
1913
1914 Structure:
1915 if read an identifier
1916 if we're in a my declaration
1917 croak if they tried to say my($foo::bar)
1918 build the ops for a my() declaration
1919 if it's an access to a my() variable
1920 are we in a sort block?
1921 croak if my($a); $a <=> $b
1922 build ops for access to a my() variable
1923 if in a dq string, and they've said @foo and we can't find @foo
1924 croak
1925 build ops for a bareword
1926 if we already built the token before, use it.
1927*/
1928
864dbfa3
GS
1929int
1930#ifdef USE_PURE_BISON
cea2e8a9 1931Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 1932#else
cea2e8a9 1933Perl_yylex(pTHX)
864dbfa3 1934#endif
378cc40b 1935{
11343788 1936 dTHR;
79072805 1937 register char *s;
378cc40b 1938 register char *d;
79072805 1939 register I32 tmp;
463ee0b2 1940 STRLEN len;
161b471a
NIS
1941 GV *gv = Nullgv;
1942 GV **gvp = 0;
a687059c 1943
a1a0e61e
TD
1944#ifdef USE_PURE_BISON
1945 yylval_pointer = lvalp;
1946 yychar_pointer = lcharp;
1947#endif
1948
02aa26ce 1949 /* check if there's an identifier for us to look at */
3280af22 1950 if (PL_pending_ident) {
02aa26ce 1951 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
1952 char pit = PL_pending_ident;
1953 PL_pending_ident = 0;
bbce6d69 1954
02aa26ce
NT
1955 /* if we're in a my(), we can't allow dynamics here.
1956 $foo'bar has already been turned into $foo::bar, so
1957 just check for colons.
1958
1959 if it's a legal name, the OP is a PADANY.
1960 */
3280af22 1961 if (PL_in_my) {
77ca0c92
LW
1962 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1963 tmp = pad_allocmy(PL_tokenbuf);
1964 }
1965 else {
1966 if (strchr(PL_tokenbuf,':'))
1967 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 1968
77ca0c92
LW
1969 yylval.opval = newOP(OP_PADANY, 0);
1970 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
1971 return PRIVATEREF;
1972 }
bbce6d69 1973 }
1974
02aa26ce
NT
1975 /*
1976 build the ops for accesses to a my() variable.
1977
1978 Deny my($a) or my($b) in a sort block, *if* $a or $b is
1979 then used in a comparison. This catches most, but not
1980 all cases. For instance, it catches
1981 sort { my($a); $a <=> $b }
1982 but not
1983 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
1984 (although why you'd do that is anyone's guess).
1985 */
1986
3280af22 1987 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 1988#ifdef USE_THREADS
54b9620d 1989 /* Check for single character per-thread SVs */
3280af22
NIS
1990 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
1991 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
1992 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 1993 {
2faa37cc 1994 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
1995 yylval.opval->op_targ = tmp;
1996 return PRIVATEREF;
1997 }
1998#endif /* USE_THREADS */
3280af22 1999 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
77ca0c92
LW
2000 /* might be an "our" variable" */
2001 if (SvFLAGS(AvARRAY(PL_comppad_name)[tmp]) & SVpad_OUR) {
2002 /* build ops for a bareword */
2003 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
2004 yylval.opval->op_private = OPpCONST_ENTERED;
2005 gv_fetchpv(PL_tokenbuf+1,
2006 (PL_in_eval
2007 ? (GV_ADDMULTI | GV_ADDINEVAL | GV_ADDOUR)
2008 : GV_ADDOUR
2009 ),
2010 ((PL_tokenbuf[0] == '$') ? SVt_PV
2011 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2012 : SVt_PVHV));
2013 return WORD;
2014 }
2015
02aa26ce 2016 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2017 if (PL_last_lop_op == OP_SORT &&
2018 PL_tokenbuf[0] == '$' &&
2019 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2020 && !PL_tokenbuf[2])
bbce6d69 2021 {
3280af22
NIS
2022 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2023 d < PL_bufend && *d != '\n';
a863c7d1
MB
2024 d++)
2025 {
2026 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2027 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2028 PL_tokenbuf);
a863c7d1 2029 }
bbce6d69 2030 }
2031 }
bbce6d69 2032
a863c7d1
MB
2033 yylval.opval = newOP(OP_PADANY, 0);
2034 yylval.opval->op_targ = tmp;
2035 return PRIVATEREF;
2036 }
bbce6d69 2037 }
2038
02aa26ce
NT
2039 /*
2040 Whine if they've said @foo in a doublequoted string,
2041 and @foo isn't a variable we can find in the symbol
2042 table.
2043 */
3280af22
NIS
2044 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2045 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
2046 if (!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
cea2e8a9 2047 yyerror(Perl_form(aTHX_ "In string, %s now must be written as \\%s",
3280af22 2048 PL_tokenbuf, PL_tokenbuf));
bbce6d69 2049 }
2050
02aa26ce 2051 /* build ops for a bareword */
3280af22 2052 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2053 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2054 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2055 ((PL_tokenbuf[0] == '$') ? SVt_PV
2056 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2057 : SVt_PVHV));
2058 return WORD;
2059 }
2060
02aa26ce
NT
2061 /* no identifier pending identification */
2062
3280af22 2063 switch (PL_lex_state) {
79072805
LW
2064#ifdef COMMENTARY
2065 case LEX_NORMAL: /* Some compilers will produce faster */
2066 case LEX_INTERPNORMAL: /* code if we comment these out. */
2067 break;
2068#endif
2069
09bef843 2070 /* when we've already built the next token, just pull it out of the queue */
79072805 2071 case LEX_KNOWNEXT:
3280af22
NIS
2072 PL_nexttoke--;
2073 yylval = PL_nextval[PL_nexttoke];
2074 if (!PL_nexttoke) {
2075 PL_lex_state = PL_lex_defer;
2076 PL_expect = PL_lex_expect;
2077 PL_lex_defer = LEX_NORMAL;
463ee0b2 2078 }
3280af22 2079 return(PL_nexttype[PL_nexttoke]);
79072805 2080
02aa26ce 2081 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2082 when we get here, PL_bufptr is at the \
02aa26ce 2083 */
79072805
LW
2084 case LEX_INTERPCASEMOD:
2085#ifdef DEBUGGING
3280af22 2086 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2087 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2088#endif
02aa26ce 2089 /* handle \E or end of string */
3280af22 2090 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2091 char oldmod;
02aa26ce
NT
2092
2093 /* if at a \E */
3280af22
NIS
2094 if (PL_lex_casemods) {
2095 oldmod = PL_lex_casestack[--PL_lex_casemods];
2096 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2097
3280af22
NIS
2098 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2099 PL_bufptr += 2;
2100 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2101 }
79072805
LW
2102 return ')';
2103 }
3280af22
NIS
2104 if (PL_bufptr != PL_bufend)
2105 PL_bufptr += 2;
2106 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2107 return yylex();
79072805
LW
2108 }
2109 else {
3280af22 2110 s = PL_bufptr + 1;
79072805
LW
2111 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2112 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2113 if (strchr("LU", *s) &&
3280af22 2114 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2115 {
3280af22 2116 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2117 return ')';
2118 }
3280af22
NIS
2119 if (PL_lex_casemods > 10) {
2120 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2121 if (newlb != PL_lex_casestack) {
a0d0e21e 2122 SAVEFREEPV(newlb);
3280af22 2123 PL_lex_casestack = newlb;
a0d0e21e
LW
2124 }
2125 }
3280af22
NIS
2126 PL_lex_casestack[PL_lex_casemods++] = *s;
2127 PL_lex_casestack[PL_lex_casemods] = '\0';
2128 PL_lex_state = LEX_INTERPCONCAT;
2129 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2130 force_next('(');
2131 if (*s == 'l')
3280af22 2132 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2133 else if (*s == 'u')
3280af22 2134 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2135 else if (*s == 'L')
3280af22 2136 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2137 else if (*s == 'U')
3280af22 2138 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2139 else if (*s == 'Q')
3280af22 2140 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2141 else
cea2e8a9 2142 Perl_croak(aTHX_ "panic: yylex");
3280af22 2143 PL_bufptr = s + 1;
79072805 2144 force_next(FUNC);
3280af22
NIS
2145 if (PL_lex_starts) {
2146 s = PL_bufptr;
2147 PL_lex_starts = 0;
79072805
LW
2148 Aop(OP_CONCAT);
2149 }
2150 else
cea2e8a9 2151 return yylex();
79072805
LW
2152 }
2153
55497cff 2154 case LEX_INTERPPUSH:
2155 return sublex_push();
2156
79072805 2157 case LEX_INTERPSTART:
3280af22 2158 if (PL_bufptr == PL_bufend)
79072805 2159 return sublex_done();
3280af22
NIS
2160 PL_expect = XTERM;
2161 PL_lex_dojoin = (*PL_bufptr == '@');
2162 PL_lex_state = LEX_INTERPNORMAL;
2163 if (PL_lex_dojoin) {
2164 PL_nextval[PL_nexttoke].ival = 0;
79072805 2165 force_next(',');
554b3eca 2166#ifdef USE_THREADS
533c011a
NIS
2167 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2168 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2169 force_next(PRIVATEREF);
2170#else
a0d0e21e 2171 force_ident("\"", '$');
554b3eca 2172#endif /* USE_THREADS */
3280af22 2173 PL_nextval[PL_nexttoke].ival = 0;
79072805 2174 force_next('$');
3280af22 2175 PL_nextval[PL_nexttoke].ival = 0;
79072805 2176 force_next('(');
3280af22 2177 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2178 force_next(FUNC);
2179 }
3280af22
NIS
2180 if (PL_lex_starts++) {
2181 s = PL_bufptr;
79072805
LW
2182 Aop(OP_CONCAT);
2183 }
cea2e8a9 2184 return yylex();
79072805
LW
2185
2186 case LEX_INTERPENDMAYBE:
3280af22
NIS
2187 if (intuit_more(PL_bufptr)) {
2188 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2189 break;
2190 }
2191 /* FALL THROUGH */
2192
2193 case LEX_INTERPEND:
3280af22
NIS
2194 if (PL_lex_dojoin) {
2195 PL_lex_dojoin = FALSE;
2196 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2197 return ')';
2198 }
43a16006 2199 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2200 && SvEVALED(PL_lex_repl))
43a16006 2201 {
e9fa98b2 2202 if (PL_bufptr != PL_bufend)
cea2e8a9 2203 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2204 PL_lex_repl = Nullsv;
2205 }
79072805
LW
2206 /* FALLTHROUGH */
2207 case LEX_INTERPCONCAT:
2208#ifdef DEBUGGING
3280af22 2209 if (PL_lex_brackets)
cea2e8a9 2210 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2211#endif
3280af22 2212 if (PL_bufptr == PL_bufend)
79072805
LW
2213 return sublex_done();
2214
3280af22
NIS
2215 if (SvIVX(PL_linestr) == '\'') {
2216 SV *sv = newSVsv(PL_linestr);
2217 if (!PL_lex_inpat)
76e3520e 2218 sv = tokeq(sv);
3280af22 2219 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2220 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2221 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2222 s = PL_bufend;
79072805
LW
2223 }
2224 else {
3280af22 2225 s = scan_const(PL_bufptr);
79072805 2226 if (*s == '\\')
3280af22 2227 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2228 else
3280af22 2229 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2230 }
2231
3280af22
NIS
2232 if (s != PL_bufptr) {
2233 PL_nextval[PL_nexttoke] = yylval;
2234 PL_expect = XTERM;
79072805 2235 force_next(THING);
3280af22 2236 if (PL_lex_starts++)
79072805
LW
2237 Aop(OP_CONCAT);
2238 else {
3280af22 2239 PL_bufptr = s;
cea2e8a9 2240 return yylex();
79072805
LW
2241 }
2242 }
2243
cea2e8a9 2244 return yylex();
a0d0e21e 2245 case LEX_FORMLINE:
3280af22
NIS
2246 PL_lex_state = LEX_NORMAL;
2247 s = scan_formline(PL_bufptr);
2248 if (!PL_lex_formbrack)
a0d0e21e
LW
2249 goto rightbracket;
2250 OPERATOR(';');
79072805
LW
2251 }
2252
3280af22
NIS
2253 s = PL_bufptr;
2254 PL_oldoldbufptr = PL_oldbufptr;
2255 PL_oldbufptr = s;
79072805 2256 DEBUG_p( {
bf49b057
GS
2257 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2258 exp_name[PL_expect], s);
79072805 2259 } )
463ee0b2
LW
2260
2261 retry:
378cc40b
LW
2262 switch (*s) {
2263 default:
834a4ddd
LW
2264 if (isIDFIRST_lazy(s))
2265 goto keylookup;
cea2e8a9 2266 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2267 case 4:
2268 case 26:
2269 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2270 case 0:
3280af22
NIS
2271 if (!PL_rsfp) {
2272 PL_last_uni = 0;
2273 PL_last_lop = 0;
2274 if (PL_lex_brackets)
d98d5fff 2275 yyerror("Missing right curly or square bracket");
79072805 2276 TOKEN(0);
463ee0b2 2277 }
3280af22 2278 if (s++ < PL_bufend)
a687059c 2279 goto retry; /* ignore stray nulls */
3280af22
NIS
2280 PL_last_uni = 0;
2281 PL_last_lop = 0;
2282 if (!PL_in_eval && !PL_preambled) {
2283 PL_preambled = TRUE;
2284 sv_setpv(PL_linestr,incl_perldb());
2285 if (SvCUR(PL_linestr))
2286 sv_catpv(PL_linestr,";");
2287 if (PL_preambleav){
2288 while(AvFILLp(PL_preambleav) >= 0) {
2289 SV *tmpsv = av_shift(PL_preambleav);
2290 sv_catsv(PL_linestr, tmpsv);
2291 sv_catpv(PL_linestr, ";");
91b7def8 2292 sv_free(tmpsv);
2293 }
3280af22
NIS
2294 sv_free((SV*)PL_preambleav);
2295 PL_preambleav = NULL;
91b7def8 2296 }
3280af22
NIS
2297 if (PL_minus_n || PL_minus_p) {
2298 sv_catpv(PL_linestr, "LINE: while (<>) {");
2299 if (PL_minus_l)
2300 sv_catpv(PL_linestr,"chomp;");
2301 if (PL_minus_a) {
8fd239a7
CS
2302 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2303 if (gv)
2304 GvIMPORTED_AV_on(gv);
3280af22
NIS
2305 if (PL_minus_F) {
2306 if (strchr("/'\"", *PL_splitstr)
2307 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2308 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2309 else {
2310 char delim;
2311 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2312 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2313 delim = *s;
cea2e8a9 2314 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2315 "q" + (delim == '\''), delim);
3280af22 2316 for (s = PL_splitstr; *s; s++) {
54310121 2317 if (*s == '\\')
3280af22
NIS
2318 sv_catpvn(PL_linestr, "\\", 1);
2319 sv_catpvn(PL_linestr, s, 1);
54310121 2320 }
cea2e8a9 2321 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2322 }
2304df62
AD
2323 }
2324 else
3280af22 2325 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2326 }
79072805 2327 }
3280af22
NIS
2328 sv_catpv(PL_linestr, "\n");
2329 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2330 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2331 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2332 SV *sv = NEWSV(85,0);
2333
2334 sv_upgrade(sv, SVt_PVMG);
3280af22 2335 sv_setsv(sv,PL_linestr);
57843af0 2336 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2337 }
79072805 2338 goto retry;
a687059c 2339 }
e929a76b 2340 do {
3280af22 2341 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
e929a76b 2342 fake_eof:
3280af22
NIS
2343 if (PL_rsfp) {
2344 if (PL_preprocess && !PL_in_eval)
2345 (void)PerlProc_pclose(PL_rsfp);
2346 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2347 PerlIO_clearerr(PL_rsfp);
395c3793 2348 else
3280af22
NIS
2349 (void)PerlIO_close(PL_rsfp);
2350 PL_rsfp = Nullfp;
4a9ae47a 2351 PL_doextract = FALSE;
395c3793 2352 }
3280af22
NIS
2353 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2354 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2355 sv_catpv(PL_linestr,";}");
2356 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2357 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2358 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2359 goto retry;
2360 }
3280af22
NIS
2361 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2362 sv_setpv(PL_linestr,"");
79072805 2363 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
378cc40b 2364 }
3280af22 2365 if (PL_doextract) {
a0d0e21e 2366 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2367 PL_doextract = FALSE;
a0d0e21e
LW
2368
2369 /* Incest with pod. */
2370 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2371 sv_setpv(PL_linestr, "");
2372 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2373 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2374 PL_doextract = FALSE;
a0d0e21e
LW
2375 }
2376 }
463ee0b2 2377 incline(s);
3280af22
NIS
2378 } while (PL_doextract);
2379 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2380 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2381 SV *sv = NEWSV(85,0);
a687059c 2382
93a17b20 2383 sv_upgrade(sv, SVt_PVMG);
3280af22 2384 sv_setsv(sv,PL_linestr);
57843af0 2385 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2386 }
3280af22 2387 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2388 if (CopLINE(PL_curcop) == 1) {
3280af22 2389 while (s < PL_bufend && isSPACE(*s))
79072805 2390 s++;
a0d0e21e 2391 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2392 s++;
44a8e56a 2393 d = Nullch;
3280af22 2394 if (!PL_in_eval) {
44a8e56a 2395 if (*s == '#' && *(s+1) == '!')
2396 d = s + 2;
2397#ifdef ALTERNATE_SHEBANG
2398 else {
2399 static char as[] = ALTERNATE_SHEBANG;
2400 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2401 d = s + (sizeof(as) - 1);
2402 }
2403#endif /* ALTERNATE_SHEBANG */
2404 }
2405 if (d) {
b8378b72 2406 char *ipath;
774d564b 2407 char *ipathend;
b8378b72 2408
774d564b 2409 while (isSPACE(*d))
b8378b72
CS
2410 d++;
2411 ipath = d;
774d564b 2412 while (*d && !isSPACE(*d))
2413 d++;
2414 ipathend = d;
2415
2416#ifdef ARG_ZERO_IS_SCRIPT
2417 if (ipathend > ipath) {
2418 /*
2419 * HP-UX (at least) sets argv[0] to the script name,
2420 * which makes $^X incorrect. And Digital UNIX and Linux,
2421 * at least, set argv[0] to the basename of the Perl
2422 * interpreter. So, having found "#!", we'll set it right.
2423 */
2424 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2425 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2426 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2427 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2428 SvSETMAGIC(x);
2429 }
774d564b 2430 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2431 }
774d564b 2432#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2433
2434 /*
2435 * Look for options.
2436 */
748a9306 2437 d = instr(s,"perl -");
84e30d1a 2438 if (!d) {
748a9306 2439 d = instr(s,"perl");
84e30d1a
GS
2440#if defined(DOSISH)
2441 /* avoid getting into infinite loops when shebang
2442 * line contains "Perl" rather than "perl" */
2443 if (!d) {
2444 for (d = ipathend-4; d >= ipath; --d) {
2445 if ((*d == 'p' || *d == 'P')
2446 && !ibcmp(d, "perl", 4))
2447 {
2448 break;
2449 }
2450 }
2451 if (d < ipath)
2452 d = Nullch;
2453 }
2454#endif
2455 }
44a8e56a 2456#ifdef ALTERNATE_SHEBANG
2457 /*
2458 * If the ALTERNATE_SHEBANG on this system starts with a
2459 * character that can be part of a Perl expression, then if
2460 * we see it but not "perl", we're probably looking at the
2461 * start of Perl code, not a request to hand off to some
2462 * other interpreter. Similarly, if "perl" is there, but
2463 * not in the first 'word' of the line, we assume the line
2464 * contains the start of the Perl program.
44a8e56a 2465 */
2466 if (d && *s != '#') {
774d564b 2467 char *c = ipath;
44a8e56a 2468 while (*c && !strchr("; \t\r\n\f\v#", *c))
2469 c++;
2470 if (c < d)
2471 d = Nullch; /* "perl" not in first word; ignore */
2472 else
2473 *s = '#'; /* Don't try to parse shebang line */
2474 }
774d564b 2475#endif /* ALTERNATE_SHEBANG */
748a9306 2476 if (!d &&
44a8e56a 2477 *s == '#' &&
774d564b 2478 ipathend > ipath &&
3280af22 2479 !PL_minus_c &&
748a9306 2480 !instr(s,"indir") &&
3280af22 2481 instr(PL_origargv[0],"perl"))
748a9306 2482 {
9f68db38 2483 char **newargv;
9f68db38 2484
774d564b 2485 *ipathend = '\0';
2486 s = ipathend + 1;
3280af22 2487 while (s < PL_bufend && isSPACE(*s))
9f68db38 2488 s++;
3280af22
NIS
2489 if (s < PL_bufend) {
2490 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2491 newargv[1] = s;
3280af22 2492 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2493 s++;
2494 *s = '\0';
3280af22 2495 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2496 }
2497 else
3280af22 2498 newargv = PL_origargv;
774d564b 2499 newargv[0] = ipath;
80252599 2500 PerlProc_execv(ipath, newargv);
cea2e8a9 2501 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2502 }
748a9306 2503 if (d) {
3280af22
NIS
2504 U32 oldpdb = PL_perldb;
2505 bool oldn = PL_minus_n;
2506 bool oldp = PL_minus_p;
748a9306
LW
2507
2508 while (*d && !isSPACE(*d)) d++;
89bfa8cd 2509 while (*d == ' ' || *d == '\t') d++;
748a9306
LW
2510
2511 if (*d++ == '-') {
8cc95fdb 2512 do {
2513 if (*d == 'M' || *d == 'm') {
2514 char *m = d;
2515 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2516 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2517 (int)(d - m), m);
2518 }
2519 d = moreswitches(d);
2520 } while (d);
84902520 2521 if (PERLDB_LINE && !oldpdb ||
3280af22 2522 ( PL_minus_n || PL_minus_p ) && !(oldn || oldp) )
b084f20b 2523 /* if we have already added "LINE: while (<>) {",
2524 we must not do it again */
748a9306 2525 {
3280af22
NIS
2526 sv_setpv(PL_linestr, "");
2527 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2528 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2529 PL_preambled = FALSE;
84902520 2530 if (PERLDB_LINE)
3280af22 2531 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2532 goto retry;
2533 }
a0d0e21e 2534 }
79072805 2535 }
9f68db38 2536 }
79072805 2537 }
3280af22
NIS
2538 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2539 PL_bufptr = s;
2540 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2541 return yylex();
ae986130 2542 }
378cc40b 2543 goto retry;
4fdae800 2544 case '\r':
6a27c188 2545#ifdef PERL_STRICT_CR
cea2e8a9
GS
2546 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2547 Perl_croak(aTHX_
54310121 2548 "(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2549#endif
4fdae800 2550 case ' ': case '\t': case '\f': case 013:
378cc40b
LW
2551 s++;
2552 goto retry;
378cc40b 2553 case '#':
e929a76b 2554 case '\n':
3280af22
NIS
2555 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
2556 d = PL_bufend;
a687059c 2557 while (s < d && *s != '\n')
378cc40b 2558 s++;
0f85fab0 2559 if (s < d)
378cc40b 2560 s++;
463ee0b2 2561 incline(s);
3280af22
NIS
2562 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2563 PL_bufptr = s;
2564 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2565 return yylex();
a687059c 2566 }
378cc40b 2567 }
a687059c 2568 else {
378cc40b 2569 *s = '\0';
3280af22 2570 PL_bufend = s;
a687059c 2571 }
378cc40b
LW
2572 goto retry;
2573 case '-':
79072805 2574 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2575 s++;
3280af22 2576 PL_bufptr = s;
748a9306
LW
2577 tmp = *s++;
2578
3280af22 2579 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306
LW
2580 s++;
2581
2582 if (strnEQ(s,"=>",2)) {
3280af22 2583 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2584 OPERATOR('-'); /* unary minus */
2585 }
3280af22
NIS
2586 PL_last_uni = PL_oldbufptr;
2587 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2588 switch (tmp) {
79072805
LW
2589 case 'r': FTST(OP_FTEREAD);
2590 case 'w': FTST(OP_FTEWRITE);
2591 case 'x': FTST(OP_FTEEXEC);
2592 case 'o': FTST(OP_FTEOWNED);
2593 case 'R': FTST(OP_FTRREAD);
2594 case 'W': FTST(OP_FTRWRITE);
2595 case 'X': FTST(OP_FTREXEC);
2596 case 'O': FTST(OP_FTROWNED);
2597 case 'e': FTST(OP_FTIS);
2598 case 'z': FTST(OP_FTZERO);
2599 case 's': FTST(OP_FTSIZE);
2600 case 'f': FTST(OP_FTFILE);
2601 case 'd': FTST(OP_FTDIR);
2602 case 'l': FTST(OP_FTLINK);
2603 case 'p': FTST(OP_FTPIPE);
2604 case 'S': FTST(OP_FTSOCK);
2605 case 'u': FTST(OP_FTSUID);
2606 case 'g': FTST(OP_FTSGID);
2607 case 'k': FTST(OP_FTSVTX);
2608 case 'b': FTST(OP_FTBLK);
2609 case 'c': FTST(OP_FTCHR);
2610 case 't': FTST(OP_FTTTY);
2611 case 'T': FTST(OP_FTTEXT);
2612 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2613 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2614 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2615 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2616 default:
cea2e8a9 2617 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2618 break;
2619 }
2620 }
a687059c
LW
2621 tmp = *s++;
2622 if (*s == tmp) {
2623 s++;
3280af22 2624 if (PL_expect == XOPERATOR)
79072805
LW
2625 TERM(POSTDEC);
2626 else
2627 OPERATOR(PREDEC);
2628 }
2629 else if (*s == '>') {
2630 s++;
2631 s = skipspace(s);
834a4ddd 2632 if (isIDFIRST_lazy(s)) {
a0d0e21e 2633 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2634 TOKEN(ARROW);
79072805 2635 }
748a9306
LW
2636 else if (*s == '$')
2637 OPERATOR(ARROW);
463ee0b2 2638 else
748a9306 2639 TERM(ARROW);
a687059c 2640 }
3280af22 2641 if (PL_expect == XOPERATOR)
79072805
LW
2642 Aop(OP_SUBTRACT);
2643 else {
3280af22 2644 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2645 check_uni();
79072805 2646 OPERATOR('-'); /* unary minus */
2f3197b3 2647 }
79072805 2648
378cc40b 2649 case '+':
a687059c
LW
2650 tmp = *s++;
2651 if (*s == tmp) {
378cc40b 2652 s++;
3280af22 2653 if (PL_expect == XOPERATOR)
79072805
LW
2654 TERM(POSTINC);
2655 else
2656 OPERATOR(PREINC);
378cc40b 2657 }
3280af22 2658 if (PL_expect == XOPERATOR)
79072805
LW
2659 Aop(OP_ADD);
2660 else {
3280af22 2661 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2662 check_uni();
a687059c 2663 OPERATOR('+');
2f3197b3 2664 }
a687059c 2665
378cc40b 2666 case '*':
3280af22
NIS
2667 if (PL_expect != XOPERATOR) {
2668 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2669 PL_expect = XOPERATOR;
2670 force_ident(PL_tokenbuf, '*');
2671 if (!*PL_tokenbuf)
a0d0e21e 2672 PREREF('*');
79072805 2673 TERM('*');
a687059c 2674 }
79072805
LW
2675 s++;
2676 if (*s == '*') {
a687059c 2677 s++;
79072805 2678 PWop(OP_POW);
a687059c 2679 }
79072805
LW
2680 Mop(OP_MULTIPLY);
2681
378cc40b 2682 case '%':
3280af22 2683 if (PL_expect == XOPERATOR) {
bbce6d69 2684 ++s;
2685 Mop(OP_MODULO);
a687059c 2686 }
3280af22
NIS
2687 PL_tokenbuf[0] = '%';
2688 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2689 if (!PL_tokenbuf[1]) {
2690 if (s == PL_bufend)
bbce6d69 2691 yyerror("Final % should be \\% or %name");
2692 PREREF('%');
a687059c 2693 }
3280af22 2694 PL_pending_ident = '%';
bbce6d69 2695 TERM('%');
a687059c 2696
378cc40b 2697 case '^':
79072805 2698 s++;
a0d0e21e 2699 BOop(OP_BIT_XOR);
79072805 2700 case '[':
3280af22 2701 PL_lex_brackets++;
79072805 2702 /* FALL THROUGH */
378cc40b 2703 case '~':
378cc40b 2704 case ',':
378cc40b
LW
2705 tmp = *s++;
2706 OPERATOR(tmp);
a0d0e21e
LW
2707 case ':':
2708 if (s[1] == ':') {
2709 len = 0;
2710 goto just_a_word;
2711 }
2712 s++;
09bef843
SB
2713 switch (PL_expect) {
2714 OP *attrs;
2715 case XOPERATOR:
2716 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2717 break;
2718 PL_bufptr = s; /* update in case we back off */
2719 goto grabattrs;
2720 case XATTRBLOCK:
2721 PL_expect = XBLOCK;
2722 goto grabattrs;
2723 case XATTRTERM:
2724 PL_expect = XTERMBLOCK;
2725 grabattrs:
2726 s = skipspace(s);
2727 attrs = Nullop;
2728 while (isIDFIRST_lazy(s)) {
2729 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
2730 if (*d == '(') {
2731 d = scan_str(d,TRUE,TRUE);
2732 if (!d) {
2733 if (PL_lex_stuff) {
2734 SvREFCNT_dec(PL_lex_stuff);
2735 PL_lex_stuff = Nullsv;
2736 }
2737 /* MUST advance bufptr here to avoid bogus
2738 "at end of line" context messages from yyerror().
2739 */
2740 PL_bufptr = s + len;
2741 yyerror("Unterminated attribute parameter in attribute list");
2742 if (attrs)
2743 op_free(attrs);
2744 return 0; /* EOF indicator */
2745 }
2746 }
2747 if (PL_lex_stuff) {
2748 SV *sv = newSVpvn(s, len);
2749 sv_catsv(sv, PL_lex_stuff);
2750 attrs = append_elem(OP_LIST, attrs,
2751 newSVOP(OP_CONST, 0, sv));
2752 SvREFCNT_dec(PL_lex_stuff);
2753 PL_lex_stuff = Nullsv;
2754 }
2755 else {
2756 attrs = append_elem(OP_LIST, attrs,
2757 newSVOP(OP_CONST, 0,
2758 newSVpvn(s, len)));
2759 }
2760 s = skipspace(d);
2761 while (*s == ',')
2762 s = skipspace(s+1);
2763 }
2764 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}' for vi */
2765 if (*s != ';' && *s != tmp) {
2766 char q = ((*s == '\'') ? '"' : '\'');
2767 /* If here for an expression, and parsed no attrs, back off. */
2768 if (tmp == '=' && !attrs) {
2769 s = PL_bufptr;
2770 break;
2771 }
2772 /* MUST advance bufptr here to avoid bogus "at end of line"
2773 context messages from yyerror().
2774 */
2775 PL_bufptr = s;
2776 if (!*s)
2777 yyerror("Unterminated attribute list");
2778 else
2779 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2780 q, *s, q));
2781 if (attrs)
2782 op_free(attrs);
2783 OPERATOR(':');
2784 }
2785 if (attrs) {
2786 PL_nextval[PL_nexttoke].opval = attrs;
2787 force_next(THING);
2788 }
2789 TOKEN(COLONATTR);
2790 }
a0d0e21e 2791 OPERATOR(':');
8990e307
LW
2792 case '(':
2793 s++;
3280af22
NIS
2794 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2795 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2796 else
3280af22 2797 PL_expect = XTERM;
a0d0e21e 2798 TOKEN('(');
378cc40b 2799 case ';':
57843af0
GS
2800 if (CopLINE(PL_curcop) < PL_copline)
2801 PL_copline = CopLINE(PL_curcop);
378cc40b
LW
2802 tmp = *s++;
2803 OPERATOR(tmp);
2804 case ')':
378cc40b 2805 tmp = *s++;
16d20bd9
AD
2806 s = skipspace(s);
2807 if (*s == '{')
2808 PREBLOCK(tmp);
378cc40b 2809 TERM(tmp);
79072805
LW
2810 case ']':
2811 s++;
3280af22 2812 if (PL_lex_brackets <= 0)
d98d5fff 2813 yyerror("Unmatched right square bracket");
463ee0b2 2814 else
3280af22
NIS
2815 --PL_lex_brackets;
2816 if (PL_lex_state == LEX_INTERPNORMAL) {
2817 if (PL_lex_brackets == 0) {
a0d0e21e 2818 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2819 PL_lex_state = LEX_INTERPEND;
79072805
LW
2820 }
2821 }
4633a7c4 2822 TERM(']');
79072805
LW
2823 case '{':
2824 leftbracket:
79072805 2825 s++;
3280af22
NIS
2826 if (PL_lex_brackets > 100) {
2827 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
2828 if (newlb != PL_lex_brackstack) {
8990e307 2829 SAVEFREEPV(newlb);
3280af22 2830 PL_lex_brackstack = newlb;
8990e307
LW
2831 }
2832 }
3280af22 2833 switch (PL_expect) {
a0d0e21e 2834 case XTERM:
3280af22 2835 if (PL_lex_formbrack) {
a0d0e21e
LW
2836 s--;
2837 PRETERMBLOCK(DO);
2838 }
3280af22
NIS
2839 if (PL_oldoldbufptr == PL_last_lop)
2840 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2841 else
3280af22 2842 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 2843 OPERATOR(HASHBRACK);
a0d0e21e 2844 case XOPERATOR:
3280af22 2845 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
748a9306 2846 s++;
44a8e56a 2847 d = s;
3280af22
NIS
2848 PL_tokenbuf[0] = '\0';
2849 if (d < PL_bufend && *d == '-') {
2850 PL_tokenbuf[0] = '-';
44a8e56a 2851 d++;
3280af22 2852 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
44a8e56a 2853 d++;
2854 }
834a4ddd 2855 if (d < PL_bufend && isIDFIRST_lazy(d)) {
3280af22 2856 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 2857 FALSE, &len);
3280af22 2858 while (d < PL_bufend && (*d == ' ' || *d == '\t'))
748a9306
LW
2859 d++;
2860 if (*d == '}') {
3280af22 2861 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 2862 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
2863 if (minus)
2864 force_next('-');
748a9306
LW
2865 }
2866 }
2867 /* FALL THROUGH */
09bef843 2868 case XATTRBLOCK:
748a9306 2869 case XBLOCK:
3280af22
NIS
2870 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
2871 PL_expect = XSTATE;
a0d0e21e 2872 break;
09bef843 2873 case XATTRTERM:
a0d0e21e 2874 case XTERMBLOCK:
3280af22
NIS
2875 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
2876 PL_expect = XSTATE;
a0d0e21e
LW
2877 break;
2878 default: {
2879 char *t;
3280af22
NIS
2880 if (PL_oldoldbufptr == PL_last_lop)
2881 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 2882 else
3280af22 2883 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 2884 s = skipspace(s);
09ecc4b6 2885 if (*s == '}')
a0d0e21e 2886 OPERATOR(HASHBRACK);
b8a4b1be
GS
2887 /* This hack serves to disambiguate a pair of curlies
2888 * as being a block or an anon hash. Normally, expectation
2889 * determines that, but in cases where we're not in a
2890 * position to expect anything in particular (like inside
2891 * eval"") we have to resolve the ambiguity. This code
2892 * covers the case where the first term in the curlies is a
2893 * quoted string. Most other cases need to be explicitly
2894 * disambiguated by prepending a `+' before the opening
2895 * curly in order to force resolution as an anon hash.
2896 *
2897 * XXX should probably propagate the outer expectation
2898 * into eval"" to rely less on this hack, but that could
2899 * potentially break current behavior of eval"".
2900 * GSAR 97-07-21
2901 */
2902 t = s;
2903 if (*s == '\'' || *s == '"' || *s == '`') {
2904 /* common case: get past first string, handling escapes */
3280af22 2905 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
2906 if (*t++ == '\\' && (*t == '\\' || *t == *s))
2907 t++;
2908 t++;
a0d0e21e 2909 }
b8a4b1be 2910 else if (*s == 'q') {
3280af22 2911 if (++t < PL_bufend
b8a4b1be 2912 && (!isALNUM(*t)
3280af22 2913 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
b8a4b1be
GS
2914 && !isALNUM(*t)))) {
2915 char *tmps;
2916 char open, close, term;
2917 I32 brackets = 1;
2918
3280af22 2919 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
2920 t++;
2921 term = *t;
2922 open = term;
2923 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
2924 term = tmps[5];
2925 close = term;
2926 if (open == close)
3280af22
NIS
2927 for (t++; t < PL_bufend; t++) {
2928 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 2929 t++;
6d07e5e9 2930 else if (*t == open)
b8a4b1be
GS
2931 break;
2932 }
2933 else
3280af22
NIS
2934 for (t++; t < PL_bufend; t++) {
2935 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 2936 t++;
6d07e5e9 2937 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
2938 break;
2939 else if (*t == open)
2940 brackets++;
2941 }
2942 }
2943 t++;
a0d0e21e 2944 }
834a4ddd
LW
2945 else if (isIDFIRST_lazy(s)) {
2946 for (t++; t < PL_bufend && isALNUM_lazy(t); t++) ;
a0d0e21e 2947 }
3280af22 2948 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 2949 t++;
b8a4b1be
GS
2950 /* if comma follows first term, call it an anon hash */
2951 /* XXX it could be a comma expression with loop modifiers */
3280af22 2952 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 2953 || (*t == '=' && t[1] == '>')))
a0d0e21e 2954 OPERATOR(HASHBRACK);
3280af22 2955 if (PL_expect == XREF)
4e4e412b 2956 PL_expect = XTERM;
a0d0e21e 2957 else {
3280af22
NIS
2958 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
2959 PL_expect = XSTATE;
a0d0e21e 2960 }
8990e307 2961 }
a0d0e21e 2962 break;
463ee0b2 2963 }
57843af0 2964 yylval.ival = CopLINE(PL_curcop);
79072805 2965 if (isSPACE(*s) || *s == '#')
3280af22 2966 PL_copline = NOLINE; /* invalidate current command line number */
79072805 2967 TOKEN('{');
378cc40b 2968 case '}':
79072805
LW
2969 rightbracket:
2970 s++;
3280af22 2971 if (PL_lex_brackets <= 0)
d98d5fff 2972 yyerror("Unmatched right curly bracket");
463ee0b2 2973 else
3280af22
NIS
2974 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
2975 if (PL_lex_brackets < PL_lex_formbrack)
2976 PL_lex_formbrack = 0;
2977 if (PL_lex_state == LEX_INTERPNORMAL) {
2978 if (PL_lex_brackets == 0) {
2979 if (PL_lex_fakebrack) {
2980 PL_lex_state = LEX_INTERPEND;
2981 PL_bufptr = s;
cea2e8a9 2982 return yylex(); /* ignore fake brackets */
79072805 2983 }
fa83b5b6 2984 if (*s == '-' && s[1] == '>')
3280af22 2985 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 2986 else if (*s != '[' && *s != '{')
3280af22 2987 PL_lex_state = LEX_INTERPEND;
79072805
LW
2988 }
2989 }
3280af22
NIS
2990 if (PL_lex_brackets < PL_lex_fakebrack) {
2991 PL_bufptr = s;
2992 PL_lex_fakebrack = 0;
cea2e8a9 2993 return yylex(); /* ignore fake brackets */
748a9306 2994 }
79072805
LW
2995 force_next('}');
2996 TOKEN(';');
378cc40b
LW
2997 case '&':
2998 s++;
2999 tmp = *s++;
3000 if (tmp == '&')
a0d0e21e 3001 AOPERATOR(ANDAND);
378cc40b 3002 s--;
3280af22 3003 if (PL_expect == XOPERATOR) {
834a4ddd 3004 if (ckWARN(WARN_SEMICOLON) && isIDFIRST_lazy(s) && PL_bufptr == PL_linestart) {
57843af0 3005 CopLINE_dec(PL_curcop);
cea2e8a9 3006 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3007 CopLINE_inc(PL_curcop);
463ee0b2 3008 }
79072805 3009 BAop(OP_BIT_AND);
463ee0b2 3010 }
79072805 3011
3280af22
NIS
3012 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3013 if (*PL_tokenbuf) {
3014 PL_expect = XOPERATOR;
3015 force_ident(PL_tokenbuf, '&');
463ee0b2 3016 }
79072805
LW
3017 else
3018 PREREF('&');
c07a80fd 3019 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3020 TERM('&');
3021
378cc40b
LW
3022 case '|':
3023 s++;
3024 tmp = *s++;
3025 if (tmp == '|')
a0d0e21e 3026 AOPERATOR(OROR);
378cc40b 3027 s--;
79072805 3028 BOop(OP_BIT_OR);
378cc40b
LW
3029 case '=':
3030 s++;
3031 tmp = *s++;
3032 if (tmp == '=')
79072805
LW
3033 Eop(OP_EQ);
3034 if (tmp == '>')
3035 OPERATOR(',');
378cc40b 3036 if (tmp == '~')
79072805 3037 PMop(OP_MATCH);
599cee73 3038 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3039 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3040 s--;
3280af22
NIS
3041 if (PL_expect == XSTATE && isALPHA(tmp) &&
3042 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3043 {
3280af22
NIS
3044 if (PL_in_eval && !PL_rsfp) {
3045 d = PL_bufend;
a5f75d66
AD
3046 while (s < d) {
3047 if (*s++ == '\n') {
3048 incline(s);
3049 if (strnEQ(s,"=cut",4)) {
3050 s = strchr(s,'\n');
3051 if (s)
3052 s++;
3053 else
3054 s = d;
3055 incline(s);
3056 goto retry;
3057 }
3058 }
3059 }
3060 goto retry;
3061 }
3280af22
NIS
3062 s = PL_bufend;
3063 PL_doextract = TRUE;
a0d0e21e
LW
3064 goto retry;
3065 }
3280af22 3066 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3067 char *t;
51882d45 3068#ifdef PERL_STRICT_CR
a0d0e21e 3069 for (t = s; *t == ' ' || *t == '\t'; t++) ;
51882d45
GS
3070#else
3071 for (t = s; *t == ' ' || *t == '\t' || *t == '\r'; t++) ;
3072#endif
a0d0e21e
LW
3073 if (*t == '\n' || *t == '#') {
3074 s--;
3280af22 3075 PL_expect = XBLOCK;
a0d0e21e
LW
3076 goto leftbracket;
3077 }
79072805 3078 }
a0d0e21e
LW
3079 yylval.ival = 0;
3080 OPERATOR(ASSIGNOP);
378cc40b
LW
3081 case '!':
3082 s++;
3083 tmp = *s++;
3084 if (tmp == '=')
79072805 3085 Eop(OP_NE);
378cc40b 3086 if (tmp == '~')
79072805 3087 PMop(OP_NOT);
378cc40b
LW
3088 s--;
3089 OPERATOR('!');
3090 case '<':
3280af22 3091 if (PL_expect != XOPERATOR) {
93a17b20 3092 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3093 check_uni();
79072805
LW
3094 if (s[1] == '<')
3095 s = scan_heredoc(s);
3096 else
3097 s = scan_inputsymbol(s);
3098 TERM(sublex_start());
378cc40b
LW
3099 }
3100 s++;
3101 tmp = *s++;
3102 if (tmp == '<')
79072805 3103 SHop(OP_LEFT_SHIFT);
395c3793
LW
3104 if (tmp == '=') {
3105 tmp = *s++;
3106 if (tmp == '>')
79072805 3107 Eop(OP_NCMP);
395c3793 3108 s--;
79072805 3109 Rop(OP_LE);
395c3793 3110 }
378cc40b 3111 s--;
79072805 3112 Rop(OP_LT);
378cc40b
LW
3113 case '>':
3114 s++;
3115 tmp = *s++;
3116 if (tmp == '>')
79072805 3117 SHop(OP_RIGHT_SHIFT);
378cc40b 3118 if (tmp == '=')
79072805 3119 Rop(OP_GE);
378cc40b 3120 s--;
79072805 3121 Rop(OP_GT);
378cc40b
LW
3122
3123 case '$':
bbce6d69 3124 CLINE;
3125
3280af22
NIS
3126 if (PL_expect == XOPERATOR) {
3127 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3128 PL_expect = XTERM;
a0d0e21e 3129 depcom();
bbce6d69 3130 return ','; /* grandfather non-comma-format format */
a0d0e21e 3131 }
8990e307 3132 }
a0d0e21e 3133
834a4ddd 3134 if (s[1] == '#' && (isIDFIRST_lazy(s+2) || strchr("{$:+-", s[2]))) {
3280af22 3135 PL_tokenbuf[0] = '@';
376b8730
SM
3136 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3137 sizeof PL_tokenbuf - 1, FALSE);
3138 if (PL_expect == XOPERATOR)
3139 no_op("Array length", s);
3280af22 3140 if (!PL_tokenbuf[1])
a0d0e21e 3141 PREREF(DOLSHARP);
3280af22
NIS
3142 PL_expect = XOPERATOR;
3143 PL_pending_ident = '#';
463ee0b2 3144 TOKEN(DOLSHARP);
79072805 3145 }
bbce6d69 3146
3280af22 3147 PL_tokenbuf[0] = '$';
376b8730
SM
3148 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3149 sizeof PL_tokenbuf - 1, FALSE);
3150 if (PL_expect == XOPERATOR)
3151 no_op("Scalar", s);
3280af22
NIS
3152 if (!PL_tokenbuf[1]) {
3153 if (s == PL_bufend)
bbce6d69 3154 yyerror("Final $ should be \\$ or $name");
3155 PREREF('$');
8990e307 3156 }
a0d0e21e 3157
bbce6d69 3158 /* This kludge not intended to be bulletproof. */
3280af22 3159 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3160 yylval.opval = newSVOP(OP_CONST, 0,
3280af22 3161 newSViv((IV)PL_compiling.cop_arybase));
bbce6d69 3162 yylval.opval->op_private = OPpCONST_ARYBASE;
3163 TERM(THING);
3164 }
3165
ff68c719 3166 d = s;
69d2bceb 3167 tmp = (I32)*s;
3280af22 3168 if (PL_lex_state == LEX_NORMAL)
ff68c719 3169 s = skipspace(s);
3170
3280af22 3171 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3172 char *t;
3173 if (*s == '[') {
3280af22 3174 PL_tokenbuf[0] = '@';
599cee73 3175 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3176 for(t = s + 1;
834a4ddd 3177 isSPACE(*t) || isALNUM_lazy(t) || *t == '$';
bbce6d69 3178 t++) ;
a0d0e21e 3179 if (*t++ == ',') {
3280af22
NIS
3180 PL_bufptr = skipspace(PL_bufptr);
3181 while (t < PL_bufend && *t != ']')
bbce6d69 3182 t++;
cea2e8a9 3183 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3184 "Multidimensional syntax %.*s not supported",
3185 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3186 }
3187 }
bbce6d69 3188 }
3189 else if (*s == '{') {
3280af22 3190 PL_tokenbuf[0] = '%';
599cee73 3191 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3192 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3193 {
3280af22 3194 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3195 STRLEN len;
3196 for (t++; isSPACE(*t); t++) ;
834a4ddd 3197 if (isIDFIRST_lazy(t)) {
8903cb82 3198 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3199 for (; isSPACE(*t); t++) ;
864dbfa3 3200 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3201 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3202 "You need to quote \"%s\"", tmpbuf);
748a9306 3203 }
93a17b20
LW
3204 }
3205 }
2f3197b3 3206 }
bbce6d69 3207
3280af22 3208 PL_expect = XOPERATOR;
69d2bceb 3209 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3210 bool islop = (PL_last_lop == PL_oldoldbufptr);
3211 if (!islop || PL_last_lop_op == OP_GREPSTART)
3212 PL_expect = XOPERATOR;
bbce6d69 3213 else if (strchr("$@\"'`q", *s))
3280af22 3214 PL_expect = XTERM; /* e.g. print $fh "foo" */
834a4ddd 3215 else if (strchr("&*<%", *s) && isIDFIRST_lazy(s+1))
3280af22 3216 PL_expect = XTERM; /* e.g. print $fh &sub */
834a4ddd 3217 else if (isIDFIRST_lazy(s)) {
3280af22 3218 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3219 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
84902520
TB
3220 if (tmp = keyword(tmpbuf, len)) {
3221 /* binary operators exclude handle interpretations */
3222 switch (tmp) {
3223 case -KEY_x:
3224 case -KEY_eq:
3225 case -KEY_ne:
3226 case -KEY_gt:
3227 case -KEY_lt:
3228 case -KEY_ge:
3229 case -KEY_le:
3230 case -KEY_cmp:
3231 break;
3232 default:
3280af22 3233 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3234 break;
3235 }
3236 }
68dc0745 3237 else {
3238 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3239 if (gv && GvCVu(gv))
3280af22 3240 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3241 }
93a17b20 3242 }
bbce6d69 3243 else if (isDIGIT(*s))
3280af22 3244 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3245 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3246 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3247 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3248 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3249 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3250 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3251 }
3280af22 3252 PL_pending_ident = '$';
79072805 3253 TOKEN('$');
378cc40b
LW
3254
3255 case '@':
3280af22 3256 if (PL_expect == XOPERATOR)
bbce6d69 3257 no_op("Array", s);
3280af22
NIS
3258 PL_tokenbuf[0] = '@';
3259 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3260 if (!PL_tokenbuf[1]) {
3261 if (s == PL_bufend)
bbce6d69 3262 yyerror("Final @ should be \\@ or @name");
3263 PREREF('@');
3264 }
3280af22 3265 if (PL_lex_state == LEX_NORMAL)
ff68c719 3266 s = skipspace(s);
3280af22 3267 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3268 if (*s == '{')
3280af22 3269 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3270
3271 /* Warn about @ where they meant $. */
599cee73 3272 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3273 if (*s == '[' || *s == '{') {
3274 char *t = s + 1;
834a4ddd 3275 while (*t && (isALNUM_lazy(t) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3276 t++;
3277 if (*t == '}' || *t == ']') {
3278 t++;
3280af22 3279 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3280 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3281 "Scalar value %.*s better written as $%.*s",
3280af22 3282 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3283 }
93a17b20
LW
3284 }
3285 }
463ee0b2 3286 }
3280af22 3287 PL_pending_ident = '@';
79072805 3288 TERM('@');
378cc40b
LW
3289
3290 case '/': /* may either be division or pattern */
3291 case '?': /* may either be conditional or pattern */
3280af22 3292 if (PL_expect != XOPERATOR) {
c277df42 3293 /* Disable warning on "study /blah/" */
3280af22
NIS
3294 if (PL_oldoldbufptr == PL_last_uni
3295 && (*PL_last_uni != 's' || s - PL_last_uni < 5
834a4ddd 3296 || memNE(PL_last_uni, "study", 5) || isALNUM_lazy(PL_last_uni+5)))
c277df42 3297 check_uni();
8782bef2 3298 s = scan_pat(s,OP_MATCH);
79072805 3299 TERM(sublex_start());
378cc40b
LW
3300 }
3301 tmp = *s++;
a687059c 3302 if (tmp == '/')
79072805 3303 Mop(OP_DIVIDE);
378cc40b
LW
3304 OPERATOR(tmp);
3305
3306 case '.':
51882d45
GS
3307 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3308#ifdef PERL_STRICT_CR
3309 && s[1] == '\n'
3310#else
3311 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3312#endif
3313 && (s == PL_linestart || s[-1] == '\n') )
3314 {
3280af22
NIS
3315 PL_lex_formbrack = 0;
3316 PL_expect = XSTATE;
79072805
LW
3317 goto rightbracket;
3318 }
3280af22 3319 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3320 tmp = *s++;
a687059c
LW
3321 if (*s == tmp) {
3322 s++;
2f3197b3
LW
3323 if (*s == tmp) {
3324 s++;
79072805 3325 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3326 }
3327 else
79072805 3328 yylval.ival = 0;
378cc40b 3329 OPERATOR(DOTDOT);
a687059c 3330 }
3280af22 3331 if (PL_expect != XOPERATOR)
2f3197b3 3332 check_uni();
79072805 3333 Aop(OP_CONCAT);
378cc40b
LW
3334 }
3335 /* FALL THROUGH */
3336 case '0': case '1': case '2': case '3': case '4':
3337 case '5': case '6': case '7': case '8': case '9':
79072805 3338 s = scan_num(s);
3280af22 3339 if (PL_expect == XOPERATOR)
8990e307 3340 no_op("Number",s);
79072805
LW
3341 TERM(THING);
3342
3343 case '\'':
09bef843 3344 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3345 if (PL_expect == XOPERATOR) {
3346 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3347 PL_expect = XTERM;
a0d0e21e
LW
3348 depcom();
3349 return ','; /* grandfather non-comma-format format */
3350 }
463ee0b2 3351 else
8990e307 3352 no_op("String",s);
463ee0b2 3353 }
79072805 3354 if (!s)
85e6fe83 3355 missingterm((char*)0);
79072805
LW
3356 yylval.ival = OP_CONST;
3357 TERM(sublex_start());
3358
3359 case '"':
09bef843 3360 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3361 if (PL_expect == XOPERATOR) {
3362 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3363 PL_expect = XTERM;
a0d0e21e
LW
3364 depcom();
3365 return ','; /* grandfather non-comma-format format */
3366 }
463ee0b2 3367 else
8990e307 3368 no_op("String",s);
463ee0b2 3369 }
79072805 3370 if (!s)
85e6fe83 3371 missingterm((char*)0);
4633a7c4 3372 yylval.ival = OP_CONST;
3280af22 3373 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3374 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3375 yylval.ival = OP_STRINGIFY;
3376 break;
3377 }
3378 }
79072805
LW
3379 TERM(sublex_start());
3380
3381 case '`':
09bef843 3382 s = scan_str(s,FALSE,FALSE);
3280af22 3383 if (PL_expect == XOPERATOR)
8990e307 3384 no_op("Backticks",s);
79072805 3385 if (!s)
85e6fe83 3386 missingterm((char*)0);
79072805
LW
3387 yylval.ival = OP_BACKTICK;
3388 set_csh();
3389 TERM(sublex_start());
3390
3391 case '\\':
3392 s++;
599cee73 3393 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3394 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3395 *s, *s);
3280af22 3396 if (PL_expect == XOPERATOR)
8990e307 3397 no_op("Backslash",s);
79072805
LW
3398 OPERATOR(REFGEN);
3399
3400 case 'x':
3280af22 3401 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3402 s++;
3403 Mop(OP_REPEAT);
2f3197b3 3404 }
79072805
LW
3405 goto keylookup;
3406
378cc40b 3407 case '_':
79072805
LW
3408 case 'a': case 'A':
3409 case 'b': case 'B':
3410 case 'c': case 'C':
3411 case 'd': case 'D':
3412 case 'e': case 'E':
3413 case 'f': case 'F':
3414 case 'g': case 'G':
3415 case 'h': case 'H':
3416 case 'i': case 'I':
3417 case 'j': case 'J':
3418 case 'k': case 'K':
3419 case 'l': case 'L':
3420 case 'm': case 'M':
3421 case 'n': case 'N':
3422 case 'o': case 'O':
3423 case 'p': case 'P':
3424 case 'q': case 'Q':
3425 case 'r': case 'R':
3426 case 's': case 'S':
3427 case 't': case 'T':
3428 case 'u': case 'U':
3429 case 'v': case 'V':
3430 case 'w': case 'W':
3431 case 'X':
3432 case 'y': case 'Y':
3433 case 'z': case 'Z':
3434
49dc05e3 3435 keylookup: {
2d8e6c8d 3436 STRLEN n_a;
161b471a
NIS
3437 gv = Nullgv;
3438 gvp = 0;
49dc05e3 3439
3280af22
NIS
3440 PL_bufptr = s;
3441 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3442
3443 /* Some keywords can be followed by any delimiter, including ':' */
3280af22
NIS
3444 tmp = (len == 1 && strchr("msyq", PL_tokenbuf[0]) ||
3445 len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3446 (PL_tokenbuf[0] == 'q' &&
3447 strchr("qwxr", PL_tokenbuf[1]))));
8ebc5c01 3448
3449 /* x::* is just a word, unless x is "CORE" */
3280af22 3450 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3451 goto just_a_word;
3452
3643fb5f 3453 d = s;
3280af22 3454 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3455 d++; /* no comments skipped here, or s### is misparsed */
3456
3457 /* Is this a label? */
3280af22
NIS
3458 if (!tmp && PL_expect == XSTATE
3459 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3460 s = d + 1;
3280af22 3461 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3462 CLINE;
3463 TOKEN(LABEL);
3643fb5f
CS
3464 }
3465
3466 /* Check for keywords */
3280af22 3467 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3468
3469 /* Is this a word before a => operator? */
748a9306
LW
3470 if (strnEQ(d,"=>",2)) {
3471 CLINE;
3280af22 3472 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306
LW
3473 yylval.opval->op_private = OPpCONST_BARE;
3474 TERM(WORD);
3475 }
3476
a0d0e21e 3477 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3478 GV *ogv = Nullgv; /* override (winner) */
3479 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3480 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3481 CV *cv;
3280af22 3482 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3483 (cv = GvCVu(gv)))
3484 {
3485 if (GvIMPORTED_CV(gv))
3486 ogv = gv;
3487 else if (! CvMETHOD(cv))
3488 hgv = gv;
3489 }
3490 if (!ogv &&
3280af22
NIS
3491 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3492 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3493 GvCVu(gv) && GvIMPORTED_CV(gv))
3494 {
3495 ogv = gv;
3496 }
3497 }
3498 if (ogv) {
3499 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3500 }
3501 else if (gv && !gvp
3502 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3503 && GvCVu(gv)
3280af22 3504 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3505 {
3506 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3507 }
56f7f34b
CS
3508 else { /* no override */
3509 tmp = -tmp;
3510 gv = Nullgv;
3511 gvp = 0;
4944e2f7
GS
3512 if (ckWARN(WARN_AMBIGUOUS) && hgv
3513 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3514 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3515 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3516 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3517 }
a0d0e21e
LW
3518 }
3519
3520 reserved_word:
3521 switch (tmp) {
79072805
LW
3522
3523 default: /* not a keyword */
93a17b20 3524 just_a_word: {
96e4d5b1 3525 SV *sv;
3280af22 3526 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3527
3528 /* Get the rest if it looks like a package qualifier */
3529
a0d0e21e 3530 if (*s == '\'' || *s == ':' && s[1] == ':') {
c3e0f903 3531 STRLEN morelen;
3280af22 3532 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3533 TRUE, &morelen);
3534 if (!morelen)
cea2e8a9 3535 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3536 *s == '\'' ? "'" : "::");
c3e0f903 3537 len += morelen;
a0d0e21e 3538 }
8990e307 3539
3280af22
NIS
3540 if (PL_expect == XOPERATOR) {
3541 if (PL_bufptr == PL_linestart) {
57843af0 3542 CopLINE_dec(PL_curcop);
cea2e8a9 3543 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3544 CopLINE_inc(PL_curcop);
463ee0b2
LW
3545 }
3546 else
54310121 3547 no_op("Bareword",s);
463ee0b2 3548 }
8990e307 3549
c3e0f903
GS
3550 /* Look for a subroutine with this name in current package,
3551 unless name is "Foo::", in which case Foo is a bearword
3552 (and a package name). */
3553
3554 if (len > 2 &&
3280af22 3555 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3556 {
599cee73 3557 if (ckWARN(WARN_UNSAFE) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
cea2e8a9 3558 Perl_warner(aTHX_ WARN_UNSAFE,
599cee73 3559 "Bareword \"%s\" refers to nonexistent package",
3280af22 3560 PL_tokenbuf);
c3e0f903 3561 len -= 2;
3280af22 3562 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3563 gv = Nullgv;
3564 gvp = 0;
3565 }
3566 else {
3567 len = 0;
3568 if (!gv)
3280af22 3569 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3570 }
3571
3572 /* if we saw a global override before, get the right name */
8990e307 3573
49dc05e3 3574 if (gvp) {
79cb57f6 3575 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3576 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3577 }
3578 else
3280af22 3579 sv = newSVpv(PL_tokenbuf,0);
8990e307 3580
a0d0e21e
LW
3581 /* Presume this is going to be a bareword of some sort. */
3582
3583 CLINE;
49dc05e3 3584 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3585 yylval.opval->op_private = OPpCONST_BARE;
3586
c3e0f903
GS
3587 /* And if "Foo::", then that's what it certainly is. */
3588
3589 if (len)
3590 goto safe_bareword;
3591
8990e307
LW
3592 /* See if it's the indirect object for a list operator. */
3593
3280af22
NIS
3594 if (PL_oldoldbufptr &&
3595 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3596 (PL_oldoldbufptr == PL_last_lop
3597 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3598 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3599 (PL_expect == XREF ||
3600 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3601 {
748a9306
LW