This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate change #9006 from maintperl to mainline,
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
075953c3
JH
129/* Note that REPORT() and REPORT2() will be expressions that supply
130 * their own trailing comma, not suitable for statements as such. */
998054bd 131#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
132# define REPORT(x,retval) tokereport(x,s,(int)retval),
133# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 134#else
075953c3
JH
135# define REPORT(x,retval)
136# define REPORT2(x,retval)
998054bd
SC
137#endif
138
075953c3
JH
139#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
140#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
141#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
142#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
143#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
144#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
145#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
146#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
147#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
148#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
149#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
150#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
151#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
152#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
153#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
154#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
155#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
156#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
157#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
158#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 159
a687059c
LW
160/* This bit of chicanery makes a unary function followed by
161 * a parenthesis into a function with one argument, highest precedence.
162 */
2f3197b3 163#define UNI(f) return(yylval.ival = f, \
075953c3 164 REPORT("uni",f) \
3280af22
NIS
165 PL_expect = XTERM, \
166 PL_bufptr = s, \
167 PL_last_uni = PL_oldbufptr, \
168 PL_last_lop_op = f, \
a687059c
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
79072805 171#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 172 REPORT("uni",f) \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
79072805
LW
175 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
176
9f68db38 177/* grandfather return to old style */
3280af22 178#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 179
2d00ba3b 180STATIC void
61b2116b 181S_tokereport(pTHX_ char *thing, char* s, I32 rv)
998054bd
SC
182{
183 SV *report;
184 DEBUG_T({
185 report = newSVpv(thing, 0);
61b2116b 186 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
187
188 if (s - PL_bufptr > 0)
189 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
190 else {
191 if (PL_oldbufptr && *PL_oldbufptr)
192 sv_catpv(report, PL_tokenbuf);
193 }
194 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
195 })
196}
197
ffb4593c
NT
198/*
199 * S_ao
200 *
201 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
202 * into an OP_ANDASSIGN or OP_ORASSIGN
203 */
204
76e3520e 205STATIC int
cea2e8a9 206S_ao(pTHX_ int toketype)
a0d0e21e 207{
3280af22
NIS
208 if (*PL_bufptr == '=') {
209 PL_bufptr++;
a0d0e21e
LW
210 if (toketype == ANDAND)
211 yylval.ival = OP_ANDASSIGN;
212 else if (toketype == OROR)
213 yylval.ival = OP_ORASSIGN;
214 toketype = ASSIGNOP;
215 }
216 return toketype;
217}
218
ffb4593c
NT
219/*
220 * S_no_op
221 * When Perl expects an operator and finds something else, no_op
222 * prints the warning. It always prints "<something> found where
223 * operator expected. It prints "Missing semicolon on previous line?"
224 * if the surprise occurs at the start of the line. "do you need to
225 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
226 * where the compiler doesn't know if foo is a method call or a function.
227 * It prints "Missing operator before end of line" if there's nothing
228 * after the missing operator, or "... before <...>" if there is something
229 * after the missing operator.
230 */
231
76e3520e 232STATIC void
cea2e8a9 233S_no_op(pTHX_ char *what, char *s)
463ee0b2 234{
3280af22
NIS
235 char *oldbp = PL_bufptr;
236 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 237
1189a94a
GS
238 if (!s)
239 s = oldbp;
07c798fb 240 else
1189a94a 241 PL_bufptr = s;
cea2e8a9 242 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 243 if (is_first)
cea2e8a9 244 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 245 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 246 char *t;
7e2040f0 247 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 248 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 249 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 250 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 251 }
07c798fb
HS
252 else {
253 assert(s >= oldbp);
cea2e8a9 254 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 255 }
3280af22 256 PL_bufptr = oldbp;
8990e307
LW
257}
258
ffb4593c
NT
259/*
260 * S_missingterm
261 * Complain about missing quote/regexp/heredoc terminator.
262 * If it's called with (char *)NULL then it cauterizes the line buffer.
263 * If we're in a delimited string and the delimiter is a control
264 * character, it's reformatted into a two-char sequence like ^C.
265 * This is fatal.
266 */
267
76e3520e 268STATIC void
cea2e8a9 269S_missingterm(pTHX_ char *s)
8990e307
LW
270{
271 char tmpbuf[3];
272 char q;
273 if (s) {
274 char *nl = strrchr(s,'\n');
d2719217 275 if (nl)
8990e307
LW
276 *nl = '\0';
277 }
9d116dd7
JH
278 else if (
279#ifdef EBCDIC
280 iscntrl(PL_multi_close)
281#else
282 PL_multi_close < 32 || PL_multi_close == 127
283#endif
284 ) {
8990e307 285 *tmpbuf = '^';
3280af22 286 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
287 s = "\\n";
288 tmpbuf[2] = '\0';
289 s = tmpbuf;
290 }
291 else {
3280af22 292 *tmpbuf = PL_multi_close;
8990e307
LW
293 tmpbuf[1] = '\0';
294 s = tmpbuf;
295 }
296 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 297 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 298}
79072805 299
ffb4593c
NT
300/*
301 * Perl_deprecate
ffb4593c
NT
302 */
303
79072805 304void
864dbfa3 305Perl_deprecate(pTHX_ char *s)
a0d0e21e 306{
599cee73 307 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 308 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
309}
310
ffb4593c
NT
311/*
312 * depcom
9cbb5ea2 313 * Deprecate a comma-less variable list.
ffb4593c
NT
314 */
315
76e3520e 316STATIC void
cea2e8a9 317S_depcom(pTHX)
a0d0e21e
LW
318{
319 deprecate("comma-less variable list");
320}
321
ffb4593c 322/*
9cbb5ea2
GS
323 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
324 * utf16-to-utf8-reversed.
ffb4593c
NT
325 */
326
c39cd008
GS
327#ifdef PERL_CR_FILTER
328static void
329strip_return(SV *sv)
330{
331 register char *s = SvPVX(sv);
332 register char *e = s + SvCUR(sv);
333 /* outer loop optimized to do nothing if there are no CR-LFs */
334 while (s < e) {
335 if (*s++ == '\r' && *s == '\n') {
336 /* hit a CR-LF, need to copy the rest */
337 register char *d = s - 1;
338 *d++ = *s++;
339 while (s < e) {
340 if (*s == '\r' && s[1] == '\n')
341 s++;
342 *d++ = *s++;
343 }
344 SvCUR(sv) -= s - d;
345 return;
346 }
347 }
348}
a868473f 349
76e3520e 350STATIC I32
c39cd008 351S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 352{
c39cd008
GS
353 I32 count = FILTER_READ(idx+1, sv, maxlen);
354 if (count > 0 && !maxlen)
355 strip_return(sv);
356 return count;
a868473f
NIS
357}
358#endif
359
ffb4593c
NT
360/*
361 * Perl_lex_start
9cbb5ea2
GS
362 * Initialize variables. Uses the Perl save_stack to save its state (for
363 * recursive calls to the parser).
ffb4593c
NT
364 */
365
a0d0e21e 366void
864dbfa3 367Perl_lex_start(pTHX_ SV *line)
79072805 368{
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
18b09519 386 }
57843af0 387 SAVECOPLINE(PL_curcop);
3280af22
NIS
388 SAVEPPTR(PL_bufptr);
389 SAVEPPTR(PL_bufend);
390 SAVEPPTR(PL_oldbufptr);
391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
392 SAVEPPTR(PL_last_lop);
393 SAVEPPTR(PL_last_uni);
3280af22
NIS
394 SAVEPPTR(PL_linestart);
395 SAVESPTR(PL_linestr);
396 SAVEPPTR(PL_lex_brackstack);
397 SAVEPPTR(PL_lex_casestack);
c76ac1ee 398 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
399 SAVESPTR(PL_lex_stuff);
400 SAVEI32(PL_lex_defer);
09bef843 401 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 402 SAVESPTR(PL_lex_repl);
bebdddfc
GS
403 SAVEINT(PL_expect);
404 SAVEINT(PL_lex_expect);
3280af22
NIS
405
406 PL_lex_state = LEX_NORMAL;
407 PL_lex_defer = 0;
408 PL_expect = XSTATE;
409 PL_lex_brackets = 0;
3280af22
NIS
410 New(899, PL_lex_brackstack, 120, char);
411 New(899, PL_lex_casestack, 12, char);
412 SAVEFREEPV(PL_lex_brackstack);
413 SAVEFREEPV(PL_lex_casestack);
414 PL_lex_casemods = 0;
415 *PL_lex_casestack = '\0';
416 PL_lex_dojoin = 0;
417 PL_lex_starts = 0;
418 PL_lex_stuff = Nullsv;
419 PL_lex_repl = Nullsv;
420 PL_lex_inpat = 0;
76be56bc 421 PL_nexttoke = 0;
3280af22 422 PL_lex_inwhat = 0;
09bef843 423 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
424 PL_linestr = line;
425 if (SvREADONLY(PL_linestr))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 s = SvPV(PL_linestr, len);
8990e307 428 if (len && s[len-1] != ';') {
3280af22
NIS
429 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
430 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
431 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 432 }
3280af22
NIS
433 SvTEMP_off(PL_linestr);
434 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
435 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 436 PL_last_lop = PL_last_uni = Nullch;
3280af22 437 SvREFCNT_dec(PL_rs);
79cb57f6 438 PL_rs = newSVpvn("\n", 1);
3280af22 439 PL_rsfp = 0;
79072805 440}
a687059c 441
ffb4593c
NT
442/*
443 * Perl_lex_end
9cbb5ea2
GS
444 * Finalizer for lexing operations. Must be called when the parser is
445 * done with the lexer.
ffb4593c
NT
446 */
447
463ee0b2 448void
864dbfa3 449Perl_lex_end(pTHX)
463ee0b2 450{
3280af22 451 PL_doextract = FALSE;
463ee0b2
LW
452}
453
ffb4593c
NT
454/*
455 * S_incline
456 * This subroutine has nothing to do with tilting, whether at windmills
457 * or pinball tables. Its name is short for "increment line". It
57843af0 458 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 459 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
460 * # line 500 "foo.pm"
461 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
462 */
463
76e3520e 464STATIC void
cea2e8a9 465S_incline(pTHX_ char *s)
463ee0b2
LW
466{
467 char *t;
468 char *n;
73659bf1 469 char *e;
463ee0b2 470 char ch;
463ee0b2 471
57843af0 472 CopLINE_inc(PL_curcop);
463ee0b2
LW
473 if (*s++ != '#')
474 return;
bf4acbe4 475 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
476 if (strnEQ(s, "line", 4))
477 s += 4;
478 else
479 return;
084592ab 480 if (SPACE_OR_TAB(*s))
73659bf1 481 s++;
4e553d73 482 else
73659bf1 483 return;
bf4acbe4 484 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
485 if (!isDIGIT(*s))
486 return;
487 n = s;
488 while (isDIGIT(*s))
489 s++;
bf4acbe4 490 while (SPACE_OR_TAB(*s))
463ee0b2 491 s++;
73659bf1 492 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 493 s++;
73659bf1
GS
494 e = t + 1;
495 }
463ee0b2 496 else {
463ee0b2 497 for (t = s; !isSPACE(*t); t++) ;
73659bf1 498 e = t;
463ee0b2 499 }
bf4acbe4 500 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
501 e++;
502 if (*e != '\n' && *e != '\0')
503 return; /* false alarm */
504
463ee0b2
LW
505 ch = *t;
506 *t = '\0';
f4dd75d9
GS
507 if (t - s > 0) {
508#ifdef USE_ITHREADS
509 Safefree(CopFILE(PL_curcop));
510#else
511 SvREFCNT_dec(CopFILEGV(PL_curcop));
512#endif
57843af0 513 CopFILE_set(PL_curcop, s);
f4dd75d9 514 }
463ee0b2 515 *t = ch;
57843af0 516 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
517}
518
ffb4593c
NT
519/*
520 * S_skipspace
521 * Called to gobble the appropriate amount and type of whitespace.
522 * Skips comments as well.
523 */
524
76e3520e 525STATIC char *
cea2e8a9 526S_skipspace(pTHX_ register char *s)
a687059c 527{
3280af22 528 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 529 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
530 s++;
531 return s;
532 }
533 for (;;) {
fd049845 534 STRLEN prevlen;
09bef843
SB
535 SSize_t oldprevlen, oldoldprevlen;
536 SSize_t oldloplen, oldunilen;
60e6418e
GS
537 while (s < PL_bufend && isSPACE(*s)) {
538 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
539 incline(s);
540 }
ffb4593c
NT
541
542 /* comment */
3280af22
NIS
543 if (s < PL_bufend && *s == '#') {
544 while (s < PL_bufend && *s != '\n')
463ee0b2 545 s++;
60e6418e 546 if (s < PL_bufend) {
463ee0b2 547 s++;
60e6418e
GS
548 if (PL_in_eval && !PL_rsfp) {
549 incline(s);
550 continue;
551 }
552 }
463ee0b2 553 }
ffb4593c
NT
554
555 /* only continue to recharge the buffer if we're at the end
556 * of the buffer, we're not reading from a source filter, and
557 * we're in normal lexing mode
558 */
09bef843
SB
559 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
560 PL_lex_state == LEX_FORMLINE)
463ee0b2 561 return s;
ffb4593c
NT
562
563 /* try to recharge the buffer */
9cbb5ea2
GS
564 if ((s = filter_gets(PL_linestr, PL_rsfp,
565 (prevlen = SvCUR(PL_linestr)))) == Nullch)
566 {
567 /* end of file. Add on the -p or -n magic */
3280af22
NIS
568 if (PL_minus_n || PL_minus_p) {
569 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
570 ";}continue{print or die qq(-p destination: $!\\n)" :
571 "");
3280af22
NIS
572 sv_catpv(PL_linestr,";}");
573 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
574 }
575 else
3280af22 576 sv_setpv(PL_linestr,";");
ffb4593c
NT
577
578 /* reset variables for next time we lex */
9cbb5ea2
GS
579 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
580 = SvPVX(PL_linestr);
3280af22 581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 582 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
583
584 /* Close the filehandle. Could be from -P preprocessor,
585 * STDIN, or a regular file. If we were reading code from
586 * STDIN (because the commandline held no -e or filename)
587 * then we don't close it, we reset it so the code can
588 * read from STDIN too.
589 */
590
3280af22
NIS
591 if (PL_preprocess && !PL_in_eval)
592 (void)PerlProc_pclose(PL_rsfp);
593 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
594 PerlIO_clearerr(PL_rsfp);
8990e307 595 else
3280af22
NIS
596 (void)PerlIO_close(PL_rsfp);
597 PL_rsfp = Nullfp;
463ee0b2
LW
598 return s;
599 }
ffb4593c
NT
600
601 /* not at end of file, so we only read another line */
09bef843
SB
602 /* make corresponding updates to old pointers, for yyerror() */
603 oldprevlen = PL_oldbufptr - PL_bufend;
604 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
605 if (PL_last_uni)
606 oldunilen = PL_last_uni - PL_bufend;
607 if (PL_last_lop)
608 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
609 PL_linestart = PL_bufptr = s + prevlen;
610 PL_bufend = s + SvCUR(PL_linestr);
611 s = PL_bufptr;
09bef843
SB
612 PL_oldbufptr = s + oldprevlen;
613 PL_oldoldbufptr = s + oldoldprevlen;
614 if (PL_last_uni)
615 PL_last_uni = s + oldunilen;
616 if (PL_last_lop)
617 PL_last_lop = s + oldloplen;
a0d0e21e 618 incline(s);
ffb4593c
NT
619
620 /* debugger active and we're not compiling the debugger code,
621 * so store the line into the debugger's array of lines
622 */
3280af22 623 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
624 SV *sv = NEWSV(85,0);
625
626 sv_upgrade(sv, SVt_PVMG);
3280af22 627 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 628 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 629 }
463ee0b2 630 }
a687059c 631}
378cc40b 632
ffb4593c
NT
633/*
634 * S_check_uni
635 * Check the unary operators to ensure there's no ambiguity in how they're
636 * used. An ambiguous piece of code would be:
637 * rand + 5
638 * This doesn't mean rand() + 5. Because rand() is a unary operator,
639 * the +5 is its argument.
640 */
641
76e3520e 642STATIC void
cea2e8a9 643S_check_uni(pTHX)
ba106d47 644{
2f3197b3 645 char *s;
a0d0e21e 646 char *t;
2f3197b3 647
3280af22 648 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 649 return;
3280af22
NIS
650 while (isSPACE(*PL_last_uni))
651 PL_last_uni++;
7e2040f0 652 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 653 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 654 return;
0453d815 655 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 656 char ch = *s;
0453d815 657 *s = '\0';
4e553d73
NIS
658 Perl_warner(aTHX_ WARN_AMBIGUOUS,
659 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
660 PL_last_uni);
661 *s = ch;
662 }
2f3197b3
LW
663}
664
ffb4593c
NT
665/* workaround to replace the UNI() macro with a function. Only the
666 * hints/uts.sh file mentions this. Other comments elsewhere in the
667 * source indicate Microport Unix might need it too.
668 */
669
ffed7fef
LW
670#ifdef CRIPPLED_CC
671
672#undef UNI
ffed7fef 673#define UNI(f) return uni(f,s)
ffed7fef 674
76e3520e 675STATIC int
cea2e8a9 676S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
677{
678 yylval.ival = f;
3280af22
NIS
679 PL_expect = XTERM;
680 PL_bufptr = s;
8f872242
NIS
681 PL_last_uni = PL_oldbufptr;
682 PL_last_lop_op = f;
ffed7fef
LW
683 if (*s == '(')
684 return FUNC1;
685 s = skipspace(s);
686 if (*s == '(')
687 return FUNC1;
688 else
689 return UNIOP;
690}
691
a0d0e21e
LW
692#endif /* CRIPPLED_CC */
693
ffb4593c
NT
694/*
695 * LOP : macro to build a list operator. Its behaviour has been replaced
696 * with a subroutine, S_lop() for which LOP is just another name.
697 */
698
a0d0e21e
LW
699#define LOP(f,x) return lop(f,x,s)
700
ffb4593c
NT
701/*
702 * S_lop
703 * Build a list operator (or something that might be one). The rules:
704 * - if we have a next token, then it's a list operator [why?]
705 * - if the next thing is an opening paren, then it's a function
706 * - else it's a list operator
707 */
708
76e3520e 709STATIC I32
a0be28da 710S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 711{
79072805 712 yylval.ival = f;
35c8bce7 713 CLINE;
075953c3 714 REPORT("lop", f)
3280af22
NIS
715 PL_expect = x;
716 PL_bufptr = s;
717 PL_last_lop = PL_oldbufptr;
718 PL_last_lop_op = f;
719 if (PL_nexttoke)
a0d0e21e 720 return LSTOP;
79072805
LW
721 if (*s == '(')
722 return FUNC;
723 s = skipspace(s);
724 if (*s == '(')
725 return FUNC;
726 else
727 return LSTOP;
728}
729
ffb4593c
NT
730/*
731 * S_force_next
9cbb5ea2 732 * When the lexer realizes it knows the next token (for instance,
ffb4593c 733 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
734 * to know what token to return the next time the lexer is called. Caller
735 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
736 * handles the token correctly.
ffb4593c
NT
737 */
738
4e553d73 739STATIC void
cea2e8a9 740S_force_next(pTHX_ I32 type)
79072805 741{
3280af22
NIS
742 PL_nexttype[PL_nexttoke] = type;
743 PL_nexttoke++;
744 if (PL_lex_state != LEX_KNOWNEXT) {
745 PL_lex_defer = PL_lex_state;
746 PL_lex_expect = PL_expect;
747 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
748 }
749}
750
ffb4593c
NT
751/*
752 * S_force_word
753 * When the lexer knows the next thing is a word (for instance, it has
754 * just seen -> and it knows that the next char is a word char, then
755 * it calls S_force_word to stick the next word into the PL_next lookahead.
756 *
757 * Arguments:
b1b65b59 758 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
759 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
760 * int check_keyword : if true, Perl checks to make sure the word isn't
761 * a keyword (do this if the word is a label, e.g. goto FOO)
762 * int allow_pack : if true, : characters will also be allowed (require,
763 * use, etc. do this)
9cbb5ea2 764 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
765 */
766
76e3520e 767STATIC char *
cea2e8a9 768S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 769{
463ee0b2
LW
770 register char *s;
771 STRLEN len;
4e553d73 772
463ee0b2
LW
773 start = skipspace(start);
774 s = start;
7e2040f0 775 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 776 (allow_pack && *s == ':') ||
15f0808c 777 (allow_initial_tick && *s == '\'') )
a0d0e21e 778 {
3280af22
NIS
779 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
780 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
781 return start;
782 if (token == METHOD) {
783 s = skipspace(s);
784 if (*s == '(')
3280af22 785 PL_expect = XTERM;
463ee0b2 786 else {
3280af22 787 PL_expect = XOPERATOR;
463ee0b2 788 }
79072805 789 }
3280af22
NIS
790 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
791 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
792 force_next(token);
793 }
794 return s;
795}
796
ffb4593c
NT
797/*
798 * S_force_ident
9cbb5ea2 799 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
800 * text only contains the "foo" portion. The first argument is a pointer
801 * to the "foo", and the second argument is the type symbol to prefix.
802 * Forces the next token to be a "WORD".
9cbb5ea2 803 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
804 */
805
76e3520e 806STATIC void
cea2e8a9 807S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
808{
809 if (s && *s) {
11343788 810 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 811 PL_nextval[PL_nexttoke].opval = o;
79072805 812 force_next(WORD);
748a9306 813 if (kind) {
11343788 814 o->op_private = OPpCONST_ENTERED;
55497cff
PP
815 /* XXX see note in pp_entereval() for why we forgo typo
816 warnings if the symbol must be introduced in an eval.
817 GSAR 96-10-12 */
3280af22 818 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
819 kind == '$' ? SVt_PV :
820 kind == '@' ? SVt_PVAV :
821 kind == '%' ? SVt_PVHV :
822 SVt_PVGV
823 );
748a9306 824 }
79072805
LW
825 }
826}
827
1571675a
GS
828NV
829Perl_str_to_version(pTHX_ SV *sv)
830{
831 NV retval = 0.0;
832 NV nshift = 1.0;
833 STRLEN len;
834 char *start = SvPVx(sv,len);
3aa33fe5 835 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
836 char *end = start + len;
837 while (start < end) {
ba210ebe 838 STRLEN skip;
1571675a
GS
839 UV n;
840 if (utf)
dcad2880 841 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
842 else {
843 n = *(U8*)start;
844 skip = 1;
845 }
846 retval += ((NV)n)/nshift;
847 start += skip;
848 nshift *= 1000;
849 }
850 return retval;
851}
852
4e553d73 853/*
ffb4593c
NT
854 * S_force_version
855 * Forces the next token to be a version number.
856 */
857
76e3520e 858STATIC char *
cea2e8a9 859S_force_version(pTHX_ char *s)
89bfa8cd
PP
860{
861 OP *version = Nullop;
44dcb63b 862 char *d;
89bfa8cd
PP
863
864 s = skipspace(s);
865
44dcb63b 866 d = s;
dd629d5b 867 if (*d == 'v')
44dcb63b 868 d++;
44dcb63b 869 if (isDIGIT(*d)) {
a7cb1f99 870 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd
PP
881 }
882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
4e553d73 886 force_next(WORD);
89bfa8cd
PP
887
888 return (s);
889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
3280af22 920 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
922 while (s < send) {
923 if (*s == '\\') {
a0d0e21e 924 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
925 s++; /* all that, just for this */
926 }
927 *d++ = *s++;
928 }
929 *d = '\0';
463ee0b2 930 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 931 finish:
3280af22 932 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 933 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
934 return sv;
935}
936
ffb4593c
NT
937/*
938 * Now come three functions related to double-quote context,
939 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
940 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
941 * interact with PL_lex_state, and create fake ( ... ) argument lists
942 * to handle functions and concatenation.
943 * They assume that whoever calls them will be setting up a fake
944 * join call, because each subthing puts a ',' after it. This lets
945 * "lower \luPpEr"
946 * become
947 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
948 *
949 * (I'm not sure whether the spurious commas at the end of lcfirst's
950 * arguments and join's arguments are created or not).
951 */
952
953/*
954 * S_sublex_start
955 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
956 *
957 * Pattern matching will set PL_lex_op to the pattern-matching op to
958 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
959 *
960 * OP_CONST and OP_READLINE are easy--just make the new op and return.
961 *
962 * Everything else becomes a FUNC.
963 *
964 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
965 * had an OP_CONST or OP_READLINE). This just sets us up for a
966 * call to S_sublex_push().
967 */
968
76e3520e 969STATIC I32
cea2e8a9 970S_sublex_start(pTHX)
79072805
LW
971{
972 register I32 op_type = yylval.ival;
79072805
LW
973
974 if (op_type == OP_NULL) {
3280af22
NIS
975 yylval.opval = PL_lex_op;
976 PL_lex_op = Nullop;
79072805
LW
977 return THING;
978 }
979 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 980 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
981
982 if (SvTYPE(sv) == SVt_PVIV) {
983 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
984 STRLEN len;
985 char *p;
986 SV *nsv;
987
988 p = SvPV(sv, len);
79cb57f6 989 nsv = newSVpvn(p, len);
01ec43d0
GS
990 if (SvUTF8(sv))
991 SvUTF8_on(nsv);
b3ac6de7
IZ
992 SvREFCNT_dec(sv);
993 sv = nsv;
4e553d73 994 }
b3ac6de7 995 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 996 PL_lex_stuff = Nullsv;
79072805
LW
997 return THING;
998 }
999
3280af22
NIS
1000 PL_sublex_info.super_state = PL_lex_state;
1001 PL_sublex_info.sub_inwhat = op_type;
1002 PL_sublex_info.sub_op = PL_lex_op;
1003 PL_lex_state = LEX_INTERPPUSH;
55497cff 1004
3280af22
NIS
1005 PL_expect = XTERM;
1006 if (PL_lex_op) {
1007 yylval.opval = PL_lex_op;
1008 PL_lex_op = Nullop;
55497cff
PP
1009 return PMFUNC;
1010 }
1011 else
1012 return FUNC;
1013}
1014
ffb4593c
NT
1015/*
1016 * S_sublex_push
1017 * Create a new scope to save the lexing state. The scope will be
1018 * ended in S_sublex_done. Returns a '(', starting the function arguments
1019 * to the uc, lc, etc. found before.
1020 * Sets PL_lex_state to LEX_INTERPCONCAT.
1021 */
1022
76e3520e 1023STATIC I32
cea2e8a9 1024S_sublex_push(pTHX)
55497cff 1025{
f46d017c 1026 ENTER;
55497cff 1027
3280af22
NIS
1028 PL_lex_state = PL_sublex_info.super_state;
1029 SAVEI32(PL_lex_dojoin);
1030 SAVEI32(PL_lex_brackets);
3280af22
NIS
1031 SAVEI32(PL_lex_casemods);
1032 SAVEI32(PL_lex_starts);
1033 SAVEI32(PL_lex_state);
7766f137 1034 SAVEVPTR(PL_lex_inpat);
3280af22 1035 SAVEI32(PL_lex_inwhat);
57843af0 1036 SAVECOPLINE(PL_curcop);
3280af22
NIS
1037 SAVEPPTR(PL_bufptr);
1038 SAVEPPTR(PL_oldbufptr);
1039 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1040 SAVEPPTR(PL_last_lop);
1041 SAVEPPTR(PL_last_uni);
3280af22
NIS
1042 SAVEPPTR(PL_linestart);
1043 SAVESPTR(PL_linestr);
1044 SAVEPPTR(PL_lex_brackstack);
1045 SAVEPPTR(PL_lex_casestack);
1046
1047 PL_linestr = PL_lex_stuff;
1048 PL_lex_stuff = Nullsv;
1049
9cbb5ea2
GS
1050 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1051 = SvPVX(PL_linestr);
3280af22 1052 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1053 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1054 SAVEFREESV(PL_linestr);
1055
1056 PL_lex_dojoin = FALSE;
1057 PL_lex_brackets = 0;
3280af22
NIS
1058 New(899, PL_lex_brackstack, 120, char);
1059 New(899, PL_lex_casestack, 12, char);
1060 SAVEFREEPV(PL_lex_brackstack);
1061 SAVEFREEPV(PL_lex_casestack);
1062 PL_lex_casemods = 0;
1063 *PL_lex_casestack = '\0';
1064 PL_lex_starts = 0;
1065 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1066 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1067
1068 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1069 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1070 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1071 else
3280af22 1072 PL_lex_inpat = Nullop;
79072805 1073
55497cff 1074 return '(';
79072805
LW
1075}
1076
ffb4593c
NT
1077/*
1078 * S_sublex_done
1079 * Restores lexer state after a S_sublex_push.
1080 */
1081
76e3520e 1082STATIC I32
cea2e8a9 1083S_sublex_done(pTHX)
79072805 1084{
3280af22 1085 if (!PL_lex_starts++) {
9aa983d2
JH
1086 SV *sv = newSVpvn("",0);
1087 if (SvUTF8(PL_linestr))
1088 SvUTF8_on(sv);
3280af22 1089 PL_expect = XOPERATOR;
9aa983d2 1090 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1091 return THING;
1092 }
1093
3280af22
NIS
1094 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1095 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1096 return yylex();
79072805
LW
1097 }
1098
ffb4593c 1099 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1100 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1101 PL_linestr = PL_lex_repl;
1102 PL_lex_inpat = 0;
1103 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1104 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1105 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1106 SAVEFREESV(PL_linestr);
1107 PL_lex_dojoin = FALSE;
1108 PL_lex_brackets = 0;
3280af22
NIS
1109 PL_lex_casemods = 0;
1110 *PL_lex_casestack = '\0';
1111 PL_lex_starts = 0;
25da4f38 1112 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1113 PL_lex_state = LEX_INTERPNORMAL;
1114 PL_lex_starts++;
e9fa98b2
HS
1115 /* we don't clear PL_lex_repl here, so that we can check later
1116 whether this is an evalled subst; that means we rely on the
1117 logic to ensure sublex_done() is called again only via the
1118 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1119 }
e9fa98b2 1120 else {
3280af22 1121 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1122 PL_lex_repl = Nullsv;
1123 }
79072805 1124 return ',';
ffed7fef
LW
1125 }
1126 else {
f46d017c 1127 LEAVE;
3280af22
NIS
1128 PL_bufend = SvPVX(PL_linestr);
1129 PL_bufend += SvCUR(PL_linestr);
1130 PL_expect = XOPERATOR;
09bef843 1131 PL_sublex_info.sub_inwhat = 0;
79072805 1132 return ')';
ffed7fef
LW
1133 }
1134}
1135
02aa26ce
NT
1136/*
1137 scan_const
1138
1139 Extracts a pattern, double-quoted string, or transliteration. This
1140 is terrifying code.
1141
3280af22
NIS
1142 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1143 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1144 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1145
9b599b2a
GS
1146 Returns a pointer to the character scanned up to. Iff this is
1147 advanced from the start pointer supplied (ie if anything was
1148 successfully parsed), will leave an OP for the substring scanned
1149 in yylval. Caller must intuit reason for not parsing further
1150 by looking at the next characters herself.
1151
02aa26ce
NT
1152 In patterns:
1153 backslashes:
1154 double-quoted style: \r and \n
1155 regexp special ones: \D \s
1156 constants: \x3
1157 backrefs: \1 (deprecated in substitution replacements)
1158 case and quoting: \U \Q \E
1159 stops on @ and $, but not for $ as tail anchor
1160
1161 In transliterations:
1162 characters are VERY literal, except for - not at the start or end
1163 of the string, which indicates a range. scan_const expands the
1164 range to the full set of intermediate characters.
1165
1166 In double-quoted strings:
1167 backslashes:
1168 double-quoted style: \r and \n
1169 constants: \x3
1170 backrefs: \1 (deprecated)
1171 case and quoting: \U \Q \E
1172 stops on @ and $
1173
1174 scan_const does *not* construct ops to handle interpolated strings.
1175 It stops processing as soon as it finds an embedded $ or @ variable
1176 and leaves it to the caller to work out what's going on.
1177
1178 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1179
1180 $ in pattern could be $foo or could be tail anchor. Assumption:
1181 it's a tail anchor if $ is the last thing in the string, or if it's
1182 followed by one of ")| \n\t"
1183
1184 \1 (backreferences) are turned into $1
1185
1186 The structure of the code is
1187 while (there's a character to process) {
1188 handle transliteration ranges
1189 skip regexp comments
1190 skip # initiated comments in //x patterns
1191 check for embedded @foo
1192 check for embedded scalars
1193 if (backslash) {
1194 leave intact backslashes from leave (below)
1195 deprecate \1 in strings and sub replacements
1196 handle string-changing backslashes \l \U \Q \E, etc.
1197 switch (what was escaped) {
1198 handle - in a transliteration (becomes a literal -)
1199 handle \132 octal characters
1200 handle 0x15 hex characters
1201 handle \cV (control V)
1202 handle printf backslashes (\f, \r, \n, etc)
1203 } (end switch)
1204 } (end if backslash)
1205 } (end while character to read)
4e553d73 1206
02aa26ce
NT
1207*/
1208
76e3520e 1209STATIC char *
cea2e8a9 1210S_scan_const(pTHX_ char *start)
79072805 1211{
3280af22 1212 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1213 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1214 register char *s = start; /* start of the constant */
1215 register char *d = SvPVX(sv); /* destination for copies */
1216 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1217 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1218 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1219 /* the constant is UTF8 */
012bcf8d
GS
1220 UV uv;
1221
ac2262e3 1222 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1223 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1224 : UTF;
89491803 1225 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1226 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1227 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1228 : UTF;
dff6d3cd 1229 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1230 PL_lex_inpat
4a2d328f 1231 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1232 : "";
79072805
LW
1233
1234 while (s < send || dorange) {
02aa26ce 1235 /* get transliterations out of the way (they're most literal) */
3280af22 1236 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1237 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1238 if (dorange) {
1ba5c669
JH
1239 I32 i; /* current expanded character */
1240 I32 min; /* first character in range */
1241 I32 max; /* last character in range */
02aa26ce
NT
1242
1243 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1244 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1245 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1246 d -= 2; /* eat the first char and the - */
1247
8ada0baa
JH
1248 min = (U8)*d; /* first char in range */
1249 max = (U8)d[1]; /* last char in range */
1250
c2e66d9e 1251 if (min > max) {
01ec43d0 1252 Perl_croak(aTHX_
1ba5c669
JH
1253 "Invalid [] range \"%c-%c\" in transliteration operator",
1254 (char)min, (char)max);
c2e66d9e
GS
1255 }
1256
1ba5c669 1257#ifndef ASCIIish
8ada0baa
JH
1258 if ((isLOWER(min) && isLOWER(max)) ||
1259 (isUPPER(min) && isUPPER(max))) {
1260 if (isLOWER(min)) {
1261 for (i = min; i <= max; i++)
1262 if (isLOWER(i))
1263 *d++ = i;
1264 } else {
1265 for (i = min; i <= max; i++)
1266 if (isUPPER(i))
1267 *d++ = i;
1268 }
1269 }
1270 else
1271#endif
1272 for (i = min; i <= max; i++)
1273 *d++ = i;
02aa26ce
NT
1274
1275 /* mark the range as done, and continue */
79072805 1276 dorange = FALSE;
01ec43d0 1277 didrange = TRUE;
79072805 1278 continue;
4e553d73 1279 }
02aa26ce
NT
1280
1281 /* range begins (ignore - as first or last char) */
79072805 1282 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1283 if (didrange) {
1fafa243 1284 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1285 }
a0ed51b3 1286 if (utf) {
a176fa2a 1287 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1288 s++;
1289 continue;
1290 }
79072805
LW
1291 dorange = TRUE;
1292 s++;
01ec43d0
GS
1293 }
1294 else {
1295 didrange = FALSE;
1296 }
79072805 1297 }
02aa26ce
NT
1298
1299 /* if we get here, we're not doing a transliteration */
1300
0f5d15d6
IZ
1301 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1302 except for the last char, which will be done separately. */
3280af22 1303 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1304 if (s[2] == '#') {
1305 while (s < send && *s != ')')
1306 *d++ = *s++;
155aba94
GS
1307 }
1308 else if (s[2] == '{' /* This should match regcomp.c */
1309 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1310 {
cc6b7395 1311 I32 count = 1;
0f5d15d6 1312 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1313 char c;
1314
d9f97599
GS
1315 while (count && (c = *regparse)) {
1316 if (c == '\\' && regparse[1])
1317 regparse++;
4e553d73 1318 else if (c == '{')
cc6b7395 1319 count++;
4e553d73 1320 else if (c == '}')
cc6b7395 1321 count--;
d9f97599 1322 regparse++;
cc6b7395 1323 }
5bdf89e7
IZ
1324 if (*regparse != ')') {
1325 regparse--; /* Leave one char for continuation. */
cc6b7395 1326 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1327 }
0f5d15d6 1328 while (s < regparse)
cc6b7395
IZ
1329 *d++ = *s++;
1330 }
748a9306 1331 }
02aa26ce
NT
1332
1333 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1334 else if (*s == '#' && PL_lex_inpat &&
1335 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1336 while (s+1 < send && *s != '\n')
1337 *d++ = *s++;
1338 }
02aa26ce 1339
5d1d4326
JH
1340 /* check for embedded arrays
1341 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1342 */
7e2040f0 1343 else if (*s == '@' && s[1]
5d1d4326 1344 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1345 break;
02aa26ce
NT
1346
1347 /* check for embedded scalars. only stop if we're sure it's a
1348 variable.
1349 */
79072805 1350 else if (*s == '$') {
3280af22 1351 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1352 break;
c277df42 1353 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1354 break; /* in regexp, $ might be tail anchor */
1355 }
02aa26ce
NT
1356
1357 /* backslashes */
79072805
LW
1358 if (*s == '\\' && s+1 < send) {
1359 s++;
02aa26ce
NT
1360
1361 /* some backslashes we leave behind */
c9f97d15 1362 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1363 *d++ = '\\';
1364 *d++ = *s++;
1365 continue;
1366 }
02aa26ce
NT
1367
1368 /* deprecate \1 in strings and substitution replacements */
3280af22 1369 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1370 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1371 {
599cee73 1372 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1373 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1374 *--s = '$';
1375 break;
1376 }
02aa26ce
NT
1377
1378 /* string-change backslash escapes */
3280af22 1379 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1380 --s;
1381 break;
1382 }
02aa26ce
NT
1383
1384 /* if we get here, it's either a quoted -, or a digit */
79072805 1385 switch (*s) {
02aa26ce
NT
1386
1387 /* quoted - in transliterations */
79072805 1388 case '-':
3280af22 1389 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1390 *d++ = *s++;
1391 continue;
1392 }
1393 /* FALL THROUGH */
1394 default:
11b8faa4 1395 {
7e84c16c 1396 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1397 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1398 "Unrecognized escape \\%c passed through",
1399 *s);
1400 /* default action is to copy the quoted character */
f9a63242 1401 goto default_action;
11b8faa4 1402 }
02aa26ce
NT
1403
1404 /* \132 indicates an octal constant */
79072805
LW
1405 case '0': case '1': case '2': case '3':
1406 case '4': case '5': case '6': case '7':
ba210ebe
JH
1407 {
1408 STRLEN len = 0; /* disallow underscores */
1409 uv = (UV)scan_oct(s, 3, &len);
1410 s += len;
1411 }
012bcf8d 1412 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1413
1414 /* \x24 indicates a hex constant */
79072805 1415 case 'x':
a0ed51b3
LW
1416 ++s;
1417 if (*s == '{') {
1418 char* e = strchr(s, '}');
adaeee49 1419 if (!e) {
a0ed51b3 1420 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1421 e = s;
1422 }
89491803 1423 else {
ba210ebe
JH
1424 STRLEN len = 1; /* allow underscores */
1425 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1426 }
1427 s = e + 1;
a0ed51b3
LW
1428 }
1429 else {
ba210ebe
JH
1430 {
1431 STRLEN len = 0; /* disallow underscores */
1432 uv = (UV)scan_hex(s, 2, &len);
1433 s += len;
1434 }
012bcf8d
GS
1435 }
1436
1437 NUM_ESCAPE_INSERT:
1438 /* Insert oct or hex escaped character.
301d3d20
JH
1439 * There will always enough room in sv since such
1440 * escapes will be longer than any UT-F8 sequence
1441 * they can end up as. */
ba7cea30
JH
1442
1443 /* This spot is wrong for EBCDIC. Characters like
1444 * the lowercase letters and digits are >127 in EBCDIC,
1445 * so here they would need to be mapped to the Unicode
1446 * repertoire. --jhi */
1447
7948272d 1448 if (uv > 127) {
9aa983d2 1449 if (!has_utf8 && uv > 255) {
301d3d20
JH
1450 /* Might need to recode whatever we have
1451 * accumulated so far if it contains any
1452 * hibit chars.
1453 *
1454 * (Can't we keep track of that and avoid
1455 * this rescan? --jhi)
012bcf8d
GS
1456 */
1457 int hicount = 0;
1458 char *c;
301d3d20 1459
012bcf8d 1460 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1461 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1462 hicount++;
1463 }
1464 if (hicount) {
1465 char *old_pvx = SvPVX(sv);
1466 char *src, *dst;
301d3d20
JH
1467
1468 d = SvGROW(sv,
1469 SvCUR(sv) + hicount + 1) +
1470 (d - old_pvx);
012bcf8d
GS
1471
1472 src = d - 1;
1473 d += hicount;
1474 dst = d - 1;
1475
1476 while (src < dst) {
fd400ab9 1477 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1478 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1479 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1480 }
1481 else {
1482 *dst-- = *src--;
1483 }
1484 }
1485 }
1486 }
1487
9aa983d2 1488 if (has_utf8 || uv > 255) {
012bcf8d 1489 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1490 has_utf8 = TRUE;
f9a63242
JH
1491 if (PL_lex_inwhat == OP_TRANS &&
1492 PL_sublex_info.sub_op) {
1493 PL_sublex_info.sub_op->op_private |=
1494 (PL_lex_repl ? OPpTRANS_FROM_UTF
1495 : OPpTRANS_TO_UTF);
1496 utf = TRUE;
1497 }
012bcf8d 1498 }
a0ed51b3 1499 else {
012bcf8d 1500 *d++ = (char)uv;
a0ed51b3 1501 }
012bcf8d
GS
1502 }
1503 else {
1504 *d++ = (char)uv;
a0ed51b3 1505 }
79072805 1506 continue;
02aa26ce 1507
4a2d328f
IZ
1508 /* \N{latin small letter a} is a named character */
1509 case 'N':
423cee85
JH
1510 ++s;
1511 if (*s == '{') {
1512 char* e = strchr(s, '}');
155aba94 1513 SV *res;
423cee85
JH
1514 STRLEN len;
1515 char *str;
4e553d73 1516
423cee85 1517 if (!e) {
5777a3f7 1518 yyerror("Missing right brace on \\N{}");
423cee85
JH
1519 e = s - 1;
1520 goto cont_scan;
1521 }
1522 res = newSVpvn(s + 1, e - s - 1);
4e553d73 1523 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1524 res, Nullsv, "\\N{...}" );
f9a63242
JH
1525 if (has_utf8)
1526 sv_utf8_upgrade(res);
423cee85 1527 str = SvPV(res,len);
89491803 1528 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1529 char *ostart = SvPVX(sv);
1530 SvCUR_set(sv, d - ostart);
1531 SvPOK_on(sv);
e4f3eed8 1532 *d = '\0';
f08d6ad9 1533 sv_utf8_upgrade(sv);
d2f449dd
SB
1534 /* this just broke our allocation above... */
1535 SvGROW(sv, send - start);
f08d6ad9 1536 d = SvPVX(sv) + SvCUR(sv);
89491803 1537 has_utf8 = TRUE;
f08d6ad9 1538 }
423cee85
JH
1539 if (len > e - s + 4) {
1540 char *odest = SvPVX(sv);
1541
1542 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1543 d = SvPVX(sv) + (d - odest);
1544 }
1545 Copy(str, d, len, char);
1546 d += len;
1547 SvREFCNT_dec(res);
1548 cont_scan:
1549 s = e + 1;
1550 }
1551 else
5777a3f7 1552 yyerror("Missing braces on \\N{}");
423cee85
JH
1553 continue;
1554
02aa26ce 1555 /* \c is a control character */
79072805
LW
1556 case 'c':
1557 s++;
9d116dd7
JH
1558#ifdef EBCDIC
1559 *d = *s++;
1560 if (isLOWER(*d))
1561 *d = toUPPER(*d);
4e553d73 1562 *d = toCTRL(*d);
774a9426 1563 d++;
9d116dd7 1564#else
ba210ebe
JH
1565 {
1566 U8 c = *s++;
1567 *d++ = toCTRL(c);
1568 }
9d116dd7 1569#endif
79072805 1570 continue;
02aa26ce
NT
1571
1572 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1573 case 'b':
1574 *d++ = '\b';
1575 break;
1576 case 'n':
1577 *d++ = '\n';
1578 break;
1579 case 'r':
1580 *d++ = '\r';
1581 break;
1582 case 'f':
1583 *d++ = '\f';
1584 break;
1585 case 't':
1586 *d++ = '\t';
1587 break;
34a3fe2a
PP
1588#ifdef EBCDIC
1589 case 'e':
1590 *d++ = '\047'; /* CP 1047 */
1591 break;
1592 case 'a':
1593 *d++ = '\057'; /* CP 1047 */
1594 break;
1595#else
79072805
LW
1596 case 'e':
1597 *d++ = '\033';
1598 break;
1599 case 'a':
1600 *d++ = '\007';
1601 break;
34a3fe2a 1602#endif
02aa26ce
NT
1603 } /* end switch */
1604
79072805
LW
1605 s++;
1606 continue;
02aa26ce
NT
1607 } /* end if (backslash) */
1608
f9a63242 1609 default_action:
fd400ab9 1610 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
RI
1611 STRLEN len = (STRLEN) -1;
1612 UV uv;
1613 if (this_utf8) {
a0dbb045 1614 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
RI
1615 }
1616 if (len == (STRLEN)-1) {
1617 /* Illegal UTF8 (a high-bit byte), make it valid. */
1618 char *old_pvx = SvPVX(sv);
1619 /* need space for one extra char (NOTE: SvCUR() not set here) */
1620 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1621 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1622 }
1623 else {
1624 while (len--)
1625 *d++ = *s++;
1626 }
1627 has_utf8 = TRUE;
f9a63242
JH
1628 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1629 PL_sublex_info.sub_op->op_private |=
1630 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1631 utf = TRUE;
1632 }
a5a960be
RI
1633 continue;
1634 }
1635
f9a63242 1636 *d++ = *s++;
02aa26ce
NT
1637 } /* while loop to process each character */
1638
1639 /* terminate the string and set up the sv */
79072805 1640 *d = '\0';
463ee0b2 1641 SvCUR_set(sv, d - SvPVX(sv));
79072805 1642 SvPOK_on(sv);
89491803 1643 if (has_utf8)
7e2040f0 1644 SvUTF8_on(sv);
79072805 1645
02aa26ce 1646 /* shrink the sv if we allocated more than we used */
79072805
LW
1647 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1648 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1649 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1650 }
02aa26ce 1651
9b599b2a 1652 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1653 if (s > PL_bufptr) {
1654 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1655 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1656 sv, Nullsv,
4e553d73 1657 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1658 ? "tr"
3280af22 1659 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1660 ? "s"
1661 : "qq")));
79072805 1662 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1663 } else
8990e307 1664 SvREFCNT_dec(sv);
79072805
LW
1665 return s;
1666}
1667
ffb4593c
NT
1668/* S_intuit_more
1669 * Returns TRUE if there's more to the expression (e.g., a subscript),
1670 * FALSE otherwise.
ffb4593c
NT
1671 *
1672 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1673 *
1674 * ->[ and ->{ return TRUE
1675 * { and [ outside a pattern are always subscripts, so return TRUE
1676 * if we're outside a pattern and it's not { or [, then return FALSE
1677 * if we're in a pattern and the first char is a {
1678 * {4,5} (any digits around the comma) returns FALSE
1679 * if we're in a pattern and the first char is a [
1680 * [] returns FALSE
1681 * [SOMETHING] has a funky algorithm to decide whether it's a
1682 * character class or not. It has to deal with things like
1683 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1684 * anything else returns TRUE
1685 */
1686
9cbb5ea2
GS
1687/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1688
76e3520e 1689STATIC int
cea2e8a9 1690S_intuit_more(pTHX_ register char *s)
79072805 1691{
3280af22 1692 if (PL_lex_brackets)
79072805
LW
1693 return TRUE;
1694 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1695 return TRUE;
1696 if (*s != '{' && *s != '[')
1697 return FALSE;
3280af22 1698 if (!PL_lex_inpat)
79072805
LW
1699 return TRUE;
1700
1701 /* In a pattern, so maybe we have {n,m}. */
1702 if (*s == '{') {
1703 s++;
1704 if (!isDIGIT(*s))
1705 return TRUE;
1706 while (isDIGIT(*s))
1707 s++;
1708 if (*s == ',')
1709 s++;
1710 while (isDIGIT(*s))
1711 s++;
1712 if (*s == '}')
1713 return FALSE;
1714 return TRUE;
1715
1716 }
1717
1718 /* On the other hand, maybe we have a character class */
1719
1720 s++;
1721 if (*s == ']' || *s == '^')
1722 return FALSE;
1723 else {
ffb4593c 1724 /* this is terrifying, and it works */
79072805
LW
1725 int weight = 2; /* let's weigh the evidence */
1726 char seen[256];
f27ffc4a 1727 unsigned char un_char = 255, last_un_char;
93a17b20 1728 char *send = strchr(s,']');
3280af22 1729 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1730
1731 if (!send) /* has to be an expression */
1732 return TRUE;
1733
1734 Zero(seen,256,char);
1735 if (*s == '$')
1736 weight -= 3;
1737 else if (isDIGIT(*s)) {
1738 if (s[1] != ']') {
1739 if (isDIGIT(s[1]) && s[2] == ']')
1740 weight -= 10;
1741 }
1742 else
1743 weight -= 100;
1744 }
1745 for (; s < send; s++) {
1746 last_un_char = un_char;
1747 un_char = (unsigned char)*s;
1748 switch (*s) {
1749 case '@':
1750 case '&':
1751 case '$':
1752 weight -= seen[un_char] * 10;
7e2040f0 1753 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1754 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1755 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1756 weight -= 100;
1757 else
1758 weight -= 10;
1759 }
1760 else if (*s == '$' && s[1] &&
93a17b20
LW
1761 strchr("[#!%*<>()-=",s[1])) {
1762 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1763 weight -= 10;
1764 else
1765 weight -= 1;
1766 }
1767 break;
1768 case '\\':
1769 un_char = 254;
1770 if (s[1]) {
93a17b20 1771 if (strchr("wds]",s[1]))
79072805
LW
1772 weight += 100;
1773 else if (seen['\''] || seen['"'])
1774 weight += 1;
93a17b20 1775 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1776 weight += 40;
1777 else if (isDIGIT(s[1])) {
1778 weight += 40;
1779 while (s[1] && isDIGIT(s[1]))
1780 s++;
1781 }
1782 }
1783 else
1784 weight += 100;
1785 break;
1786 case '-':
1787 if (s[1] == '\\')
1788 weight += 50;
93a17b20 1789 if (strchr("aA01! ",last_un_char))
79072805 1790 weight += 30;
93a17b20 1791 if (strchr("zZ79~",s[1]))
79072805 1792 weight += 30;
f27ffc4a
GS
1793 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1794 weight -= 5; /* cope with negative subscript */
79072805
LW
1795 break;
1796 default:
93a17b20 1797 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1798 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1799 char *d = tmpbuf;
1800 while (isALPHA(*s))
1801 *d++ = *s++;
1802 *d = '\0';
1803 if (keyword(tmpbuf, d - tmpbuf))
1804 weight -= 150;
1805 }
1806 if (un_char == last_un_char + 1)
1807 weight += 5;
1808 weight -= seen[un_char];
1809 break;
1810 }
1811 seen[un_char]++;
1812 }
1813 if (weight >= 0) /* probably a character class */
1814 return FALSE;
1815 }
1816
1817 return TRUE;
1818}
ffed7fef 1819
ffb4593c
NT
1820/*
1821 * S_intuit_method
1822 *
1823 * Does all the checking to disambiguate
1824 * foo bar
1825 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1826 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1827 *
1828 * First argument is the stuff after the first token, e.g. "bar".
1829 *
1830 * Not a method if bar is a filehandle.
1831 * Not a method if foo is a subroutine prototyped to take a filehandle.
1832 * Not a method if it's really "Foo $bar"
1833 * Method if it's "foo $bar"
1834 * Not a method if it's really "print foo $bar"
1835 * Method if it's really "foo package::" (interpreted as package->foo)
1836 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1837 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1838 * =>
1839 */
1840
76e3520e 1841STATIC int
cea2e8a9 1842S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1843{
1844 char *s = start + (*start == '$');
3280af22 1845 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1846 STRLEN len;
1847 GV* indirgv;
1848
1849 if (gv) {
b6c543e3 1850 CV *cv;
a0d0e21e
LW
1851 if (GvIO(gv))
1852 return 0;
b6c543e3
IZ
1853 if ((cv = GvCVu(gv))) {
1854 char *proto = SvPVX(cv);
1855 if (proto) {
1856 if (*proto == ';')
1857 proto++;
1858 if (*proto == '*')
1859 return 0;
1860 }
1861 } else
a0d0e21e
LW
1862 gv = 0;
1863 }
8903cb82 1864 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1865 /* start is the beginning of the possible filehandle/object,
1866 * and s is the end of it
1867 * tmpbuf is a copy of it
1868 */
1869
a0d0e21e 1870 if (*start == '$') {
3280af22 1871 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1872 return 0;
1873 s = skipspace(s);
3280af22
NIS
1874 PL_bufptr = start;
1875 PL_expect = XREF;
a0d0e21e
LW
1876 return *s == '(' ? FUNCMETH : METHOD;
1877 }
1878 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1879 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1880 len -= 2;
1881 tmpbuf[len] = '\0';
1882 goto bare_package;
1883 }
1884 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1885 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1886 return 0;
1887 /* filehandle or package name makes it a method */
89bfa8cd 1888 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1889 s = skipspace(s);
3280af22 1890 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1891 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1892 bare_package:
3280af22 1893 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1894 newSVpvn(tmpbuf,len));
3280af22
NIS
1895 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1896 PL_expect = XTERM;
a0d0e21e 1897 force_next(WORD);
3280af22 1898 PL_bufptr = s;
a0d0e21e
LW
1899 return *s == '(' ? FUNCMETH : METHOD;
1900 }
1901 }
1902 return 0;
1903}
1904
ffb4593c
NT
1905/*
1906 * S_incl_perldb
1907 * Return a string of Perl code to load the debugger. If PERL5DB
1908 * is set, it will return the contents of that, otherwise a
1909 * compile-time require of perl5db.pl.
1910 */
1911
76e3520e 1912STATIC char*
cea2e8a9 1913S_incl_perldb(pTHX)
a0d0e21e 1914{
3280af22 1915 if (PL_perldb) {
76e3520e 1916 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1917
1918 if (pdb)
1919 return pdb;
61bb5906 1920 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1921 return "BEGIN { require 'perl5db.pl' }";
1922 }
1923 return "";
1924}
1925
1926
16d20bd9 1927/* Encoded script support. filter_add() effectively inserts a
4e553d73 1928 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1929 * Note that the filter function only applies to the current source file
1930 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1931 *
1932 * The datasv parameter (which may be NULL) can be used to pass
1933 * private data to this instance of the filter. The filter function
1934 * can recover the SV using the FILTER_DATA macro and use it to
1935 * store private buffers and state information.
1936 *
1937 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1938 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1939 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1940 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1941 * private use must be set using malloc'd pointers.
1942 */
16d20bd9
AD
1943
1944SV *
864dbfa3 1945Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1946{
f4c556ac
GS
1947 if (!funcp)
1948 return Nullsv;
1949
3280af22
NIS
1950 if (!PL_rsfp_filters)
1951 PL_rsfp_filters = newAV();
16d20bd9 1952 if (!datasv)
8c52afec 1953 datasv = NEWSV(255,0);
16d20bd9 1954 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1955 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1956 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1957 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1958 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1959 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1960 av_unshift(PL_rsfp_filters, 1);
1961 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1962 return(datasv);
1963}
4e553d73 1964
16d20bd9
AD
1965
1966/* Delete most recently added instance of this filter function. */
a0d0e21e 1967void
864dbfa3 1968Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1969{
e0c19803 1970 SV *datasv;
f4c556ac 1971 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1972 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1973 return;
1974 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1975 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1976 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1977 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1978 IoANY(datasv) = (void *)NULL;
3280af22 1979 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1980
16d20bd9
AD
1981 return;
1982 }
1983 /* we need to search for the correct entry and clear it */
cea2e8a9 1984 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1985}
1986
1987
1988/* Invoke the n'th filter function for the current rsfp. */
1989I32
864dbfa3 1990Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1991
1992
8ac85365 1993 /* 0 = read one text line */
a0d0e21e 1994{
16d20bd9
AD
1995 filter_t funcp;
1996 SV *datasv = NULL;
e50aee73 1997
3280af22 1998 if (!PL_rsfp_filters)
16d20bd9 1999 return -1;
3280af22 2000 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2001 /* Provide a default input filter to make life easy. */
2002 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2003 DEBUG_P(PerlIO_printf(Perl_debug_log,
2004 "filter_read %d: from rsfp\n", idx));
4e553d73 2005 if (maxlen) {
16d20bd9
AD
2006 /* Want a block */
2007 int len ;
2008 int old_len = SvCUR(buf_sv) ;
2009
2010 /* ensure buf_sv is large enough */
2011 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2012 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2013 if (PerlIO_error(PL_rsfp))
37120919
AD
2014 return -1; /* error */
2015 else
2016 return 0 ; /* end of file */
2017 }
16d20bd9
AD
2018 SvCUR_set(buf_sv, old_len + len) ;
2019 } else {
2020 /* Want a line */
3280af22
NIS
2021 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2022 if (PerlIO_error(PL_rsfp))
37120919
AD
2023 return -1; /* error */
2024 else
2025 return 0 ; /* end of file */
2026 }
16d20bd9
AD
2027 }
2028 return SvCUR(buf_sv);
2029 }
2030 /* Skip this filter slot if filter has been deleted */
3280af22 2031 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2032 DEBUG_P(PerlIO_printf(Perl_debug_log,
2033 "filter_read %d: skipped (filter deleted)\n",
2034 idx));
16d20bd9
AD
2035 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2036 }
2037 /* Get function pointer hidden within datasv */
4755096e 2038 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2039 DEBUG_P(PerlIO_printf(Perl_debug_log,
2040 "filter_read %d: via function %p (%s)\n",
2041 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2042 /* Call function. The function is expected to */
2043 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2044 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2045 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2046}
2047
76e3520e 2048STATIC char *
cea2e8a9 2049S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2050{
c39cd008 2051#ifdef PERL_CR_FILTER
3280af22 2052 if (!PL_rsfp_filters) {
c39cd008 2053 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2054 }
2055#endif
3280af22 2056 if (PL_rsfp_filters) {
16d20bd9 2057
55497cff
PP
2058 if (!append)
2059 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2060 if (FILTER_READ(0, sv, 0) > 0)
2061 return ( SvPVX(sv) ) ;
2062 else
2063 return Nullch ;
2064 }
9d116dd7 2065 else
fd049845 2066 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2067}
2068
01ec43d0
GS
2069STATIC HV *
2070S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2071{
2072 GV *gv;
2073
01ec43d0 2074 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2075 return PL_curstash;
2076
2077 if (len > 2 &&
2078 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2079 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2080 {
2081 return GvHV(gv); /* Foo:: */
def3634b
GS
2082 }
2083
2084 /* use constant CLASS => 'MyClass' */
2085 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2086 SV *sv;
2087 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2088 pkgname = SvPV_nolen(sv);
2089 }
2090 }
2091
2092 return gv_stashpv(pkgname, FALSE);
2093}
a0d0e21e 2094
748a9306
LW
2095#ifdef DEBUGGING
2096 static char* exp_name[] =
09bef843
SB
2097 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2098 "ATTRTERM", "TERMBLOCK"
2099 };
748a9306 2100#endif
463ee0b2 2101
02aa26ce
NT
2102/*
2103 yylex
2104
2105 Works out what to call the token just pulled out of the input
2106 stream. The yacc parser takes care of taking the ops we return and
2107 stitching them into a tree.
2108
2109 Returns:
2110 PRIVATEREF
2111
2112 Structure:
2113 if read an identifier
2114 if we're in a my declaration
2115 croak if they tried to say my($foo::bar)
2116 build the ops for a my() declaration
2117 if it's an access to a my() variable
2118 are we in a sort block?
2119 croak if my($a); $a <=> $b
2120 build ops for access to a my() variable
2121 if in a dq string, and they've said @foo and we can't find @foo
2122 croak
2123 build ops for a bareword
2124 if we already built the token before, use it.
2125*/
2126
dba4d153 2127#ifdef USE_PURE_BISON
864dbfa3 2128int
dba4d153 2129Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2130{
20141f0e
RI
2131 int r;
2132
6f202aea 2133 yyactlevel++;
20141f0e
RI
2134 yylval_pointer[yyactlevel] = lvalp;
2135 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2136 if (yyactlevel >= YYMAXLEVEL)
2137 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2138
dba4d153 2139 r = Perl_yylex(aTHX);
20141f0e 2140
d8ae6756
RI
2141 if (yyactlevel > 0)
2142 yyactlevel--;
20141f0e
RI
2143
2144 return r;
2145}
dba4d153 2146#endif
20141f0e 2147
dba4d153
JH
2148#ifdef __SC__
2149#pragma segment Perl_yylex
2150#endif
dba4d153 2151int
dba4d153 2152Perl_yylex(pTHX)
20141f0e 2153{
79072805 2154 register char *s;
378cc40b 2155 register char *d;
79072805 2156 register I32 tmp;
463ee0b2 2157 STRLEN len;
161b471a
NIS
2158 GV *gv = Nullgv;
2159 GV **gvp = 0;
aa7440fb 2160 bool bof = FALSE;
a687059c 2161
02aa26ce 2162 /* check if there's an identifier for us to look at */
3280af22 2163 if (PL_pending_ident) {
02aa26ce 2164 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2165 char pit = PL_pending_ident;
2166 PL_pending_ident = 0;
bbce6d69 2167
607df283
SC
2168 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2169 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2170
02aa26ce
NT
2171 /* if we're in a my(), we can't allow dynamics here.
2172 $foo'bar has already been turned into $foo::bar, so
2173 just check for colons.
2174
2175 if it's a legal name, the OP is a PADANY.
2176 */
3280af22 2177 if (PL_in_my) {
77ca0c92 2178 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2179 if (strchr(PL_tokenbuf,':'))
2180 yyerror(Perl_form(aTHX_ "No package name allowed for "
2181 "variable %s in \"our\"",
2182 PL_tokenbuf));
77ca0c92
LW
2183 tmp = pad_allocmy(PL_tokenbuf);
2184 }
2185 else {
2186 if (strchr(PL_tokenbuf,':'))
2187 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2188
77ca0c92
LW
2189 yylval.opval = newOP(OP_PADANY, 0);
2190 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2191 return PRIVATEREF;
2192 }
bbce6d69
PP
2193 }
2194
4e553d73 2195 /*
02aa26ce
NT
2196 build the ops for accesses to a my() variable.
2197
2198 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2199 then used in a comparison. This catches most, but not
2200 all cases. For instance, it catches
2201 sort { my($a); $a <=> $b }
2202 but not
2203 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2204 (although why you'd do that is anyone's guess).
2205 */
2206
3280af22 2207 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2208#ifdef USE_THREADS
54b9620d 2209 /* Check for single character per-thread SVs */
3280af22
NIS
2210 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2211 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2212 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2213 {
2faa37cc 2214 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2215 yylval.opval->op_targ = tmp;
2216 return PRIVATEREF;
2217 }
2218#endif /* USE_THREADS */
3280af22 2219 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2220 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2221 /* might be an "our" variable" */
f472eb5c 2222 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2223 /* build ops for a bareword */
f472eb5c
GS
2224 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2225 sv_catpvn(sym, "::", 2);
2226 sv_catpv(sym, PL_tokenbuf+1);
2227 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2228 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2229 gv_fetchpv(SvPVX(sym),
77ca0c92 2230 (PL_in_eval
f472eb5c
GS
2231 ? (GV_ADDMULTI | GV_ADDINEVAL)
2232 : TRUE
77ca0c92
LW
2233 ),
2234 ((PL_tokenbuf[0] == '$') ? SVt_PV
2235 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2236 : SVt_PVHV));
2237 return WORD;
2238 }
2239
02aa26ce 2240 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2241 if (PL_last_lop_op == OP_SORT &&
2242 PL_tokenbuf[0] == '$' &&
2243 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2244 && !PL_tokenbuf[2])
bbce6d69 2245 {
3280af22
NIS
2246 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2247 d < PL_bufend && *d != '\n';
a863c7d1
MB
2248 d++)
2249 {
2250 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2251 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2252 PL_tokenbuf);
a863c7d1 2253 }
bbce6d69
PP
2254 }
2255 }
bbce6d69 2256
a863c7d1
MB
2257 yylval.opval = newOP(OP_PADANY, 0);
2258 yylval.opval->op_targ = tmp;
2259 return PRIVATEREF;
2260 }
bbce6d69
PP
2261 }
2262
02aa26ce
NT
2263 /*
2264 Whine if they've said @foo in a doublequoted string,
2265 and @foo isn't a variable we can find in the symbol
2266 table.
2267 */
3280af22
NIS
2268 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2269 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2270 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2271 && ckWARN(WARN_AMBIGUOUS))
2272 {
2273 /* Downgraded from fatal to warning 20000522 mjd */
2274 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2275 "Possible unintended interpolation of %s in string",
2276 PL_tokenbuf);
2277 }
bbce6d69
PP
2278 }
2279
02aa26ce 2280 /* build ops for a bareword */
3280af22 2281 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2282 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2283 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2284 ((PL_tokenbuf[0] == '$') ? SVt_PV
2285 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2286 : SVt_PVHV));
2287 return WORD;
2288 }
2289
02aa26ce
NT
2290 /* no identifier pending identification */
2291
3280af22 2292 switch (PL_lex_state) {
79072805
LW
2293#ifdef COMMENTARY
2294 case LEX_NORMAL: /* Some compilers will produce faster */
2295 case LEX_INTERPNORMAL: /* code if we comment these out. */
2296 break;
2297#endif
2298
09bef843 2299 /* when we've already built the next token, just pull it out of the queue */
79072805 2300 case LEX_KNOWNEXT:
3280af22
NIS
2301 PL_nexttoke--;
2302 yylval = PL_nextval[PL_nexttoke];
2303 if (!PL_nexttoke) {
2304 PL_lex_state = PL_lex_defer;
2305 PL_expect = PL_lex_expect;
2306 PL_lex_defer = LEX_NORMAL;
463ee0b2 2307 }
607df283 2308 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2309 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2310 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2311
3280af22 2312 return(PL_nexttype[PL_nexttoke]);
79072805 2313
02aa26ce 2314 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2315 when we get here, PL_bufptr is at the \
02aa26ce 2316 */
79072805
LW
2317 case LEX_INTERPCASEMOD:
2318#ifdef DEBUGGING
3280af22 2319 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2320 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2321#endif
02aa26ce 2322 /* handle \E or end of string */
3280af22 2323 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2324 char oldmod;
02aa26ce
NT
2325
2326 /* if at a \E */
3280af22
NIS
2327 if (PL_lex_casemods) {
2328 oldmod = PL_lex_casestack[--PL_lex_casemods];
2329 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2330
3280af22
NIS
2331 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2332 PL_bufptr += 2;
2333 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2334 }
79072805
LW
2335 return ')';
2336 }
3280af22
NIS
2337 if (PL_bufptr != PL_bufend)
2338 PL_bufptr += 2;
2339 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2340 return yylex();
79072805
LW
2341 }
2342 else {
607df283
SC
2343 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2344 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2345 s = PL_bufptr + 1;
79072805
LW
2346 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2347 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2348 if (strchr("LU", *s) &&
3280af22 2349 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2350 {
3280af22 2351 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2352 return ')';
2353 }
3280af22
NIS
2354 if (PL_lex_casemods > 10) {
2355 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2356 if (newlb != PL_lex_casestack) {
a0d0e21e 2357 SAVEFREEPV(newlb);
3280af22 2358 PL_lex_casestack = newlb;
a0d0e21e
LW
2359 }
2360 }
3280af22
NIS
2361 PL_lex_casestack[PL_lex_casemods++] = *s;
2362 PL_lex_casestack[PL_lex_casemods] = '\0';
2363 PL_lex_state = LEX_INTERPCONCAT;
2364 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2365 force_next('(');
2366 if (*s == 'l')
3280af22 2367 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2368 else if (*s == 'u')
3280af22 2369 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2370 else if (*s == 'L')
3280af22 2371 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2372 else if (*s == 'U')
3280af22 2373 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2374 else if (*s == 'Q')
3280af22 2375 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2376 else
cea2e8a9 2377 Perl_croak(aTHX_ "panic: yylex");
3280af22 2378 PL_bufptr = s + 1;
79072805 2379 force_next(FUNC);
3280af22
NIS
2380 if (PL_lex_starts) {
2381 s = PL_bufptr;
2382 PL_lex_starts = 0;
79072805
LW
2383 Aop(OP_CONCAT);
2384 }
2385 else
cea2e8a9 2386 return yylex();
79072805
LW
2387 }
2388
55497cff
PP
2389 case LEX_INTERPPUSH:
2390 return sublex_push();
2391
79072805 2392 case LEX_INTERPSTART:
3280af22 2393 if (PL_bufptr == PL_bufend)
79072805 2394 return sublex_done();
607df283
SC
2395 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2396 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2397 PL_expect = XTERM;
2398 PL_lex_dojoin = (*PL_bufptr == '@');
2399 PL_lex_state = LEX_INTERPNORMAL;
2400 if (PL_lex_dojoin) {
2401 PL_nextval[PL_nexttoke].ival = 0;
79072805 2402 force_next(',');
554b3eca 2403#ifdef USE_THREADS
533c011a
NIS
2404 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2405 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2406 force_next(PRIVATEREF);
2407#else
a0d0e21e 2408 force_ident("\"", '$');
554b3eca 2409#endif /* USE_THREADS */
3280af22 2410 PL_nextval[PL_nexttoke].ival = 0;
79072805 2411 force_next('$');
3280af22 2412 PL_nextval[PL_nexttoke].ival = 0;
79072805 2413 force_next('(');
3280af22 2414 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2415 force_next(FUNC);
2416 }
3280af22
NIS
2417 if (PL_lex_starts++) {
2418 s = PL_bufptr;
79072805
LW
2419 Aop(OP_CONCAT);
2420 }
cea2e8a9 2421 return yylex();
79072805
LW
2422
2423 case LEX_INTERPENDMAYBE:
3280af22
NIS
2424 if (intuit_more(PL_bufptr)) {
2425 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2426 break;
2427 }
2428 /* FALL THROUGH */
2429
2430 case LEX_INTERPEND:
3280af22
NIS
2431 if (PL_lex_dojoin) {
2432 PL_lex_dojoin = FALSE;
2433 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2434 return ')';
2435 }
43a16006 2436 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2437 && SvEVALED(PL_lex_repl))
43a16006 2438 {
e9fa98b2 2439 if (PL_bufptr != PL_bufend)
cea2e8a9 2440 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2441 PL_lex_repl = Nullsv;
2442 }
79072805
LW
2443 /* FALLTHROUGH */
2444 case LEX_INTERPCONCAT:
2445#ifdef DEBUGGING
3280af22 2446 if (PL_lex_brackets)
cea2e8a9 2447 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2448#endif
3280af22 2449 if (PL_bufptr == PL_bufend)
79072805
LW
2450 return sublex_done();
2451
3280af22
NIS
2452 if (SvIVX(PL_linestr) == '\'') {
2453 SV *sv = newSVsv(PL_linestr);
2454 if (!PL_lex_inpat)
76e3520e 2455 sv = tokeq(sv);
3280af22 2456 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2457 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2458 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2459 s = PL_bufend;
79072805
LW
2460 }
2461 else {
3280af22 2462 s = scan_const(PL_bufptr);
79072805 2463 if (*s == '\\')
3280af22 2464 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2465 else
3280af22 2466 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2467 }
2468
3280af22
NIS
2469 if (s != PL_bufptr) {
2470 PL_nextval[PL_nexttoke] = yylval;
2471 PL_expect = XTERM;
79072805 2472 force_next(THING);
3280af22 2473 if (PL_lex_starts++)
79072805
LW
2474 Aop(OP_CONCAT);
2475 else {
3280af22 2476 PL_bufptr = s;
cea2e8a9 2477 return yylex();
79072805
LW
2478 }
2479 }
2480
cea2e8a9 2481 return yylex();
a0d0e21e 2482 case LEX_FORMLINE:
3280af22
NIS
2483 PL_lex_state = LEX_NORMAL;
2484 s = scan_formline(PL_bufptr);
2485 if (!PL_lex_formbrack)
a0d0e21e
LW
2486 goto rightbracket;
2487 OPERATOR(';');
79072805
LW
2488 }
2489
3280af22
NIS
2490 s = PL_bufptr;
2491 PL_oldoldbufptr = PL_oldbufptr;
2492 PL_oldbufptr = s;
607df283 2493 DEBUG_T( {
bf49b057
GS
2494 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2495 exp_name[PL_expect], s);
79072805 2496 } )
463ee0b2
LW
2497
2498 retry:
378cc40b
LW
2499 switch (*s) {
2500 default:
7e2040f0 2501 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2502 goto keylookup;
cea2e8a9 2503 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2504 case 4:
2505 case 26:
2506 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2507 case 0:
3280af22
NIS
2508 if (!PL_rsfp) {
2509 PL_last_uni = 0;
2510 PL_last_lop = 0;
2511 if (PL_lex_brackets)
d98d5fff 2512 yyerror("Missing right curly or square bracket");
4e553d73 2513 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2514 "### Tokener got EOF\n");
2515 } )
79072805 2516 TOKEN(0);
463ee0b2 2517 }
3280af22 2518 if (s++ < PL_bufend)
a687059c 2519 goto retry; /* ignore stray nulls */
3280af22
NIS
2520 PL_last_uni = 0;
2521 PL_last_lop = 0;
2522 if (!PL_in_eval && !PL_preambled) {
2523 PL_preambled = TRUE;
2524 sv_setpv(PL_linestr,incl_perldb());
2525 if (SvCUR(PL_linestr))
2526 sv_catpv(PL_linestr,";");
2527 if (PL_preambleav){
2528 while(AvFILLp(PL_preambleav) >= 0) {
2529 SV *tmpsv = av_shift(PL_preambleav);
2530 sv_catsv(PL_linestr, tmpsv);
2531 sv_catpv(PL_linestr, ";");
91b7def8
PP
2532 sv_free(tmpsv);
2533 }
3280af22
NIS
2534 sv_free((SV*)PL_preambleav);
2535 PL_preambleav = NULL;
91b7def8 2536 }
3280af22
NIS
2537 if (PL_minus_n || PL_minus_p) {
2538 sv_catpv(PL_linestr, "LINE: while (<>) {");
2539 if (PL_minus_l)
2540 sv_catpv(PL_linestr,"chomp;");
2541 if (PL_minus_a) {
8fd239a7
CS
2542 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2543 if (gv)
2544 GvIMPORTED_AV_on(gv);
3280af22
NIS
2545 if (PL_minus_F) {
2546 if (strchr("/'\"", *PL_splitstr)
2547 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2548 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2549 else {
2550 char delim;
2551 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2552 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2553 delim = *s;
cea2e8a9 2554 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2555 "q" + (delim == '\''), delim);
3280af22 2556 for (s = PL_splitstr; *s; s++) {
54310121 2557 if (*s == '\\')
3280af22
NIS
2558 sv_catpvn(PL_linestr, "\\", 1);
2559 sv_catpvn(PL_linestr, s, 1);
54310121 2560 }
cea2e8a9 2561 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2562 }
2304df62
AD
2563 }
2564 else
3280af22 2565 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2566 }
79072805 2567 }
3280af22
NIS
2568 sv_catpv(PL_linestr, "\n");
2569 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2570 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2571 PL_last_lop = PL_last_uni = Nullch;
3280af22 2572 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2573 SV *sv = NEWSV(85,0);
2574
2575 sv_upgrade(sv, SVt_PVMG);
3280af22 2576 sv_setsv(sv,PL_linestr);
57843af0 2577 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2578 }
79072805 2579 goto retry;
a687059c 2580 }
e929a76b 2581 do {
aa7440fb 2582 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2583 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2584 fake_eof:
2585 if (PL_rsfp) {
2586 if (PL_preprocess && !PL_in_eval)
2587 (void)PerlProc_pclose(PL_rsfp);
2588 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2589 PerlIO_clearerr(PL_rsfp);
2590 else
2591 (void)PerlIO_close(PL_rsfp);
2592 PL_rsfp = Nullfp;
2593 PL_doextract = FALSE;
2594 }
2595 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2596 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2597 sv_catpv(PL_linestr,";}");
2598 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2599 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2600 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2601 PL_minus_n = PL_minus_p = 0;
2602 goto retry;
2603 }
2604 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2605 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2606 sv_setpv(PL_linestr,"");
2607 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2608 }
2609 /* if it looks like the start of a BOM, check if it in fact is */
2610 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2611#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2612# ifdef __GNU_LIBRARY__
2613# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2614# define FTELL_FOR_PIPE_IS_BROKEN
2615# endif
e3f494f1
JH
2616# else
2617# ifdef __GLIBC__
2618# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2619# define FTELL_FOR_PIPE_IS_BROKEN
2620# endif
2621# endif
226017aa
DD
2622# endif
2623#endif
2624#ifdef FTELL_FOR_PIPE_IS_BROKEN
2625 /* This loses the possibility to detect the bof
2626 * situation on perl -P when the libc5 is being used.
2627 * Workaround? Maybe attach some extra state to PL_rsfp?
2628 */
2629 if (!PL_preprocess)
7e28d3af 2630 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2631#else
7e28d3af 2632 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2633#endif
7e28d3af 2634 if (bof) {
3280af22 2635 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2636 s = swallow_bom((U8*)s);
e929a76b 2637 }
378cc40b 2638 }
3280af22 2639 if (PL_doextract) {
a0d0e21e 2640 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2641 PL_doextract = FALSE;
a0d0e21e
LW
2642
2643 /* Incest with pod. */
2644 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2645 sv_setpv(PL_linestr, "");
2646 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2647 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2648 PL_last_lop = PL_last_uni = Nullch;
3280af22 2649 PL_doextract = FALSE;
a0d0e21e 2650 }
4e553d73 2651 }
463ee0b2 2652 incline(s);
3280af22
NIS
2653 } while (PL_doextract);
2654 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2655 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2656 SV *sv = NEWSV(85,0);
a687059c 2657
93a17b20 2658 sv_upgrade(sv, SVt_PVMG);
3280af22 2659 sv_setsv(sv,PL_linestr);
57843af0 2660 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2661 }
3280af22 2662 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2663 PL_last_lop = PL_last_uni = Nullch;
57843af0 2664 if (CopLINE(PL_curcop) == 1) {
3280af22 2665 while (s < PL_bufend && isSPACE(*s))
79072805 2666 s++;
a0d0e21e 2667 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2668 s++;
44a8e56a 2669 d = Nullch;
3280af22 2670 if (!PL_in_eval) {
44a8e56a
PP
2671 if (*s == '#' && *(s+1) == '!')
2672 d = s + 2;
2673#ifdef ALTERNATE_SHEBANG
2674 else {
2675 static char as[] = ALTERNATE_SHEBANG;
2676 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2677 d = s + (sizeof(as) - 1);
2678 }
2679#endif /* ALTERNATE_SHEBANG */
2680 }
2681 if (d) {
b8378b72 2682 char *ipath;
774d564b 2683 char *ipathend;
b8378b72 2684
774d564b 2685 while (isSPACE(*d))
b8378b72
CS
2686 d++;
2687 ipath = d;
774d564b
PP
2688 while (*d && !isSPACE(*d))
2689 d++;
2690 ipathend = d;
2691
2692#ifdef ARG_ZERO_IS_SCRIPT
2693 if (ipathend > ipath) {
2694 /*
2695 * HP-UX (at least) sets argv[0] to the script name,
2696 * which makes $^X incorrect. And Digital UNIX and Linux,
2697 * at least, set argv[0] to the basename of the Perl
2698 * interpreter. So, having found "#!", we'll set it right.
2699 */
2700 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2701 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2702 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2703 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2704 SvSETMAGIC(x);
2705 }
774d564b 2706 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2707 }
774d564b 2708#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2709
2710 /*
2711 * Look for options.
2712 */
748a9306 2713 d = instr(s,"perl -");
84e30d1a 2714 if (!d) {
748a9306 2715 d = instr(s,"perl");
84e30d1a
GS
2716#if defined(DOSISH)
2717 /* avoid getting into infinite loops when shebang
2718 * line contains "Perl" rather than "perl" */
2719 if (!d) {
2720 for (d = ipathend-4; d >= ipath; --d) {
2721 if ((*d == 'p' || *d == 'P')
2722 && !ibcmp(d, "perl", 4))
2723 {
2724 break;
2725 }
2726 }
2727 if (d < ipath)
2728 d = Nullch;
2729 }
2730#endif
2731 }
44a8e56a
PP
2732#ifdef ALTERNATE_SHEBANG
2733 /*
2734 * If the ALTERNATE_SHEBANG on this system starts with a
2735 * character that can be part of a Perl expression, then if
2736 * we see it but not "perl", we're probably looking at the
2737 * start of Perl code, not a request to hand off to some
2738 * other interpreter. Similarly, if "perl" is there, but
2739 * not in the first 'word' of the line, we assume the line
2740 * contains the start of the Perl program.
44a8e56a
PP
2741 */
2742 if (d && *s != '#') {
774d564b 2743 char *c = ipath;
44a8e56a
PP
2744 while (*c && !strchr("; \t\r\n\f\v#", *c))
2745 c++;
2746 if (c < d)
2747 d = Nullch; /* "perl" not in first word; ignore */
2748 else
2749 *s = '#'; /* Don't try to parse shebang line */
2750 }
774d564b 2751#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2752#ifndef MACOS_TRADITIONAL
748a9306 2753 if (!d &&
44a8e56a 2754 *s == '#' &&
774d564b 2755 ipathend > ipath &&
3280af22 2756 !PL_minus_c &&
748a9306 2757 !instr(s,"indir") &&
3280af22 2758 instr(PL_origargv[0],"perl"))
748a9306 2759 {
9f68db38 2760 char **newargv;
9f68db38 2761
774d564b
PP
2762 *ipathend = '\0';
2763 s = ipathend + 1;
3280af22 2764 while (s < PL_bufend && isSPACE(*s))
9f68db38 2765 s++;
3280af22
NIS
2766 if (s < PL_bufend) {
2767 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2768 newargv[1] = s;
3280af22 2769 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2770 s++;
2771 *s = '\0';
3280af22 2772 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2773 }
2774 else
3280af22 2775 newargv = PL_origargv;
774d564b 2776 newargv[0] = ipath;
b4748376 2777 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2778 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2779 }
bf4acbe4 2780#endif
748a9306 2781 if (d) {
3280af22
NIS
2782 U32 oldpdb = PL_perldb;
2783 bool oldn = PL_minus_n;
2784 bool oldp = PL_minus_p;
748a9306
LW
2785
2786 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2787 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2788
2789 if (*d++ == '-') {
8cc95fdb
PP
2790 do {
2791 if (*d == 'M' || *d == 'm') {
2792 char *m = d;
2793 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2794 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2795 (int)(d - m), m);
2796 }
2797 d = moreswitches(d);
2798 } while (d);
155aba94
GS
2799 if ((PERLDB_LINE && !oldpdb) ||
2800 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2801 /* if we have already added "LINE: while (<>) {",
2802 we must not do it again */
748a9306 2803 {
3280af22
NIS
2804 sv_setpv(PL_linestr, "");
2805 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2806 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2807 PL_last_lop = PL_last_uni = Nullch;
3280af22 2808 PL_preambled = FALSE;
84902520 2809 if (PERLDB_LINE)
3280af22 2810 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2811 goto retry;
2812 }
a0d0e21e 2813 }
79072805 2814 }
9f68db38 2815 }
79072805 2816 }
3280af22
NIS
2817 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2818 PL_bufptr = s;
2819 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2820 return yylex();
ae986130 2821 }
378cc40b 2822 goto retry;
4fdae800 2823 case '\r':
6a27c188 2824#ifdef PERL_STRICT_CR
cea2e8a9 2825 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2826 Perl_croak(aTHX_
cc507455 2827 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2828#endif
4fdae800 2829 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2830#ifdef MACOS_TRADITIONAL
2831 case '\312':
2832#endif
378cc40b
LW
2833 s++;
2834 goto retry;
378cc40b 2835 case '#':
e929a76b 2836 case '\n':
3280af22 2837 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2838 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2839 /* handle eval qq[#line 1 "foo"\n ...] */
2840 CopLINE_dec(PL_curcop);
2841 incline(s);
2842 }
3280af22 2843 d = PL_bufend;
a687059c 2844 while (s < d && *s != '\n')
378cc40b 2845 s++;
0f85fab0 2846 if (s < d)
378cc40b 2847 s++;
463ee0b2 2848 incline(s);
3280af22
NIS
2849 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2850 PL_bufptr = s;
2851 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2852 return yylex();
a687059c 2853 }
378cc40b 2854 }
a687059c 2855 else {
378cc40b 2856 *s = '\0';
3280af22 2857 PL_bufend = s;
a687059c 2858 }
378cc40b
LW
2859 goto retry;
2860 case '-':
79072805 2861 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2862 I32 ftst = 0;
2863
378cc40b 2864 s++;
3280af22 2865 PL_bufptr = s;
748a9306
LW
2866 tmp = *s++;
2867
bf4acbe4 2868 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2869 s++;
2870
2871 if (strnEQ(s,"=>",2)) {
3280af22 2872 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2873 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2874 "### Saw unary minus before =>, forcing word '%s'\n", s);
2875 } )
748a9306
LW
2876 OPERATOR('-'); /* unary minus */
2877 }
3280af22 2878 PL_last_uni = PL_oldbufptr;
748a9306 2879 switch (tmp) {
e5edeb50
JH
2880 case 'r': ftst = OP_FTEREAD; break;
2881 case 'w': ftst = OP_FTEWRITE; break;
2882 case 'x': ftst = OP_FTEEXEC; break;
2883 case 'o': ftst = OP_FTEOWNED; break;
2884 case 'R': ftst = OP_FTRREAD; break;
2885 case 'W': ftst = OP_FTRWRITE; break;
2886 case 'X': ftst = OP_FTREXEC; break;
2887 case 'O': ftst = OP_FTROWNED; break;
2888 case 'e': ftst = OP_FTIS; break;
2889 case 'z': ftst = OP_FTZERO; break;
2890 case 's': ftst = OP_FTSIZE; break;
2891 case 'f': ftst = OP_FTFILE; break;
2892 case 'd': ftst = OP_FTDIR; break;
2893 case 'l': ftst = OP_FTLINK; break;
2894 case 'p': ftst = OP_FTPIPE; break;
2895 case 'S': ftst = OP_FTSOCK; break;
2896 case 'u': ftst = OP_FTSUID; break;
2897 case 'g': ftst = OP_FTSGID; break;
2898 case 'k': ftst = OP_FTSVTX; break;
2899 case 'b': ftst = OP_FTBLK; break;
2900 case 'c': ftst = OP_FTCHR; break;
2901 case 't': ftst = OP_FTTTY; break;
2902 case 'T': ftst = OP_FTTEXT; break;
2903 case 'B': ftst = OP_FTBINARY; break;
2904 case 'M': case 'A': case 'C':
2905 gv_fetchpv("\024",TRUE, SVt_PV);
2906 switch (tmp) {
2907 case 'M': ftst = OP_FTMTIME; break;
2908 case 'A': ftst = OP_FTATIME; break;
2909 case 'C': ftst = OP_FTCTIME; break;
2910 default: break;
2911 }
2912 break;
378cc40b 2913 default:
378cc40b
LW
2914 break;
2915 }
e5edeb50
JH
2916 if (ftst) {
2917 PL_last_lop_op = ftst;
4e553d73 2918 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2919 "### Saw file test %c\n", (int)ftst);
e5edeb50 2920 } )
e5edeb50
JH
2921 FTST(ftst);
2922 }
2923 else {
2924 /* Assume it was a minus followed by a one-letter named
2925 * subroutine call (or a -bareword), then. */
95c31fe3 2926 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2927 "### %c looked like a file test but was not\n",
2928 (int)ftst);
95c31fe3 2929 } )
e5edeb50
JH
2930 s -= 2;
2931 }
378cc40b 2932 }
a687059c
LW
2933 tmp = *s++;
2934 if (*s == tmp) {
2935 s++;
3280af22 2936 if (PL_expect == XOPERATOR)
79072805
LW
2937 TERM(POSTDEC);
2938 else
2939 OPERATOR(PREDEC);
2940 }
2941 else if (*s == '>') {
2942 s++;
2943 s = skipspace(s);
7e2040f0 2944 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2945 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2946 TOKEN(ARROW);
79072805 2947 }
748a9306
LW
2948 else if (*s == '$')
2949 OPERATOR(ARROW);
463ee0b2 2950 else
748a9306 2951 TERM(ARROW);
a687059c 2952 }
3280af22 2953 if (PL_expect == XOPERATOR)
79072805
LW
2954 Aop(OP_SUBTRACT);
2955 else {
3280af22 2956 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2957 check_uni();
79072805 2958 OPERATOR('-'); /* unary minus */
2f3197b3 2959 }
79072805 2960
378cc40b 2961 case '+':
a687059c
LW
2962 tmp = *s++;
2963 if (*s == tmp) {
378cc40b 2964 s++;
3280af22 2965 if (PL_expect == XOPERATOR)
79072805
LW
2966 TERM(POSTINC);
2967 else
2968 OPERATOR(PREINC);
378cc40b 2969 }
3280af22 2970 if (PL_expect == XOPERATOR)
79072805
LW
2971 Aop(OP_ADD);
2972 else {
3280af22 2973 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2974 check_uni();
a687059c 2975 OPERATOR('+');
2f3197b3 2976 }
a687059c 2977
378cc40b 2978 case '*':
3280af22
NIS
2979 if (PL_expect != XOPERATOR) {
2980 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2981 PL_expect = XOPERATOR;
2982 force_ident(PL_tokenbuf, '*');
2983 if (!*PL_tokenbuf)
a0d0e21e 2984 PREREF('*');
79072805 2985 TERM('*');
a687059c 2986 }
79072805
LW
2987 s++;
2988 if (*s == '*') {
a687059c 2989 s++;
79072805 2990 PWop(OP_POW);
a687059c 2991 }
79072805
LW
2992 Mop(OP_MULTIPLY);
2993
378cc40b 2994 case '%':
3280af22 2995 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2996 ++s;
2997 Mop(OP_MODULO);
a687059c 2998 }
3280af22
NIS
2999 PL_tokenbuf[0] = '%';
3000 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3001 if (!PL_tokenbuf[1]) {
3002 if (s == PL_bufend)
bbce6d69
PP
3003 yyerror("Final % should be \\% or %name");
3004 PREREF('%');
a687059c 3005 }
3280af22 3006 PL_pending_ident = '%';
bbce6d69 3007 TERM('%');
a687059c 3008
378cc40b 3009 case '^':
79072805 3010 s++;
a0d0e21e 3011 BOop(OP_BIT_XOR);
79072805 3012 case '[':
3280af22 3013 PL_lex_brackets++;
79072805 3014 /* FALL THROUGH */
378cc40b 3015 case '~':
378cc40b 3016 case ',':
378cc40b
LW
3017 tmp = *s++;
3018 OPERATOR(tmp);
a0d0e21e
LW
3019 case ':':
3020 if (s[1] == ':') {
3021 len = 0;
3022 goto just_a_word;
3023 }
3024 s++;
09bef843
SB
3025 switch (PL_expect) {
3026 OP *attrs;
3027 case XOPERATOR:
3028 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3029 break;
3030 PL_bufptr = s; /* update in case we back off */
3031 goto grabattrs;
3032 case XATTRBLOCK:
3033 PL_expect = XBLOCK;
3034 goto grabattrs;
3035 case XATTRTERM:
3036 PL_expect = XTERMBLOCK;
3037 grabattrs:
3038 s = skipspace(s);
3039 attrs = Nullop;
7e2040f0 3040 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3041 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3042 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3043 if (tmp < 0) tmp = -tmp;
3044 switch (tmp) {
3045 case KEY_or:
3046 case KEY_and:
3047 case KEY_for:
3048 case KEY_unless:
3049 case KEY_if:
3050 case KEY_while:
3051 case KEY_until:
3052 goto got_attrs;
3053 default:
3054 break;
3055 }
3056 }
09bef843
SB
3057 if (*d == '(') {
3058 d = scan_str(d,TRUE,TRUE);
3059 if (!d) {
09bef843
SB
3060 /* MUST advance bufptr here to avoid bogus
3061 "at end of line" context messages from yyerror().
3062 */
3063 PL_bufptr = s + len;
3064 yyerror("Unterminated attribute parameter in attribute list");
3065 if (attrs)
3066 op_free(attrs);
3067 return 0; /* EOF indicator */
3068 }
3069 }
3070 if (PL_lex_stuff) {
3071 SV *sv = newSVpvn(s, len);
3072 sv_catsv(sv, PL_lex_stuff);
3073 attrs = append_elem(OP_LIST, attrs,
3074 newSVOP(OP_CONST, 0, sv));
3075 SvREFCNT_dec(PL_lex_stuff);
3076 PL_lex_stuff = Nullsv;
3077 }
3078 else {
78f9721b
SM
3079 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3080 CvLVALUE_on(PL_compcv);
3081 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3082 CvLOCKED_on(PL_compcv);
3083 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3084 CvMETHOD_on(PL_compcv);
87ecf892
DM
3085#ifdef USE_ITHREADS
3086 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3087 GvSHARED_on(cGVOPx_gv(yylval.opval));
3088#endif
78f9721b
SM
3089 /* After we've set the flags, it could be argued that
3090 we don't need to do the attributes.pm-based setting
3091 process, and shouldn't bother appending recognized
3092 flags. To experiment with that, uncomment the
3093 following "else": */
0256094b 3094 else
78f9721b
SM
3095 attrs = append_elem(OP_LIST, attrs,
3096 newSVOP(OP_CONST, 0,
3097 newSVpvn(s, len)));
09bef843
SB
3098 }
3099 s = skipspace(d);
0120eecf 3100 if (*s == ':' && s[1] != ':')
09bef843 3101 s = skipspace(s+1);
0120eecf
GS
3102 else if (s == d)
3103 break; /* require real whitespace or :'s */
09bef843 3104 }
f9829d6b
GS
3105 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3106 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3107 char q = ((*s == '\'') ? '"' : '\'');
3108 /* If here for an expression, and parsed no attrs, back off. */
3109 if (tmp == '=' && !attrs) {
3110 s = PL_bufptr;
3111 break;
3112 }
3113 /* MUST advance bufptr here to avoid bogus "at end of line"
3114 context messages from yyerror().
3115 */
3116 PL_bufptr = s;
3117 if (!*s)
3118 yyerror("Unterminated attribute list");
3119 else
3120 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3121 q, *s, q));
3122 if (attrs)
3123 op_free(attrs);
3124 OPERATOR(':');
3125 }
f9829d6b 3126 got_attrs:
09bef843
SB
3127 if (attrs) {
3128 PL_nextval[PL_nexttoke].opval = attrs;
3129 force_next(THING);
3130 }
3131 TOKEN(COLONATTR);
3132 }
a0d0e21e 3133 OPERATOR(':');
8990e307
LW
3134 case '(':
3135 s++;
3280af22
NIS
3136 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3137 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3138 else
3280af22 3139 PL_expect = XTERM;
a0d0e21e 3140 TOKEN('(');
378cc40b 3141 case ';':
f4dd75d9 3142 CLINE;
378cc40b
LW
3143 tmp = *s++;
3144 OPERATOR(tmp);
3145 case ')':
378cc40b 3146 tmp = *s++;
16d20bd9
AD
3147 s = skipspace(s);
3148 if (*s == '{')
3149 PREBLOCK(tmp);
378cc40b 3150 TERM(tmp);
79072805
LW
3151 case ']':
3152 s++;
3280af22 3153 if (PL_lex_brackets <= 0)
d98d5fff 3154 yyerror("Unmatched right square bracket");
463ee0b2 3155 else
3280af22
NIS
3156 --PL_lex_brackets;
3157 if (PL_lex_state == LEX_INTERPNORMAL) {
3158 if (PL_lex_brackets == 0) {
a0d0e21e 3159 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3160 PL_lex_state = LEX_INTERPEND;
79072805
LW
3161 }
3162 }
4633a7c4 3163 TERM(']');
79072805
LW
3164 case '{':
3165 leftbracket:
79072805 3166 s++;
3280af22
NIS
3167 if (PL_lex_brackets > 100) {
3168 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3169 if (newlb != PL_lex_brackstack) {
8990e307 3170 SAVEFREEPV(newlb);
3280af22 3171 PL_lex_brackstack = newlb;
8990e307
LW
3172 }
3173 }
3280af22 3174 switch (PL_expect) {
a0d0e21e 3175 case XTERM:
3280af22 3176 if (PL_lex_formbrack) {
a0d0e21e
LW
3177 s--;
3178 PRETERMBLOCK(DO);
3179 }
3280af22
NIS
3180 if (PL_oldoldbufptr == PL_last_lop)
3181 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3182 else
3280af22 3183 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3184 OPERATOR(HASHBRACK);
a0d0e21e 3185 case XOPERATOR:
bf4acbe4 3186 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3187 s++;
44a8e56a 3188 d = s;
3280af22
NIS
3189 PL_tokenbuf[0] = '\0';
3190 if (d < PL_bufend && *d == '-') {
3191 PL_tokenbuf[0] = '-';
44a8e56a 3192 d++;
bf4acbe4 3193 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3194 d++;
3195 }
7e2040f0 3196 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3197 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3198 FALSE, &len);
bf4acbe4 3199 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3200 d++;
3201 if (*d == '}') {
3280af22 3202 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3203 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3204 if (minus)
3205 force_next('-');
748a9306
LW
3206 }
3207 }
3208 /* FALL THROUGH */
09bef843 3209 case XATTRBLOCK:
748a9306 3210 case XBLOCK:
3280af22
NIS
3211 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3212 PL_expect = XSTATE;
a0d0e21e 3213 break;
09bef843 3214 case XATTRTERM:
a0d0e21e 3215 case XTERMBLOCK:
3280af22
NIS
3216 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3217 PL_expect = XSTATE;
a0d0e21e
LW
3218 break;
3219 default: {
3220 char *t;
3280af22
NIS
3221 if (PL_oldoldbufptr == PL_last_lop)
3222 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3223 else
3280af22 3224 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3225 s = skipspace(s);
09ecc4b6 3226 if (*s == '}')
a0d0e21e 3227 OPERATOR(HASHBRACK);
b8a4b1be
GS
3228 /* This hack serves to disambiguate a pair of curlies
3229 * as being a block or an anon hash. Normally, expectation
3230 * determines that, but in cases where we're not in a
3231 * position to expect anything in particular (like inside
3232 * eval"") we have to resolve the ambiguity. This code
3233 * covers the case where the first term in the curlies is a
3234 * quoted string. Most other cases need to be explicitly
3235 * disambiguated by prepending a `+' before the opening
3236 * curly in order to force resolution as an anon hash.
3237 *
3238 * XXX should probably propagate the outer expectation
3239 * into eval"" to rely less on this hack, but that could
3240 * potentially break current behavior of eval"".
3241 * GSAR 97-07-21
3242 */
3243 t = s;
3244 if (*s == '\'' || *s == '"' || *s == '`') {
3245 /* common case: get past first string, handling escapes */
3280af22 3246 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3247 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3248 t++;
3249 t++;
a0d0e21e 3250 }
b8a4b1be 3251 else if (*s == 'q') {
3280af22 3252 if (++t < PL_bufend
b8a4b1be 3253 && (!isALNUM(*t)
3280af22 3254 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3255 && !isALNUM(*t))))
3256 {
b8a4b1be
GS
3257 char *tmps;
3258 char open, close, term;
3259 I32 brackets = 1;
3260
3280af22 3261 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3262 t++;
3263 term = *t;
3264 open = term;
3265 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3266 term = tmps[5];
3267 close = term;
3268 if (open == close)
3280af22
NIS
3269 for (t++; t < PL_bufend; t++) {
3270 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3271 t++;
6d07e5e9 3272 else if (*t == open)
b8a4b1be
GS
3273 break;
3274 }
3275 else
3280af22
NIS
3276 for (t++; t < PL_bufend; t++) {
3277 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3278 t++;
6d07e5e9 3279 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3280 break;
3281 else if (*t == open)
3282 brackets++;
3283 }
3284 }
3285 t++;
a0d0e21e 3286 }
7e2040f0 3287 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3288 t += UTF8SKIP(t);
7e2040f0 3289 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3290 t += UTF8SKIP(t);
a0d0e21e 3291 }
3280af22 3292 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3293 t++;
b8a4b1be
GS
3294 /* if comma follows first term, call it an anon hash */
3295 /* XXX it could be a comma expression with loop modifiers */
3280af22 3296 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3297 || (*t == '=' && t[1] == '>')))
a0d0e21e 3298 OPERATOR(HASHBRACK);
3280af22 3299 if (PL_expect == XREF)
4e4e412b 3300 PL_expect = XTERM;
a0d0e21e 3301 else {
3280af22
NIS
3302 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3303 PL_expect = XSTATE;
a0d0e21e 3304 }
8990e307 3305 }
a0d0e21e 3306 break;
463ee0b2 3307 }
57843af0 3308 yylval.ival = CopLINE(PL_curcop);
79072805 3309 if (isSPACE(*s) || *s == '#')
3280af22 3310 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3311 TOKEN('{');
378cc40b 3312 case '}':
79072805
LW
3313 rightbracket:
3314 s++;
3280af22 3315 if (PL_lex_brackets <= 0)
d98d5fff 3316 yyerror("Unmatched right curly bracket");
463ee0b2 3317 else
3280af22 3318 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3319 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3320 PL_lex_formbrack = 0;
3321 if (PL_lex_state == LEX_INTERPNORMAL) {
3322 if (PL_lex_brackets == 0) {
9059aa12
LW
3323 if (PL_expect & XFAKEBRACK) {
3324 PL_expect &= XENUMMASK;
3280af22
NIS
3325 PL_lex_state = LEX_INTERPEND;
3326 PL_bufptr = s;
cea2e8a9 3327 return yylex(); /* ignore fake brackets */
79072805 3328 }
fa83b5b6 3329 if (*s == '-' && s[1] == '>')
3280af22 3330 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3331 else if (*s != '[' && *s != '{')
3280af22 3332 PL_lex_state = LEX_INTERPEND;
79072805
LW
3333 }
3334 }
9059aa12
LW
3335 if (PL_expect & XFAKEBRACK) {
3336 PL_expect &= XENUMMASK;
3280af22 3337 PL_bufptr = s;
cea2e8a9 3338 return yylex(); /* ignore fake brackets */
748a9306 3339 }
79072805
LW
3340 force_next('}');
3341 TOKEN(';');
378cc40b
LW
3342 case '&':
3343 s++;
3344 tmp = *s++;
3345 if (tmp == '&')
a0d0e21e 3346 AOPERATOR(ANDAND);
378cc40b 3347 s--;
3280af22 3348 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3349 if (ckWARN(WARN_SEMICOLON)
3350 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3351 {
57843af0 3352 CopLINE_dec(PL_curcop);
cea2e8a9 3353 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3354 CopLINE_inc(PL_curcop);
463ee0b2 3355 }
79072805 3356 BAop(OP_BIT_AND);
463ee0b2 3357 }
79072805 3358
3280af22
NIS
3359 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3360 if (*PL_tokenbuf) {
3361 PL_expect = XOPERATOR;
3362 force_ident(PL_tokenbuf, '&');
463ee0b2 3363 }
79072805
LW
3364 else
3365 PREREF('&');
c07a80fd 3366 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3367 TERM('&');
3368
378cc40b
LW
3369 case '|':
3370 s++;
3371 tmp = *s++;
3372 if (tmp == '|')
a0d0e21e 3373 AOPERATOR(OROR);
378cc40b 3374 s--;
79072805 3375 BOop(OP_BIT_OR);
378cc40b
LW
3376 case '=':
3377 s++;
3378 tmp = *s++;
3379 if (tmp == '=')
79072805
LW
3380 Eop(OP_EQ);
3381 if (tmp == '>')
3382 OPERATOR(',');
378cc40b 3383 if (tmp == '~')
79072805 3384 PMop(OP_MATCH);
599cee73 3385 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3386 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3387 s--;
3280af22
NIS
3388 if (PL_expect == XSTATE && isALPHA(tmp) &&
3389 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3390 {
3280af22
NIS
3391 if (PL_in_eval && !PL_rsfp) {
3392 d = PL_bufend;
a5f75d66
AD
3393 while (s < d) {
3394 if (*s++ == '\n') {
3395 incline(s);
3396 if (strnEQ(s,"=cut",4)) {
3397 s = strchr(s,'\n');
3398 if (s)
3399 s++;
3400 else
3401 s = d;
3402 incline(s);
3403 goto retry;
3404 }
3405 }
3406 }
3407 goto retry;
3408 }
3280af22
NIS
3409 s = PL_bufend;
3410 PL_doextract = TRUE;
a0d0e21e
LW
3411 goto retry;
3412 }
3280af22 3413 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3414 char *t;
51882d45 3415#ifdef PERL_STRICT_CR
bf4acbe4 3416 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3417#else
bf4acbe4 3418 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3419#endif
a0d0e21e
LW
3420 if (*t == '\n' || *t == '#') {
3421 s--;
3280af22 3422 PL_expect = XBLOCK;
a0d0e21e
LW
3423 goto leftbracket;
3424 }
79072805 3425 }
a0d0e21e
LW
3426 yylval.ival = 0;
3427 OPERATOR(ASSIGNOP);
378cc40b
LW
3428 case '!':
3429 s++;
3430 tmp = *s++;
3431 if (tmp == '=')
79072805 3432 Eop(OP_NE);
378cc40b 3433 if (tmp == '~')
79072805 3434 PMop(OP_NOT);
378cc40b
LW
3435 s--;
3436 OPERATOR('!');
3437 case '<':
3280af22 3438 if (PL_expect != XOPERATOR) {
93a17b20 3439 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3440 check_uni();
79072805
LW
3441 if (s[1] == '<')
3442 s = scan_heredoc(s);
3443 else
3444 s = scan_inputsymbol(s);
3445 TERM(sublex_start());
378cc40b
LW
3446 }
3447 s++;
3448 tmp = *s++;
3449 if (tmp == '<')
79072805 3450 SHop(OP_LEFT_SHIFT);
395c3793
LW
3451 if (tmp == '=') {
3452 tmp = *s++;
3453 if (tmp == '>')
79072805 3454 Eop(OP_NCMP);
395c3793 3455 s--;
79072805 3456 Rop(OP_LE);
395c3793 3457 }
378cc40b 3458 s--;
79072805 3459 Rop(OP_LT);
378cc40b
LW
3460 case '>':
3461 s++;
3462 tmp = *s++;
3463 if (tmp == '>')
79072805 3464 SHop(OP_RIGHT_SHIFT);
378cc40b 3465 if (tmp == '=')
79072805 3466 Rop(OP_GE);
378cc40b 3467 s--;
79072805 3468 Rop(OP_GT);
378cc40b
LW
3469
3470 case '$':
bbce6d69
PP
3471 CLINE;
3472
3280af22
NIS
3473 if (PL_expect == XOPERATOR) {
3474 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3475 PL_expect = XTERM;
a0d0e21e 3476 depcom();
bbce6d69 3477 return ','; /* grandfather non-comma-format format */
a0d0e21e 3478 }
8990e307 3479 }
a0d0e21e 3480
7e2040f0 3481 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3482 PL_tokenbuf[0] = '@';
376b8730
SM
3483 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3484 sizeof PL_tokenbuf - 1, FALSE);
3485 if (PL_expect == XOPERATOR)
3486 no_op("Array length", s);
3280af22 3487 if (!PL_tokenbuf[1])
a0d0e21e 3488 PREREF(DOLSHARP);
3280af22
NIS
3489 PL_expect = XOPERATOR;
3490 PL_pending_ident = '#';
463ee0b2 3491 TOKEN(DOLSHARP);
79072805 3492 }
bbce6d69 3493
3280af22 3494 PL_tokenbuf[0] = '$';
376b8730
SM
3495 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3496 sizeof PL_tokenbuf - 1, FALSE);
3497 if (PL_expect == XOPERATOR)
3498 no_op("Scalar", s);
3280af22
NIS
3499 if (!PL_tokenbuf[1]) {
3500 if (s == PL_bufend)
bbce6d69
PP
3501 yyerror("Final $ should be \\$ or $name");
3502 PREREF('$');
8990e307 3503 }
a0d0e21e 3504
bbce6d69 3505 /* This kludge not intended to be bulletproof. */
3280af22 3506 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3507 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3508 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3509 yylval.opval->op_private = OPpCONST_ARYBASE;
3510 TERM(THING);
3511 }
3512
ff68c719 3513 d = s;
69d2bceb 3514 tmp = (I32)*s;
3280af22 3515 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3516 s = skipspace(s);
3517
3280af22 3518 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3519 char *t;
3520 if (*s == '[') {
3280af22 3521 PL_tokenbuf[0] = '@';
599cee73 3522 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3523 for(t = s + 1;
7e2040f0 3524 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3525 t++) ;
a0d0e21e 3526 if (*t++ == ',') {
3280af22
NIS
3527 PL_bufptr = skipspace(PL_bufptr);
3528 while (t < PL_bufend && *t != ']')
bbce6d69 3529 t++;
cea2e8a9 3530 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3531 "Multidimensional syntax %.*s not supported",
3532 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3533 }
3534 }
bbce6d69
PP
3535 }
3536 else if (*s == '{') {
3280af22 3537 PL_tokenbuf[0] = '%';
599cee73 3538 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3539 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3540 {
3280af22 3541 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3542 STRLEN len;
3543 for (t++; isSPACE(*t); t++) ;
7e2040f0 3544 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3545 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3546 for (; isSPACE(*t); t++) ;
864dbfa3 3547 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3548 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3549 "You need to quote \"%s\"", tmpbuf);
748a9306 3550 }
93a17b20
LW
3551 }
3552 }
2f3197b3 3553 }
bbce6d69 3554
3280af22 3555 PL_expect = XOPERATOR;
69d2bceb 3556 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {