This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Retract #8552.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
IRC
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
IRC
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
998054bd
SC
129#ifdef DEBUGGING /* Serve -DT. */
130# define REPORT(x,retval) tokereport(x,s,(int)retval)
131# define REPORT2(x,retval) tokereport(x,s, yylval.ival)
132#else
133# define REPORT(x,retval) 1
134# define REPORT2(x,retval) 1
135#endif
136
137#define TOKEN(retval) return (REPORT2("token",retval), PL_bufptr = s,(int)retval)
138#define OPERATOR(retval) return (REPORT2("operator",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval)
139#define AOPERATOR(retval) return ao((REPORT2("aop",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval))
140#define PREBLOCK(retval) return (REPORT2("preblock",retval), PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
141#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval), PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
142#define PREREF(retval) return (REPORT2("preref",retval), PL_expect = XREF,PL_bufptr = s,(int)retval)
143#define TERM(retval) return (CLINE, REPORT2("term",retval), PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
144#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f), PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
145#define FTST(f) return(yylval.ival=f, REPORT("ftst",f), PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
146#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
147#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
148#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
149#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
150#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f), PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
151#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f), PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
152#define PMop(f) return(yylval.ival=f, REPORT("matchop",f), PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
153#define Aop(f) return ao((yylval.ival=f, REPORT("add",f), PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
154#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f), PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
155#define Eop(f) return(yylval.ival=f, REPORT("eq",f), PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
156#define Rop(f) return(yylval.ival=f, REPORT("rel",f), PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 157
a687059c
LW
158/* This bit of chicanery makes a unary function followed by
159 * a parenthesis into a function with one argument, highest precedence.
160 */
2f3197b3 161#define UNI(f) return(yylval.ival = f, \
998054bd 162 REPORT("uni",f), \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
998054bd 170 REPORT("uni",f), \
3280af22
NIS
171 PL_bufptr = s, \
172 PL_last_uni = PL_oldbufptr, \
79072805
LW
173 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
174
9f68db38 175/* grandfather return to old style */
3280af22 176#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 177
998054bd 178void
61b2116b 179S_tokereport(pTHX_ char *thing, char* s, I32 rv)
998054bd
SC
180{
181 SV *report;
182 DEBUG_T({
183 report = newSVpv(thing, 0);
61b2116b 184 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
185
186 if (s - PL_bufptr > 0)
187 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
188 else {
189 if (PL_oldbufptr && *PL_oldbufptr)
190 sv_catpv(report, PL_tokenbuf);
191 }
192 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
193 })
194}
195
ffb4593c
NT
196/*
197 * S_ao
198 *
199 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
200 * into an OP_ANDASSIGN or OP_ORASSIGN
201 */
202
76e3520e 203STATIC int
cea2e8a9 204S_ao(pTHX_ int toketype)
a0d0e21e 205{
3280af22
NIS
206 if (*PL_bufptr == '=') {
207 PL_bufptr++;
a0d0e21e
LW
208 if (toketype == ANDAND)
209 yylval.ival = OP_ANDASSIGN;
210 else if (toketype == OROR)
211 yylval.ival = OP_ORASSIGN;
212 toketype = ASSIGNOP;
213 }
214 return toketype;
215}
216
ffb4593c
NT
217/*
218 * S_no_op
219 * When Perl expects an operator and finds something else, no_op
220 * prints the warning. It always prints "<something> found where
221 * operator expected. It prints "Missing semicolon on previous line?"
222 * if the surprise occurs at the start of the line. "do you need to
223 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
224 * where the compiler doesn't know if foo is a method call or a function.
225 * It prints "Missing operator before end of line" if there's nothing
226 * after the missing operator, or "... before <...>" if there is something
227 * after the missing operator.
228 */
229
76e3520e 230STATIC void
cea2e8a9 231S_no_op(pTHX_ char *what, char *s)
463ee0b2 232{
3280af22
NIS
233 char *oldbp = PL_bufptr;
234 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 235
1189a94a
GS
236 if (!s)
237 s = oldbp;
07c798fb 238 else
1189a94a 239 PL_bufptr = s;
cea2e8a9 240 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 241 if (is_first)
cea2e8a9 242 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 243 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 244 char *t;
7e2040f0 245 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 246 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 247 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 248 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 249 }
07c798fb
HS
250 else {
251 assert(s >= oldbp);
cea2e8a9 252 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 253 }
3280af22 254 PL_bufptr = oldbp;
8990e307
LW
255}
256
ffb4593c
NT
257/*
258 * S_missingterm
259 * Complain about missing quote/regexp/heredoc terminator.
260 * If it's called with (char *)NULL then it cauterizes the line buffer.
261 * If we're in a delimited string and the delimiter is a control
262 * character, it's reformatted into a two-char sequence like ^C.
263 * This is fatal.
264 */
265
76e3520e 266STATIC void
cea2e8a9 267S_missingterm(pTHX_ char *s)
8990e307
LW
268{
269 char tmpbuf[3];
270 char q;
271 if (s) {
272 char *nl = strrchr(s,'\n');
d2719217 273 if (nl)
8990e307
LW
274 *nl = '\0';
275 }
9d116dd7
JH
276 else if (
277#ifdef EBCDIC
278 iscntrl(PL_multi_close)
279#else
280 PL_multi_close < 32 || PL_multi_close == 127
281#endif
282 ) {
8990e307 283 *tmpbuf = '^';
3280af22 284 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
285 s = "\\n";
286 tmpbuf[2] = '\0';
287 s = tmpbuf;
288 }
289 else {
3280af22 290 *tmpbuf = PL_multi_close;
8990e307
LW
291 tmpbuf[1] = '\0';
292 s = tmpbuf;
293 }
294 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 295 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 296}
79072805 297
ffb4593c
NT
298/*
299 * Perl_deprecate
ffb4593c
NT
300 */
301
79072805 302void
864dbfa3 303Perl_deprecate(pTHX_ char *s)
a0d0e21e 304{
599cee73 305 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 306 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
307}
308
ffb4593c
NT
309/*
310 * depcom
9cbb5ea2 311 * Deprecate a comma-less variable list.
ffb4593c
NT
312 */
313
76e3520e 314STATIC void
cea2e8a9 315S_depcom(pTHX)
a0d0e21e
LW
316{
317 deprecate("comma-less variable list");
318}
319
ffb4593c 320/*
9cbb5ea2
GS
321 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
322 * utf16-to-utf8-reversed.
ffb4593c
NT
323 */
324
c39cd008
GS
325#ifdef PERL_CR_FILTER
326static void
327strip_return(SV *sv)
328{
329 register char *s = SvPVX(sv);
330 register char *e = s + SvCUR(sv);
331 /* outer loop optimized to do nothing if there are no CR-LFs */
332 while (s < e) {
333 if (*s++ == '\r' && *s == '\n') {
334 /* hit a CR-LF, need to copy the rest */
335 register char *d = s - 1;
336 *d++ = *s++;
337 while (s < e) {
338 if (*s == '\r' && s[1] == '\n')
339 s++;
340 *d++ = *s++;
341 }
342 SvCUR(sv) -= s - d;
343 return;
344 }
345 }
346}
a868473f 347
76e3520e 348STATIC I32
c39cd008 349S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 350{
c39cd008
GS
351 I32 count = FILTER_READ(idx+1, sv, maxlen);
352 if (count > 0 && !maxlen)
353 strip_return(sv);
354 return count;
a868473f
NIS
355}
356#endif
357
ffb4593c
NT
358/*
359 * Perl_lex_start
9cbb5ea2
GS
360 * Initialize variables. Uses the Perl save_stack to save its state (for
361 * recursive calls to the parser).
ffb4593c
NT
362 */
363
a0d0e21e 364void
864dbfa3 365Perl_lex_start(pTHX_ SV *line)
79072805 366{
8990e307
LW
367 char *s;
368 STRLEN len;
369
3280af22
NIS
370 SAVEI32(PL_lex_dojoin);
371 SAVEI32(PL_lex_brackets);
3280af22
NIS
372 SAVEI32(PL_lex_casemods);
373 SAVEI32(PL_lex_starts);
374 SAVEI32(PL_lex_state);
7766f137 375 SAVEVPTR(PL_lex_inpat);
3280af22 376 SAVEI32(PL_lex_inwhat);
18b09519
GS
377 if (PL_lex_state == LEX_KNOWNEXT) {
378 I32 toke = PL_nexttoke;
379 while (--toke >= 0) {
380 SAVEI32(PL_nexttype[toke]);
381 SAVEVPTR(PL_nextval[toke]);
382 }
383 SAVEI32(PL_nexttoke);
18b09519 384 }
57843af0 385 SAVECOPLINE(PL_curcop);
3280af22
NIS
386 SAVEPPTR(PL_bufptr);
387 SAVEPPTR(PL_bufend);
388 SAVEPPTR(PL_oldbufptr);
389 SAVEPPTR(PL_oldoldbufptr);
390 SAVEPPTR(PL_linestart);
391 SAVESPTR(PL_linestr);
392 SAVEPPTR(PL_lex_brackstack);
393 SAVEPPTR(PL_lex_casestack);
c76ac1ee 394 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
395 SAVESPTR(PL_lex_stuff);
396 SAVEI32(PL_lex_defer);
09bef843 397 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 398 SAVESPTR(PL_lex_repl);
bebdddfc
GS
399 SAVEINT(PL_expect);
400 SAVEINT(PL_lex_expect);
3280af22
NIS
401
402 PL_lex_state = LEX_NORMAL;
403 PL_lex_defer = 0;
404 PL_expect = XSTATE;
405 PL_lex_brackets = 0;
3280af22
NIS
406 New(899, PL_lex_brackstack, 120, char);
407 New(899, PL_lex_casestack, 12, char);
408 SAVEFREEPV(PL_lex_brackstack);
409 SAVEFREEPV(PL_lex_casestack);
410 PL_lex_casemods = 0;
411 *PL_lex_casestack = '\0';
412 PL_lex_dojoin = 0;
413 PL_lex_starts = 0;
414 PL_lex_stuff = Nullsv;
415 PL_lex_repl = Nullsv;
416 PL_lex_inpat = 0;
76be56bc 417 PL_nexttoke = 0;
3280af22 418 PL_lex_inwhat = 0;
09bef843 419 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
420 PL_linestr = line;
421 if (SvREADONLY(PL_linestr))
422 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
423 s = SvPV(PL_linestr, len);
8990e307 424 if (len && s[len-1] != ';') {
3280af22
NIS
425 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 428 }
3280af22
NIS
429 SvTEMP_off(PL_linestr);
430 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
431 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
432 SvREFCNT_dec(PL_rs);
79cb57f6 433 PL_rs = newSVpvn("\n", 1);
3280af22 434 PL_rsfp = 0;
79072805 435}
a687059c 436
ffb4593c
NT
437/*
438 * Perl_lex_end
9cbb5ea2
GS
439 * Finalizer for lexing operations. Must be called when the parser is
440 * done with the lexer.
ffb4593c
NT
441 */
442
463ee0b2 443void
864dbfa3 444Perl_lex_end(pTHX)
463ee0b2 445{
3280af22 446 PL_doextract = FALSE;
463ee0b2
LW
447}
448
ffb4593c
NT
449/*
450 * S_incline
451 * This subroutine has nothing to do with tilting, whether at windmills
452 * or pinball tables. Its name is short for "increment line". It
57843af0 453 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 454 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
455 * # line 500 "foo.pm"
456 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
457 */
458
76e3520e 459STATIC void
cea2e8a9 460S_incline(pTHX_ char *s)
463ee0b2
LW
461{
462 char *t;
463 char *n;
73659bf1 464 char *e;
463ee0b2 465 char ch;
463ee0b2 466
57843af0 467 CopLINE_inc(PL_curcop);
463ee0b2
LW
468 if (*s++ != '#')
469 return;
bf4acbe4 470 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
471 if (strnEQ(s, "line", 4))
472 s += 4;
473 else
474 return;
475 if (*s == ' ' || *s == '\t')
476 s++;
4e553d73 477 else
73659bf1 478 return;
bf4acbe4 479 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
480 if (!isDIGIT(*s))
481 return;
482 n = s;
483 while (isDIGIT(*s))
484 s++;
bf4acbe4 485 while (SPACE_OR_TAB(*s))
463ee0b2 486 s++;
73659bf1 487 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 488 s++;
73659bf1
GS
489 e = t + 1;
490 }
463ee0b2 491 else {
463ee0b2 492 for (t = s; !isSPACE(*t); t++) ;
73659bf1 493 e = t;
463ee0b2 494 }
bf4acbe4 495 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
496 e++;
497 if (*e != '\n' && *e != '\0')
498 return; /* false alarm */
499
463ee0b2
LW
500 ch = *t;
501 *t = '\0';
f4dd75d9
GS
502 if (t - s > 0) {
503#ifdef USE_ITHREADS
504 Safefree(CopFILE(PL_curcop));
505#else
506 SvREFCNT_dec(CopFILEGV(PL_curcop));
507#endif
57843af0 508 CopFILE_set(PL_curcop, s);
f4dd75d9 509 }
463ee0b2 510 *t = ch;
57843af0 511 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
512}
513
ffb4593c
NT
514/*
515 * S_skipspace
516 * Called to gobble the appropriate amount and type of whitespace.
517 * Skips comments as well.
518 */
519
76e3520e 520STATIC char *
cea2e8a9 521S_skipspace(pTHX_ register char *s)
a687059c 522{
3280af22 523 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 524 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
525 s++;
526 return s;
527 }
528 for (;;) {
fd049845 529 STRLEN prevlen;
09bef843
SB
530 SSize_t oldprevlen, oldoldprevlen;
531 SSize_t oldloplen, oldunilen;
60e6418e
GS
532 while (s < PL_bufend && isSPACE(*s)) {
533 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
534 incline(s);
535 }
ffb4593c
NT
536
537 /* comment */
3280af22
NIS
538 if (s < PL_bufend && *s == '#') {
539 while (s < PL_bufend && *s != '\n')
463ee0b2 540 s++;
60e6418e 541 if (s < PL_bufend) {
463ee0b2 542 s++;
60e6418e
GS
543 if (PL_in_eval && !PL_rsfp) {
544 incline(s);
545 continue;
546 }
547 }
463ee0b2 548 }
ffb4593c
NT
549
550 /* only continue to recharge the buffer if we're at the end
551 * of the buffer, we're not reading from a source filter, and
552 * we're in normal lexing mode
553 */
09bef843
SB
554 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
555 PL_lex_state == LEX_FORMLINE)
463ee0b2 556 return s;
ffb4593c
NT
557
558 /* try to recharge the buffer */
9cbb5ea2
GS
559 if ((s = filter_gets(PL_linestr, PL_rsfp,
560 (prevlen = SvCUR(PL_linestr)))) == Nullch)
561 {
562 /* end of file. Add on the -p or -n magic */
3280af22
NIS
563 if (PL_minus_n || PL_minus_p) {
564 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
565 ";}continue{print or die qq(-p destination: $!\\n)" :
566 "");
3280af22
NIS
567 sv_catpv(PL_linestr,";}");
568 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
569 }
570 else
3280af22 571 sv_setpv(PL_linestr,";");
ffb4593c
NT
572
573 /* reset variables for next time we lex */
9cbb5ea2
GS
574 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
575 = SvPVX(PL_linestr);
3280af22 576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
577
578 /* Close the filehandle. Could be from -P preprocessor,
579 * STDIN, or a regular file. If we were reading code from
580 * STDIN (because the commandline held no -e or filename)
581 * then we don't close it, we reset it so the code can
582 * read from STDIN too.
583 */
584
3280af22
NIS
585 if (PL_preprocess && !PL_in_eval)
586 (void)PerlProc_pclose(PL_rsfp);
587 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
588 PerlIO_clearerr(PL_rsfp);
8990e307 589 else
3280af22
NIS
590 (void)PerlIO_close(PL_rsfp);
591 PL_rsfp = Nullfp;
463ee0b2
LW
592 return s;
593 }
ffb4593c
NT
594
595 /* not at end of file, so we only read another line */
09bef843
SB
596 /* make corresponding updates to old pointers, for yyerror() */
597 oldprevlen = PL_oldbufptr - PL_bufend;
598 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
599 if (PL_last_uni)
600 oldunilen = PL_last_uni - PL_bufend;
601 if (PL_last_lop)
602 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
603 PL_linestart = PL_bufptr = s + prevlen;
604 PL_bufend = s + SvCUR(PL_linestr);
605 s = PL_bufptr;
09bef843
SB
606 PL_oldbufptr = s + oldprevlen;
607 PL_oldoldbufptr = s + oldoldprevlen;
608 if (PL_last_uni)
609 PL_last_uni = s + oldunilen;
610 if (PL_last_lop)
611 PL_last_lop = s + oldloplen;
a0d0e21e 612 incline(s);
ffb4593c
NT
613
614 /* debugger active and we're not compiling the debugger code,
615 * so store the line into the debugger's array of lines
616 */
3280af22 617 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
618 SV *sv = NEWSV(85,0);
619
620 sv_upgrade(sv, SVt_PVMG);
3280af22 621 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 622 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 623 }
463ee0b2 624 }
a687059c 625}
378cc40b 626
ffb4593c
NT
627/*
628 * S_check_uni
629 * Check the unary operators to ensure there's no ambiguity in how they're
630 * used. An ambiguous piece of code would be:
631 * rand + 5
632 * This doesn't mean rand() + 5. Because rand() is a unary operator,
633 * the +5 is its argument.
634 */
635
76e3520e 636STATIC void
cea2e8a9 637S_check_uni(pTHX)
ba106d47 638{
2f3197b3 639 char *s;
a0d0e21e 640 char *t;
2f3197b3 641
3280af22 642 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 643 return;
3280af22
NIS
644 while (isSPACE(*PL_last_uni))
645 PL_last_uni++;
7e2040f0 646 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 647 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 648 return;
0453d815 649 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 650 char ch = *s;
0453d815 651 *s = '\0';
4e553d73
NIS
652 Perl_warner(aTHX_ WARN_AMBIGUOUS,
653 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
654 PL_last_uni);
655 *s = ch;
656 }
2f3197b3
LW
657}
658
ffb4593c
NT
659/* workaround to replace the UNI() macro with a function. Only the
660 * hints/uts.sh file mentions this. Other comments elsewhere in the
661 * source indicate Microport Unix might need it too.
662 */
663
ffed7fef
LW
664#ifdef CRIPPLED_CC
665
666#undef UNI
ffed7fef 667#define UNI(f) return uni(f,s)
ffed7fef 668
76e3520e 669STATIC int
cea2e8a9 670S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
671{
672 yylval.ival = f;
3280af22
NIS
673 PL_expect = XTERM;
674 PL_bufptr = s;
8f872242
NIS
675 PL_last_uni = PL_oldbufptr;
676 PL_last_lop_op = f;
ffed7fef
LW
677 if (*s == '(')
678 return FUNC1;
679 s = skipspace(s);
680 if (*s == '(')
681 return FUNC1;
682 else
683 return UNIOP;
684}
685
a0d0e21e
LW
686#endif /* CRIPPLED_CC */
687
ffb4593c
NT
688/*
689 * LOP : macro to build a list operator. Its behaviour has been replaced
690 * with a subroutine, S_lop() for which LOP is just another name.
691 */
692
a0d0e21e
LW
693#define LOP(f,x) return lop(f,x,s)
694
ffb4593c
NT
695/*
696 * S_lop
697 * Build a list operator (or something that might be one). The rules:
698 * - if we have a next token, then it's a list operator [why?]
699 * - if the next thing is an opening paren, then it's a function
700 * - else it's a list operator
701 */
702
76e3520e 703STATIC I32
a0be28da 704S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 705{
79072805 706 yylval.ival = f;
35c8bce7 707 CLINE;
998054bd 708 REPORT("lop", f);
3280af22
NIS
709 PL_expect = x;
710 PL_bufptr = s;
711 PL_last_lop = PL_oldbufptr;
712 PL_last_lop_op = f;
713 if (PL_nexttoke)
a0d0e21e 714 return LSTOP;
79072805
LW
715 if (*s == '(')
716 return FUNC;
717 s = skipspace(s);
718 if (*s == '(')
719 return FUNC;
720 else
721 return LSTOP;
722}
723
ffb4593c
NT
724/*
725 * S_force_next
9cbb5ea2 726 * When the lexer realizes it knows the next token (for instance,
ffb4593c 727 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
728 * to know what token to return the next time the lexer is called. Caller
729 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
730 * handles the token correctly.
ffb4593c
NT
731 */
732
4e553d73 733STATIC void
cea2e8a9 734S_force_next(pTHX_ I32 type)
79072805 735{
3280af22
NIS
736 PL_nexttype[PL_nexttoke] = type;
737 PL_nexttoke++;
738 if (PL_lex_state != LEX_KNOWNEXT) {
739 PL_lex_defer = PL_lex_state;
740 PL_lex_expect = PL_expect;
741 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
742 }
743}
744
ffb4593c
NT
745/*
746 * S_force_word
747 * When the lexer knows the next thing is a word (for instance, it has
748 * just seen -> and it knows that the next char is a word char, then
749 * it calls S_force_word to stick the next word into the PL_next lookahead.
750 *
751 * Arguments:
b1b65b59 752 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
753 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
754 * int check_keyword : if true, Perl checks to make sure the word isn't
755 * a keyword (do this if the word is a label, e.g. goto FOO)
756 * int allow_pack : if true, : characters will also be allowed (require,
757 * use, etc. do this)
9cbb5ea2 758 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
759 */
760
76e3520e 761STATIC char *
cea2e8a9 762S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 763{
463ee0b2
LW
764 register char *s;
765 STRLEN len;
4e553d73 766
463ee0b2
LW
767 start = skipspace(start);
768 s = start;
7e2040f0 769 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 770 (allow_pack && *s == ':') ||
15f0808c 771 (allow_initial_tick && *s == '\'') )
a0d0e21e 772 {
3280af22
NIS
773 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
774 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
775 return start;
776 if (token == METHOD) {
777 s = skipspace(s);
778 if (*s == '(')
3280af22 779 PL_expect = XTERM;
463ee0b2 780 else {
3280af22 781 PL_expect = XOPERATOR;
463ee0b2 782 }
79072805 783 }
3280af22
NIS
784 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
785 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
786 force_next(token);
787 }
788 return s;
789}
790
ffb4593c
NT
791/*
792 * S_force_ident
9cbb5ea2 793 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
794 * text only contains the "foo" portion. The first argument is a pointer
795 * to the "foo", and the second argument is the type symbol to prefix.
796 * Forces the next token to be a "WORD".
9cbb5ea2 797 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
798 */
799
76e3520e 800STATIC void
cea2e8a9 801S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
802{
803 if (s && *s) {
11343788 804 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 805 PL_nextval[PL_nexttoke].opval = o;
79072805 806 force_next(WORD);
748a9306 807 if (kind) {
11343788 808 o->op_private = OPpCONST_ENTERED;
55497cff 809 /* XXX see note in pp_entereval() for why we forgo typo
810 warnings if the symbol must be introduced in an eval.
811 GSAR 96-10-12 */
3280af22 812 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
813 kind == '$' ? SVt_PV :
814 kind == '@' ? SVt_PVAV :
815 kind == '%' ? SVt_PVHV :
816 SVt_PVGV
817 );
748a9306 818 }
79072805
LW
819 }
820}
821
1571675a
GS
822NV
823Perl_str_to_version(pTHX_ SV *sv)
824{
825 NV retval = 0.0;
826 NV nshift = 1.0;
827 STRLEN len;
828 char *start = SvPVx(sv,len);
3aa33fe5 829 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
830 char *end = start + len;
831 while (start < end) {
ba210ebe 832 STRLEN skip;
1571675a
GS
833 UV n;
834 if (utf)
dcad2880 835 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
836 else {
837 n = *(U8*)start;
838 skip = 1;
839 }
840 retval += ((NV)n)/nshift;
841 start += skip;
842 nshift *= 1000;
843 }
844 return retval;
845}
846
4e553d73 847/*
ffb4593c
NT
848 * S_force_version
849 * Forces the next token to be a version number.
850 */
851
76e3520e 852STATIC char *
cea2e8a9 853S_force_version(pTHX_ char *s)
89bfa8cd 854{
855 OP *version = Nullop;
44dcb63b 856 char *d;
89bfa8cd 857
858 s = skipspace(s);
859
44dcb63b 860 d = s;
dd629d5b 861 if (*d == 'v')
44dcb63b 862 d++;
44dcb63b 863 if (isDIGIT(*d)) {
a7cb1f99 864 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 865 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 866 SV *ver;
b73d6f50 867 s = scan_num(s, &yylval);
89bfa8cd 868 version = yylval.opval;
dd629d5b
GS
869 ver = cSVOPx(version)->op_sv;
870 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 871 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
872 SvNVX(ver) = str_to_version(ver);
873 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 874 }
89bfa8cd 875 }
876 }
877
878 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 879 PL_nextval[PL_nexttoke].opval = version;
4e553d73 880 force_next(WORD);
89bfa8cd 881
882 return (s);
883}
884
ffb4593c
NT
885/*
886 * S_tokeq
887 * Tokenize a quoted string passed in as an SV. It finds the next
888 * chunk, up to end of string or a backslash. It may make a new
889 * SV containing that chunk (if HINT_NEW_STRING is on). It also
890 * turns \\ into \.
891 */
892
76e3520e 893STATIC SV *
cea2e8a9 894S_tokeq(pTHX_ SV *sv)
79072805
LW
895{
896 register char *s;
897 register char *send;
898 register char *d;
b3ac6de7
IZ
899 STRLEN len = 0;
900 SV *pv = sv;
79072805
LW
901
902 if (!SvLEN(sv))
b3ac6de7 903 goto finish;
79072805 904
a0d0e21e 905 s = SvPV_force(sv, len);
21a311ee 906 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 907 goto finish;
463ee0b2 908 send = s + len;
79072805
LW
909 while (s < send && *s != '\\')
910 s++;
911 if (s == send)
b3ac6de7 912 goto finish;
79072805 913 d = s;
3280af22 914 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 915 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
916 while (s < send) {
917 if (*s == '\\') {
a0d0e21e 918 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
919 s++; /* all that, just for this */
920 }
921 *d++ = *s++;
922 }
923 *d = '\0';
463ee0b2 924 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 925 finish:
3280af22 926 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 927 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
928 return sv;
929}
930
ffb4593c
NT
931/*
932 * Now come three functions related to double-quote context,
933 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
934 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
935 * interact with PL_lex_state, and create fake ( ... ) argument lists
936 * to handle functions and concatenation.
937 * They assume that whoever calls them will be setting up a fake
938 * join call, because each subthing puts a ',' after it. This lets
939 * "lower \luPpEr"
940 * become
941 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
942 *
943 * (I'm not sure whether the spurious commas at the end of lcfirst's
944 * arguments and join's arguments are created or not).
945 */
946
947/*
948 * S_sublex_start
949 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
950 *
951 * Pattern matching will set PL_lex_op to the pattern-matching op to
952 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
953 *
954 * OP_CONST and OP_READLINE are easy--just make the new op and return.
955 *
956 * Everything else becomes a FUNC.
957 *
958 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
959 * had an OP_CONST or OP_READLINE). This just sets us up for a
960 * call to S_sublex_push().
961 */
962
76e3520e 963STATIC I32
cea2e8a9 964S_sublex_start(pTHX)
79072805
LW
965{
966 register I32 op_type = yylval.ival;
79072805
LW
967
968 if (op_type == OP_NULL) {
3280af22
NIS
969 yylval.opval = PL_lex_op;
970 PL_lex_op = Nullop;
79072805
LW
971 return THING;
972 }
973 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 974 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
975
976 if (SvTYPE(sv) == SVt_PVIV) {
977 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
978 STRLEN len;
979 char *p;
980 SV *nsv;
981
982 p = SvPV(sv, len);
79cb57f6 983 nsv = newSVpvn(p, len);
01ec43d0
GS
984 if (SvUTF8(sv))
985 SvUTF8_on(nsv);
b3ac6de7
IZ
986 SvREFCNT_dec(sv);
987 sv = nsv;
4e553d73 988 }
b3ac6de7 989 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 990 PL_lex_stuff = Nullsv;
79072805
LW
991 return THING;
992 }
993
3280af22
NIS
994 PL_sublex_info.super_state = PL_lex_state;
995 PL_sublex_info.sub_inwhat = op_type;
996 PL_sublex_info.sub_op = PL_lex_op;
997 PL_lex_state = LEX_INTERPPUSH;
55497cff 998
3280af22
NIS
999 PL_expect = XTERM;
1000 if (PL_lex_op) {
1001 yylval.opval = PL_lex_op;
1002 PL_lex_op = Nullop;
55497cff 1003 return PMFUNC;
1004 }
1005 else
1006 return FUNC;
1007}
1008
ffb4593c
NT
1009/*
1010 * S_sublex_push
1011 * Create a new scope to save the lexing state. The scope will be
1012 * ended in S_sublex_done. Returns a '(', starting the function arguments
1013 * to the uc, lc, etc. found before.
1014 * Sets PL_lex_state to LEX_INTERPCONCAT.
1015 */
1016
76e3520e 1017STATIC I32
cea2e8a9 1018S_sublex_push(pTHX)
55497cff 1019{
f46d017c 1020 ENTER;
55497cff 1021
3280af22
NIS
1022 PL_lex_state = PL_sublex_info.super_state;
1023 SAVEI32(PL_lex_dojoin);
1024 SAVEI32(PL_lex_brackets);
3280af22
NIS
1025 SAVEI32(PL_lex_casemods);
1026 SAVEI32(PL_lex_starts);
1027 SAVEI32(PL_lex_state);
7766f137 1028 SAVEVPTR(PL_lex_inpat);
3280af22 1029 SAVEI32(PL_lex_inwhat);
57843af0 1030 SAVECOPLINE(PL_curcop);
3280af22
NIS
1031 SAVEPPTR(PL_bufptr);
1032 SAVEPPTR(PL_oldbufptr);
1033 SAVEPPTR(PL_oldoldbufptr);
1034 SAVEPPTR(PL_linestart);
1035 SAVESPTR(PL_linestr);
1036 SAVEPPTR(PL_lex_brackstack);
1037 SAVEPPTR(PL_lex_casestack);
1038
1039 PL_linestr = PL_lex_stuff;
1040 PL_lex_stuff = Nullsv;
1041
9cbb5ea2
GS
1042 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1043 = SvPVX(PL_linestr);
3280af22
NIS
1044 PL_bufend += SvCUR(PL_linestr);
1045 SAVEFREESV(PL_linestr);
1046
1047 PL_lex_dojoin = FALSE;
1048 PL_lex_brackets = 0;
3280af22
NIS
1049 New(899, PL_lex_brackstack, 120, char);
1050 New(899, PL_lex_casestack, 12, char);
1051 SAVEFREEPV(PL_lex_brackstack);
1052 SAVEFREEPV(PL_lex_casestack);
1053 PL_lex_casemods = 0;
1054 *PL_lex_casestack = '\0';
1055 PL_lex_starts = 0;
1056 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1057 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1058
1059 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1060 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1061 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1062 else
3280af22 1063 PL_lex_inpat = Nullop;
79072805 1064
55497cff 1065 return '(';
79072805
LW
1066}
1067
ffb4593c
NT
1068/*
1069 * S_sublex_done
1070 * Restores lexer state after a S_sublex_push.
1071 */
1072
76e3520e 1073STATIC I32
cea2e8a9 1074S_sublex_done(pTHX)
79072805 1075{
3280af22 1076 if (!PL_lex_starts++) {
9aa983d2
JH
1077 SV *sv = newSVpvn("",0);
1078 if (SvUTF8(PL_linestr))
1079 SvUTF8_on(sv);
3280af22 1080 PL_expect = XOPERATOR;
9aa983d2 1081 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1082 return THING;
1083 }
1084
3280af22
NIS
1085 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1086 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1087 return yylex();
79072805
LW
1088 }
1089
ffb4593c 1090 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1091 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1092 PL_linestr = PL_lex_repl;
1093 PL_lex_inpat = 0;
1094 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1095 PL_bufend += SvCUR(PL_linestr);
1096 SAVEFREESV(PL_linestr);
1097 PL_lex_dojoin = FALSE;
1098 PL_lex_brackets = 0;
3280af22
NIS
1099 PL_lex_casemods = 0;
1100 *PL_lex_casestack = '\0';
1101 PL_lex_starts = 0;
25da4f38 1102 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1103 PL_lex_state = LEX_INTERPNORMAL;
1104 PL_lex_starts++;
e9fa98b2
HS
1105 /* we don't clear PL_lex_repl here, so that we can check later
1106 whether this is an evalled subst; that means we rely on the
1107 logic to ensure sublex_done() is called again only via the
1108 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1109 }
e9fa98b2 1110 else {
3280af22 1111 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1112 PL_lex_repl = Nullsv;
1113 }
79072805 1114 return ',';
ffed7fef
LW
1115 }
1116 else {
f46d017c 1117 LEAVE;
3280af22
NIS
1118 PL_bufend = SvPVX(PL_linestr);
1119 PL_bufend += SvCUR(PL_linestr);
1120 PL_expect = XOPERATOR;
09bef843 1121 PL_sublex_info.sub_inwhat = 0;
79072805 1122 return ')';
ffed7fef
LW
1123 }
1124}
1125
02aa26ce
NT
1126/*
1127 scan_const
1128
1129 Extracts a pattern, double-quoted string, or transliteration. This
1130 is terrifying code.
1131
3280af22
NIS
1132 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1133 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1134 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1135
9b599b2a
GS
1136 Returns a pointer to the character scanned up to. Iff this is
1137 advanced from the start pointer supplied (ie if anything was
1138 successfully parsed), will leave an OP for the substring scanned
1139 in yylval. Caller must intuit reason for not parsing further
1140 by looking at the next characters herself.
1141
02aa26ce
NT
1142 In patterns:
1143 backslashes:
1144 double-quoted style: \r and \n
1145 regexp special ones: \D \s
1146 constants: \x3
1147 backrefs: \1 (deprecated in substitution replacements)
1148 case and quoting: \U \Q \E
1149 stops on @ and $, but not for $ as tail anchor
1150
1151 In transliterations:
1152 characters are VERY literal, except for - not at the start or end
1153 of the string, which indicates a range. scan_const expands the
1154 range to the full set of intermediate characters.
1155
1156 In double-quoted strings:
1157 backslashes:
1158 double-quoted style: \r and \n
1159 constants: \x3
1160 backrefs: \1 (deprecated)
1161 case and quoting: \U \Q \E
1162 stops on @ and $
1163
1164 scan_const does *not* construct ops to handle interpolated strings.
1165 It stops processing as soon as it finds an embedded $ or @ variable
1166 and leaves it to the caller to work out what's going on.
1167
1168 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1169
1170 $ in pattern could be $foo or could be tail anchor. Assumption:
1171 it's a tail anchor if $ is the last thing in the string, or if it's
1172 followed by one of ")| \n\t"
1173
1174 \1 (backreferences) are turned into $1
1175
1176 The structure of the code is
1177 while (there's a character to process) {
1178 handle transliteration ranges
1179 skip regexp comments
1180 skip # initiated comments in //x patterns
1181 check for embedded @foo
1182 check for embedded scalars
1183 if (backslash) {
1184 leave intact backslashes from leave (below)
1185 deprecate \1 in strings and sub replacements
1186 handle string-changing backslashes \l \U \Q \E, etc.
1187 switch (what was escaped) {
1188 handle - in a transliteration (becomes a literal -)
1189 handle \132 octal characters
1190 handle 0x15 hex characters
1191 handle \cV (control V)
1192 handle printf backslashes (\f, \r, \n, etc)
1193 } (end switch)
1194 } (end if backslash)
1195 } (end while character to read)
4e553d73 1196
02aa26ce
NT
1197*/
1198
76e3520e 1199STATIC char *
cea2e8a9 1200S_scan_const(pTHX_ char *start)
79072805 1201{
3280af22 1202 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1203 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1204 register char *s = start; /* start of the constant */
1205 register char *d = SvPVX(sv); /* destination for copies */
1206 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1207 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1208 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1209 /* the constant is UTF8 */
012bcf8d
GS
1210 UV uv;
1211
ac2262e3 1212 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1213 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1214 : UTF;
89491803 1215 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1216 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1217 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1218 : UTF;
dff6d3cd 1219 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1220 PL_lex_inpat
4a2d328f 1221 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1222 : "";
79072805
LW
1223
1224 while (s < send || dorange) {
02aa26ce 1225 /* get transliterations out of the way (they're most literal) */
3280af22 1226 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1227 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1228 if (dorange) {
02aa26ce 1229 I32 i; /* current expanded character */
8ada0baa 1230 I32 min; /* first character in range */
02aa26ce
NT
1231 I32 max; /* last character in range */
1232
1233 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1234 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1235 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1236 d -= 2; /* eat the first char and the - */
1237
8ada0baa
JH
1238 min = (U8)*d; /* first char in range */
1239 max = (U8)d[1]; /* last char in range */
1240
c2e66d9e 1241 if (min > max) {
01ec43d0
GS
1242 Perl_croak(aTHX_
1243 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1244 (char)min, (char)max);
c2e66d9e
GS
1245 }
1246
8ada0baa
JH
1247#ifndef ASCIIish
1248 if ((isLOWER(min) && isLOWER(max)) ||
1249 (isUPPER(min) && isUPPER(max))) {
1250 if (isLOWER(min)) {
1251 for (i = min; i <= max; i++)
1252 if (isLOWER(i))
1253 *d++ = i;
1254 } else {
1255 for (i = min; i <= max; i++)
1256 if (isUPPER(i))
1257 *d++ = i;
1258 }
1259 }
1260 else
1261#endif
1262 for (i = min; i <= max; i++)
1263 *d++ = i;
02aa26ce
NT
1264
1265 /* mark the range as done, and continue */
79072805 1266 dorange = FALSE;
01ec43d0 1267 didrange = TRUE;
79072805 1268 continue;
4e553d73 1269 }
02aa26ce
NT
1270
1271 /* range begins (ignore - as first or last char) */
79072805 1272 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1273 if (didrange) {
1fafa243 1274 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1275 }
a0ed51b3 1276 if (utf) {
a176fa2a 1277 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1278 s++;
1279 continue;
1280 }
79072805
LW
1281 dorange = TRUE;
1282 s++;
01ec43d0
GS
1283 }
1284 else {
1285 didrange = FALSE;
1286 }
79072805 1287 }
02aa26ce
NT
1288
1289 /* if we get here, we're not doing a transliteration */
1290
0f5d15d6
IZ
1291 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1292 except for the last char, which will be done separately. */
3280af22 1293 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1294 if (s[2] == '#') {
1295 while (s < send && *s != ')')
1296 *d++ = *s++;
155aba94
GS
1297 }
1298 else if (s[2] == '{' /* This should match regcomp.c */
1299 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1300 {
cc6b7395 1301 I32 count = 1;
0f5d15d6 1302 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1303 char c;
1304
d9f97599
GS
1305 while (count && (c = *regparse)) {
1306 if (c == '\\' && regparse[1])
1307 regparse++;
4e553d73 1308 else if (c == '{')
cc6b7395 1309 count++;
4e553d73 1310 else if (c == '}')
cc6b7395 1311 count--;
d9f97599 1312 regparse++;
cc6b7395 1313 }
5bdf89e7
IZ
1314 if (*regparse != ')') {
1315 regparse--; /* Leave one char for continuation. */
cc6b7395 1316 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1317 }
0f5d15d6 1318 while (s < regparse)
cc6b7395
IZ
1319 *d++ = *s++;
1320 }
748a9306 1321 }
02aa26ce
NT
1322
1323 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1324 else if (*s == '#' && PL_lex_inpat &&
1325 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1326 while (s+1 < send && *s != '\n')
1327 *d++ = *s++;
1328 }
02aa26ce 1329
5d1d4326
JH
1330 /* check for embedded arrays
1331 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1332 */
7e2040f0 1333 else if (*s == '@' && s[1]
5d1d4326 1334 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1335 break;
02aa26ce
NT
1336
1337 /* check for embedded scalars. only stop if we're sure it's a
1338 variable.
1339 */
79072805 1340 else if (*s == '$') {
3280af22 1341 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1342 break;
c277df42 1343 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1344 break; /* in regexp, $ might be tail anchor */
1345 }
02aa26ce
NT
1346
1347 /* backslashes */
79072805
LW
1348 if (*s == '\\' && s+1 < send) {
1349 s++;
02aa26ce
NT
1350
1351 /* some backslashes we leave behind */
c9f97d15 1352 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1353 *d++ = '\\';
1354 *d++ = *s++;
1355 continue;
1356 }
02aa26ce
NT
1357
1358 /* deprecate \1 in strings and substitution replacements */
3280af22 1359 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1360 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1361 {
599cee73 1362 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1363 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1364 *--s = '$';
1365 break;
1366 }
02aa26ce
NT
1367
1368 /* string-change backslash escapes */
3280af22 1369 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1370 --s;
1371 break;
1372 }
02aa26ce
NT
1373
1374 /* if we get here, it's either a quoted -, or a digit */
79072805 1375 switch (*s) {
02aa26ce
NT
1376
1377 /* quoted - in transliterations */
79072805 1378 case '-':
3280af22 1379 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1380 *d++ = *s++;
1381 continue;
1382 }
1383 /* FALL THROUGH */
1384 default:
11b8faa4 1385 {
7e84c16c 1386 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1387 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1388 "Unrecognized escape \\%c passed through",
1389 *s);
1390 /* default action is to copy the quoted character */
1391 *d++ = *s++;
1392 continue;
1393 }
02aa26ce
NT
1394
1395 /* \132 indicates an octal constant */
79072805
LW
1396 case '0': case '1': case '2': case '3':
1397 case '4': case '5': case '6': case '7':
ba210ebe
JH
1398 {
1399 STRLEN len = 0; /* disallow underscores */
1400 uv = (UV)scan_oct(s, 3, &len);
1401 s += len;
1402 }
012bcf8d 1403 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1404
1405 /* \x24 indicates a hex constant */
79072805 1406 case 'x':
a0ed51b3
LW
1407 ++s;
1408 if (*s == '{') {
1409 char* e = strchr(s, '}');
adaeee49 1410 if (!e) {
a0ed51b3 1411 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1412 e = s;
1413 }
89491803 1414 else {
ba210ebe
JH
1415 STRLEN len = 1; /* allow underscores */
1416 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1417 }
1418 s = e + 1;
a0ed51b3
LW
1419 }
1420 else {
ba210ebe
JH
1421 {
1422 STRLEN len = 0; /* disallow underscores */
1423 uv = (UV)scan_hex(s, 2, &len);
1424 s += len;
1425 }
012bcf8d
GS
1426 }
1427
1428 NUM_ESCAPE_INSERT:
1429 /* Insert oct or hex escaped character.
301d3d20
JH
1430 * There will always enough room in sv since such
1431 * escapes will be longer than any UT-F8 sequence
1432 * they can end up as. */
ba7cea30
JH
1433
1434 /* This spot is wrong for EBCDIC. Characters like
1435 * the lowercase letters and digits are >127 in EBCDIC,
1436 * so here they would need to be mapped to the Unicode
1437 * repertoire. --jhi */
1438
7948272d 1439 if (uv > 127) {
9aa983d2 1440 if (!has_utf8 && uv > 255) {
301d3d20
JH
1441 /* Might need to recode whatever we have
1442 * accumulated so far if it contains any
1443 * hibit chars.
1444 *
1445 * (Can't we keep track of that and avoid
1446 * this rescan? --jhi)
012bcf8d
GS
1447 */
1448 int hicount = 0;
1449 char *c;
301d3d20 1450
012bcf8d 1451 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1452 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1453 hicount++;
1454 }
1455 if (hicount) {
1456 char *old_pvx = SvPVX(sv);
1457 char *src, *dst;
301d3d20
JH
1458
1459 d = SvGROW(sv,
1460 SvCUR(sv) + hicount + 1) +
1461 (d - old_pvx);
012bcf8d
GS
1462
1463 src = d - 1;
1464 d += hicount;
1465 dst = d - 1;
1466
1467 while (src < dst) {
fd400ab9 1468 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1469 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1470 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1471 }
1472 else {
1473 *dst-- = *src--;
1474 }
1475 }
1476 }
1477 }
1478
9aa983d2 1479 if (has_utf8 || uv > 255) {
012bcf8d 1480 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1481 has_utf8 = TRUE;
012bcf8d 1482 }
a0ed51b3 1483 else {
012bcf8d 1484 *d++ = (char)uv;
a0ed51b3 1485 }
012bcf8d
GS
1486 }
1487 else {
1488 *d++ = (char)uv;
a0ed51b3 1489 }
79072805 1490 continue;
02aa26ce 1491
4a2d328f
IZ
1492 /* \N{latin small letter a} is a named character */
1493 case 'N':
423cee85
JH
1494 ++s;
1495 if (*s == '{') {
1496 char* e = strchr(s, '}');
155aba94 1497 SV *res;
423cee85
JH
1498 STRLEN len;
1499 char *str;
4e553d73 1500
423cee85 1501 if (!e) {
5777a3f7 1502 yyerror("Missing right brace on \\N{}");
423cee85
JH
1503 e = s - 1;
1504 goto cont_scan;
1505 }
1506 res = newSVpvn(s + 1, e - s - 1);
4e553d73 1507 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1508 res, Nullsv, "\\N{...}" );
423cee85 1509 str = SvPV(res,len);
89491803 1510 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1511 char *ostart = SvPVX(sv);
1512 SvCUR_set(sv, d - ostart);
1513 SvPOK_on(sv);
e4f3eed8 1514 *d = '\0';
f08d6ad9 1515 sv_utf8_upgrade(sv);
d2f449dd
SB
1516 /* this just broke our allocation above... */
1517 SvGROW(sv, send - start);
f08d6ad9 1518 d = SvPVX(sv) + SvCUR(sv);
89491803 1519 has_utf8 = TRUE;
f08d6ad9 1520 }
423cee85
JH
1521 if (len > e - s + 4) {
1522 char *odest = SvPVX(sv);
1523
1524 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1525 d = SvPVX(sv) + (d - odest);
1526 }
1527 Copy(str, d, len, char);
1528 d += len;
1529 SvREFCNT_dec(res);
1530 cont_scan:
1531 s = e + 1;
1532 }
1533 else
5777a3f7 1534 yyerror("Missing braces on \\N{}");
423cee85
JH
1535 continue;
1536
02aa26ce 1537 /* \c is a control character */
79072805
LW
1538 case 'c':
1539 s++;
9d116dd7
JH
1540#ifdef EBCDIC
1541 *d = *s++;
1542 if (isLOWER(*d))
1543 *d = toUPPER(*d);
4e553d73 1544 *d = toCTRL(*d);
774a9426 1545 d++;
9d116dd7 1546#else
ba210ebe
JH
1547 {
1548 U8 c = *s++;
1549 *d++ = toCTRL(c);
1550 }
9d116dd7 1551#endif
79072805 1552 continue;
02aa26ce
NT
1553
1554 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1555 case 'b':
1556 *d++ = '\b';
1557 break;
1558 case 'n':
1559 *d++ = '\n';
1560 break;
1561 case 'r':
1562 *d++ = '\r';
1563 break;
1564 case 'f':
1565 *d++ = '\f';
1566 break;
1567 case 't':
1568 *d++ = '\t';
1569 break;
34a3fe2a
PP
1570#ifdef EBCDIC
1571 case 'e':
1572 *d++ = '\047'; /* CP 1047 */
1573 break;
1574 case 'a':
1575 *d++ = '\057'; /* CP 1047 */
1576 break;
1577#else
79072805
LW
1578 case 'e':
1579 *d++ = '\033';
1580 break;
1581 case 'a':
1582 *d++ = '\007';
1583 break;
34a3fe2a 1584#endif
02aa26ce
NT
1585 } /* end switch */
1586
79072805
LW
1587 s++;
1588 continue;
02aa26ce
NT
1589 } /* end if (backslash) */
1590
a5a960be
IRC
1591 /* (now in tr/// code again) */
1592
fd400ab9 1593 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
IRC
1594 STRLEN len = (STRLEN) -1;
1595 UV uv;
1596 if (this_utf8) {
a0dbb045 1597 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
IRC
1598 }
1599 if (len == (STRLEN)-1) {
1600 /* Illegal UTF8 (a high-bit byte), make it valid. */
1601 char *old_pvx = SvPVX(sv);
1602 /* need space for one extra char (NOTE: SvCUR() not set here) */
1603 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1604 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1605 }
1606 else {
1607 while (len--)
1608 *d++ = *s++;
1609 }
1610 has_utf8 = TRUE;
1611 continue;
1612 }
1613
79072805 1614 *d++ = *s++;
02aa26ce
NT
1615 } /* while loop to process each character */
1616
1617 /* terminate the string and set up the sv */
79072805 1618 *d = '\0';
463ee0b2 1619 SvCUR_set(sv, d - SvPVX(sv));
79072805 1620 SvPOK_on(sv);
89491803 1621 if (has_utf8)
7e2040f0 1622 SvUTF8_on(sv);
79072805 1623
02aa26ce 1624 /* shrink the sv if we allocated more than we used */
79072805
LW
1625 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1626 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1627 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1628 }
02aa26ce 1629
9b599b2a 1630 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1631 if (s > PL_bufptr) {
1632 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1633 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1634 sv, Nullsv,
4e553d73 1635 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1636 ? "tr"
3280af22 1637 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1638 ? "s"
1639 : "qq")));
79072805 1640 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1641 } else
8990e307 1642 SvREFCNT_dec(sv);
79072805
LW
1643 return s;
1644}
1645
ffb4593c
NT
1646/* S_intuit_more
1647 * Returns TRUE if there's more to the expression (e.g., a subscript),
1648 * FALSE otherwise.
ffb4593c
NT
1649 *
1650 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1651 *
1652 * ->[ and ->{ return TRUE
1653 * { and [ outside a pattern are always subscripts, so return TRUE
1654 * if we're outside a pattern and it's not { or [, then return FALSE
1655 * if we're in a pattern and the first char is a {
1656 * {4,5} (any digits around the comma) returns FALSE
1657 * if we're in a pattern and the first char is a [
1658 * [] returns FALSE
1659 * [SOMETHING] has a funky algorithm to decide whether it's a
1660 * character class or not. It has to deal with things like
1661 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1662 * anything else returns TRUE
1663 */
1664
9cbb5ea2
GS
1665/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1666
76e3520e 1667STATIC int
cea2e8a9 1668S_intuit_more(pTHX_ register char *s)
79072805 1669{
3280af22 1670 if (PL_lex_brackets)
79072805
LW
1671 return TRUE;
1672 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1673 return TRUE;
1674 if (*s != '{' && *s != '[')
1675 return FALSE;
3280af22 1676 if (!PL_lex_inpat)
79072805
LW
1677 return TRUE;
1678
1679 /* In a pattern, so maybe we have {n,m}. */
1680 if (*s == '{') {
1681 s++;
1682 if (!isDIGIT(*s))
1683 return TRUE;
1684 while (isDIGIT(*s))
1685 s++;
1686 if (*s == ',')
1687 s++;
1688 while (isDIGIT(*s))
1689 s++;
1690 if (*s == '}')
1691 return FALSE;
1692 return TRUE;
1693
1694 }
1695
1696 /* On the other hand, maybe we have a character class */
1697
1698 s++;
1699 if (*s == ']' || *s == '^')
1700 return FALSE;
1701 else {
ffb4593c 1702 /* this is terrifying, and it works */
79072805
LW
1703 int weight = 2; /* let's weigh the evidence */
1704 char seen[256];
f27ffc4a 1705 unsigned char un_char = 255, last_un_char;
93a17b20 1706 char *send = strchr(s,']');
3280af22 1707 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1708
1709 if (!send) /* has to be an expression */
1710 return TRUE;
1711
1712 Zero(seen,256,char);
1713 if (*s == '$')
1714 weight -= 3;
1715 else if (isDIGIT(*s)) {
1716 if (s[1] != ']') {
1717 if (isDIGIT(s[1]) && s[2] == ']')
1718 weight -= 10;
1719 }
1720 else
1721 weight -= 100;
1722 }
1723 for (; s < send; s++) {
1724 last_un_char = un_char;
1725 un_char = (unsigned char)*s;
1726 switch (*s) {
1727 case '@':
1728 case '&':
1729 case '$':
1730 weight -= seen[un_char] * 10;
7e2040f0 1731 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1732 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1733 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1734 weight -= 100;
1735 else
1736 weight -= 10;
1737 }
1738 else if (*s == '$' && s[1] &&
93a17b20
LW
1739 strchr("[#!%*<>()-=",s[1])) {
1740 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1741 weight -= 10;
1742 else
1743 weight -= 1;
1744 }
1745 break;
1746 case '\\':
1747 un_char = 254;
1748 if (s[1]) {
93a17b20 1749 if (strchr("wds]",s[1]))
79072805
LW
1750 weight += 100;
1751 else if (seen['\''] || seen['"'])
1752 weight += 1;
93a17b20 1753 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1754 weight += 40;
1755 else if (isDIGIT(s[1])) {
1756 weight += 40;
1757 while (s[1] && isDIGIT(s[1]))
1758 s++;
1759 }
1760 }
1761 else
1762 weight += 100;
1763 break;
1764 case '-':
1765 if (s[1] == '\\')
1766 weight += 50;
93a17b20 1767 if (strchr("aA01! ",last_un_char))
79072805 1768 weight += 30;
93a17b20 1769 if (strchr("zZ79~",s[1]))
79072805 1770 weight += 30;
f27ffc4a
GS
1771 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1772 weight -= 5; /* cope with negative subscript */
79072805
LW
1773 break;
1774 default:
93a17b20 1775 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1776 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1777 char *d = tmpbuf;
1778 while (isALPHA(*s))
1779 *d++ = *s++;
1780 *d = '\0';
1781 if (keyword(tmpbuf, d - tmpbuf))
1782 weight -= 150;
1783 }
1784 if (un_char == last_un_char + 1)
1785 weight += 5;
1786 weight -= seen[un_char];
1787 break;
1788 }
1789 seen[un_char]++;
1790 }
1791 if (weight >= 0) /* probably a character class */
1792 return FALSE;
1793 }
1794
1795 return TRUE;
1796}
ffed7fef 1797
ffb4593c
NT
1798/*
1799 * S_intuit_method
1800 *
1801 * Does all the checking to disambiguate
1802 * foo bar
1803 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1804 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1805 *
1806 * First argument is the stuff after the first token, e.g. "bar".
1807 *
1808 * Not a method if bar is a filehandle.
1809 * Not a method if foo is a subroutine prototyped to take a filehandle.
1810 * Not a method if it's really "Foo $bar"
1811 * Method if it's "foo $bar"
1812 * Not a method if it's really "print foo $bar"
1813 * Method if it's really "foo package::" (interpreted as package->foo)
1814 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1815 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1816 * =>
1817 */
1818
76e3520e 1819STATIC int
cea2e8a9 1820S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1821{
1822 char *s = start + (*start == '$');
3280af22 1823 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1824 STRLEN len;
1825 GV* indirgv;
1826
1827 if (gv) {
b6c543e3 1828 CV *cv;
a0d0e21e
LW
1829 if (GvIO(gv))
1830 return 0;
b6c543e3
IZ
1831 if ((cv = GvCVu(gv))) {
1832 char *proto = SvPVX(cv);
1833 if (proto) {
1834 if (*proto == ';')
1835 proto++;
1836 if (*proto == '*')
1837 return 0;
1838 }
1839 } else
a0d0e21e
LW
1840 gv = 0;
1841 }
8903cb82 1842 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1843 /* start is the beginning of the possible filehandle/object,
1844 * and s is the end of it
1845 * tmpbuf is a copy of it
1846 */
1847
a0d0e21e 1848 if (*start == '$') {
3280af22 1849 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1850 return 0;
1851 s = skipspace(s);
3280af22
NIS
1852 PL_bufptr = start;
1853 PL_expect = XREF;
a0d0e21e
LW
1854 return *s == '(' ? FUNCMETH : METHOD;
1855 }
1856 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1857 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1858 len -= 2;
1859 tmpbuf[len] = '\0';
1860 goto bare_package;
1861 }
1862 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1863 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1864 return 0;
1865 /* filehandle or package name makes it a method */
89bfa8cd 1866 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1867 s = skipspace(s);
3280af22 1868 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1869 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1870 bare_package:
3280af22 1871 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1872 newSVpvn(tmpbuf,len));
3280af22
NIS
1873 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1874 PL_expect = XTERM;
a0d0e21e 1875 force_next(WORD);
3280af22 1876 PL_bufptr = s;
a0d0e21e
LW
1877 return *s == '(' ? FUNCMETH : METHOD;
1878 }
1879 }
1880 return 0;
1881}
1882
ffb4593c
NT
1883/*
1884 * S_incl_perldb
1885 * Return a string of Perl code to load the debugger. If PERL5DB
1886 * is set, it will return the contents of that, otherwise a
1887 * compile-time require of perl5db.pl.
1888 */
1889
76e3520e 1890STATIC char*
cea2e8a9 1891S_incl_perldb(pTHX)
a0d0e21e 1892{
3280af22 1893 if (PL_perldb) {
76e3520e 1894 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1895
1896 if (pdb)
1897 return pdb;
61bb5906 1898 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1899 return "BEGIN { require 'perl5db.pl' }";
1900 }
1901 return "";
1902}
1903
1904
16d20bd9 1905/* Encoded script support. filter_add() effectively inserts a
4e553d73 1906 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1907 * Note that the filter function only applies to the current source file
1908 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1909 *
1910 * The datasv parameter (which may be NULL) can be used to pass
1911 * private data to this instance of the filter. The filter function
1912 * can recover the SV using the FILTER_DATA macro and use it to
1913 * store private buffers and state information.
1914 *
1915 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1916 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1917 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1918 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1919 * private use must be set using malloc'd pointers.
1920 */
16d20bd9
AD
1921
1922SV *
864dbfa3 1923Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1924{
f4c556ac
GS
1925 if (!funcp)
1926 return Nullsv;
1927
3280af22
NIS
1928 if (!PL_rsfp_filters)
1929 PL_rsfp_filters = newAV();
16d20bd9 1930 if (!datasv)
8c52afec 1931 datasv = NEWSV(255,0);
16d20bd9 1932 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1933 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1934 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1935 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1936 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1937 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1938 av_unshift(PL_rsfp_filters, 1);
1939 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1940 return(datasv);
1941}
4e553d73 1942
16d20bd9
AD
1943
1944/* Delete most recently added instance of this filter function. */
a0d0e21e 1945void
864dbfa3 1946Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1947{
e0c19803 1948 SV *datasv;
f4c556ac 1949 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1950 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1951 return;
1952 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1953 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1954 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1955 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1956 IoANY(datasv) = (void *)NULL;
3280af22 1957 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1958
16d20bd9
AD
1959 return;
1960 }
1961 /* we need to search for the correct entry and clear it */
cea2e8a9 1962 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1963}
1964
1965
1966/* Invoke the n'th filter function for the current rsfp. */
1967I32
864dbfa3 1968Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1969
1970
8ac85365 1971 /* 0 = read one text line */
a0d0e21e 1972{
16d20bd9
AD
1973 filter_t funcp;
1974 SV *datasv = NULL;
e50aee73 1975
3280af22 1976 if (!PL_rsfp_filters)
16d20bd9 1977 return -1;
3280af22 1978 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1979 /* Provide a default input filter to make life easy. */
1980 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1981 DEBUG_P(PerlIO_printf(Perl_debug_log,
1982 "filter_read %d: from rsfp\n", idx));
4e553d73 1983 if (maxlen) {
16d20bd9
AD
1984 /* Want a block */
1985 int len ;
1986 int old_len = SvCUR(buf_sv) ;
1987
1988 /* ensure buf_sv is large enough */
1989 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1990 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1991 if (PerlIO_error(PL_rsfp))
37120919
AD
1992 return -1; /* error */
1993 else
1994 return 0 ; /* end of file */
1995 }
16d20bd9
AD
1996 SvCUR_set(buf_sv, old_len + len) ;
1997 } else {
1998 /* Want a line */
3280af22
NIS
1999 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2000 if (PerlIO_error(PL_rsfp))
37120919
AD
2001 return -1; /* error */
2002 else
2003 return 0 ; /* end of file */
2004 }
16d20bd9
AD
2005 }
2006 return SvCUR(buf_sv);
2007 }
2008 /* Skip this filter slot if filter has been deleted */
3280af22 2009 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2010 DEBUG_P(PerlIO_printf(Perl_debug_log,
2011 "filter_read %d: skipped (filter deleted)\n",
2012 idx));
16d20bd9
AD
2013 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2014 }
2015 /* Get function pointer hidden within datasv */
4755096e 2016 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2017 DEBUG_P(PerlIO_printf(Perl_debug_log,
2018 "filter_read %d: via function %p (%s)\n",
2019 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2020 /* Call function. The function is expected to */
2021 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2022 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2023 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2024}
2025
76e3520e 2026STATIC char *
cea2e8a9 2027S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2028{
c39cd008 2029#ifdef PERL_CR_FILTER
3280af22 2030 if (!PL_rsfp_filters) {
c39cd008 2031 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2032 }
2033#endif
3280af22 2034 if (PL_rsfp_filters) {
16d20bd9 2035
55497cff 2036 if (!append)
2037 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2038 if (FILTER_READ(0, sv, 0) > 0)
2039 return ( SvPVX(sv) ) ;
2040 else
2041 return Nullch ;
2042 }
9d116dd7 2043 else
fd049845 2044 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2045}
2046
01ec43d0
GS
2047STATIC HV *
2048S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2049{
2050 GV *gv;
2051
01ec43d0 2052 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2053 return PL_curstash;
2054
2055 if (len > 2 &&
2056 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2057 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2058 {
2059 return GvHV(gv); /* Foo:: */
def3634b
GS
2060 }
2061
2062 /* use constant CLASS => 'MyClass' */
2063 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2064 SV *sv;
2065 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2066 pkgname = SvPV_nolen(sv);
2067 }
2068 }
2069
2070 return gv_stashpv(pkgname, FALSE);
2071}
a0d0e21e 2072
748a9306
LW
2073#ifdef DEBUGGING
2074 static char* exp_name[] =
09bef843
SB
2075 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2076 "ATTRTERM", "TERMBLOCK"
2077 };
748a9306 2078#endif
463ee0b2 2079
02aa26ce
NT
2080/*
2081 yylex
2082
2083 Works out what to call the token just pulled out of the input
2084 stream. The yacc parser takes care of taking the ops we return and
2085 stitching them into a tree.
2086
2087 Returns:
2088 PRIVATEREF
2089
2090 Structure:
2091 if read an identifier
2092 if we're in a my declaration
2093 croak if they tried to say my($foo::bar)
2094 build the ops for a my() declaration
2095 if it's an access to a my() variable
2096 are we in a sort block?
2097 croak if my($a); $a <=> $b
2098 build ops for access to a my() variable
2099 if in a dq string, and they've said @foo and we can't find @foo
2100 croak
2101 build ops for a bareword
2102 if we already built the token before, use it.
2103*/
2104
dba4d153 2105#ifdef USE_PURE_BISON
bf4acbe4 2106#ifdef __SC__
dba4d153 2107#pragma segment Perl_yylex_r
bf4acbe4 2108#endif
864dbfa3 2109int
dba4d153 2110Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2111{
20141f0e
IRC
2112 int r;
2113
6f202aea 2114 yyactlevel++;
20141f0e
IRC
2115 yylval_pointer[yyactlevel] = lvalp;
2116 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2117 if (yyactlevel >= YYMAXLEVEL)
2118 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2119
dba4d153 2120 r = Perl_yylex(aTHX);
20141f0e 2121
d8ae6756
IRC
2122 if (yyactlevel > 0)
2123 yyactlevel--;
20141f0e
IRC
2124
2125 return r;
2126}
dba4d153 2127#endif
20141f0e 2128
dba4d153
JH
2129#ifdef __SC__
2130#pragma segment Perl_yylex
2131#endif
dba4d153 2132int
dba4d153 2133Perl_yylex(pTHX)
20141f0e 2134{
79072805 2135 register char *s;
378cc40b 2136 register char *d;
79072805 2137 register I32 tmp;
463ee0b2 2138 STRLEN len;
161b471a
NIS
2139 GV *gv = Nullgv;
2140 GV **gvp = 0;
aa7440fb 2141 bool bof = FALSE;
a687059c 2142
02aa26ce 2143 /* check if there's an identifier for us to look at */
3280af22 2144 if (PL_pending_ident) {
02aa26ce 2145 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2146 char pit = PL_pending_ident;
2147 PL_pending_ident = 0;
bbce6d69 2148
607df283
SC
2149 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2150 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2151
02aa26ce
NT
2152 /* if we're in a my(), we can't allow dynamics here.
2153 $foo'bar has already been turned into $foo::bar, so
2154 just check for colons.
2155
2156 if it's a legal name, the OP is a PADANY.
2157 */
3280af22 2158 if (PL_in_my) {
77ca0c92 2159 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2160 if (strchr(PL_tokenbuf,':'))
2161 yyerror(Perl_form(aTHX_ "No package name allowed for "
2162 "variable %s in \"our\"",
2163 PL_tokenbuf));
77ca0c92
LW
2164 tmp = pad_allocmy(PL_tokenbuf);
2165 }
2166 else {
2167 if (strchr(PL_tokenbuf,':'))
2168 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2169
77ca0c92
LW
2170 yylval.opval = newOP(OP_PADANY, 0);
2171 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2172 return PRIVATEREF;
2173 }
bbce6d69 2174 }
2175
4e553d73 2176 /*
02aa26ce
NT
2177 build the ops for accesses to a my() variable.
2178
2179 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2180 then used in a comparison. This catches most, but not
2181 all cases. For instance, it catches
2182 sort { my($a); $a <=> $b }
2183 but not
2184 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2185 (although why you'd do that is anyone's guess).
2186 */
2187
3280af22 2188 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2189#ifdef USE_THREADS
54b9620d 2190 /* Check for single character per-thread SVs */
3280af22
NIS
2191 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2192 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2193 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2194 {
2faa37cc 2195 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2196 yylval.opval->op_targ = tmp;
2197 return PRIVATEREF;
2198 }
2199#endif /* USE_THREADS */
3280af22 2200 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2201 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2202 /* might be an "our" variable" */
f472eb5c 2203 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2204 /* build ops for a bareword */
f472eb5c
GS
2205 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2206 sv_catpvn(sym, "::", 2);
2207 sv_catpv(sym, PL_tokenbuf+1);
2208 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2209 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2210 gv_fetchpv(SvPVX(sym),
77ca0c92 2211 (PL_in_eval
f472eb5c
GS
2212 ? (GV_ADDMULTI | GV_ADDINEVAL)
2213 : TRUE
77ca0c92
LW
2214 ),
2215 ((PL_tokenbuf[0] == '$') ? SVt_PV
2216 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2217 : SVt_PVHV));
2218 return WORD;
2219 }
2220
02aa26ce 2221 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2222 if (PL_last_lop_op == OP_SORT &&
2223 PL_tokenbuf[0] == '$' &&
2224 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2225 && !PL_tokenbuf[2])
bbce6d69 2226 {
3280af22
NIS
2227 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2228 d < PL_bufend && *d != '\n';
a863c7d1
MB
2229 d++)
2230 {
2231 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2232 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2233 PL_tokenbuf);
a863c7d1 2234 }
bbce6d69 2235 }
2236 }
bbce6d69 2237
a863c7d1
MB
2238 yylval.opval = newOP(OP_PADANY, 0);
2239 yylval.opval->op_targ = tmp;
2240 return PRIVATEREF;
2241 }
bbce6d69 2242 }
2243
02aa26ce
NT
2244 /*
2245 Whine if they've said @foo in a doublequoted string,
2246 and @foo isn't a variable we can find in the symbol
2247 table.
2248 */
3280af22
NIS
2249 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2250 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2251 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2252 && ckWARN(WARN_AMBIGUOUS))
2253 {
2254 /* Downgraded from fatal to warning 20000522 mjd */
2255 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2256 "Possible unintended interpolation of %s in string",
2257 PL_tokenbuf);
2258 }
bbce6d69 2259 }
2260
02aa26ce 2261 /* build ops for a bareword */
3280af22 2262 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2263 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2264 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2265 ((PL_tokenbuf[0] == '$') ? SVt_PV
2266 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2267 : SVt_PVHV));
2268 return WORD;
2269 }
2270
02aa26ce
NT
2271 /* no identifier pending identification */
2272
3280af22 2273 switch (PL_lex_state) {
79072805
LW
2274#ifdef COMMENTARY
2275 case LEX_NORMAL: /* Some compilers will produce faster */
2276 case LEX_INTERPNORMAL: /* code if we comment these out. */
2277 break;
2278#endif
2279
09bef843 2280 /* when we've already built the next token, just pull it out of the queue */
79072805 2281 case LEX_KNOWNEXT:
3280af22
NIS
2282 PL_nexttoke--;
2283 yylval = PL_nextval[PL_nexttoke];
2284 if (!PL_nexttoke) {
2285 PL_lex_state = PL_lex_defer;
2286 PL_expect = PL_lex_expect;
2287 PL_lex_defer = LEX_NORMAL;
463ee0b2 2288 }
607df283 2289 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2290 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2291 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2292
3280af22 2293 return(PL_nexttype[PL_nexttoke]);
79072805 2294
02aa26ce 2295 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2296 when we get here, PL_bufptr is at the \
02aa26ce 2297 */
79072805
LW
2298 case LEX_INTERPCASEMOD:
2299#ifdef DEBUGGING
3280af22 2300 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2301 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2302#endif
02aa26ce 2303 /* handle \E or end of string */
3280af22 2304 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2305 char oldmod;
02aa26ce
NT
2306
2307 /* if at a \E */
3280af22
NIS
2308 if (PL_lex_casemods) {
2309 oldmod = PL_lex_casestack[--PL_lex_casemods];
2310 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2311
3280af22
NIS
2312 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2313 PL_bufptr += 2;
2314 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2315 }
79072805
LW
2316 return ')';
2317 }
3280af22
NIS
2318 if (PL_bufptr != PL_bufend)
2319 PL_bufptr += 2;
2320 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2321 return yylex();
79072805
LW
2322 }
2323 else {
607df283
SC
2324 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2325 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2326 s = PL_bufptr + 1;
79072805
LW
2327 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2328 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2329 if (strchr("LU", *s) &&
3280af22 2330 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2331 {
3280af22 2332 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2333 return ')';
2334 }
3280af22
NIS
2335 if (PL_lex_casemods > 10) {
2336 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2337 if (newlb != PL_lex_casestack) {
a0d0e21e 2338 SAVEFREEPV(newlb);
3280af22 2339 PL_lex_casestack = newlb;
a0d0e21e
LW
2340 }
2341 }
3280af22
NIS
2342 PL_lex_casestack[PL_lex_casemods++] = *s;
2343 PL_lex_casestack[PL_lex_casemods] = '\0';
2344 PL_lex_state = LEX_INTERPCONCAT;
2345 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2346 force_next('(');
2347 if (*s == 'l')
3280af22 2348 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2349 else if (*s == 'u')
3280af22 2350 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2351 else if (*s == 'L')
3280af22 2352 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2353 else if (*s == 'U')
3280af22 2354 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2355 else if (*s == 'Q')
3280af22 2356 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2357 else
cea2e8a9 2358 Perl_croak(aTHX_ "panic: yylex");
3280af22 2359 PL_bufptr = s + 1;
79072805 2360 force_next(FUNC);
3280af22
NIS
2361 if (PL_lex_starts) {
2362 s = PL_bufptr;
2363 PL_lex_starts = 0;
79072805
LW
2364 Aop(OP_CONCAT);
2365 }
2366 else
cea2e8a9 2367 return yylex();
79072805
LW
2368 }
2369
55497cff 2370 case LEX_INTERPPUSH:
2371 return sublex_push();
2372
79072805 2373 case LEX_INTERPSTART:
3280af22 2374 if (PL_bufptr == PL_bufend)
79072805 2375 return sublex_done();
607df283
SC
2376 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2377 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2378 PL_expect = XTERM;
2379 PL_lex_dojoin = (*PL_bufptr == '@');
2380 PL_lex_state = LEX_INTERPNORMAL;
2381 if (PL_lex_dojoin) {
2382 PL_nextval[PL_nexttoke].ival = 0;
79072805 2383 force_next(',');
554b3eca 2384#ifdef USE_THREADS
533c011a
NIS
2385 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2386 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2387 force_next(PRIVATEREF);
2388#else
a0d0e21e 2389 force_ident("\"", '$');
554b3eca 2390#endif /* USE_THREADS */
3280af22 2391 PL_nextval[PL_nexttoke].ival = 0;
79072805 2392 force_next('$');
3280af22 2393 PL_nextval[PL_nexttoke].ival = 0;
79072805 2394 force_next('(');
3280af22 2395 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2396 force_next(FUNC);
2397 }
3280af22
NIS
2398 if (PL_lex_starts++) {
2399 s = PL_bufptr;
79072805
LW
2400 Aop(OP_CONCAT);
2401 }
cea2e8a9 2402 return yylex();
79072805
LW
2403
2404 case LEX_INTERPENDMAYBE:
3280af22
NIS
2405 if (intuit_more(PL_bufptr)) {
2406 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2407 break;
2408 }
2409 /* FALL THROUGH */
2410
2411 case LEX_INTERPEND:
3280af22
NIS
2412 if (PL_lex_dojoin) {
2413 PL_lex_dojoin = FALSE;
2414 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2415 return ')';
2416 }
43a16006 2417 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2418 && SvEVALED(PL_lex_repl))
43a16006 2419 {
e9fa98b2 2420 if (PL_bufptr != PL_bufend)
cea2e8a9 2421 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2422 PL_lex_repl = Nullsv;
2423 }
79072805
LW
2424 /* FALLTHROUGH */
2425 case LEX_INTERPCONCAT:
2426#ifdef DEBUGGING
3280af22 2427 if (PL_lex_brackets)
cea2e8a9 2428 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2429#endif
3280af22 2430 if (PL_bufptr == PL_bufend)
79072805
LW
2431 return sublex_done();
2432
3280af22
NIS
2433 if (SvIVX(PL_linestr) == '\'') {
2434 SV *sv = newSVsv(PL_linestr);
2435 if (!PL_lex_inpat)
76e3520e 2436 sv = tokeq(sv);
3280af22 2437 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2438 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2439 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2440 s = PL_bufend;
79072805
LW
2441 }
2442 else {
3280af22 2443 s = scan_const(PL_bufptr);
79072805 2444 if (*s == '\\')
3280af22 2445 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2446 else
3280af22 2447 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2448 }
2449
3280af22
NIS
2450 if (s != PL_bufptr) {
2451 PL_nextval[PL_nexttoke] = yylval;
2452 PL_expect = XTERM;
79072805 2453 force_next(THING);
3280af22 2454 if (PL_lex_starts++)
79072805
LW
2455 Aop(OP_CONCAT);
2456 else {
3280af22 2457 PL_bufptr = s;
cea2e8a9 2458 return yylex();
79072805
LW
2459 }
2460 }
2461
cea2e8a9 2462 return yylex();
a0d0e21e 2463 case LEX_FORMLINE:
3280af22
NIS
2464 PL_lex_state = LEX_NORMAL;
2465 s = scan_formline(PL_bufptr);
2466 if (!PL_lex_formbrack)
a0d0e21e
LW
2467 goto rightbracket;
2468 OPERATOR(';');
79072805
LW
2469 }
2470
3280af22
NIS
2471 s = PL_bufptr;
2472 PL_oldoldbufptr = PL_oldbufptr;
2473 PL_oldbufptr = s;
607df283 2474 DEBUG_T( {
bf49b057
GS
2475 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2476 exp_name[PL_expect], s);
79072805 2477 } )
463ee0b2
LW
2478
2479 retry:
378cc40b
LW
2480 switch (*s) {
2481 default:
7e2040f0 2482 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2483 goto keylookup;
cea2e8a9 2484 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2485 case 4:
2486 case 26:
2487 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2488 case 0:
3280af22
NIS
2489 if (!PL_rsfp) {
2490 PL_last_uni = 0;
2491 PL_last_lop = 0;
2492 if (PL_lex_brackets)
d98d5fff 2493 yyerror("Missing right curly or square bracket");
4e553d73 2494 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2495 "### Tokener got EOF\n");
2496 } )
79072805 2497 TOKEN(0);
463ee0b2 2498 }
3280af22 2499 if (s++ < PL_bufend)
a687059c 2500 goto retry; /* ignore stray nulls */
3280af22
NIS
2501 PL_last_uni = 0;
2502 PL_last_lop = 0;
2503 if (!PL_in_eval && !PL_preambled) {
2504 PL_preambled = TRUE;
2505 sv_setpv(PL_linestr,incl_perldb());
2506 if (SvCUR(PL_linestr))
2507 sv_catpv(PL_linestr,";");
2508 if (PL_preambleav){
2509 while(AvFILLp(PL_preambleav) >= 0) {
2510 SV *tmpsv = av_shift(PL_preambleav);
2511 sv_catsv(PL_linestr, tmpsv);
2512 sv_catpv(PL_linestr, ";");
91b7def8 2513 sv_free(tmpsv);
2514 }
3280af22
NIS
2515 sv_free((SV*)PL_preambleav);
2516 PL_preambleav = NULL;
91b7def8 2517 }
3280af22
NIS
2518 if (PL_minus_n || PL_minus_p) {
2519 sv_catpv(PL_linestr, "LINE: while (<>) {");
2520 if (PL_minus_l)
2521 sv_catpv(PL_linestr,"chomp;");
2522 if (PL_minus_a) {
8fd239a7
CS
2523 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2524 if (gv)
2525 GvIMPORTED_AV_on(gv);
3280af22
NIS
2526 if (PL_minus_F) {
2527 if (strchr("/'\"", *PL_splitstr)
2528 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2529 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2530 else {
2531 char delim;
2532 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2533 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2534 delim = *s;
cea2e8a9 2535 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2536 "q" + (delim == '\''), delim);
3280af22 2537 for (s = PL_splitstr; *s; s++) {
54310121 2538 if (*s == '\\')
3280af22
NIS
2539 sv_catpvn(PL_linestr, "\\", 1);
2540 sv_catpvn(PL_linestr, s, 1);
54310121 2541 }
cea2e8a9 2542 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2543 }
2304df62
AD
2544 }
2545 else
3280af22 2546 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2547 }
79072805 2548 }
3280af22
NIS
2549 sv_catpv(PL_linestr, "\n");
2550 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2551 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2552 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2553 SV *sv = NEWSV(85,0);
2554
2555 sv_upgrade(sv, SVt_PVMG);
3280af22 2556 sv_setsv(sv,PL_linestr);
57843af0 2557 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2558 }
79072805 2559 goto retry;
a687059c 2560 }
e929a76b 2561 do {
aa7440fb 2562 bof = PL_rsfp ? TRUE : FALSE;
226017aa
DD
2563 if (bof) {
2564#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2565# ifdef __GNU_LIBRARY__
2566# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2567# define FTELL_FOR_PIPE_IS_BROKEN
2568# endif
e3f494f1
JH
2569# else
2570# ifdef __GLIBC__
2571# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2572# define FTELL_FOR_PIPE_IS_BROKEN
2573# endif
2574# endif
226017aa
DD
2575# endif
2576#endif
2577#ifdef FTELL_FOR_PIPE_IS_BROKEN
2578 /* This loses the possibility to detect the bof
2579 * situation on perl -P when the libc5 is being used.
2580 * Workaround? Maybe attach some extra state to PL_rsfp?
2581 */
2582 if (!PL_preprocess)
2583 bof = PerlIO_tell(PL_rsfp) == 0;
2584#else
2585 bof = PerlIO_tell(PL_rsfp) == 0;
2586#endif
2587 }
dea0fc0b
JH
2588 s = filter_gets(PL_linestr, PL_rsfp, 0);
2589 if (s == Nullch) {
e929a76b 2590 fake_eof:
3280af22
NIS
2591 if (PL_rsfp) {
2592 if (PL_preprocess && !PL_in_eval)
2593 (void)PerlProc_pclose(PL_rsfp);
2594 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2595 PerlIO_clearerr(PL_rsfp);
395c3793 2596 else
3280af22
NIS
2597 (void)PerlIO_close(PL_rsfp);
2598 PL_rsfp = Nullfp;
4a9ae47a 2599 PL_doextract = FALSE;
395c3793 2600 }
3280af22
NIS
2601 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2602 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2603 sv_catpv(PL_linestr,";}");
2604 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2605 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2606 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2607 goto retry;
2608 }
3280af22
NIS
2609 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2610 sv_setpv(PL_linestr,"");
79072805 2611 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2612 } else if (bof) {
2613 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2614 s = swallow_bom((U8*)s);
378cc40b 2615 }
3280af22 2616 if (PL_doextract) {
a0d0e21e 2617 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2618 PL_doextract = FALSE;
a0d0e21e
LW
2619
2620 /* Incest with pod. */
2621 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2622 sv_setpv(PL_linestr, "");
2623 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2624 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2625 PL_doextract = FALSE;
a0d0e21e 2626 }
4e553d73 2627 }
463ee0b2 2628 incline(s);
3280af22
NIS
2629 } while (PL_doextract);
2630 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2631 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2632 SV *sv = NEWSV(85,0);
a687059c 2633
93a17b20 2634 sv_upgrade(sv, SVt_PVMG);
3280af22 2635 sv_setsv(sv,PL_linestr);
57843af0 2636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2637 }
3280af22 2638 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2639 if (CopLINE(PL_curcop) == 1) {
3280af22 2640 while (s < PL_bufend && isSPACE(*s))
79072805 2641 s++;
a0d0e21e 2642 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2643 s++;
44a8e56a 2644 d = Nullch;
3280af22 2645 if (!PL_in_eval) {
44a8e56a 2646 if (*s == '#' && *(s+1) == '!')
2647 d = s + 2;
2648#ifdef ALTERNATE_SHEBANG
2649 else {
2650 static char as[] = ALTERNATE_SHEBANG;
2651 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2652 d = s + (sizeof(as) - 1);
2653 }
2654#endif /* ALTERNATE_SHEBANG */
2655 }
2656 if (d) {
b8378b72 2657 char *ipath;
774d564b 2658 char *ipathend;
b8378b72 2659
774d564b 2660 while (isSPACE(*d))
b8378b72
CS
2661 d++;
2662 ipath = d;
774d564b 2663 while (*d && !isSPACE(*d))
2664 d++;
2665 ipathend = d;
2666
2667#ifdef ARG_ZERO_IS_SCRIPT
2668 if (ipathend > ipath) {
2669 /*
2670 * HP-UX (at least) sets argv[0] to the script name,
2671 * which makes $^X incorrect. And Digital UNIX and Linux,
2672 * at least, set argv[0] to the basename of the Perl
2673 * interpreter. So, having found "#!", we'll set it right.
2674 */
2675 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2676 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2677 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2678 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2679 SvSETMAGIC(x);
2680 }
774d564b 2681 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2682 }
774d564b 2683#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2684
2685 /*
2686 * Look for options.
2687 */
748a9306 2688 d = instr(s,"perl -");
84e30d1a 2689 if (!d) {
748a9306 2690 d = instr(s,"perl");
84e30d1a
GS
2691#if defined(DOSISH)
2692 /* avoid getting into infinite loops when shebang
2693 * line contains "Perl" rather than "perl" */
2694 if (!d) {
2695 for (d = ipathend-4; d >= ipath; --d) {
2696 if ((*d == 'p' || *d == 'P')
2697 && !ibcmp(d, "perl", 4))
2698 {
2699 break;
2700 }
2701 }
2702 if (d < ipath)
2703 d = Nullch;
2704 }
2705#endif
2706 }
44a8e56a 2707#ifdef ALTERNATE_SHEBANG
2708 /*
2709 * If the ALTERNATE_SHEBANG on this system starts with a
2710 * character that can be part of a Perl expression, then if
2711 * we see it but not "perl", we're probably looking at the
2712 * start of Perl code, not a request to hand off to some
2713 * other interpreter. Similarly, if "perl" is there, but
2714 * not in the first 'word' of the line, we assume the line
2715 * contains the start of the Perl program.
44a8e56a 2716 */
2717 if (d && *s != '#') {
774d564b 2718 char *c = ipath;
44a8e56a 2719 while (*c && !strchr("; \t\r\n\f\v#", *c))
2720 c++;
2721 if (c < d)
2722 d = Nullch; /* "perl" not in first word; ignore */
2723 else
2724 *s = '#'; /* Don't try to parse shebang line */
2725 }
774d564b 2726#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2727#ifndef MACOS_TRADITIONAL
748a9306 2728 if (!d &&
44a8e56a 2729 *s == '#' &&
774d564b 2730 ipathend > ipath &&
3280af22 2731 !PL_minus_c &&
748a9306 2732 !instr(s,"indir") &&
3280af22 2733 instr(PL_origargv[0],"perl"))
748a9306 2734 {
9f68db38 2735 char **newargv;
9f68db38 2736
774d564b 2737 *ipathend = '\0';
2738 s = ipathend + 1;
3280af22 2739 while (s < PL_bufend && isSPACE(*s))
9f68db38 2740 s++;
3280af22
NIS
2741 if (s < PL_bufend) {
2742 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2743 newargv[1] = s;
3280af22 2744 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2745 s++;
2746 *s = '\0';
3280af22 2747 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2748 }
2749 else
3280af22 2750 newargv = PL_origargv;
774d564b 2751 newargv[0] = ipath;
b4748376 2752 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2753 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2754 }
bf4acbe4 2755#endif
748a9306 2756 if (d) {
3280af22
NIS
2757 U32 oldpdb = PL_perldb;
2758 bool oldn = PL_minus_n;
2759 bool oldp = PL_minus_p;
748a9306
LW
2760
2761 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2762 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2763
2764 if (*d++ == '-') {
8cc95fdb 2765 do {
2766 if (*d == 'M' || *d == 'm') {
2767 char *m = d;
2768 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2769 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2770 (int)(d - m), m);
2771 }
2772 d = moreswitches(d);
2773 } while (d);
155aba94
GS
2774 if ((PERLDB_LINE && !oldpdb) ||
2775 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2776 /* if we have already added "LINE: while (<>) {",
2777 we must not do it again */
748a9306 2778 {
3280af22
NIS
2779 sv_setpv(PL_linestr, "");
2780 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2781 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2782 PL_preambled = FALSE;
84902520 2783 if (PERLDB_LINE)
3280af22 2784 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2785 goto retry;
2786 }
a0d0e21e 2787 }
79072805 2788 }
9f68db38 2789 }
79072805 2790 }
3280af22
NIS
2791 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2792 PL_bufptr = s;
2793 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2794 return yylex();
ae986130 2795 }
378cc40b 2796 goto retry;
4fdae800 2797 case '\r':
6a27c188 2798#ifdef PERL_STRICT_CR
cea2e8a9 2799 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2800 Perl_croak(aTHX_
cc507455 2801 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2802#endif
4fdae800 2803 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2804#ifdef MACOS_TRADITIONAL
2805 case '\312':
2806#endif
378cc40b
LW
2807 s++;
2808 goto retry;
378cc40b 2809 case '#':
e929a76b 2810 case '\n':
3280af22 2811 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2812 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2813 /* handle eval qq[#line 1 "foo"\n ...] */
2814 CopLINE_dec(PL_curcop);
2815 incline(s);
2816 }
3280af22 2817 d = PL_bufend;
a687059c 2818 while (s < d && *s != '\n')
378cc40b 2819 s++;
0f85fab0 2820 if (s < d)
378cc40b 2821 s++;
463ee0b2 2822 incline(s);
3280af22
NIS
2823 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2824 PL_bufptr = s;
2825 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2826 return yylex();
a687059c 2827 }
378cc40b 2828 }
a687059c 2829 else {
378cc40b 2830 *s = '\0';
3280af22 2831 PL_bufend = s;
a687059c 2832 }
378cc40b
LW
2833 goto retry;
2834 case '-':
79072805 2835 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2836 I32 ftst = 0;
2837
378cc40b 2838 s++;
3280af22 2839 PL_bufptr = s;
748a9306
LW
2840 tmp = *s++;
2841
bf4acbe4 2842 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2843 s++;
2844
2845 if (strnEQ(s,"=>",2)) {
3280af22 2846 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2847 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2848 "### Saw unary minus before =>, forcing word '%s'\n", s);
2849 } )
748a9306
LW
2850 OPERATOR('-'); /* unary minus */
2851 }
3280af22 2852 PL_last_uni = PL_oldbufptr;
748a9306 2853 switch (tmp) {
e5edeb50
JH
2854 case 'r': ftst = OP_FTEREAD; break;
2855 case 'w': ftst = OP_FTEWRITE; break;
2856 case 'x': ftst = OP_FTEEXEC; break;
2857 case 'o': ftst = OP_FTEOWNED; break;
2858 case 'R': ftst = OP_FTRREAD; break;
2859 case 'W': ftst = OP_FTRWRITE; break;
2860 case 'X': ftst = OP_FTREXEC; break;
2861 case 'O': ftst = OP_FTROWNED; break;
2862 case 'e': ftst = OP_FTIS; break;
2863 case 'z': ftst = OP_FTZERO; break;
2864 case 's': ftst = OP_FTSIZE; break;
2865 case 'f': ftst = OP_FTFILE; break;
2866 case 'd': ftst = OP_FTDIR; break;
2867 case 'l': ftst = OP_FTLINK; break;
2868 case 'p': ftst = OP_FTPIPE; break;
2869 case 'S': ftst = OP_FTSOCK; break;
2870 case 'u': ftst = OP_FTSUID; break;
2871 case 'g': ftst = OP_FTSGID; break;
2872 case 'k': ftst = OP_FTSVTX; break;
2873 case 'b': ftst = OP_FTBLK; break;
2874 case 'c': ftst = OP_FTCHR; break;
2875 case 't': ftst = OP_FTTTY; break;
2876 case 'T': ftst = OP_FTTEXT; break;
2877 case 'B': ftst = OP_FTBINARY; break;
2878 case 'M': case 'A': case 'C':
2879 gv_fetchpv("\024",TRUE, SVt_PV);
2880 switch (tmp) {
2881 case 'M': ftst = OP_FTMTIME; break;
2882 case 'A': ftst = OP_FTATIME; break;
2883 case 'C': ftst = OP_FTCTIME; break;
2884 default: break;
2885 }
2886 break;
378cc40b 2887 default:
378cc40b
LW
2888 break;
2889 }
e5edeb50
JH
2890 if (ftst) {
2891 PL_last_lop_op = ftst;
4e553d73 2892 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2893 "### Saw file test %c\n", (int)ftst);
e5edeb50 2894 } )
e5edeb50
JH
2895 FTST(ftst);
2896 }
2897 else {
2898 /* Assume it was a minus followed by a one-letter named
2899 * subroutine call (or a -bareword), then. */
95c31fe3 2900 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2901 "### %c looked like a file test but was not\n",
2902 (int)ftst);
95c31fe3 2903 } )
e5edeb50
JH
2904 s -= 2;
2905 }
378cc40b 2906 }
a687059c
LW
2907 tmp = *s++;
2908 if (*s == tmp) {
2909 s++;
3280af22 2910 if (PL_expect == XOPERATOR)
79072805
LW
2911 TERM(POSTDEC);
2912 else
2913 OPERATOR(PREDEC);
2914 }
2915 else if (*s == '>') {
2916 s++;
2917 s = skipspace(s);
7e2040f0 2918 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2919 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2920 TOKEN(ARROW);
79072805 2921 }
748a9306
LW
2922 else if (*s == '$')
2923 OPERATOR(ARROW);
463ee0b2 2924 else
748a9306 2925 TERM(ARROW);
a687059c 2926 }
3280af22 2927 if (PL_expect == XOPERATOR)
79072805
LW
2928 Aop(OP_SUBTRACT);
2929 else {
3280af22 2930 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2931 check_uni();
79072805 2932 OPERATOR('-'); /* unary minus */
2f3197b3 2933 }
79072805 2934
378cc40b 2935 case '+':
a687059c
LW
2936 tmp = *s++;
2937 if (*s == tmp) {
378cc40b 2938 s++;
3280af22 2939 if (PL_expect == XOPERATOR)
79072805
LW
2940 TERM(POSTINC);
2941 else
2942 OPERATOR(PREINC);
378cc40b 2943 }
3280af22 2944 if (PL_expect == XOPERATOR)
79072805
LW
2945 Aop(OP_ADD);
2946 else {
3280af22 2947 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2948 check_uni();
a687059c 2949 OPERATOR('+');
2f3197b3 2950 }
a687059c 2951
378cc40b 2952 case '*':
3280af22
NIS
2953 if (PL_expect != XOPERATOR) {
2954 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2955 PL_expect = XOPERATOR;
2956 force_ident(PL_tokenbuf, '*');
2957 if (!*PL_tokenbuf)
a0d0e21e 2958 PREREF('*');
79072805 2959 TERM('*');
a687059c 2960 }
79072805
LW
2961 s++;
2962 if (*s == '*') {
a687059c 2963 s++;
79072805 2964 PWop(OP_POW);
a687059c 2965 }
79072805
LW
2966 Mop(OP_MULTIPLY);
2967
378cc40b 2968 case '%':
3280af22 2969 if (PL_expect == XOPERATOR) {
bbce6d69 2970 ++s;
2971 Mop(OP_MODULO);
a687059c 2972 }
3280af22
NIS
2973 PL_tokenbuf[0] = '%';
2974 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2975 if (!PL_tokenbuf[1]) {
2976 if (s == PL_bufend)
bbce6d69 2977 yyerror("Final % should be \\% or %name");
2978 PREREF('%');
a687059c 2979 }
3280af22 2980 PL_pending_ident = '%';
bbce6d69 2981 TERM('%');
a687059c 2982
378cc40b 2983 case '^':
79072805 2984 s++;
a0d0e21e 2985 BOop(OP_BIT_XOR);
79072805 2986 case '[':
3280af22 2987 PL_lex_brackets++;
79072805 2988 /* FALL THROUGH */
378cc40b 2989 case '~':
378cc40b 2990 case ',':
378cc40b
LW
2991 tmp = *s++;
2992 OPERATOR(tmp);
a0d0e21e
LW
2993 case ':':
2994 if (s[1] == ':') {
2995 len = 0;
2996 goto just_a_word;
2997 }
2998 s++;
09bef843
SB
2999 switch (PL_expect) {
3000 OP *attrs;
3001 case XOPERATOR:
3002 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3003 break;
3004 PL_bufptr = s; /* update in case we back off */
3005 goto grabattrs;
3006 case XATTRBLOCK:
3007 PL_expect = XBLOCK;
3008 goto grabattrs;
3009 case XATTRTERM:
3010 PL_expect = XTERMBLOCK;
3011 grabattrs:
3012 s = skipspace(s);
3013 attrs = Nullop;
7e2040f0 3014 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3015 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3016 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3017 if (tmp < 0) tmp = -tmp;
3018 switch (tmp) {
3019 case KEY_or:
3020 case KEY_and:
3021 case KEY_for:
3022 case KEY_unless:
3023 case KEY_if:
3024 case KEY_while:
3025 case KEY_until:
3026 goto got_attrs;
3027 default:
3028 break;
3029 }
3030 }
09bef843
SB
3031 if (*d == '(') {
3032 d = scan_str(d,TRUE,TRUE);
3033 if (!d) {
3034 if (PL_lex_stuff) {
3035 SvREFCNT_dec(PL_lex_stuff);
3036 PL_lex_stuff = Nullsv;
3037 }
3038 /* MUST advance bufptr here to avoid bogus
3039 "at end of line" context messages from yyerror().
3040 */
3041 PL_bufptr = s + len;
3042 yyerror("Unterminated attribute parameter in attribute list");
3043 if (attrs)
3044 op_free(attrs);
3045 return 0; /* EOF indicator */
3046 }
3047 }
3048 if (PL_lex_stuff) {
3049 SV *sv = newSVpvn(s, len);
3050 sv_catsv(sv, PL_lex_stuff);
3051 attrs = append_elem(OP_LIST, attrs,
3052 newSVOP(OP_CONST, 0, sv));
3053 SvREFCNT_dec(PL_lex_stuff);
3054 PL_lex_stuff = Nullsv;
3055 }
3056 else {
78f9721b
SM
3057 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3058 CvLVALUE_on(PL_compcv);
3059 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3060 CvLOCKED_on(PL_compcv);
3061 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3062 CvMETHOD_on(PL_compcv);
3063 /* After we've set the flags, it could be argued that
3064 we don't need to do the attributes.pm-based setting
3065 process, and shouldn't bother appending recognized
3066 flags. To experiment with that, uncomment the
3067 following "else": */
3068 /* else */
3069 attrs = append_elem(OP_LIST, attrs,
3070 newSVOP(OP_CONST, 0,
3071 newSVpvn(s, len)));
09bef843
SB
3072 }
3073 s = skipspace(d);
0120eecf 3074 if (*s == ':' && s[1] != ':')
09bef843 3075 s = skipspace(s+1);
0120eecf
GS
3076 else if (s == d)
3077 break; /* require real whitespace or :'s */
09bef843 3078 }
f9829d6b
GS
3079 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3080 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3081 char q = ((*s == '\'') ? '"' : '\'');
3082 /* If here for an expression, and parsed no attrs, back off. */
3083 if (tmp == '=' && !attrs) {
3084 s = PL_bufptr;
3085 break;
3086 }
3087 /* MUST advance bufptr here to avoid bogus "at end of line"
3088 context messages from yyerror().
3089 */
3090 PL_bufptr = s;
3091 if (!*s)
3092 yyerror("Unterminated attribute list");
3093 else
3094 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3095 q, *s, q));
3096 if (attrs)
3097 op_free(attrs);
3098 OPERATOR(':');
3099 }
f9829d6b 3100 got_attrs:
09bef843
SB
3101 if (attrs) {
3102 PL_nextval[PL_nexttoke].opval = attrs;
3103 force_next(THING);
3104 }
3105 TOKEN(COLONATTR);
3106 }
a0d0e21e 3107 OPERATOR(':');
8990e307
LW
3108 case '(':
3109 s++;
3280af22
NIS
3110 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3111 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3112 else
3280af22 3113 PL_expect = XTERM;
a0d0e21e 3114 TOKEN('(');
378cc40b 3115 case ';':
f4dd75d9 3116 CLINE;
378cc40b
LW
3117 tmp = *s++;
3118 OPERATOR(tmp);
3119 case ')':
378cc40b 3120 tmp = *s++;
16d20bd9
AD
3121 s = skipspace(s);
3122 if (*s == '{')
3123 PREBLOCK(tmp);
378cc40b 3124 TERM(tmp);
79072805
LW
3125 case ']':
3126 s++;
3280af22 3127 if (PL_lex_brackets <= 0)
d98d5fff 3128 yyerror("Unmatched right square bracket");
463ee0b2 3129 else
3280af22
NIS
3130 --PL_lex_brackets;
3131 if (PL_lex_state == LEX_INTERPNORMAL) {
3132 if (PL_lex_brackets == 0) {
a0d0e21e 3133 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3134 PL_lex_state = LEX_INTERPEND;
79072805
LW
3135 }
3136 }
4633a7c4 3137 TERM(']');
79072805
LW
3138 case '{':
3139 leftbracket:
79072805 3140 s++;
3280af22
NIS
3141 if (PL_lex_brackets > 100) {
3142 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3143 if (newlb != PL_lex_brackstack) {
8990e307 3144 SAVEFREEPV(newlb);
3280af22 3145 PL_lex_brackstack = newlb;
8990e307
LW
3146 }
3147 }
3280af22 3148 switch (PL_expect) {
a0d0e21e 3149 case XTERM:
3280af22 3150 if (PL_lex_formbrack) {
a0d0e21e
LW
3151 s--;
3152 PRETERMBLOCK(DO);
3153 }
3280af22
NIS
3154 if (PL_oldoldbufptr == PL_last_lop)
3155 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3156 else
3280af22 3157 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3158 OPERATOR(HASHBRACK);
a0d0e21e 3159 case XOPERATOR:
bf4acbe4 3160 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3161 s++;
44a8e56a 3162 d = s;
3280af22
NIS
3163 PL_tokenbuf[0] = '\0';
3164 if (d < PL_bufend && *d == '-') {
3165 PL_tokenbuf[0] = '-';
44a8e56a 3166 d++;
bf4acbe4 3167 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3168 d++;
3169 }
7e2040f0 3170 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3171 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3172 FALSE, &len);
bf4acbe4 3173 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3174 d++;
3175 if (*d == '}') {
3280af22 3176 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3177 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
7948272d
NIS
3178 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
3179 PL_nextval[PL_nexttoke-1].opval)
3180 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
44a8e56a 3181 if (minus)
3182 force_next('-');
748a9306
LW
3183 }
3184 }
3185 /* FALL THROUGH */
09bef843 3186 case XATTRBLOCK:
748a9306 3187 case XBLOCK:
3280af22
NIS
3188 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3189 PL_expect = XSTATE;
a0d0e21e 3190 break;
09bef843 3191 case XATTRTERM:
a0d0e21e 3192 case XTERMBLOCK:
3280af22
NIS
3193 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3194 PL_expect = XSTATE;
a0d0e21e
LW
3195 break;
3196 default: {
3197 char *t;
3280af22
NIS
3198 if (PL_oldoldbufptr == PL_last_lop)
3199 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3200 else
3280af22 3201 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3202 s = skipspace(s);
09ecc4b6 3203 if (*s == '}')
a0d0e21e 3204 OPERATOR(HASHBRACK);
b8a4b1be
GS
3205 /* This hack serves to disambiguate a pair of curlies
3206 * as being a block or an anon hash. Normally, expectation
3207 * determines that, but in cases where we're not in a
3208 * position to expect anything in particular (like inside
3209 * eval"") we have to resolve the ambiguity. This code
3210 * covers the case where the first term in the curlies is a
3211 * quoted string. Most other cases need to be explicitly
3212 * disambiguated by prepending a `+' before the opening
3213 * curly in order to force resolution as an anon hash.
3214 *
3215 * XXX should probably propagate the outer expectation
3216 * into eval"" to rely less on this hack, but that could
3217 * potentially break current behavior of eval"".
3218 * GSAR 97-07-21
3219 */
3220 t = s;
3221 if (*s == '\'' || *s == '"' || *s == '`') {
3222 /* common case: get past first string, handling escapes */
3280af22 3223 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3224 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3225 t++;
3226 t++;
a0d0e21e 3227 }
b8a4b1be 3228 else if (*s == 'q') {
3280af22 3229 if (++t < PL_bufend
b8a4b1be 3230 && (!isALNUM(*t)
3280af22 3231 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3232 && !isALNUM(*t))))
3233 {
b8a4b1be
GS
3234 char *tmps;
3235 char open, close, term;
3236 I32 brackets = 1;
3237
3280af22 3238 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3239 t++;
3240 term = *t;
3241 open = term;
3242 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3243 term = tmps[5];
3244 close = term;
3245 if (open == close)
3280af22
NIS
3246 for (t++; t < PL_bufend; t++) {
3247 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3248 t++;
6d07e5e9 3249 else if (*t == open)
b8a4b1be
GS
3250 break;
3251 }
3252 else
3280af22
NIS
3253 for (t++; t < PL_bufend; t++) {
3254 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3255 t++;
6d07e5e9 3256 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3257 break;
3258 else if (*t == open)
3259 brackets++;
3260 }
3261 }
3262 t++;
a0d0e21e 3263 }
7e2040f0 3264 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3265 t += UTF8SKIP(t);
7e2040f0 3266 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3267 t += UTF8SKIP(t);
a0d0e21e 3268 }
3280af22 3269 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3270 t++;
b8a4b1be
GS
3271 /* if comma follows first term, call it an anon hash */
3272 /* XXX it could be a comma expression with loop modifiers */
3280af22 3273 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3274 || (*t == '=' && t[1] == '>')))
a0d0e21e 3275 OPERATOR(HASHBRACK);
3280af22 3276 if (PL_expect == XREF)
4e4e412b 3277 PL_expect = XTERM;
a0d0e21e 3278 else {
3280af22
NIS
3279 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3280 PL_expect = XSTATE;
a0d0e21e 3281 }
8990e307 3282 }
a0d0e21e 3283 break;
463ee0b2 3284 }
57843af0 3285 yylval.ival = CopLINE(PL_curcop);
79072805 3286 if (isSPACE(*s) || *s == '#')
3280af22 3287 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3288 TOKEN('{');
378cc40b 3289 case '}':
79072805
LW
3290 rightbracket:
3291 s++;
3280af22 3292 if (PL_lex_brackets <= 0)
d98d5fff 3293 yyerror("Unmatched right curly bracket");
463ee0b2 3294 else
3280af22 3295 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3296 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3297 PL_lex_formbrack = 0;
3298 if (PL_lex_state == LEX_INTERPNORMAL) {
3299 if (PL_lex_brackets == 0) {
9059aa12
LW
3300 if (PL_expect & XFAKEBRACK) {
3301 PL_expect &= XENUMMASK;
3280af22
NIS
3302 PL_lex_state = LEX_INTERPEND;
3303 PL_bufptr = s;
cea2e8a9 3304 return yylex(); /* ignore fake brackets */
79072805 3305 }
fa83b5b6 3306 if (*s == '-' && s[1] == '>')
3280af22 3307 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3308 else if (*s != '[' && *s != '{')
3280af22 3309 PL_lex_state = LEX_INTERPEND;
79072805
LW
3310 }
3311 }
9059aa12
LW
3312 if (PL_expect & XFAKEBRACK) {
3313 PL_expect &= XENUMMASK;
3280af22 3314 PL_bufptr = s;
cea2e8a9 3315 return yylex(); /* ignore fake brackets */
748a9306 3316 }
79072805
LW
3317 force_next('}');
3318 TOKEN(';');
378cc40b
LW
3319 case '&':
3320 s++;
3321 tmp = *s++;
3322 if (tmp == '&')
a0d0e21e 3323 AOPERATOR(ANDAND);
378cc40b 3324 s--;
3280af22 3325 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3326 if (ckWARN(WARN_SEMICOLON)
3327 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3328 {
57843af0 3329 CopLINE_dec(PL_curcop);
cea2e8a9 3330 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3331 CopLINE_inc(PL_curcop);
463ee0b2 3332 }
79072805 3333 BAop(OP_BIT_AND);
463ee0b2 3334 }
79072805 3335
3280af22
NIS
3336 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3337 if (*PL_tokenbuf) {
3338 PL_expect = XOPERATOR;
3339 force_ident(PL_tokenbuf, '&');
463ee0b2 3340 }
79072805
LW
3341 else
3342 PREREF('&');
c07a80fd 3343 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3344 TERM('&');
3345
378cc40b
LW
3346 case '|':
3347 s++;
3348 tmp = *s++;
3349 if (tmp == '|')
a0d0e21e 3350 AOPERATOR(OROR);
378cc40b 3351 s--;
79072805 3352 BOop(OP_BIT_OR);
378cc40b
LW
3353 case '=':
3354 s++;
3355 tmp = *s++;
3356 if (tmp == '=')
79072805
LW
3357 Eop(OP_EQ);
3358 if (tmp == '>')
3359 OPERATOR(',');
378cc40b 3360 if (tmp == '~')
79072805 3361 PMop(OP_MATCH);
599cee73 3362 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3363 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3364 s--;
3280af22
NIS
3365 if (PL_expect == XSTATE && isALPHA(tmp) &&
3366 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3367 {
3280af22
NIS
3368 if (PL_in_eval && !PL_rsfp) {
3369 d = PL_bufend;
a5f75d66
AD
3370 while (s < d) {
3371 if (*s++ == '\n') {
3372 incline(s);
3373 if (strnEQ(s,"=cut",4)) {
3374 s = strchr(s,'\n');
3375 if (s)
3376 s++;
3377 else
3378 s = d;
3379 incline(s);
3380 goto retry;
3381 }
3382 }
3383 }
3384 goto retry;
3385 }
3280af22
NIS
3386 s = PL_bufend;
3387 PL_doextract = TRUE;
a0d0e21e
LW
3388 goto retry;
3389 }
3280af22 3390 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3391 char *t;
51882d45 3392#ifdef PERL_STRICT_CR
bf4acbe4 3393 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3394#else
bf4acbe4 3395 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3396#endif
a0d0e21e
LW
3397 if (*t == '\n' || *t == '#') {
3398 s--;
3280af22 3399 PL_expect = XBLOCK;
a0d0e21e
LW
3400 goto leftbracket;
3401 }
79072805 3402 }
a0d0e21e
LW
3403 yylval.ival = 0;
3404 OPERATOR(ASSIGNOP);
378cc40b
LW
3405 case '!':
3406 s++;
3407 tmp = *s++;
3408 if (tmp == '=')
79072805 3409 Eop(OP_NE);
378cc40b 3410 if (tmp == '~')
79072805 3411 PMop(OP_NOT);
378cc40b
LW
3412 s--;
3413 OPERATOR('!');
3414 case '<':
3280af22 3415 if (PL_expect != XOPERATOR) {
93a17b20 3416 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3417 check_uni();
79072805
LW
3418 if (s[1] == '<')
3419 s = scan_heredoc(s);
3420 else
3421 s = scan_inputsymbol(s);
3422 TERM(sublex_start());
378cc40b
LW
3423 }
3424 s++;
3425 tmp = *s++;
3426 if (tmp == '<')
79072805 3427 SHop(OP_LEFT_SHIFT);
395c3793
LW
3428 if (tmp == '=') {
3429 tmp = *s++;
3430 if (tmp == '>')
79072805 3431 Eop(OP_NCMP);
395c3793 3432 s--;
79072805 3433 Rop(OP_LE);
395c3793 3434 }
378cc40b 3435 s--;
79072805 3436 Rop(OP_LT);
378cc40b
LW
3437 case '>':
3438 s++;
3439 tmp = *s++;
3440 if (tmp == '>')
79072805 3441 SHop(OP_RIGHT_SHIFT);
378cc40b 3442 if (tmp == '=')
79072805 3443 Rop(OP_GE);
378cc40b 3444 s--;
79072805 3445 Rop(OP_GT);
378cc40b
LW
3446
3447 case '$':
bbce6d69 3448 CLINE;
3449
3280af22
NIS
3450 if (PL_expect == XOPERATOR) {
3451 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3452 PL_expect = XTERM;
a0d0e21e 3453 depcom();
bbce6d69 3454 return ','; /* grandfather non-comma-format format */
a0d0e21e 3455 }
8990e307 3456 }
a0d0e21e 3457
7e2040f0 3458 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3459 PL_tokenbuf[0] = '@';
376b8730
SM
3460 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3461 sizeof PL_tokenbuf - 1, FALSE);
3462 if (PL_expect == XOPERATOR)
3463 no_op("Array length", s);
3280af22 3464 if (!PL_tokenbuf[1])
a0d0e21e 3465 PREREF(DOLSHARP);
3280af22
NIS
3466 PL_expect = XOPERATOR;
3467 PL_pending_ident = '#';
463ee0b2 3468 TOKEN(DOLSHARP);
79072805 3469 }
bbce6d69 3470
3280af22 3471 PL_tokenbuf[0] = '$';
376b8730
SM
3472 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3473 sizeof PL_tokenbuf - 1, FALSE);
3474 if (PL_expect == XOPERATOR)
3475 no_op("Scalar", s);
3280af22
NIS
3476 if (!PL_tokenbuf[1]) {
3477 if (s == PL_bufend)
bbce6d69 3478 yyerror("Final $ should be \\$ or $name");
3479 PREREF('$');
8990e307 3480 }
a0d0e21e 3481
bbce6d69 3482 /* This kludge not intended to be bulletproof. */
3280af22 3483 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3484 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3485 newSViv(PL_compiling.cop_arybase));
bbce6d69 3486 yylval.opval->op_private = OPpCONST_ARYBASE;
3487 TERM(THING);
3488 }
3489
ff68c719 3490 d = s;
69d2bceb 3491 tmp = (I32)*s;
3280af22 3492 if (PL_lex_state == LEX_NORMAL)
ff68c719 3493 s = skipspace(s);
3494
3280af22 3495 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3496 char *t;
3497 if (*s == '[') {
3280af22 3498 PL_tokenbuf[0] = '@';
599cee73 3499 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3500 for(t = s + 1;
7e2040f0 3501 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3502 t++) ;
a0d0e21e 3503 if (*t++ == ',') {
3280af22
NIS
3504 PL_bufptr = skipspace(PL_bufptr);
3505 while (t < PL_bufend && *t != ']')
bbce6d69 3506 t++;
cea2e8a9 3507 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3508 "Multidimensional syntax %.*s not supported",
3509 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3510 }
3511 }
bbce6d69 3512 }
3513 else if (*s == '{') {
3280af22 3514 PL_tokenbuf[0] = '%';
599cee73 3515 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3516 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3517 {
3280af22 3518 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3519 STRLEN len;
3520 for (t++; isSPACE(*t); t++) ;
7e2040f0 3521 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3522 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3523 for (; isSPACE(*t); t++) ;
864dbfa3 3524 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3525 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3526 "You need to quote \"%s\"", tmpbuf);
748a9306 3527 }
93a17b20
LW
3528 }
3529 }
2f3197b3 3530 }
bbce6d69 3531
3280af22 3532 PL_expect = XOPERATOR;
69d2bceb 3533 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3534 bool islop = (PL_last_lop == PL_oldoldbufptr);
3535 if (!islop || PL_last_lop_op == OP_GREPSTART)
3536 PL_expect = XOPERATOR;
bbce6d69 3537 else if (strchr("$@\"'`q", *s))
3280af22 3538 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3539 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3540 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3541 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3542 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3543 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3544 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3545 /* binary operators exclude handle interpretations */
3546 switch (tmp) {
3547 case -KEY_x:
3548 case -KEY_eq:
3549 case -KEY_ne:
3550 case -KEY_gt:
3551 case -KEY_lt:
3552 case -KEY_ge:
3553 case -KEY_le:
3554 case -KEY_cmp:
3555 break;
3556 default:
3280af22 3557 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3558 break;
3559 }
3560 }
68dc0745 3561 else {
3562 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3563 if (gv && GvCVu(gv))
3280af22 3564 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3565 }
93a17b20 3566 }
bbce6d69 3567 else if (isDIGIT(*s))
3280af22 3568 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3569 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3570 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3571 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22