This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Retract #8970 because of a completely mysterious core dump:
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
998054bd
SC
129#ifdef DEBUGGING /* Serve -DT. */
130# define REPORT(x,retval) tokereport(x,s,(int)retval)
131# define REPORT2(x,retval) tokereport(x,s, yylval.ival)
132#else
133# define REPORT(x,retval) 1
134# define REPORT2(x,retval) 1
135#endif
136
137#define TOKEN(retval) return (REPORT2("token",retval), PL_bufptr = s,(int)retval)
138#define OPERATOR(retval) return (REPORT2("operator",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval)
139#define AOPERATOR(retval) return ao((REPORT2("aop",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval))
140#define PREBLOCK(retval) return (REPORT2("preblock",retval), PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
141#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval), PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
142#define PREREF(retval) return (REPORT2("preref",retval), PL_expect = XREF,PL_bufptr = s,(int)retval)
143#define TERM(retval) return (CLINE, REPORT2("term",retval), PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
144#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f), PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
145#define FTST(f) return(yylval.ival=f, REPORT("ftst",f), PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
146#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
147#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
148#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
149#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
150#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f), PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
151#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f), PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
152#define PMop(f) return(yylval.ival=f, REPORT("matchop",f), PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
153#define Aop(f) return ao((yylval.ival=f, REPORT("add",f), PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
154#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f), PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
155#define Eop(f) return(yylval.ival=f, REPORT("eq",f), PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
156#define Rop(f) return(yylval.ival=f, REPORT("rel",f), PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 157
a687059c
LW
158/* This bit of chicanery makes a unary function followed by
159 * a parenthesis into a function with one argument, highest precedence.
160 */
2f3197b3 161#define UNI(f) return(yylval.ival = f, \
998054bd 162 REPORT("uni",f), \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
998054bd 170 REPORT("uni",f), \
3280af22
NIS
171 PL_bufptr = s, \
172 PL_last_uni = PL_oldbufptr, \
79072805
LW
173 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
174
9f68db38 175/* grandfather return to old style */
3280af22 176#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 177
2d00ba3b 178STATIC void
61b2116b 179S_tokereport(pTHX_ char *thing, char* s, I32 rv)
998054bd
SC
180{
181 SV *report;
182 DEBUG_T({
183 report = newSVpv(thing, 0);
61b2116b 184 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
185
186 if (s - PL_bufptr > 0)
187 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
188 else {
189 if (PL_oldbufptr && *PL_oldbufptr)
190 sv_catpv(report, PL_tokenbuf);
191 }
192 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
193 })
194}
195
ffb4593c
NT
196/*
197 * S_ao
198 *
199 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
200 * into an OP_ANDASSIGN or OP_ORASSIGN
201 */
202
76e3520e 203STATIC int
cea2e8a9 204S_ao(pTHX_ int toketype)
a0d0e21e 205{
3280af22
NIS
206 if (*PL_bufptr == '=') {
207 PL_bufptr++;
a0d0e21e
LW
208 if (toketype == ANDAND)
209 yylval.ival = OP_ANDASSIGN;
210 else if (toketype == OROR)
211 yylval.ival = OP_ORASSIGN;
212 toketype = ASSIGNOP;
213 }
214 return toketype;
215}
216
ffb4593c
NT
217/*
218 * S_no_op
219 * When Perl expects an operator and finds something else, no_op
220 * prints the warning. It always prints "<something> found where
221 * operator expected. It prints "Missing semicolon on previous line?"
222 * if the surprise occurs at the start of the line. "do you need to
223 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
224 * where the compiler doesn't know if foo is a method call or a function.
225 * It prints "Missing operator before end of line" if there's nothing
226 * after the missing operator, or "... before <...>" if there is something
227 * after the missing operator.
228 */
229
76e3520e 230STATIC void
cea2e8a9 231S_no_op(pTHX_ char *what, char *s)
463ee0b2 232{
3280af22
NIS
233 char *oldbp = PL_bufptr;
234 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 235
1189a94a
GS
236 if (!s)
237 s = oldbp;
07c798fb 238 else
1189a94a 239 PL_bufptr = s;
cea2e8a9 240 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 241 if (is_first)
cea2e8a9 242 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 243 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 244 char *t;
7e2040f0 245 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 246 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 247 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 248 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 249 }
07c798fb
HS
250 else {
251 assert(s >= oldbp);
cea2e8a9 252 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 253 }
3280af22 254 PL_bufptr = oldbp;
8990e307
LW
255}
256
ffb4593c
NT
257/*
258 * S_missingterm
259 * Complain about missing quote/regexp/heredoc terminator.
260 * If it's called with (char *)NULL then it cauterizes the line buffer.
261 * If we're in a delimited string and the delimiter is a control
262 * character, it's reformatted into a two-char sequence like ^C.
263 * This is fatal.
264 */
265
76e3520e 266STATIC void
cea2e8a9 267S_missingterm(pTHX_ char *s)
8990e307
LW
268{
269 char tmpbuf[3];
270 char q;
271 if (s) {
272 char *nl = strrchr(s,'\n');
d2719217 273 if (nl)
8990e307
LW
274 *nl = '\0';
275 }
9d116dd7
JH
276 else if (
277#ifdef EBCDIC
278 iscntrl(PL_multi_close)
279#else
280 PL_multi_close < 32 || PL_multi_close == 127
281#endif
282 ) {
8990e307 283 *tmpbuf = '^';
3280af22 284 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
285 s = "\\n";
286 tmpbuf[2] = '\0';
287 s = tmpbuf;
288 }
289 else {
3280af22 290 *tmpbuf = PL_multi_close;
8990e307
LW
291 tmpbuf[1] = '\0';
292 s = tmpbuf;
293 }
294 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 295 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 296}
79072805 297
ffb4593c
NT
298/*
299 * Perl_deprecate
ffb4593c
NT
300 */
301
79072805 302void
864dbfa3 303Perl_deprecate(pTHX_ char *s)
a0d0e21e 304{
599cee73 305 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 306 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
307}
308
ffb4593c
NT
309/*
310 * depcom
9cbb5ea2 311 * Deprecate a comma-less variable list.
ffb4593c
NT
312 */
313
76e3520e 314STATIC void
cea2e8a9 315S_depcom(pTHX)
a0d0e21e
LW
316{
317 deprecate("comma-less variable list");
318}
319
ffb4593c 320/*
9cbb5ea2
GS
321 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
322 * utf16-to-utf8-reversed.
ffb4593c
NT
323 */
324
c39cd008
GS
325#ifdef PERL_CR_FILTER
326static void
327strip_return(SV *sv)
328{
329 register char *s = SvPVX(sv);
330 register char *e = s + SvCUR(sv);
331 /* outer loop optimized to do nothing if there are no CR-LFs */
332 while (s < e) {
333 if (*s++ == '\r' && *s == '\n') {
334 /* hit a CR-LF, need to copy the rest */
335 register char *d = s - 1;
336 *d++ = *s++;
337 while (s < e) {
338 if (*s == '\r' && s[1] == '\n')
339 s++;
340 *d++ = *s++;
341 }
342 SvCUR(sv) -= s - d;
343 return;
344 }
345 }
346}
a868473f 347
76e3520e 348STATIC I32
c39cd008 349S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 350{
c39cd008
GS
351 I32 count = FILTER_READ(idx+1, sv, maxlen);
352 if (count > 0 && !maxlen)
353 strip_return(sv);
354 return count;
a868473f
NIS
355}
356#endif
357
ffb4593c
NT
358/*
359 * Perl_lex_start
9cbb5ea2
GS
360 * Initialize variables. Uses the Perl save_stack to save its state (for
361 * recursive calls to the parser).
ffb4593c
NT
362 */
363
a0d0e21e 364void
864dbfa3 365Perl_lex_start(pTHX_ SV *line)
79072805 366{
8990e307
LW
367 char *s;
368 STRLEN len;
369
3280af22
NIS
370 SAVEI32(PL_lex_dojoin);
371 SAVEI32(PL_lex_brackets);
3280af22
NIS
372 SAVEI32(PL_lex_casemods);
373 SAVEI32(PL_lex_starts);
374 SAVEI32(PL_lex_state);
7766f137 375 SAVEVPTR(PL_lex_inpat);
3280af22 376 SAVEI32(PL_lex_inwhat);
18b09519
GS
377 if (PL_lex_state == LEX_KNOWNEXT) {
378 I32 toke = PL_nexttoke;
379 while (--toke >= 0) {
380 SAVEI32(PL_nexttype[toke]);
381 SAVEVPTR(PL_nextval[toke]);
382 }
383 SAVEI32(PL_nexttoke);
18b09519 384 }
57843af0 385 SAVECOPLINE(PL_curcop);
3280af22
NIS
386 SAVEPPTR(PL_bufptr);
387 SAVEPPTR(PL_bufend);
388 SAVEPPTR(PL_oldbufptr);
389 SAVEPPTR(PL_oldoldbufptr);
390 SAVEPPTR(PL_linestart);
391 SAVESPTR(PL_linestr);
392 SAVEPPTR(PL_lex_brackstack);
393 SAVEPPTR(PL_lex_casestack);
c76ac1ee 394 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
395 SAVESPTR(PL_lex_stuff);
396 SAVEI32(PL_lex_defer);
09bef843 397 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 398 SAVESPTR(PL_lex_repl);
bebdddfc
GS
399 SAVEINT(PL_expect);
400 SAVEINT(PL_lex_expect);
3280af22
NIS
401
402 PL_lex_state = LEX_NORMAL;
403 PL_lex_defer = 0;
404 PL_expect = XSTATE;
405 PL_lex_brackets = 0;
3280af22
NIS
406 New(899, PL_lex_brackstack, 120, char);
407 New(899, PL_lex_casestack, 12, char);
408 SAVEFREEPV(PL_lex_brackstack);
409 SAVEFREEPV(PL_lex_casestack);
410 PL_lex_casemods = 0;
411 *PL_lex_casestack = '\0';
412 PL_lex_dojoin = 0;
413 PL_lex_starts = 0;
414 PL_lex_stuff = Nullsv;
415 PL_lex_repl = Nullsv;
416 PL_lex_inpat = 0;
76be56bc 417 PL_nexttoke = 0;
3280af22 418 PL_lex_inwhat = 0;
09bef843 419 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
420 PL_linestr = line;
421 if (SvREADONLY(PL_linestr))
422 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
423 s = SvPV(PL_linestr, len);
8990e307 424 if (len && s[len-1] != ';') {
3280af22
NIS
425 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 428 }
3280af22
NIS
429 SvTEMP_off(PL_linestr);
430 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
431 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
432 SvREFCNT_dec(PL_rs);
79cb57f6 433 PL_rs = newSVpvn("\n", 1);
3280af22 434 PL_rsfp = 0;
79072805 435}
a687059c 436
ffb4593c
NT
437/*
438 * Perl_lex_end
9cbb5ea2
GS
439 * Finalizer for lexing operations. Must be called when the parser is
440 * done with the lexer.
ffb4593c
NT
441 */
442
463ee0b2 443void
864dbfa3 444Perl_lex_end(pTHX)
463ee0b2 445{
3280af22 446 PL_doextract = FALSE;
463ee0b2
LW
447}
448
ffb4593c
NT
449/*
450 * S_incline
451 * This subroutine has nothing to do with tilting, whether at windmills
452 * or pinball tables. Its name is short for "increment line". It
57843af0 453 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 454 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
455 * # line 500 "foo.pm"
456 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
457 */
458
76e3520e 459STATIC void
cea2e8a9 460S_incline(pTHX_ char *s)
463ee0b2
LW
461{
462 char *t;
463 char *n;
73659bf1 464 char *e;
463ee0b2 465 char ch;
463ee0b2 466
57843af0 467 CopLINE_inc(PL_curcop);
463ee0b2
LW
468 if (*s++ != '#')
469 return;
bf4acbe4 470 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
471 if (strnEQ(s, "line", 4))
472 s += 4;
473 else
474 return;
084592ab 475 if (SPACE_OR_TAB(*s))
73659bf1 476 s++;
4e553d73 477 else
73659bf1 478 return;
bf4acbe4 479 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
480 if (!isDIGIT(*s))
481 return;
482 n = s;
483 while (isDIGIT(*s))
484 s++;
bf4acbe4 485 while (SPACE_OR_TAB(*s))
463ee0b2 486 s++;
73659bf1 487 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 488 s++;
73659bf1
GS
489 e = t + 1;
490 }
463ee0b2 491 else {
463ee0b2 492 for (t = s; !isSPACE(*t); t++) ;
73659bf1 493 e = t;
463ee0b2 494 }
bf4acbe4 495 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
496 e++;
497 if (*e != '\n' && *e != '\0')
498 return; /* false alarm */
499
463ee0b2
LW
500 ch = *t;
501 *t = '\0';
f4dd75d9
GS
502 if (t - s > 0) {
503#ifdef USE_ITHREADS
504 Safefree(CopFILE(PL_curcop));
505#else
506 SvREFCNT_dec(CopFILEGV(PL_curcop));
507#endif
57843af0 508 CopFILE_set(PL_curcop, s);
f4dd75d9 509 }
463ee0b2 510 *t = ch;
57843af0 511 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
512}
513
ffb4593c
NT
514/*
515 * S_skipspace
516 * Called to gobble the appropriate amount and type of whitespace.
517 * Skips comments as well.
518 */
519
76e3520e 520STATIC char *
cea2e8a9 521S_skipspace(pTHX_ register char *s)
a687059c 522{
3280af22 523 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 524 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
525 s++;
526 return s;
527 }
528 for (;;) {
fd049845 529 STRLEN prevlen;
09bef843
SB
530 SSize_t oldprevlen, oldoldprevlen;
531 SSize_t oldloplen, oldunilen;
60e6418e
GS
532 while (s < PL_bufend && isSPACE(*s)) {
533 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
534 incline(s);
535 }
ffb4593c
NT
536
537 /* comment */
3280af22
NIS
538 if (s < PL_bufend && *s == '#') {
539 while (s < PL_bufend && *s != '\n')
463ee0b2 540 s++;
60e6418e 541 if (s < PL_bufend) {
463ee0b2 542 s++;
60e6418e
GS
543 if (PL_in_eval && !PL_rsfp) {
544 incline(s);
545 continue;
546 }
547 }
463ee0b2 548 }
ffb4593c
NT
549
550 /* only continue to recharge the buffer if we're at the end
551 * of the buffer, we're not reading from a source filter, and
552 * we're in normal lexing mode
553 */
09bef843
SB
554 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
555 PL_lex_state == LEX_FORMLINE)
463ee0b2 556 return s;
ffb4593c
NT
557
558 /* try to recharge the buffer */
9cbb5ea2
GS
559 if ((s = filter_gets(PL_linestr, PL_rsfp,
560 (prevlen = SvCUR(PL_linestr)))) == Nullch)
561 {
562 /* end of file. Add on the -p or -n magic */
3280af22
NIS
563 if (PL_minus_n || PL_minus_p) {
564 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
565 ";}continue{print or die qq(-p destination: $!\\n)" :
566 "");
3280af22
NIS
567 sv_catpv(PL_linestr,";}");
568 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
569 }
570 else
3280af22 571 sv_setpv(PL_linestr,";");
ffb4593c
NT
572
573 /* reset variables for next time we lex */
9cbb5ea2
GS
574 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
575 = SvPVX(PL_linestr);
3280af22 576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
577
578 /* Close the filehandle. Could be from -P preprocessor,
579 * STDIN, or a regular file. If we were reading code from
580 * STDIN (because the commandline held no -e or filename)
581 * then we don't close it, we reset it so the code can
582 * read from STDIN too.
583 */
584
3280af22
NIS
585 if (PL_preprocess && !PL_in_eval)
586 (void)PerlProc_pclose(PL_rsfp);
587 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
588 PerlIO_clearerr(PL_rsfp);
8990e307 589 else
3280af22
NIS
590 (void)PerlIO_close(PL_rsfp);
591 PL_rsfp = Nullfp;
463ee0b2
LW
592 return s;
593 }
ffb4593c
NT
594
595 /* not at end of file, so we only read another line */
09bef843
SB
596 /* make corresponding updates to old pointers, for yyerror() */
597 oldprevlen = PL_oldbufptr - PL_bufend;
598 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
599 if (PL_last_uni)
600 oldunilen = PL_last_uni - PL_bufend;
601 if (PL_last_lop)
602 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
603 PL_linestart = PL_bufptr = s + prevlen;
604 PL_bufend = s + SvCUR(PL_linestr);
605 s = PL_bufptr;
09bef843
SB
606 PL_oldbufptr = s + oldprevlen;
607 PL_oldoldbufptr = s + oldoldprevlen;
608 if (PL_last_uni)
609 PL_last_uni = s + oldunilen;
610 if (PL_last_lop)
611 PL_last_lop = s + oldloplen;
a0d0e21e 612 incline(s);
ffb4593c
NT
613
614 /* debugger active and we're not compiling the debugger code,
615 * so store the line into the debugger's array of lines
616 */
3280af22 617 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
618 SV *sv = NEWSV(85,0);
619
620 sv_upgrade(sv, SVt_PVMG);
3280af22 621 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 622 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 623 }
463ee0b2 624 }
a687059c 625}
378cc40b 626
ffb4593c
NT
627/*
628 * S_check_uni
629 * Check the unary operators to ensure there's no ambiguity in how they're
630 * used. An ambiguous piece of code would be:
631 * rand + 5
632 * This doesn't mean rand() + 5. Because rand() is a unary operator,
633 * the +5 is its argument.
634 */
635
76e3520e 636STATIC void
cea2e8a9 637S_check_uni(pTHX)
ba106d47 638{
2f3197b3 639 char *s;
a0d0e21e 640 char *t;
2f3197b3 641
3280af22 642 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 643 return;
3280af22
NIS
644 while (isSPACE(*PL_last_uni))
645 PL_last_uni++;
7e2040f0 646 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 647 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 648 return;
0453d815 649 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 650 char ch = *s;
0453d815 651 *s = '\0';
4e553d73
NIS
652 Perl_warner(aTHX_ WARN_AMBIGUOUS,
653 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
654 PL_last_uni);
655 *s = ch;
656 }
2f3197b3
LW
657}
658
ffb4593c
NT
659/* workaround to replace the UNI() macro with a function. Only the
660 * hints/uts.sh file mentions this. Other comments elsewhere in the
661 * source indicate Microport Unix might need it too.
662 */
663
ffed7fef
LW
664#ifdef CRIPPLED_CC
665
666#undef UNI
ffed7fef 667#define UNI(f) return uni(f,s)
ffed7fef 668
76e3520e 669STATIC int
cea2e8a9 670S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
671{
672 yylval.ival = f;
3280af22
NIS
673 PL_expect = XTERM;
674 PL_bufptr = s;
8f872242
NIS
675 PL_last_uni = PL_oldbufptr;
676 PL_last_lop_op = f;
ffed7fef
LW
677 if (*s == '(')
678 return FUNC1;
679 s = skipspace(s);
680 if (*s == '(')
681 return FUNC1;
682 else
683 return UNIOP;
684}
685
a0d0e21e
LW
686#endif /* CRIPPLED_CC */
687
ffb4593c
NT
688/*
689 * LOP : macro to build a list operator. Its behaviour has been replaced
690 * with a subroutine, S_lop() for which LOP is just another name.
691 */
692
a0d0e21e
LW
693#define LOP(f,x) return lop(f,x,s)
694
ffb4593c
NT
695/*
696 * S_lop
697 * Build a list operator (or something that might be one). The rules:
698 * - if we have a next token, then it's a list operator [why?]
699 * - if the next thing is an opening paren, then it's a function
700 * - else it's a list operator
701 */
702
76e3520e 703STATIC I32
a0be28da 704S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 705{
79072805 706 yylval.ival = f;
35c8bce7 707 CLINE;
998054bd 708 REPORT("lop", f);
3280af22
NIS
709 PL_expect = x;
710 PL_bufptr = s;
711 PL_last_lop = PL_oldbufptr;
712 PL_last_lop_op = f;
713 if (PL_nexttoke)
a0d0e21e 714 return LSTOP;
79072805
LW
715 if (*s == '(')
716 return FUNC;
717 s = skipspace(s);
718 if (*s == '(')
719 return FUNC;
720 else
721 return LSTOP;
722}
723
ffb4593c
NT
724/*
725 * S_force_next
9cbb5ea2 726 * When the lexer realizes it knows the next token (for instance,
ffb4593c 727 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
728 * to know what token to return the next time the lexer is called. Caller
729 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
730 * handles the token correctly.
ffb4593c
NT
731 */
732
4e553d73 733STATIC void
cea2e8a9 734S_force_next(pTHX_ I32 type)
79072805 735{
3280af22
NIS
736 PL_nexttype[PL_nexttoke] = type;
737 PL_nexttoke++;
738 if (PL_lex_state != LEX_KNOWNEXT) {
739 PL_lex_defer = PL_lex_state;
740 PL_lex_expect = PL_expect;
741 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
742 }
743}
744
ffb4593c
NT
745/*
746 * S_force_word
747 * When the lexer knows the next thing is a word (for instance, it has
748 * just seen -> and it knows that the next char is a word char, then
749 * it calls S_force_word to stick the next word into the PL_next lookahead.
750 *
751 * Arguments:
b1b65b59 752 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
753 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
754 * int check_keyword : if true, Perl checks to make sure the word isn't
755 * a keyword (do this if the word is a label, e.g. goto FOO)
756 * int allow_pack : if true, : characters will also be allowed (require,
757 * use, etc. do this)
9cbb5ea2 758 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
759 */
760
76e3520e 761STATIC char *
cea2e8a9 762S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 763{
463ee0b2
LW
764 register char *s;
765 STRLEN len;
4e553d73 766
463ee0b2
LW
767 start = skipspace(start);
768 s = start;
7e2040f0 769 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 770 (allow_pack && *s == ':') ||
15f0808c 771 (allow_initial_tick && *s == '\'') )
a0d0e21e 772 {
3280af22
NIS
773 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
774 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
775 return start;
776 if (token == METHOD) {
777 s = skipspace(s);
778 if (*s == '(')
3280af22 779 PL_expect = XTERM;
463ee0b2 780 else {
3280af22 781 PL_expect = XOPERATOR;
463ee0b2 782 }
79072805 783 }
3280af22
NIS
784 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
785 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
786 force_next(token);
787 }
788 return s;
789}
790
ffb4593c
NT
791/*
792 * S_force_ident
9cbb5ea2 793 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
794 * text only contains the "foo" portion. The first argument is a pointer
795 * to the "foo", and the second argument is the type symbol to prefix.
796 * Forces the next token to be a "WORD".
9cbb5ea2 797 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
798 */
799
76e3520e 800STATIC void
cea2e8a9 801S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
802{
803 if (s && *s) {
11343788 804 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 805 PL_nextval[PL_nexttoke].opval = o;
79072805 806 force_next(WORD);
748a9306 807 if (kind) {
11343788 808 o->op_private = OPpCONST_ENTERED;
55497cff
PP
809 /* XXX see note in pp_entereval() for why we forgo typo
810 warnings if the symbol must be introduced in an eval.
811 GSAR 96-10-12 */
3280af22 812 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
813 kind == '$' ? SVt_PV :
814 kind == '@' ? SVt_PVAV :
815 kind == '%' ? SVt_PVHV :
816 SVt_PVGV
817 );
748a9306 818 }
79072805
LW
819 }
820}
821
1571675a
GS
822NV
823Perl_str_to_version(pTHX_ SV *sv)
824{
825 NV retval = 0.0;
826 NV nshift = 1.0;
827 STRLEN len;
828 char *start = SvPVx(sv,len);
3aa33fe5 829 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
830 char *end = start + len;
831 while (start < end) {
ba210ebe 832 STRLEN skip;
1571675a
GS
833 UV n;
834 if (utf)
dcad2880 835 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
836 else {
837 n = *(U8*)start;
838 skip = 1;
839 }
840 retval += ((NV)n)/nshift;
841 start += skip;
842 nshift *= 1000;
843 }
844 return retval;
845}
846
4e553d73 847/*
ffb4593c
NT
848 * S_force_version
849 * Forces the next token to be a version number.
850 */
851
76e3520e 852STATIC char *
cea2e8a9 853S_force_version(pTHX_ char *s)
89bfa8cd
PP
854{
855 OP *version = Nullop;
44dcb63b 856 char *d;
89bfa8cd
PP
857
858 s = skipspace(s);
859
44dcb63b 860 d = s;
dd629d5b 861 if (*d == 'v')
44dcb63b 862 d++;
44dcb63b 863 if (isDIGIT(*d)) {
a7cb1f99 864 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 865 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 866 SV *ver;
b73d6f50 867 s = scan_num(s, &yylval);
89bfa8cd 868 version = yylval.opval;
dd629d5b
GS
869 ver = cSVOPx(version)->op_sv;
870 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 871 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
872 SvNVX(ver) = str_to_version(ver);
873 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 874 }
89bfa8cd
PP
875 }
876 }
877
878 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 879 PL_nextval[PL_nexttoke].opval = version;
4e553d73 880 force_next(WORD);
89bfa8cd
PP
881
882 return (s);
883}
884
ffb4593c
NT
885/*
886 * S_tokeq
887 * Tokenize a quoted string passed in as an SV. It finds the next
888 * chunk, up to end of string or a backslash. It may make a new
889 * SV containing that chunk (if HINT_NEW_STRING is on). It also
890 * turns \\ into \.
891 */
892
76e3520e 893STATIC SV *
cea2e8a9 894S_tokeq(pTHX_ SV *sv)
79072805
LW
895{
896 register char *s;
897 register char *send;
898 register char *d;
b3ac6de7
IZ
899 STRLEN len = 0;
900 SV *pv = sv;
79072805
LW
901
902 if (!SvLEN(sv))
b3ac6de7 903 goto finish;
79072805 904
a0d0e21e 905 s = SvPV_force(sv, len);
21a311ee 906 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 907 goto finish;
463ee0b2 908 send = s + len;
79072805
LW
909 while (s < send && *s != '\\')
910 s++;
911 if (s == send)
b3ac6de7 912 goto finish;
79072805 913 d = s;
3280af22 914 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 915 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
916 while (s < send) {
917 if (*s == '\\') {
a0d0e21e 918 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
919 s++; /* all that, just for this */
920 }
921 *d++ = *s++;
922 }
923 *d = '\0';
463ee0b2 924 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 925 finish:
3280af22 926 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 927 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
928 return sv;
929}
930
ffb4593c
NT
931/*
932 * Now come three functions related to double-quote context,
933 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
934 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
935 * interact with PL_lex_state, and create fake ( ... ) argument lists
936 * to handle functions and concatenation.
937 * They assume that whoever calls them will be setting up a fake
938 * join call, because each subthing puts a ',' after it. This lets
939 * "lower \luPpEr"
940 * become
941 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
942 *
943 * (I'm not sure whether the spurious commas at the end of lcfirst's
944 * arguments and join's arguments are created or not).
945 */
946
947/*
948 * S_sublex_start
949 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
950 *
951 * Pattern matching will set PL_lex_op to the pattern-matching op to
952 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
953 *
954 * OP_CONST and OP_READLINE are easy--just make the new op and return.
955 *
956 * Everything else becomes a FUNC.
957 *
958 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
959 * had an OP_CONST or OP_READLINE). This just sets us up for a
960 * call to S_sublex_push().
961 */
962
76e3520e 963STATIC I32
cea2e8a9 964S_sublex_start(pTHX)
79072805
LW
965{
966 register I32 op_type = yylval.ival;
79072805
LW
967
968 if (op_type == OP_NULL) {
3280af22
NIS
969 yylval.opval = PL_lex_op;
970 PL_lex_op = Nullop;
79072805
LW
971 return THING;
972 }
973 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 974 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
975
976 if (SvTYPE(sv) == SVt_PVIV) {
977 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
978 STRLEN len;
979 char *p;
980 SV *nsv;
981
982 p = SvPV(sv, len);
79cb57f6 983 nsv = newSVpvn(p, len);
01ec43d0
GS
984 if (SvUTF8(sv))
985 SvUTF8_on(nsv);
b3ac6de7
IZ
986 SvREFCNT_dec(sv);
987 sv = nsv;
4e553d73 988 }
b3ac6de7 989 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 990 PL_lex_stuff = Nullsv;
79072805
LW
991 return THING;
992 }
993
3280af22
NIS
994 PL_sublex_info.super_state = PL_lex_state;
995 PL_sublex_info.sub_inwhat = op_type;
996 PL_sublex_info.sub_op = PL_lex_op;
997 PL_lex_state = LEX_INTERPPUSH;
55497cff 998
3280af22
NIS
999 PL_expect = XTERM;
1000 if (PL_lex_op) {
1001 yylval.opval = PL_lex_op;
1002 PL_lex_op = Nullop;
55497cff
PP
1003 return PMFUNC;
1004 }
1005 else
1006 return FUNC;
1007}
1008
ffb4593c
NT
1009/*
1010 * S_sublex_push
1011 * Create a new scope to save the lexing state. The scope will be
1012 * ended in S_sublex_done. Returns a '(', starting the function arguments
1013 * to the uc, lc, etc. found before.
1014 * Sets PL_lex_state to LEX_INTERPCONCAT.
1015 */
1016
76e3520e 1017STATIC I32
cea2e8a9 1018S_sublex_push(pTHX)
55497cff 1019{
f46d017c 1020 ENTER;
55497cff 1021
3280af22
NIS
1022 PL_lex_state = PL_sublex_info.super_state;
1023 SAVEI32(PL_lex_dojoin);
1024 SAVEI32(PL_lex_brackets);
3280af22
NIS
1025 SAVEI32(PL_lex_casemods);
1026 SAVEI32(PL_lex_starts);
1027 SAVEI32(PL_lex_state);
7766f137 1028 SAVEVPTR(PL_lex_inpat);
3280af22 1029 SAVEI32(PL_lex_inwhat);
57843af0 1030 SAVECOPLINE(PL_curcop);
3280af22
NIS
1031 SAVEPPTR(PL_bufptr);
1032 SAVEPPTR(PL_oldbufptr);
1033 SAVEPPTR(PL_oldoldbufptr);
1034 SAVEPPTR(PL_linestart);
1035 SAVESPTR(PL_linestr);
1036 SAVEPPTR(PL_lex_brackstack);
1037 SAVEPPTR(PL_lex_casestack);
1038
1039 PL_linestr = PL_lex_stuff;
1040 PL_lex_stuff = Nullsv;
1041
9cbb5ea2
GS
1042 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1043 = SvPVX(PL_linestr);
3280af22
NIS
1044 PL_bufend += SvCUR(PL_linestr);
1045 SAVEFREESV(PL_linestr);
1046
1047 PL_lex_dojoin = FALSE;
1048 PL_lex_brackets = 0;
3280af22
NIS
1049 New(899, PL_lex_brackstack, 120, char);
1050 New(899, PL_lex_casestack, 12, char);
1051 SAVEFREEPV(PL_lex_brackstack);
1052 SAVEFREEPV(PL_lex_casestack);
1053 PL_lex_casemods = 0;
1054 *PL_lex_casestack = '\0';
1055 PL_lex_starts = 0;
1056 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1057 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1058
1059 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1060 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1061 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1062 else
3280af22 1063 PL_lex_inpat = Nullop;
79072805 1064
55497cff 1065 return '(';
79072805
LW
1066}
1067
ffb4593c
NT
1068/*
1069 * S_sublex_done
1070 * Restores lexer state after a S_sublex_push.
1071 */
1072
76e3520e 1073STATIC I32
cea2e8a9 1074S_sublex_done(pTHX)
79072805 1075{
3280af22 1076 if (!PL_lex_starts++) {
9aa983d2
JH
1077 SV *sv = newSVpvn("",0);
1078 if (SvUTF8(PL_linestr))
1079 SvUTF8_on(sv);
3280af22 1080 PL_expect = XOPERATOR;
9aa983d2 1081 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1082 return THING;
1083 }
1084
3280af22
NIS
1085 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1086 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1087 return yylex();
79072805
LW
1088 }
1089
ffb4593c 1090 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1091 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1092 PL_linestr = PL_lex_repl;
1093 PL_lex_inpat = 0;
1094 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1095 PL_bufend += SvCUR(PL_linestr);
1096 SAVEFREESV(PL_linestr);
1097 PL_lex_dojoin = FALSE;
1098 PL_lex_brackets = 0;
3280af22
NIS
1099 PL_lex_casemods = 0;
1100 *PL_lex_casestack = '\0';
1101 PL_lex_starts = 0;
25da4f38 1102 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1103 PL_lex_state = LEX_INTERPNORMAL;
1104 PL_lex_starts++;
e9fa98b2
HS
1105 /* we don't clear PL_lex_repl here, so that we can check later
1106 whether this is an evalled subst; that means we rely on the
1107 logic to ensure sublex_done() is called again only via the
1108 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1109 }
e9fa98b2 1110 else {
3280af22 1111 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1112 PL_lex_repl = Nullsv;
1113 }
79072805 1114 return ',';
ffed7fef
LW
1115 }
1116 else {
f46d017c 1117 LEAVE;
3280af22
NIS
1118 PL_bufend = SvPVX(PL_linestr);
1119 PL_bufend += SvCUR(PL_linestr);
1120 PL_expect = XOPERATOR;
09bef843 1121 PL_sublex_info.sub_inwhat = 0;
79072805 1122 return ')';
ffed7fef
LW
1123 }
1124}
1125
02aa26ce
NT
1126/*
1127 scan_const
1128
1129 Extracts a pattern, double-quoted string, or transliteration. This
1130 is terrifying code.
1131
3280af22
NIS
1132 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1133 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1134 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1135
9b599b2a
GS
1136 Returns a pointer to the character scanned up to. Iff this is
1137 advanced from the start pointer supplied (ie if anything was
1138 successfully parsed), will leave an OP for the substring scanned
1139 in yylval. Caller must intuit reason for not parsing further
1140 by looking at the next characters herself.
1141
02aa26ce
NT
1142 In patterns:
1143 backslashes:
1144 double-quoted style: \r and \n
1145 regexp special ones: \D \s
1146 constants: \x3
1147 backrefs: \1 (deprecated in substitution replacements)
1148 case and quoting: \U \Q \E
1149 stops on @ and $, but not for $ as tail anchor
1150
1151 In transliterations:
1152 characters are VERY literal, except for - not at the start or end
1153 of the string, which indicates a range. scan_const expands the
1154 range to the full set of intermediate characters.
1155
1156 In double-quoted strings:
1157 backslashes:
1158 double-quoted style: \r and \n
1159 constants: \x3
1160 backrefs: \1 (deprecated)
1161 case and quoting: \U \Q \E
1162 stops on @ and $
1163
1164 scan_const does *not* construct ops to handle interpolated strings.
1165 It stops processing as soon as it finds an embedded $ or @ variable
1166 and leaves it to the caller to work out what's going on.
1167
1168 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1169
1170 $ in pattern could be $foo or could be tail anchor. Assumption:
1171 it's a tail anchor if $ is the last thing in the string, or if it's
1172 followed by one of ")| \n\t"
1173
1174 \1 (backreferences) are turned into $1
1175
1176 The structure of the code is
1177 while (there's a character to process) {
1178 handle transliteration ranges
1179 skip regexp comments
1180 skip # initiated comments in //x patterns
1181 check for embedded @foo
1182 check for embedded scalars
1183 if (backslash) {
1184 leave intact backslashes from leave (below)
1185 deprecate \1 in strings and sub replacements
1186 handle string-changing backslashes \l \U \Q \E, etc.
1187 switch (what was escaped) {
1188 handle - in a transliteration (becomes a literal -)
1189 handle \132 octal characters
1190 handle 0x15 hex characters
1191 handle \cV (control V)
1192 handle printf backslashes (\f, \r, \n, etc)
1193 } (end switch)
1194 } (end if backslash)
1195 } (end while character to read)
4e553d73 1196
02aa26ce
NT
1197*/
1198
76e3520e 1199STATIC char *
cea2e8a9 1200S_scan_const(pTHX_ char *start)
79072805 1201{
3280af22 1202 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1203 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1204 register char *s = start; /* start of the constant */
1205 register char *d = SvPVX(sv); /* destination for copies */
1206 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1207 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1208 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1209 /* the constant is UTF8 */
012bcf8d
GS
1210 UV uv;
1211
ac2262e3 1212 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1213 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1214 : UTF;
89491803 1215 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1216 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1217 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1218 : UTF;
dff6d3cd 1219 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1220 PL_lex_inpat
4a2d328f 1221 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1222 : "";
79072805
LW
1223
1224 while (s < send || dorange) {
02aa26ce 1225 /* get transliterations out of the way (they're most literal) */
3280af22 1226 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1227 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1228 if (dorange) {
1ba5c669
JH
1229 I32 i; /* current expanded character */
1230 I32 min; /* first character in range */
1231 I32 max; /* last character in range */
02aa26ce
NT
1232
1233 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1234 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1235 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1236 d -= 2; /* eat the first char and the - */
1237
8ada0baa
JH
1238 min = (U8)*d; /* first char in range */
1239 max = (U8)d[1]; /* last char in range */
1240
c2e66d9e 1241 if (min > max) {
01ec43d0 1242 Perl_croak(aTHX_
1ba5c669
JH
1243 "Invalid [] range \"%c-%c\" in transliteration operator",
1244 (char)min, (char)max);
c2e66d9e
GS
1245 }
1246
1ba5c669 1247#ifndef ASCIIish
8ada0baa
JH
1248 if ((isLOWER(min) && isLOWER(max)) ||
1249 (isUPPER(min) && isUPPER(max))) {
1250 if (isLOWER(min)) {
1251 for (i = min; i <= max; i++)
1252 if (isLOWER(i))
1253 *d++ = i;
1254 } else {
1255 for (i = min; i <= max; i++)
1256 if (isUPPER(i))
1257 *d++ = i;
1258 }
1259 }
1260 else
1261#endif
1262 for (i = min; i <= max; i++)
1263 *d++ = i;
02aa26ce
NT
1264
1265 /* mark the range as done, and continue */
79072805 1266 dorange = FALSE;
01ec43d0 1267 didrange = TRUE;
79072805 1268 continue;
4e553d73 1269 }
02aa26ce
NT
1270
1271 /* range begins (ignore - as first or last char) */
79072805 1272 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1273 if (didrange) {
1fafa243 1274 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1275 }
a0ed51b3 1276 if (utf) {
a176fa2a 1277 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1278 s++;
1279 continue;
1280 }
79072805
LW
1281 dorange = TRUE;
1282 s++;
01ec43d0
GS
1283 }
1284 else {
1285 didrange = FALSE;
1286 }
79072805 1287 }
02aa26ce
NT
1288
1289 /* if we get here, we're not doing a transliteration */
1290
0f5d15d6
IZ
1291 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1292 except for the last char, which will be done separately. */
3280af22 1293 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1294 if (s[2] == '#') {
1295 while (s < send && *s != ')')
1296 *d++ = *s++;
155aba94
GS
1297 }
1298 else if (s[2] == '{' /* This should match regcomp.c */
1299 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1300 {
cc6b7395 1301 I32 count = 1;
0f5d15d6 1302 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1303 char c;
1304
d9f97599
GS
1305 while (count && (c = *regparse)) {
1306 if (c == '\\' && regparse[1])
1307 regparse++;
4e553d73 1308 else if (c == '{')
cc6b7395 1309 count++;
4e553d73 1310 else if (c == '}')
cc6b7395 1311 count--;
d9f97599 1312 regparse++;
cc6b7395 1313 }
5bdf89e7
IZ
1314 if (*regparse != ')') {
1315 regparse--; /* Leave one char for continuation. */
cc6b7395 1316 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1317 }
0f5d15d6 1318 while (s < regparse)
cc6b7395
IZ
1319 *d++ = *s++;
1320 }
748a9306 1321 }
02aa26ce
NT
1322
1323 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1324 else if (*s == '#' && PL_lex_inpat &&
1325 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1326 while (s+1 < send && *s != '\n')
1327 *d++ = *s++;
1328 }
02aa26ce 1329
5d1d4326
JH
1330 /* check for embedded arrays
1331 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1332 */
7e2040f0 1333 else if (*s == '@' && s[1]
5d1d4326 1334 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1335 break;
02aa26ce
NT
1336
1337 /* check for embedded scalars. only stop if we're sure it's a
1338 variable.
1339 */
79072805 1340 else if (*s == '$') {
3280af22 1341 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1342 break;
c277df42 1343 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1344 break; /* in regexp, $ might be tail anchor */
1345 }
02aa26ce
NT
1346
1347 /* backslashes */
79072805
LW
1348 if (*s == '\\' && s+1 < send) {
1349 s++;
02aa26ce
NT
1350
1351 /* some backslashes we leave behind */
c9f97d15 1352 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1353 *d++ = '\\';
1354 *d++ = *s++;
1355 continue;
1356 }
02aa26ce
NT
1357
1358 /* deprecate \1 in strings and substitution replacements */
3280af22 1359 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1360 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1361 {
599cee73 1362 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1363 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1364 *--s = '$';
1365 break;
1366 }
02aa26ce
NT
1367
1368 /* string-change backslash escapes */
3280af22 1369 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1370 --s;
1371 break;
1372 }
02aa26ce
NT
1373
1374 /* if we get here, it's either a quoted -, or a digit */
79072805 1375 switch (*s) {
02aa26ce
NT
1376
1377 /* quoted - in transliterations */
79072805 1378 case '-':
3280af22 1379 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1380 *d++ = *s++;
1381 continue;
1382 }
1383 /* FALL THROUGH */
1384 default:
11b8faa4 1385 {
7e84c16c 1386 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1387 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1388 "Unrecognized escape \\%c passed through",
1389 *s);
1390 /* default action is to copy the quoted character */
f9a63242 1391 goto default_action;
11b8faa4 1392 }
02aa26ce
NT
1393
1394 /* \132 indicates an octal constant */
79072805
LW
1395 case '0': case '1': case '2': case '3':
1396 case '4': case '5': case '6': case '7':
ba210ebe
JH
1397 {
1398 STRLEN len = 0; /* disallow underscores */
1399 uv = (UV)scan_oct(s, 3, &len);
1400 s += len;
1401 }
012bcf8d 1402 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1403
1404 /* \x24 indicates a hex constant */
79072805 1405 case 'x':
a0ed51b3
LW
1406 ++s;
1407 if (*s == '{') {
1408 char* e = strchr(s, '}');
adaeee49 1409 if (!e) {
a0ed51b3 1410 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1411 e = s;
1412 }
89491803 1413 else {
ba210ebe
JH
1414 STRLEN len = 1; /* allow underscores */
1415 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1416 }
1417 s = e + 1;
a0ed51b3
LW
1418 }
1419 else {
ba210ebe
JH
1420 {
1421 STRLEN len = 0; /* disallow underscores */
1422 uv = (UV)scan_hex(s, 2, &len);
1423 s += len;
1424 }
012bcf8d
GS
1425 }
1426
1427 NUM_ESCAPE_INSERT:
1428 /* Insert oct or hex escaped character.
301d3d20
JH
1429 * There will always enough room in sv since such
1430 * escapes will be longer than any UT-F8 sequence
1431 * they can end up as. */
ba7cea30
JH
1432
1433 /* This spot is wrong for EBCDIC. Characters like
1434 * the lowercase letters and digits are >127 in EBCDIC,
1435 * so here they would need to be mapped to the Unicode
1436 * repertoire. --jhi */
1437
7948272d 1438 if (uv > 127) {
9aa983d2 1439 if (!has_utf8 && uv > 255) {
301d3d20
JH
1440 /* Might need to recode whatever we have
1441 * accumulated so far if it contains any
1442 * hibit chars.
1443 *
1444 * (Can't we keep track of that and avoid
1445 * this rescan? --jhi)
012bcf8d
GS
1446 */
1447 int hicount = 0;
1448 char *c;
301d3d20 1449
012bcf8d 1450 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1451 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1452 hicount++;
1453 }
1454 if (hicount) {
1455 char *old_pvx = SvPVX(sv);
1456 char *src, *dst;
301d3d20
JH
1457
1458 d = SvGROW(sv,
1459 SvCUR(sv) + hicount + 1) +
1460 (d - old_pvx);
012bcf8d
GS
1461
1462 src = d - 1;
1463 d += hicount;
1464 dst = d - 1;
1465
1466 while (src < dst) {
fd400ab9 1467 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1468 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1469 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1470 }
1471 else {
1472 *dst-- = *src--;
1473 }
1474 }
1475 }
1476 }
1477
9aa983d2 1478 if (has_utf8 || uv > 255) {
012bcf8d 1479 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1480 has_utf8 = TRUE;
f9a63242
JH
1481 if (PL_lex_inwhat == OP_TRANS &&
1482 PL_sublex_info.sub_op) {
1483 PL_sublex_info.sub_op->op_private |=
1484 (PL_lex_repl ? OPpTRANS_FROM_UTF
1485 : OPpTRANS_TO_UTF);
1486 utf = TRUE;
1487 }
012bcf8d 1488 }
a0ed51b3 1489 else {
012bcf8d 1490 *d++ = (char)uv;
a0ed51b3 1491 }
012bcf8d
GS
1492 }
1493 else {
1494 *d++ = (char)uv;
a0ed51b3 1495 }
79072805 1496 continue;
02aa26ce 1497
4a2d328f
IZ
1498 /* \N{latin small letter a} is a named character */
1499 case 'N':
423cee85
JH
1500 ++s;
1501 if (*s == '{') {
1502 char* e = strchr(s, '}');
155aba94 1503 SV *res;
423cee85
JH
1504 STRLEN len;
1505 char *str;
4e553d73 1506
423cee85 1507 if (!e) {
5777a3f7 1508 yyerror("Missing right brace on \\N{}");
423cee85
JH
1509 e = s - 1;
1510 goto cont_scan;
1511 }
1512 res = newSVpvn(s + 1, e - s - 1);
4e553d73 1513 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1514 res, Nullsv, "\\N{...}" );
f9a63242
JH
1515 if (has_utf8)
1516 sv_utf8_upgrade(res);
423cee85 1517 str = SvPV(res,len);
89491803 1518 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1519 char *ostart = SvPVX(sv);
1520 SvCUR_set(sv, d - ostart);
1521 SvPOK_on(sv);
e4f3eed8 1522 *d = '\0';
f08d6ad9 1523 sv_utf8_upgrade(sv);
d2f449dd
SB
1524 /* this just broke our allocation above... */
1525 SvGROW(sv, send - start);
f08d6ad9 1526 d = SvPVX(sv) + SvCUR(sv);
89491803 1527 has_utf8 = TRUE;
f08d6ad9 1528 }
423cee85
JH
1529 if (len > e - s + 4) {
1530 char *odest = SvPVX(sv);
1531
1532 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1533 d = SvPVX(sv) + (d - odest);
1534 }
1535 Copy(str, d, len, char);
1536 d += len;
1537 SvREFCNT_dec(res);
1538 cont_scan:
1539 s = e + 1;
1540 }
1541 else
5777a3f7 1542 yyerror("Missing braces on \\N{}");
423cee85
JH
1543 continue;
1544
02aa26ce 1545 /* \c is a control character */
79072805
LW
1546 case 'c':
1547 s++;
9d116dd7
JH
1548#ifdef EBCDIC
1549 *d = *s++;
1550 if (isLOWER(*d))
1551 *d = toUPPER(*d);
4e553d73 1552 *d = toCTRL(*d);
774a9426 1553 d++;
9d116dd7 1554#else
ba210ebe
JH
1555 {
1556 U8 c = *s++;
1557 *d++ = toCTRL(c);
1558 }
9d116dd7 1559#endif
79072805 1560 continue;
02aa26ce
NT
1561
1562 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1563 case 'b':
1564 *d++ = '\b';
1565 break;
1566 case 'n':
1567 *d++ = '\n';
1568 break;
1569 case 'r':
1570 *d++ = '\r';
1571 break;
1572 case 'f':
1573 *d++ = '\f';
1574 break;
1575 case 't':
1576 *d++ = '\t';
1577 break;
34a3fe2a
PP
1578#ifdef EBCDIC
1579 case 'e':
1580 *d++ = '\047'; /* CP 1047 */
1581 break;
1582 case 'a':
1583 *d++ = '\057'; /* CP 1047 */
1584 break;
1585#else
79072805
LW
1586 case 'e':
1587 *d++ = '\033';
1588 break;
1589 case 'a':
1590 *d++ = '\007';
1591 break;
34a3fe2a 1592#endif
02aa26ce
NT
1593 } /* end switch */
1594
79072805
LW
1595 s++;
1596 continue;
02aa26ce
NT
1597 } /* end if (backslash) */
1598
f9a63242 1599 default_action:
fd400ab9 1600 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
RI
1601 STRLEN len = (STRLEN) -1;
1602 UV uv;
1603 if (this_utf8) {
a0dbb045 1604 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
RI
1605 }
1606 if (len == (STRLEN)-1) {
1607 /* Illegal UTF8 (a high-bit byte), make it valid. */
1608 char *old_pvx = SvPVX(sv);
1609 /* need space for one extra char (NOTE: SvCUR() not set here) */
1610 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1611 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1612 }
1613 else {
1614 while (len--)
1615 *d++ = *s++;
1616 }
1617 has_utf8 = TRUE;
f9a63242
JH
1618 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1619 PL_sublex_info.sub_op->op_private |=
1620 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1621 utf = TRUE;
1622 }
a5a960be
RI
1623 continue;
1624 }
1625
f9a63242 1626 *d++ = *s++;
02aa26ce
NT
1627 } /* while loop to process each character */
1628
1629 /* terminate the string and set up the sv */
79072805 1630 *d = '\0';
463ee0b2 1631 SvCUR_set(sv, d - SvPVX(sv));
79072805 1632 SvPOK_on(sv);
89491803 1633 if (has_utf8)
7e2040f0 1634 SvUTF8_on(sv);
79072805 1635
02aa26ce 1636 /* shrink the sv if we allocated more than we used */
79072805
LW
1637 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1638 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1639 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1640 }
02aa26ce 1641
9b599b2a 1642 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1643 if (s > PL_bufptr) {
1644 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1645 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1646 sv, Nullsv,
4e553d73 1647 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1648 ? "tr"
3280af22 1649 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1650 ? "s"
1651 : "qq")));
79072805 1652 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1653 } else
8990e307 1654 SvREFCNT_dec(sv);
79072805
LW
1655 return s;
1656}
1657
ffb4593c
NT
1658/* S_intuit_more
1659 * Returns TRUE if there's more to the expression (e.g., a subscript),
1660 * FALSE otherwise.
ffb4593c
NT
1661 *
1662 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1663 *
1664 * ->[ and ->{ return TRUE
1665 * { and [ outside a pattern are always subscripts, so return TRUE
1666 * if we're outside a pattern and it's not { or [, then return FALSE
1667 * if we're in a pattern and the first char is a {
1668 * {4,5} (any digits around the comma) returns FALSE
1669 * if we're in a pattern and the first char is a [
1670 * [] returns FALSE
1671 * [SOMETHING] has a funky algorithm to decide whether it's a
1672 * character class or not. It has to deal with things like
1673 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1674 * anything else returns TRUE
1675 */
1676
9cbb5ea2
GS
1677/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1678
76e3520e 1679STATIC int
cea2e8a9 1680S_intuit_more(pTHX_ register char *s)
79072805 1681{
3280af22 1682 if (PL_lex_brackets)
79072805
LW
1683 return TRUE;
1684 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1685 return TRUE;
1686 if (*s != '{' && *s != '[')
1687 return FALSE;
3280af22 1688 if (!PL_lex_inpat)
79072805
LW
1689 return TRUE;
1690
1691 /* In a pattern, so maybe we have {n,m}. */
1692 if (*s == '{') {
1693 s++;
1694 if (!isDIGIT(*s))
1695 return TRUE;
1696 while (isDIGIT(*s))
1697 s++;
1698 if (*s == ',')
1699 s++;
1700 while (isDIGIT(*s))
1701 s++;
1702 if (*s == '}')
1703 return FALSE;
1704 return TRUE;
1705
1706 }
1707
1708 /* On the other hand, maybe we have a character class */
1709
1710 s++;
1711 if (*s == ']' || *s == '^')
1712 return FALSE;
1713 else {
ffb4593c 1714 /* this is terrifying, and it works */
79072805
LW
1715 int weight = 2; /* let's weigh the evidence */
1716 char seen[256];
f27ffc4a 1717 unsigned char un_char = 255, last_un_char;
93a17b20 1718 char *send = strchr(s,']');
3280af22 1719 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1720
1721 if (!send) /* has to be an expression */
1722 return TRUE;
1723
1724 Zero(seen,256,char);
1725 if (*s == '$')
1726 weight -= 3;
1727 else if (isDIGIT(*s)) {
1728 if (s[1] != ']') {
1729 if (isDIGIT(s[1]) && s[2] == ']')
1730 weight -= 10;
1731 }
1732 else
1733 weight -= 100;
1734 }
1735 for (; s < send; s++) {
1736 last_un_char = un_char;
1737 un_char = (unsigned char)*s;
1738 switch (*s) {
1739 case '@':
1740 case '&':
1741 case '$':
1742 weight -= seen[un_char] * 10;
7e2040f0 1743 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1744 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1745 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1746 weight -= 100;
1747 else
1748 weight -= 10;
1749 }
1750 else if (*s == '$' && s[1] &&
93a17b20
LW
1751 strchr("[#!%*<>()-=",s[1])) {
1752 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1753 weight -= 10;
1754 else
1755 weight -= 1;
1756 }
1757 break;
1758 case '\\':
1759 un_char = 254;
1760 if (s[1]) {
93a17b20 1761 if (strchr("wds]",s[1]))
79072805
LW
1762 weight += 100;
1763 else if (seen['\''] || seen['"'])
1764 weight += 1;
93a17b20 1765 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1766 weight += 40;
1767 else if (isDIGIT(s[1])) {
1768 weight += 40;
1769 while (s[1] && isDIGIT(s[1]))
1770 s++;
1771 }
1772 }
1773 else
1774 weight += 100;
1775 break;
1776 case '-':
1777 if (s[1] == '\\')
1778 weight += 50;
93a17b20 1779 if (strchr("aA01! ",last_un_char))
79072805 1780 weight += 30;
93a17b20 1781 if (strchr("zZ79~",s[1]))
79072805 1782 weight += 30;
f27ffc4a
GS
1783 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1784 weight -= 5; /* cope with negative subscript */
79072805
LW
1785 break;
1786 default:
93a17b20 1787 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1788 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1789 char *d = tmpbuf;
1790 while (isALPHA(*s))
1791 *d++ = *s++;
1792 *d = '\0';
1793 if (keyword(tmpbuf, d - tmpbuf))
1794 weight -= 150;
1795 }
1796 if (un_char == last_un_char + 1)
1797 weight += 5;
1798 weight -= seen[un_char];
1799 break;
1800 }
1801 seen[un_char]++;
1802 }
1803 if (weight >= 0) /* probably a character class */
1804 return FALSE;
1805 }
1806
1807 return TRUE;
1808}
ffed7fef 1809
ffb4593c
NT
1810/*
1811 * S_intuit_method
1812 *
1813 * Does all the checking to disambiguate
1814 * foo bar
1815 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1816 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1817 *
1818 * First argument is the stuff after the first token, e.g. "bar".
1819 *
1820 * Not a method if bar is a filehandle.
1821 * Not a method if foo is a subroutine prototyped to take a filehandle.
1822 * Not a method if it's really "Foo $bar"
1823 * Method if it's "foo $bar"
1824 * Not a method if it's really "print foo $bar"
1825 * Method if it's really "foo package::" (interpreted as package->foo)
1826 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1827 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1828 * =>
1829 */
1830
76e3520e 1831STATIC int
cea2e8a9 1832S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1833{
1834 char *s = start + (*start == '$');
3280af22 1835 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1836 STRLEN len;
1837 GV* indirgv;
1838
1839 if (gv) {
b6c543e3 1840 CV *cv;
a0d0e21e
LW
1841 if (GvIO(gv))
1842 return 0;
b6c543e3
IZ
1843 if ((cv = GvCVu(gv))) {
1844 char *proto = SvPVX(cv);
1845 if (proto) {
1846 if (*proto == ';')
1847 proto++;
1848 if (*proto == '*')
1849 return 0;
1850 }
1851 } else
a0d0e21e
LW
1852 gv = 0;
1853 }
8903cb82 1854 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1855 /* start is the beginning of the possible filehandle/object,
1856 * and s is the end of it
1857 * tmpbuf is a copy of it
1858 */
1859
a0d0e21e 1860 if (*start == '$') {
3280af22 1861 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1862 return 0;
1863 s = skipspace(s);
3280af22
NIS
1864 PL_bufptr = start;
1865 PL_expect = XREF;
a0d0e21e
LW
1866 return *s == '(' ? FUNCMETH : METHOD;
1867 }
1868 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1869 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1870 len -= 2;
1871 tmpbuf[len] = '\0';
1872 goto bare_package;
1873 }
1874 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1875 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1876 return 0;
1877 /* filehandle or package name makes it a method */
89bfa8cd 1878 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1879 s = skipspace(s);
3280af22 1880 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1881 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1882 bare_package:
3280af22 1883 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1884 newSVpvn(tmpbuf,len));
3280af22
NIS
1885 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1886 PL_expect = XTERM;
a0d0e21e 1887 force_next(WORD);
3280af22 1888 PL_bufptr = s;
a0d0e21e
LW
1889 return *s == '(' ? FUNCMETH : METHOD;
1890 }
1891 }
1892 return 0;
1893}
1894
ffb4593c
NT
1895/*
1896 * S_incl_perldb
1897 * Return a string of Perl code to load the debugger. If PERL5DB
1898 * is set, it will return the contents of that, otherwise a
1899 * compile-time require of perl5db.pl.
1900 */
1901
76e3520e 1902STATIC char*
cea2e8a9 1903S_incl_perldb(pTHX)
a0d0e21e 1904{
3280af22 1905 if (PL_perldb) {
76e3520e 1906 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1907
1908 if (pdb)
1909 return pdb;
61bb5906 1910 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1911 return "BEGIN { require 'perl5db.pl' }";
1912 }
1913 return "";
1914}
1915
1916
16d20bd9 1917/* Encoded script support. filter_add() effectively inserts a
4e553d73 1918 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1919 * Note that the filter function only applies to the current source file
1920 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1921 *
1922 * The datasv parameter (which may be NULL) can be used to pass
1923 * private data to this instance of the filter. The filter function
1924 * can recover the SV using the FILTER_DATA macro and use it to
1925 * store private buffers and state information.
1926 *
1927 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1928 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1929 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1930 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1931 * private use must be set using malloc'd pointers.
1932 */
16d20bd9
AD
1933
1934SV *
864dbfa3 1935Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1936{
f4c556ac
GS
1937 if (!funcp)
1938 return Nullsv;
1939
3280af22
NIS
1940 if (!PL_rsfp_filters)
1941 PL_rsfp_filters = newAV();
16d20bd9 1942 if (!datasv)
8c52afec 1943 datasv = NEWSV(255,0);
16d20bd9 1944 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1945 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1946 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1947 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1948 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1949 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1950 av_unshift(PL_rsfp_filters, 1);
1951 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1952 return(datasv);
1953}
4e553d73 1954
16d20bd9
AD
1955
1956/* Delete most recently added instance of this filter function. */
a0d0e21e 1957void
864dbfa3 1958Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1959{
e0c19803 1960 SV *datasv;
f4c556ac 1961 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1962 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1963 return;
1964 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1965 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1966 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1967 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1968 IoANY(datasv) = (void *)NULL;
3280af22 1969 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1970
16d20bd9
AD
1971 return;
1972 }
1973 /* we need to search for the correct entry and clear it */
cea2e8a9 1974 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1975}
1976
1977
1978/* Invoke the n'th filter function for the current rsfp. */
1979I32
864dbfa3 1980Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1981
1982
8ac85365 1983 /* 0 = read one text line */
a0d0e21e 1984{
16d20bd9
AD
1985 filter_t funcp;
1986 SV *datasv = NULL;
e50aee73 1987
3280af22 1988 if (!PL_rsfp_filters)
16d20bd9 1989 return -1;
3280af22 1990 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1991 /* Provide a default input filter to make life easy. */
1992 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1993 DEBUG_P(PerlIO_printf(Perl_debug_log,
1994 "filter_read %d: from rsfp\n", idx));
4e553d73 1995 if (maxlen) {
16d20bd9
AD
1996 /* Want a block */
1997 int len ;
1998 int old_len = SvCUR(buf_sv) ;
1999
2000 /* ensure buf_sv is large enough */
2001 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2002 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2003 if (PerlIO_error(PL_rsfp))
37120919
AD
2004 return -1; /* error */
2005 else
2006 return 0 ; /* end of file */
2007 }
16d20bd9
AD
2008 SvCUR_set(buf_sv, old_len + len) ;
2009 } else {
2010 /* Want a line */
3280af22
NIS
2011 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2012 if (PerlIO_error(PL_rsfp))
37120919
AD
2013 return -1; /* error */
2014 else
2015 return 0 ; /* end of file */
2016 }
16d20bd9
AD
2017 }
2018 return SvCUR(buf_sv);
2019 }
2020 /* Skip this filter slot if filter has been deleted */
3280af22 2021 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2022 DEBUG_P(PerlIO_printf(Perl_debug_log,
2023 "filter_read %d: skipped (filter deleted)\n",
2024 idx));
16d20bd9
AD
2025 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2026 }
2027 /* Get function pointer hidden within datasv */
4755096e 2028 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2029 DEBUG_P(PerlIO_printf(Perl_debug_log,
2030 "filter_read %d: via function %p (%s)\n",
2031 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2032 /* Call function. The function is expected to */
2033 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2034 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2035 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2036}
2037
76e3520e 2038STATIC char *
cea2e8a9 2039S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2040{
c39cd008 2041#ifdef PERL_CR_FILTER
3280af22 2042 if (!PL_rsfp_filters) {
c39cd008 2043 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2044 }
2045#endif
3280af22 2046 if (PL_rsfp_filters) {
16d20bd9 2047
55497cff
PP
2048 if (!append)
2049 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2050 if (FILTER_READ(0, sv, 0) > 0)
2051 return ( SvPVX(sv) ) ;
2052 else
2053 return Nullch ;
2054 }
9d116dd7 2055 else
fd049845 2056 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2057}
2058
01ec43d0
GS
2059STATIC HV *
2060S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2061{
2062 GV *gv;
2063
01ec43d0 2064 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2065 return PL_curstash;
2066
2067 if (len > 2 &&
2068 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2069 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2070 {
2071 return GvHV(gv); /* Foo:: */
def3634b
GS
2072 }
2073
2074 /* use constant CLASS => 'MyClass' */
2075 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2076 SV *sv;
2077 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2078 pkgname = SvPV_nolen(sv);
2079 }
2080 }
2081
2082 return gv_stashpv(pkgname, FALSE);
2083}
a0d0e21e 2084
748a9306
LW
2085#ifdef DEBUGGING
2086 static char* exp_name[] =
09bef843
SB
2087 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2088 "ATTRTERM", "TERMBLOCK"
2089 };
748a9306 2090#endif
463ee0b2 2091
02aa26ce
NT
2092/*
2093 yylex
2094
2095 Works out what to call the token just pulled out of the input
2096 stream. The yacc parser takes care of taking the ops we return and
2097 stitching them into a tree.
2098
2099 Returns:
2100 PRIVATEREF
2101
2102 Structure:
2103 if read an identifier
2104 if we're in a my declaration
2105 croak if they tried to say my($foo::bar)
2106 build the ops for a my() declaration
2107 if it's an access to a my() variable
2108 are we in a sort block?
2109 croak if my($a); $a <=> $b
2110 build ops for access to a my() variable
2111 if in a dq string, and they've said @foo and we can't find @foo
2112 croak
2113 build ops for a bareword
2114 if we already built the token before, use it.
2115*/
2116
dba4d153 2117#ifdef USE_PURE_BISON
864dbfa3 2118int
dba4d153 2119Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2120{
20141f0e
RI
2121 int r;
2122
6f202aea 2123 yyactlevel++;
20141f0e
RI
2124 yylval_pointer[yyactlevel] = lvalp;
2125 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2126 if (yyactlevel >= YYMAXLEVEL)
2127 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2128
dba4d153 2129 r = Perl_yylex(aTHX);
20141f0e 2130
d8ae6756
RI
2131 if (yyactlevel > 0)
2132 yyactlevel--;
20141f0e
RI
2133
2134 return r;
2135}
dba4d153 2136#endif
20141f0e 2137
dba4d153
JH
2138#ifdef __SC__
2139#pragma segment Perl_yylex
2140#endif
dba4d153 2141int
dba4d153 2142Perl_yylex(pTHX)
20141f0e 2143{
79072805 2144 register char *s;
378cc40b 2145 register char *d;
79072805 2146 register I32 tmp;
463ee0b2 2147 STRLEN len;
161b471a
NIS
2148 GV *gv = Nullgv;
2149 GV **gvp = 0;
aa7440fb 2150 bool bof = FALSE;
a687059c 2151
02aa26ce 2152 /* check if there's an identifier for us to look at */
3280af22 2153 if (PL_pending_ident) {
02aa26ce 2154 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2155 char pit = PL_pending_ident;
2156 PL_pending_ident = 0;
bbce6d69 2157
607df283
SC
2158 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2159 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2160
02aa26ce
NT
2161 /* if we're in a my(), we can't allow dynamics here.
2162 $foo'bar has already been turned into $foo::bar, so
2163 just check for colons.
2164
2165 if it's a legal name, the OP is a PADANY.
2166 */
3280af22 2167 if (PL_in_my) {
77ca0c92 2168 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2169 if (strchr(PL_tokenbuf,':'))
2170 yyerror(Perl_form(aTHX_ "No package name allowed for "
2171 "variable %s in \"our\"",
2172 PL_tokenbuf));
77ca0c92
LW
2173 tmp = pad_allocmy(PL_tokenbuf);
2174 }
2175 else {
2176 if (strchr(PL_tokenbuf,':'))
2177 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2178
77ca0c92
LW
2179 yylval.opval = newOP(OP_PADANY, 0);
2180 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2181 return PRIVATEREF;
2182 }
bbce6d69
PP
2183 }
2184
4e553d73 2185 /*
02aa26ce
NT
2186 build the ops for accesses to a my() variable.
2187
2188 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2189 then used in a comparison. This catches most, but not
2190 all cases. For instance, it catches
2191 sort { my($a); $a <=> $b }
2192 but not
2193 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2194 (although why you'd do that is anyone's guess).
2195 */
2196
3280af22 2197 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2198#ifdef USE_THREADS
54b9620d 2199 /* Check for single character per-thread SVs */
3280af22
NIS
2200 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2201 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2202 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2203 {
2faa37cc 2204 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2205 yylval.opval->op_targ = tmp;
2206 return PRIVATEREF;
2207 }
2208#endif /* USE_THREADS */
3280af22 2209 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2210 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2211 /* might be an "our" variable" */
f472eb5c 2212 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2213 /* build ops for a bareword */
f472eb5c
GS
2214 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2215 sv_catpvn(sym, "::", 2);
2216 sv_catpv(sym, PL_tokenbuf+1);
2217 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2218 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2219 gv_fetchpv(SvPVX(sym),
77ca0c92 2220 (PL_in_eval
f472eb5c
GS
2221 ? (GV_ADDMULTI | GV_ADDINEVAL)
2222 : TRUE
77ca0c92
LW
2223 ),
2224 ((PL_tokenbuf[0] == '$') ? SVt_PV
2225 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2226 : SVt_PVHV));
2227 return WORD;
2228 }
2229
02aa26ce 2230 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2231 if (PL_last_lop_op == OP_SORT &&
2232 PL_tokenbuf[0] == '$' &&
2233 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2234 && !PL_tokenbuf[2])
bbce6d69 2235 {
3280af22
NIS
2236 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2237 d < PL_bufend && *d != '\n';
a863c7d1
MB
2238 d++)
2239 {
2240 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2241 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2242 PL_tokenbuf);
a863c7d1 2243 }
bbce6d69
PP
2244 }
2245 }
bbce6d69 2246
a863c7d1
MB
2247 yylval.opval = newOP(OP_PADANY, 0);
2248 yylval.opval->op_targ = tmp;
2249 return PRIVATEREF;
2250 }
bbce6d69
PP
2251 }
2252
02aa26ce
NT
2253 /*
2254 Whine if they've said @foo in a doublequoted string,
2255 and @foo isn't a variable we can find in the symbol
2256 table.
2257 */
3280af22
NIS
2258 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2259 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2260 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2261 && ckWARN(WARN_AMBIGUOUS))
2262 {
2263 /* Downgraded from fatal to warning 20000522 mjd */
2264 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2265 "Possible unintended interpolation of %s in string",
2266 PL_tokenbuf);
2267 }
bbce6d69
PP
2268 }
2269
02aa26ce 2270 /* build ops for a bareword */
3280af22 2271 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2272 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2273 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2274 ((PL_tokenbuf[0] == '$') ? SVt_PV
2275 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2276 : SVt_PVHV));
2277 return WORD;
2278 }
2279
02aa26ce
NT
2280 /* no identifier pending identification */
2281
3280af22 2282 switch (PL_lex_state) {
79072805
LW
2283#ifdef COMMENTARY
2284 case LEX_NORMAL: /* Some compilers will produce faster */
2285 case LEX_INTERPNORMAL: /* code if we comment these out. */
2286 break;
2287#endif
2288
09bef843 2289 /* when we've already built the next token, just pull it out of the queue */
79072805 2290 case LEX_KNOWNEXT:
3280af22
NIS
2291 PL_nexttoke--;
2292 yylval = PL_nextval[PL_nexttoke];
2293 if (!PL_nexttoke) {
2294 PL_lex_state = PL_lex_defer;
2295 PL_expect = PL_lex_expect;
2296 PL_lex_defer = LEX_NORMAL;
463ee0b2 2297 }
607df283 2298 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2299 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2300 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2301
3280af22 2302 return(PL_nexttype[PL_nexttoke]);
79072805 2303
02aa26ce 2304 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2305 when we get here, PL_bufptr is at the \
02aa26ce 2306 */
79072805
LW
2307 case LEX_INTERPCASEMOD:
2308#ifdef DEBUGGING
3280af22 2309 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2310 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2311#endif
02aa26ce 2312 /* handle \E or end of string */
3280af22 2313 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2314 char oldmod;
02aa26ce
NT
2315
2316 /* if at a \E */
3280af22
NIS
2317 if (PL_lex_casemods) {
2318 oldmod = PL_lex_casestack[--PL_lex_casemods];
2319 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2320
3280af22
NIS
2321 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2322 PL_bufptr += 2;
2323 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2324 }
79072805
LW
2325 return ')';
2326 }
3280af22
NIS
2327 if (PL_bufptr != PL_bufend)
2328 PL_bufptr += 2;
2329 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2330 return yylex();
79072805
LW
2331 }
2332 else {
607df283
SC
2333 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2334 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2335 s = PL_bufptr + 1;
79072805
LW
2336 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2337 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2338 if (strchr("LU", *s) &&
3280af22 2339 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2340 {
3280af22 2341 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2342 return ')';
2343 }
3280af22
NIS
2344 if (PL_lex_casemods > 10) {
2345 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2346 if (newlb != PL_lex_casestack) {
a0d0e21e 2347 SAVEFREEPV(newlb);
3280af22 2348 PL_lex_casestack = newlb;
a0d0e21e
LW
2349 }
2350 }
3280af22
NIS
2351 PL_lex_casestack[PL_lex_casemods++] = *s;
2352 PL_lex_casestack[PL_lex_casemods] = '\0';
2353 PL_lex_state = LEX_INTERPCONCAT;
2354 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2355 force_next('(');
2356 if (*s == 'l')
3280af22 2357 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2358 else if (*s == 'u')
3280af22 2359 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2360 else if (*s == 'L')
3280af22 2361 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2362 else if (*s == 'U')
3280af22 2363 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2364 else if (*s == 'Q')
3280af22 2365 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2366 else
cea2e8a9 2367 Perl_croak(aTHX_ "panic: yylex");
3280af22 2368 PL_bufptr = s + 1;
79072805 2369 force_next(FUNC);
3280af22
NIS
2370 if (PL_lex_starts) {
2371 s = PL_bufptr;
2372 PL_lex_starts = 0;
79072805
LW
2373 Aop(OP_CONCAT);
2374 }
2375 else
cea2e8a9 2376 return yylex();
79072805
LW
2377 }
2378
55497cff
PP
2379 case LEX_INTERPPUSH:
2380 return sublex_push();
2381
79072805 2382 case LEX_INTERPSTART:
3280af22 2383 if (PL_bufptr == PL_bufend)
79072805 2384 return sublex_done();
607df283
SC
2385 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2386 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2387 PL_expect = XTERM;
2388 PL_lex_dojoin = (*PL_bufptr == '@');
2389 PL_lex_state = LEX_INTERPNORMAL;
2390 if (PL_lex_dojoin) {
2391 PL_nextval[PL_nexttoke].ival = 0;
79072805 2392 force_next(',');
554b3eca 2393#ifdef USE_THREADS
533c011a
NIS
2394 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2395 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2396 force_next(PRIVATEREF);
2397#else
a0d0e21e 2398 force_ident("\"", '$');
554b3eca 2399#endif /* USE_THREADS */
3280af22 2400 PL_nextval[PL_nexttoke].ival = 0;
79072805 2401 force_next('$');
3280af22 2402 PL_nextval[PL_nexttoke].ival = 0;
79072805 2403 force_next('(');
3280af22 2404 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2405 force_next(FUNC);
2406 }
3280af22
NIS
2407 if (PL_lex_starts++) {
2408 s = PL_bufptr;
79072805
LW
2409 Aop(OP_CONCAT);
2410 }
cea2e8a9 2411 return yylex();
79072805
LW
2412
2413 case LEX_INTERPENDMAYBE:
3280af22
NIS
2414 if (intuit_more(PL_bufptr)) {
2415 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2416 break;
2417 }
2418 /* FALL THROUGH */
2419
2420 case LEX_INTERPEND:
3280af22
NIS
2421 if (PL_lex_dojoin) {
2422 PL_lex_dojoin = FALSE;
2423 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2424 return ')';
2425 }
43a16006 2426 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2427 && SvEVALED(PL_lex_repl))
43a16006 2428 {
e9fa98b2 2429 if (PL_bufptr != PL_bufend)
cea2e8a9 2430 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2431 PL_lex_repl = Nullsv;
2432 }
79072805
LW
2433 /* FALLTHROUGH */
2434 case LEX_INTERPCONCAT:
2435#ifdef DEBUGGING
3280af22 2436 if (PL_lex_brackets)
cea2e8a9 2437 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2438#endif
3280af22 2439 if (PL_bufptr == PL_bufend)
79072805
LW
2440 return sublex_done();
2441
3280af22
NIS
2442 if (SvIVX(PL_linestr) == '\'') {
2443 SV *sv = newSVsv(PL_linestr);
2444 if (!PL_lex_inpat)
76e3520e 2445 sv = tokeq(sv);
3280af22 2446 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2447 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2448 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2449 s = PL_bufend;
79072805
LW
2450 }
2451 else {
3280af22 2452 s = scan_const(PL_bufptr);
79072805 2453 if (*s == '\\')
3280af22 2454 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2455 else
3280af22 2456 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2457 }
2458
3280af22
NIS
2459 if (s != PL_bufptr) {
2460 PL_nextval[PL_nexttoke] = yylval;
2461 PL_expect = XTERM;
79072805 2462 force_next(THING);
3280af22 2463 if (PL_lex_starts++)
79072805
LW
2464 Aop(OP_CONCAT);
2465 else {
3280af22 2466 PL_bufptr = s;
cea2e8a9 2467 return yylex();
79072805
LW
2468 }
2469 }
2470
cea2e8a9 2471 return yylex();
a0d0e21e 2472 case LEX_FORMLINE:
3280af22
NIS
2473 PL_lex_state = LEX_NORMAL;
2474 s = scan_formline(PL_bufptr);
2475 if (!PL_lex_formbrack)
a0d0e21e
LW
2476 goto rightbracket;
2477 OPERATOR(';');
79072805
LW
2478 }
2479
3280af22
NIS
2480 s = PL_bufptr;
2481 PL_oldoldbufptr = PL_oldbufptr;
2482 PL_oldbufptr = s;
607df283 2483 DEBUG_T( {
bf49b057
GS
2484 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2485 exp_name[PL_expect], s);
79072805 2486 } )
463ee0b2
LW
2487
2488 retry:
378cc40b
LW
2489 switch (*s) {
2490 default:
7e2040f0 2491 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2492 goto keylookup;
cea2e8a9 2493 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2494 case 4:
2495 case 26:
2496 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2497 case 0:
3280af22
NIS
2498 if (!PL_rsfp) {
2499 PL_last_uni = 0;
2500 PL_last_lop = 0;
2501 if (PL_lex_brackets)
d98d5fff 2502 yyerror("Missing right curly or square bracket");
4e553d73 2503 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2504 "### Tokener got EOF\n");
2505 } )
79072805 2506 TOKEN(0);
463ee0b2 2507 }
3280af22 2508 if (s++ < PL_bufend)
a687059c 2509 goto retry; /* ignore stray nulls */
3280af22
NIS
2510 PL_last_uni = 0;
2511 PL_last_lop = 0;
2512 if (!PL_in_eval && !PL_preambled) {
2513 PL_preambled = TRUE;
2514 sv_setpv(PL_linestr,incl_perldb());
2515 if (SvCUR(PL_linestr))
2516 sv_catpv(PL_linestr,";");
2517 if (PL_preambleav){
2518 while(AvFILLp(PL_preambleav) >= 0) {
2519 SV *tmpsv = av_shift(PL_preambleav);
2520 sv_catsv(PL_linestr, tmpsv);
2521 sv_catpv(PL_linestr, ";");
91b7def8
PP
2522 sv_free(tmpsv);
2523 }
3280af22
NIS
2524 sv_free((SV*)PL_preambleav);
2525 PL_preambleav = NULL;
91b7def8 2526 }
3280af22
NIS
2527 if (PL_minus_n || PL_minus_p) {
2528 sv_catpv(PL_linestr, "LINE: while (<>) {");
2529 if (PL_minus_l)
2530 sv_catpv(PL_linestr,"chomp;");
2531 if (PL_minus_a) {
8fd239a7
CS
2532 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2533 if (gv)
2534 GvIMPORTED_AV_on(gv);
3280af22
NIS
2535 if (PL_minus_F) {
2536 if (strchr("/'\"", *PL_splitstr)
2537 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2538 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2539 else {
2540 char delim;
2541 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2542 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2543 delim = *s;
cea2e8a9 2544 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2545 "q" + (delim == '\''), delim);
3280af22 2546 for (s = PL_splitstr; *s; s++) {
54310121 2547 if (*s == '\\')
3280af22
NIS
2548 sv_catpvn(PL_linestr, "\\", 1);
2549 sv_catpvn(PL_linestr, s, 1);
54310121 2550 }
cea2e8a9 2551 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2552 }
2304df62
AD
2553 }
2554 else
3280af22 2555 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2556 }
79072805 2557 }
3280af22
NIS
2558 sv_catpv(PL_linestr, "\n");
2559 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2560 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2561 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2562 SV *sv = NEWSV(85,0);
2563
2564 sv_upgrade(sv, SVt_PVMG);
3280af22 2565 sv_setsv(sv,PL_linestr);
57843af0 2566 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2567 }
79072805 2568 goto retry;
a687059c 2569 }
e929a76b 2570 do {
aa7440fb 2571 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2572 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2573 fake_eof:
2574 if (PL_rsfp) {
2575 if (PL_preprocess && !PL_in_eval)
2576 (void)PerlProc_pclose(PL_rsfp);
2577 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2578 PerlIO_clearerr(PL_rsfp);
2579 else
2580 (void)PerlIO_close(PL_rsfp);
2581 PL_rsfp = Nullfp;
2582 PL_doextract = FALSE;
2583 }
2584 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2585 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2586 sv_catpv(PL_linestr,";}");
2587 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2588 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2589 PL_minus_n = PL_minus_p = 0;
2590 goto retry;
2591 }
2592 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2593 sv_setpv(PL_linestr,"");
2594 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2595 }
2596 /* if it looks like the start of a BOM, check if it in fact is */
2597 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2598#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2599# ifdef __GNU_LIBRARY__
2600# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2601# define FTELL_FOR_PIPE_IS_BROKEN
2602# endif
e3f494f1
JH
2603# else
2604# ifdef __GLIBC__
2605# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2606# define FTELL_FOR_PIPE_IS_BROKEN
2607# endif
2608# endif
226017aa
DD
2609# endif
2610#endif
2611#ifdef FTELL_FOR_PIPE_IS_BROKEN
2612 /* This loses the possibility to detect the bof
2613 * situation on perl -P when the libc5 is being used.
2614 * Workaround? Maybe attach some extra state to PL_rsfp?
2615 */
2616 if (!PL_preprocess)
7e28d3af 2617 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2618#else
7e28d3af 2619 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2620#endif
7e28d3af 2621 if (bof) {
3280af22 2622 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2623 s = swallow_bom((U8*)s);
e929a76b 2624 }
378cc40b 2625 }
3280af22 2626 if (PL_doextract) {
a0d0e21e 2627 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2628 PL_doextract = FALSE;
a0d0e21e
LW
2629
2630 /* Incest with pod. */
2631 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2632 sv_setpv(PL_linestr, "");
2633 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2634 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2635 PL_doextract = FALSE;
a0d0e21e 2636 }
4e553d73 2637 }
463ee0b2 2638 incline(s);
3280af22
NIS
2639 } while (PL_doextract);
2640 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2641 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2642 SV *sv = NEWSV(85,0);
a687059c 2643
93a17b20 2644 sv_upgrade(sv, SVt_PVMG);
3280af22 2645 sv_setsv(sv,PL_linestr);
57843af0 2646 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2647 }
3280af22 2648 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2649 if (CopLINE(PL_curcop) == 1) {
3280af22 2650 while (s < PL_bufend && isSPACE(*s))
79072805 2651 s++;
a0d0e21e 2652 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2653 s++;
44a8e56a 2654 d = Nullch;
3280af22 2655 if (!PL_in_eval) {
44a8e56a
PP
2656 if (*s == '#' && *(s+1) == '!')
2657 d = s + 2;
2658#ifdef ALTERNATE_SHEBANG
2659 else {
2660 static char as[] = ALTERNATE_SHEBANG;
2661 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2662 d = s + (sizeof(as) - 1);
2663 }
2664#endif /* ALTERNATE_SHEBANG */
2665 }
2666 if (d) {
b8378b72 2667 char *ipath;
774d564b 2668 char *ipathend;
b8378b72 2669
774d564b 2670 while (isSPACE(*d))
b8378b72
CS
2671 d++;
2672 ipath = d;
774d564b
PP
2673 while (*d && !isSPACE(*d))
2674 d++;
2675 ipathend = d;
2676
2677#ifdef ARG_ZERO_IS_SCRIPT
2678 if (ipathend > ipath) {
2679 /*
2680 * HP-UX (at least) sets argv[0] to the script name,
2681 * which makes $^X incorrect. And Digital UNIX and Linux,
2682 * at least, set argv[0] to the basename of the Perl
2683 * interpreter. So, having found "#!", we'll set it right.
2684 */
2685 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2686 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2687 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2688 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2689 SvSETMAGIC(x);
2690 }
774d564b 2691 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2692 }
774d564b 2693#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2694
2695 /*
2696 * Look for options.
2697 */
748a9306 2698 d = instr(s,"perl -");
84e30d1a 2699 if (!d) {
748a9306 2700 d = instr(s,"perl");
84e30d1a
GS
2701#if defined(DOSISH)
2702 /* avoid getting into infinite loops when shebang
2703 * line contains "Perl" rather than "perl" */
2704 if (!d) {
2705 for (d = ipathend-4; d >= ipath; --d) {
2706 if ((*d == 'p' || *d == 'P')
2707 && !ibcmp(d, "perl", 4))
2708 {
2709 break;
2710 }
2711 }
2712 if (d < ipath)
2713 d = Nullch;
2714 }
2715#endif
2716 }
44a8e56a
PP
2717#ifdef ALTERNATE_SHEBANG
2718 /*
2719 * If the ALTERNATE_SHEBANG on this system starts with a
2720 * character that can be part of a Perl expression, then if
2721 * we see it but not "perl", we're probably looking at the
2722 * start of Perl code, not a request to hand off to some
2723 * other interpreter. Similarly, if "perl" is there, but
2724 * not in the first 'word' of the line, we assume the line
2725 * contains the start of the Perl program.
44a8e56a
PP
2726 */
2727 if (d && *s != '#') {
774d564b 2728 char *c = ipath;
44a8e56a
PP
2729 while (*c && !strchr("; \t\r\n\f\v#", *c))
2730 c++;
2731 if (c < d)
2732 d = Nullch; /* "perl" not in first word; ignore */
2733 else
2734 *s = '#'; /* Don't try to parse shebang line */
2735 }
774d564b 2736#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2737#ifndef MACOS_TRADITIONAL
748a9306 2738 if (!d &&
44a8e56a 2739 *s == '#' &&
774d564b 2740 ipathend > ipath &&
3280af22 2741 !PL_minus_c &&
748a9306 2742 !instr(s,"indir") &&
3280af22 2743 instr(PL_origargv[0],"perl"))
748a9306 2744 {
9f68db38 2745 char **newargv;
9f68db38 2746
774d564b
PP
2747 *ipathend = '\0';
2748 s = ipathend + 1;
3280af22 2749 while (s < PL_bufend && isSPACE(*s))
9f68db38 2750 s++;
3280af22
NIS
2751 if (s < PL_bufend) {
2752 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2753 newargv[1] = s;
3280af22 2754 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2755 s++;
2756 *s = '\0';
3280af22 2757 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2758 }
2759 else
3280af22 2760 newargv = PL_origargv;
774d564b 2761 newargv[0] = ipath;
b4748376 2762 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2763 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2764 }
bf4acbe4 2765#endif
748a9306 2766 if (d) {
3280af22
NIS
2767 U32 oldpdb = PL_perldb;
2768 bool oldn = PL_minus_n;
2769 bool oldp = PL_minus_p;
748a9306
LW
2770
2771 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2772 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2773
2774 if (*d++ == '-') {
8cc95fdb
PP
2775 do {
2776 if (*d == 'M' || *d == 'm') {
2777 char *m = d;
2778 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2779 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2780 (int)(d - m), m);
2781 }
2782 d = moreswitches(d);
2783 } while (d);
155aba94
GS
2784 if ((PERLDB_LINE && !oldpdb) ||
2785 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2786 /* if we have already added "LINE: while (<>) {",
2787 we must not do it again */
748a9306 2788 {
3280af22
NIS
2789 sv_setpv(PL_linestr, "");
2790 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2791 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2792 PL_preambled = FALSE;
84902520 2793 if (PERLDB_LINE)
3280af22 2794 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2795 goto retry;
2796 }
a0d0e21e 2797 }
79072805 2798 }
9f68db38 2799 }
79072805 2800 }
3280af22
NIS
2801 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2802 PL_bufptr = s;
2803 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2804 return yylex();
ae986130 2805 }
378cc40b 2806 goto retry;
4fdae800 2807 case '\r':
6a27c188 2808#ifdef PERL_STRICT_CR
cea2e8a9 2809 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2810 Perl_croak(aTHX_
cc507455 2811 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2812#endif
4fdae800 2813 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2814#ifdef MACOS_TRADITIONAL
2815 case '\312':
2816#endif
378cc40b
LW
2817 s++;
2818 goto retry;
378cc40b 2819 case '#':
e929a76b 2820 case '\n':
3280af22 2821 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2822 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2823 /* handle eval qq[#line 1 "foo"\n ...] */
2824 CopLINE_dec(PL_curcop);
2825 incline(s);
2826 }
3280af22 2827 d = PL_bufend;
a687059c 2828 while (s < d && *s != '\n')
378cc40b 2829 s++;
0f85fab0 2830 if (s < d)
378cc40b 2831 s++;
463ee0b2 2832 incline(s);
3280af22
NIS
2833 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2834 PL_bufptr = s;
2835 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2836 return yylex();
a687059c 2837 }
378cc40b 2838 }
a687059c 2839 else {
378cc40b 2840 *s = '\0';
3280af22 2841 PL_bufend = s;
a687059c 2842 }
378cc40b
LW
2843 goto retry;
2844 case '-':
79072805 2845 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2846 I32 ftst = 0;
2847
378cc40b 2848 s++;
3280af22 2849 PL_bufptr = s;
748a9306
LW
2850 tmp = *s++;
2851
bf4acbe4 2852 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2853 s++;
2854
2855 if (strnEQ(s,"=>",2)) {
3280af22 2856 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2858 "### Saw unary minus before =>, forcing word '%s'\n", s);
2859 } )
748a9306
LW
2860 OPERATOR('-'); /* unary minus */
2861 }
3280af22 2862 PL_last_uni = PL_oldbufptr;
748a9306 2863 switch (tmp) {
e5edeb50
JH
2864 case 'r': ftst = OP_FTEREAD; break;
2865 case 'w': ftst = OP_FTEWRITE; break;
2866 case 'x': ftst = OP_FTEEXEC; break;
2867 case 'o': ftst = OP_FTEOWNED; break;
2868 case 'R': ftst = OP_FTRREAD; break;
2869 case 'W': ftst = OP_FTRWRITE; break;
2870 case 'X': ftst = OP_FTREXEC; break;
2871 case 'O': ftst = OP_FTROWNED; break;
2872 case 'e': ftst = OP_FTIS; break;
2873 case 'z': ftst = OP_FTZERO; break;
2874 case 's': ftst = OP_FTSIZE; break;
2875 case 'f': ftst = OP_FTFILE; break;
2876 case 'd': ftst = OP_FTDIR; break;
2877 case 'l': ftst = OP_FTLINK; break;
2878 case 'p': ftst = OP_FTPIPE; break;
2879 case 'S': ftst = OP_FTSOCK; break;
2880 case 'u': ftst = OP_FTSUID; break;
2881 case 'g': ftst = OP_FTSGID; break;
2882 case 'k': ftst = OP_FTSVTX; break;
2883 case 'b': ftst = OP_FTBLK; break;
2884 case 'c': ftst = OP_FTCHR; break;
2885 case 't': ftst = OP_FTTTY; break;
2886 case 'T': ftst = OP_FTTEXT; break;
2887 case 'B': ftst = OP_FTBINARY; break;
2888 case 'M': case 'A': case 'C':
2889 gv_fetchpv("\024",TRUE, SVt_PV);
2890 switch (tmp) {
2891 case 'M': ftst = OP_FTMTIME; break;
2892 case 'A': ftst = OP_FTATIME; break;
2893 case 'C': ftst = OP_FTCTIME; break;
2894 default: break;
2895 }
2896 break;
378cc40b 2897 default:
378cc40b
LW
2898 break;
2899 }
e5edeb50
JH
2900 if (ftst) {
2901 PL_last_lop_op = ftst;
4e553d73 2902 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2903 "### Saw file test %c\n", (int)ftst);
e5edeb50 2904 } )
e5edeb50
JH
2905 FTST(ftst);
2906 }
2907 else {
2908 /* Assume it was a minus followed by a one-letter named
2909 * subroutine call (or a -bareword), then. */
95c31fe3 2910 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2911 "### %c looked like a file test but was not\n",
2912 (int)ftst);
95c31fe3 2913 } )
e5edeb50
JH
2914 s -= 2;
2915 }
378cc40b 2916 }
a687059c
LW
2917 tmp = *s++;
2918 if (*s == tmp) {
2919 s++;
3280af22 2920 if (PL_expect == XOPERATOR)
79072805
LW
2921 TERM(POSTDEC);
2922 else
2923 OPERATOR(PREDEC);
2924 }
2925 else if (*s == '>') {
2926 s++;
2927 s = skipspace(s);
7e2040f0 2928 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2929 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2930 TOKEN(ARROW);
79072805 2931 }
748a9306
LW
2932 else if (*s == '$')
2933 OPERATOR(ARROW);
463ee0b2 2934 else
748a9306 2935 TERM(ARROW);
a687059c 2936 }
3280af22 2937 if (PL_expect == XOPERATOR)
79072805
LW
2938 Aop(OP_SUBTRACT);
2939 else {
3280af22 2940 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2941 check_uni();
79072805 2942 OPERATOR('-'); /* unary minus */
2f3197b3 2943 }
79072805 2944
378cc40b 2945 case '+':
a687059c
LW
2946 tmp = *s++;
2947 if (*s == tmp) {
378cc40b 2948 s++;
3280af22 2949 if (PL_expect == XOPERATOR)
79072805
LW
2950 TERM(POSTINC);
2951 else
2952 OPERATOR(PREINC);
378cc40b 2953 }
3280af22 2954 if (PL_expect == XOPERATOR)
79072805
LW
2955 Aop(OP_ADD);
2956 else {
3280af22 2957 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2958 check_uni();
a687059c 2959 OPERATOR('+');
2f3197b3 2960 }
a687059c 2961
378cc40b 2962 case '*':
3280af22
NIS
2963 if (PL_expect != XOPERATOR) {
2964 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2965 PL_expect = XOPERATOR;
2966 force_ident(PL_tokenbuf, '*');
2967 if (!*PL_tokenbuf)
a0d0e21e 2968 PREREF('*');
79072805 2969 TERM('*');
a687059c 2970 }
79072805
LW
2971 s++;
2972 if (*s == '*') {
a687059c 2973 s++;
79072805 2974 PWop(OP_POW);
a687059c 2975 }
79072805
LW
2976 Mop(OP_MULTIPLY);
2977
378cc40b 2978 case '%':
3280af22 2979 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2980 ++s;
2981 Mop(OP_MODULO);
a687059c 2982 }
3280af22
NIS
2983 PL_tokenbuf[0] = '%';
2984 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2985 if (!PL_tokenbuf[1]) {
2986 if (s == PL_bufend)
bbce6d69
PP
2987 yyerror("Final % should be \\% or %name");
2988 PREREF('%');
a687059c 2989 }
3280af22 2990 PL_pending_ident = '%';
bbce6d69 2991 TERM('%');
a687059c 2992
378cc40b 2993 case '^':
79072805 2994 s++;
a0d0e21e 2995 BOop(OP_BIT_XOR);
79072805 2996 case '[':
3280af22 2997 PL_lex_brackets++;
79072805 2998 /* FALL THROUGH */
378cc40b 2999 case '~':
378cc40b 3000 case ',':
378cc40b
LW
3001 tmp = *s++;
3002 OPERATOR(tmp);
a0d0e21e
LW
3003 case ':':
3004 if (s[1] == ':') {
3005 len = 0;
3006 goto just_a_word;
3007 }
3008 s++;
09bef843
SB
3009 switch (PL_expect) {
3010 OP *attrs;
3011 case XOPERATOR:
3012 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3013 break;
3014 PL_bufptr = s; /* update in case we back off */
3015 goto grabattrs;
3016 case XATTRBLOCK:
3017 PL_expect = XBLOCK;
3018 goto grabattrs;
3019 case XATTRTERM:
3020 PL_expect = XTERMBLOCK;
3021 grabattrs:
3022 s = skipspace(s);
3023 attrs = Nullop;
7e2040f0 3024 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3025 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3026 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3027 if (tmp < 0) tmp = -tmp;
3028 switch (tmp) {
3029 case KEY_or:
3030 case KEY_and:
3031 case KEY_for:
3032 case KEY_unless:
3033 case KEY_if:
3034 case KEY_while:
3035 case KEY_until:
3036 goto got_attrs;
3037 default:
3038 break;
3039 }
3040 }
09bef843
SB
3041 if (*d == '(') {
3042 d = scan_str(d,TRUE,TRUE);
3043 if (!d) {
09bef843
SB
3044 /* MUST advance bufptr here to avoid bogus
3045 "at end of line" context messages from yyerror().
3046 */
3047 PL_bufptr = s + len;
3048 yyerror("Unterminated attribute parameter in attribute list");
3049 if (attrs)
3050 op_free(attrs);
3051 return 0; /* EOF indicator */
3052 }
3053 }
3054 if (PL_lex_stuff) {
3055 SV *sv = newSVpvn(s, len);
3056 sv_catsv(sv, PL_lex_stuff);
3057 attrs = append_elem(OP_LIST, attrs,
3058 newSVOP(OP_CONST, 0, sv));
3059 SvREFCNT_dec(PL_lex_stuff);
3060 PL_lex_stuff = Nullsv;
3061 }
3062 else {
78f9721b
SM
3063 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3064 CvLVALUE_on(PL_compcv);
3065 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3066 CvLOCKED_on(PL_compcv);
3067 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3068 CvMETHOD_on(PL_compcv);
87ecf892
DM
3069#ifdef USE_ITHREADS
3070 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3071 GvSHARED_on(cGVOPx_gv(yylval.opval));
3072#endif
78f9721b
SM
3073 /* After we've set the flags, it could be argued that
3074 we don't need to do the attributes.pm-based setting
3075 process, and shouldn't bother appending recognized
3076 flags. To experiment with that, uncomment the
3077 following "else": */
0256094b 3078 else
78f9721b
SM
3079 attrs = append_elem(OP_LIST, attrs,
3080 newSVOP(OP_CONST, 0,
3081 newSVpvn(s, len)));
09bef843
SB
3082 }
3083 s = skipspace(d);
0120eecf 3084 if (*s == ':' && s[1] != ':')
09bef843 3085 s = skipspace(s+1);
0120eecf
GS
3086 else if (s == d)
3087 break; /* require real whitespace or :'s */
09bef843 3088 }
f9829d6b
GS
3089 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3090 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3091 char q = ((*s == '\'') ? '"' : '\'');
3092 /* If here for an expression, and parsed no attrs, back off. */
3093 if (tmp == '=' && !attrs) {
3094 s = PL_bufptr;
3095 break;
3096 }
3097 /* MUST advance bufptr here to avoid bogus "at end of line"
3098 context messages from yyerror().
3099 */
3100 PL_bufptr = s;
3101 if (!*s)
3102 yyerror("Unterminated attribute list");
3103 else
3104 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3105 q, *s, q));
3106 if (attrs)
3107 op_free(attrs);
3108 OPERATOR(':');
3109 }
f9829d6b 3110 got_attrs:
09bef843
SB
3111 if (attrs) {
3112 PL_nextval[PL_nexttoke].opval = attrs;
3113 force_next(THING);
3114 }
3115 TOKEN(COLONATTR);
3116 }
a0d0e21e 3117 OPERATOR(':');
8990e307
LW
3118 case '(':
3119 s++;
3280af22
NIS
3120 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3121 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3122 else
3280af22 3123 PL_expect = XTERM;
a0d0e21e 3124 TOKEN('(');
378cc40b 3125 case ';':
f4dd75d9 3126 CLINE;
378cc40b
LW
3127 tmp = *s++;
3128 OPERATOR(tmp);
3129 case ')':
378cc40b 3130 tmp = *s++;
16d20bd9
AD
3131 s = skipspace(s);
3132 if (*s == '{')
3133 PREBLOCK(tmp);
378cc40b 3134 TERM(tmp);
79072805
LW
3135 case ']':
3136 s++;
3280af22 3137 if (PL_lex_brackets <= 0)
d98d5fff 3138 yyerror("Unmatched right square bracket");
463ee0b2 3139 else
3280af22
NIS
3140 --PL_lex_brackets;
3141 if (PL_lex_state == LEX_INTERPNORMAL) {
3142 if (PL_lex_brackets == 0) {
a0d0e21e 3143 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3144 PL_lex_state = LEX_INTERPEND;
79072805
LW
3145 }
3146 }
4633a7c4 3147 TERM(']');
79072805
LW
3148 case '{':
3149 leftbracket:
79072805 3150 s++;
3280af22
NIS
3151 if (PL_lex_brackets > 100) {
3152 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3153 if (newlb != PL_lex_brackstack) {
8990e307 3154 SAVEFREEPV(newlb);
3280af22 3155 PL_lex_brackstack = newlb;
8990e307
LW
3156 }
3157 }
3280af22 3158 switch (PL_expect) {
a0d0e21e 3159 case XTERM:
3280af22 3160 if (PL_lex_formbrack) {
a0d0e21e
LW
3161 s--;
3162 PRETERMBLOCK(DO);
3163 }
3280af22
NIS
3164 if (PL_oldoldbufptr == PL_last_lop)
3165 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3166 else
3280af22 3167 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3168 OPERATOR(HASHBRACK);
a0d0e21e 3169 case XOPERATOR:
bf4acbe4 3170 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3171 s++;
44a8e56a 3172 d = s;
3280af22
NIS
3173 PL_tokenbuf[0] = '\0';
3174 if (d < PL_bufend && *d == '-') {
3175 PL_tokenbuf[0] = '-';
44a8e56a 3176 d++;
bf4acbe4 3177 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3178 d++;
3179 }
7e2040f0 3180 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3181 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3182 FALSE, &len);
bf4acbe4 3183 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3184 d++;
3185 if (*d == '}') {
3280af22 3186 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3187 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3188 if (minus)
3189 force_next('-');
748a9306
LW
3190 }
3191 }
3192 /* FALL THROUGH */
09bef843 3193 case XATTRBLOCK:
748a9306 3194 case XBLOCK:
3280af22
NIS
3195 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3196 PL_expect = XSTATE;
a0d0e21e 3197 break;
09bef843 3198 case XATTRTERM:
a0d0e21e 3199 case XTERMBLOCK:
3280af22
NIS
3200 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3201 PL_expect = XSTATE;
a0d0e21e
LW
3202 break;
3203 default: {
3204 char *t;
3280af22
NIS
3205 if (PL_oldoldbufptr == PL_last_lop)
3206 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3207 else
3280af22 3208 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3209 s = skipspace(s);
09ecc4b6 3210 if (*s == '}')
a0d0e21e 3211 OPERATOR(HASHBRACK);
b8a4b1be
GS
3212 /* This hack serves to disambiguate a pair of curlies
3213 * as being a block or an anon hash. Normally, expectation
3214 * determines that, but in cases where we're not in a
3215 * position to expect anything in particular (like inside
3216 * eval"") we have to resolve the ambiguity. This code
3217 * covers the case where the first term in the curlies is a
3218 * quoted string. Most other cases need to be explicitly
3219 * disambiguated by prepending a `+' before the opening
3220 * curly in order to force resolution as an anon hash.
3221 *
3222 * XXX should probably propagate the outer expectation
3223 * into eval"" to rely less on this hack, but that could
3224 * potentially break current behavior of eval"".
3225 * GSAR 97-07-21
3226 */
3227 t = s;
3228 if (*s == '\'' || *s == '"' || *s == '`') {
3229 /* common case: get past first string, handling escapes */
3280af22 3230 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3231 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3232 t++;
3233 t++;
a0d0e21e 3234 }
b8a4b1be 3235 else if (*s == 'q') {
3280af22 3236 if (++t < PL_bufend
b8a4b1be 3237 && (!isALNUM(*t)
3280af22 3238 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3239 && !isALNUM(*t))))
3240 {
b8a4b1be
GS
3241 char *tmps;
3242 char open, close, term;
3243 I32 brackets = 1;
3244
3280af22 3245 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3246 t++;
3247 term = *t;
3248 open = term;
3249 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3250 term = tmps[5];
3251 close = term;
3252 if (open == close)
3280af22
NIS
3253 for (t++; t < PL_bufend; t++) {
3254 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3255 t++;
6d07e5e9 3256 else if (*t == open)
b8a4b1be
GS
3257 break;
3258 }
3259 else
3280af22
NIS
3260 for (t++; t < PL_bufend; t++) {
3261 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3262 t++;
6d07e5e9 3263 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3264 break;
3265 else if (*t == open)
3266 brackets++;
3267 }
3268 }
3269 t++;
a0d0e21e 3270 }
7e2040f0 3271 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3272 t += UTF8SKIP(t);
7e2040f0 3273 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3274 t += UTF8SKIP(t);
a0d0e21e 3275 }
3280af22 3276 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3277 t++;
b8a4b1be
GS
3278 /* if comma follows first term, call it an anon hash */
3279 /* XXX it could be a comma expression with loop modifiers */
3280af22 3280 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3281 || (*t == '=' && t[1] == '>')))
a0d0e21e 3282 OPERATOR(HASHBRACK);
3280af22 3283 if (PL_expect == XREF)
4e4e412b 3284 PL_expect = XTERM;
a0d0e21e 3285 else {
3280af22
NIS
3286 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3287 PL_expect = XSTATE;
a0d0e21e 3288 }
8990e307 3289 }
a0d0e21e 3290 break;
463ee0b2 3291 }
57843af0 3292 yylval.ival = CopLINE(PL_curcop);
79072805 3293 if (isSPACE(*s) || *s == '#')
3280af22 3294 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3295 TOKEN('{');
378cc40b 3296 case '}':
79072805
LW
3297 rightbracket:
3298 s++;
3280af22 3299 if (PL_lex_brackets <= 0)
d98d5fff 3300 yyerror("Unmatched right curly bracket");
463ee0b2 3301 else
3280af22 3302 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3303 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3304 PL_lex_formbrack = 0;
3305 if (PL_lex_state == LEX_INTERPNORMAL) {
3306 if (PL_lex_brackets == 0) {
9059aa12
LW
3307 if (PL_expect & XFAKEBRACK) {
3308 PL_expect &= XENUMMASK;
3280af22
NIS
3309 PL_lex_state = LEX_INTERPEND;
3310 PL_bufptr = s;
cea2e8a9 3311 return yylex(); /* ignore fake brackets */
79072805 3312 }
fa83b5b6 3313 if (*s == '-' && s[1] == '>')
3280af22 3314 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3315 else if (*s != '[' && *s != '{')
3280af22 3316 PL_lex_state = LEX_INTERPEND;
79072805
LW
3317 }
3318 }
9059aa12
LW
3319 if (PL_expect & XFAKEBRACK) {
3320 PL_expect &= XENUMMASK;
3280af22 3321 PL_bufptr = s;
cea2e8a9 3322 return yylex(); /* ignore fake brackets */
748a9306 3323 }
79072805
LW
3324 force_next('}');
3325 TOKEN(';');
378cc40b
LW
3326 case '&':
3327 s++;
3328 tmp = *s++;
3329 if (tmp == '&')
a0d0e21e 3330 AOPERATOR(ANDAND);
378cc40b 3331 s--;
3280af22 3332 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3333 if (ckWARN(WARN_SEMICOLON)
3334 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3335 {
57843af0 3336 CopLINE_dec(PL_curcop);
cea2e8a9 3337 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3338 CopLINE_inc(PL_curcop);
463ee0b2 3339 }
79072805 3340 BAop(OP_BIT_AND);
463ee0b2 3341 }
79072805 3342
3280af22
NIS
3343 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3344 if (*PL_tokenbuf) {
3345 PL_expect = XOPERATOR;
3346 force_ident(PL_tokenbuf, '&');
463ee0b2 3347 }
79072805
LW
3348 else
3349 PREREF('&');
c07a80fd 3350 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3351 TERM('&');
3352
378cc40b
LW
3353 case '|':
3354 s++;
3355 tmp = *s++;
3356 if (tmp == '|')
a0d0e21e 3357 AOPERATOR(OROR);
378cc40b 3358 s--;
79072805 3359 BOop(OP_BIT_OR);
378cc40b
LW
3360 case '=':
3361 s++;
3362 tmp = *s++;
3363 if (tmp == '=')
79072805
LW
3364 Eop(OP_EQ);
3365 if (tmp == '>')
3366 OPERATOR(',');
378cc40b 3367 if (tmp == '~')
79072805 3368 PMop(OP_MATCH);
599cee73 3369 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3370 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3371 s--;
3280af22
NIS
3372 if (PL_expect == XSTATE && isALPHA(tmp) &&
3373 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3374 {
3280af22
NIS
3375 if (PL_in_eval && !PL_rsfp) {
3376 d = PL_bufend;
a5f75d66
AD
3377 while (s < d) {
3378 if (*s++ == '\n') {
3379 incline(s);
3380 if (strnEQ(s,"=cut",4)) {
3381 s = strchr(s,'\n');
3382 if (s)
3383 s++;
3384 else
3385 s = d;
3386 incline(s);
3387 goto retry;
3388 }
3389 }
3390 }
3391 goto retry;
3392 }
3280af22
NIS
3393 s = PL_bufend;
3394 PL_doextract = TRUE;
a0d0e21e
LW
3395 goto retry;
3396 }
3280af22 3397 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3398 char *t;
51882d45 3399#ifdef PERL_STRICT_CR
bf4acbe4 3400 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3401#else
bf4acbe4 3402 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3403#endif
a0d0e21e
LW
3404 if (*t == '\n' || *t == '#') {
3405 s--;
3280af22 3406 PL_expect = XBLOCK;
a0d0e21e
LW
3407 goto leftbracket;
3408 }
79072805 3409 }
a0d0e21e
LW
3410 yylval.ival = 0;
3411 OPERATOR(ASSIGNOP);
378cc40b
LW
3412 case '!':
3413 s++;
3414 tmp = *s++;
3415 if (tmp == '=')
79072805 3416 Eop(OP_NE);
378cc40b 3417 if (tmp == '~')
79072805 3418 PMop(OP_NOT);
378cc40b
LW
3419 s--;
3420 OPERATOR('!');
3421 case '<':
3280af22 3422 if (PL_expect != XOPERATOR) {
93a17b20 3423 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3424 check_uni();
79072805
LW
3425 if (s[1] == '<')
3426 s = scan_heredoc(s);
3427 else
3428 s = scan_inputsymbol(s);
3429 TERM(sublex_start());
378cc40b
LW
3430 }
3431 s++;
3432 tmp = *s++;
3433 if (tmp == '<')
79072805 3434 SHop(OP_LEFT_SHIFT);
395c3793
LW
3435 if (tmp == '=') {
3436 tmp = *s++;
3437 if (tmp == '>')
79072805 3438 Eop(OP_NCMP);
395c3793 3439 s--;
79072805 3440 Rop(OP_LE);
395c3793 3441 }
378cc40b 3442 s--;
79072805 3443 Rop(OP_LT);
378cc40b
LW
3444 case '>':
3445 s++;
3446 tmp = *s++;
3447 if (tmp == '>')
79072805 3448 SHop(OP_RIGHT_SHIFT);
378cc40b 3449 if (tmp == '=')
79072805 3450 Rop(OP_GE);
378cc40b 3451 s--;
79072805 3452 Rop(OP_GT);
378cc40b
LW
3453
3454 case '$':
bbce6d69
PP
3455 CLINE;
3456
3280af22
NIS
3457 if (PL_expect == XOPERATOR) {
3458 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3459 PL_expect = XTERM;
a0d0e21e 3460 depcom();
bbce6d69 3461 return ','; /* grandfather non-comma-format format */
a0d0e21e 3462 }
8990e307 3463 }
a0d0e21e 3464
7e2040f0 3465 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3466 PL_tokenbuf[0] = '@';
376b8730
SM
3467 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3468 sizeof PL_tokenbuf - 1, FALSE);
3469 if (PL_expect == XOPERATOR)
3470 no_op("Array length", s);
3280af22 3471 if (!PL_tokenbuf[1])
a0d0e21e 3472 PREREF(DOLSHARP);
3280af22
NIS
3473 PL_expect = XOPERATOR;
3474 PL_pending_ident = '#';
463ee0b2 3475 TOKEN(DOLSHARP);
79072805 3476 }
bbce6d69 3477
3280af22 3478 PL_tokenbuf[0] = '$';
376b8730
SM
3479 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3480 sizeof PL_tokenbuf - 1, FALSE);
3481 if (PL_expect == XOPERATOR)
3482 no_op("Scalar", s);
3280af22
NIS
3483 if (!PL_tokenbuf[1]) {
3484 if (s == PL_bufend)
bbce6d69
PP
3485 yyerror("Final $ should be \\$ or $name");
3486 PREREF('$');
8990e307 3487 }
a0d0e21e 3488
bbce6d69 3489 /* This kludge not intended to be bulletproof. */
3280af22 3490 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3491 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3492 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3493 yylval.opval->op_private = OPpCONST_ARYBASE;
3494 TERM(THING);
3495 }
3496
ff68c719 3497 d = s;
69d2bceb 3498 tmp = (I32)*s;
3280af22 3499 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3500 s = skipspace(s);
3501
3280af22 3502 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3503 char *t;
3504 if (*s == '[') {
3280af22 3505 PL_tokenbuf[0] = '@';
599cee73 3506 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3507 for(t = s + 1;
7e2040f0 3508 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3509 t++) ;
a0d0e21e 3510 if (*t++ == ',') {
3280af22
NIS
3511 PL_bufptr = skipspace(PL_bufptr);
3512 while (t < PL_bufend && *t != ']')
bbce6d69 3513 t++;
cea2e8a9 3514 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3515 "Multidimensional syntax %.*s not supported",
3516 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3517 }
3518 }
bbce6d69
PP
3519 }
3520 else if (*s == '{') {
3280af22 3521 PL_tokenbuf[0] = '%';
599cee73 3522 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3523 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3524 {
3280af22 3525 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3526 STRLEN len;
3527 for (t++; isSPACE(*t); t++) ;
7e2040f0 3528 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3529 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3530 for (; isSPACE(*t); t++) ;
864dbfa3 3531 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3532 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3533 "You need to quote \"%s\"", tmpbuf);
748a9306 3534 }
93a17b20
LW
3535 }
3536 }
2f3197b3 3537 }
bbce6d69 3538
3280af22 3539 PL_expect = XOPERATOR;
69d2bceb 3540 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3541 bool islop = (PL_last_lop == PL_oldoldbufptr);
3542 if (!islop || PL_last_lop_op == OP_GREPSTART)
3543 PL_expect = XOPERATOR;
bbce6d69 3544 else if (strchr("$@\"'`q", *s))
3280af22 3545 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3546 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3547 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3548 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3549 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3550 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3551 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3552 /* binary operators exclude handle interpretations */
3553 switch (tmp) {
3554 case -KEY_x:
3555 case -KEY_eq:
3556 case -KEY_ne:
3557 case -KEY_gt:
3558 case -KEY_lt:
3559 case -KEY_ge:
3560 case -KEY_le:
3561 case -KEY_cmp:
3562 break;
3563 default:
3280af22 3564 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3565 break;
3566 }
3567 }
68dc0745
PP
3568 else {
3569 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);