This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
MAking Solaris malloc() the default
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
998054bd
SC
129#ifdef DEBUGGING /* Serve -DT. */
130# define REPORT(x,retval) tokereport(x,s,(int)retval)
131# define REPORT2(x,retval) tokereport(x,s, yylval.ival)
132#else
133# define REPORT(x,retval) 1
134# define REPORT2(x,retval) 1
135#endif
136
137#define TOKEN(retval) return (REPORT2("token",retval), PL_bufptr = s,(int)retval)
138#define OPERATOR(retval) return (REPORT2("operator",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval)
139#define AOPERATOR(retval) return ao((REPORT2("aop",retval), PL_expect = XTERM, PL_bufptr = s,(int)retval))
140#define PREBLOCK(retval) return (REPORT2("preblock",retval), PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
141#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval), PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
142#define PREREF(retval) return (REPORT2("preref",retval), PL_expect = XREF,PL_bufptr = s,(int)retval)
143#define TERM(retval) return (CLINE, REPORT2("term",retval), PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
144#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f), PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
145#define FTST(f) return(yylval.ival=f, REPORT("ftst",f), PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
146#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
147#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f), PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
148#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
149#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f), PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
150#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f), PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
151#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f), PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
152#define PMop(f) return(yylval.ival=f, REPORT("matchop",f), PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
153#define Aop(f) return ao((yylval.ival=f, REPORT("add",f), PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
154#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f), PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
155#define Eop(f) return(yylval.ival=f, REPORT("eq",f), PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
156#define Rop(f) return(yylval.ival=f, REPORT("rel",f), PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 157
a687059c
LW
158/* This bit of chicanery makes a unary function followed by
159 * a parenthesis into a function with one argument, highest precedence.
160 */
2f3197b3 161#define UNI(f) return(yylval.ival = f, \
998054bd 162 REPORT("uni",f), \
3280af22
NIS
163 PL_expect = XTERM, \
164 PL_bufptr = s, \
165 PL_last_uni = PL_oldbufptr, \
166 PL_last_lop_op = f, \
a687059c
LW
167 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
168
79072805 169#define UNIBRACK(f) return(yylval.ival = f, \
998054bd 170 REPORT("uni",f), \
3280af22
NIS
171 PL_bufptr = s, \
172 PL_last_uni = PL_oldbufptr, \
79072805
LW
173 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
174
9f68db38 175/* grandfather return to old style */
3280af22 176#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 177
998054bd 178void
61b2116b 179S_tokereport(pTHX_ char *thing, char* s, I32 rv)
998054bd
SC
180{
181 SV *report;
182 DEBUG_T({
183 report = newSVpv(thing, 0);
61b2116b 184 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
185
186 if (s - PL_bufptr > 0)
187 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
188 else {
189 if (PL_oldbufptr && *PL_oldbufptr)
190 sv_catpv(report, PL_tokenbuf);
191 }
192 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
193 })
194}
195
ffb4593c
NT
196/*
197 * S_ao
198 *
199 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
200 * into an OP_ANDASSIGN or OP_ORASSIGN
201 */
202
76e3520e 203STATIC int
cea2e8a9 204S_ao(pTHX_ int toketype)
a0d0e21e 205{
3280af22
NIS
206 if (*PL_bufptr == '=') {
207 PL_bufptr++;
a0d0e21e
LW
208 if (toketype == ANDAND)
209 yylval.ival = OP_ANDASSIGN;
210 else if (toketype == OROR)
211 yylval.ival = OP_ORASSIGN;
212 toketype = ASSIGNOP;
213 }
214 return toketype;
215}
216
ffb4593c
NT
217/*
218 * S_no_op
219 * When Perl expects an operator and finds something else, no_op
220 * prints the warning. It always prints "<something> found where
221 * operator expected. It prints "Missing semicolon on previous line?"
222 * if the surprise occurs at the start of the line. "do you need to
223 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
224 * where the compiler doesn't know if foo is a method call or a function.
225 * It prints "Missing operator before end of line" if there's nothing
226 * after the missing operator, or "... before <...>" if there is something
227 * after the missing operator.
228 */
229
76e3520e 230STATIC void
cea2e8a9 231S_no_op(pTHX_ char *what, char *s)
463ee0b2 232{
3280af22
NIS
233 char *oldbp = PL_bufptr;
234 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 235
1189a94a
GS
236 if (!s)
237 s = oldbp;
07c798fb 238 else
1189a94a 239 PL_bufptr = s;
cea2e8a9 240 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 241 if (is_first)
cea2e8a9 242 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 243 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 244 char *t;
7e2040f0 245 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 246 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 247 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 248 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 249 }
07c798fb
HS
250 else {
251 assert(s >= oldbp);
cea2e8a9 252 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 253 }
3280af22 254 PL_bufptr = oldbp;
8990e307
LW
255}
256
ffb4593c
NT
257/*
258 * S_missingterm
259 * Complain about missing quote/regexp/heredoc terminator.
260 * If it's called with (char *)NULL then it cauterizes the line buffer.
261 * If we're in a delimited string and the delimiter is a control
262 * character, it's reformatted into a two-char sequence like ^C.
263 * This is fatal.
264 */
265
76e3520e 266STATIC void
cea2e8a9 267S_missingterm(pTHX_ char *s)
8990e307
LW
268{
269 char tmpbuf[3];
270 char q;
271 if (s) {
272 char *nl = strrchr(s,'\n');
d2719217 273 if (nl)
8990e307
LW
274 *nl = '\0';
275 }
9d116dd7
JH
276 else if (
277#ifdef EBCDIC
278 iscntrl(PL_multi_close)
279#else
280 PL_multi_close < 32 || PL_multi_close == 127
281#endif
282 ) {
8990e307 283 *tmpbuf = '^';
3280af22 284 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
285 s = "\\n";
286 tmpbuf[2] = '\0';
287 s = tmpbuf;
288 }
289 else {
3280af22 290 *tmpbuf = PL_multi_close;
8990e307
LW
291 tmpbuf[1] = '\0';
292 s = tmpbuf;
293 }
294 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 295 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 296}
79072805 297
ffb4593c
NT
298/*
299 * Perl_deprecate
ffb4593c
NT
300 */
301
79072805 302void
864dbfa3 303Perl_deprecate(pTHX_ char *s)
a0d0e21e 304{
599cee73 305 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 306 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
307}
308
ffb4593c
NT
309/*
310 * depcom
9cbb5ea2 311 * Deprecate a comma-less variable list.
ffb4593c
NT
312 */
313
76e3520e 314STATIC void
cea2e8a9 315S_depcom(pTHX)
a0d0e21e
LW
316{
317 deprecate("comma-less variable list");
318}
319
ffb4593c 320/*
9cbb5ea2
GS
321 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
322 * utf16-to-utf8-reversed.
ffb4593c
NT
323 */
324
c39cd008
GS
325#ifdef PERL_CR_FILTER
326static void
327strip_return(SV *sv)
328{
329 register char *s = SvPVX(sv);
330 register char *e = s + SvCUR(sv);
331 /* outer loop optimized to do nothing if there are no CR-LFs */
332 while (s < e) {
333 if (*s++ == '\r' && *s == '\n') {
334 /* hit a CR-LF, need to copy the rest */
335 register char *d = s - 1;
336 *d++ = *s++;
337 while (s < e) {
338 if (*s == '\r' && s[1] == '\n')
339 s++;
340 *d++ = *s++;
341 }
342 SvCUR(sv) -= s - d;
343 return;
344 }
345 }
346}
a868473f 347
76e3520e 348STATIC I32
c39cd008 349S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 350{
c39cd008
GS
351 I32 count = FILTER_READ(idx+1, sv, maxlen);
352 if (count > 0 && !maxlen)
353 strip_return(sv);
354 return count;
a868473f
NIS
355}
356#endif
357
ffb4593c
NT
358/*
359 * Perl_lex_start
9cbb5ea2
GS
360 * Initialize variables. Uses the Perl save_stack to save its state (for
361 * recursive calls to the parser).
ffb4593c
NT
362 */
363
a0d0e21e 364void
864dbfa3 365Perl_lex_start(pTHX_ SV *line)
79072805 366{
8990e307
LW
367 char *s;
368 STRLEN len;
369
3280af22
NIS
370 SAVEI32(PL_lex_dojoin);
371 SAVEI32(PL_lex_brackets);
3280af22
NIS
372 SAVEI32(PL_lex_casemods);
373 SAVEI32(PL_lex_starts);
374 SAVEI32(PL_lex_state);
7766f137 375 SAVEVPTR(PL_lex_inpat);
3280af22 376 SAVEI32(PL_lex_inwhat);
18b09519
GS
377 if (PL_lex_state == LEX_KNOWNEXT) {
378 I32 toke = PL_nexttoke;
379 while (--toke >= 0) {
380 SAVEI32(PL_nexttype[toke]);
381 SAVEVPTR(PL_nextval[toke]);
382 }
383 SAVEI32(PL_nexttoke);
18b09519 384 }
57843af0 385 SAVECOPLINE(PL_curcop);
3280af22
NIS
386 SAVEPPTR(PL_bufptr);
387 SAVEPPTR(PL_bufend);
388 SAVEPPTR(PL_oldbufptr);
389 SAVEPPTR(PL_oldoldbufptr);
390 SAVEPPTR(PL_linestart);
391 SAVESPTR(PL_linestr);
392 SAVEPPTR(PL_lex_brackstack);
393 SAVEPPTR(PL_lex_casestack);
c76ac1ee 394 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
395 SAVESPTR(PL_lex_stuff);
396 SAVEI32(PL_lex_defer);
09bef843 397 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 398 SAVESPTR(PL_lex_repl);
bebdddfc
GS
399 SAVEINT(PL_expect);
400 SAVEINT(PL_lex_expect);
3280af22
NIS
401
402 PL_lex_state = LEX_NORMAL;
403 PL_lex_defer = 0;
404 PL_expect = XSTATE;
405 PL_lex_brackets = 0;
3280af22
NIS
406 New(899, PL_lex_brackstack, 120, char);
407 New(899, PL_lex_casestack, 12, char);
408 SAVEFREEPV(PL_lex_brackstack);
409 SAVEFREEPV(PL_lex_casestack);
410 PL_lex_casemods = 0;
411 *PL_lex_casestack = '\0';
412 PL_lex_dojoin = 0;
413 PL_lex_starts = 0;
414 PL_lex_stuff = Nullsv;
415 PL_lex_repl = Nullsv;
416 PL_lex_inpat = 0;
76be56bc 417 PL_nexttoke = 0;
3280af22 418 PL_lex_inwhat = 0;
09bef843 419 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
420 PL_linestr = line;
421 if (SvREADONLY(PL_linestr))
422 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
423 s = SvPV(PL_linestr, len);
8990e307 424 if (len && s[len-1] != ';') {
3280af22
NIS
425 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 428 }
3280af22
NIS
429 SvTEMP_off(PL_linestr);
430 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
431 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
432 SvREFCNT_dec(PL_rs);
79cb57f6 433 PL_rs = newSVpvn("\n", 1);
3280af22 434 PL_rsfp = 0;
79072805 435}
a687059c 436
ffb4593c
NT
437/*
438 * Perl_lex_end
9cbb5ea2
GS
439 * Finalizer for lexing operations. Must be called when the parser is
440 * done with the lexer.
ffb4593c
NT
441 */
442
463ee0b2 443void
864dbfa3 444Perl_lex_end(pTHX)
463ee0b2 445{
3280af22 446 PL_doextract = FALSE;
463ee0b2
LW
447}
448
ffb4593c
NT
449/*
450 * S_incline
451 * This subroutine has nothing to do with tilting, whether at windmills
452 * or pinball tables. Its name is short for "increment line". It
57843af0 453 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 454 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
455 * # line 500 "foo.pm"
456 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
457 */
458
76e3520e 459STATIC void
cea2e8a9 460S_incline(pTHX_ char *s)
463ee0b2
LW
461{
462 char *t;
463 char *n;
73659bf1 464 char *e;
463ee0b2 465 char ch;
463ee0b2 466
57843af0 467 CopLINE_inc(PL_curcop);
463ee0b2
LW
468 if (*s++ != '#')
469 return;
bf4acbe4 470 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
471 if (strnEQ(s, "line", 4))
472 s += 4;
473 else
474 return;
475 if (*s == ' ' || *s == '\t')
476 s++;
4e553d73 477 else
73659bf1 478 return;
bf4acbe4 479 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
480 if (!isDIGIT(*s))
481 return;
482 n = s;
483 while (isDIGIT(*s))
484 s++;
bf4acbe4 485 while (SPACE_OR_TAB(*s))
463ee0b2 486 s++;
73659bf1 487 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 488 s++;
73659bf1
GS
489 e = t + 1;
490 }
463ee0b2 491 else {
463ee0b2 492 for (t = s; !isSPACE(*t); t++) ;
73659bf1 493 e = t;
463ee0b2 494 }
bf4acbe4 495 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
496 e++;
497 if (*e != '\n' && *e != '\0')
498 return; /* false alarm */
499
463ee0b2
LW
500 ch = *t;
501 *t = '\0';
f4dd75d9
GS
502 if (t - s > 0) {
503#ifdef USE_ITHREADS
504 Safefree(CopFILE(PL_curcop));
505#else
506 SvREFCNT_dec(CopFILEGV(PL_curcop));
507#endif
57843af0 508 CopFILE_set(PL_curcop, s);
f4dd75d9 509 }
463ee0b2 510 *t = ch;
57843af0 511 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
512}
513
ffb4593c
NT
514/*
515 * S_skipspace
516 * Called to gobble the appropriate amount and type of whitespace.
517 * Skips comments as well.
518 */
519
76e3520e 520STATIC char *
cea2e8a9 521S_skipspace(pTHX_ register char *s)
a687059c 522{
3280af22 523 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 524 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
525 s++;
526 return s;
527 }
528 for (;;) {
fd049845 529 STRLEN prevlen;
09bef843
SB
530 SSize_t oldprevlen, oldoldprevlen;
531 SSize_t oldloplen, oldunilen;
60e6418e
GS
532 while (s < PL_bufend && isSPACE(*s)) {
533 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
534 incline(s);
535 }
ffb4593c
NT
536
537 /* comment */
3280af22
NIS
538 if (s < PL_bufend && *s == '#') {
539 while (s < PL_bufend && *s != '\n')
463ee0b2 540 s++;
60e6418e 541 if (s < PL_bufend) {
463ee0b2 542 s++;
60e6418e
GS
543 if (PL_in_eval && !PL_rsfp) {
544 incline(s);
545 continue;
546 }
547 }
463ee0b2 548 }
ffb4593c
NT
549
550 /* only continue to recharge the buffer if we're at the end
551 * of the buffer, we're not reading from a source filter, and
552 * we're in normal lexing mode
553 */
09bef843
SB
554 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
555 PL_lex_state == LEX_FORMLINE)
463ee0b2 556 return s;
ffb4593c
NT
557
558 /* try to recharge the buffer */
9cbb5ea2
GS
559 if ((s = filter_gets(PL_linestr, PL_rsfp,
560 (prevlen = SvCUR(PL_linestr)))) == Nullch)
561 {
562 /* end of file. Add on the -p or -n magic */
3280af22
NIS
563 if (PL_minus_n || PL_minus_p) {
564 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
565 ";}continue{print or die qq(-p destination: $!\\n)" :
566 "");
3280af22
NIS
567 sv_catpv(PL_linestr,";}");
568 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
569 }
570 else
3280af22 571 sv_setpv(PL_linestr,";");
ffb4593c
NT
572
573 /* reset variables for next time we lex */
9cbb5ea2
GS
574 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
575 = SvPVX(PL_linestr);
3280af22 576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
577
578 /* Close the filehandle. Could be from -P preprocessor,
579 * STDIN, or a regular file. If we were reading code from
580 * STDIN (because the commandline held no -e or filename)
581 * then we don't close it, we reset it so the code can
582 * read from STDIN too.
583 */
584
3280af22
NIS
585 if (PL_preprocess && !PL_in_eval)
586 (void)PerlProc_pclose(PL_rsfp);
587 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
588 PerlIO_clearerr(PL_rsfp);
8990e307 589 else
3280af22
NIS
590 (void)PerlIO_close(PL_rsfp);
591 PL_rsfp = Nullfp;
463ee0b2
LW
592 return s;
593 }
ffb4593c
NT
594
595 /* not at end of file, so we only read another line */
09bef843
SB
596 /* make corresponding updates to old pointers, for yyerror() */
597 oldprevlen = PL_oldbufptr - PL_bufend;
598 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
599 if (PL_last_uni)
600 oldunilen = PL_last_uni - PL_bufend;
601 if (PL_last_lop)
602 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
603 PL_linestart = PL_bufptr = s + prevlen;
604 PL_bufend = s + SvCUR(PL_linestr);
605 s = PL_bufptr;
09bef843
SB
606 PL_oldbufptr = s + oldprevlen;
607 PL_oldoldbufptr = s + oldoldprevlen;
608 if (PL_last_uni)
609 PL_last_uni = s + oldunilen;
610 if (PL_last_lop)
611 PL_last_lop = s + oldloplen;
a0d0e21e 612 incline(s);
ffb4593c
NT
613
614 /* debugger active and we're not compiling the debugger code,
615 * so store the line into the debugger's array of lines
616 */
3280af22 617 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
618 SV *sv = NEWSV(85,0);
619
620 sv_upgrade(sv, SVt_PVMG);
3280af22 621 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 622 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 623 }
463ee0b2 624 }
a687059c 625}
378cc40b 626
ffb4593c
NT
627/*
628 * S_check_uni
629 * Check the unary operators to ensure there's no ambiguity in how they're
630 * used. An ambiguous piece of code would be:
631 * rand + 5
632 * This doesn't mean rand() + 5. Because rand() is a unary operator,
633 * the +5 is its argument.
634 */
635
76e3520e 636STATIC void
cea2e8a9 637S_check_uni(pTHX)
ba106d47 638{
2f3197b3 639 char *s;
a0d0e21e 640 char *t;
2f3197b3 641
3280af22 642 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 643 return;
3280af22
NIS
644 while (isSPACE(*PL_last_uni))
645 PL_last_uni++;
7e2040f0 646 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 647 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 648 return;
0453d815 649 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 650 char ch = *s;
0453d815 651 *s = '\0';
4e553d73
NIS
652 Perl_warner(aTHX_ WARN_AMBIGUOUS,
653 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
654 PL_last_uni);
655 *s = ch;
656 }
2f3197b3
LW
657}
658
ffb4593c
NT
659/* workaround to replace the UNI() macro with a function. Only the
660 * hints/uts.sh file mentions this. Other comments elsewhere in the
661 * source indicate Microport Unix might need it too.
662 */
663
ffed7fef
LW
664#ifdef CRIPPLED_CC
665
666#undef UNI
ffed7fef 667#define UNI(f) return uni(f,s)
ffed7fef 668
76e3520e 669STATIC int
cea2e8a9 670S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
671{
672 yylval.ival = f;
3280af22
NIS
673 PL_expect = XTERM;
674 PL_bufptr = s;
8f872242
NIS
675 PL_last_uni = PL_oldbufptr;
676 PL_last_lop_op = f;
ffed7fef
LW
677 if (*s == '(')
678 return FUNC1;
679 s = skipspace(s);
680 if (*s == '(')
681 return FUNC1;
682 else
683 return UNIOP;
684}
685
a0d0e21e
LW
686#endif /* CRIPPLED_CC */
687
ffb4593c
NT
688/*
689 * LOP : macro to build a list operator. Its behaviour has been replaced
690 * with a subroutine, S_lop() for which LOP is just another name.
691 */
692
a0d0e21e
LW
693#define LOP(f,x) return lop(f,x,s)
694
ffb4593c
NT
695/*
696 * S_lop
697 * Build a list operator (or something that might be one). The rules:
698 * - if we have a next token, then it's a list operator [why?]
699 * - if the next thing is an opening paren, then it's a function
700 * - else it's a list operator
701 */
702
76e3520e 703STATIC I32
a0be28da 704S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 705{
79072805 706 yylval.ival = f;
35c8bce7 707 CLINE;
998054bd 708 REPORT("lop", f);
3280af22
NIS
709 PL_expect = x;
710 PL_bufptr = s;
711 PL_last_lop = PL_oldbufptr;
712 PL_last_lop_op = f;
713 if (PL_nexttoke)
a0d0e21e 714 return LSTOP;
79072805
LW
715 if (*s == '(')
716 return FUNC;
717 s = skipspace(s);
718 if (*s == '(')
719 return FUNC;
720 else
721 return LSTOP;
722}
723
ffb4593c
NT
724/*
725 * S_force_next
9cbb5ea2 726 * When the lexer realizes it knows the next token (for instance,
ffb4593c 727 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
728 * to know what token to return the next time the lexer is called. Caller
729 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
730 * handles the token correctly.
ffb4593c
NT
731 */
732
4e553d73 733STATIC void
cea2e8a9 734S_force_next(pTHX_ I32 type)
79072805 735{
3280af22
NIS
736 PL_nexttype[PL_nexttoke] = type;
737 PL_nexttoke++;
738 if (PL_lex_state != LEX_KNOWNEXT) {
739 PL_lex_defer = PL_lex_state;
740 PL_lex_expect = PL_expect;
741 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
742 }
743}
744
ffb4593c
NT
745/*
746 * S_force_word
747 * When the lexer knows the next thing is a word (for instance, it has
748 * just seen -> and it knows that the next char is a word char, then
749 * it calls S_force_word to stick the next word into the PL_next lookahead.
750 *
751 * Arguments:
b1b65b59 752 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
753 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
754 * int check_keyword : if true, Perl checks to make sure the word isn't
755 * a keyword (do this if the word is a label, e.g. goto FOO)
756 * int allow_pack : if true, : characters will also be allowed (require,
757 * use, etc. do this)
9cbb5ea2 758 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
759 */
760
76e3520e 761STATIC char *
cea2e8a9 762S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 763{
463ee0b2
LW
764 register char *s;
765 STRLEN len;
4e553d73 766
463ee0b2
LW
767 start = skipspace(start);
768 s = start;
7e2040f0 769 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 770 (allow_pack && *s == ':') ||
15f0808c 771 (allow_initial_tick && *s == '\'') )
a0d0e21e 772 {
3280af22
NIS
773 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
774 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
775 return start;
776 if (token == METHOD) {
777 s = skipspace(s);
778 if (*s == '(')
3280af22 779 PL_expect = XTERM;
463ee0b2 780 else {
3280af22 781 PL_expect = XOPERATOR;
463ee0b2 782 }
79072805 783 }
3280af22
NIS
784 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
785 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
786 force_next(token);
787 }
788 return s;
789}
790
ffb4593c
NT
791/*
792 * S_force_ident
9cbb5ea2 793 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
794 * text only contains the "foo" portion. The first argument is a pointer
795 * to the "foo", and the second argument is the type symbol to prefix.
796 * Forces the next token to be a "WORD".
9cbb5ea2 797 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
798 */
799
76e3520e 800STATIC void
cea2e8a9 801S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
802{
803 if (s && *s) {
11343788 804 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 805 PL_nextval[PL_nexttoke].opval = o;
79072805 806 force_next(WORD);
748a9306 807 if (kind) {
11343788 808 o->op_private = OPpCONST_ENTERED;
55497cff
PP
809 /* XXX see note in pp_entereval() for why we forgo typo
810 warnings if the symbol must be introduced in an eval.
811 GSAR 96-10-12 */
3280af22 812 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
813 kind == '$' ? SVt_PV :
814 kind == '@' ? SVt_PVAV :
815 kind == '%' ? SVt_PVHV :
816 SVt_PVGV
817 );
748a9306 818 }
79072805
LW
819 }
820}
821
1571675a
GS
822NV
823Perl_str_to_version(pTHX_ SV *sv)
824{
825 NV retval = 0.0;
826 NV nshift = 1.0;
827 STRLEN len;
828 char *start = SvPVx(sv,len);
3aa33fe5 829 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
830 char *end = start + len;
831 while (start < end) {
ba210ebe 832 STRLEN skip;
1571675a
GS
833 UV n;
834 if (utf)
dcad2880 835 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
836 else {
837 n = *(U8*)start;
838 skip = 1;
839 }
840 retval += ((NV)n)/nshift;
841 start += skip;
842 nshift *= 1000;
843 }
844 return retval;
845}
846
4e553d73 847/*
ffb4593c
NT
848 * S_force_version
849 * Forces the next token to be a version number.
850 */
851
76e3520e 852STATIC char *
cea2e8a9 853S_force_version(pTHX_ char *s)
89bfa8cd
PP
854{
855 OP *version = Nullop;
44dcb63b 856 char *d;
89bfa8cd
PP
857
858 s = skipspace(s);
859
44dcb63b 860 d = s;
dd629d5b 861 if (*d == 'v')
44dcb63b 862 d++;
44dcb63b 863 if (isDIGIT(*d)) {
a7cb1f99 864 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 865 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 866 SV *ver;
b73d6f50 867 s = scan_num(s, &yylval);
89bfa8cd 868 version = yylval.opval;
dd629d5b
GS
869 ver = cSVOPx(version)->op_sv;
870 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 871 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
872 SvNVX(ver) = str_to_version(ver);
873 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 874 }
89bfa8cd
PP
875 }
876 }
877
878 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 879 PL_nextval[PL_nexttoke].opval = version;
4e553d73 880 force_next(WORD);
89bfa8cd
PP
881
882 return (s);
883}
884
ffb4593c
NT
885/*
886 * S_tokeq
887 * Tokenize a quoted string passed in as an SV. It finds the next
888 * chunk, up to end of string or a backslash. It may make a new
889 * SV containing that chunk (if HINT_NEW_STRING is on). It also
890 * turns \\ into \.
891 */
892
76e3520e 893STATIC SV *
cea2e8a9 894S_tokeq(pTHX_ SV *sv)
79072805
LW
895{
896 register char *s;
897 register char *send;
898 register char *d;
b3ac6de7
IZ
899 STRLEN len = 0;
900 SV *pv = sv;
79072805
LW
901
902 if (!SvLEN(sv))
b3ac6de7 903 goto finish;
79072805 904
a0d0e21e 905 s = SvPV_force(sv, len);
21a311ee 906 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 907 goto finish;
463ee0b2 908 send = s + len;
79072805
LW
909 while (s < send && *s != '\\')
910 s++;
911 if (s == send)
b3ac6de7 912 goto finish;
79072805 913 d = s;
3280af22 914 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 915 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
916 while (s < send) {
917 if (*s == '\\') {
a0d0e21e 918 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
919 s++; /* all that, just for this */
920 }
921 *d++ = *s++;
922 }
923 *d = '\0';
463ee0b2 924 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 925 finish:
3280af22 926 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 927 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
928 return sv;
929}
930
ffb4593c
NT
931/*
932 * Now come three functions related to double-quote context,
933 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
934 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
935 * interact with PL_lex_state, and create fake ( ... ) argument lists
936 * to handle functions and concatenation.
937 * They assume that whoever calls them will be setting up a fake
938 * join call, because each subthing puts a ',' after it. This lets
939 * "lower \luPpEr"
940 * become
941 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
942 *
943 * (I'm not sure whether the spurious commas at the end of lcfirst's
944 * arguments and join's arguments are created or not).
945 */
946
947/*
948 * S_sublex_start
949 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
950 *
951 * Pattern matching will set PL_lex_op to the pattern-matching op to
952 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
953 *
954 * OP_CONST and OP_READLINE are easy--just make the new op and return.
955 *
956 * Everything else becomes a FUNC.
957 *
958 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
959 * had an OP_CONST or OP_READLINE). This just sets us up for a
960 * call to S_sublex_push().
961 */
962
76e3520e 963STATIC I32
cea2e8a9 964S_sublex_start(pTHX)
79072805
LW
965{
966 register I32 op_type = yylval.ival;
79072805
LW
967
968 if (op_type == OP_NULL) {
3280af22
NIS
969 yylval.opval = PL_lex_op;
970 PL_lex_op = Nullop;
79072805
LW
971 return THING;
972 }
973 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 974 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
975
976 if (SvTYPE(sv) == SVt_PVIV) {
977 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
978 STRLEN len;
979 char *p;
980 SV *nsv;
981
982 p = SvPV(sv, len);
79cb57f6 983 nsv = newSVpvn(p, len);
01ec43d0
GS
984 if (SvUTF8(sv))
985 SvUTF8_on(nsv);
b3ac6de7
IZ
986 SvREFCNT_dec(sv);
987 sv = nsv;
4e553d73 988 }
b3ac6de7 989 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 990 PL_lex_stuff = Nullsv;
79072805
LW
991 return THING;
992 }
993
3280af22
NIS
994 PL_sublex_info.super_state = PL_lex_state;
995 PL_sublex_info.sub_inwhat = op_type;
996 PL_sublex_info.sub_op = PL_lex_op;
997 PL_lex_state = LEX_INTERPPUSH;
55497cff 998
3280af22
NIS
999 PL_expect = XTERM;
1000 if (PL_lex_op) {
1001 yylval.opval = PL_lex_op;
1002 PL_lex_op = Nullop;
55497cff
PP
1003 return PMFUNC;
1004 }
1005 else
1006 return FUNC;
1007}
1008
ffb4593c
NT
1009/*
1010 * S_sublex_push
1011 * Create a new scope to save the lexing state. The scope will be
1012 * ended in S_sublex_done. Returns a '(', starting the function arguments
1013 * to the uc, lc, etc. found before.
1014 * Sets PL_lex_state to LEX_INTERPCONCAT.
1015 */
1016
76e3520e 1017STATIC I32
cea2e8a9 1018S_sublex_push(pTHX)
55497cff 1019{
f46d017c 1020 ENTER;
55497cff 1021
3280af22
NIS
1022 PL_lex_state = PL_sublex_info.super_state;
1023 SAVEI32(PL_lex_dojoin);
1024 SAVEI32(PL_lex_brackets);
3280af22
NIS
1025 SAVEI32(PL_lex_casemods);
1026 SAVEI32(PL_lex_starts);
1027 SAVEI32(PL_lex_state);
7766f137 1028 SAVEVPTR(PL_lex_inpat);
3280af22 1029 SAVEI32(PL_lex_inwhat);
57843af0 1030 SAVECOPLINE(PL_curcop);
3280af22
NIS
1031 SAVEPPTR(PL_bufptr);
1032 SAVEPPTR(PL_oldbufptr);
1033 SAVEPPTR(PL_oldoldbufptr);
1034 SAVEPPTR(PL_linestart);
1035 SAVESPTR(PL_linestr);
1036 SAVEPPTR(PL_lex_brackstack);
1037 SAVEPPTR(PL_lex_casestack);
1038
1039 PL_linestr = PL_lex_stuff;
1040 PL_lex_stuff = Nullsv;
1041
9cbb5ea2
GS
1042 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1043 = SvPVX(PL_linestr);
3280af22
NIS
1044 PL_bufend += SvCUR(PL_linestr);
1045 SAVEFREESV(PL_linestr);
1046
1047 PL_lex_dojoin = FALSE;
1048 PL_lex_brackets = 0;
3280af22
NIS
1049 New(899, PL_lex_brackstack, 120, char);
1050 New(899, PL_lex_casestack, 12, char);
1051 SAVEFREEPV(PL_lex_brackstack);
1052 SAVEFREEPV(PL_lex_casestack);
1053 PL_lex_casemods = 0;
1054 *PL_lex_casestack = '\0';
1055 PL_lex_starts = 0;
1056 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1057 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1058
1059 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1060 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1061 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1062 else
3280af22 1063 PL_lex_inpat = Nullop;
79072805 1064
55497cff 1065 return '(';
79072805
LW
1066}
1067
ffb4593c
NT
1068/*
1069 * S_sublex_done
1070 * Restores lexer state after a S_sublex_push.
1071 */
1072
76e3520e 1073STATIC I32
cea2e8a9 1074S_sublex_done(pTHX)
79072805 1075{
3280af22 1076 if (!PL_lex_starts++) {
9aa983d2
JH
1077 SV *sv = newSVpvn("",0);
1078 if (SvUTF8(PL_linestr))
1079 SvUTF8_on(sv);
3280af22 1080 PL_expect = XOPERATOR;
9aa983d2 1081 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1082 return THING;
1083 }
1084
3280af22
NIS
1085 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1086 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1087 return yylex();
79072805
LW
1088 }
1089
ffb4593c 1090 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1091 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1092 PL_linestr = PL_lex_repl;
1093 PL_lex_inpat = 0;
1094 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1095 PL_bufend += SvCUR(PL_linestr);
1096 SAVEFREESV(PL_linestr);
1097 PL_lex_dojoin = FALSE;
1098 PL_lex_brackets = 0;
3280af22
NIS
1099 PL_lex_casemods = 0;
1100 *PL_lex_casestack = '\0';
1101 PL_lex_starts = 0;
25da4f38 1102 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1103 PL_lex_state = LEX_INTERPNORMAL;
1104 PL_lex_starts++;
e9fa98b2
HS
1105 /* we don't clear PL_lex_repl here, so that we can check later
1106 whether this is an evalled subst; that means we rely on the
1107 logic to ensure sublex_done() is called again only via the
1108 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1109 }
e9fa98b2 1110 else {
3280af22 1111 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1112 PL_lex_repl = Nullsv;
1113 }
79072805 1114 return ',';
ffed7fef
LW
1115 }
1116 else {
f46d017c 1117 LEAVE;
3280af22
NIS
1118 PL_bufend = SvPVX(PL_linestr);
1119 PL_bufend += SvCUR(PL_linestr);
1120 PL_expect = XOPERATOR;
09bef843 1121 PL_sublex_info.sub_inwhat = 0;
79072805 1122 return ')';
ffed7fef
LW
1123 }
1124}
1125
02aa26ce
NT
1126/*
1127 scan_const
1128
1129 Extracts a pattern, double-quoted string, or transliteration. This
1130 is terrifying code.
1131
3280af22
NIS
1132 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1133 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1134 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1135
9b599b2a
GS
1136 Returns a pointer to the character scanned up to. Iff this is
1137 advanced from the start pointer supplied (ie if anything was
1138 successfully parsed), will leave an OP for the substring scanned
1139 in yylval. Caller must intuit reason for not parsing further
1140 by looking at the next characters herself.
1141
02aa26ce
NT
1142 In patterns:
1143 backslashes:
1144 double-quoted style: \r and \n
1145 regexp special ones: \D \s
1146 constants: \x3
1147 backrefs: \1 (deprecated in substitution replacements)
1148 case and quoting: \U \Q \E
1149 stops on @ and $, but not for $ as tail anchor
1150
1151 In transliterations:
1152 characters are VERY literal, except for - not at the start or end
1153 of the string, which indicates a range. scan_const expands the
1154 range to the full set of intermediate characters.
1155
1156 In double-quoted strings:
1157 backslashes:
1158 double-quoted style: \r and \n
1159 constants: \x3
1160 backrefs: \1 (deprecated)
1161 case and quoting: \U \Q \E
1162 stops on @ and $
1163
1164 scan_const does *not* construct ops to handle interpolated strings.
1165 It stops processing as soon as it finds an embedded $ or @ variable
1166 and leaves it to the caller to work out what's going on.
1167
1168 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1169
1170 $ in pattern could be $foo or could be tail anchor. Assumption:
1171 it's a tail anchor if $ is the last thing in the string, or if it's
1172 followed by one of ")| \n\t"
1173
1174 \1 (backreferences) are turned into $1
1175
1176 The structure of the code is
1177 while (there's a character to process) {
1178 handle transliteration ranges
1179 skip regexp comments
1180 skip # initiated comments in //x patterns
1181 check for embedded @foo
1182 check for embedded scalars
1183 if (backslash) {
1184 leave intact backslashes from leave (below)
1185 deprecate \1 in strings and sub replacements
1186 handle string-changing backslashes \l \U \Q \E, etc.
1187 switch (what was escaped) {
1188 handle - in a transliteration (becomes a literal -)
1189 handle \132 octal characters
1190 handle 0x15 hex characters
1191 handle \cV (control V)
1192 handle printf backslashes (\f, \r, \n, etc)
1193 } (end switch)
1194 } (end if backslash)
1195 } (end while character to read)
4e553d73 1196
02aa26ce
NT
1197*/
1198
76e3520e 1199STATIC char *
cea2e8a9 1200S_scan_const(pTHX_ char *start)
79072805 1201{
3280af22 1202 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1203 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1204 register char *s = start; /* start of the constant */
1205 register char *d = SvPVX(sv); /* destination for copies */
1206 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1207 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1208 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1209 /* the constant is UTF8 */
012bcf8d
GS
1210 UV uv;
1211
ac2262e3 1212 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1213 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1214 : UTF;
89491803 1215 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1216 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1217 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1218 : UTF;
dff6d3cd 1219 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1220 PL_lex_inpat
4a2d328f 1221 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1222 : "";
79072805
LW
1223
1224 while (s < send || dorange) {
02aa26ce 1225 /* get transliterations out of the way (they're most literal) */
3280af22 1226 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1227 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1228 if (dorange) {
02aa26ce 1229 I32 i; /* current expanded character */
8ada0baa 1230 I32 min; /* first character in range */
02aa26ce
NT
1231 I32 max; /* last character in range */
1232
1233 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1234 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1235 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1236 d -= 2; /* eat the first char and the - */
1237
8ada0baa
JH
1238 min = (U8)*d; /* first char in range */
1239 max = (U8)d[1]; /* last char in range */
1240
c2e66d9e 1241 if (min > max) {
01ec43d0
GS
1242 Perl_croak(aTHX_
1243 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1244 (char)min, (char)max);
c2e66d9e
GS
1245 }
1246
8ada0baa
JH
1247#ifndef ASCIIish
1248 if ((isLOWER(min) && isLOWER(max)) ||
1249 (isUPPER(min) && isUPPER(max))) {
1250 if (isLOWER(min)) {
1251 for (i = min; i <= max; i++)
1252 if (isLOWER(i))
1253 *d++ = i;
1254 } else {
1255 for (i = min; i <= max; i++)
1256 if (isUPPER(i))
1257 *d++ = i;
1258 }
1259 }
1260 else
1261#endif
1262 for (i = min; i <= max; i++)
1263 *d++ = i;
02aa26ce
NT
1264
1265 /* mark the range as done, and continue */
79072805 1266 dorange = FALSE;
01ec43d0 1267 didrange = TRUE;
79072805 1268 continue;
4e553d73 1269 }
02aa26ce
NT
1270
1271 /* range begins (ignore - as first or last char) */
79072805 1272 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1273 if (didrange) {
1fafa243 1274 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1275 }
a0ed51b3 1276 if (utf) {
a176fa2a 1277 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1278 s++;
1279 continue;
1280 }
79072805
LW
1281 dorange = TRUE;
1282 s++;
01ec43d0
GS
1283 }
1284 else {
1285 didrange = FALSE;
1286 }
79072805 1287 }
02aa26ce
NT
1288
1289 /* if we get here, we're not doing a transliteration */
1290
0f5d15d6
IZ
1291 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1292 except for the last char, which will be done separately. */
3280af22 1293 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1294 if (s[2] == '#') {
1295 while (s < send && *s != ')')
1296 *d++ = *s++;
155aba94
GS
1297 }
1298 else if (s[2] == '{' /* This should match regcomp.c */
1299 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1300 {
cc6b7395 1301 I32 count = 1;
0f5d15d6 1302 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1303 char c;
1304
d9f97599
GS
1305 while (count && (c = *regparse)) {
1306 if (c == '\\' && regparse[1])
1307 regparse++;
4e553d73 1308 else if (c == '{')
cc6b7395 1309 count++;
4e553d73 1310 else if (c == '}')
cc6b7395 1311 count--;
d9f97599 1312 regparse++;
cc6b7395 1313 }
5bdf89e7
IZ
1314 if (*regparse != ')') {
1315 regparse--; /* Leave one char for continuation. */
cc6b7395 1316 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1317 }
0f5d15d6 1318 while (s < regparse)
cc6b7395
IZ
1319 *d++ = *s++;
1320 }
748a9306 1321 }
02aa26ce
NT
1322
1323 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1324 else if (*s == '#' && PL_lex_inpat &&
1325 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1326 while (s+1 < send && *s != '\n')
1327 *d++ = *s++;
1328 }
02aa26ce 1329
5d1d4326
JH
1330 /* check for embedded arrays
1331 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1332 */
7e2040f0 1333 else if (*s == '@' && s[1]
5d1d4326 1334 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1335 break;
02aa26ce
NT
1336
1337 /* check for embedded scalars. only stop if we're sure it's a
1338 variable.
1339 */
79072805 1340 else if (*s == '$') {
3280af22 1341 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1342 break;
c277df42 1343 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1344 break; /* in regexp, $ might be tail anchor */
1345 }
02aa26ce
NT
1346
1347 /* backslashes */
79072805
LW
1348 if (*s == '\\' && s+1 < send) {
1349 s++;
02aa26ce
NT
1350
1351 /* some backslashes we leave behind */
c9f97d15 1352 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1353 *d++ = '\\';
1354 *d++ = *s++;
1355 continue;
1356 }
02aa26ce
NT
1357
1358 /* deprecate \1 in strings and substitution replacements */
3280af22 1359 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1360 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1361 {
599cee73 1362 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1363 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1364 *--s = '$';
1365 break;
1366 }
02aa26ce
NT
1367
1368 /* string-change backslash escapes */
3280af22 1369 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1370 --s;
1371 break;
1372 }
02aa26ce
NT
1373
1374 /* if we get here, it's either a quoted -, or a digit */
79072805 1375 switch (*s) {
02aa26ce
NT
1376
1377 /* quoted - in transliterations */
79072805 1378 case '-':
3280af22 1379 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1380 *d++ = *s++;
1381 continue;
1382 }
1383 /* FALL THROUGH */
1384 default:
11b8faa4 1385 {
7e84c16c 1386 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1387 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1388 "Unrecognized escape \\%c passed through",
1389 *s);
1390 /* default action is to copy the quoted character */
f9a63242 1391 goto default_action;
11b8faa4 1392 }
02aa26ce
NT
1393
1394 /* \132 indicates an octal constant */
79072805
LW
1395 case '0': case '1': case '2': case '3':
1396 case '4': case '5': case '6': case '7':
ba210ebe
JH
1397 {
1398 STRLEN len = 0; /* disallow underscores */
1399 uv = (UV)scan_oct(s, 3, &len);
1400 s += len;
1401 }
012bcf8d 1402 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1403
1404 /* \x24 indicates a hex constant */
79072805 1405 case 'x':
a0ed51b3
LW
1406 ++s;
1407 if (*s == '{') {
1408 char* e = strchr(s, '}');
adaeee49 1409 if (!e) {
a0ed51b3 1410 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1411 e = s;
1412 }
89491803 1413 else {
ba210ebe
JH
1414 STRLEN len = 1; /* allow underscores */
1415 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1416 }
1417 s = e + 1;
a0ed51b3
LW
1418 }
1419 else {
ba210ebe
JH
1420 {
1421 STRLEN len = 0; /* disallow underscores */
1422 uv = (UV)scan_hex(s, 2, &len);
1423 s += len;
1424 }
012bcf8d
GS
1425 }
1426
1427 NUM_ESCAPE_INSERT:
1428 /* Insert oct or hex escaped character.
301d3d20
JH
1429 * There will always enough room in sv since such
1430 * escapes will be longer than any UT-F8 sequence
1431 * they can end up as. */
ba7cea30
JH
1432
1433 /* This spot is wrong for EBCDIC. Characters like
1434 * the lowercase letters and digits are >127 in EBCDIC,
1435 * so here they would need to be mapped to the Unicode
1436 * repertoire. --jhi */
1437
7948272d 1438 if (uv > 127) {
9aa983d2 1439 if (!has_utf8 && uv > 255) {
301d3d20
JH
1440 /* Might need to recode whatever we have
1441 * accumulated so far if it contains any
1442 * hibit chars.
1443 *
1444 * (Can't we keep track of that and avoid
1445 * this rescan? --jhi)
012bcf8d
GS
1446 */
1447 int hicount = 0;
1448 char *c;
301d3d20 1449
012bcf8d 1450 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1451 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1452 hicount++;
1453 }
1454 if (hicount) {
1455 char *old_pvx = SvPVX(sv);
1456 char *src, *dst;
301d3d20
JH
1457
1458 d = SvGROW(sv,
1459 SvCUR(sv) + hicount + 1) +
1460 (d - old_pvx);
012bcf8d
GS
1461
1462 src = d - 1;
1463 d += hicount;
1464 dst = d - 1;
1465
1466 while (src < dst) {
fd400ab9 1467 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1468 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1469 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1470 }
1471 else {
1472 *dst-- = *src--;
1473 }
1474 }
1475 }
1476 }
1477
9aa983d2 1478 if (has_utf8 || uv > 255) {
012bcf8d 1479 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1480 has_utf8 = TRUE;
f9a63242
JH
1481 if (PL_lex_inwhat == OP_TRANS &&
1482 PL_sublex_info.sub_op) {
1483 PL_sublex_info.sub_op->op_private |=
1484 (PL_lex_repl ? OPpTRANS_FROM_UTF
1485 : OPpTRANS_TO_UTF);
1486 utf = TRUE;
1487 }
012bcf8d 1488 }
a0ed51b3 1489 else {
012bcf8d 1490 *d++ = (char)uv;
a0ed51b3 1491 }
012bcf8d
GS
1492 }
1493 else {
1494 *d++ = (char)uv;
a0ed51b3 1495 }
79072805 1496 continue;
02aa26ce 1497
4a2d328f
IZ
1498 /* \N{latin small letter a} is a named character */
1499 case 'N':
423cee85
JH
1500 ++s;
1501 if (*s == '{') {
1502 char* e = strchr(s, '}');
155aba94 1503 SV *res;
423cee85
JH
1504 STRLEN len;
1505 char *str;
4e553d73 1506
423cee85 1507 if (!e) {
5777a3f7 1508 yyerror("Missing right brace on \\N{}");
423cee85
JH
1509 e = s - 1;
1510 goto cont_scan;
1511 }
1512 res = newSVpvn(s + 1, e - s - 1);
4e553d73 1513 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1514 res, Nullsv, "\\N{...}" );
f9a63242
JH
1515 if (has_utf8)
1516 sv_utf8_upgrade(res);
423cee85 1517 str = SvPV(res,len);
89491803 1518 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1519 char *ostart = SvPVX(sv);
1520 SvCUR_set(sv, d - ostart);
1521 SvPOK_on(sv);
e4f3eed8 1522 *d = '\0';
f08d6ad9 1523 sv_utf8_upgrade(sv);
d2f449dd
SB
1524 /* this just broke our allocation above... */
1525 SvGROW(sv, send - start);
f08d6ad9 1526 d = SvPVX(sv) + SvCUR(sv);
89491803 1527 has_utf8 = TRUE;
f08d6ad9 1528 }
423cee85
JH
1529 if (len > e - s + 4) {
1530 char *odest = SvPVX(sv);
1531
1532 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1533 d = SvPVX(sv) + (d - odest);
1534 }
1535 Copy(str, d, len, char);
1536 d += len;
1537 SvREFCNT_dec(res);
1538 cont_scan:
1539 s = e + 1;
1540 }
1541 else
5777a3f7 1542 yyerror("Missing braces on \\N{}");
423cee85
JH
1543 continue;
1544
02aa26ce 1545 /* \c is a control character */
79072805
LW
1546 case 'c':
1547 s++;
9d116dd7
JH
1548#ifdef EBCDIC
1549 *d = *s++;
1550 if (isLOWER(*d))
1551 *d = toUPPER(*d);
4e553d73 1552 *d = toCTRL(*d);
774a9426 1553 d++;
9d116dd7 1554#else
ba210ebe
JH
1555 {
1556 U8 c = *s++;
1557 *d++ = toCTRL(c);
1558 }
9d116dd7 1559#endif
79072805 1560 continue;
02aa26ce
NT
1561
1562 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1563 case 'b':
1564 *d++ = '\b';
1565 break;
1566 case 'n':
1567 *d++ = '\n';
1568 break;
1569 case 'r':
1570 *d++ = '\r';
1571 break;
1572 case 'f':
1573 *d++ = '\f';
1574 break;
1575 case 't':
1576 *d++ = '\t';
1577 break;
34a3fe2a
PP
1578#ifdef EBCDIC
1579 case 'e':
1580 *d++ = '\047'; /* CP 1047 */
1581 break;
1582 case 'a':
1583 *d++ = '\057'; /* CP 1047 */
1584 break;
1585#else
79072805
LW
1586 case 'e':
1587 *d++ = '\033';
1588 break;
1589 case 'a':
1590 *d++ = '\007';
1591 break;
34a3fe2a 1592#endif
02aa26ce
NT
1593 } /* end switch */
1594
79072805
LW
1595 s++;
1596 continue;
02aa26ce
NT
1597 } /* end if (backslash) */
1598
f9a63242 1599 default_action:
fd400ab9 1600 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
RI
1601 STRLEN len = (STRLEN) -1;
1602 UV uv;
1603 if (this_utf8) {
a0dbb045 1604 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
RI
1605 }
1606 if (len == (STRLEN)-1) {
1607 /* Illegal UTF8 (a high-bit byte), make it valid. */
1608 char *old_pvx = SvPVX(sv);
1609 /* need space for one extra char (NOTE: SvCUR() not set here) */
1610 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1611 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1612 }
1613 else {
1614 while (len--)
1615 *d++ = *s++;
1616 }
1617 has_utf8 = TRUE;
f9a63242
JH
1618 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1619 PL_sublex_info.sub_op->op_private |=
1620 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1621 utf = TRUE;
1622 }
a5a960be
RI
1623 continue;
1624 }
1625
f9a63242 1626 *d++ = *s++;
02aa26ce
NT
1627 } /* while loop to process each character */
1628
1629 /* terminate the string and set up the sv */
79072805 1630 *d = '\0';
463ee0b2 1631 SvCUR_set(sv, d - SvPVX(sv));
79072805 1632 SvPOK_on(sv);
89491803 1633 if (has_utf8)
7e2040f0 1634 SvUTF8_on(sv);
79072805 1635
02aa26ce 1636 /* shrink the sv if we allocated more than we used */
79072805
LW
1637 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1638 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1639 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1640 }
02aa26ce 1641
9b599b2a 1642 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1643 if (s > PL_bufptr) {
1644 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1645 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1646 sv, Nullsv,
4e553d73 1647 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1648 ? "tr"
3280af22 1649 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1650 ? "s"
1651 : "qq")));
79072805 1652 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1653 } else
8990e307 1654 SvREFCNT_dec(sv);
79072805
LW
1655 return s;
1656}
1657
ffb4593c
NT
1658/* S_intuit_more
1659 * Returns TRUE if there's more to the expression (e.g., a subscript),
1660 * FALSE otherwise.
ffb4593c
NT
1661 *
1662 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1663 *
1664 * ->[ and ->{ return TRUE
1665 * { and [ outside a pattern are always subscripts, so return TRUE
1666 * if we're outside a pattern and it's not { or [, then return FALSE
1667 * if we're in a pattern and the first char is a {
1668 * {4,5} (any digits around the comma) returns FALSE
1669 * if we're in a pattern and the first char is a [
1670 * [] returns FALSE
1671 * [SOMETHING] has a funky algorithm to decide whether it's a
1672 * character class or not. It has to deal with things like
1673 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1674 * anything else returns TRUE
1675 */
1676
9cbb5ea2
GS
1677/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1678
76e3520e 1679STATIC int
cea2e8a9 1680S_intuit_more(pTHX_ register char *s)
79072805 1681{
3280af22 1682 if (PL_lex_brackets)
79072805
LW
1683 return TRUE;
1684 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1685 return TRUE;
1686 if (*s != '{' && *s != '[')
1687 return FALSE;
3280af22 1688 if (!PL_lex_inpat)
79072805
LW
1689 return TRUE;
1690
1691 /* In a pattern, so maybe we have {n,m}. */
1692 if (*s == '{') {
1693 s++;
1694 if (!isDIGIT(*s))
1695 return TRUE;
1696 while (isDIGIT(*s))
1697 s++;
1698 if (*s == ',')
1699 s++;
1700 while (isDIGIT(*s))
1701 s++;
1702 if (*s == '}')
1703 return FALSE;
1704 return TRUE;
1705
1706 }
1707
1708 /* On the other hand, maybe we have a character class */
1709
1710 s++;
1711 if (*s == ']' || *s == '^')
1712 return FALSE;
1713 else {
ffb4593c 1714 /* this is terrifying, and it works */
79072805
LW
1715 int weight = 2; /* let's weigh the evidence */
1716 char seen[256];
f27ffc4a 1717 unsigned char un_char = 255, last_un_char;
93a17b20 1718 char *send = strchr(s,']');
3280af22 1719 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1720
1721 if (!send) /* has to be an expression */
1722 return TRUE;
1723
1724 Zero(seen,256,char);
1725 if (*s == '$')
1726 weight -= 3;
1727 else if (isDIGIT(*s)) {
1728 if (s[1] != ']') {
1729 if (isDIGIT(s[1]) && s[2] == ']')
1730 weight -= 10;
1731 }
1732 else
1733 weight -= 100;
1734 }
1735 for (; s < send; s++) {
1736 last_un_char = un_char;
1737 un_char = (unsigned char)*s;
1738 switch (*s) {
1739 case '@':
1740 case '&':
1741 case '$':
1742 weight -= seen[un_char] * 10;
7e2040f0 1743 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1744 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1745 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1746 weight -= 100;
1747 else
1748 weight -= 10;
1749 }
1750 else if (*s == '$' && s[1] &&
93a17b20
LW
1751 strchr("[#!%*<>()-=",s[1])) {
1752 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1753 weight -= 10;
1754 else
1755 weight -= 1;
1756 }
1757 break;
1758 case '\\':
1759 un_char = 254;
1760 if (s[1]) {
93a17b20 1761 if (strchr("wds]",s[1]))
79072805
LW
1762 weight += 100;
1763 else if (seen['\''] || seen['"'])
1764 weight += 1;
93a17b20 1765 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1766 weight += 40;
1767 else if (isDIGIT(s[1])) {
1768 weight += 40;
1769 while (s[1] && isDIGIT(s[1]))
1770 s++;
1771 }
1772 }
1773 else
1774 weight += 100;
1775 break;
1776 case '-':
1777 if (s[1] == '\\')
1778 weight += 50;
93a17b20 1779 if (strchr("aA01! ",last_un_char))
79072805 1780 weight += 30;
93a17b20 1781 if (strchr("zZ79~",s[1]))
79072805 1782 weight += 30;
f27ffc4a
GS
1783 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1784 weight -= 5; /* cope with negative subscript */
79072805
LW
1785 break;
1786 default:
93a17b20 1787 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1788 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1789 char *d = tmpbuf;
1790 while (isALPHA(*s))
1791 *d++ = *s++;
1792 *d = '\0';
1793 if (keyword(tmpbuf, d - tmpbuf))
1794 weight -= 150;
1795 }
1796 if (un_char == last_un_char + 1)
1797 weight += 5;
1798 weight -= seen[un_char];
1799 break;
1800 }
1801 seen[un_char]++;
1802 }
1803 if (weight >= 0) /* probably a character class */
1804 return FALSE;
1805 }
1806
1807 return TRUE;
1808}
ffed7fef 1809
ffb4593c
NT
1810/*
1811 * S_intuit_method
1812 *
1813 * Does all the checking to disambiguate
1814 * foo bar
1815 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1816 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1817 *
1818 * First argument is the stuff after the first token, e.g. "bar".
1819 *
1820 * Not a method if bar is a filehandle.
1821 * Not a method if foo is a subroutine prototyped to take a filehandle.
1822 * Not a method if it's really "Foo $bar"
1823 * Method if it's "foo $bar"
1824 * Not a method if it's really "print foo $bar"
1825 * Method if it's really "foo package::" (interpreted as package->foo)
1826 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1827 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1828 * =>
1829 */
1830
76e3520e 1831STATIC int
cea2e8a9 1832S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1833{
1834 char *s = start + (*start == '$');
3280af22 1835 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1836 STRLEN len;
1837 GV* indirgv;
1838
1839 if (gv) {
b6c543e3 1840 CV *cv;
a0d0e21e
LW
1841 if (GvIO(gv))
1842 return 0;
b6c543e3
IZ
1843 if ((cv = GvCVu(gv))) {
1844 char *proto = SvPVX(cv);
1845 if (proto) {
1846 if (*proto == ';')
1847 proto++;
1848 if (*proto == '*')
1849 return 0;
1850 }
1851 } else
a0d0e21e
LW
1852 gv = 0;
1853 }
8903cb82 1854 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1855 /* start is the beginning of the possible filehandle/object,
1856 * and s is the end of it
1857 * tmpbuf is a copy of it
1858 */
1859
a0d0e21e 1860 if (*start == '$') {
3280af22 1861 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1862 return 0;
1863 s = skipspace(s);
3280af22
NIS
1864 PL_bufptr = start;
1865 PL_expect = XREF;
a0d0e21e
LW
1866 return *s == '(' ? FUNCMETH : METHOD;
1867 }
1868 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1869 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1870 len -= 2;
1871 tmpbuf[len] = '\0';
1872 goto bare_package;
1873 }
1874 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1875 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1876 return 0;
1877 /* filehandle or package name makes it a method */
89bfa8cd 1878 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1879 s = skipspace(s);
3280af22 1880 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1881 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1882 bare_package:
3280af22 1883 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1884 newSVpvn(tmpbuf,len));
3280af22
NIS
1885 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1886 PL_expect = XTERM;
a0d0e21e 1887 force_next(WORD);
3280af22 1888 PL_bufptr = s;
a0d0e21e
LW
1889 return *s == '(' ? FUNCMETH : METHOD;
1890 }
1891 }
1892 return 0;
1893}
1894
ffb4593c
NT
1895/*
1896 * S_incl_perldb
1897 * Return a string of Perl code to load the debugger. If PERL5DB
1898 * is set, it will return the contents of that, otherwise a
1899 * compile-time require of perl5db.pl.
1900 */
1901
76e3520e 1902STATIC char*
cea2e8a9 1903S_incl_perldb(pTHX)
a0d0e21e 1904{
3280af22 1905 if (PL_perldb) {
76e3520e 1906 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1907
1908 if (pdb)
1909 return pdb;
61bb5906 1910 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1911 return "BEGIN { require 'perl5db.pl' }";
1912 }
1913 return "";
1914}
1915
1916
16d20bd9 1917/* Encoded script support. filter_add() effectively inserts a
4e553d73 1918 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1919 * Note that the filter function only applies to the current source file
1920 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1921 *
1922 * The datasv parameter (which may be NULL) can be used to pass
1923 * private data to this instance of the filter. The filter function
1924 * can recover the SV using the FILTER_DATA macro and use it to
1925 * store private buffers and state information.
1926 *
1927 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1928 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1929 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1930 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1931 * private use must be set using malloc'd pointers.
1932 */
16d20bd9
AD
1933
1934SV *
864dbfa3 1935Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1936{
f4c556ac
GS
1937 if (!funcp)
1938 return Nullsv;
1939
3280af22
NIS
1940 if (!PL_rsfp_filters)
1941 PL_rsfp_filters = newAV();
16d20bd9 1942 if (!datasv)
8c52afec 1943 datasv = NEWSV(255,0);
16d20bd9 1944 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1945 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1946 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1947 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1948 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1949 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1950 av_unshift(PL_rsfp_filters, 1);
1951 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1952 return(datasv);
1953}
4e553d73 1954
16d20bd9
AD
1955
1956/* Delete most recently added instance of this filter function. */
a0d0e21e 1957void
864dbfa3 1958Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1959{
e0c19803 1960 SV *datasv;
f4c556ac 1961 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1962 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1963 return;
1964 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1965 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1966 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1967 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1968 IoANY(datasv) = (void *)NULL;
3280af22 1969 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1970
16d20bd9
AD
1971 return;
1972 }
1973 /* we need to search for the correct entry and clear it */
cea2e8a9 1974 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1975}
1976
1977
1978/* Invoke the n'th filter function for the current rsfp. */
1979I32
864dbfa3 1980Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1981
1982
8ac85365 1983 /* 0 = read one text line */
a0d0e21e 1984{
16d20bd9
AD
1985 filter_t funcp;
1986 SV *datasv = NULL;
e50aee73 1987
3280af22 1988 if (!PL_rsfp_filters)
16d20bd9 1989 return -1;
3280af22 1990 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1991 /* Provide a default input filter to make life easy. */
1992 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1993 DEBUG_P(PerlIO_printf(Perl_debug_log,
1994 "filter_read %d: from rsfp\n", idx));
4e553d73 1995 if (maxlen) {
16d20bd9
AD
1996 /* Want a block */
1997 int len ;
1998 int old_len = SvCUR(buf_sv) ;
1999
2000 /* ensure buf_sv is large enough */
2001 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2002 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2003 if (PerlIO_error(PL_rsfp))
37120919
AD
2004 return -1; /* error */
2005 else
2006 return 0 ; /* end of file */
2007 }
16d20bd9
AD
2008 SvCUR_set(buf_sv, old_len + len) ;
2009 } else {
2010 /* Want a line */
3280af22
NIS
2011 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2012 if (PerlIO_error(PL_rsfp))
37120919
AD
2013 return -1; /* error */
2014 else
2015 return 0 ; /* end of file */
2016 }
16d20bd9
AD
2017 }
2018 return SvCUR(buf_sv);
2019 }
2020 /* Skip this filter slot if filter has been deleted */
3280af22 2021 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2022 DEBUG_P(PerlIO_printf(Perl_debug_log,
2023 "filter_read %d: skipped (filter deleted)\n",
2024 idx));
16d20bd9
AD
2025 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2026 }
2027 /* Get function pointer hidden within datasv */
4755096e 2028 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2029 DEBUG_P(PerlIO_printf(Perl_debug_log,
2030 "filter_read %d: via function %p (%s)\n",
2031 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2032 /* Call function. The function is expected to */
2033 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2034 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2035 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2036}
2037
76e3520e 2038STATIC char *
cea2e8a9 2039S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2040{
c39cd008 2041#ifdef PERL_CR_FILTER
3280af22 2042 if (!PL_rsfp_filters) {
c39cd008 2043 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2044 }
2045#endif
3280af22 2046 if (PL_rsfp_filters) {
16d20bd9 2047
55497cff
PP
2048 if (!append)
2049 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2050 if (FILTER_READ(0, sv, 0) > 0)
2051 return ( SvPVX(sv) ) ;
2052 else
2053 return Nullch ;
2054 }
9d116dd7 2055 else
fd049845 2056 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2057}
2058
01ec43d0
GS
2059STATIC HV *
2060S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2061{
2062 GV *gv;
2063
01ec43d0 2064 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2065 return PL_curstash;
2066
2067 if (len > 2 &&
2068 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2069 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2070 {
2071 return GvHV(gv); /* Foo:: */
def3634b
GS
2072 }
2073
2074 /* use constant CLASS => 'MyClass' */
2075 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2076 SV *sv;
2077 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2078 pkgname = SvPV_nolen(sv);
2079 }
2080 }
2081
2082 return gv_stashpv(pkgname, FALSE);
2083}
a0d0e21e 2084
748a9306
LW
2085#ifdef DEBUGGING
2086 static char* exp_name[] =
09bef843
SB
2087 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2088 "ATTRTERM", "TERMBLOCK"
2089 };
748a9306 2090#endif
463ee0b2 2091
02aa26ce
NT
2092/*
2093 yylex
2094
2095 Works out what to call the token just pulled out of the input
2096 stream. The yacc parser takes care of taking the ops we return and
2097 stitching them into a tree.
2098
2099 Returns:
2100 PRIVATEREF
2101
2102 Structure:
2103 if read an identifier
2104 if we're in a my declaration
2105 croak if they tried to say my($foo::bar)
2106 build the ops for a my() declaration
2107 if it's an access to a my() variable
2108 are we in a sort block?
2109 croak if my($a); $a <=> $b
2110 build ops for access to a my() variable
2111 if in a dq string, and they've said @foo and we can't find @foo
2112 croak
2113 build ops for a bareword
2114 if we already built the token before, use it.
2115*/
2116
dba4d153 2117#ifdef USE_PURE_BISON
bf4acbe4 2118#ifdef __SC__
dba4d153 2119#pragma segment Perl_yylex_r
bf4acbe4 2120#endif
864dbfa3 2121int
dba4d153 2122Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2123{
20141f0e
RI
2124 int r;
2125
6f202aea 2126 yyactlevel++;
20141f0e
RI
2127 yylval_pointer[yyactlevel] = lvalp;
2128 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2129 if (yyactlevel >= YYMAXLEVEL)
2130 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2131
dba4d153 2132 r = Perl_yylex(aTHX);
20141f0e 2133
d8ae6756
RI
2134 if (yyactlevel > 0)
2135 yyactlevel--;
20141f0e
RI
2136
2137 return r;
2138}
dba4d153 2139#endif
20141f0e 2140
dba4d153
JH
2141#ifdef __SC__
2142#pragma segment Perl_yylex
2143#endif
dba4d153 2144int
dba4d153 2145Perl_yylex(pTHX)
20141f0e 2146{
79072805 2147 register char *s;
378cc40b 2148 register char *d;
79072805 2149 register I32 tmp;
463ee0b2 2150 STRLEN len;
161b471a
NIS
2151 GV *gv = Nullgv;
2152 GV **gvp = 0;
aa7440fb 2153 bool bof = FALSE;
a687059c 2154
02aa26ce 2155 /* check if there's an identifier for us to look at */
3280af22 2156 if (PL_pending_ident) {
02aa26ce 2157 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2158 char pit = PL_pending_ident;
2159 PL_pending_ident = 0;
bbce6d69 2160
607df283
SC
2161 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2162 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2163
02aa26ce
NT
2164 /* if we're in a my(), we can't allow dynamics here.
2165 $foo'bar has already been turned into $foo::bar, so
2166 just check for colons.
2167
2168 if it's a legal name, the OP is a PADANY.
2169 */
3280af22 2170 if (PL_in_my) {
77ca0c92 2171 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2172 if (strchr(PL_tokenbuf,':'))
2173 yyerror(Perl_form(aTHX_ "No package name allowed for "
2174 "variable %s in \"our\"",
2175 PL_tokenbuf));
77ca0c92
LW
2176 tmp = pad_allocmy(PL_tokenbuf);
2177 }
2178 else {
2179 if (strchr(PL_tokenbuf,':'))
2180 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2181
77ca0c92
LW
2182 yylval.opval = newOP(OP_PADANY, 0);
2183 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2184 return PRIVATEREF;
2185 }
bbce6d69
PP
2186 }
2187
4e553d73 2188 /*
02aa26ce
NT
2189 build the ops for accesses to a my() variable.
2190
2191 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2192 then used in a comparison. This catches most, but not
2193 all cases. For instance, it catches
2194 sort { my($a); $a <=> $b }
2195 but not
2196 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2197 (although why you'd do that is anyone's guess).
2198 */
2199
3280af22 2200 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2201#ifdef USE_THREADS
54b9620d 2202 /* Check for single character per-thread SVs */
3280af22
NIS
2203 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2204 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2205 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2206 {
2faa37cc 2207 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2208 yylval.opval->op_targ = tmp;
2209 return PRIVATEREF;
2210 }
2211#endif /* USE_THREADS */
3280af22 2212 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2213 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2214 /* might be an "our" variable" */
f472eb5c 2215 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2216 /* build ops for a bareword */
f472eb5c
GS
2217 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2218 sv_catpvn(sym, "::", 2);
2219 sv_catpv(sym, PL_tokenbuf+1);
2220 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2221 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2222 gv_fetchpv(SvPVX(sym),
77ca0c92 2223 (PL_in_eval
f472eb5c
GS
2224 ? (GV_ADDMULTI | GV_ADDINEVAL)
2225 : TRUE
77ca0c92
LW
2226 ),
2227 ((PL_tokenbuf[0] == '$') ? SVt_PV
2228 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2229 : SVt_PVHV));
2230 return WORD;
2231 }
2232
02aa26ce 2233 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2234 if (PL_last_lop_op == OP_SORT &&
2235 PL_tokenbuf[0] == '$' &&
2236 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2237 && !PL_tokenbuf[2])
bbce6d69 2238 {
3280af22
NIS
2239 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2240 d < PL_bufend && *d != '\n';
a863c7d1
MB
2241 d++)
2242 {
2243 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2244 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2245 PL_tokenbuf);
a863c7d1 2246 }
bbce6d69
PP
2247 }
2248 }
bbce6d69 2249
a863c7d1
MB
2250 yylval.opval = newOP(OP_PADANY, 0);
2251 yylval.opval->op_targ = tmp;
2252 return PRIVATEREF;
2253 }
bbce6d69
PP
2254 }
2255
02aa26ce
NT
2256 /*
2257 Whine if they've said @foo in a doublequoted string,
2258 and @foo isn't a variable we can find in the symbol
2259 table.
2260 */
3280af22
NIS
2261 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2262 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2263 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2264 && ckWARN(WARN_AMBIGUOUS))
2265 {
2266 /* Downgraded from fatal to warning 20000522 mjd */
2267 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2268 "Possible unintended interpolation of %s in string",
2269 PL_tokenbuf);
2270 }
bbce6d69
PP
2271 }
2272
02aa26ce 2273 /* build ops for a bareword */
3280af22 2274 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2275 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2276 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2277 ((PL_tokenbuf[0] == '$') ? SVt_PV
2278 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2279 : SVt_PVHV));
2280 return WORD;
2281 }
2282
02aa26ce
NT
2283 /* no identifier pending identification */
2284
3280af22 2285 switch (PL_lex_state) {
79072805
LW
2286#ifdef COMMENTARY
2287 case LEX_NORMAL: /* Some compilers will produce faster */
2288 case LEX_INTERPNORMAL: /* code if we comment these out. */
2289 break;
2290#endif
2291
09bef843 2292 /* when we've already built the next token, just pull it out of the queue */
79072805 2293 case LEX_KNOWNEXT:
3280af22
NIS
2294 PL_nexttoke--;
2295 yylval = PL_nextval[PL_nexttoke];
2296 if (!PL_nexttoke) {
2297 PL_lex_state = PL_lex_defer;
2298 PL_expect = PL_lex_expect;
2299 PL_lex_defer = LEX_NORMAL;
463ee0b2 2300 }
607df283 2301 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2302 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2303 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2304
3280af22 2305 return(PL_nexttype[PL_nexttoke]);
79072805 2306
02aa26ce 2307 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2308 when we get here, PL_bufptr is at the \
02aa26ce 2309 */
79072805
LW
2310 case LEX_INTERPCASEMOD:
2311#ifdef DEBUGGING
3280af22 2312 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2313 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2314#endif
02aa26ce 2315 /* handle \E or end of string */
3280af22 2316 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2317 char oldmod;
02aa26ce
NT
2318
2319 /* if at a \E */
3280af22
NIS
2320 if (PL_lex_casemods) {
2321 oldmod = PL_lex_casestack[--PL_lex_casemods];
2322 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2323
3280af22
NIS
2324 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2325 PL_bufptr += 2;
2326 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2327 }
79072805
LW
2328 return ')';
2329 }
3280af22
NIS
2330 if (PL_bufptr != PL_bufend)
2331 PL_bufptr += 2;
2332 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2333 return yylex();
79072805
LW
2334 }
2335 else {
607df283
SC
2336 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2337 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2338 s = PL_bufptr + 1;
79072805
LW
2339 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2340 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2341 if (strchr("LU", *s) &&
3280af22 2342 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2343 {
3280af22 2344 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2345 return ')';
2346 }
3280af22
NIS
2347 if (PL_lex_casemods > 10) {
2348 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2349 if (newlb != PL_lex_casestack) {
a0d0e21e 2350 SAVEFREEPV(newlb);
3280af22 2351 PL_lex_casestack = newlb;
a0d0e21e
LW
2352 }
2353 }
3280af22
NIS
2354 PL_lex_casestack[PL_lex_casemods++] = *s;
2355 PL_lex_casestack[PL_lex_casemods] = '\0';
2356 PL_lex_state = LEX_INTERPCONCAT;
2357 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2358 force_next('(');
2359 if (*s == 'l')
3280af22 2360 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2361 else if (*s == 'u')
3280af22 2362 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2363 else if (*s == 'L')
3280af22 2364 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2365 else if (*s == 'U')
3280af22 2366 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2367 else if (*s == 'Q')
3280af22 2368 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2369 else
cea2e8a9 2370 Perl_croak(aTHX_ "panic: yylex");
3280af22 2371 PL_bufptr = s + 1;
79072805 2372 force_next(FUNC);
3280af22
NIS
2373 if (PL_lex_starts) {
2374 s = PL_bufptr;
2375 PL_lex_starts = 0;
79072805
LW
2376 Aop(OP_CONCAT);
2377 }
2378 else
cea2e8a9 2379 return yylex();
79072805
LW
2380 }
2381
55497cff
PP
2382 case LEX_INTERPPUSH:
2383 return sublex_push();
2384
79072805 2385 case LEX_INTERPSTART:
3280af22 2386 if (PL_bufptr == PL_bufend)
79072805 2387 return sublex_done();
607df283
SC
2388 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2389 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2390 PL_expect = XTERM;
2391 PL_lex_dojoin = (*PL_bufptr == '@');
2392 PL_lex_state = LEX_INTERPNORMAL;
2393 if (PL_lex_dojoin) {
2394 PL_nextval[PL_nexttoke].ival = 0;
79072805 2395 force_next(',');
554b3eca 2396#ifdef USE_THREADS
533c011a
NIS
2397 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2398 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2399 force_next(PRIVATEREF);
2400#else
a0d0e21e 2401 force_ident("\"", '$');
554b3eca 2402#endif /* USE_THREADS */
3280af22 2403 PL_nextval[PL_nexttoke].ival = 0;
79072805 2404 force_next('$');
3280af22 2405 PL_nextval[PL_nexttoke].ival = 0;
79072805 2406 force_next('(');
3280af22 2407 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2408 force_next(FUNC);
2409 }
3280af22
NIS
2410 if (PL_lex_starts++) {
2411 s = PL_bufptr;
79072805
LW
2412 Aop(OP_CONCAT);
2413 }
cea2e8a9 2414 return yylex();
79072805
LW
2415
2416 case LEX_INTERPENDMAYBE:
3280af22
NIS
2417 if (intuit_more(PL_bufptr)) {
2418 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2419 break;
2420 }
2421 /* FALL THROUGH */
2422
2423 case LEX_INTERPEND:
3280af22
NIS
2424 if (PL_lex_dojoin) {
2425 PL_lex_dojoin = FALSE;
2426 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2427 return ')';
2428 }
43a16006 2429 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2430 && SvEVALED(PL_lex_repl))
43a16006 2431 {
e9fa98b2 2432 if (PL_bufptr != PL_bufend)
cea2e8a9 2433 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2434 PL_lex_repl = Nullsv;
2435 }
79072805
LW
2436 /* FALLTHROUGH */
2437 case LEX_INTERPCONCAT:
2438#ifdef DEBUGGING
3280af22 2439 if (PL_lex_brackets)
cea2e8a9 2440 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2441#endif
3280af22 2442 if (PL_bufptr == PL_bufend)
79072805
LW
2443 return sublex_done();
2444
3280af22
NIS
2445 if (SvIVX(PL_linestr) == '\'') {
2446 SV *sv = newSVsv(PL_linestr);
2447 if (!PL_lex_inpat)
76e3520e 2448 sv = tokeq(sv);
3280af22 2449 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2450 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2451 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2452 s = PL_bufend;
79072805
LW
2453 }
2454 else {
3280af22 2455 s = scan_const(PL_bufptr);
79072805 2456 if (*s == '\\')
3280af22 2457 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2458 else
3280af22 2459 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2460 }
2461
3280af22
NIS
2462 if (s != PL_bufptr) {
2463 PL_nextval[PL_nexttoke] = yylval;
2464 PL_expect = XTERM;
79072805 2465 force_next(THING);
3280af22 2466 if (PL_lex_starts++)
79072805
LW
2467 Aop(OP_CONCAT);
2468 else {
3280af22 2469 PL_bufptr = s;
cea2e8a9 2470 return yylex();
79072805
LW
2471 }
2472 }
2473
cea2e8a9 2474 return yylex();
a0d0e21e 2475 case LEX_FORMLINE:
3280af22
NIS
2476 PL_lex_state = LEX_NORMAL;
2477 s = scan_formline(PL_bufptr);
2478 if (!PL_lex_formbrack)
a0d0e21e
LW
2479 goto rightbracket;
2480 OPERATOR(';');
79072805
LW
2481 }
2482
3280af22
NIS
2483 s = PL_bufptr;
2484 PL_oldoldbufptr = PL_oldbufptr;
2485 PL_oldbufptr = s;
607df283 2486 DEBUG_T( {
bf49b057
GS
2487 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2488 exp_name[PL_expect], s);
79072805 2489 } )
463ee0b2
LW
2490
2491 retry:
378cc40b
LW
2492 switch (*s) {
2493 default:
7e2040f0 2494 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2495 goto keylookup;
cea2e8a9 2496 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2497 case 4:
2498 case 26:
2499 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2500 case 0:
3280af22
NIS
2501 if (!PL_rsfp) {
2502 PL_last_uni = 0;
2503 PL_last_lop = 0;
2504 if (PL_lex_brackets)
d98d5fff 2505 yyerror("Missing right curly or square bracket");
4e553d73 2506 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2507 "### Tokener got EOF\n");
2508 } )
79072805 2509 TOKEN(0);
463ee0b2 2510 }
3280af22 2511 if (s++ < PL_bufend)
a687059c 2512 goto retry; /* ignore stray nulls */
3280af22
NIS
2513 PL_last_uni = 0;
2514 PL_last_lop = 0;
2515 if (!PL_in_eval && !PL_preambled) {
2516 PL_preambled = TRUE;
2517 sv_setpv(PL_linestr,incl_perldb());
2518 if (SvCUR(PL_linestr))
2519 sv_catpv(PL_linestr,";");
2520 if (PL_preambleav){
2521 while(AvFILLp(PL_preambleav) >= 0) {
2522 SV *tmpsv = av_shift(PL_preambleav);
2523 sv_catsv(PL_linestr, tmpsv);
2524 sv_catpv(PL_linestr, ";");
91b7def8
PP
2525 sv_free(tmpsv);
2526 }
3280af22
NIS
2527 sv_free((SV*)PL_preambleav);
2528 PL_preambleav = NULL;
91b7def8 2529 }
3280af22
NIS
2530 if (PL_minus_n || PL_minus_p) {
2531 sv_catpv(PL_linestr, "LINE: while (<>) {");
2532 if (PL_minus_l)
2533 sv_catpv(PL_linestr,"chomp;");
2534 if (PL_minus_a) {
8fd239a7
CS
2535 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2536 if (gv)
2537 GvIMPORTED_AV_on(gv);
3280af22
NIS
2538 if (PL_minus_F) {
2539 if (strchr("/'\"", *PL_splitstr)
2540 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2541 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2542 else {
2543 char delim;
2544 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2545 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2546 delim = *s;
cea2e8a9 2547 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2548 "q" + (delim == '\''), delim);
3280af22 2549 for (s = PL_splitstr; *s; s++) {
54310121 2550 if (*s == '\\')
3280af22
NIS
2551 sv_catpvn(PL_linestr, "\\", 1);
2552 sv_catpvn(PL_linestr, s, 1);
54310121 2553 }
cea2e8a9 2554 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2555 }
2304df62
AD
2556 }
2557 else
3280af22 2558 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2559 }
79072805 2560 }
3280af22
NIS
2561 sv_catpv(PL_linestr, "\n");
2562 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2563 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2564 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2565 SV *sv = NEWSV(85,0);
2566
2567 sv_upgrade(sv, SVt_PVMG);
3280af22 2568 sv_setsv(sv,PL_linestr);
57843af0 2569 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2570 }
79072805 2571 goto retry;
a687059c 2572 }
e929a76b 2573 do {
aa7440fb 2574 bof = PL_rsfp ? TRUE : FALSE;
226017aa
DD
2575 if (bof) {
2576#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2577# ifdef __GNU_LIBRARY__
2578# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2579# define FTELL_FOR_PIPE_IS_BROKEN
2580# endif
e3f494f1
JH
2581# else
2582# ifdef __GLIBC__
2583# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2584# define FTELL_FOR_PIPE_IS_BROKEN
2585# endif
2586# endif
226017aa
DD
2587# endif
2588#endif
2589#ifdef FTELL_FOR_PIPE_IS_BROKEN
2590 /* This loses the possibility to detect the bof
2591 * situation on perl -P when the libc5 is being used.
2592 * Workaround? Maybe attach some extra state to PL_rsfp?
2593 */
2594 if (!PL_preprocess)
2595 bof = PerlIO_tell(PL_rsfp) == 0;
2596#else
2597 bof = PerlIO_tell(PL_rsfp) == 0;
2598#endif
2599 }
dea0fc0b
JH
2600 s = filter_gets(PL_linestr, PL_rsfp, 0);
2601 if (s == Nullch) {
e929a76b 2602 fake_eof:
3280af22
NIS
2603 if (PL_rsfp) {
2604 if (PL_preprocess && !PL_in_eval)
2605 (void)PerlProc_pclose(PL_rsfp);
2606 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2607 PerlIO_clearerr(PL_rsfp);
395c3793 2608 else
3280af22
NIS
2609 (void)PerlIO_close(PL_rsfp);
2610 PL_rsfp = Nullfp;
4a9ae47a 2611 PL_doextract = FALSE;
395c3793 2612 }
3280af22
NIS
2613 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2614 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2615 sv_catpv(PL_linestr,";}");
2616 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2617 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2618 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2619 goto retry;
2620 }
3280af22
NIS
2621 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2622 sv_setpv(PL_linestr,"");
79072805 2623 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2624 } else if (bof) {
2625 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2626 s = swallow_bom((U8*)s);
378cc40b 2627 }
3280af22 2628 if (PL_doextract) {
a0d0e21e 2629 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2630 PL_doextract = FALSE;
a0d0e21e
LW
2631
2632 /* Incest with pod. */
2633 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2634 sv_setpv(PL_linestr, "");
2635 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2636 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2637 PL_doextract = FALSE;
a0d0e21e 2638 }
4e553d73 2639 }
463ee0b2 2640 incline(s);
3280af22
NIS
2641 } while (PL_doextract);
2642 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2643 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2644 SV *sv = NEWSV(85,0);
a687059c 2645
93a17b20 2646 sv_upgrade(sv, SVt_PVMG);
3280af22 2647 sv_setsv(sv,PL_linestr);
57843af0 2648 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2649 }
3280af22 2650 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2651 if (CopLINE(PL_curcop) == 1) {
3280af22 2652 while (s < PL_bufend && isSPACE(*s))
79072805 2653 s++;
a0d0e21e 2654 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2655 s++;
44a8e56a 2656 d = Nullch;
3280af22 2657 if (!PL_in_eval) {
44a8e56a
PP
2658 if (*s == '#' && *(s+1) == '!')
2659 d = s + 2;
2660#ifdef ALTERNATE_SHEBANG
2661 else {
2662 static char as[] = ALTERNATE_SHEBANG;
2663 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2664 d = s + (sizeof(as) - 1);
2665 }
2666#endif /* ALTERNATE_SHEBANG */
2667 }
2668 if (d) {
b8378b72 2669 char *ipath;
774d564b 2670 char *ipathend;
b8378b72 2671
774d564b 2672 while (isSPACE(*d))
b8378b72
CS
2673 d++;
2674 ipath = d;
774d564b
PP
2675 while (*d && !isSPACE(*d))
2676 d++;
2677 ipathend = d;
2678
2679#ifdef ARG_ZERO_IS_SCRIPT
2680 if (ipathend > ipath) {
2681 /*
2682 * HP-UX (at least) sets argv[0] to the script name,
2683 * which makes $^X incorrect. And Digital UNIX and Linux,
2684 * at least, set argv[0] to the basename of the Perl
2685 * interpreter. So, having found "#!", we'll set it right.
2686 */
2687 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2688 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2689 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2690 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2691 SvSETMAGIC(x);
2692 }
774d564b 2693 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2694 }
774d564b 2695#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2696
2697 /*
2698 * Look for options.
2699 */
748a9306 2700 d = instr(s,"perl -");
84e30d1a 2701 if (!d) {
748a9306 2702 d = instr(s,"perl");
84e30d1a
GS
2703#if defined(DOSISH)
2704 /* avoid getting into infinite loops when shebang
2705 * line contains "Perl" rather than "perl" */
2706 if (!d) {
2707 for (d = ipathend-4; d >= ipath; --d) {
2708 if ((*d == 'p' || *d == 'P')
2709 && !ibcmp(d, "perl", 4))
2710 {
2711 break;
2712 }
2713 }
2714 if (d < ipath)
2715 d = Nullch;
2716 }
2717#endif
2718 }
44a8e56a
PP
2719#ifdef ALTERNATE_SHEBANG
2720 /*
2721 * If the ALTERNATE_SHEBANG on this system starts with a
2722 * character that can be part of a Perl expression, then if
2723 * we see it but not "perl", we're probably looking at the
2724 * start of Perl code, not a request to hand off to some
2725 * other interpreter. Similarly, if "perl" is there, but
2726 * not in the first 'word' of the line, we assume the line
2727 * contains the start of the Perl program.
44a8e56a
PP
2728 */
2729 if (d && *s != '#') {
774d564b 2730 char *c = ipath;
44a8e56a
PP
2731 while (*c && !strchr("; \t\r\n\f\v#", *c))
2732 c++;
2733 if (c < d)
2734 d = Nullch; /* "perl" not in first word; ignore */
2735 else
2736 *s = '#'; /* Don't try to parse shebang line */
2737 }
774d564b 2738#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2739#ifndef MACOS_TRADITIONAL
748a9306 2740 if (!d &&
44a8e56a 2741 *s == '#' &&
774d564b 2742 ipathend > ipath &&
3280af22 2743 !PL_minus_c &&
748a9306 2744 !instr(s,"indir") &&
3280af22 2745 instr(PL_origargv[0],"perl"))
748a9306 2746 {
9f68db38 2747 char **newargv;
9f68db38 2748
774d564b
PP
2749 *ipathend = '\0';
2750 s = ipathend + 1;
3280af22 2751 while (s < PL_bufend && isSPACE(*s))
9f68db38 2752 s++;
3280af22
NIS
2753 if (s < PL_bufend) {
2754 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2755 newargv[1] = s;
3280af22 2756 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2757 s++;
2758 *s = '\0';
3280af22 2759 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2760 }
2761 else
3280af22 2762 newargv = PL_origargv;
774d564b 2763 newargv[0] = ipath;
b4748376 2764 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2765 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2766 }
bf4acbe4 2767#endif
748a9306 2768 if (d) {
3280af22
NIS
2769 U32 oldpdb = PL_perldb;
2770 bool oldn = PL_minus_n;
2771 bool oldp = PL_minus_p;
748a9306
LW
2772
2773 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2774 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2775
2776 if (*d++ == '-') {
8cc95fdb
PP
2777 do {
2778 if (*d == 'M' || *d == 'm') {
2779 char *m = d;
2780 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2781 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2782 (int)(d - m), m);
2783 }
2784 d = moreswitches(d);
2785 } while (d);
155aba94
GS
2786 if ((PERLDB_LINE && !oldpdb) ||
2787 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2788 /* if we have already added "LINE: while (<>) {",
2789 we must not do it again */
748a9306 2790 {
3280af22
NIS
2791 sv_setpv(PL_linestr, "");
2792 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2793 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2794 PL_preambled = FALSE;
84902520 2795 if (PERLDB_LINE)
3280af22 2796 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2797 goto retry;
2798 }
a0d0e21e 2799 }
79072805 2800 }
9f68db38 2801 }
79072805 2802 }
3280af22
NIS
2803 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2804 PL_bufptr = s;
2805 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2806 return yylex();
ae986130 2807 }
378cc40b 2808 goto retry;
4fdae800 2809 case '\r':
6a27c188 2810#ifdef PERL_STRICT_CR
cea2e8a9 2811 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2812 Perl_croak(aTHX_
cc507455 2813 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2814#endif
4fdae800 2815 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2816#ifdef MACOS_TRADITIONAL
2817 case '\312':
2818#endif
378cc40b
LW
2819 s++;
2820 goto retry;
378cc40b 2821 case '#':
e929a76b 2822 case '\n':
3280af22 2823 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2824 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2825 /* handle eval qq[#line 1 "foo"\n ...] */
2826 CopLINE_dec(PL_curcop);
2827 incline(s);
2828 }
3280af22 2829 d = PL_bufend;
a687059c 2830 while (s < d && *s != '\n')
378cc40b 2831 s++;
0f85fab0 2832 if (s < d)
378cc40b 2833 s++;
463ee0b2 2834 incline(s);
3280af22
NIS
2835 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2836 PL_bufptr = s;
2837 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2838 return yylex();
a687059c 2839 }
378cc40b 2840 }
a687059c 2841 else {
378cc40b 2842 *s = '\0';
3280af22 2843 PL_bufend = s;
a687059c 2844 }
378cc40b
LW
2845 goto retry;
2846 case '-':
79072805 2847 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2848 I32 ftst = 0;
2849
378cc40b 2850 s++;
3280af22 2851 PL_bufptr = s;
748a9306
LW
2852 tmp = *s++;
2853
bf4acbe4 2854 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2855 s++;
2856
2857 if (strnEQ(s,"=>",2)) {
3280af22 2858 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2859 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2860 "### Saw unary minus before =>, forcing word '%s'\n", s);
2861 } )
748a9306
LW
2862 OPERATOR('-'); /* unary minus */
2863 }
3280af22 2864 PL_last_uni = PL_oldbufptr;
748a9306 2865 switch (tmp) {
e5edeb50
JH
2866 case 'r': ftst = OP_FTEREAD; break;
2867 case 'w': ftst = OP_FTEWRITE; break;
2868 case 'x': ftst = OP_FTEEXEC; break;
2869 case 'o': ftst = OP_FTEOWNED; break;
2870 case 'R': ftst = OP_FTRREAD; break;
2871 case 'W': ftst = OP_FTRWRITE; break;
2872 case 'X': ftst = OP_FTREXEC; break;
2873 case 'O': ftst = OP_FTROWNED; break;
2874 case 'e': ftst = OP_FTIS; break;
2875 case 'z': ftst = OP_FTZERO; break;
2876 case 's': ftst = OP_FTSIZE; break;
2877 case 'f': ftst = OP_FTFILE; break;
2878 case 'd': ftst = OP_FTDIR; break;
2879 case 'l': ftst = OP_FTLINK; break;
2880 case 'p': ftst = OP_FTPIPE; break;
2881 case 'S': ftst = OP_FTSOCK; break;
2882 case 'u': ftst = OP_FTSUID; break;
2883 case 'g': ftst = OP_FTSGID; break;
2884 case 'k': ftst = OP_FTSVTX; break;
2885 case 'b': ftst = OP_FTBLK; break;
2886 case 'c': ftst = OP_FTCHR; break;
2887 case 't': ftst = OP_FTTTY; break;
2888 case 'T': ftst = OP_FTTEXT; break;
2889 case 'B': ftst = OP_FTBINARY; break;
2890 case 'M': case 'A': case 'C':
2891 gv_fetchpv("\024",TRUE, SVt_PV);
2892 switch (tmp) {
2893 case 'M': ftst = OP_FTMTIME; break;
2894 case 'A': ftst = OP_FTATIME; break;
2895 case 'C': ftst = OP_FTCTIME; break;
2896 default: break;
2897 }
2898 break;
378cc40b 2899 default:
378cc40b
LW
2900 break;
2901 }
e5edeb50
JH
2902 if (ftst) {
2903 PL_last_lop_op = ftst;
4e553d73 2904 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2905 "### Saw file test %c\n", (int)ftst);
e5edeb50 2906 } )
e5edeb50
JH
2907 FTST(ftst);
2908 }
2909 else {
2910 /* Assume it was a minus followed by a one-letter named
2911 * subroutine call (or a -bareword), then. */
95c31fe3 2912 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2913 "### %c looked like a file test but was not\n",
2914 (int)ftst);
95c31fe3 2915 } )
e5edeb50
JH
2916 s -= 2;
2917 }
378cc40b 2918 }
a687059c
LW
2919 tmp = *s++;
2920 if (*s == tmp) {
2921 s++;
3280af22 2922 if (PL_expect == XOPERATOR)
79072805
LW
2923 TERM(POSTDEC);
2924 else
2925 OPERATOR(PREDEC);
2926 }
2927 else if (*s == '>') {
2928 s++;
2929 s = skipspace(s);
7e2040f0 2930 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2931 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2932 TOKEN(ARROW);
79072805 2933 }
748a9306
LW
2934 else if (*s == '$')
2935 OPERATOR(ARROW);
463ee0b2 2936 else
748a9306 2937 TERM(ARROW);
a687059c 2938 }
3280af22 2939 if (PL_expect == XOPERATOR)
79072805
LW
2940 Aop(OP_SUBTRACT);
2941 else {
3280af22 2942 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2943 check_uni();
79072805 2944 OPERATOR('-'); /* unary minus */
2f3197b3 2945 }
79072805 2946
378cc40b 2947 case '+':
a687059c
LW
2948 tmp = *s++;
2949 if (*s == tmp) {
378cc40b 2950 s++;
3280af22 2951 if (PL_expect == XOPERATOR)
79072805
LW
2952 TERM(POSTINC);
2953 else
2954 OPERATOR(PREINC);
378cc40b 2955 }
3280af22 2956 if (PL_expect == XOPERATOR)
79072805
LW
2957 Aop(OP_ADD);
2958 else {
3280af22 2959 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2960 check_uni();
a687059c 2961 OPERATOR('+');
2f3197b3 2962 }
a687059c 2963
378cc40b 2964 case '*':
3280af22
NIS
2965 if (PL_expect != XOPERATOR) {
2966 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2967 PL_expect = XOPERATOR;
2968 force_ident(PL_tokenbuf, '*');
2969 if (!*PL_tokenbuf)
a0d0e21e 2970 PREREF('*');
79072805 2971 TERM('*');
a687059c 2972 }
79072805
LW
2973 s++;
2974 if (*s == '*') {
a687059c 2975 s++;
79072805 2976 PWop(OP_POW);
a687059c 2977 }
79072805
LW
2978 Mop(OP_MULTIPLY);
2979
378cc40b 2980 case '%':
3280af22 2981 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2982 ++s;
2983 Mop(OP_MODULO);
a687059c 2984 }
3280af22
NIS
2985 PL_tokenbuf[0] = '%';
2986 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2987 if (!PL_tokenbuf[1]) {
2988 if (s == PL_bufend)
bbce6d69
PP
2989 yyerror("Final % should be \\% or %name");
2990 PREREF('%');
a687059c 2991 }
3280af22 2992 PL_pending_ident = '%';
bbce6d69 2993 TERM('%');
a687059c 2994
378cc40b 2995 case '^':
79072805 2996 s++;
a0d0e21e 2997 BOop(OP_BIT_XOR);
79072805 2998 case '[':
3280af22 2999 PL_lex_brackets++;
79072805 3000 /* FALL THROUGH */
378cc40b 3001 case '~':
378cc40b 3002 case ',':
378cc40b
LW
3003 tmp = *s++;
3004 OPERATOR(tmp);
a0d0e21e
LW
3005 case ':':
3006 if (s[1] == ':') {
3007 len = 0;
3008 goto just_a_word;
3009 }
3010 s++;
09bef843
SB
3011 switch (PL_expect) {
3012 OP *attrs;
3013 case XOPERATOR:
3014 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3015 break;
3016 PL_bufptr = s; /* update in case we back off */
3017 goto grabattrs;
3018 case XATTRBLOCK:
3019 PL_expect = XBLOCK;
3020 goto grabattrs;
3021 case XATTRTERM:
3022 PL_expect = XTERMBLOCK;
3023 grabattrs:
3024 s = skipspace(s);
3025 attrs = Nullop;
7e2040f0 3026 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3027 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3028 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3029 if (tmp < 0) tmp = -tmp;
3030 switch (tmp) {
3031 case KEY_or:
3032 case KEY_and:
3033 case KEY_for:
3034 case KEY_unless:
3035 case KEY_if:
3036 case KEY_while:
3037 case KEY_until:
3038 goto got_attrs;
3039 default:
3040 break;
3041 }
3042 }
09bef843
SB
3043 if (*d == '(') {
3044 d = scan_str(d,TRUE,TRUE);
3045 if (!d) {
3046 if (PL_lex_stuff) {
3047 SvREFCNT_dec(PL_lex_stuff);
3048 PL_lex_stuff = Nullsv;
3049 }
3050 /* MUST advance bufptr here to avoid bogus
3051 "at end of line" context messages from yyerror().
3052 */
3053 PL_bufptr = s + len;
3054 yyerror("Unterminated attribute parameter in attribute list");
3055 if (attrs)
3056 op_free(attrs);
3057 return 0; /* EOF indicator */
3058 }
3059 }
3060 if (PL_lex_stuff) {
3061 SV *sv = newSVpvn(s, len);
3062 sv_catsv(sv, PL_lex_stuff);
3063 attrs = append_elem(OP_LIST, attrs,
3064 newSVOP(OP_CONST, 0, sv));
3065 SvREFCNT_dec(PL_lex_stuff);
3066 PL_lex_stuff = Nullsv;
3067 }
3068 else {
78f9721b
SM
3069 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3070 CvLVALUE_on(PL_compcv);
3071 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3072 CvLOCKED_on(PL_compcv);
3073 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3074 CvMETHOD_on(PL_compcv);
3075 /* After we've set the flags, it could be argued that
3076 we don't need to do the attributes.pm-based setting
3077 process, and shouldn't bother appending recognized
3078 flags. To experiment with that, uncomment the
3079 following "else": */
3080 /* else */
3081 attrs = append_elem(OP_LIST, attrs,
3082 newSVOP(OP_CONST, 0,
3083 newSVpvn(s, len)));
09bef843
SB
3084 }
3085 s = skipspace(d);
0120eecf 3086 if (*s == ':' && s[1] != ':')
09bef843 3087 s = skipspace(s+1);
0120eecf
GS
3088 else if (s == d)
3089 break; /* require real whitespace or :'s */
09bef843 3090 }
f9829d6b
GS
3091 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3092 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3093 char q = ((*s == '\'') ? '"' : '\'');
3094 /* If here for an expression, and parsed no attrs, back off. */
3095 if (tmp == '=' && !attrs) {
3096 s = PL_bufptr;
3097 break;
3098 }
3099 /* MUST advance bufptr here to avoid bogus "at end of line"
3100 context messages from yyerror().
3101 */
3102 PL_bufptr = s;
3103 if (!*s)
3104 yyerror("Unterminated attribute list");
3105 else
3106 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3107 q, *s, q));
3108 if (attrs)
3109 op_free(attrs);
3110 OPERATOR(':');
3111 }
f9829d6b 3112 got_attrs:
09bef843
SB
3113 if (attrs) {
3114 PL_nextval[PL_nexttoke].opval = attrs;
3115 force_next(THING);
3116 }
3117 TOKEN(COLONATTR);
3118 }
a0d0e21e 3119 OPERATOR(':');
8990e307
LW
3120 case '(':
3121 s++;
3280af22
NIS
3122 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3123 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3124 else
3280af22 3125 PL_expect = XTERM;
a0d0e21e 3126 TOKEN('(');
378cc40b 3127 case ';':
f4dd75d9 3128 CLINE;
378cc40b
LW
3129 tmp = *s++;
3130 OPERATOR(tmp);
3131 case ')':
378cc40b 3132 tmp = *s++;
16d20bd9
AD
3133 s = skipspace(s);
3134 if (*s == '{')
3135 PREBLOCK(tmp);
378cc40b 3136 TERM(tmp);
79072805
LW
3137 case ']':
3138 s++;
3280af22 3139 if (PL_lex_brackets <= 0)
d98d5fff 3140 yyerror("Unmatched right square bracket");
463ee0b2 3141 else
3280af22
NIS
3142 --PL_lex_brackets;
3143 if (PL_lex_state == LEX_INTERPNORMAL) {
3144 if (PL_lex_brackets == 0) {
a0d0e21e 3145 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3146 PL_lex_state = LEX_INTERPEND;
79072805
LW
3147 }
3148 }
4633a7c4 3149 TERM(']');
79072805
LW
3150 case '{':
3151 leftbracket:
79072805 3152 s++;
3280af22
NIS
3153 if (PL_lex_brackets > 100) {
3154 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3155 if (newlb != PL_lex_brackstack) {
8990e307 3156 SAVEFREEPV(newlb);
3280af22 3157 PL_lex_brackstack = newlb;
8990e307
LW
3158 }
3159 }
3280af22 3160 switch (PL_expect) {
a0d0e21e 3161 case XTERM:
3280af22 3162 if (PL_lex_formbrack) {
a0d0e21e
LW
3163 s--;
3164 PRETERMBLOCK(DO);
3165 }
3280af22
NIS
3166 if (PL_oldoldbufptr == PL_last_lop)
3167 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3168 else
3280af22 3169 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3170 OPERATOR(HASHBRACK);
a0d0e21e 3171 case XOPERATOR:
bf4acbe4 3172 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3173 s++;
44a8e56a 3174 d = s;
3280af22
NIS
3175 PL_tokenbuf[0] = '\0';
3176 if (d < PL_bufend && *d == '-') {
3177 PL_tokenbuf[0] = '-';
44a8e56a 3178 d++;
bf4acbe4 3179 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3180 d++;
3181 }
7e2040f0 3182 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3183 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3184 FALSE, &len);
bf4acbe4 3185 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3186 d++;
3187 if (*d == '}') {
3280af22 3188 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3189 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
7948272d
NIS
3190 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
3191 PL_nextval[PL_nexttoke-1].opval)
3192 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
44a8e56a
PP
3193 if (minus)
3194 force_next('-');
748a9306
LW
3195 }
3196 }
3197 /* FALL THROUGH */
09bef843 3198 case XATTRBLOCK:
748a9306 3199 case XBLOCK:
3280af22
NIS
3200 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3201 PL_expect = XSTATE;
a0d0e21e 3202 break;
09bef843 3203 case XATTRTERM:
a0d0e21e 3204 case XTERMBLOCK:
3280af22
NIS
3205 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3206 PL_expect = XSTATE;
a0d0e21e
LW
3207 break;
3208 default: {
3209 char *t;
3280af22
NIS
3210 if (PL_oldoldbufptr == PL_last_lop)
3211 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3212 else
3280af22 3213 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3214 s = skipspace(s);
09ecc4b6 3215 if (*s == '}')
a0d0e21e 3216 OPERATOR(HASHBRACK);
b8a4b1be
GS
3217 /* This hack serves to disambiguate a pair of curlies
3218 * as being a block or an anon hash. Normally, expectation
3219 * determines that, but in cases where we're not in a
3220 * position to expect anything in particular (like inside
3221 * eval"") we have to resolve the ambiguity. This code
3222 * covers the case where the first term in the curlies is a
3223 * quoted string. Most other cases need to be explicitly
3224 * disambiguated by prepending a `+' before the opening
3225 * curly in order to force resolution as an anon hash.
3226 *
3227 * XXX should probably propagate the outer expectation
3228 * into eval"" to rely less on this hack, but that could
3229 * potentially break current behavior of eval"".
3230 * GSAR 97-07-21
3231 */
3232 t = s;
3233 if (*s == '\'' || *s == '"' || *s == '`') {
3234 /* common case: get past first string, handling escapes */
3280af22 3235 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3236 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3237 t++;
3238 t++;
a0d0e21e 3239 }
b8a4b1be 3240 else if (*s == 'q') {
3280af22 3241 if (++t < PL_bufend
b8a4b1be 3242 && (!isALNUM(*t)
3280af22 3243 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3244 && !isALNUM(*t))))
3245 {
b8a4b1be
GS
3246 char *tmps;
3247 char open, close, term;
3248 I32 brackets = 1;
3249
3280af22 3250 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3251 t++;
3252 term = *t;
3253 open = term;
3254 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3255 term = tmps[5];
3256 close = term;
3257 if (open == close)
3280af22
NIS
3258 for (t++; t < PL_bufend; t++) {
3259 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3260 t++;
6d07e5e9 3261 else if (*t == open)
b8a4b1be
GS
3262 break;
3263 }
3264 else
3280af22
NIS
3265 for (t++; t < PL_bufend; t++) {
3266 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3267 t++;
6d07e5e9 3268 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3269 break;
3270 else if (*t == open)
3271 brackets++;
3272 }
3273 }
3274 t++;
a0d0e21e 3275 }
7e2040f0 3276 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3277 t += UTF8SKIP(t);
7e2040f0 3278 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3279 t += UTF8SKIP(t);
a0d0e21e 3280 }
3280af22 3281 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3282 t++;
b8a4b1be
GS
3283 /* if comma follows first term, call it an anon hash */
3284 /* XXX it could be a comma expression with loop modifiers */
3280af22 3285 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3286 || (*t == '=' && t[1] == '>')))
a0d0e21e 3287 OPERATOR(HASHBRACK);
3280af22 3288 if (PL_expect == XREF)
4e4e412b 3289 PL_expect = XTERM;
a0d0e21e 3290 else {
3280af22
NIS
3291 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3292 PL_expect = XSTATE;
a0d0e21e 3293 }
8990e307 3294 }
a0d0e21e 3295 break;
463ee0b2 3296 }
57843af0 3297 yylval.ival = CopLINE(PL_curcop);
79072805 3298 if (isSPACE(*s) || *s == '#')
3280af22 3299 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3300 TOKEN('{');
378cc40b 3301 case '}':
79072805
LW
3302 rightbracket:
3303 s++;
3280af22 3304 if (PL_lex_brackets <= 0)
d98d5fff 3305 yyerror("Unmatched right curly bracket");
463ee0b2 3306 else
3280af22 3307 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3308 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3309 PL_lex_formbrack = 0;
3310 if (PL_lex_state == LEX_INTERPNORMAL) {
3311 if (PL_lex_brackets == 0) {
9059aa12
LW
3312 if (PL_expect & XFAKEBRACK) {
3313 PL_expect &= XENUMMASK;
3280af22
NIS
3314 PL_lex_state = LEX_INTERPEND;
3315 PL_bufptr = s;
cea2e8a9 3316 return yylex(); /* ignore fake brackets */
79072805 3317 }
fa83b5b6 3318 if (*s == '-' && s[1] == '>')
3280af22 3319 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3320 else if (*s != '[' && *s != '{')
3280af22 3321 PL_lex_state = LEX_INTERPEND;
79072805
LW
3322 }
3323 }
9059aa12
LW
3324 if (PL_expect & XFAKEBRACK) {
3325 PL_expect &= XENUMMASK;
3280af22 3326 PL_bufptr = s;
cea2e8a9 3327 return yylex(); /* ignore fake brackets */
748a9306 3328 }
79072805
LW
3329 force_next('}');
3330 TOKEN(';');
378cc40b
LW
3331 case '&':
3332 s++;
3333 tmp = *s++;
3334 if (tmp == '&')
a0d0e21e 3335 AOPERATOR(ANDAND);
378cc40b 3336 s--;
3280af22 3337 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3338 if (ckWARN(WARN_SEMICOLON)
3339 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3340 {
57843af0 3341 CopLINE_dec(PL_curcop);
cea2e8a9 3342 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3343 CopLINE_inc(PL_curcop);
463ee0b2 3344 }
79072805 3345 BAop(OP_BIT_AND);
463ee0b2 3346 }
79072805 3347
3280af22
NIS
3348 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3349 if (*PL_tokenbuf) {
3350 PL_expect = XOPERATOR;
3351 force_ident(PL_tokenbuf, '&');
463ee0b2 3352 }
79072805
LW
3353 else
3354 PREREF('&');
c07a80fd 3355 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3356 TERM('&');
3357
378cc40b
LW
3358 case '|':
3359 s++;
3360 tmp = *s++;
3361 if (tmp == '|')
a0d0e21e 3362 AOPERATOR(OROR);
378cc40b 3363 s--;
79072805 3364 BOop(OP_BIT_OR);
378cc40b
LW
3365 case '=':
3366 s++;
3367 tmp = *s++;
3368 if (tmp == '=')
79072805
LW
3369 Eop(OP_EQ);
3370 if (tmp == '>')
3371 OPERATOR(',');
378cc40b 3372 if (tmp == '~')
79072805 3373 PMop(OP_MATCH);
599cee73 3374 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3375 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3376 s--;
3280af22
NIS
3377 if (PL_expect == XSTATE && isALPHA(tmp) &&
3378 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3379 {
3280af22
NIS
3380 if (PL_in_eval && !PL_rsfp) {
3381 d = PL_bufend;
a5f75d66
AD
3382 while (s < d) {
3383 if (*s++ == '\n') {
3384 incline(s);
3385 if (strnEQ(s,"=cut",4)) {
3386 s = strchr(s,'\n');
3387 if (s)
3388 s++;
3389 else
3390 s = d;
3391 incline(s);
3392 goto retry;
3393 }
3394 }
3395 }
3396 goto retry;
3397 }
3280af22
NIS
3398 s = PL_bufend;
3399 PL_doextract = TRUE;
a0d0e21e
LW
3400 goto retry;
3401 }
3280af22 3402 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3403 char *t;
51882d45 3404#ifdef PERL_STRICT_CR
bf4acbe4 3405 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3406#else
bf4acbe4 3407 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3408#endif
a0d0e21e
LW
3409 if (*t == '\n' || *t == '#') {
3410 s--;
3280af22 3411 PL_expect = XBLOCK;
a0d0e21e
LW
3412 goto leftbracket;
3413 }
79072805 3414 }
a0d0e21e
LW
3415 yylval.ival = 0;
3416 OPERATOR(ASSIGNOP);
378cc40b
LW
3417 case '!':
3418 s++;
3419 tmp = *s++;
3420 if (tmp == '=')
79072805 3421 Eop(OP_NE);
378cc40b 3422 if (tmp == '~')
79072805 3423 PMop(OP_NOT);
378cc40b
LW
3424 s--;
3425 OPERATOR('!');
3426 case '<':
3280af22 3427 if (PL_expect != XOPERATOR) {
93a17b20 3428 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3429 check_uni();
79072805
LW
3430 if (s[1] == '<')
3431 s = scan_heredoc(s);
3432 else
3433 s = scan_inputsymbol(s);
3434 TERM(sublex_start());
378cc40b
LW
3435 }
3436 s++;
3437 tmp = *s++;
3438 if (tmp == '<')
79072805 3439 SHop(OP_LEFT_SHIFT);
395c3793
LW
3440 if (tmp == '=') {
3441 tmp = *s++;
3442 if (tmp == '>')
79072805 3443 Eop(OP_NCMP);
395c3793 3444 s--;
79072805 3445 Rop(OP_LE);
395c3793 3446 }
378cc40b 3447 s--;
79072805 3448 Rop(OP_LT);
378cc40b
LW
3449 case '>':
3450 s++;
3451 tmp = *s++;
3452 if (tmp == '>')
79072805 3453 SHop(OP_RIGHT_SHIFT);
378cc40b 3454 if (tmp == '=')
79072805 3455 Rop(OP_GE);
378cc40b 3456 s--;
79072805 3457 Rop(OP_GT);
378cc40b
LW
3458
3459 case '$':
bbce6d69
PP
3460 CLINE;
3461
3280af22
NIS
3462 if (PL_expect == XOPERATOR) {
3463 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3464 PL_expect = XTERM;
a0d0e21e 3465 depcom();
bbce6d69 3466 return ','; /* grandfather non-comma-format format */
a0d0e21e 3467 }
8990e307 3468 }
a0d0e21e 3469
7e2040f0 3470 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3471 PL_tokenbuf[0] = '@';
376b8730
SM
3472 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3473 sizeof PL_tokenbuf - 1, FALSE);
3474 if (PL_expect == XOPERATOR)
3475 no_op("Array length", s);
3280af22 3476 if (!PL_tokenbuf[1])
a0d0e21e 3477 PREREF(DOLSHARP);
3280af22
NIS
3478 PL_expect = XOPERATOR;
3479 PL_pending_ident = '#';
463ee0b2 3480 TOKEN(DOLSHARP);
79072805 3481 }
bbce6d69 3482
3280af22 3483 PL_tokenbuf[0] = '$';
376b8730
SM
3484 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3485 sizeof PL_tokenbuf - 1, FALSE);
3486 if (PL_expect == XOPERATOR)
3487 no_op("Scalar", s);
3280af22
NIS
3488 if (!PL_tokenbuf[1]) {
3489 if (s == PL_bufend)
bbce6d69
PP
3490 yyerror("Final $ should be \\$ or $name");
3491 PREREF('$');
8990e307 3492 }
a0d0e21e 3493
bbce6d69 3494 /* This kludge not intended to be bulletproof. */
3280af22 3495 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3496 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3497 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3498 yylval.opval->op_private = OPpCONST_ARYBASE;
3499 TERM(THING);
3500 }
3501
ff68c719 3502 d = s;
69d2bceb 3503 tmp = (I32)*s;
3280af22 3504 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3505 s = skipspace(s);
3506
3280af22 3507 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3508 char *t;
3509 if (*s == '[') {
3280af22 3510 PL_tokenbuf[0] = '@';
599cee73 3511 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3512 for(t = s + 1;
7e2040f0 3513 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3514 t++) ;
a0d0e21e 3515 if (*t++ == ',') {
3280af22
NIS
3516 PL_bufptr = skipspace(PL_bufptr);
3517 while (t < PL_bufend && *t != ']')
bbce6d69 3518 t++;
cea2e8a9 3519 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3520 "Multidimensional syntax %.*s not supported",
3521 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3522 }
3523 }
bbce6d69
PP
3524 }
3525 else if (*s == '{') {
3280af22 3526 PL_tokenbuf[0] = '%';
599cee73 3527 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3528 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3529 {
3280af22 3530 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3531 STRLEN len;
3532 for (t++; isSPACE(*t); t++) ;
7e2040f0 3533 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3534 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3535 for (; isSPACE(*t); t++) ;
864dbfa3 3536 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3537 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3538 "You need to quote \"%s\"", tmpbuf);
748a9306 3539 }
93a17b20
LW
3540 }
3541 }
2f3197b3 3542 }
bbce6d69 3543
3280af22 3544 PL_expect = XOPERATOR;
69d2bceb 3545 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3546 bool islop = (PL_last_lop == PL_oldoldbufptr);
3547 if (!islop || PL_last_lop_op == OP_GREPSTART)
3548 PL_expect = XOPERATOR;
bbce6d69 3549 else if (strchr("$@\"'`q", *s))
3280af22 3550 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3551 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3552 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3553 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3554 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3555 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3556 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3557 /* binary operators exclude handle interpretations */
3558 switch (tmp) {
3559 case -KEY_x:
3560 case -KEY_eq:
3561 case -KEY_ne:
3562 case -KEY_gt:
3563 case -KEY_lt:
3564 case -KEY_ge:
3565 case -KEY_le:
3566 case -KEY_cmp:
3567 break;
3568 default:
3280af22 3569 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
35