This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix bug #15837, temporary from storable call hadn't gotten
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff 65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
IRC
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
IRC
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
6f33ba73 170#define UNI2(f,x) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c 176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
177#define UNI(f) UNI2(f,XTERM)
178#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 179
79072805 180#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 181 REPORT("uni",f) \
3280af22
NIS
182 PL_bufptr = s, \
183 PL_last_uni = PL_oldbufptr, \
79072805
LW
184 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
185
9f68db38 186/* grandfather return to old style */
3280af22 187#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 188
8fa7f367
JH
189#ifdef DEBUGGING
190
2d00ba3b 191STATIC void
61b2116b 192S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 193{
998054bd 194 DEBUG_T({
9c5ffd7c 195 SV* report = newSVpv(thing, 0);
29b291f7
RB
196 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
197 (IV)rv);
998054bd
SC
198
199 if (s - PL_bufptr > 0)
200 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
201 else {
202 if (PL_oldbufptr && *PL_oldbufptr)
203 sv_catpv(report, PL_tokenbuf);
204 }
205 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 206 });
998054bd
SC
207}
208
8fa7f367
JH
209#endif
210
ffb4593c
NT
211/*
212 * S_ao
213 *
c963b151
BD
214 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
215 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
216 */
217
76e3520e 218STATIC int
cea2e8a9 219S_ao(pTHX_ int toketype)
a0d0e21e 220{
3280af22
NIS
221 if (*PL_bufptr == '=') {
222 PL_bufptr++;
a0d0e21e
LW
223 if (toketype == ANDAND)
224 yylval.ival = OP_ANDASSIGN;
225 else if (toketype == OROR)
226 yylval.ival = OP_ORASSIGN;
c963b151
BD
227 else if (toketype == DORDOR)
228 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
229 toketype = ASSIGNOP;
230 }
231 return toketype;
232}
233
ffb4593c
NT
234/*
235 * S_no_op
236 * When Perl expects an operator and finds something else, no_op
237 * prints the warning. It always prints "<something> found where
238 * operator expected. It prints "Missing semicolon on previous line?"
239 * if the surprise occurs at the start of the line. "do you need to
240 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
241 * where the compiler doesn't know if foo is a method call or a function.
242 * It prints "Missing operator before end of line" if there's nothing
243 * after the missing operator, or "... before <...>" if there is something
244 * after the missing operator.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_no_op(pTHX_ char *what, char *s)
463ee0b2 249{
3280af22
NIS
250 char *oldbp = PL_bufptr;
251 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 252
1189a94a
GS
253 if (!s)
254 s = oldbp;
07c798fb 255 else
1189a94a 256 PL_bufptr = s;
cea2e8a9 257 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 258 if (is_first)
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 260 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 261 char *t;
7e2040f0 262 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 263 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 264 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 265 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 266 }
07c798fb
HS
267 else {
268 assert(s >= oldbp);
cea2e8a9 269 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 270 }
3280af22 271 PL_bufptr = oldbp;
8990e307
LW
272}
273
ffb4593c
NT
274/*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
76e3520e 283STATIC void
cea2e8a9 284S_missingterm(pTHX_ char *s)
8990e307
LW
285{
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
d2719217 290 if (nl)
8990e307
LW
291 *nl = '\0';
292 }
9d116dd7
JH
293 else if (
294#ifdef EBCDIC
295 iscntrl(PL_multi_close)
296#else
297 PL_multi_close < 32 || PL_multi_close == 127
298#endif
299 ) {
8990e307 300 *tmpbuf = '^';
3280af22 301 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
eb160463 307 *tmpbuf = (char)PL_multi_close;
8990e307
LW
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 313}
79072805 314
ffb4593c
NT
315/*
316 * Perl_deprecate
ffb4593c
NT
317 */
318
79072805 319void
864dbfa3 320Perl_deprecate(pTHX_ char *s)
a0d0e21e 321{
599cee73 322 if (ckWARN(WARN_DEPRECATED))
9014280d 323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
324}
325
12bcd1a6
PM
326void
327Perl_deprecate_old(pTHX_ char *s)
328{
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340}
341
ffb4593c
NT
342/*
343 * depcom
9cbb5ea2 344 * Deprecate a comma-less variable list.
ffb4593c
NT
345 */
346
76e3520e 347STATIC void
cea2e8a9 348S_depcom(pTHX)
a0d0e21e 349{
12bcd1a6 350 deprecate_old("comma-less variable list");
a0d0e21e
LW
351}
352
ffb4593c 353/*
9cbb5ea2
GS
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
ffb4593c
NT
356 */
357
c39cd008
GS
358#ifdef PERL_CR_FILTER
359static void
360strip_return(SV *sv)
361{
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379}
a868473f 380
76e3520e 381STATIC I32
c39cd008 382S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 383{
c39cd008
GS
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
a868473f
NIS
388}
389#endif
390
ffb4593c
NT
391/*
392 * Perl_lex_start
9cbb5ea2
GS
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
ffb4593c
NT
395 */
396
a0d0e21e 397void
864dbfa3 398Perl_lex_start(pTHX_ SV *line)
79072805 399{
8990e307
LW
400 char *s;
401 STRLEN len;
402
3280af22
NIS
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
3280af22
NIS
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
7766f137 408 SAVEVPTR(PL_lex_inpat);
3280af22 409 SAVEI32(PL_lex_inwhat);
18b09519
GS
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
18b09519 417 }
57843af0 418 SAVECOPLINE(PL_curcop);
3280af22
NIS
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
3280af22
NIS
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
8edd5f42
RGS
427 SAVEGENERICPV(PL_lex_brackstack);
428 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
09bef843 432 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 433 SAVESPTR(PL_lex_repl);
bebdddfc
GS
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
3280af22
NIS
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
3280af22
NIS
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
443 PL_lex_casemods = 0;
444 *PL_lex_casestack = '\0';
445 PL_lex_dojoin = 0;
446 PL_lex_starts = 0;
447 PL_lex_stuff = Nullsv;
448 PL_lex_repl = Nullsv;
449 PL_lex_inpat = 0;
76be56bc 450 PL_nexttoke = 0;
3280af22 451 PL_lex_inwhat = 0;
09bef843 452 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
453 PL_linestr = line;
454 if (SvREADONLY(PL_linestr))
455 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
456 s = SvPV(PL_linestr, len);
6f27f9a7 457 if (!len || s[len-1] != ';') {
3280af22
NIS
458 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
459 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
460 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 461 }
3280af22
NIS
462 SvTEMP_off(PL_linestr);
463 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
464 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 465 PL_last_lop = PL_last_uni = Nullch;
3280af22 466 PL_rsfp = 0;
79072805 467}
a687059c 468
ffb4593c
NT
469/*
470 * Perl_lex_end
9cbb5ea2
GS
471 * Finalizer for lexing operations. Must be called when the parser is
472 * done with the lexer.
ffb4593c
NT
473 */
474
463ee0b2 475void
864dbfa3 476Perl_lex_end(pTHX)
463ee0b2 477{
3280af22 478 PL_doextract = FALSE;
463ee0b2
LW
479}
480
ffb4593c
NT
481/*
482 * S_incline
483 * This subroutine has nothing to do with tilting, whether at windmills
484 * or pinball tables. Its name is short for "increment line". It
57843af0 485 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 486 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
487 * # line 500 "foo.pm"
488 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
489 */
490
76e3520e 491STATIC void
cea2e8a9 492S_incline(pTHX_ char *s)
463ee0b2
LW
493{
494 char *t;
495 char *n;
73659bf1 496 char *e;
463ee0b2 497 char ch;
463ee0b2 498
57843af0 499 CopLINE_inc(PL_curcop);
463ee0b2
LW
500 if (*s++ != '#')
501 return;
bf4acbe4 502 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
503 if (strnEQ(s, "line", 4))
504 s += 4;
505 else
506 return;
084592ab 507 if (SPACE_OR_TAB(*s))
73659bf1 508 s++;
4e553d73 509 else
73659bf1 510 return;
bf4acbe4 511 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
512 if (!isDIGIT(*s))
513 return;
514 n = s;
515 while (isDIGIT(*s))
516 s++;
bf4acbe4 517 while (SPACE_OR_TAB(*s))
463ee0b2 518 s++;
73659bf1 519 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 520 s++;
73659bf1
GS
521 e = t + 1;
522 }
463ee0b2 523 else {
463ee0b2 524 for (t = s; !isSPACE(*t); t++) ;
73659bf1 525 e = t;
463ee0b2 526 }
bf4acbe4 527 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
528 e++;
529 if (*e != '\n' && *e != '\0')
530 return; /* false alarm */
531
463ee0b2
LW
532 ch = *t;
533 *t = '\0';
f4dd75d9 534 if (t - s > 0) {
05ec9bb3 535 CopFILE_free(PL_curcop);
57843af0 536 CopFILE_set(PL_curcop, s);
f4dd75d9 537 }
463ee0b2 538 *t = ch;
57843af0 539 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
540}
541
ffb4593c
NT
542/*
543 * S_skipspace
544 * Called to gobble the appropriate amount and type of whitespace.
545 * Skips comments as well.
546 */
547
76e3520e 548STATIC char *
cea2e8a9 549S_skipspace(pTHX_ register char *s)
a687059c 550{
3280af22 551 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 552 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
553 s++;
554 return s;
555 }
556 for (;;) {
fd049845 557 STRLEN prevlen;
09bef843 558 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 559 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
560 while (s < PL_bufend && isSPACE(*s)) {
561 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
562 incline(s);
563 }
ffb4593c
NT
564
565 /* comment */
3280af22
NIS
566 if (s < PL_bufend && *s == '#') {
567 while (s < PL_bufend && *s != '\n')
463ee0b2 568 s++;
60e6418e 569 if (s < PL_bufend) {
463ee0b2 570 s++;
60e6418e
GS
571 if (PL_in_eval && !PL_rsfp) {
572 incline(s);
573 continue;
574 }
575 }
463ee0b2 576 }
ffb4593c
NT
577
578 /* only continue to recharge the buffer if we're at the end
579 * of the buffer, we're not reading from a source filter, and
580 * we're in normal lexing mode
581 */
09bef843
SB
582 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
583 PL_lex_state == LEX_FORMLINE)
463ee0b2 584 return s;
ffb4593c
NT
585
586 /* try to recharge the buffer */
9cbb5ea2
GS
587 if ((s = filter_gets(PL_linestr, PL_rsfp,
588 (prevlen = SvCUR(PL_linestr)))) == Nullch)
589 {
590 /* end of file. Add on the -p or -n magic */
3280af22
NIS
591 if (PL_minus_n || PL_minus_p) {
592 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
593 ";}continue{print or die qq(-p destination: $!\\n)" :
594 "");
3280af22
NIS
595 sv_catpv(PL_linestr,";}");
596 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
597 }
598 else
3280af22 599 sv_setpv(PL_linestr,";");
ffb4593c
NT
600
601 /* reset variables for next time we lex */
9cbb5ea2
GS
602 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
603 = SvPVX(PL_linestr);
3280af22 604 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 605 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
606
607 /* Close the filehandle. Could be from -P preprocessor,
608 * STDIN, or a regular file. If we were reading code from
609 * STDIN (because the commandline held no -e or filename)
610 * then we don't close it, we reset it so the code can
611 * read from STDIN too.
612 */
613
3280af22
NIS
614 if (PL_preprocess && !PL_in_eval)
615 (void)PerlProc_pclose(PL_rsfp);
616 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
617 PerlIO_clearerr(PL_rsfp);
8990e307 618 else
3280af22
NIS
619 (void)PerlIO_close(PL_rsfp);
620 PL_rsfp = Nullfp;
463ee0b2
LW
621 return s;
622 }
ffb4593c
NT
623
624 /* not at end of file, so we only read another line */
09bef843
SB
625 /* make corresponding updates to old pointers, for yyerror() */
626 oldprevlen = PL_oldbufptr - PL_bufend;
627 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
628 if (PL_last_uni)
629 oldunilen = PL_last_uni - PL_bufend;
630 if (PL_last_lop)
631 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
632 PL_linestart = PL_bufptr = s + prevlen;
633 PL_bufend = s + SvCUR(PL_linestr);
634 s = PL_bufptr;
09bef843
SB
635 PL_oldbufptr = s + oldprevlen;
636 PL_oldoldbufptr = s + oldoldprevlen;
637 if (PL_last_uni)
638 PL_last_uni = s + oldunilen;
639 if (PL_last_lop)
640 PL_last_lop = s + oldloplen;
a0d0e21e 641 incline(s);
ffb4593c
NT
642
643 /* debugger active and we're not compiling the debugger code,
644 * so store the line into the debugger's array of lines
645 */
3280af22 646 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
647 SV *sv = NEWSV(85,0);
648
649 sv_upgrade(sv, SVt_PVMG);
3280af22 650 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
651 (void)SvIOK_on(sv);
652 SvIVX(sv) = 0;
57843af0 653 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 654 }
463ee0b2 655 }
a687059c 656}
378cc40b 657
ffb4593c
NT
658/*
659 * S_check_uni
660 * Check the unary operators to ensure there's no ambiguity in how they're
661 * used. An ambiguous piece of code would be:
662 * rand + 5
663 * This doesn't mean rand() + 5. Because rand() is a unary operator,
664 * the +5 is its argument.
665 */
666
76e3520e 667STATIC void
cea2e8a9 668S_check_uni(pTHX)
ba106d47 669{
2f3197b3 670 char *s;
a0d0e21e 671 char *t;
2f3197b3 672
3280af22 673 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 674 return;
3280af22
NIS
675 while (isSPACE(*PL_last_uni))
676 PL_last_uni++;
7e2040f0 677 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 678 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 679 return;
0453d815 680 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 681 char ch = *s;
0453d815 682 *s = '\0';
9014280d 683 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 684 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
685 PL_last_uni);
686 *s = ch;
687 }
2f3197b3
LW
688}
689
ffb4593c
NT
690/*
691 * LOP : macro to build a list operator. Its behaviour has been replaced
692 * with a subroutine, S_lop() for which LOP is just another name.
693 */
694
a0d0e21e
LW
695#define LOP(f,x) return lop(f,x,s)
696
ffb4593c
NT
697/*
698 * S_lop
699 * Build a list operator (or something that might be one). The rules:
700 * - if we have a next token, then it's a list operator [why?]
701 * - if the next thing is an opening paren, then it's a function
702 * - else it's a list operator
703 */
704
76e3520e 705STATIC I32
a0be28da 706S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 707{
79072805 708 yylval.ival = f;
35c8bce7 709 CLINE;
075953c3 710 REPORT("lop", f)
3280af22
NIS
711 PL_expect = x;
712 PL_bufptr = s;
713 PL_last_lop = PL_oldbufptr;
eb160463 714 PL_last_lop_op = (OPCODE)f;
3280af22 715 if (PL_nexttoke)
a0d0e21e 716 return LSTOP;
79072805
LW
717 if (*s == '(')
718 return FUNC;
719 s = skipspace(s);
720 if (*s == '(')
721 return FUNC;
722 else
723 return LSTOP;
724}
725
ffb4593c
NT
726/*
727 * S_force_next
9cbb5ea2 728 * When the lexer realizes it knows the next token (for instance,
ffb4593c 729 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
730 * to know what token to return the next time the lexer is called. Caller
731 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
732 * handles the token correctly.
ffb4593c
NT
733 */
734
4e553d73 735STATIC void
cea2e8a9 736S_force_next(pTHX_ I32 type)
79072805 737{
3280af22
NIS
738 PL_nexttype[PL_nexttoke] = type;
739 PL_nexttoke++;
740 if (PL_lex_state != LEX_KNOWNEXT) {
741 PL_lex_defer = PL_lex_state;
742 PL_lex_expect = PL_expect;
743 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
744 }
745}
746
ffb4593c
NT
747/*
748 * S_force_word
749 * When the lexer knows the next thing is a word (for instance, it has
750 * just seen -> and it knows that the next char is a word char, then
751 * it calls S_force_word to stick the next word into the PL_next lookahead.
752 *
753 * Arguments:
b1b65b59 754 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
755 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
756 * int check_keyword : if true, Perl checks to make sure the word isn't
757 * a keyword (do this if the word is a label, e.g. goto FOO)
758 * int allow_pack : if true, : characters will also be allowed (require,
759 * use, etc. do this)
9cbb5ea2 760 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
761 */
762
76e3520e 763STATIC char *
cea2e8a9 764S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 765{
463ee0b2
LW
766 register char *s;
767 STRLEN len;
4e553d73 768
463ee0b2
LW
769 start = skipspace(start);
770 s = start;
7e2040f0 771 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 772 (allow_pack && *s == ':') ||
15f0808c 773 (allow_initial_tick && *s == '\'') )
a0d0e21e 774 {
3280af22
NIS
775 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
776 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
777 return start;
778 if (token == METHOD) {
779 s = skipspace(s);
780 if (*s == '(')
3280af22 781 PL_expect = XTERM;
463ee0b2 782 else {
3280af22 783 PL_expect = XOPERATOR;
463ee0b2 784 }
79072805 785 }
3280af22
NIS
786 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
787 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
788 force_next(token);
789 }
790 return s;
791}
792
ffb4593c
NT
793/*
794 * S_force_ident
9cbb5ea2 795 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
796 * text only contains the "foo" portion. The first argument is a pointer
797 * to the "foo", and the second argument is the type symbol to prefix.
798 * Forces the next token to be a "WORD".
9cbb5ea2 799 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
800 */
801
76e3520e 802STATIC void
cea2e8a9 803S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
804{
805 if (s && *s) {
11343788 806 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 807 PL_nextval[PL_nexttoke].opval = o;
79072805 808 force_next(WORD);
748a9306 809 if (kind) {
11343788 810 o->op_private = OPpCONST_ENTERED;
55497cff 811 /* XXX see note in pp_entereval() for why we forgo typo
812 warnings if the symbol must be introduced in an eval.
813 GSAR 96-10-12 */
3280af22 814 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
815 kind == '$' ? SVt_PV :
816 kind == '@' ? SVt_PVAV :
817 kind == '%' ? SVt_PVHV :
818 SVt_PVGV
819 );
748a9306 820 }
79072805
LW
821 }
822}
823
1571675a
GS
824NV
825Perl_str_to_version(pTHX_ SV *sv)
826{
827 NV retval = 0.0;
828 NV nshift = 1.0;
829 STRLEN len;
830 char *start = SvPVx(sv,len);
3aa33fe5 831 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
832 char *end = start + len;
833 while (start < end) {
ba210ebe 834 STRLEN skip;
1571675a
GS
835 UV n;
836 if (utf)
9041c2e3 837 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
838 else {
839 n = *(U8*)start;
840 skip = 1;
841 }
842 retval += ((NV)n)/nshift;
843 start += skip;
844 nshift *= 1000;
845 }
846 return retval;
847}
848
4e553d73 849/*
ffb4593c
NT
850 * S_force_version
851 * Forces the next token to be a version number.
e759cc13
RGS
852 * If the next token appears to be an invalid version number, (e.g. "v2b"),
853 * and if "guessing" is TRUE, then no new token is created (and the caller
854 * must use an alternative parsing method).
ffb4593c
NT
855 */
856
76e3520e 857STATIC char *
e759cc13 858S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 859{
860 OP *version = Nullop;
44dcb63b 861 char *d;
89bfa8cd 862
863 s = skipspace(s);
864
44dcb63b 865 d = s;
dd629d5b 866 if (*d == 'v')
44dcb63b 867 d++;
44dcb63b 868 if (isDIGIT(*d)) {
e759cc13
RGS
869 while (isDIGIT(*d) || *d == '_' || *d == '.')
870 d++;
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd 881 }
e759cc13
RGS
882 else if (guessing)
883 return s;
89bfa8cd 884 }
885
886 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 887 PL_nextval[PL_nexttoke].opval = version;
4e553d73 888 force_next(WORD);
89bfa8cd 889
e759cc13 890 return s;
89bfa8cd 891}
892
ffb4593c
NT
893/*
894 * S_tokeq
895 * Tokenize a quoted string passed in as an SV. It finds the next
896 * chunk, up to end of string or a backslash. It may make a new
897 * SV containing that chunk (if HINT_NEW_STRING is on). It also
898 * turns \\ into \.
899 */
900
76e3520e 901STATIC SV *
cea2e8a9 902S_tokeq(pTHX_ SV *sv)
79072805
LW
903{
904 register char *s;
905 register char *send;
906 register char *d;
b3ac6de7
IZ
907 STRLEN len = 0;
908 SV *pv = sv;
79072805
LW
909
910 if (!SvLEN(sv))
b3ac6de7 911 goto finish;
79072805 912
a0d0e21e 913 s = SvPV_force(sv, len);
21a311ee 914 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 915 goto finish;
463ee0b2 916 send = s + len;
79072805
LW
917 while (s < send && *s != '\\')
918 s++;
919 if (s == send)
b3ac6de7 920 goto finish;
79072805 921 d = s;
be4731d2 922 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 923 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
924 if (SvUTF8(sv))
925 SvUTF8_on(pv);
926 }
79072805
LW
927 while (s < send) {
928 if (*s == '\\') {
a0d0e21e 929 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
930 s++; /* all that, just for this */
931 }
932 *d++ = *s++;
933 }
934 *d = '\0';
463ee0b2 935 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 936 finish:
3280af22 937 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 938 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
939 return sv;
940}
941
ffb4593c
NT
942/*
943 * Now come three functions related to double-quote context,
944 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
945 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
946 * interact with PL_lex_state, and create fake ( ... ) argument lists
947 * to handle functions and concatenation.
948 * They assume that whoever calls them will be setting up a fake
949 * join call, because each subthing puts a ',' after it. This lets
950 * "lower \luPpEr"
951 * become
952 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
953 *
954 * (I'm not sure whether the spurious commas at the end of lcfirst's
955 * arguments and join's arguments are created or not).
956 */
957
958/*
959 * S_sublex_start
960 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
961 *
962 * Pattern matching will set PL_lex_op to the pattern-matching op to
963 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
964 *
965 * OP_CONST and OP_READLINE are easy--just make the new op and return.
966 *
967 * Everything else becomes a FUNC.
968 *
969 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
970 * had an OP_CONST or OP_READLINE). This just sets us up for a
971 * call to S_sublex_push().
972 */
973
76e3520e 974STATIC I32
cea2e8a9 975S_sublex_start(pTHX)
79072805
LW
976{
977 register I32 op_type = yylval.ival;
79072805
LW
978
979 if (op_type == OP_NULL) {
3280af22
NIS
980 yylval.opval = PL_lex_op;
981 PL_lex_op = Nullop;
79072805
LW
982 return THING;
983 }
984 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 985 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
986
987 if (SvTYPE(sv) == SVt_PVIV) {
988 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
989 STRLEN len;
990 char *p;
991 SV *nsv;
992
993 p = SvPV(sv, len);
79cb57f6 994 nsv = newSVpvn(p, len);
01ec43d0
GS
995 if (SvUTF8(sv))
996 SvUTF8_on(nsv);
b3ac6de7
IZ
997 SvREFCNT_dec(sv);
998 sv = nsv;
4e553d73 999 }
b3ac6de7 1000 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1001 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1002 /* Allow <FH> // "foo" */
1003 if (op_type == OP_READLINE)
1004 PL_expect = XTERMORDORDOR;
79072805
LW
1005 return THING;
1006 }
1007
3280af22
NIS
1008 PL_sublex_info.super_state = PL_lex_state;
1009 PL_sublex_info.sub_inwhat = op_type;
1010 PL_sublex_info.sub_op = PL_lex_op;
1011 PL_lex_state = LEX_INTERPPUSH;
55497cff 1012
3280af22
NIS
1013 PL_expect = XTERM;
1014 if (PL_lex_op) {
1015 yylval.opval = PL_lex_op;
1016 PL_lex_op = Nullop;
55497cff 1017 return PMFUNC;
1018 }
1019 else
1020 return FUNC;
1021}
1022
ffb4593c
NT
1023/*
1024 * S_sublex_push
1025 * Create a new scope to save the lexing state. The scope will be
1026 * ended in S_sublex_done. Returns a '(', starting the function arguments
1027 * to the uc, lc, etc. found before.
1028 * Sets PL_lex_state to LEX_INTERPCONCAT.
1029 */
1030
76e3520e 1031STATIC I32
cea2e8a9 1032S_sublex_push(pTHX)
55497cff 1033{
f46d017c 1034 ENTER;
55497cff 1035
3280af22
NIS
1036 PL_lex_state = PL_sublex_info.super_state;
1037 SAVEI32(PL_lex_dojoin);
1038 SAVEI32(PL_lex_brackets);
3280af22
NIS
1039 SAVEI32(PL_lex_casemods);
1040 SAVEI32(PL_lex_starts);
1041 SAVEI32(PL_lex_state);
7766f137 1042 SAVEVPTR(PL_lex_inpat);
3280af22 1043 SAVEI32(PL_lex_inwhat);
57843af0 1044 SAVECOPLINE(PL_curcop);
3280af22 1045 SAVEPPTR(PL_bufptr);
8452ff4b 1046 SAVEPPTR(PL_bufend);
3280af22
NIS
1047 SAVEPPTR(PL_oldbufptr);
1048 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1049 SAVEPPTR(PL_last_lop);
1050 SAVEPPTR(PL_last_uni);
3280af22
NIS
1051 SAVEPPTR(PL_linestart);
1052 SAVESPTR(PL_linestr);
8edd5f42
RGS
1053 SAVEGENERICPV(PL_lex_brackstack);
1054 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1055
1056 PL_linestr = PL_lex_stuff;
1057 PL_lex_stuff = Nullsv;
1058
9cbb5ea2
GS
1059 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1060 = SvPVX(PL_linestr);
3280af22 1061 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1062 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1063 SAVEFREESV(PL_linestr);
1064
1065 PL_lex_dojoin = FALSE;
1066 PL_lex_brackets = 0;
3280af22
NIS
1067 New(899, PL_lex_brackstack, 120, char);
1068 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1069 PL_lex_casemods = 0;
1070 *PL_lex_casestack = '\0';
1071 PL_lex_starts = 0;
1072 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1073 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1074
1075 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1076 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1077 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1078 else
3280af22 1079 PL_lex_inpat = Nullop;
79072805 1080
55497cff 1081 return '(';
79072805
LW
1082}
1083
ffb4593c
NT
1084/*
1085 * S_sublex_done
1086 * Restores lexer state after a S_sublex_push.
1087 */
1088
76e3520e 1089STATIC I32
cea2e8a9 1090S_sublex_done(pTHX)
79072805 1091{
3280af22 1092 if (!PL_lex_starts++) {
9aa983d2
JH
1093 SV *sv = newSVpvn("",0);
1094 if (SvUTF8(PL_linestr))
1095 SvUTF8_on(sv);
3280af22 1096 PL_expect = XOPERATOR;
9aa983d2 1097 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1098 return THING;
1099 }
1100
3280af22
NIS
1101 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1102 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1103 return yylex();
79072805
LW
1104 }
1105
ffb4593c 1106 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1107 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1108 PL_linestr = PL_lex_repl;
1109 PL_lex_inpat = 0;
1110 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1111 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1112 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1113 SAVEFREESV(PL_linestr);
1114 PL_lex_dojoin = FALSE;
1115 PL_lex_brackets = 0;
3280af22
NIS
1116 PL_lex_casemods = 0;
1117 *PL_lex_casestack = '\0';
1118 PL_lex_starts = 0;
25da4f38 1119 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1120 PL_lex_state = LEX_INTERPNORMAL;
1121 PL_lex_starts++;
e9fa98b2
HS
1122 /* we don't clear PL_lex_repl here, so that we can check later
1123 whether this is an evalled subst; that means we rely on the
1124 logic to ensure sublex_done() is called again only via the
1125 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1126 }
e9fa98b2 1127 else {
3280af22 1128 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1129 PL_lex_repl = Nullsv;
1130 }
79072805 1131 return ',';
ffed7fef
LW
1132 }
1133 else {
f46d017c 1134 LEAVE;
3280af22
NIS
1135 PL_bufend = SvPVX(PL_linestr);
1136 PL_bufend += SvCUR(PL_linestr);
1137 PL_expect = XOPERATOR;
09bef843 1138 PL_sublex_info.sub_inwhat = 0;
79072805 1139 return ')';
ffed7fef
LW
1140 }
1141}
1142
02aa26ce
NT
1143/*
1144 scan_const
1145
1146 Extracts a pattern, double-quoted string, or transliteration. This
1147 is terrifying code.
1148
3280af22
NIS
1149 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1150 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1151 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1152
9b599b2a
GS
1153 Returns a pointer to the character scanned up to. Iff this is
1154 advanced from the start pointer supplied (ie if anything was
1155 successfully parsed), will leave an OP for the substring scanned
1156 in yylval. Caller must intuit reason for not parsing further
1157 by looking at the next characters herself.
1158
02aa26ce
NT
1159 In patterns:
1160 backslashes:
1161 double-quoted style: \r and \n
1162 regexp special ones: \D \s
1163 constants: \x3
1164 backrefs: \1 (deprecated in substitution replacements)
1165 case and quoting: \U \Q \E
1166 stops on @ and $, but not for $ as tail anchor
1167
1168 In transliterations:
1169 characters are VERY literal, except for - not at the start or end
1170 of the string, which indicates a range. scan_const expands the
1171 range to the full set of intermediate characters.
1172
1173 In double-quoted strings:
1174 backslashes:
1175 double-quoted style: \r and \n
1176 constants: \x3
1177 backrefs: \1 (deprecated)
1178 case and quoting: \U \Q \E
1179 stops on @ and $
1180
1181 scan_const does *not* construct ops to handle interpolated strings.
1182 It stops processing as soon as it finds an embedded $ or @ variable
1183 and leaves it to the caller to work out what's going on.
1184
da6eedaa 1185 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1186
1187 $ in pattern could be $foo or could be tail anchor. Assumption:
1188 it's a tail anchor if $ is the last thing in the string, or if it's
1189 followed by one of ")| \n\t"
1190
1191 \1 (backreferences) are turned into $1
1192
1193 The structure of the code is
1194 while (there's a character to process) {
1195 handle transliteration ranges
1196 skip regexp comments
1197 skip # initiated comments in //x patterns
1198 check for embedded @foo
1199 check for embedded scalars
1200 if (backslash) {
1201 leave intact backslashes from leave (below)
1202 deprecate \1 in strings and sub replacements
1203 handle string-changing backslashes \l \U \Q \E, etc.
1204 switch (what was escaped) {
1205 handle - in a transliteration (becomes a literal -)
1206 handle \132 octal characters
1207 handle 0x15 hex characters
1208 handle \cV (control V)
1209 handle printf backslashes (\f, \r, \n, etc)
1210 } (end switch)
1211 } (end if backslash)
1212 } (end while character to read)
4e553d73 1213
02aa26ce
NT
1214*/
1215
76e3520e 1216STATIC char *
cea2e8a9 1217S_scan_const(pTHX_ char *start)
79072805 1218{
3280af22 1219 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1220 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1221 register char *s = start; /* start of the constant */
1222 register char *d = SvPVX(sv); /* destination for copies */
1223 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1224 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1225 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1226 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1227 UV uv;
1228
dff6d3cd 1229 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1230 PL_lex_inpat
4a2d328f 1231 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1232 : "";
79072805 1233
2b9d42f0
NIS
1234 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1235 /* If we are doing a trans and we know we want UTF8 set expectation */
1236 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1237 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1238 }
1239
1240
79072805 1241 while (s < send || dorange) {
02aa26ce 1242 /* get transliterations out of the way (they're most literal) */
3280af22 1243 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1244 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1245 if (dorange) {
1ba5c669
JH
1246 I32 i; /* current expanded character */
1247 I32 min; /* first character in range */
1248 I32 max; /* last character in range */
02aa26ce 1249
2b9d42f0 1250 if (has_utf8) {
8973db79
JH
1251 char *c = (char*)utf8_hop((U8*)d, -1);
1252 char *e = d++;
1253 while (e-- > c)
1254 *(e + 1) = *e;
25716404 1255 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1256 /* mark the range as done, and continue */
1257 dorange = FALSE;
1258 didrange = TRUE;
1259 continue;
1260 }
2b9d42f0 1261
02aa26ce 1262 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1263 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1264 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1265 d -= 2; /* eat the first char and the - */
1266
8ada0baa
JH
1267 min = (U8)*d; /* first char in range */
1268 max = (U8)d[1]; /* last char in range */
1269
c2e66d9e 1270 if (min > max) {
01ec43d0 1271 Perl_croak(aTHX_
d1573ac7 1272 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1273 (char)min, (char)max);
c2e66d9e
GS
1274 }
1275
c7f1f016 1276#ifdef EBCDIC
8ada0baa
JH
1277 if ((isLOWER(min) && isLOWER(max)) ||
1278 (isUPPER(min) && isUPPER(max))) {
1279 if (isLOWER(min)) {
1280 for (i = min; i <= max; i++)
1281 if (isLOWER(i))
db42d148 1282 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1283 } else {
1284 for (i = min; i <= max; i++)
1285 if (isUPPER(i))
db42d148 1286 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1287 }
1288 }
1289 else
1290#endif
1291 for (i = min; i <= max; i++)
eb160463 1292 *d++ = (char)i;
02aa26ce
NT
1293
1294 /* mark the range as done, and continue */
79072805 1295 dorange = FALSE;
01ec43d0 1296 didrange = TRUE;
79072805 1297 continue;
4e553d73 1298 }
02aa26ce
NT
1299
1300 /* range begins (ignore - as first or last char) */
79072805 1301 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1302 if (didrange) {
1fafa243 1303 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1304 }
2b9d42f0 1305 if (has_utf8) {
25716404 1306 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1307 s++;
1308 continue;
1309 }
79072805
LW
1310 dorange = TRUE;
1311 s++;
01ec43d0
GS
1312 }
1313 else {
1314 didrange = FALSE;
1315 }
79072805 1316 }
02aa26ce
NT
1317
1318 /* if we get here, we're not doing a transliteration */
1319
0f5d15d6
IZ
1320 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1321 except for the last char, which will be done separately. */
3280af22 1322 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1323 if (s[2] == '#') {
1324 while (s < send && *s != ')')
db42d148 1325 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1326 }
1327 else if (s[2] == '{' /* This should match regcomp.c */
1328 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1329 {
cc6b7395 1330 I32 count = 1;
0f5d15d6 1331 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1332 char c;
1333
d9f97599
GS
1334 while (count && (c = *regparse)) {
1335 if (c == '\\' && regparse[1])
1336 regparse++;
4e553d73 1337 else if (c == '{')
cc6b7395 1338 count++;
4e553d73 1339 else if (c == '}')
cc6b7395 1340 count--;
d9f97599 1341 regparse++;
cc6b7395 1342 }
5bdf89e7
IZ
1343 if (*regparse != ')') {
1344 regparse--; /* Leave one char for continuation. */
cc6b7395 1345 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1346 }
0f5d15d6 1347 while (s < regparse)
db42d148 1348 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1349 }
748a9306 1350 }
02aa26ce
NT
1351
1352 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1353 else if (*s == '#' && PL_lex_inpat &&
1354 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1355 while (s+1 < send && *s != '\n')
db42d148 1356 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1357 }
02aa26ce 1358
5d1d4326 1359 /* check for embedded arrays
da6eedaa 1360 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1361 */
7e2040f0 1362 else if (*s == '@' && s[1]
5d1d4326 1363 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1364 break;
02aa26ce
NT
1365
1366 /* check for embedded scalars. only stop if we're sure it's a
1367 variable.
1368 */
79072805 1369 else if (*s == '$') {
3280af22 1370 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1371 break;
6002328a 1372 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1373 break; /* in regexp, $ might be tail anchor */
1374 }
02aa26ce 1375
2b9d42f0
NIS
1376 /* End of else if chain - OP_TRANS rejoin rest */
1377
02aa26ce 1378 /* backslashes */
79072805
LW
1379 if (*s == '\\' && s+1 < send) {
1380 s++;
02aa26ce
NT
1381
1382 /* some backslashes we leave behind */
c9f97d15 1383 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1384 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1385 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1386 continue;
1387 }
02aa26ce
NT
1388
1389 /* deprecate \1 in strings and substitution replacements */
3280af22 1390 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1391 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1392 {
599cee73 1393 if (ckWARN(WARN_SYNTAX))
9014280d 1394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1395 *--s = '$';
1396 break;
1397 }
02aa26ce
NT
1398
1399 /* string-change backslash escapes */
3280af22 1400 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1401 --s;
1402 break;
1403 }
02aa26ce
NT
1404
1405 /* if we get here, it's either a quoted -, or a digit */
79072805 1406 switch (*s) {
02aa26ce
NT
1407
1408 /* quoted - in transliterations */
79072805 1409 case '-':
3280af22 1410 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1411 *d++ = *s++;
1412 continue;
1413 }
1414 /* FALL THROUGH */
1415 default:
11b8faa4 1416 {
707afd92
MS
1417 if (ckWARN(WARN_MISC) &&
1418 isALNUM(*s) &&
1419 *s != '_')
9014280d 1420 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1421 "Unrecognized escape \\%c passed through",
1422 *s);
1423 /* default action is to copy the quoted character */
f9a63242 1424 goto default_action;
11b8faa4 1425 }
02aa26ce
NT
1426
1427 /* \132 indicates an octal constant */
79072805
LW
1428 case '0': case '1': case '2': case '3':
1429 case '4': case '5': case '6': case '7':
ba210ebe 1430 {
53305cf1
NC
1431 I32 flags = 0;
1432 STRLEN len = 3;
1433 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1434 s += len;
1435 }
012bcf8d 1436 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1437
1438 /* \x24 indicates a hex constant */
79072805 1439 case 'x':
a0ed51b3
LW
1440 ++s;
1441 if (*s == '{') {
1442 char* e = strchr(s, '}');
a4c04bdc
NC
1443 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1444 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1445 STRLEN len;
355860ce 1446
53305cf1 1447 ++s;
adaeee49 1448 if (!e) {
a0ed51b3 1449 yyerror("Missing right brace on \\x{}");
355860ce 1450 continue;
ba210ebe 1451 }
53305cf1
NC
1452 len = e - s;
1453 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1454 s = e + 1;
a0ed51b3
LW
1455 }
1456 else {
ba210ebe 1457 {
53305cf1 1458 STRLEN len = 2;
a4c04bdc 1459 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1460 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1461 s += len;
1462 }
012bcf8d
GS
1463 }
1464
1465 NUM_ESCAPE_INSERT:
1466 /* Insert oct or hex escaped character.
301d3d20 1467 * There will always enough room in sv since such
db42d148 1468 * escapes will be longer than any UTF-8 sequence
301d3d20 1469 * they can end up as. */
ba7cea30 1470
c7f1f016
NIS
1471 /* We need to map to chars to ASCII before doing the tests
1472 to cover EBCDIC
1473 */
c4d5f83a 1474 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1475 if (!has_utf8 && uv > 255) {
301d3d20
JH
1476 /* Might need to recode whatever we have
1477 * accumulated so far if it contains any
1478 * hibit chars.
1479 *
1480 * (Can't we keep track of that and avoid
1481 * this rescan? --jhi)
012bcf8d 1482 */
c7f1f016 1483 int hicount = 0;
63cd0674
NIS
1484 U8 *c;
1485 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1486 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1487 hicount++;
db42d148 1488 }
012bcf8d 1489 }
63cd0674 1490 if (hicount) {
db42d148
NIS
1491 STRLEN offset = d - SvPVX(sv);
1492 U8 *src, *dst;
1493 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1494 src = (U8 *)d - 1;
1495 dst = src+hicount;
1496 d += hicount;
1497 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1498 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1499 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1500 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1501 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1502 }
1503 else {
63cd0674 1504 *dst-- = *src;
012bcf8d 1505 }
c7f1f016 1506 src--;
012bcf8d
GS
1507 }
1508 }
1509 }
1510
9aa983d2 1511 if (has_utf8 || uv > 255) {
9041c2e3 1512 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1513 has_utf8 = TRUE;
f9a63242
JH
1514 if (PL_lex_inwhat == OP_TRANS &&
1515 PL_sublex_info.sub_op) {
1516 PL_sublex_info.sub_op->op_private |=
1517 (PL_lex_repl ? OPpTRANS_FROM_UTF
1518 : OPpTRANS_TO_UTF);
f9a63242 1519 }
012bcf8d 1520 }
a0ed51b3 1521 else {
012bcf8d 1522 *d++ = (char)uv;
a0ed51b3 1523 }
012bcf8d
GS
1524 }
1525 else {
c4d5f83a 1526 *d++ = (char) uv;
a0ed51b3 1527 }
79072805 1528 continue;
02aa26ce 1529
b239daa5 1530 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1531 case 'N':
55eda711 1532 ++s;
423cee85
JH
1533 if (*s == '{') {
1534 char* e = strchr(s, '}');
155aba94 1535 SV *res;
423cee85
JH
1536 STRLEN len;
1537 char *str;
4e553d73 1538
423cee85 1539 if (!e) {
5777a3f7 1540 yyerror("Missing right brace on \\N{}");
423cee85
JH
1541 e = s - 1;
1542 goto cont_scan;
1543 }
dbc0d4f2
JH
1544 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1545 /* \N{U+...} */
1546 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1547 PERL_SCAN_DISALLOW_PREFIX;
1548 s += 3;
1549 len = e - s;
1550 uv = grok_hex(s, &len, &flags, NULL);
1551 s = e + 1;
1552 goto NUM_ESCAPE_INSERT;
1553 }
55eda711
JH
1554 res = newSVpvn(s + 1, e - s - 1);
1555 res = new_constant( Nullch, 0, "charnames",
1556 res, Nullsv, "\\N{...}" );
f9a63242
JH
1557 if (has_utf8)
1558 sv_utf8_upgrade(res);
423cee85 1559 str = SvPV(res,len);
1c47067b
JH
1560#ifdef EBCDIC_NEVER_MIND
1561 /* charnames uses pack U and that has been
1562 * recently changed to do the below uni->native
1563 * mapping, so this would be redundant (and wrong,
1564 * the code point would be doubly converted).
1565 * But leave this in just in case the pack U change
1566 * gets revoked, but the semantics is still
1567 * desireable for charnames. --jhi */
cddc7ef4
JH
1568 {
1569 UV uv = utf8_to_uvchr((U8*)str, 0);
1570
1571 if (uv < 0x100) {
1572 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1573
1574 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1575 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1576 str = SvPV(res, len);
1577 }
1578 }
1579#endif
89491803 1580 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1581 char *ostart = SvPVX(sv);
1582 SvCUR_set(sv, d - ostart);
1583 SvPOK_on(sv);
e4f3eed8 1584 *d = '\0';
f08d6ad9 1585 sv_utf8_upgrade(sv);
d2f449dd 1586 /* this just broke our allocation above... */
eb160463 1587 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1588 d = SvPVX(sv) + SvCUR(sv);
89491803 1589 has_utf8 = TRUE;
f08d6ad9 1590 }
eb160463 1591 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1592 char *odest = SvPVX(sv);
1593
8973db79 1594 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1595 d = SvPVX(sv) + (d - odest);
1596 }
1597 Copy(str, d, len, char);
1598 d += len;
1599 SvREFCNT_dec(res);
1600 cont_scan:
1601 s = e + 1;
1602 }
1603 else
5777a3f7 1604 yyerror("Missing braces on \\N{}");
423cee85
JH
1605 continue;
1606
02aa26ce 1607 /* \c is a control character */
79072805
LW
1608 case 'c':
1609 s++;
961ce445 1610 if (s < send) {
ba210ebe 1611 U8 c = *s++;
c7f1f016
NIS
1612#ifdef EBCDIC
1613 if (isLOWER(c))
1614 c = toUPPER(c);
1615#endif
db42d148 1616 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1617 }
961ce445
RGS
1618 else {
1619 yyerror("Missing control char name in \\c");
1620 }
79072805 1621 continue;
02aa26ce
NT
1622
1623 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1624 case 'b':
db42d148 1625 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1626 break;
1627 case 'n':
db42d148 1628 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1629 break;
1630 case 'r':
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1632 break;
1633 case 'f':
db42d148 1634 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1635 break;
1636 case 't':
db42d148 1637 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1638 break;
34a3fe2a 1639 case 'e':
db42d148 1640 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1641 break;
1642 case 'a':
db42d148 1643 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1644 break;
02aa26ce
NT
1645 } /* end switch */
1646
79072805
LW
1647 s++;
1648 continue;
02aa26ce
NT
1649 } /* end if (backslash) */
1650
f9a63242 1651 default_action:
2b9d42f0
NIS
1652 /* If we started with encoded form, or already know we want it
1653 and then encode the next character */
1654 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1655 STRLEN len = 1;
1656 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1657 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1658 s += len;
1659 if (need > len) {
1660 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1661 STRLEN off = d - SvPVX(sv);
1662 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1663 }
1664 d = (char*)uvchr_to_utf8((U8*)d, uv);
1665 has_utf8 = TRUE;
1666 }
1667 else {
1668 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1669 }
02aa26ce
NT
1670 } /* while loop to process each character */
1671
1672 /* terminate the string and set up the sv */
79072805 1673 *d = '\0';
463ee0b2 1674 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1675 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1676 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1677
79072805 1678 SvPOK_on(sv);
9f4817db 1679 if (PL_encoding && !has_utf8) {
d0063567
DK
1680 sv_recode_to_utf8(sv, PL_encoding);
1681 if (SvUTF8(sv))
1682 has_utf8 = TRUE;
9f4817db 1683 }
2b9d42f0 1684 if (has_utf8) {
7e2040f0 1685 SvUTF8_on(sv);
2b9d42f0 1686 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1687 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1688 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1689 }
1690 }
79072805 1691
02aa26ce 1692 /* shrink the sv if we allocated more than we used */
79072805
LW
1693 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1694 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1695 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1696 }
02aa26ce 1697
9b599b2a 1698 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1699 if (s > PL_bufptr) {
1700 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1701 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1702 sv, Nullsv,
4e553d73 1703 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1704 ? "tr"
3280af22 1705 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1706 ? "s"
1707 : "qq")));
79072805 1708 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1709 } else
8990e307 1710 SvREFCNT_dec(sv);
79072805
LW
1711 return s;
1712}
1713
ffb4593c
NT
1714/* S_intuit_more
1715 * Returns TRUE if there's more to the expression (e.g., a subscript),
1716 * FALSE otherwise.
ffb4593c
NT
1717 *
1718 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1719 *
1720 * ->[ and ->{ return TRUE
1721 * { and [ outside a pattern are always subscripts, so return TRUE
1722 * if we're outside a pattern and it's not { or [, then return FALSE
1723 * if we're in a pattern and the first char is a {
1724 * {4,5} (any digits around the comma) returns FALSE
1725 * if we're in a pattern and the first char is a [
1726 * [] returns FALSE
1727 * [SOMETHING] has a funky algorithm to decide whether it's a
1728 * character class or not. It has to deal with things like
1729 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1730 * anything else returns TRUE
1731 */
1732
9cbb5ea2
GS
1733/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1734
76e3520e 1735STATIC int
cea2e8a9 1736S_intuit_more(pTHX_ register char *s)
79072805 1737{
3280af22 1738 if (PL_lex_brackets)
79072805
LW
1739 return TRUE;
1740 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1741 return TRUE;
1742 if (*s != '{' && *s != '[')
1743 return FALSE;
3280af22 1744 if (!PL_lex_inpat)
79072805
LW
1745 return TRUE;
1746
1747 /* In a pattern, so maybe we have {n,m}. */
1748 if (*s == '{') {
1749 s++;
1750 if (!isDIGIT(*s))
1751 return TRUE;
1752 while (isDIGIT(*s))
1753 s++;
1754 if (*s == ',')
1755 s++;
1756 while (isDIGIT(*s))
1757 s++;
1758 if (*s == '}')
1759 return FALSE;
1760 return TRUE;
1761
1762 }
1763
1764 /* On the other hand, maybe we have a character class */
1765
1766 s++;
1767 if (*s == ']' || *s == '^')
1768 return FALSE;
1769 else {
ffb4593c 1770 /* this is terrifying, and it works */
79072805
LW
1771 int weight = 2; /* let's weigh the evidence */
1772 char seen[256];
f27ffc4a 1773 unsigned char un_char = 255, last_un_char;
93a17b20 1774 char *send = strchr(s,']');
3280af22 1775 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1776
1777 if (!send) /* has to be an expression */
1778 return TRUE;
1779
1780 Zero(seen,256,char);
1781 if (*s == '$')
1782 weight -= 3;
1783 else if (isDIGIT(*s)) {
1784 if (s[1] != ']') {
1785 if (isDIGIT(s[1]) && s[2] == ']')
1786 weight -= 10;
1787 }
1788 else
1789 weight -= 100;
1790 }
1791 for (; s < send; s++) {
1792 last_un_char = un_char;
1793 un_char = (unsigned char)*s;
1794 switch (*s) {
1795 case '@':
1796 case '&':
1797 case '$':
1798 weight -= seen[un_char] * 10;
7e2040f0 1799 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1800 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1801 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1802 weight -= 100;
1803 else
1804 weight -= 10;
1805 }
1806 else if (*s == '$' && s[1] &&
93a17b20
LW
1807 strchr("[#!%*<>()-=",s[1])) {
1808 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1809 weight -= 10;
1810 else
1811 weight -= 1;
1812 }
1813 break;
1814 case '\\':
1815 un_char = 254;
1816 if (s[1]) {
93a17b20 1817 if (strchr("wds]",s[1]))
79072805
LW
1818 weight += 100;
1819 else if (seen['\''] || seen['"'])
1820 weight += 1;
93a17b20 1821 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1822 weight += 40;
1823 else if (isDIGIT(s[1])) {
1824 weight += 40;
1825 while (s[1] && isDIGIT(s[1]))
1826 s++;
1827 }
1828 }
1829 else
1830 weight += 100;
1831 break;
1832 case '-':
1833 if (s[1] == '\\')
1834 weight += 50;
93a17b20 1835 if (strchr("aA01! ",last_un_char))
79072805 1836 weight += 30;
93a17b20 1837 if (strchr("zZ79~",s[1]))
79072805 1838 weight += 30;
f27ffc4a
GS
1839 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1840 weight -= 5; /* cope with negative subscript */
79072805
LW
1841 break;
1842 default:
93a17b20 1843 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1844 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1845 char *d = tmpbuf;
1846 while (isALPHA(*s))
1847 *d++ = *s++;
1848 *d = '\0';
1849 if (keyword(tmpbuf, d - tmpbuf))
1850 weight -= 150;
1851 }
1852 if (un_char == last_un_char + 1)
1853 weight += 5;
1854 weight -= seen[un_char];
1855 break;
1856 }
1857 seen[un_char]++;
1858 }
1859 if (weight >= 0) /* probably a character class */
1860 return FALSE;
1861 }
1862
1863 return TRUE;
1864}
ffed7fef 1865
ffb4593c
NT
1866/*
1867 * S_intuit_method
1868 *
1869 * Does all the checking to disambiguate
1870 * foo bar
1871 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1872 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1873 *
1874 * First argument is the stuff after the first token, e.g. "bar".
1875 *
1876 * Not a method if bar is a filehandle.
1877 * Not a method if foo is a subroutine prototyped to take a filehandle.
1878 * Not a method if it's really "Foo $bar"
1879 * Method if it's "foo $bar"
1880 * Not a method if it's really "print foo $bar"
1881 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1882 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1883 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1884 * =>
1885 */
1886
76e3520e 1887STATIC int
cea2e8a9 1888S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1889{
1890 char *s = start + (*start == '$');
3280af22 1891 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1892 STRLEN len;
1893 GV* indirgv;
1894
1895 if (gv) {
b6c543e3 1896 CV *cv;
a0d0e21e
LW
1897 if (GvIO(gv))
1898 return 0;
b6c543e3
IZ
1899 if ((cv = GvCVu(gv))) {
1900 char *proto = SvPVX(cv);
1901 if (proto) {
1902 if (*proto == ';')
1903 proto++;
1904 if (*proto == '*')
1905 return 0;
1906 }
1907 } else
a0d0e21e
LW
1908 gv = 0;
1909 }
8903cb82 1910 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1911 /* start is the beginning of the possible filehandle/object,
1912 * and s is the end of it
1913 * tmpbuf is a copy of it
1914 */
1915
a0d0e21e 1916 if (*start == '$') {
3280af22 1917 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1918 return 0;
1919 s = skipspace(s);
3280af22
NIS
1920 PL_bufptr = start;
1921 PL_expect = XREF;
a0d0e21e
LW
1922 return *s == '(' ? FUNCMETH : METHOD;
1923 }
1924 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1925 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1926 len -= 2;
1927 tmpbuf[len] = '\0';
1928 goto bare_package;
1929 }
1930 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1931 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1932 return 0;
1933 /* filehandle or package name makes it a method */
89bfa8cd 1934 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1935 s = skipspace(s);
3280af22 1936 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1937 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1938 bare_package:
3280af22 1939 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1940 newSVpvn(tmpbuf,len));
3280af22
NIS
1941 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1942 PL_expect = XTERM;
a0d0e21e 1943 force_next(WORD);
3280af22 1944 PL_bufptr = s;
a0d0e21e
LW
1945 return *s == '(' ? FUNCMETH : METHOD;
1946 }
1947 }
1948 return 0;
1949}
1950
ffb4593c
NT
1951/*
1952 * S_incl_perldb
1953 * Return a string of Perl code to load the debugger. If PERL5DB
1954 * is set, it will return the contents of that, otherwise a
1955 * compile-time require of perl5db.pl.
1956 */
1957
76e3520e 1958STATIC char*
cea2e8a9 1959S_incl_perldb(pTHX)
a0d0e21e 1960{
3280af22 1961 if (PL_perldb) {
76e3520e 1962 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1963
1964 if (pdb)
1965 return pdb;
93189314 1966 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1967 return "BEGIN { require 'perl5db.pl' }";
1968 }
1969 return "";
1970}
1971
1972
16d20bd9 1973/* Encoded script support. filter_add() effectively inserts a
4e553d73 1974 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1975 * Note that the filter function only applies to the current source file
1976 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1977 *
1978 * The datasv parameter (which may be NULL) can be used to pass
1979 * private data to this instance of the filter. The filter function
1980 * can recover the SV using the FILTER_DATA macro and use it to
1981 * store private buffers and state information.
1982 *
1983 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1984 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1985 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1986 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1987 * private use must be set using malloc'd pointers.
1988 */
16d20bd9
AD
1989
1990SV *
864dbfa3 1991Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1992{
f4c556ac
GS
1993 if (!funcp)
1994 return Nullsv;
1995
3280af22
NIS
1996 if (!PL_rsfp_filters)
1997 PL_rsfp_filters = newAV();
16d20bd9 1998 if (!datasv)
8c52afec 1999 datasv = NEWSV(255,0);
16d20bd9 2000 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2001 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2002 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2003 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2004 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2005 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2006 av_unshift(PL_rsfp_filters, 1);
2007 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2008 return(datasv);
2009}
4e553d73 2010
16d20bd9
AD
2011
2012/* Delete most recently added instance of this filter function. */
a0d0e21e 2013void
864dbfa3 2014Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2015{
e0c19803 2016 SV *datasv;
fe5a182c 2017 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2018 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2019 return;
2020 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2021 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2022 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2023 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2024 IoANY(datasv) = (void *)NULL;
3280af22 2025 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2026
16d20bd9
AD
2027 return;
2028 }
2029 /* we need to search for the correct entry and clear it */
cea2e8a9 2030 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2031}
2032
2033
2034/* Invoke the n'th filter function for the current rsfp. */
2035I32
864dbfa3 2036Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2037
2038
8ac85365 2039 /* 0 = read one text line */
a0d0e21e 2040{
16d20bd9
AD
2041 filter_t funcp;
2042 SV *datasv = NULL;
e50aee73 2043
3280af22 2044 if (!PL_rsfp_filters)
16d20bd9 2045 return -1;
3280af22 2046 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2047 /* Provide a default input filter to make life easy. */
2048 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2049 DEBUG_P(PerlIO_printf(Perl_debug_log,
2050 "filter_read %d: from rsfp\n", idx));
4e553d73 2051 if (maxlen) {
16d20bd9
AD
2052 /* Want a block */
2053 int len ;
2054 int old_len = SvCUR(buf_sv) ;
2055
2056 /* ensure buf_sv is large enough */
eb160463 2057 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2058 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2059 if (PerlIO_error(PL_rsfp))
37120919
AD
2060 return -1; /* error */
2061 else
2062 return 0 ; /* end of file */
2063 }
16d20bd9
AD
2064 SvCUR_set(buf_sv, old_len + len) ;
2065 } else {
2066 /* Want a line */
3280af22
NIS
2067 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2068 if (PerlIO_error(PL_rsfp))
37120919
AD
2069 return -1; /* error */
2070 else
2071 return 0 ; /* end of file */
2072 }
16d20bd9
AD
2073 }
2074 return SvCUR(buf_sv);
2075 }
2076 /* Skip this filter slot if filter has been deleted */
3280af22 2077 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2078 DEBUG_P(PerlIO_printf(Perl_debug_log,
2079 "filter_read %d: skipped (filter deleted)\n",
2080 idx));
16d20bd9
AD
2081 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2082 }
2083 /* Get function pointer hidden within datasv */
4755096e 2084 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2085 DEBUG_P(PerlIO_printf(Perl_debug_log,
2086 "filter_read %d: via function %p (%s)\n",
fe5a182c 2087 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2088 /* Call function. The function is expected to */
2089 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2090 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2091 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2092}
2093
76e3520e 2094STATIC char *
cea2e8a9 2095S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2096{
c39cd008 2097#ifdef PERL_CR_FILTER
3280af22 2098 if (!PL_rsfp_filters) {
c39cd008 2099 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2100 }
2101#endif
3280af22 2102 if (PL_rsfp_filters) {
16d20bd9 2103
55497cff 2104 if (!append)
2105 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2106 if (FILTER_READ(0, sv, 0) > 0)
2107 return ( SvPVX(sv) ) ;
2108 else
2109 return Nullch ;
2110 }
9d116dd7 2111 else
fd049845 2112 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2113}
2114
01ec43d0
GS
2115STATIC HV *
2116S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2117{
2118 GV *gv;
2119
01ec43d0 2120 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2121 return PL_curstash;
2122
2123 if (len > 2 &&
2124 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2125 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2126 {
2127 return GvHV(gv); /* Foo:: */
def3634b
GS
2128 }
2129
2130 /* use constant CLASS => 'MyClass' */
2131 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2132 SV *sv;
2133 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2134 pkgname = SvPV_nolen(sv);
2135 }
2136 }
2137
2138 return gv_stashpv(pkgname, FALSE);
2139}
a0d0e21e 2140
748a9306
LW
2141#ifdef DEBUGGING
2142 static char* exp_name[] =
09bef843
SB
2143 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2144 "ATTRTERM", "TERMBLOCK"
2145 };
748a9306 2146#endif
463ee0b2 2147
02aa26ce
NT
2148/*
2149 yylex
2150
2151 Works out what to call the token just pulled out of the input
2152 stream. The yacc parser takes care of taking the ops we return and
2153 stitching them into a tree.
2154
2155 Returns:
2156 PRIVATEREF
2157
2158 Structure:
2159 if read an identifier
2160 if we're in a my declaration
2161 croak if they tried to say my($foo::bar)
2162 build the ops for a my() declaration
2163 if it's an access to a my() variable
2164 are we in a sort block?
2165 croak if my($a); $a <=> $b
2166 build ops for access to a my() variable
2167 if in a dq string, and they've said @foo and we can't find @foo
2168 croak
2169 build ops for a bareword
2170 if we already built the token before, use it.
2171*/
2172
dba4d153 2173#ifdef USE_PURE_BISON
864dbfa3 2174int
dba4d153 2175Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2176{
20141f0e
IRC
2177 int r;
2178
6f202aea 2179 yyactlevel++;
20141f0e
IRC
2180 yylval_pointer[yyactlevel] = lvalp;
2181 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2182 if (yyactlevel >= YYMAXLEVEL)
2183 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2184
dba4d153 2185 r = Perl_yylex(aTHX);
20141f0e 2186
d8ae6756
IRC
2187 if (yyactlevel > 0)
2188 yyactlevel--;
20141f0e
IRC
2189
2190 return r;
2191}
dba4d153 2192#endif
20141f0e 2193
dba4d153
JH
2194#ifdef __SC__
2195#pragma segment Perl_yylex
2196#endif
dba4d153 2197int
dba4d153 2198Perl_yylex(pTHX)
20141f0e 2199{
79072805 2200 register char *s;
378cc40b 2201 register char *d;
79072805 2202 register I32 tmp;
463ee0b2 2203 STRLEN len;
161b471a
NIS
2204 GV *gv = Nullgv;
2205 GV **gvp = 0;
aa7440fb 2206 bool bof = FALSE;
1d239bbb 2207 I32 orig_keyword = 0;
a687059c 2208
02aa26ce 2209 /* check if there's an identifier for us to look at */
ba979b31 2210 if (PL_pending_ident)
e930465f 2211 return S_pending_ident(aTHX);
bbce6d69 2212
02aa26ce
NT
2213 /* no identifier pending identification */
2214
3280af22 2215 switch (PL_lex_state) {
79072805
LW
2216#ifdef COMMENTARY
2217 case LEX_NORMAL: /* Some compilers will produce faster */
2218 case LEX_INTERPNORMAL: /* code if we comment these out. */
2219 break;
2220#endif
2221
09bef843 2222 /* when we've already built the next token, just pull it out of the queue */
79072805 2223 case LEX_KNOWNEXT:
3280af22
NIS
2224 PL_nexttoke--;
2225 yylval = PL_nextval[PL_nexttoke];
2226 if (!PL_nexttoke) {
2227 PL_lex_state = PL_lex_defer;
2228 PL_expect = PL_lex_expect;
2229 PL_lex_defer = LEX_NORMAL;
463ee0b2 2230 }
607df283 2231 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2232 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2233 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2234
3280af22 2235 return(PL_nexttype[PL_nexttoke]);
79072805 2236
02aa26ce 2237 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2238 when we get here, PL_bufptr is at the \
02aa26ce 2239 */
79072805
LW
2240 case LEX_INTERPCASEMOD:
2241#ifdef DEBUGGING
3280af22 2242 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2243 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2244#endif
02aa26ce 2245 /* handle \E or end of string */
3280af22 2246 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2247 char oldmod;
02aa26ce
NT
2248
2249 /* if at a \E */
3280af22
NIS
2250 if (PL_lex_casemods) {
2251 oldmod = PL_lex_casestack[--PL_lex_casemods];
2252 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2253
3280af22
NIS
2254 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2255 PL_bufptr += 2;
2256 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2257 }
79072805
LW
2258 return ')';
2259 }
3280af22
NIS
2260 if (PL_bufptr != PL_bufend)
2261 PL_bufptr += 2;
2262 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2263 return yylex();
79072805
LW
2264 }
2265 else {
607df283 2266 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2267 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2268 s = PL_bufptr + 1;
79072805 2269 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
eb160463 2270 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
a0d0e21e 2271 if (strchr("LU", *s) &&
3280af22 2272 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2273 {
3280af22 2274 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2275 return ')';
2276 }
3280af22 2277 if (PL_lex_casemods > 10) {
8edd5f42 2278 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
a0d0e21e 2279 }
3280af22
NIS
2280 PL_lex_casestack[PL_lex_casemods++] = *s;
2281 PL_lex_casestack[PL_lex_casemods] = '\0';
2282 PL_lex_state = LEX_INTERPCONCAT;
2283 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2284 force_next('(');
2285 if (*s == 'l')
3280af22 2286 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2287 else if (*s == 'u')
3280af22 2288 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2289 else if (*s == 'L')
3280af22 2290 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2291 else if (*s == 'U')
3280af22 2292 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2293 else if (*s == 'Q')
3280af22 2294 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2295 else
cea2e8a9 2296 Perl_croak(aTHX_ "panic: yylex");
3280af22 2297 PL_bufptr = s + 1;
79072805 2298 force_next(FUNC);
3280af22
NIS
2299 if (PL_lex_starts) {
2300 s = PL_bufptr;
2301 PL_lex_starts = 0;
79072805
LW
2302 Aop(OP_CONCAT);
2303 }
2304 else
cea2e8a9 2305 return yylex();
79072805
LW
2306 }
2307
55497cff 2308 case LEX_INTERPPUSH:
2309 return sublex_push();
2310
79072805 2311 case LEX_INTERPSTART:
3280af22 2312 if (PL_bufptr == PL_bufend)
79072805 2313 return sublex_done();
607df283 2314 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2315 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2316 PL_expect = XTERM;
2317 PL_lex_dojoin = (*PL_bufptr == '@');
2318 PL_lex_state = LEX_INTERPNORMAL;
2319 if (PL_lex_dojoin) {
2320 PL_nextval[PL_nexttoke].ival = 0;
79072805 2321 force_next(',');
a0d0e21e 2322 force_ident("\"", '$');
3280af22 2323 PL_nextval[PL_nexttoke].ival = 0;
79072805 2324 force_next('$');
3280af22 2325 PL_nextval[PL_nexttoke].ival = 0;
79072805 2326 force_next('(');
3280af22 2327 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2328 force_next(FUNC);
2329 }
3280af22
NIS
2330 if (PL_lex_starts++) {
2331 s = PL_bufptr;
79072805
LW
2332 Aop(OP_CONCAT);
2333 }
cea2e8a9 2334 return yylex();
79072805
LW
2335
2336 case LEX_INTERPENDMAYBE:
3280af22
NIS
2337 if (intuit_more(PL_bufptr)) {
2338 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2339 break;
2340 }
2341 /* FALL THROUGH */
2342
2343 case LEX_INTERPEND:
3280af22
NIS
2344 if (PL_lex_dojoin) {
2345 PL_lex_dojoin = FALSE;
2346 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2347 return ')';
2348 }
43a16006 2349 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2350 && SvEVALED(PL_lex_repl))
43a16006 2351 {
e9fa98b2 2352 if (PL_bufptr != PL_bufend)
cea2e8a9 2353 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2354 PL_lex_repl = Nullsv;
2355 }
79072805
LW
2356 /* FALLTHROUGH */
2357 case LEX_INTERPCONCAT:
2358#ifdef DEBUGGING
3280af22 2359 if (PL_lex_brackets)
cea2e8a9 2360 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2361#endif
3280af22 2362 if (PL_bufptr == PL_bufend)
79072805
LW
2363 return sublex_done();
2364
3280af22
NIS
2365 if (SvIVX(PL_linestr) == '\'') {
2366 SV *sv = newSVsv(PL_linestr);
2367 if (!PL_lex_inpat)
76e3520e 2368 sv = tokeq(sv);
3280af22 2369 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2370 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2371 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2372 s = PL_bufend;
79072805
LW
2373 }
2374 else {
3280af22 2375 s = scan_const(PL_bufptr);
79072805 2376 if (*s == '\\')
3280af22 2377 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2378 else
3280af22 2379 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2380 }
2381
3280af22
NIS
2382 if (s != PL_bufptr) {
2383 PL_nextval[PL_nexttoke] = yylval;
2384 PL_expect = XTERM;
79072805 2385 force_next(THING);
3280af22 2386 if (PL_lex_starts++)
79072805
LW
2387 Aop(OP_CONCAT);
2388 else {
3280af22 2389 PL_bufptr = s;
cea2e8a9 2390 return yylex();
79072805
LW
2391 }
2392 }
2393
cea2e8a9 2394 return yylex();
a0d0e21e 2395 case LEX_FORMLINE:
3280af22
NIS
2396 PL_lex_state = LEX_NORMAL;
2397 s = scan_formline(PL_bufptr);
2398 if (!PL_lex_formbrack)
a0d0e21e
LW
2399 goto rightbracket;
2400 OPERATOR(';');
79072805
LW
2401 }
2402
3280af22
NIS
2403 s = PL_bufptr;
2404 PL_oldoldbufptr = PL_oldbufptr;
2405 PL_oldbufptr = s;
607df283 2406 DEBUG_T( {
bf49b057
GS
2407 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2408 exp_name[PL_expect], s);
5f80b19c 2409 } );
463ee0b2
LW
2410
2411 retry:
378cc40b
LW
2412 switch (*s) {
2413 default:
7e2040f0 2414 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2415 goto keylookup;
cea2e8a9 2416 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2417 case 4:
2418 case 26:
2419 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2420 case 0:
3280af22
NIS
2421 if (!PL_rsfp) {
2422 PL_last_uni = 0;
2423 PL_last_lop = 0;
2424 if (PL_lex_brackets)
d98d5fff 2425 yyerror("Missing right curly or square bracket");
4e553d73 2426 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2427 "### Tokener got EOF\n");
5f80b19c 2428 } );
79072805 2429 TOKEN(0);
463ee0b2 2430 }
3280af22 2431 if (s++ < PL_bufend)
a687059c 2432 goto retry; /* ignore stray nulls */
3280af22
NIS
2433 PL_last_uni = 0;
2434 PL_last_lop = 0;
2435 if (!PL_in_eval && !PL_preambled) {
2436 PL_preambled = TRUE;
2437 sv_setpv(PL_linestr,incl_perldb());
2438 if (SvCUR(PL_linestr))
2439 sv_catpv(PL_linestr,";");
2440 if (PL_preambleav){
2441 while(AvFILLp(PL_preambleav) >= 0) {
2442 SV *tmpsv = av_shift(PL_preambleav);
2443 sv_catsv(PL_linestr, tmpsv);
2444 sv_catpv(PL_linestr, ";");
91b7def8 2445 sv_free(tmpsv);
2446 }
3280af22
NIS
2447 sv_free((SV*)PL_preambleav);
2448 PL_preambleav = NULL;
91b7def8 2449 }
3280af22
NIS
2450 if (PL_minus_n || PL_minus_p) {
2451 sv_catpv(PL_linestr, "LINE: while (<>) {");
2452 if (PL_minus_l)
2453 sv_catpv(PL_linestr,"chomp;");
2454 if (PL_minus_a) {
3280af22
NIS
2455 if (PL_minus_F) {
2456 if (strchr("/'\"", *PL_splitstr)
2457 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2458 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2459 else {
2460 char delim;
2461 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2462 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2463 delim = *s;
75c72d73 2464 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2465 "q" + (delim == '\''), delim);
3280af22 2466 for (s = PL_splitstr; *s; s++) {
54310121 2467 if (*s == '\\')
3280af22
NIS
2468 sv_catpvn(PL_linestr, "\\", 1);
2469 sv_catpvn(PL_linestr, s, 1);
54310121 2470 }
cea2e8a9 2471 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2472 }
2304df62
AD
2473 }
2474 else
75c72d73 2475 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2476 }
79072805 2477 }
3280af22
NIS
2478 sv_catpv(PL_linestr, "\n");
2479 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2480 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2481 PL_last_lop = PL_last_uni = Nullch;
3280af22 2482 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2483 SV *sv = NEWSV(85,0);
2484
2485 sv_upgrade(sv, SVt_PVMG);
3280af22 2486 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2487 (void)SvIOK_on(sv);
2488 SvIVX(sv) = 0;
57843af0 2489 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2490 }
79072805 2491 goto retry;
a687059c 2492 }
e929a76b 2493 do {
aa7440fb 2494 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2495 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2496 fake_eof:
2497 if (PL_rsfp) {
2498 if (PL_preprocess && !PL_in_eval)
2499 (void)PerlProc_pclose(PL_rsfp);
2500 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2501 PerlIO_clearerr(PL_rsfp);
2502 else
2503 (void)PerlIO_close(PL_rsfp);
2504 PL_rsfp = Nullfp;
2505 PL_doextract = FALSE;
2506 }
2507 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2508 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2509 sv_catpv(PL_linestr,";}");
2510 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2511 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2512 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2513 PL_minus_n = PL_minus_p = 0;
2514 goto retry;
2515 }
2516 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2517 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2518 sv_setpv(PL_linestr,"");
2519 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2520 }
2521 /* if it looks like the start of a BOM, check if it in fact is */
2522 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2523#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2524# ifdef __GNU_LIBRARY__
2525# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2526# define FTELL_FOR_PIPE_IS_BROKEN
2527# endif
e3f494f1
JH
2528# else
2529# ifdef __GLIBC__
2530# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2531# define FTELL_FOR_PIPE_IS_BROKEN
2532# endif
2533# endif
226017aa
DD
2534# endif
2535#endif
2536#ifdef FTELL_FOR_PIPE_IS_BROKEN
2537 /* This loses the possibility to detect the bof
2538 * situation on perl -P when the libc5 is being used.
2539 * Workaround? Maybe attach some extra state to PL_rsfp?
2540 */
2541 if (!PL_preprocess)
7e28d3af 2542 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2543#else
eb160463 2544 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2545#endif
7e28d3af 2546 if (bof) {
3280af22 2547 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2548 s = swallow_bom((U8*)s);
e929a76b 2549 }
378cc40b 2550 }
3280af22 2551 if (PL_doextract) {
a0d0e21e
LW
2552 /* Incest with pod. */
2553 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2554 sv_setpv(PL_linestr, "");
2555 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2556 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2557 PL_last_lop = PL_last_uni = Nullch;
3280af22 2558 PL_doextract = FALSE;
a0d0e21e 2559 }
4e553d73 2560 }
463ee0b2 2561 incline(s);
3280af22
NIS
2562 } while (PL_doextract);
2563 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2564 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2565 SV *sv = NEWSV(85,0);
a687059c 2566
93a17b20 2567 sv_upgrade(sv, SVt_PVMG);
3280af22 2568 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2569 (void)SvIOK_on(sv);
2570 SvIVX(sv) = 0;
57843af0 2571 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2572 }
3280af22 2573 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2574 PL_last_lop = PL_last_uni = Nullch;
57843af0 2575 if (CopLINE(PL_curcop) == 1) {
3280af22 2576 while (s < PL_bufend && isSPACE(*s))
79072805 2577 s++;
a0d0e21e 2578 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2579 s++;
44a8e56a 2580 d = Nullch;
3280af22 2581 if (!PL_in_eval) {
44a8e56a 2582 if (*s == '#' && *(s+1) == '!')
2583 d = s + 2;
2584#ifdef ALTERNATE_SHEBANG
2585 else {
2586 static char as[] = ALTERNATE_SHEBANG;
2587 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2588 d = s + (sizeof(as) - 1);
2589 }
2590#endif /* ALTERNATE_SHEBANG */
2591 }
2592 if (d) {
b8378b72 2593 char *ipath;
774d564b 2594 char *ipathend;
b8378b72 2595
774d564b 2596 while (isSPACE(*d))
b8378b72
CS
2597 d++;
2598 ipath = d;
774d564b 2599 while (*d && !isSPACE(*d))
2600 d++;
2601 ipathend = d;
2602
2603#ifdef ARG_ZERO_IS_SCRIPT
2604 if (ipathend > ipath) {
2605 /*
2606 * HP-UX (at least) sets argv[0] to the script name,
2607 * which makes $^X incorrect. And Digital UNIX and Linux,
2608 * at least, set argv[0] to the basename of the Perl
2609 * interpreter. So, having found "#!", we'll set it right.
2610 */
ee2f7564 2611 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2612 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2613 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2614 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2615 SvSETMAGIC(x);
2616 }
556c1dec
JH
2617 else {
2618 STRLEN blen;
2619 STRLEN llen;
2620 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2621 char *lstart = SvPV(x,llen);
2622 if (llen < blen) {
2623 bstart += blen - llen;
2624 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2625 sv_setpvn(x, ipath, ipathend - ipath);
2626 SvSETMAGIC(x);
2627 }
2628 }
2629 }
774d564b 2630 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2631 }
774d564b 2632#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2633
2634 /*
2635 * Look for options.
2636 */
748a9306 2637 d = instr(s,"perl -");
84e30d1a 2638 if (!d) {
748a9306 2639 d = instr(s,"perl");
84e30d1a
GS
2640#if defined(DOSISH)
2641 /* avoid getting into infinite loops when shebang
2642 * line contains "Perl" rather than "perl" */
2643 if (!d) {
2644 for (d = ipathend-4; d >= ipath; --d) {
2645 if ((*d == 'p' || *d == 'P')
2646 && !ibcmp(d, "perl", 4))
2647 {
2648 break;
2649 }
2650 }
2651 if (d < ipath)
2652 d = Nullch;
2653 }
2654#endif
2655 }
44a8e56a 2656#ifdef ALTERNATE_SHEBANG
2657 /*
2658 * If the ALTERNATE_SHEBANG on this system starts with a
2659 * character that can be part of a Perl expression, then if
2660 * we see it but not "perl", we're probably looking at the
2661 * start of Perl code, not a request to hand off to some
2662 * other interpreter. Similarly, if "perl" is there, but
2663 * not in the first 'word' of the line, we assume the line
2664 * contains the start of the Perl program.
44a8e56a 2665 */
2666 if (d && *s != '#') {
774d564b 2667 char *c = ipath;
44a8e56a 2668 while (*c && !strchr("; \t\r\n\f\v#", *c))
2669 c++;
2670 if (c < d)
2671 d = Nullch; /* "perl" not in first word; ignore */
2672 else
2673 *s = '#'; /* Don't try to parse shebang line */
2674 }
774d564b 2675#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2676#ifndef MACOS_TRADITIONAL
748a9306 2677 if (!d &&
44a8e56a 2678 *s == '#' &&
774d564b 2679 ipathend > ipath &&
3280af22 2680 !PL_minus_c &&
748a9306 2681 !instr(s,"indir") &&
3280af22 2682 instr(PL_origargv[0],"perl"))
748a9306 2683 {
9f68db38 2684 char **newargv;
9f68db38 2685
774d564b 2686 *ipathend = '\0';
2687 s = ipathend + 1;
3280af22 2688 while (s < PL_bufend && isSPACE(*s))
9f68db38 2689 s++;
3280af22
NIS
2690 if (s < PL_bufend) {
2691 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2692 newargv[1] = s;
3280af22 2693 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2694 s++;
2695 *s = '\0';
3280af22 2696 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2697 }
2698 else
3280af22 2699 newargv = PL_origargv;
774d564b 2700 newargv[0] = ipath;
b4748376 2701 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2702 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2703 }
bf4acbe4 2704#endif
748a9306 2705 if (d) {
3280af22
NIS
2706 U32 oldpdb = PL_perldb;
2707 bool oldn = PL_minus_n;
2708 bool oldp = PL_minus_p;
748a9306
LW
2709
2710 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2711 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2712
2713 if (*d++ == '-') {
a11ec5a9 2714 bool switches_done = PL_doswitches;
8cc95fdb 2715 do {
2716 if (*d == 'M' || *d == 'm') {
2717 char *m = d;
2718 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2719 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2720 (int)(d - m), m);
2721 }
2722 d = moreswitches(d);
2723 } while (d);
155aba94
GS
2724 if ((PERLDB_LINE && !oldpdb) ||
2725 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2726 /* if we have already added "LINE: while (<>) {",
2727 we must not do it again */
748a9306 2728 {
3280af22
NIS
2729 sv_setpv(PL_linestr, "");
2730 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2732 PL_last_lop = PL_last_uni = Nullch;
3280af22 2733 PL_preambled = FALSE;
84902520 2734 if (PERLDB_LINE)
3280af22 2735 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2736 goto retry;
2737 }
a11ec5a9
RGS
2738 if (PL_doswitches && !switches_done) {
2739 int argc = PL_origargc;
2740 char **argv = PL_origargv;
2741 do {
2742 argc--,argv++;
2743 } while (argc && argv[0][0] == '-' && argv[0][1]);
2744 init_argv_symbols(argc,argv);
2745 }
a0d0e21e 2746 }
79072805 2747 }
9f68db38 2748 }
79072805 2749 }
3280af22
NIS
2750 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2751 PL_bufptr = s;
2752 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2753 return yylex();
ae986130 2754 }
378cc40b 2755 goto retry;
4fdae800 2756 case '\r':
6a27c188 2757#ifdef PERL_STRICT_CR
cea2e8a9 2758 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2759 Perl_croak(aTHX_
cc507455 2760 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2761#endif
4fdae800 2762 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2763#ifdef MACOS_TRADITIONAL
2764 case '\312':
2765#endif
378cc40b
LW
2766 s++;
2767 goto retry;
378cc40b 2768 case '#':
e929a76b 2769 case '\n':
3280af22 2770 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2771 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2772 /* handle eval qq[#line 1 "foo"\n ...] */
2773 CopLINE_dec(PL_curcop);
2774 incline(s);
2775 }
3280af22 2776 d = PL_bufend;
a687059c 2777 while (s < d && *s != '\n')
378cc40b 2778 s++;
0f85fab0 2779 if (s < d)
378cc40b 2780 s++;
78c267c1 2781 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2782 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2783 incline(s);
3280af22
NIS
2784 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2785 PL_bufptr = s;
2786 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2787 return yylex();
a687059c 2788 }
378cc40b 2789 }
a687059c 2790 else {
378cc40b 2791 *s = '\0';
3280af22 2792 PL_bufend = s;
a687059c 2793 }
378cc40b
LW
2794 goto retry;
2795 case '-':
79072805 2796 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2797 I32 ftst = 0;
2798
378cc40b 2799 s++;
3280af22 2800 PL_bufptr = s;
748a9306
LW
2801 tmp = *s++;
2802
bf4acbe4 2803 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2804 s++;
2805
2806 if (strnEQ(s,"=>",2)) {
3280af22 2807 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2808 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2809 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2810 } );
748a9306
LW
2811 OPERATOR('-'); /* unary minus */
2812 }
3280af22 2813 PL_last_uni = PL_oldbufptr;
748a9306 2814 switch (tmp) {
e5edeb50
JH
2815 case 'r': ftst = OP_FTEREAD; break;
2816 case 'w': ftst = OP_FTEWRITE; break;
2817 case 'x': ftst = OP_FTEEXEC; break;
2818 case 'o': ftst = OP_FTEOWNED; break;
2819 case 'R': ftst = OP_FTRREAD; break;
2820 case 'W': ftst = OP_FTRWRITE; break;
2821 case 'X': ftst = OP_FTREXEC; break;
2822 case 'O': ftst = OP_FTROWNED; break;
2823 case 'e': ftst = OP_FTIS; break;
2824 case 'z': ftst = OP_FTZERO; break;
2825 case 's': ftst = OP_FTSIZE; break;
2826 case 'f': ftst = OP_FTFILE; break;
2827 case 'd': ftst = OP_FTDIR; break;
2828 case 'l': ftst = OP_FTLINK; break;
2829 case 'p': ftst = OP_FTPIPE; break;
2830 case 'S': ftst = OP_FTSOCK; break;
2831 case 'u': ftst = OP_FTSUID; break;
2832 case 'g': ftst = OP_FTSGID; break;
2833 case 'k': ftst = OP_FTSVTX; break;
2834 case 'b': ftst = OP_FTBLK; break;
2835 case 'c': ftst = OP_FTCHR; break;
2836 case 't': ftst = OP_FTTTY; break;
2837 case 'T': ftst = OP_FTTEXT; break;
2838 case 'B': ftst = OP_FTBINARY; break;
2839 case 'M': case 'A': case 'C':
2840 gv_fetchpv("\024",TRUE, SVt_PV);
2841 switch (tmp) {
2842 case 'M': ftst = OP_FTMTIME; break;
2843 case 'A': ftst = OP_FTATIME; break;
2844 case 'C': ftst = OP_FTCTIME; break;
2845 default: break;
2846 }
2847 break;
378cc40b 2848 default:
378cc40b
LW
2849 break;
2850 }
e5edeb50 2851 if (ftst) {
eb160463 2852 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2853 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2854 "### Saw file test %c\n", (int)ftst);
5f80b19c 2855 } );
e5edeb50
JH
2856 FTST(ftst);
2857 }
2858 else {
2859 /* Assume it was a minus followed by a one-letter named
2860 * subroutine call (or a -bareword), then. */
95c31fe3 2861 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2862 "### %c looked like a file test but was not\n",
2863 (int)ftst);
5f80b19c 2864 } );
e5edeb50
JH
2865 s -= 2;
2866 }
378cc40b 2867 }
a687059c
LW
2868 tmp = *s++;
2869 if (*s == tmp) {
2870 s++;
3280af22 2871 if (PL_expect == XOPERATOR)
79072805
LW
2872 TERM(POSTDEC);
2873 else
2874 OPERATOR(PREDEC);
2875 }
2876 else if (*s == '>') {
2877 s++;
2878 s = skipspace(s);
7e2040f0 2879 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2880 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2881 TOKEN(ARROW);
79072805 2882 }
748a9306
LW
2883 else if (*s == '$')
2884 OPERATOR(ARROW);
463ee0b2 2885 else
748a9306 2886 TERM(ARROW);
a687059c 2887 }
3280af22 2888 if (PL_expect == XOPERATOR)
79072805
LW
2889 Aop(OP_SUBTRACT);
2890 else {
3280af22 2891 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2892 check_uni();
79072805 2893 OPERATOR('-'); /* unary minus */
2f3197b3 2894 }
79072805 2895
378cc40b 2896 case '+':
a687059c
LW
2897 tmp = *s++;
2898 if (*s == tmp) {
378cc40b 2899 s++;
3280af22 2900 if (PL_expect == XOPERATOR)
79072805
LW
2901 TERM(POSTINC);
2902 else
2903 OPERATOR(PREINC);
378cc40b 2904 }
3280af22 2905 if (PL_expect == XOPERATOR)
79072805
LW
2906 Aop(OP_ADD);
2907 else {
3280af22 2908 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2909 check_uni();
a687059c 2910 OPERATOR('+');
2f3197b3 2911 }
a687059c 2912
378cc40b 2913 case '*':
3280af22
NIS
2914 if (PL_expect != XOPERATOR) {
2915 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2916 PL_expect = XOPERATOR;
2917 force_ident(PL_tokenbuf, '*');
2918 if (!*PL_tokenbuf)
a0d0e21e 2919 PREREF('*');
79072805 2920 TERM('*');
a687059c 2921 }
79072805
LW
2922 s++;
2923 if (*s == '*') {
a687059c 2924 s++;
79072805 2925 PWop(OP_POW);
a687059c 2926 }
79072805
LW
2927 Mop(OP_MULTIPLY);
2928
378cc40b 2929 case '%':
3280af22 2930 if (PL_expect == XOPERATOR) {
bbce6d69 2931 ++s;
2932 Mop(OP_MODULO);
a687059c 2933 }
3280af22
NIS
2934 PL_tokenbuf[0] = '%';
2935 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2936 if (!PL_tokenbuf[1]) {
2937 if (s == PL_bufend)
bbce6d69 2938 yyerror("Final % should be \\% or %name");
2939 PREREF('%');
a687059c 2940 }
3280af22 2941 PL_pending_ident = '%';
bbce6d69 2942 TERM('%');
a687059c 2943
378cc40b 2944 case '^':
79072805 2945 s++;
a0d0e21e 2946 BOop(OP_BIT_XOR);
79072805 2947 case '[':
3280af22 2948 PL_lex_brackets++;
79072805 2949 /* FALL THROUGH */
378cc40b 2950 case '~':
378cc40b 2951 case ',':
378cc40b
LW
2952 tmp = *s++;
2953 OPERATOR(tmp);
a0d0e21e
LW
2954 case ':':
2955 if (s[1] == ':') {
2956 len = 0;
2957 goto just_a_word;
2958 }
2959 s++;
09bef843
SB
2960 switch (PL_expect) {
2961 OP *attrs;
2962 case XOPERATOR:
2963 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2964 break;
2965 PL_bufptr = s; /* update in case we back off */
2966 goto grabattrs;
2967 case XATTRBLOCK:
2968 PL_expect = XBLOCK;
2969 goto grabattrs;
2970 case XATTRTERM:
2971 PL_expect = XTERMBLOCK;
2972 grabattrs:
2973 s = skipspace(s);
2974 attrs = Nullop;
7e2040f0 2975 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2976 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2977 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2978 if (tmp < 0) tmp = -tmp;
2979 switch (tmp) {
2980 case KEY_or:
2981 case KEY_and:
c963b151 2982 case KEY_err:
f9829d6b
GS
2983 case KEY_for:
2984 case KEY_unless:
2985 case KEY_if:
2986 case KEY_while:
2987 case KEY_until:
2988 goto got_attrs;
2989 default:
2990 break;
2991 }
2992 }
09bef843
SB
2993 if (*d == '(') {
2994 d = scan_str(d,TRUE,TRUE);
2995 if (!d) {
09bef843
SB
2996 /* MUST advance bufptr here to avoid bogus
2997 "at end of line" context messages from yyerror().
2998 */
2999 PL_bufptr = s + len;
3000 yyerror("Unterminated attribute parameter in attribute list");
3001 if (attrs)
3002 op_free(attrs);
3003 return 0; /* EOF indicator */
3004 }
3005 }
3006 if (PL_lex_stuff) {
3007 SV *sv = newSVpvn(s, len);
3008 sv_catsv(sv, PL_lex_stuff);
3009 attrs = append_elem(OP_LIST, attrs,
3010 newSVOP(OP_CONST, 0, sv));
3011 SvREFCNT_dec(PL_lex_stuff);
3012 PL_lex_stuff = Nullsv;
3013 }
3014 else {
d3cea301
SB
3015 /* NOTE: any CV attrs applied here need to be part of
3016 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3017 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3018 CvLVALUE_on(PL_compcv);
3019 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3020 CvLOCKED_on(PL_compcv);
3021 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3022 CvMETHOD_on(PL_compcv);
87ecf892 3023#ifdef USE_ITHREADS
d3cea301
SB
3024 else if (PL_in_my == KEY_our && len == 6 &&
3025 strnEQ(s, "unique", len))
7fb37951 3026 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3027#endif
78f9721b
SM
3028 /* After we've set the flags, it could be argued that
3029 we don't need to do the attributes.pm-based setting
3030 process, and shouldn't bother appending recognized
d3cea301
SB
3031 flags. To experiment with that, uncomment the
3032 following "else". (Note that's already been
3033 uncommented. That keeps the above-applied built-in
3034 attributes from being intercepted (and possibly
3035 rejected) by a package's attribute routines, but is
3036 justified by the performance win for the common case
3037 of applying only built-in attributes.) */
0256094b 3038 else
78f9721b
SM
3039 attrs = append_elem(OP_LIST, attrs,
3040 newSVOP(OP_CONST, 0,
3041 newSVpvn(s, len)));
09bef843
SB
3042 }
3043 s = skipspace(d);
0120eecf 3044 if (*s == ':' && s[1] != ':')
09bef843 3045 s = skipspace(s+1);
0120eecf
GS
3046 else if (s == d)
3047 break; /* require real whitespace or :'s */
09bef843 3048 }
f9829d6b 3049 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3050 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3051 char q = ((*s == '\'') ? '"' : '\'');
3052 /* If here for an expression, and parsed no attrs, back off. */
3053 if (tmp == '=' && !attrs) {
3054 s = PL_bufptr;
3055 break;
3056 }
3057 /* MUST advance bufptr here to avoid bogus "at end of line"
3058 context messages from yyerror().
3059 */
3060 PL_bufptr = s;
3061 if (!*s)
3062 yyerror("Unterminated attribute list");
3063 else
3064 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3065 q, *s, q));
3066 if (attrs)
3067 op_free(attrs);
3068 OPERATOR(':');
3069 }
f9829d6b 3070 got_attrs:
09bef843
SB
3071 if (attrs) {
3072 PL_nextval[PL_nexttoke].opval = attrs;
3073 force_next(THING);
3074 }
3075 TOKEN(COLONATTR);
3076 }
a0d0e21e 3077 OPERATOR(':');
8990e307
LW
3078 case '(':
3079 s++;
3280af22
NIS
3080 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3081 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3082 else
3280af22 3083 PL_expect = XTERM;
a0d0e21e 3084 TOKEN('(');
378cc40b 3085 case ';':
f4dd75d9 3086 CLINE;
378cc40b
LW
3087 tmp = *s++;
3088 OPERATOR(tmp);
3089 case ')':
378cc40b 3090 tmp = *s++;
16d20bd9
AD
3091 s = skipspace(s);
3092 if (*s == '{')
3093 PREBLOCK(tmp);
378cc40b 3094 TERM(tmp);
79072805
LW
3095 case ']':
3096 s++;
3280af22 3097 if (PL_lex_brackets <= 0)
d98d5fff 3098 yyerror("Unmatched right square bracket");
463ee0b2 3099 else
3280af22
NIS
3100 --PL_lex_brackets;
3101 if (PL_lex_state == LEX_INTERPNORMAL) {
3102 if (PL_lex_brackets == 0) {
a0d0e21e 3103 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3104 PL_lex_state = LEX_INTERPEND;
79072805
LW
3105 }
3106 }
4633a7c4 3107 TERM(']');
79072805
LW
3108 case '{':
3109 leftbracket:
79072805 3110 s++;
3280af22 3111 if (PL_lex_brackets > 100) {
8edd5f42 3112 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3113 }
3280af22 3114 switch (PL_expect) {
a0d0e21e 3115 case XTERM:
3280af22 3116 if (PL_lex_formbrack) {
a0d0e21e
LW
3117 s--;
3118 PRETERMBLOCK(DO);
3119 }
3280af22
NIS
3120 if (PL_oldoldbufptr == PL_last_lop)
3121 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3122 else
3280af22 3123 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3124 OPERATOR(HASHBRACK);
a0d0e21e 3125 case XOPERATOR:
bf4acbe4 3126 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3127 s++;
44a8e56a 3128 d = s;
3280af22
NIS
3129 PL_tokenbuf[0] = '\0';
3130 if (d < PL_bufend && *d == '-') {
3131 PL_tokenbuf[0] = '-';
44a8e56a 3132 d++;
bf4acbe4 3133 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3134 d++;
3135 }
7e2040f0 3136 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3137 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3138 FALSE, &len);
bf4acbe4 3139 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3140 d++;
3141 if (*d == '}') {
3280af22 3142 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3143 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3144 if (minus)
3145 force_next('-');
748a9306
LW
3146 }
3147 }
3148 /* FALL THROUGH */
09bef843 3149 case XATTRBLOCK:
748a9306 3150 case XBLOCK:
3280af22
NIS
3151 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3152 PL_expect = XSTATE;
a0d0e21e 3153 break;
09bef843 3154 case XATTRTERM:
a0d0e21e 3155 case XTERMBLOCK:
3280af22
NIS
3156 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3157 PL_expect = XSTATE;
a0d0e21e
LW
3158 break;
3159 default: {
3160 char *t;
3280af22
NIS
3161 if (PL_oldoldbufptr == PL_last_lop)
3162 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3163 else
3280af22 3164 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3165 s = skipspace(s);
8452ff4b
SB
3166 if (*s == '}') {
3167 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3168 PL_expect = XTERM;
3169 /* This hack is to get the ${} in the message. */
3170 PL_bufptr = s+1;
3171 yyerror("syntax error");
3172 break;
3173 }
a0d0e21e 3174 OPERATOR(HASHBRACK);
8452ff4b 3175 }
b8a4b1be
GS
3176 /* This hack serves to disambiguate a pair of curlies
3177 * as being a block or an anon hash. Normally, expectation
3178 * determines that, but in cases where we're not in a
3179 * position to expect anything in particular (like inside
3180 * eval"") we have to resolve the ambiguity. This code
3181 * covers the case where the first term in the curlies is a
3182 * quoted string. Most other cases need to be explicitly
3183 * disambiguated by prepending a `+' before the opening
3184 * curly in order to force resolution as an anon hash.
3185 *
3186 * XXX should probably propagate the outer expectation
3187 * into eval"" to rely less on this hack, but that could
3188 * potentially break current behavior of eval"".
3189 * GSAR 97-07-21
3190 */
3191 t = s;
3192 if (*s == '\'' || *s == '"' || *s == '`') {
3193 /* common case: get past first string, handling escapes */
3280af22 3194 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3195 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3196 t++;
3197 t++;
a0d0e21e 3198 }
b8a4b1be 3199 else if (*s == 'q') {
3280af22 3200 if (++t < PL_bufend
b8a4b1be 3201 && (!isALNUM(*t)
3280af22 3202 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3203 && !isALNUM(*t))))
3204 {
b8a4b1be
GS
3205 char *tmps;
3206 char open, close, term;
3207 I32 brackets = 1;
3208
3280af22 3209 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3210 t++;
3211 term = *t;
3212 open = term;
3213 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3214 term = tmps[5];
3215 close = term;
3216 if (open == close)
3280af22
NIS
3217 for (t++; t < PL_bufend; t++) {
3218 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3219 t++;
6d07e5e9 3220 else if (*t == open)
b8a4b1be
GS
3221 break;
3222 }
3223 else
3280af22
NIS
3224 for (t++; t < PL_bufend; t++) {
3225 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3226 t++;
6d07e5e9 3227 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3228 break;
3229 else if (*t == open)
3230 brackets++;
3231 }
3232 }
3233 t++;
a0d0e21e 3234 }
7e2040f0 3235 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3236 t += UTF8SKIP(t);
7e2040f0 3237 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3238 t += UTF8SKIP(t);
a0d0e21e 3239 }
3280af22 3240 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3241 t++;
b8a4b1be
GS
3242 /* if comma follows first term, call it an anon hash */
3243 /* XXX it could be a comma expression with loop modifiers */
3280af22 3244 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3245 || (*t == '=' && t[1] == '>')))
a0d0e21e 3246 OPERATOR(HASHBRACK);
3280af22 3247 if (PL_expect == XREF)
4e4e412b 3248 PL_expect = XTERM;
a0d0e21e 3249 else {
3280af22
NIS
3250 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3251 PL_expect = XSTATE;
a0d0e21e 3252 }
8990e307 3253 }
a0d0e21e 3254 break;
463ee0b2 3255 }
57843af0 3256 yylval.ival = CopLINE(PL_curcop);
79072805 3257 if (isSPACE(*s) || *s == '#')
3280af22 3258 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3259 TOKEN('{');
378cc40b 3260 case '}':
79072805
LW
3261 rightbracket:
3262 s++;
3280af22 3263 if (PL_lex_brackets <= 0)
d98d5fff 3264 yyerror("Unmatched right curly bracket");
463ee0b2 3265 else
3280af22 3266 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3267 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3268 PL_lex_formbrack = 0;
3269 if (PL_lex_state == LEX_INTERPNORMAL) {
3270 if (PL_lex_brackets == 0) {
9059aa12
LW
3271 if (PL_expect & XFAKEBRACK) {
3272 PL_expect &= XENUMMASK;
3280af22
NIS
3273 PL_lex_state = LEX_INTERPEND;
3274 PL_bufptr = s;
cea2e8a9 3275 return yylex(); /* ignore fake brackets */
79072805 3276 }
fa83b5b6 3277 if (*s == '-' && s[1] == '>')
3280af22 3278 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3279 else if (*s != '[' && *s != '{')
3280af22 3280 PL_lex_state = LEX_INTERPEND;
79072805
LW
3281 }
3282 }
9059aa12
LW
3283 if (PL_expect & XFAKEBRACK) {
3284 PL_expect &= XENUMMASK;
3280af22 3285 PL_bufptr = s;
cea2e8a9 3286 return yylex(); /* ignore fake brackets */
748a9306 3287 }
79072805
LW
3288 force_next('}');
3289 TOKEN(';');
378cc40b
LW
3290 case '&':
3291 s++;
3292 tmp = *s++;
3293 if (tmp == '&')
a0d0e21e 3294 AOPERATOR(ANDAND);
378cc40b 3295 s--;
3280af22 3296 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3297 if (ckWARN(WARN_SEMICOLON)
3298 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3299 {
57843af0 3300 CopLINE_dec(PL_curcop);
9014280d 3301 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3302 CopLINE_inc(PL_curcop);
463ee0b2 3303 }
79072805 3304 BAop(OP_BIT_AND);
463ee0b2 3305 }
79072805 3306
3280af22
NIS
3307 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3308 if (*PL_tokenbuf) {
3309 PL_expect = XOPERATOR;
3310 force_ident(PL_tokenbuf, '&');
463ee0b2 3311 }
79072805
LW
3312 else
3313 PREREF('&');
c07a80fd 3314 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3315 TERM('&');
3316
378cc40b
LW
3317 case '|':
3318 s++;
3319 tmp = *s++;
3320 if (tmp == '|')
a0d0e21e 3321 AOPERATOR(OROR);
378cc40b 3322 s--;
79072805 3323 BOop(OP_BIT_OR);
378cc40b
LW
3324 case '=':
3325 s++;
3326 tmp = *s++;
3327 if (tmp == '=')
79072805
LW
3328 Eop(OP_EQ);
3329 if (tmp == '>')
3330 OPERATOR(',');
378cc40b 3331 if (tmp == '~')
79072805 3332 PMop(OP_MATCH);
599cee73 3333 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3334 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3335 s--;
3280af22
NIS
3336 if (PL_expect == XSTATE && isALPHA(tmp) &&
3337 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3338 {
3280af22
NIS
3339 if (PL_in_eval && !PL_rsfp) {
3340 d = PL_bufend;
a5f75d66
AD
3341 while (s < d) {
3342 if (*s++ == '\n') {
3343 incline(s);
3344 if (strnEQ(s,"=cut",4)) {
3345 s = strchr(s,'\n');
3346 if (s)
3347 s++;
3348 else
3349 s = d;
3350 incline(s);
3351 goto retry;
3352 }
3353 }
3354 }
3355 goto retry;
3356 }
3280af22
NIS
3357 s = PL_bufend;
3358 PL_doextract = TRUE;
a0d0e21e
LW
3359 goto retry;
3360 }
3280af22 3361 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3362 char *t;
51882d45 3363#ifdef PERL_STRICT_CR
bf4acbe4 3364 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3365#else
bf4acbe4 3366 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3367#endif
a0d0e21e
LW
3368 if (*t == '\n' || *t == '#') {
3369 s--;
3280af22 3370 PL_expect = XBLOCK;
a0d0e21e
LW
3371 goto leftbracket;
3372 }
79072805 3373 }
a0d0e21e
LW
3374 yylval.ival = 0;
3375 OPERATOR(ASSIGNOP);
378cc40b
LW
3376 case '!':
3377 s++;
3378 tmp = *s++;
3379 if (tmp == '=')
79072805 3380 Eop(OP_NE);
378cc40b 3381 if (tmp == '~')
79072805 3382 PMop(OP_NOT);
378cc40b
LW
3383 s--;
3384 OPERATOR('!');
3385 case '<':
3280af22 3386 if (PL_expect != XOPERATOR) {
93a17b20 3387 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3388 check_uni();
79072805
LW
3389 if (s[1] == '<')
3390 s = scan_heredoc(s);
3391 else
3392 s = scan_inputsymbol(s);
3393 TERM(sublex_start());
378cc40b
LW
3394 }
3395 s++;
3396 tmp = *s++;
3397 if (tmp == '<')
79072805 3398 SHop(OP_LEFT_SHIFT);
395c3793
LW
3399 if (tmp == '=') {
3400 tmp = *s++;
3401 if (tmp == '>')
79072805 3402 Eop(OP_NCMP);
395c3793 3403 s--;
79072805 3404 Rop(OP_LE);
395c3793 3405 }
378cc40b 3406 s--;
79072805 3407 Rop(OP_LT);
378cc40b
LW
3408 case '>':
3409 s++;
3410 tmp = *s++;
3411 if (tmp == '>')
79072805 3412 SHop(OP_RIGHT_SHIFT);
378cc40b 3413 if (tmp == '=')
79072805 3414 Rop(OP_GE);
378cc40b 3415 s--;
79072805 3416 Rop(OP_GT);
378cc40b
LW
3417
3418 case '$':
bbce6d69 3419 CLINE;
3420
3280af22
NIS
3421 if (PL_expect == XOPERATOR) {
3422 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3423 PL_expect = XTERM;
a0d0e21e 3424 depcom();
bbce6d69 3425 return ','; /* grandfather non-comma-format format */
a0d0e21e 3426 }
8990e307 3427 }
a0d0e21e 3428
7e2040f0 3429 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3430 PL_tokenbuf[0] = '@';
376b8730
SM
3431 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3432 sizeof PL_tokenbuf - 1, FALSE);
3433 if (PL_expect == XOPERATOR)
3434 no_op("Array length", s);
3280af22 3435 if (!PL_tokenbuf[1])
a0d0e21e 3436 PREREF(DOLSHARP);
3280af22
NIS
3437 PL_expect = XOPERATOR;
3438 PL_pending_ident = '#';
463ee0b2 3439 TOKEN(DOLSHARP);
79072805 3440 }
bbce6d69 3441
3280af22 3442 PL_tokenbuf[0] = '$';
376b8730
SM
3443 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3444 sizeof PL_tokenbuf - 1, FALSE);
3445 if (PL_expect == XOPERATOR)
3446 no_op("Scalar", s);
3280af22
NIS
3447 if (!PL_tokenbuf[1]) {
3448 if (s == PL_bufend)
bbce6d69 3449 yyerror("Final $ should be \\$ or $name");
3450 PREREF('$');
8990e307 3451 }
a0d0e21e 3452
bbce6d69 3453 /* This kludge not intended to be bulletproof. */
3280af22 3454 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3455 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3456 newSViv(PL_compiling.cop_arybase));
bbce6d69 3457 yylval.opval->op_private = OPpCONST_ARYBASE;
3458 TERM(THING);
3459 }
3460
ff68c719 3461 d = s;
69d2bceb 3462 tmp = (I32)*s;
3280af22 3463 if (PL_lex_state == LEX_NORMAL)
ff68c719 3464 s = skipspace(s);
3465
3280af22 3466 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3467 char *t;
3468 if (*s == '[') {
3280af22 3469 PL_tokenbuf[0] = '@';
599cee73 3470 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3471 for(t = s + 1;
7e2040f0 3472 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3473 t++) ;
a0d0e21e 3474 if (*t++ == ',') {
3280af22
NIS
3475 PL_bufptr = skipspace(PL_bufptr);
3476 while (t < PL_bufend && *t != ']')
bbce6d69 3477 t++;
9014280d 3478 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3479 "Multidimensional syntax %.*s not supported",
3480 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3481 }
3482 }
bbce6d69 3483 }
3484 else if (*s == '{') {
3280af22 3485 PL_tokenbuf[0] = '%';
599cee73 3486 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3487 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3488 {
3280af22 3489 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3490 STRLEN len;
3491 for (t++; isSPACE(*t); t++) ;
7e2040f0 3492 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3493 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3494 for (; isSPACE(*t); t++) ;
864dbfa3 3495 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3496 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3497 "You need to quote \"%s\"", tmpbuf);
748a9306 3498 }
93a17b20
LW
3499 }
3500 }
2f3197b3 3501 }