This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[Encode] 1.80 released
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff 65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
IRC
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
IRC
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
6f33ba73 170#define UNI2(f,x) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c 176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
177#define UNI(f) UNI2(f,XTERM)
178#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 179
79072805 180#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 181 REPORT("uni",f) \
3280af22
NIS
182 PL_bufptr = s, \
183 PL_last_uni = PL_oldbufptr, \
79072805
LW
184 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
185
9f68db38 186/* grandfather return to old style */
3280af22 187#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 188
8fa7f367
JH
189#ifdef DEBUGGING
190
2d00ba3b 191STATIC void
61b2116b 192S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 193{
998054bd 194 DEBUG_T({
9c5ffd7c 195 SV* report = newSVpv(thing, 0);
29b291f7
RB
196 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
197 (IV)rv);
998054bd
SC
198
199 if (s - PL_bufptr > 0)
200 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
201 else {
202 if (PL_oldbufptr && *PL_oldbufptr)
203 sv_catpv(report, PL_tokenbuf);
204 }
205 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 206 });
998054bd
SC
207}
208
8fa7f367
JH
209#endif
210
ffb4593c
NT
211/*
212 * S_ao
213 *
c963b151
BD
214 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
215 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
216 */
217
76e3520e 218STATIC int
cea2e8a9 219S_ao(pTHX_ int toketype)
a0d0e21e 220{
3280af22
NIS
221 if (*PL_bufptr == '=') {
222 PL_bufptr++;
a0d0e21e
LW
223 if (toketype == ANDAND)
224 yylval.ival = OP_ANDASSIGN;
225 else if (toketype == OROR)
226 yylval.ival = OP_ORASSIGN;
c963b151
BD
227 else if (toketype == DORDOR)
228 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
229 toketype = ASSIGNOP;
230 }
231 return toketype;
232}
233
ffb4593c
NT
234/*
235 * S_no_op
236 * When Perl expects an operator and finds something else, no_op
237 * prints the warning. It always prints "<something> found where
238 * operator expected. It prints "Missing semicolon on previous line?"
239 * if the surprise occurs at the start of the line. "do you need to
240 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
241 * where the compiler doesn't know if foo is a method call or a function.
242 * It prints "Missing operator before end of line" if there's nothing
243 * after the missing operator, or "... before <...>" if there is something
244 * after the missing operator.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_no_op(pTHX_ char *what, char *s)
463ee0b2 249{
3280af22
NIS
250 char *oldbp = PL_bufptr;
251 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 252
1189a94a
GS
253 if (!s)
254 s = oldbp;
07c798fb 255 else
1189a94a 256 PL_bufptr = s;
cea2e8a9 257 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 258 if (is_first)
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 260 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 261 char *t;
7e2040f0 262 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 263 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 264 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 265 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 266 }
07c798fb
HS
267 else {
268 assert(s >= oldbp);
cea2e8a9 269 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 270 }
3280af22 271 PL_bufptr = oldbp;
8990e307
LW
272}
273
ffb4593c
NT
274/*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
76e3520e 283STATIC void
cea2e8a9 284S_missingterm(pTHX_ char *s)
8990e307
LW
285{
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
d2719217 290 if (nl)
8990e307
LW
291 *nl = '\0';
292 }
9d116dd7
JH
293 else if (
294#ifdef EBCDIC
295 iscntrl(PL_multi_close)
296#else
297 PL_multi_close < 32 || PL_multi_close == 127
298#endif
299 ) {
8990e307 300 *tmpbuf = '^';
3280af22 301 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
eb160463 307 *tmpbuf = (char)PL_multi_close;
8990e307
LW
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 313}
79072805 314
ffb4593c
NT
315/*
316 * Perl_deprecate
ffb4593c
NT
317 */
318
79072805 319void
864dbfa3 320Perl_deprecate(pTHX_ char *s)
a0d0e21e 321{
599cee73 322 if (ckWARN(WARN_DEPRECATED))
9014280d 323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
324}
325
12bcd1a6
PM
326void
327Perl_deprecate_old(pTHX_ char *s)
328{
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340}
341
ffb4593c
NT
342/*
343 * depcom
9cbb5ea2 344 * Deprecate a comma-less variable list.
ffb4593c
NT
345 */
346
76e3520e 347STATIC void
cea2e8a9 348S_depcom(pTHX)
a0d0e21e 349{
12bcd1a6 350 deprecate_old("comma-less variable list");
a0d0e21e
LW
351}
352
ffb4593c 353/*
9cbb5ea2
GS
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
ffb4593c
NT
356 */
357
c39cd008
GS
358#ifdef PERL_CR_FILTER
359static void
360strip_return(SV *sv)
361{
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379}
a868473f 380
76e3520e 381STATIC I32
c39cd008 382S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 383{
c39cd008
GS
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
a868473f
NIS
388}
389#endif
390
ffb4593c
NT
391/*
392 * Perl_lex_start
9cbb5ea2
GS
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
ffb4593c
NT
395 */
396
a0d0e21e 397void
864dbfa3 398Perl_lex_start(pTHX_ SV *line)
79072805 399{
8990e307
LW
400 char *s;
401 STRLEN len;
402
3280af22
NIS
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
3280af22
NIS
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
7766f137 408 SAVEVPTR(PL_lex_inpat);
3280af22 409 SAVEI32(PL_lex_inwhat);
18b09519
GS
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
18b09519 417 }
57843af0 418 SAVECOPLINE(PL_curcop);
3280af22
NIS
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
3280af22
NIS
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
427 SAVEPPTR(PL_lex_brackstack);
428 SAVEPPTR(PL_lex_casestack);
c76ac1ee 429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
09bef843 432 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 433 SAVESPTR(PL_lex_repl);
bebdddfc
GS
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
3280af22
NIS
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
3280af22
NIS
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
443 SAVEFREEPV(PL_lex_brackstack);
444 SAVEFREEPV(PL_lex_casestack);
445 PL_lex_casemods = 0;
446 *PL_lex_casestack = '\0';
447 PL_lex_dojoin = 0;
448 PL_lex_starts = 0;
449 PL_lex_stuff = Nullsv;
450 PL_lex_repl = Nullsv;
451 PL_lex_inpat = 0;
76be56bc 452 PL_nexttoke = 0;
3280af22 453 PL_lex_inwhat = 0;
09bef843 454 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
455 PL_linestr = line;
456 if (SvREADONLY(PL_linestr))
457 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
458 s = SvPV(PL_linestr, len);
6f27f9a7 459 if (!len || s[len-1] != ';') {
3280af22
NIS
460 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
461 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
462 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 463 }
3280af22
NIS
464 SvTEMP_off(PL_linestr);
465 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
466 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 467 PL_last_lop = PL_last_uni = Nullch;
3280af22 468 PL_rsfp = 0;
79072805 469}
a687059c 470
ffb4593c
NT
471/*
472 * Perl_lex_end
9cbb5ea2
GS
473 * Finalizer for lexing operations. Must be called when the parser is
474 * done with the lexer.
ffb4593c
NT
475 */
476
463ee0b2 477void
864dbfa3 478Perl_lex_end(pTHX)
463ee0b2 479{
3280af22 480 PL_doextract = FALSE;
463ee0b2
LW
481}
482
ffb4593c
NT
483/*
484 * S_incline
485 * This subroutine has nothing to do with tilting, whether at windmills
486 * or pinball tables. Its name is short for "increment line". It
57843af0 487 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 488 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
489 * # line 500 "foo.pm"
490 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
491 */
492
76e3520e 493STATIC void
cea2e8a9 494S_incline(pTHX_ char *s)
463ee0b2
LW
495{
496 char *t;
497 char *n;
73659bf1 498 char *e;
463ee0b2 499 char ch;
463ee0b2 500
57843af0 501 CopLINE_inc(PL_curcop);
463ee0b2
LW
502 if (*s++ != '#')
503 return;
bf4acbe4 504 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
505 if (strnEQ(s, "line", 4))
506 s += 4;
507 else
508 return;
084592ab 509 if (SPACE_OR_TAB(*s))
73659bf1 510 s++;
4e553d73 511 else
73659bf1 512 return;
bf4acbe4 513 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
514 if (!isDIGIT(*s))
515 return;
516 n = s;
517 while (isDIGIT(*s))
518 s++;
bf4acbe4 519 while (SPACE_OR_TAB(*s))
463ee0b2 520 s++;
73659bf1 521 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 522 s++;
73659bf1
GS
523 e = t + 1;
524 }
463ee0b2 525 else {
463ee0b2 526 for (t = s; !isSPACE(*t); t++) ;
73659bf1 527 e = t;
463ee0b2 528 }
bf4acbe4 529 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
530 e++;
531 if (*e != '\n' && *e != '\0')
532 return; /* false alarm */
533
463ee0b2
LW
534 ch = *t;
535 *t = '\0';
f4dd75d9 536 if (t - s > 0) {
05ec9bb3 537 CopFILE_free(PL_curcop);
57843af0 538 CopFILE_set(PL_curcop, s);
f4dd75d9 539 }
463ee0b2 540 *t = ch;
57843af0 541 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
542}
543
ffb4593c
NT
544/*
545 * S_skipspace
546 * Called to gobble the appropriate amount and type of whitespace.
547 * Skips comments as well.
548 */
549
76e3520e 550STATIC char *
cea2e8a9 551S_skipspace(pTHX_ register char *s)
a687059c 552{
3280af22 553 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 554 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
555 s++;
556 return s;
557 }
558 for (;;) {
fd049845 559 STRLEN prevlen;
09bef843 560 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 561 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
562 while (s < PL_bufend && isSPACE(*s)) {
563 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
564 incline(s);
565 }
ffb4593c
NT
566
567 /* comment */
3280af22
NIS
568 if (s < PL_bufend && *s == '#') {
569 while (s < PL_bufend && *s != '\n')
463ee0b2 570 s++;
60e6418e 571 if (s < PL_bufend) {
463ee0b2 572 s++;
60e6418e
GS
573 if (PL_in_eval && !PL_rsfp) {
574 incline(s);
575 continue;
576 }
577 }
463ee0b2 578 }
ffb4593c
NT
579
580 /* only continue to recharge the buffer if we're at the end
581 * of the buffer, we're not reading from a source filter, and
582 * we're in normal lexing mode
583 */
09bef843
SB
584 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
585 PL_lex_state == LEX_FORMLINE)
463ee0b2 586 return s;
ffb4593c
NT
587
588 /* try to recharge the buffer */
9cbb5ea2
GS
589 if ((s = filter_gets(PL_linestr, PL_rsfp,
590 (prevlen = SvCUR(PL_linestr)))) == Nullch)
591 {
592 /* end of file. Add on the -p or -n magic */
3280af22
NIS
593 if (PL_minus_n || PL_minus_p) {
594 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
595 ";}continue{print or die qq(-p destination: $!\\n)" :
596 "");
3280af22
NIS
597 sv_catpv(PL_linestr,";}");
598 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
599 }
600 else
3280af22 601 sv_setpv(PL_linestr,";");
ffb4593c
NT
602
603 /* reset variables for next time we lex */
9cbb5ea2
GS
604 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
605 = SvPVX(PL_linestr);
3280af22 606 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 607 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
608
609 /* Close the filehandle. Could be from -P preprocessor,
610 * STDIN, or a regular file. If we were reading code from
611 * STDIN (because the commandline held no -e or filename)
612 * then we don't close it, we reset it so the code can
613 * read from STDIN too.
614 */
615
3280af22
NIS
616 if (PL_preprocess && !PL_in_eval)
617 (void)PerlProc_pclose(PL_rsfp);
618 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
619 PerlIO_clearerr(PL_rsfp);
8990e307 620 else
3280af22
NIS
621 (void)PerlIO_close(PL_rsfp);
622 PL_rsfp = Nullfp;
463ee0b2
LW
623 return s;
624 }
ffb4593c
NT
625
626 /* not at end of file, so we only read another line */
09bef843
SB
627 /* make corresponding updates to old pointers, for yyerror() */
628 oldprevlen = PL_oldbufptr - PL_bufend;
629 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
630 if (PL_last_uni)
631 oldunilen = PL_last_uni - PL_bufend;
632 if (PL_last_lop)
633 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
634 PL_linestart = PL_bufptr = s + prevlen;
635 PL_bufend = s + SvCUR(PL_linestr);
636 s = PL_bufptr;
09bef843
SB
637 PL_oldbufptr = s + oldprevlen;
638 PL_oldoldbufptr = s + oldoldprevlen;
639 if (PL_last_uni)
640 PL_last_uni = s + oldunilen;
641 if (PL_last_lop)
642 PL_last_lop = s + oldloplen;
a0d0e21e 643 incline(s);
ffb4593c
NT
644
645 /* debugger active and we're not compiling the debugger code,
646 * so store the line into the debugger's array of lines
647 */
3280af22 648 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
649 SV *sv = NEWSV(85,0);
650
651 sv_upgrade(sv, SVt_PVMG);
3280af22 652 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
653 (void)SvIOK_on(sv);
654 SvIVX(sv) = 0;
57843af0 655 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 656 }
463ee0b2 657 }
a687059c 658}
378cc40b 659
ffb4593c
NT
660/*
661 * S_check_uni
662 * Check the unary operators to ensure there's no ambiguity in how they're
663 * used. An ambiguous piece of code would be:
664 * rand + 5
665 * This doesn't mean rand() + 5. Because rand() is a unary operator,
666 * the +5 is its argument.
667 */
668
76e3520e 669STATIC void
cea2e8a9 670S_check_uni(pTHX)
ba106d47 671{
2f3197b3 672 char *s;
a0d0e21e 673 char *t;
2f3197b3 674
3280af22 675 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 676 return;
3280af22
NIS
677 while (isSPACE(*PL_last_uni))
678 PL_last_uni++;
7e2040f0 679 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 680 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 681 return;
0453d815 682 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 683 char ch = *s;
0453d815 684 *s = '\0';
9014280d 685 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 686 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
687 PL_last_uni);
688 *s = ch;
689 }
2f3197b3
LW
690}
691
ffb4593c
NT
692/*
693 * LOP : macro to build a list operator. Its behaviour has been replaced
694 * with a subroutine, S_lop() for which LOP is just another name.
695 */
696
a0d0e21e
LW
697#define LOP(f,x) return lop(f,x,s)
698
ffb4593c
NT
699/*
700 * S_lop
701 * Build a list operator (or something that might be one). The rules:
702 * - if we have a next token, then it's a list operator [why?]
703 * - if the next thing is an opening paren, then it's a function
704 * - else it's a list operator
705 */
706
76e3520e 707STATIC I32
a0be28da 708S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 709{
79072805 710 yylval.ival = f;
35c8bce7 711 CLINE;
075953c3 712 REPORT("lop", f)
3280af22
NIS
713 PL_expect = x;
714 PL_bufptr = s;
715 PL_last_lop = PL_oldbufptr;
eb160463 716 PL_last_lop_op = (OPCODE)f;
3280af22 717 if (PL_nexttoke)
a0d0e21e 718 return LSTOP;
79072805
LW
719 if (*s == '(')
720 return FUNC;
721 s = skipspace(s);
722 if (*s == '(')
723 return FUNC;
724 else
725 return LSTOP;
726}
727
ffb4593c
NT
728/*
729 * S_force_next
9cbb5ea2 730 * When the lexer realizes it knows the next token (for instance,
ffb4593c 731 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
732 * to know what token to return the next time the lexer is called. Caller
733 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
734 * handles the token correctly.
ffb4593c
NT
735 */
736
4e553d73 737STATIC void
cea2e8a9 738S_force_next(pTHX_ I32 type)
79072805 739{
3280af22
NIS
740 PL_nexttype[PL_nexttoke] = type;
741 PL_nexttoke++;
742 if (PL_lex_state != LEX_KNOWNEXT) {
743 PL_lex_defer = PL_lex_state;
744 PL_lex_expect = PL_expect;
745 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
746 }
747}
748
ffb4593c
NT
749/*
750 * S_force_word
751 * When the lexer knows the next thing is a word (for instance, it has
752 * just seen -> and it knows that the next char is a word char, then
753 * it calls S_force_word to stick the next word into the PL_next lookahead.
754 *
755 * Arguments:
b1b65b59 756 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
757 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
758 * int check_keyword : if true, Perl checks to make sure the word isn't
759 * a keyword (do this if the word is a label, e.g. goto FOO)
760 * int allow_pack : if true, : characters will also be allowed (require,
761 * use, etc. do this)
9cbb5ea2 762 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
763 */
764
76e3520e 765STATIC char *
cea2e8a9 766S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 767{
463ee0b2
LW
768 register char *s;
769 STRLEN len;
4e553d73 770
463ee0b2
LW
771 start = skipspace(start);
772 s = start;
7e2040f0 773 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 774 (allow_pack && *s == ':') ||
15f0808c 775 (allow_initial_tick && *s == '\'') )
a0d0e21e 776 {
3280af22
NIS
777 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
778 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
779 return start;
780 if (token == METHOD) {
781 s = skipspace(s);
782 if (*s == '(')
3280af22 783 PL_expect = XTERM;
463ee0b2 784 else {
3280af22 785 PL_expect = XOPERATOR;
463ee0b2 786 }
79072805 787 }
3280af22
NIS
788 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
789 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
790 force_next(token);
791 }
792 return s;
793}
794
ffb4593c
NT
795/*
796 * S_force_ident
9cbb5ea2 797 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
798 * text only contains the "foo" portion. The first argument is a pointer
799 * to the "foo", and the second argument is the type symbol to prefix.
800 * Forces the next token to be a "WORD".
9cbb5ea2 801 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
802 */
803
76e3520e 804STATIC void
cea2e8a9 805S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
806{
807 if (s && *s) {
11343788 808 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 809 PL_nextval[PL_nexttoke].opval = o;
79072805 810 force_next(WORD);
748a9306 811 if (kind) {
11343788 812 o->op_private = OPpCONST_ENTERED;
55497cff 813 /* XXX see note in pp_entereval() for why we forgo typo
814 warnings if the symbol must be introduced in an eval.
815 GSAR 96-10-12 */
3280af22 816 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
817 kind == '$' ? SVt_PV :
818 kind == '@' ? SVt_PVAV :
819 kind == '%' ? SVt_PVHV :
820 SVt_PVGV
821 );
748a9306 822 }
79072805
LW
823 }
824}
825
1571675a
GS
826NV
827Perl_str_to_version(pTHX_ SV *sv)
828{
829 NV retval = 0.0;
830 NV nshift = 1.0;
831 STRLEN len;
832 char *start = SvPVx(sv,len);
3aa33fe5 833 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
834 char *end = start + len;
835 while (start < end) {
ba210ebe 836 STRLEN skip;
1571675a
GS
837 UV n;
838 if (utf)
9041c2e3 839 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
840 else {
841 n = *(U8*)start;
842 skip = 1;
843 }
844 retval += ((NV)n)/nshift;
845 start += skip;
846 nshift *= 1000;
847 }
848 return retval;
849}
850
4e553d73 851/*
ffb4593c
NT
852 * S_force_version
853 * Forces the next token to be a version number.
e759cc13
RGS
854 * If the next token appears to be an invalid version number, (e.g. "v2b"),
855 * and if "guessing" is TRUE, then no new token is created (and the caller
856 * must use an alternative parsing method).
ffb4593c
NT
857 */
858
76e3520e 859STATIC char *
e759cc13 860S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 861{
862 OP *version = Nullop;
44dcb63b 863 char *d;
89bfa8cd 864
865 s = skipspace(s);
866
44dcb63b 867 d = s;
dd629d5b 868 if (*d == 'v')
44dcb63b 869 d++;
44dcb63b 870 if (isDIGIT(*d)) {
e759cc13
RGS
871 while (isDIGIT(*d) || *d == '_' || *d == '.')
872 d++;
9f3d182e 873 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 874 SV *ver;
b73d6f50 875 s = scan_num(s, &yylval);
89bfa8cd 876 version = yylval.opval;
dd629d5b
GS
877 ver = cSVOPx(version)->op_sv;
878 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 879 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
880 SvNVX(ver) = str_to_version(ver);
881 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 882 }
89bfa8cd 883 }
e759cc13
RGS
884 else if (guessing)
885 return s;
89bfa8cd 886 }
887
888 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 889 PL_nextval[PL_nexttoke].opval = version;
4e553d73 890 force_next(WORD);
89bfa8cd 891
e759cc13 892 return s;
89bfa8cd 893}
894
ffb4593c
NT
895/*
896 * S_tokeq
897 * Tokenize a quoted string passed in as an SV. It finds the next
898 * chunk, up to end of string or a backslash. It may make a new
899 * SV containing that chunk (if HINT_NEW_STRING is on). It also
900 * turns \\ into \.
901 */
902
76e3520e 903STATIC SV *
cea2e8a9 904S_tokeq(pTHX_ SV *sv)
79072805
LW
905{
906 register char *s;
907 register char *send;
908 register char *d;
b3ac6de7
IZ
909 STRLEN len = 0;
910 SV *pv = sv;
79072805
LW
911
912 if (!SvLEN(sv))
b3ac6de7 913 goto finish;
79072805 914
a0d0e21e 915 s = SvPV_force(sv, len);
21a311ee 916 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 917 goto finish;
463ee0b2 918 send = s + len;
79072805
LW
919 while (s < send && *s != '\\')
920 s++;
921 if (s == send)
b3ac6de7 922 goto finish;
79072805 923 d = s;
be4731d2 924 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 925 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
926 if (SvUTF8(sv))
927 SvUTF8_on(pv);
928 }
79072805
LW
929 while (s < send) {
930 if (*s == '\\') {
a0d0e21e 931 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
932 s++; /* all that, just for this */
933 }
934 *d++ = *s++;
935 }
936 *d = '\0';
463ee0b2 937 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 938 finish:
3280af22 939 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 940 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
941 return sv;
942}
943
ffb4593c
NT
944/*
945 * Now come three functions related to double-quote context,
946 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
947 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
948 * interact with PL_lex_state, and create fake ( ... ) argument lists
949 * to handle functions and concatenation.
950 * They assume that whoever calls them will be setting up a fake
951 * join call, because each subthing puts a ',' after it. This lets
952 * "lower \luPpEr"
953 * become
954 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
955 *
956 * (I'm not sure whether the spurious commas at the end of lcfirst's
957 * arguments and join's arguments are created or not).
958 */
959
960/*
961 * S_sublex_start
962 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
963 *
964 * Pattern matching will set PL_lex_op to the pattern-matching op to
965 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
966 *
967 * OP_CONST and OP_READLINE are easy--just make the new op and return.
968 *
969 * Everything else becomes a FUNC.
970 *
971 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
972 * had an OP_CONST or OP_READLINE). This just sets us up for a
973 * call to S_sublex_push().
974 */
975
76e3520e 976STATIC I32
cea2e8a9 977S_sublex_start(pTHX)
79072805
LW
978{
979 register I32 op_type = yylval.ival;
79072805
LW
980
981 if (op_type == OP_NULL) {
3280af22
NIS
982 yylval.opval = PL_lex_op;
983 PL_lex_op = Nullop;
79072805
LW
984 return THING;
985 }
986 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 987 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
988
989 if (SvTYPE(sv) == SVt_PVIV) {
990 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
991 STRLEN len;
992 char *p;
993 SV *nsv;
994
995 p = SvPV(sv, len);
79cb57f6 996 nsv = newSVpvn(p, len);
01ec43d0
GS
997 if (SvUTF8(sv))
998 SvUTF8_on(nsv);
b3ac6de7
IZ
999 SvREFCNT_dec(sv);
1000 sv = nsv;
4e553d73 1001 }
b3ac6de7 1002 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1003 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1004 /* Allow <FH> // "foo" */
1005 if (op_type == OP_READLINE)
1006 PL_expect = XTERMORDORDOR;
79072805
LW
1007 return THING;
1008 }
1009
3280af22
NIS
1010 PL_sublex_info.super_state = PL_lex_state;
1011 PL_sublex_info.sub_inwhat = op_type;
1012 PL_sublex_info.sub_op = PL_lex_op;
1013 PL_lex_state = LEX_INTERPPUSH;
55497cff 1014
3280af22
NIS
1015 PL_expect = XTERM;
1016 if (PL_lex_op) {
1017 yylval.opval = PL_lex_op;
1018 PL_lex_op = Nullop;
55497cff 1019 return PMFUNC;
1020 }
1021 else
1022 return FUNC;
1023}
1024
ffb4593c
NT
1025/*
1026 * S_sublex_push
1027 * Create a new scope to save the lexing state. The scope will be
1028 * ended in S_sublex_done. Returns a '(', starting the function arguments
1029 * to the uc, lc, etc. found before.
1030 * Sets PL_lex_state to LEX_INTERPCONCAT.
1031 */
1032
76e3520e 1033STATIC I32
cea2e8a9 1034S_sublex_push(pTHX)
55497cff 1035{
f46d017c 1036 ENTER;
55497cff 1037
3280af22
NIS
1038 PL_lex_state = PL_sublex_info.super_state;
1039 SAVEI32(PL_lex_dojoin);
1040 SAVEI32(PL_lex_brackets);
3280af22
NIS
1041 SAVEI32(PL_lex_casemods);
1042 SAVEI32(PL_lex_starts);
1043 SAVEI32(PL_lex_state);
7766f137 1044 SAVEVPTR(PL_lex_inpat);
3280af22 1045 SAVEI32(PL_lex_inwhat);
57843af0 1046 SAVECOPLINE(PL_curcop);
3280af22 1047 SAVEPPTR(PL_bufptr);
8452ff4b 1048 SAVEPPTR(PL_bufend);
3280af22
NIS
1049 SAVEPPTR(PL_oldbufptr);
1050 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1051 SAVEPPTR(PL_last_lop);
1052 SAVEPPTR(PL_last_uni);
3280af22
NIS
1053 SAVEPPTR(PL_linestart);
1054 SAVESPTR(PL_linestr);
1055 SAVEPPTR(PL_lex_brackstack);
1056 SAVEPPTR(PL_lex_casestack);
1057
1058 PL_linestr = PL_lex_stuff;
1059 PL_lex_stuff = Nullsv;
1060
9cbb5ea2
GS
1061 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1062 = SvPVX(PL_linestr);
3280af22 1063 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1064 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1065 SAVEFREESV(PL_linestr);
1066
1067 PL_lex_dojoin = FALSE;
1068 PL_lex_brackets = 0;
3280af22
NIS
1069 New(899, PL_lex_brackstack, 120, char);
1070 New(899, PL_lex_casestack, 12, char);
1071 SAVEFREEPV(PL_lex_brackstack);
1072 SAVEFREEPV(PL_lex_casestack);
1073 PL_lex_casemods = 0;
1074 *PL_lex_casestack = '\0';
1075 PL_lex_starts = 0;
1076 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1077 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1078
1079 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1080 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1081 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1082 else
3280af22 1083 PL_lex_inpat = Nullop;
79072805 1084
55497cff 1085 return '(';
79072805
LW
1086}
1087
ffb4593c
NT
1088/*
1089 * S_sublex_done
1090 * Restores lexer state after a S_sublex_push.
1091 */
1092
76e3520e 1093STATIC I32
cea2e8a9 1094S_sublex_done(pTHX)
79072805 1095{
3280af22 1096 if (!PL_lex_starts++) {
9aa983d2
JH
1097 SV *sv = newSVpvn("",0);
1098 if (SvUTF8(PL_linestr))
1099 SvUTF8_on(sv);
3280af22 1100 PL_expect = XOPERATOR;
9aa983d2 1101 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1102 return THING;
1103 }
1104
3280af22
NIS
1105 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1106 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1107 return yylex();
79072805
LW
1108 }
1109
ffb4593c 1110 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1111 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1112 PL_linestr = PL_lex_repl;
1113 PL_lex_inpat = 0;
1114 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1115 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1116 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1117 SAVEFREESV(PL_linestr);
1118 PL_lex_dojoin = FALSE;
1119 PL_lex_brackets = 0;
3280af22
NIS
1120 PL_lex_casemods = 0;
1121 *PL_lex_casestack = '\0';
1122 PL_lex_starts = 0;
25da4f38 1123 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1124 PL_lex_state = LEX_INTERPNORMAL;
1125 PL_lex_starts++;
e9fa98b2
HS
1126 /* we don't clear PL_lex_repl here, so that we can check later
1127 whether this is an evalled subst; that means we rely on the
1128 logic to ensure sublex_done() is called again only via the
1129 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1130 }
e9fa98b2 1131 else {
3280af22 1132 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1133 PL_lex_repl = Nullsv;
1134 }
79072805 1135 return ',';
ffed7fef
LW
1136 }
1137 else {
f46d017c 1138 LEAVE;
3280af22
NIS
1139 PL_bufend = SvPVX(PL_linestr);
1140 PL_bufend += SvCUR(PL_linestr);
1141 PL_expect = XOPERATOR;
09bef843 1142 PL_sublex_info.sub_inwhat = 0;
79072805 1143 return ')';
ffed7fef
LW
1144 }
1145}
1146
02aa26ce
NT
1147/*
1148 scan_const
1149
1150 Extracts a pattern, double-quoted string, or transliteration. This
1151 is terrifying code.
1152
3280af22
NIS
1153 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1154 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1155 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1156
9b599b2a
GS
1157 Returns a pointer to the character scanned up to. Iff this is
1158 advanced from the start pointer supplied (ie if anything was
1159 successfully parsed), will leave an OP for the substring scanned
1160 in yylval. Caller must intuit reason for not parsing further
1161 by looking at the next characters herself.
1162
02aa26ce
NT
1163 In patterns:
1164 backslashes:
1165 double-quoted style: \r and \n
1166 regexp special ones: \D \s
1167 constants: \x3
1168 backrefs: \1 (deprecated in substitution replacements)
1169 case and quoting: \U \Q \E
1170 stops on @ and $, but not for $ as tail anchor
1171
1172 In transliterations:
1173 characters are VERY literal, except for - not at the start or end
1174 of the string, which indicates a range. scan_const expands the
1175 range to the full set of intermediate characters.
1176
1177 In double-quoted strings:
1178 backslashes:
1179 double-quoted style: \r and \n
1180 constants: \x3
1181 backrefs: \1 (deprecated)
1182 case and quoting: \U \Q \E
1183 stops on @ and $
1184
1185 scan_const does *not* construct ops to handle interpolated strings.
1186 It stops processing as soon as it finds an embedded $ or @ variable
1187 and leaves it to the caller to work out what's going on.
1188
da6eedaa 1189 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1190
1191 $ in pattern could be $foo or could be tail anchor. Assumption:
1192 it's a tail anchor if $ is the last thing in the string, or if it's
1193 followed by one of ")| \n\t"
1194
1195 \1 (backreferences) are turned into $1
1196
1197 The structure of the code is
1198 while (there's a character to process) {
1199 handle transliteration ranges
1200 skip regexp comments
1201 skip # initiated comments in //x patterns
1202 check for embedded @foo
1203 check for embedded scalars
1204 if (backslash) {
1205 leave intact backslashes from leave (below)
1206 deprecate \1 in strings and sub replacements
1207 handle string-changing backslashes \l \U \Q \E, etc.
1208 switch (what was escaped) {
1209 handle - in a transliteration (becomes a literal -)
1210 handle \132 octal characters
1211 handle 0x15 hex characters
1212 handle \cV (control V)
1213 handle printf backslashes (\f, \r, \n, etc)
1214 } (end switch)
1215 } (end if backslash)
1216 } (end while character to read)
4e553d73 1217
02aa26ce
NT
1218*/
1219
76e3520e 1220STATIC char *
cea2e8a9 1221S_scan_const(pTHX_ char *start)
79072805 1222{
3280af22 1223 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1224 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1225 register char *s = start; /* start of the constant */
1226 register char *d = SvPVX(sv); /* destination for copies */
1227 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1228 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1229 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1230 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1231 UV uv;
1232
dff6d3cd 1233 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1234 PL_lex_inpat
4a2d328f 1235 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1236 : "";
79072805 1237
2b9d42f0
NIS
1238 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1239 /* If we are doing a trans and we know we want UTF8 set expectation */
1240 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1241 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1242 }
1243
1244
79072805 1245 while (s < send || dorange) {
02aa26ce 1246 /* get transliterations out of the way (they're most literal) */
3280af22 1247 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1248 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1249 if (dorange) {
1ba5c669
JH
1250 I32 i; /* current expanded character */
1251 I32 min; /* first character in range */
1252 I32 max; /* last character in range */
02aa26ce 1253
2b9d42f0 1254 if (has_utf8) {
8973db79
JH
1255 char *c = (char*)utf8_hop((U8*)d, -1);
1256 char *e = d++;
1257 while (e-- > c)
1258 *(e + 1) = *e;
25716404 1259 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1260 /* mark the range as done, and continue */
1261 dorange = FALSE;
1262 didrange = TRUE;
1263 continue;
1264 }
2b9d42f0 1265
02aa26ce 1266 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1267 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1268 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1269 d -= 2; /* eat the first char and the - */
1270
8ada0baa
JH
1271 min = (U8)*d; /* first char in range */
1272 max = (U8)d[1]; /* last char in range */
1273
c2e66d9e 1274 if (min > max) {
01ec43d0 1275 Perl_croak(aTHX_
d1573ac7 1276 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1277 (char)min, (char)max);
c2e66d9e
GS
1278 }
1279
c7f1f016 1280#ifdef EBCDIC
8ada0baa
JH
1281 if ((isLOWER(min) && isLOWER(max)) ||
1282 (isUPPER(min) && isUPPER(max))) {
1283 if (isLOWER(min)) {
1284 for (i = min; i <= max; i++)
1285 if (isLOWER(i))
db42d148 1286 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1287 } else {
1288 for (i = min; i <= max; i++)
1289 if (isUPPER(i))
db42d148 1290 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1291 }
1292 }
1293 else
1294#endif
1295 for (i = min; i <= max; i++)
eb160463 1296 *d++ = (char)i;
02aa26ce
NT
1297
1298 /* mark the range as done, and continue */
79072805 1299 dorange = FALSE;
01ec43d0 1300 didrange = TRUE;
79072805 1301 continue;
4e553d73 1302 }
02aa26ce
NT
1303
1304 /* range begins (ignore - as first or last char) */
79072805 1305 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1306 if (didrange) {
1fafa243 1307 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1308 }
2b9d42f0 1309 if (has_utf8) {
25716404 1310 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1311 s++;
1312 continue;
1313 }
79072805
LW
1314 dorange = TRUE;
1315 s++;
01ec43d0
GS
1316 }
1317 else {
1318 didrange = FALSE;
1319 }
79072805 1320 }
02aa26ce
NT
1321
1322 /* if we get here, we're not doing a transliteration */
1323
0f5d15d6
IZ
1324 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1325 except for the last char, which will be done separately. */
3280af22 1326 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1327 if (s[2] == '#') {
1328 while (s < send && *s != ')')
db42d148 1329 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1330 }
1331 else if (s[2] == '{' /* This should match regcomp.c */
1332 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1333 {
cc6b7395 1334 I32 count = 1;
0f5d15d6 1335 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1336 char c;
1337
d9f97599
GS
1338 while (count && (c = *regparse)) {
1339 if (c == '\\' && regparse[1])
1340 regparse++;
4e553d73 1341 else if (c == '{')
cc6b7395 1342 count++;
4e553d73 1343 else if (c == '}')
cc6b7395 1344 count--;
d9f97599 1345 regparse++;
cc6b7395 1346 }
5bdf89e7
IZ
1347 if (*regparse != ')') {
1348 regparse--; /* Leave one char for continuation. */
cc6b7395 1349 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1350 }
0f5d15d6 1351 while (s < regparse)
db42d148 1352 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1353 }
748a9306 1354 }
02aa26ce
NT
1355
1356 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1357 else if (*s == '#' && PL_lex_inpat &&
1358 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1359 while (s+1 < send && *s != '\n')
db42d148 1360 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1361 }
02aa26ce 1362
5d1d4326 1363 /* check for embedded arrays
da6eedaa 1364 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1365 */
7e2040f0 1366 else if (*s == '@' && s[1]
5d1d4326 1367 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1368 break;
02aa26ce
NT
1369
1370 /* check for embedded scalars. only stop if we're sure it's a
1371 variable.
1372 */
79072805 1373 else if (*s == '$') {
3280af22 1374 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1375 break;
6002328a 1376 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1377 break; /* in regexp, $ might be tail anchor */
1378 }
02aa26ce 1379
2b9d42f0
NIS
1380 /* End of else if chain - OP_TRANS rejoin rest */
1381
02aa26ce 1382 /* backslashes */
79072805
LW
1383 if (*s == '\\' && s+1 < send) {
1384 s++;
02aa26ce
NT
1385
1386 /* some backslashes we leave behind */
c9f97d15 1387 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1388 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1389 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1390 continue;
1391 }
02aa26ce
NT
1392
1393 /* deprecate \1 in strings and substitution replacements */
3280af22 1394 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1395 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1396 {
599cee73 1397 if (ckWARN(WARN_SYNTAX))
9014280d 1398 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1399 *--s = '$';
1400 break;
1401 }
02aa26ce
NT
1402
1403 /* string-change backslash escapes */
3280af22 1404 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1405 --s;
1406 break;
1407 }
02aa26ce
NT
1408
1409 /* if we get here, it's either a quoted -, or a digit */
79072805 1410 switch (*s) {
02aa26ce
NT
1411
1412 /* quoted - in transliterations */
79072805 1413 case '-':
3280af22 1414 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1415 *d++ = *s++;
1416 continue;
1417 }
1418 /* FALL THROUGH */
1419 default:
11b8faa4 1420 {
707afd92
MS
1421 if (ckWARN(WARN_MISC) &&
1422 isALNUM(*s) &&
1423 *s != '_')
9014280d 1424 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1425 "Unrecognized escape \\%c passed through",
1426 *s);
1427 /* default action is to copy the quoted character */
f9a63242 1428 goto default_action;
11b8faa4 1429 }
02aa26ce
NT
1430
1431 /* \132 indicates an octal constant */
79072805
LW
1432 case '0': case '1': case '2': case '3':
1433 case '4': case '5': case '6': case '7':
ba210ebe 1434 {
53305cf1
NC
1435 I32 flags = 0;
1436 STRLEN len = 3;
1437 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1438 s += len;
1439 }
012bcf8d 1440 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1441
1442 /* \x24 indicates a hex constant */
79072805 1443 case 'x':
a0ed51b3
LW
1444 ++s;
1445 if (*s == '{') {
1446 char* e = strchr(s, '}');
a4c04bdc
NC
1447 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1448 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1449 STRLEN len;
355860ce 1450
53305cf1 1451 ++s;
adaeee49 1452 if (!e) {
a0ed51b3 1453 yyerror("Missing right brace on \\x{}");
355860ce 1454 continue;
ba210ebe 1455 }
53305cf1
NC
1456 len = e - s;
1457 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1458 s = e + 1;
a0ed51b3
LW
1459 }
1460 else {
ba210ebe 1461 {
53305cf1 1462 STRLEN len = 2;
a4c04bdc 1463 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1464 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1465 s += len;
1466 }
012bcf8d
GS
1467 }
1468
1469 NUM_ESCAPE_INSERT:
1470 /* Insert oct or hex escaped character.
301d3d20 1471 * There will always enough room in sv since such
db42d148 1472 * escapes will be longer than any UTF-8 sequence
301d3d20 1473 * they can end up as. */
ba7cea30 1474
c7f1f016
NIS
1475 /* We need to map to chars to ASCII before doing the tests
1476 to cover EBCDIC
1477 */
c4d5f83a 1478 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1479 if (!has_utf8 && uv > 255) {
301d3d20
JH
1480 /* Might need to recode whatever we have
1481 * accumulated so far if it contains any
1482 * hibit chars.
1483 *
1484 * (Can't we keep track of that and avoid
1485 * this rescan? --jhi)
012bcf8d 1486 */
c7f1f016 1487 int hicount = 0;
63cd0674
NIS
1488 U8 *c;
1489 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1490 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1491 hicount++;
db42d148 1492 }
012bcf8d 1493 }
63cd0674 1494 if (hicount) {
db42d148
NIS
1495 STRLEN offset = d - SvPVX(sv);
1496 U8 *src, *dst;
1497 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1498 src = (U8 *)d - 1;
1499 dst = src+hicount;
1500 d += hicount;
1501 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1502 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1503 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1504 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1505 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1506 }
1507 else {
63cd0674 1508 *dst-- = *src;
012bcf8d 1509 }
c7f1f016 1510 src--;
012bcf8d
GS
1511 }
1512 }
1513 }
1514
9aa983d2 1515 if (has_utf8 || uv > 255) {
9041c2e3 1516 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1517 has_utf8 = TRUE;
f9a63242
JH
1518 if (PL_lex_inwhat == OP_TRANS &&
1519 PL_sublex_info.sub_op) {
1520 PL_sublex_info.sub_op->op_private |=
1521 (PL_lex_repl ? OPpTRANS_FROM_UTF
1522 : OPpTRANS_TO_UTF);
f9a63242 1523 }
012bcf8d 1524 }
a0ed51b3 1525 else {
012bcf8d 1526 *d++ = (char)uv;
a0ed51b3 1527 }
012bcf8d
GS
1528 }
1529 else {
c4d5f83a 1530 *d++ = (char) uv;
a0ed51b3 1531 }
79072805 1532 continue;
02aa26ce 1533
b239daa5 1534 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1535 case 'N':
55eda711 1536 ++s;
423cee85
JH
1537 if (*s == '{') {
1538 char* e = strchr(s, '}');
155aba94 1539 SV *res;
423cee85
JH
1540 STRLEN len;
1541 char *str;
4e553d73 1542
423cee85 1543 if (!e) {
5777a3f7 1544 yyerror("Missing right brace on \\N{}");
423cee85
JH
1545 e = s - 1;
1546 goto cont_scan;
1547 }
dbc0d4f2
JH
1548 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1549 /* \N{U+...} */
1550 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1551 PERL_SCAN_DISALLOW_PREFIX;
1552 s += 3;
1553 len = e - s;
1554 uv = grok_hex(s, &len, &flags, NULL);
1555 s = e + 1;
1556 goto NUM_ESCAPE_INSERT;
1557 }
55eda711
JH
1558 res = newSVpvn(s + 1, e - s - 1);
1559 res = new_constant( Nullch, 0, "charnames",
1560 res, Nullsv, "\\N{...}" );
f9a63242
JH
1561 if (has_utf8)
1562 sv_utf8_upgrade(res);
423cee85 1563 str = SvPV(res,len);
1c47067b
JH
1564#ifdef EBCDIC_NEVER_MIND
1565 /* charnames uses pack U and that has been
1566 * recently changed to do the below uni->native
1567 * mapping, so this would be redundant (and wrong,
1568 * the code point would be doubly converted).
1569 * But leave this in just in case the pack U change
1570 * gets revoked, but the semantics is still
1571 * desireable for charnames. --jhi */
cddc7ef4
JH
1572 {
1573 UV uv = utf8_to_uvchr((U8*)str, 0);
1574
1575 if (uv < 0x100) {
1576 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1577
1578 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1579 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1580 str = SvPV(res, len);
1581 }
1582 }
1583#endif
89491803 1584 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1585 char *ostart = SvPVX(sv);
1586 SvCUR_set(sv, d - ostart);
1587 SvPOK_on(sv);
e4f3eed8 1588 *d = '\0';
f08d6ad9 1589 sv_utf8_upgrade(sv);
d2f449dd 1590 /* this just broke our allocation above... */
eb160463 1591 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1592 d = SvPVX(sv) + SvCUR(sv);
89491803 1593 has_utf8 = TRUE;
f08d6ad9 1594 }
eb160463 1595 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1596 char *odest = SvPVX(sv);
1597
8973db79 1598 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1599 d = SvPVX(sv) + (d - odest);
1600 }
1601 Copy(str, d, len, char);
1602 d += len;
1603 SvREFCNT_dec(res);
1604 cont_scan:
1605 s = e + 1;
1606 }
1607 else
5777a3f7 1608 yyerror("Missing braces on \\N{}");
423cee85
JH
1609 continue;
1610
02aa26ce 1611 /* \c is a control character */
79072805
LW
1612 case 'c':
1613 s++;
ba210ebe
JH
1614 {
1615 U8 c = *s++;
c7f1f016
NIS
1616#ifdef EBCDIC
1617 if (isLOWER(c))
1618 c = toUPPER(c);
1619#endif
db42d148 1620 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1621 }
79072805 1622 continue;
02aa26ce
NT
1623
1624 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1625 case 'b':
db42d148 1626 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1627 break;
1628 case 'n':
db42d148 1629 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1630 break;
1631 case 'r':
db42d148 1632 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1633 break;
1634 case 'f':
db42d148 1635 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1636 break;
1637 case 't':
db42d148 1638 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1639 break;
34a3fe2a 1640 case 'e':
db42d148 1641 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1642 break;
1643 case 'a':
db42d148 1644 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1645 break;
02aa26ce
NT
1646 } /* end switch */
1647
79072805
LW
1648 s++;
1649 continue;
02aa26ce
NT
1650 } /* end if (backslash) */
1651
f9a63242 1652 default_action:
2b9d42f0
NIS
1653 /* If we started with encoded form, or already know we want it
1654 and then encode the next character */
1655 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1656 STRLEN len = 1;
1657 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1658 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1659 s += len;
1660 if (need > len) {
1661 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1662 STRLEN off = d - SvPVX(sv);
1663 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1664 }
1665 d = (char*)uvchr_to_utf8((U8*)d, uv);
1666 has_utf8 = TRUE;
1667 }
1668 else {
1669 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1670 }
02aa26ce
NT
1671 } /* while loop to process each character */
1672
1673 /* terminate the string and set up the sv */
79072805 1674 *d = '\0';
463ee0b2 1675 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1676 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1677 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1678
79072805 1679 SvPOK_on(sv);
9f4817db 1680 if (PL_encoding && !has_utf8) {
799ef3cb 1681 sv_recode_to_utf8(sv, PL_encoding);
9f4817db
JH
1682 has_utf8 = TRUE;
1683 }
2b9d42f0 1684 if (has_utf8) {
7e2040f0 1685 SvUTF8_on(sv);
2b9d42f0
NIS
1686 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1687 PL_sublex_info.sub_op->op_private |=
1688 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1689 }
1690 }
79072805 1691
02aa26ce 1692 /* shrink the sv if we allocated more than we used */
79072805
LW
1693 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1694 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1695 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1696 }
02aa26ce 1697
9b599b2a 1698 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1699 if (s > PL_bufptr) {
1700 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1701 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1702 sv, Nullsv,
4e553d73 1703 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1704 ? "tr"
3280af22 1705 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1706 ? "s"
1707 : "qq")));
79072805 1708 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1709 } else
8990e307 1710 SvREFCNT_dec(sv);
79072805
LW
1711 return s;
1712}
1713
ffb4593c
NT
1714/* S_intuit_more
1715 * Returns TRUE if there's more to the expression (e.g., a subscript),
1716 * FALSE otherwise.
ffb4593c
NT
1717 *
1718 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1719 *
1720 * ->[ and ->{ return TRUE
1721 * { and [ outside a pattern are always subscripts, so return TRUE
1722 * if we're outside a pattern and it's not { or [, then return FALSE
1723 * if we're in a pattern and the first char is a {
1724 * {4,5} (any digits around the comma) returns FALSE
1725 * if we're in a pattern and the first char is a [
1726 * [] returns FALSE
1727 * [SOMETHING] has a funky algorithm to decide whether it's a
1728 * character class or not. It has to deal with things like
1729 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1730 * anything else returns TRUE
1731 */
1732
9cbb5ea2
GS
1733/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1734
76e3520e 1735STATIC int
cea2e8a9 1736S_intuit_more(pTHX_ register char *s)
79072805 1737{
3280af22 1738 if (PL_lex_brackets)
79072805
LW
1739 return TRUE;
1740 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1741 return TRUE;
1742 if (*s != '{' && *s != '[')
1743 return FALSE;
3280af22 1744 if (!PL_lex_inpat)
79072805
LW
1745 return TRUE;
1746
1747 /* In a pattern, so maybe we have {n,m}. */
1748 if (*s == '{') {
1749 s++;
1750 if (!isDIGIT(*s))
1751 return TRUE;
1752 while (isDIGIT(*s))
1753 s++;
1754 if (*s == ',')
1755 s++;
1756 while (isDIGIT(*s))
1757 s++;
1758 if (*s == '}')
1759 return FALSE;
1760 return TRUE;
1761
1762 }
1763
1764 /* On the other hand, maybe we have a character class */
1765
1766 s++;
1767 if (*s == ']' || *s == '^')
1768 return FALSE;
1769 else {
ffb4593c 1770 /* this is terrifying, and it works */
79072805
LW
1771 int weight = 2; /* let's weigh the evidence */
1772 char seen[256];
f27ffc4a 1773 unsigned char un_char = 255, last_un_char;
93a17b20 1774 char *send = strchr(s,']');
3280af22 1775 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1776
1777 if (!send) /* has to be an expression */
1778 return TRUE;
1779
1780 Zero(seen,256,char);
1781 if (*s == '$')
1782 weight -= 3;
1783 else if (isDIGIT(*s)) {
1784 if (s[1] != ']') {
1785 if (isDIGIT(s[1]) && s[2] == ']')
1786 weight -= 10;
1787 }
1788 else
1789 weight -= 100;
1790 }
1791 for (; s < send; s++) {
1792 last_un_char = un_char;
1793 un_char = (unsigned char)*s;
1794 switch (*s) {
1795 case '@':
1796 case '&':
1797 case '$':
1798 weight -= seen[un_char] * 10;
7e2040f0 1799 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1800 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1801 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1802 weight -= 100;
1803 else
1804 weight -= 10;
1805 }
1806 else if (*s == '$' && s[1] &&
93a17b20
LW
1807 strchr("[#!%*<>()-=",s[1])) {
1808 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1809 weight -= 10;
1810 else
1811 weight -= 1;
1812 }
1813 break;
1814 case '\\':
1815 un_char = 254;
1816 if (s[1]) {
93a17b20 1817 if (strchr("wds]",s[1]))
79072805
LW
1818 weight += 100;
1819 else if (seen['\''] || seen['"'])
1820 weight += 1;
93a17b20 1821 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1822 weight += 40;
1823 else if (isDIGIT(s[1])) {
1824 weight += 40;
1825 while (s[1] && isDIGIT(s[1]))
1826 s++;
1827 }
1828 }
1829 else
1830 weight += 100;
1831 break;
1832 case '-':
1833 if (s[1] == '\\')
1834 weight += 50;
93a17b20 1835 if (strchr("aA01! ",last_un_char))
79072805 1836 weight += 30;
93a17b20 1837 if (strchr("zZ79~",s[1]))
79072805 1838 weight += 30;
f27ffc4a
GS
1839 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1840 weight -= 5; /* cope with negative subscript */
79072805
LW
1841 break;
1842 default:
93a17b20 1843 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1844 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1845 char *d = tmpbuf;
1846 while (isALPHA(*s))
1847 *d++ = *s++;
1848 *d = '\0';
1849 if (keyword(tmpbuf, d - tmpbuf))
1850 weight -= 150;
1851 }
1852 if (un_char == last_un_char + 1)
1853 weight += 5;
1854 weight -= seen[un_char];
1855 break;
1856 }
1857 seen[un_char]++;
1858 }
1859 if (weight >= 0) /* probably a character class */
1860 return FALSE;
1861 }
1862
1863 return TRUE;
1864}
ffed7fef 1865
ffb4593c
NT
1866/*
1867 * S_intuit_method
1868 *
1869 * Does all the checking to disambiguate
1870 * foo bar
1871 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1872 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1873 *
1874 * First argument is the stuff after the first token, e.g. "bar".
1875 *
1876 * Not a method if bar is a filehandle.
1877 * Not a method if foo is a subroutine prototyped to take a filehandle.
1878 * Not a method if it's really "Foo $bar"
1879 * Method if it's "foo $bar"
1880 * Not a method if it's really "print foo $bar"
1881 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1882 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1883 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1884 * =>
1885 */
1886
76e3520e 1887STATIC int
cea2e8a9 1888S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1889{
1890 char *s = start + (*start == '$');
3280af22 1891 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1892 STRLEN len;
1893 GV* indirgv;
1894
1895 if (gv) {
b6c543e3 1896 CV *cv;
a0d0e21e
LW
1897 if (GvIO(gv))
1898 return 0;
b6c543e3
IZ
1899 if ((cv = GvCVu(gv))) {
1900 char *proto = SvPVX(cv);
1901 if (proto) {
1902 if (*proto == ';')
1903 proto++;
1904 if (*proto == '*')
1905 return 0;
1906 }
1907 } else
a0d0e21e
LW
1908 gv = 0;
1909 }
8903cb82 1910 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1911 /* start is the beginning of the possible filehandle/object,
1912 * and s is the end of it
1913 * tmpbuf is a copy of it
1914 */
1915
a0d0e21e 1916 if (*start == '$') {
3280af22 1917 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1918 return 0;
1919 s = skipspace(s);
3280af22
NIS
1920 PL_bufptr = start;
1921 PL_expect = XREF;
a0d0e21e
LW
1922 return *s == '(' ? FUNCMETH : METHOD;
1923 }
1924 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1925 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1926 len -= 2;
1927 tmpbuf[len] = '\0';
1928 goto bare_package;
1929 }
1930 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1931 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1932 return 0;
1933 /* filehandle or package name makes it a method */
89bfa8cd 1934 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1935 s = skipspace(s);
3280af22 1936 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1937 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1938 bare_package:
3280af22 1939 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1940 newSVpvn(tmpbuf,len));
3280af22
NIS
1941 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1942 PL_expect = XTERM;
a0d0e21e 1943 force_next(WORD);
3280af22 1944 PL_bufptr = s;
a0d0e21e
LW
1945 return *s == '(' ? FUNCMETH : METHOD;
1946 }
1947 }
1948 return 0;
1949}
1950
ffb4593c
NT
1951/*
1952 * S_incl_perldb
1953 * Return a string of Perl code to load the debugger. If PERL5DB
1954 * is set, it will return the contents of that, otherwise a
1955 * compile-time require of perl5db.pl.
1956 */
1957
76e3520e 1958STATIC char*
cea2e8a9 1959S_incl_perldb(pTHX)
a0d0e21e 1960{
3280af22 1961 if (PL_perldb) {
76e3520e 1962 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1963
1964 if (pdb)
1965 return pdb;
93189314 1966 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1967 return "BEGIN { require 'perl5db.pl' }";
1968 }
1969 return "";
1970}
1971
1972
16d20bd9 1973/* Encoded script support. filter_add() effectively inserts a
4e553d73 1974 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1975 * Note that the filter function only applies to the current source file
1976 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1977 *
1978 * The datasv parameter (which may be NULL) can be used to pass
1979 * private data to this instance of the filter. The filter function
1980 * can recover the SV using the FILTER_DATA macro and use it to
1981 * store private buffers and state information.
1982 *
1983 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1984 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1985 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1986 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1987 * private use must be set using malloc'd pointers.
1988 */
16d20bd9
AD
1989
1990SV *
864dbfa3 1991Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1992{
f4c556ac
GS
1993 if (!funcp)
1994 return Nullsv;
1995
3280af22
NIS
1996 if (!PL_rsfp_filters)
1997 PL_rsfp_filters = newAV();
16d20bd9 1998 if (!datasv)
8c52afec 1999 datasv = NEWSV(255,0);
16d20bd9 2000 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2001 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2002 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2003 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2004 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2005 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2006 av_unshift(PL_rsfp_filters, 1);
2007 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2008 return(datasv);
2009}
4e553d73 2010
16d20bd9
AD
2011
2012/* Delete most recently added instance of this filter function. */
a0d0e21e 2013void
864dbfa3 2014Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2015{
e0c19803 2016 SV *datasv;
fe5a182c 2017 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2018 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2019 return;
2020 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2021 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2022 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2023 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2024 IoANY(datasv) = (void *)NULL;
3280af22 2025 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2026
16d20bd9
AD
2027 return;
2028 }
2029 /* we need to search for the correct entry and clear it */
cea2e8a9 2030 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2031}
2032
2033
2034/* Invoke the n'th filter function for the current rsfp. */
2035I32
864dbfa3 2036Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2037
2038
8ac85365 2039 /* 0 = read one text line */
a0d0e21e 2040{
16d20bd9
AD
2041 filter_t funcp;
2042 SV *datasv = NULL;
e50aee73 2043
3280af22 2044 if (!PL_rsfp_filters)
16d20bd9 2045 return -1;
3280af22 2046 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2047 /* Provide a default input filter to make life easy. */
2048 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2049 DEBUG_P(PerlIO_printf(Perl_debug_log,
2050 "filter_read %d: from rsfp\n", idx));
4e553d73 2051 if (maxlen) {
16d20bd9
AD
2052 /* Want a block */
2053 int len ;
2054 int old_len = SvCUR(buf_sv) ;
2055
2056 /* ensure buf_sv is large enough */
eb160463 2057 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2058 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2059 if (PerlIO_error(PL_rsfp))
37120919
AD
2060 return -1; /* error */
2061 else
2062 return 0 ; /* end of file */
2063 }
16d20bd9
AD
2064 SvCUR_set(buf_sv, old_len + len) ;
2065 } else {
2066 /* Want a line */
3280af22
NIS
2067 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2068 if (PerlIO_error(PL_rsfp))
37120919
AD
2069 return -1; /* error */
2070 else
2071 return 0 ; /* end of file */
2072 }
16d20bd9
AD
2073 }
2074 return SvCUR(buf_sv);
2075 }
2076 /* Skip this filter slot if filter has been deleted */
3280af22 2077 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2078 DEBUG_P(PerlIO_printf(Perl_debug_log,
2079 "filter_read %d: skipped (filter deleted)\n",
2080 idx));
16d20bd9
AD
2081 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2082 }
2083 /* Get function pointer hidden within datasv */
4755096e 2084 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2085 DEBUG_P(PerlIO_printf(Perl_debug_log,
2086 "filter_read %d: via function %p (%s)\n",
fe5a182c 2087 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2088 /* Call function. The function is expected to */
2089 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2090 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2091 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2092}
2093
76e3520e 2094STATIC char *
cea2e8a9 2095S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2096{
c39cd008 2097#ifdef PERL_CR_FILTER
3280af22 2098 if (!PL_rsfp_filters) {
c39cd008 2099 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2100 }
2101#endif
3280af22 2102 if (PL_rsfp_filters) {
16d20bd9 2103
55497cff 2104 if (!append)
2105 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2106 if (FILTER_READ(0, sv, 0) > 0)
2107 return ( SvPVX(sv) ) ;
2108 else
2109 return Nullch ;
2110 }
9d116dd7 2111 else
fd049845 2112 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2113}
2114
01ec43d0
GS
2115STATIC HV *
2116S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2117{
2118 GV *gv;
2119
01ec43d0 2120 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2121 return PL_curstash;
2122
2123 if (len > 2 &&
2124 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2125 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2126 {
2127 return GvHV(gv); /* Foo:: */
def3634b
GS
2128 }
2129
2130 /* use constant CLASS => 'MyClass' */
2131 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2132 SV *sv;
2133 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2134 pkgname = SvPV_nolen(sv);
2135 }
2136 }
2137
2138 return gv_stashpv(pkgname, FALSE);
2139}
a0d0e21e 2140
748a9306
LW
2141#ifdef DEBUGGING
2142 static char* exp_name[] =
09bef843
SB
2143 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2144 "ATTRTERM", "TERMBLOCK"
2145 };
748a9306 2146#endif
463ee0b2 2147
02aa26ce
NT
2148/*
2149 yylex
2150
2151 Works out what to call the token just pulled out of the input
2152 stream. The yacc parser takes care of taking the ops we return and
2153 stitching them into a tree.
2154
2155 Returns:
2156 PRIVATEREF
2157
2158 Structure:
2159 if read an identifier
2160 if we're in a my declaration
2161 croak if they tried to say my($foo::bar)
2162 build the ops for a my() declaration
2163 if it's an access to a my() variable
2164 are we in a sort block?
2165 croak if my($a); $a <=> $b
2166 build ops for access to a my() variable
2167 if in a dq string, and they've said @foo and we can't find @foo
2168 croak
2169 build ops for a bareword
2170 if we already built the token before, use it.
2171*/
2172
dba4d153 2173#ifdef USE_PURE_BISON
864dbfa3 2174int
dba4d153 2175Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2176{
20141f0e
IRC
2177 int r;
2178
6f202aea 2179 yyactlevel++;
20141f0e
IRC
2180 yylval_pointer[yyactlevel] = lvalp;
2181 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2182 if (yyactlevel >= YYMAXLEVEL)
2183 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2184
dba4d153 2185 r = Perl_yylex(aTHX);
20141f0e 2186
d8ae6756
IRC
2187 if (yyactlevel > 0)
2188 yyactlevel--;
20141f0e
IRC
2189
2190 return r;
2191}
dba4d153 2192#endif
20141f0e 2193
dba4d153
JH
2194#ifdef __SC__
2195#pragma segment Perl_yylex
2196#endif
dba4d153 2197int
dba4d153 2198Perl_yylex(pTHX)
20141f0e 2199{
79072805 2200 register char *s;
378cc40b 2201 register char *d;
79072805 2202 register I32 tmp;
463ee0b2 2203 STRLEN len;
161b471a
NIS
2204 GV *gv = Nullgv;
2205 GV **gvp = 0;
aa7440fb 2206 bool bof = FALSE;
a687059c 2207
02aa26ce 2208 /* check if there's an identifier for us to look at */
ba979b31 2209 if (PL_pending_ident)
e930465f 2210 return S_pending_ident(aTHX);
bbce6d69 2211
02aa26ce
NT
2212 /* no identifier pending identification */
2213
3280af22 2214 switch (PL_lex_state) {
79072805
LW
2215#ifdef COMMENTARY
2216 case LEX_NORMAL: /* Some compilers will produce faster */
2217 case LEX_INTERPNORMAL: /* code if we comment these out. */
2218 break;
2219#endif
2220
09bef843 2221 /* when we've already built the next token, just pull it out of the queue */
79072805 2222 case LEX_KNOWNEXT:
3280af22
NIS
2223 PL_nexttoke--;
2224 yylval = PL_nextval[PL_nexttoke];
2225 if (!PL_nexttoke) {
2226 PL_lex_state = PL_lex_defer;
2227 PL_expect = PL_lex_expect;
2228 PL_lex_defer = LEX_NORMAL;
463ee0b2 2229 }
607df283 2230 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2231 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2232 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2233
3280af22 2234 return(PL_nexttype[PL_nexttoke]);
79072805 2235
02aa26ce 2236 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2237 when we get here, PL_bufptr is at the \
02aa26ce 2238 */
79072805
LW
2239 case LEX_INTERPCASEMOD:
2240#ifdef DEBUGGING
3280af22 2241 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2242 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2243#endif
02aa26ce 2244 /* handle \E or end of string */
3280af22 2245 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2246 char oldmod;
02aa26ce
NT
2247
2248 /* if at a \E */
3280af22
NIS
2249 if (PL_lex_casemods) {
2250 oldmod = PL_lex_casestack[--PL_lex_casemods];
2251 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2252
3280af22
NIS
2253 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2254 PL_bufptr += 2;
2255 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2256 }
79072805
LW
2257 return ')';
2258 }
3280af22
NIS
2259 if (PL_bufptr != PL_bufend)
2260 PL_bufptr += 2;
2261 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2262 return yylex();
79072805
LW
2263 }
2264 else {
607df283 2265 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2266 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2267 s = PL_bufptr + 1;
79072805 2268 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
eb160463 2269 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
a0d0e21e 2270 if (strchr("LU", *s) &&
3280af22 2271 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2272 {
3280af22 2273 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2274 return ')';
2275 }
3280af22
NIS
2276 if (PL_lex_casemods > 10) {
2277 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2278 if (newlb != PL_lex_casestack) {
a0d0e21e 2279 SAVEFREEPV(newlb);
3280af22 2280 PL_lex_casestack = newlb;
a0d0e21e
LW
2281 }
2282 }
3280af22
NIS
2283 PL_lex_casestack[PL_lex_casemods++] = *s;
2284 PL_lex_casestack[PL_lex_casemods] = '\0';
2285 PL_lex_state = LEX_INTERPCONCAT;
2286 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2287 force_next('(');
2288 if (*s == 'l')
3280af22 2289 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2290 else if (*s == 'u')
3280af22 2291 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2292 else if (*s == 'L')
3280af22 2293 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2294 else if (*s == 'U')
3280af22 2295 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2296 else if (*s == 'Q')
3280af22 2297 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2298 else
cea2e8a9 2299 Perl_croak(aTHX_ "panic: yylex");
3280af22 2300 PL_bufptr = s + 1;
79072805 2301 force_next(FUNC);
3280af22
NIS
2302 if (PL_lex_starts) {
2303 s = PL_bufptr;
2304 PL_lex_starts = 0;
79072805
LW
2305 Aop(OP_CONCAT);
2306 }
2307 else
cea2e8a9 2308 return yylex();
79072805
LW
2309 }
2310
55497cff 2311 case LEX_INTERPPUSH:
2312 return sublex_push();
2313
79072805 2314 case LEX_INTERPSTART:
3280af22 2315 if (PL_bufptr == PL_bufend)
79072805 2316 return sublex_done();
607df283 2317 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2318 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2319 PL_expect = XTERM;
2320 PL_lex_dojoin = (*PL_bufptr == '@');
2321 PL_lex_state = LEX_INTERPNORMAL;
2322 if (PL_lex_dojoin) {
2323 PL_nextval[PL_nexttoke].ival = 0;
79072805 2324 force_next(',');
a0d0e21e 2325 force_ident("\"", '$');
3280af22 2326 PL_nextval[PL_nexttoke].ival = 0;
79072805 2327 force_next('$');
3280af22 2328 PL_nextval[PL_nexttoke].ival = 0;
79072805 2329 force_next('(');
3280af22 2330 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2331 force_next(FUNC);
2332 }
3280af22
NIS
2333 if (PL_lex_starts++) {
2334 s = PL_bufptr;
79072805
LW
2335 Aop(OP_CONCAT);
2336 }
cea2e8a9 2337 return yylex();
79072805
LW
2338
2339 case LEX_INTERPENDMAYBE:
3280af22
NIS
2340 if (intuit_more(PL_bufptr)) {
2341 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2342 break;
2343 }
2344 /* FALL THROUGH */
2345
2346 case LEX_INTERPEND:
3280af22
NIS
2347 if (PL_lex_dojoin) {
2348 PL_lex_dojoin = FALSE;
2349 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2350 return ')';
2351 }
43a16006 2352 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2353 && SvEVALED(PL_lex_repl))
43a16006 2354 {
e9fa98b2 2355 if (PL_bufptr != PL_bufend)
cea2e8a9 2356 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2357 PL_lex_repl = Nullsv;
2358 }
79072805
LW
2359 /* FALLTHROUGH */
2360 case LEX_INTERPCONCAT:
2361#ifdef DEBUGGING
3280af22 2362 if (PL_lex_brackets)
cea2e8a9 2363 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2364#endif
3280af22 2365 if (PL_bufptr == PL_bufend)
79072805
LW
2366 return sublex_done();
2367
3280af22
NIS
2368 if (SvIVX(PL_linestr) == '\'') {
2369 SV *sv = newSVsv(PL_linestr);
2370 if (!PL_lex_inpat)
76e3520e 2371 sv = tokeq(sv);
3280af22 2372 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2373 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2374 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2375 s = PL_bufend;
79072805
LW
2376 }
2377 else {
3280af22 2378 s = scan_const(PL_bufptr);
79072805 2379 if (*s == '\\')
3280af22 2380 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2381 else
3280af22 2382 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2383 }
2384
3280af22
NIS
2385 if (s != PL_bufptr) {
2386 PL_nextval[PL_nexttoke] = yylval;
2387 PL_expect = XTERM;
79072805 2388 force_next(THING);
3280af22 2389 if (PL_lex_starts++)
79072805
LW
2390 Aop(OP_CONCAT);
2391 else {
3280af22 2392 PL_bufptr = s;
cea2e8a9 2393 return yylex();
79072805
LW
2394 }
2395 }
2396
cea2e8a9 2397 return yylex();
a0d0e21e 2398 case LEX_FORMLINE:
3280af22
NIS
2399 PL_lex_state = LEX_NORMAL;
2400 s = scan_formline(PL_bufptr);
2401 if (!PL_lex_formbrack)
a0d0e21e
LW
2402 goto rightbracket;
2403 OPERATOR(';');
79072805
LW
2404 }
2405
3280af22
NIS
2406 s = PL_bufptr;
2407 PL_oldoldbufptr = PL_oldbufptr;
2408 PL_oldbufptr = s;
607df283 2409 DEBUG_T( {
bf49b057
GS
2410 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2411 exp_name[PL_expect], s);
5f80b19c 2412 } );
463ee0b2
LW
2413
2414 retry:
378cc40b
LW
2415 switch (*s) {
2416 default:
7e2040f0 2417 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2418 goto keylookup;
cea2e8a9 2419 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2420 case 4:
2421 case 26:
2422 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2423 case 0:
3280af22
NIS
2424 if (!PL_rsfp) {
2425 PL_last_uni = 0;
2426 PL_last_lop = 0;
2427 if (PL_lex_brackets)
d98d5fff 2428 yyerror("Missing right curly or square bracket");
4e553d73 2429 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2430 "### Tokener got EOF\n");
5f80b19c 2431 } );
79072805 2432 TOKEN(0);
463ee0b2 2433 }
3280af22 2434 if (s++ < PL_bufend)
a687059c 2435 goto retry; /* ignore stray nulls */
3280af22
NIS
2436 PL_last_uni = 0;
2437 PL_last_lop = 0;
2438 if (!PL_in_eval && !PL_preambled) {
2439 PL_preambled = TRUE;
2440 sv_setpv(PL_linestr,incl_perldb());
2441 if (SvCUR(PL_linestr))
2442 sv_catpv(PL_linestr,";");
2443 if (PL_preambleav){
2444 while(AvFILLp(PL_preambleav) >= 0) {
2445 SV *tmpsv = av_shift(PL_preambleav);
2446 sv_catsv(PL_linestr, tmpsv);
2447 sv_catpv(PL_linestr, ";");
91b7def8 2448 sv_free(tmpsv);
2449 }
3280af22
NIS
2450 sv_free((SV*)PL_preambleav);
2451 PL_preambleav = NULL;
91b7def8 2452 }
3280af22
NIS
2453 if (PL_minus_n || PL_minus_p) {
2454 sv_catpv(PL_linestr, "LINE: while (<>) {");
2455 if (PL_minus_l)
2456 sv_catpv(PL_linestr,"chomp;");
2457 if (PL_minus_a) {
3280af22
NIS
2458 if (PL_minus_F) {
2459 if (strchr("/'\"", *PL_splitstr)
2460 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2461 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2462 else {
2463 char delim;
2464 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2465 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2466 delim = *s;
75c72d73 2467 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2468 "q" + (delim == '\''), delim);
3280af22 2469 for (s = PL_splitstr; *s; s++) {
54310121 2470 if (*s == '\\')
3280af22
NIS
2471 sv_catpvn(PL_linestr, "\\", 1);
2472 sv_catpvn(PL_linestr, s, 1);
54310121 2473 }
cea2e8a9 2474 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2475 }
2304df62
AD
2476 }
2477 else
75c72d73 2478 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2479 }
79072805 2480 }
3280af22
NIS
2481 sv_catpv(PL_linestr, "\n");
2482 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2483 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2484 PL_last_lop = PL_last_uni = Nullch;
3280af22 2485 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2486 SV *sv = NEWSV(85,0);
2487
2488 sv_upgrade(sv, SVt_PVMG);
3280af22 2489 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2490 (void)SvIOK_on(sv);
2491 SvIVX(sv) = 0;
57843af0 2492 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2493 }
79072805 2494 goto retry;
a687059c 2495 }
e929a76b 2496 do {
aa7440fb 2497 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2498 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2499 fake_eof:
2500 if (PL_rsfp) {
2501 if (PL_preprocess && !PL_in_eval)
2502 (void)PerlProc_pclose(PL_rsfp);
2503 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2504 PerlIO_clearerr(PL_rsfp);
2505 else
2506 (void)PerlIO_close(PL_rsfp);
2507 PL_rsfp = Nullfp;
2508 PL_doextract = FALSE;
2509 }
2510 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2511 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2512 sv_catpv(PL_linestr,";}");
2513 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2514 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2515 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2516 PL_minus_n = PL_minus_p = 0;
2517 goto retry;
2518 }
2519 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2520 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2521 sv_setpv(PL_linestr,"");
2522 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2523 }
2524 /* if it looks like the start of a BOM, check if it in fact is */
2525 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2526#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2527# ifdef __GNU_LIBRARY__
2528# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2529# define FTELL_FOR_PIPE_IS_BROKEN
2530# endif
e3f494f1
JH
2531# else
2532# ifdef __GLIBC__
2533# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2534# define FTELL_FOR_PIPE_IS_BROKEN
2535# endif
2536# endif
226017aa
DD
2537# endif
2538#endif
2539#ifdef FTELL_FOR_PIPE_IS_BROKEN
2540 /* This loses the possibility to detect the bof
2541 * situation on perl -P when the libc5 is being used.
2542 * Workaround? Maybe attach some extra state to PL_rsfp?
2543 */
2544 if (!PL_preprocess)
7e28d3af 2545 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2546#else
eb160463 2547 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2548#endif
7e28d3af 2549 if (bof) {
3280af22 2550 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2551 s = swallow_bom((U8*)s);
e929a76b 2552 }
378cc40b 2553 }
3280af22 2554 if (PL_doextract) {
a0d0e21e
LW
2555 /* Incest with pod. */
2556 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2557 sv_setpv(PL_linestr, "");
2558 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2559 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2560 PL_last_lop = PL_last_uni = Nullch;
3280af22 2561 PL_doextract = FALSE;
a0d0e21e 2562 }
4e553d73 2563 }
463ee0b2 2564 incline(s);
3280af22
NIS
2565 } while (PL_doextract);
2566 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2567 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2568 SV *sv = NEWSV(85,0);
a687059c 2569
93a17b20 2570 sv_upgrade(sv, SVt_PVMG);
3280af22 2571 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2572 (void)SvIOK_on(sv);
2573 SvIVX(sv) = 0;
57843af0 2574 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2575 }
3280af22 2576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2577 PL_last_lop = PL_last_uni = Nullch;
57843af0 2578 if (CopLINE(PL_curcop) == 1) {
3280af22 2579 while (s < PL_bufend && isSPACE(*s))
79072805 2580 s++;
a0d0e21e 2581 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2582 s++;
44a8e56a 2583 d = Nullch;
3280af22 2584 if (!PL_in_eval) {
44a8e56a 2585 if (*s == '#' && *(s+1) == '!')
2586 d = s + 2;
2587#ifdef ALTERNATE_SHEBANG
2588 else {
2589 static char as[] = ALTERNATE_SHEBANG;
2590 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2591 d = s + (sizeof(as) - 1);
2592 }
2593#endif /* ALTERNATE_SHEBANG */
2594 }
2595 if (d) {
b8378b72 2596 char *ipath;
774d564b 2597 char *ipathend;
b8378b72 2598
774d564b 2599 while (isSPACE(*d))
b8378b72
CS
2600 d++;
2601 ipath = d;
774d564b 2602 while (*d && !isSPACE(*d))
2603 d++;
2604 ipathend = d;
2605
2606#ifdef ARG_ZERO_IS_SCRIPT
2607 if (ipathend > ipath) {
2608 /*
2609 * HP-UX (at least) sets argv[0] to the script name,
2610 * which makes $^X incorrect. And Digital UNIX and Linux,
2611 * at least, set argv[0] to the basename of the Perl
2612 * interpreter. So, having found "#!", we'll set it right.
2613 */
ee2f7564 2614 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2615 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2616 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2617 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2618 SvSETMAGIC(x);
2619 }
556c1dec
JH
2620 else {
2621 STRLEN blen;
2622 STRLEN llen;
2623 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2624 char *lstart = SvPV(x,llen);
2625 if (llen < blen) {
2626 bstart += blen - llen;
2627 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2628 sv_setpvn(x, ipath, ipathend - ipath);
2629 SvSETMAGIC(x);
2630 }
2631 }
2632 }
774d564b 2633 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2634 }
774d564b 2635#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2636
2637 /*
2638 * Look for options.
2639 */
748a9306 2640 d = instr(s,"perl -");
84e30d1a 2641 if (!d) {
748a9306 2642 d = instr(s,"perl");
84e30d1a
GS
2643#if defined(DOSISH)
2644 /* avoid getting into infinite loops when shebang
2645 * line contains "Perl" rather than "perl" */
2646 if (!d) {
2647 for (d = ipathend-4; d >= ipath; --d) {
2648 if ((*d == 'p' || *d == 'P')
2649 && !ibcmp(d, "perl", 4))
2650 {
2651 break;
2652 }
2653 }
2654 if (d < ipath)
2655 d = Nullch;
2656 }
2657#endif
2658 }
44a8e56a 2659#ifdef ALTERNATE_SHEBANG
2660 /*
2661 * If the ALTERNATE_SHEBANG on this system starts with a
2662 * character that can be part of a Perl expression, then if
2663 * we see it but not "perl", we're probably looking at the
2664 * start of Perl code, not a request to hand off to some
2665 * other interpreter. Similarly, if "perl" is there, but
2666 * not in the first 'word' of the line, we assume the line
2667 * contains the start of the Perl program.
44a8e56a 2668 */
2669 if (d && *s != '#') {
774d564b 2670 char *c = ipath;
44a8e56a 2671 while (*c && !strchr("; \t\r\n\f\v#", *c))
2672 c++;
2673 if (c < d)
2674 d = Nullch; /* "perl" not in first word; ignore */
2675 else
2676 *s = '#'; /* Don't try to parse shebang line */
2677 }
774d564b 2678#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2679#ifndef MACOS_TRADITIONAL
748a9306 2680 if (!d &&
44a8e56a 2681 *s == '#' &&
774d564b 2682 ipathend > ipath &&
3280af22 2683 !PL_minus_c &&
748a9306 2684 !instr(s,"indir") &&
3280af22 2685 instr(PL_origargv[0],"perl"))
748a9306 2686 {
9f68db38 2687 char **newargv;
9f68db38 2688
774d564b 2689 *ipathend = '\0';
2690 s = ipathend + 1;
3280af22 2691 while (s < PL_bufend && isSPACE(*s))
9f68db38 2692 s++;
3280af22
NIS
2693 if (s < PL_bufend) {
2694 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2695 newargv[1] = s;
3280af22 2696 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2697 s++;
2698 *s = '\0';
3280af22 2699 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2700 }
2701 else
3280af22 2702 newargv = PL_origargv;
774d564b 2703 newargv[0] = ipath;
b4748376 2704 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2705 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2706 }
bf4acbe4 2707#endif
748a9306 2708 if (d) {
3280af22
NIS
2709 U32 oldpdb = PL_perldb;
2710 bool oldn = PL_minus_n;
2711 bool oldp = PL_minus_p;
748a9306
LW
2712
2713 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2714 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2715
2716 if (*d++ == '-') {
a11ec5a9 2717 bool switches_done = PL_doswitches;
8cc95fdb 2718 do {
2719 if (*d == 'M' || *d == 'm') {
2720 char *m = d;
2721 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2722 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2723 (int)(d - m), m);
2724 }
2725 d = moreswitches(d);
2726 } while (d);
155aba94
GS
2727 if ((PERLDB_LINE && !oldpdb) ||
2728 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2729 /* if we have already added "LINE: while (<>) {",
2730 we must not do it again */
748a9306 2731 {
3280af22
NIS
2732 sv_setpv(PL_linestr, "");
2733 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2735 PL_last_lop = PL_last_uni = Nullch;
3280af22 2736 PL_preambled = FALSE;
84902520 2737 if (PERLDB_LINE)
3280af22 2738 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2739 goto retry;
2740 }
a11ec5a9
RGS
2741 if (PL_doswitches && !switches_done) {
2742 int argc = PL_origargc;
2743 char **argv = PL_origargv;
2744 do {
2745 argc--,argv++;
2746 } while (argc && argv[0][0] == '-' && argv[0][1]);
2747 init_argv_symbols(argc,argv);
2748 }
a0d0e21e 2749 }
79072805 2750 }
9f68db38 2751 }
79072805 2752 }
3280af22
NIS
2753 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2754 PL_bufptr = s;
2755 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2756 return yylex();
ae986130 2757 }
378cc40b 2758 goto retry;
4fdae800 2759 case '\r':
6a27c188 2760#ifdef PERL_STRICT_CR
cea2e8a9 2761 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2762 Perl_croak(aTHX_
cc507455 2763 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2764#endif
4fdae800 2765 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2766#ifdef MACOS_TRADITIONAL
2767 case '\312':
2768#endif
378cc40b
LW
2769 s++;
2770 goto retry;
378cc40b 2771 case '#':
e929a76b 2772 case '\n':
3280af22 2773 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2774 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2775 /* handle eval qq[#line 1 "foo"\n ...] */
2776 CopLINE_dec(PL_curcop);
2777 incline(s);
2778 }
3280af22 2779 d = PL_bufend;
a687059c 2780 while (s < d && *s != '\n')
378cc40b 2781 s++;
0f85fab0 2782 if (s < d)
378cc40b 2783 s++;
78c267c1 2784 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2785 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2786 incline(s);
3280af22
NIS
2787 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2788 PL_bufptr = s;
2789 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2790 return yylex();
a687059c 2791 }
378cc40b 2792 }
a687059c 2793 else {
378cc40b 2794 *s = '\0';
3280af22 2795 PL_bufend = s;
a687059c 2796 }
378cc40b
LW
2797 goto retry;
2798 case '-':
79072805 2799 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2800 I32 ftst = 0;
2801
378cc40b 2802 s++;
3280af22 2803 PL_bufptr = s;
748a9306
LW
2804 tmp = *s++;
2805
bf4acbe4 2806 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2807 s++;
2808
2809 if (strnEQ(s,"=>",2)) {
3280af22 2810 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2811 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2812 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2813 } );
748a9306
LW
2814 OPERATOR('-'); /* unary minus */
2815 }
3280af22 2816 PL_last_uni = PL_oldbufptr;
748a9306 2817 switch (tmp) {
e5edeb50
JH
2818 case 'r': ftst = OP_FTEREAD; break;
2819 case 'w': ftst = OP_FTEWRITE; break;
2820 case 'x': ftst = OP_FTEEXEC; break;
2821 case 'o': ftst = OP_FTEOWNED; break;
2822 case 'R': ftst = OP_FTRREAD; break;
2823 case 'W': ftst = OP_FTRWRITE; break;
2824 case 'X': ftst = OP_FTREXEC; break;
2825 case 'O': ftst = OP_FTROWNED; break;
2826 case 'e': ftst = OP_FTIS; break;
2827 case 'z': ftst = OP_FTZERO; break;
2828 case 's': ftst = OP_FTSIZE; break;
2829 case 'f': ftst = OP_FTFILE; break;
2830 case 'd': ftst = OP_FTDIR; break;
2831 case 'l': ftst = OP_FTLINK; break;
2832 case 'p': ftst = OP_FTPIPE; break;
2833 case 'S': ftst = OP_FTSOCK; break;
2834 case 'u': ftst = OP_FTSUID; break;
2835 case 'g': ftst = OP_FTSGID; break;
2836 case 'k': ftst = OP_FTSVTX; break;
2837 case 'b': ftst = OP_FTBLK; break;
2838 case 'c': ftst = OP_FTCHR; break;
2839 case 't': ftst = OP_FTTTY; break;
2840 case 'T': ftst = OP_FTTEXT; break;
2841 case 'B': ftst = OP_FTBINARY; break;
2842 case 'M': case 'A': case 'C':
2843 gv_fetchpv("\024",TRUE, SVt_PV);
2844 switch (tmp) {
2845 case 'M': ftst = OP_FTMTIME; break;
2846 case 'A': ftst = OP_FTATIME; break;
2847 case 'C': ftst = OP_FTCTIME; break;
2848 default: break;
2849 }
2850 break;
378cc40b 2851 default:
378cc40b
LW
2852 break;
2853 }
e5edeb50 2854 if (ftst) {
eb160463 2855 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2856 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2857 "### Saw file test %c\n", (int)ftst);
5f80b19c 2858 } );
e5edeb50
JH
2859 FTST(ftst);
2860 }
2861 else {
2862 /* Assume it was a minus followed by a one-letter named
2863 * subroutine call (or a -bareword), then. */
95c31fe3 2864 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2865 "### %c looked like a file test but was not\n",
2866 (int)ftst);
5f80b19c 2867 } );
e5edeb50
JH
2868 s -= 2;
2869 }
378cc40b 2870 }
a687059c
LW
2871 tmp = *s++;
2872 if (*s == tmp) {
2873 s++;
3280af22 2874 if (PL_expect == XOPERATOR)
79072805
LW
2875 TERM(POSTDEC);
2876 else
2877 OPERATOR(PREDEC);
2878 }
2879 else if (*s == '>') {
2880 s++;
2881 s = skipspace(s);
7e2040f0 2882 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2883 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2884 TOKEN(ARROW);
79072805 2885 }
748a9306
LW
2886 else if (*s == '$')
2887 OPERATOR(ARROW);
463ee0b2 2888 else
748a9306 2889 TERM(ARROW);
a687059c 2890 }
3280af22 2891 if (PL_expect == XOPERATOR)
79072805
LW
2892 Aop(OP_SUBTRACT);
2893 else {
3280af22 2894 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2895 check_uni();
79072805 2896 OPERATOR('-'); /* unary minus */
2f3197b3 2897 }
79072805 2898
378cc40b 2899 case '+':
a687059c
LW
2900 tmp = *s++;
2901 if (*s == tmp) {
378cc40b 2902 s++;
3280af22 2903 if (PL_expect == XOPERATOR)
79072805
LW
2904 TERM(POSTINC);
2905 else
2906 OPERATOR(PREINC);
378cc40b 2907 }
3280af22 2908 if (PL_expect == XOPERATOR)
79072805
LW
2909 Aop(OP_ADD);
2910 else {
3280af22 2911 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2912 check_uni();
a687059c 2913 OPERATOR('+');
2f3197b3 2914 }
a687059c 2915
378cc40b 2916 case '*':
3280af22
NIS
2917 if (PL_expect != XOPERATOR) {
2918 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2919 PL_expect = XOPERATOR;
2920 force_ident(PL_tokenbuf, '*');
2921 if (!*PL_tokenbuf)
a0d0e21e 2922 PREREF('*');
79072805 2923 TERM('*');
a687059c 2924 }
79072805
LW
2925 s++;
2926 if (*s == '*') {
a687059c 2927 s++;
79072805 2928 PWop(OP_POW);
a687059c 2929 }
79072805
LW
2930 Mop(OP_MULTIPLY);
2931
378cc40b 2932 case '%':
3280af22 2933 if (PL_expect == XOPERATOR) {
bbce6d69 2934 ++s;
2935 Mop(OP_MODULO);
a687059c 2936 }
3280af22
NIS
2937 PL_tokenbuf[0] = '%';
2938 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2939 if (!PL_tokenbuf[1]) {
2940 if (s == PL_bufend)
bbce6d69 2941 yyerror("Final % should be \\% or %name");
2942 PREREF('%');
a687059c 2943 }
3280af22 2944 PL_pending_ident = '%';
bbce6d69 2945 TERM('%');
a687059c 2946
378cc40b 2947 case '^':
79072805 2948 s++;
a0d0e21e 2949 BOop(OP_BIT_XOR);
79072805 2950 case '[':
3280af22 2951 PL_lex_brackets++;
79072805 2952 /* FALL THROUGH */
378cc40b 2953 case '~':
378cc40b 2954 case ',':
378cc40b
LW
2955 tmp = *s++;
2956 OPERATOR(tmp);
a0d0e21e
LW
2957 case ':':
2958 if (s[1] == ':') {
2959 len = 0;
2960 goto just_a_word;
2961 }
2962 s++;
09bef843
SB
2963 switch (PL_expect) {
2964 OP *attrs;
2965 case XOPERATOR:
2966 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2967 break;
2968 PL_bufptr = s; /* update in case we back off */
2969 goto grabattrs;
2970 case XATTRBLOCK:
2971 PL_expect = XBLOCK;
2972 goto grabattrs;
2973 case XATTRTERM:
2974 PL_expect = XTERMBLOCK;
2975 grabattrs:
2976 s = skipspace(s);
2977 attrs = Nullop;
7e2040f0 2978 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2979 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2980 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2981 if (tmp < 0) tmp = -tmp;
2982 switch (tmp) {
2983 case KEY_or:
2984 case KEY_and:
c963b151 2985 case KEY_err:
f9829d6b
GS
2986 case KEY_for:
2987 case KEY_unless:
2988 case KEY_if:
2989 case KEY_while:
2990 case KEY_until:
2991 goto got_attrs;
2992 default:
2993 break;
2994 }
2995 }
09bef843
SB
2996 if (*d == '(') {
2997 d = scan_str(d,TRUE,TRUE);
2998 if (!d) {
09bef843
SB
2999 /* MUST advance bufptr here to avoid bogus
3000 "at end of line" context messages from yyerror().
3001 */
3002 PL_bufptr = s + len;
3003 yyerror("Unterminated attribute parameter in attribute list");
3004 if (attrs)
3005 op_free(attrs);
3006 return 0; /* EOF indicator */
3007 }
3008 }
3009 if (PL_lex_stuff) {
3010 SV *sv = newSVpvn(s, len);
3011 sv_catsv(sv, PL_lex_stuff);
3012 attrs = append_elem(OP_LIST, attrs,
3013 newSVOP(OP_CONST, 0, sv));
3014 SvREFCNT_dec(PL_lex_stuff);
3015 PL_lex_stuff = Nullsv;
3016 }
3017 else {
d3cea301
SB
3018 /* NOTE: any CV attrs applied here need to be part of
3019 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3020 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3021 CvLVALUE_on(PL_compcv);
3022 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3023 CvLOCKED_on(PL_compcv);
3024 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3025 CvMETHOD_on(PL_compcv);
87ecf892 3026#ifdef USE_ITHREADS
d3cea301
SB
3027 else if (PL_in_my == KEY_our && len == 6 &&
3028 strnEQ(s, "unique", len))
7fb37951 3029 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3030#endif
78f9721b
SM
3031 /* After we've set the flags, it could be argued that
3032 we don't need to do the attributes.pm-based setting
3033 process, and shouldn't bother appending recognized
d3cea301
SB
3034 flags. To experiment with that, uncomment the
3035 following "else". (Note that's already been
3036 uncommented. That keeps the above-applied built-in
3037 attributes from being intercepted (and possibly
3038 rejected) by a package's attribute routines, but is
3039 justified by the performance win for the common case
3040 of applying only built-in attributes.) */
0256094b 3041 else
78f9721b
SM
3042 attrs = append_elem(OP_LIST, attrs,
3043 newSVOP(OP_CONST, 0,
3044 newSVpvn(s, len)));
09bef843
SB
3045 }
3046 s = skipspace(d);
0120eecf 3047 if (*s == ':' && s[1] != ':')
09bef843 3048 s = skipspace(s+1);
0120eecf
GS
3049 else if (s == d)
3050 break; /* require real whitespace or :'s */
09bef843 3051 }
f9829d6b 3052 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3053 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3054 char q = ((*s == '\'') ? '"' : '\'');
3055 /* If here for an expression, and parsed no attrs, back off. */
3056 if (tmp == '=' && !attrs) {
3057 s = PL_bufptr;
3058 break;
3059 }
3060 /* MUST advance bufptr here to avoid bogus "at end of line"
3061 context messages from yyerror().
3062 */
3063 PL_bufptr = s;
3064 if (!*s)
3065 yyerror("Unterminated attribute list");
3066 else
3067 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3068 q, *s, q));
3069 if (attrs)
3070 op_free(attrs);
3071 OPERATOR(':');
3072 }
f9829d6b 3073 got_attrs:
09bef843
SB
3074 if (attrs) {
3075 PL_nextval[PL_nexttoke].opval = attrs;
3076 force_next(THING);
3077 }
3078 TOKEN(COLONATTR);
3079 }
a0d0e21e 3080 OPERATOR(':');
8990e307
LW
3081 case '(':
3082 s++;
3280af22
NIS
3083 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3084 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3085 else
3280af22 3086 PL_expect = XTERM;
a0d0e21e 3087 TOKEN('(');
378cc40b 3088 case ';':
f4dd75d9 3089 CLINE;
378cc40b
LW
3090 tmp = *s++;
3091 OPERATOR(tmp);
3092 case ')':
378cc40b 3093 tmp = *s++;
16d20bd9
AD
3094 s = skipspace(s);
3095 if (*s == '{')
3096 PREBLOCK(tmp);
378cc40b 3097 TERM(tmp);
79072805
LW
3098 case ']':
3099 s++;
3280af22 3100 if (PL_lex_brackets <= 0)
d98d5fff 3101 yyerror("Unmatched right square bracket");
463ee0b2 3102 else
3280af22
NIS
3103 --PL_lex_brackets;
3104 if (PL_lex_state == LEX_INTERPNORMAL) {
3105 if (PL_lex_brackets == 0) {
a0d0e21e 3106 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3107 PL_lex_state = LEX_INTERPEND;
79072805
LW
3108 }
3109 }
4633a7c4 3110 TERM(']');
79072805
LW
3111 case '{':
3112 leftbracket:
79072805 3113 s++;
3280af22
NIS
3114 if (PL_lex_brackets > 100) {
3115 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3116 if (newlb != PL_lex_brackstack) {
8990e307 3117 SAVEFREEPV(newlb);
3280af22 3118 PL_lex_brackstack = newlb;
8990e307
LW
3119 }
3120 }
3280af22 3121 switch (PL_expect) {
a0d0e21e 3122 case XTERM:
3280af22 3123 if (PL_lex_formbrack) {
a0d0e21e
LW
3124 s--;
3125 PRETERMBLOCK(DO);
3126 }
3280af22
NIS
3127 if (PL_oldoldbufptr == PL_last_lop)
3128 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3129 else
3280af22 3130 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3131 OPERATOR(HASHBRACK);
a0d0e21e 3132 case XOPERATOR:
bf4acbe4 3133 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3134 s++;
44a8e56a 3135 d = s;
3280af22
NIS
3136 PL_tokenbuf[0] = '\0';
3137 if (d < PL_bufend && *d == '-') {
3138 PL_tokenbuf[0] = '-';
44a8e56a 3139 d++;
bf4acbe4 3140 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3141 d++;
3142 }
7e2040f0 3143 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3144 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3145 FALSE, &len);
bf4acbe4 3146 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3147 d++;
3148 if (*d == '}') {
3280af22 3149 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3150 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3151 if (minus)
3152 force_next('-');
748a9306
LW
3153 }
3154 }
3155 /* FALL THROUGH */
09bef843 3156 case XATTRBLOCK:
748a9306 3157 case XBLOCK:
3280af22
NIS
3158 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3159 PL_expect = XSTATE;
a0d0e21e 3160 break;
09bef843 3161 case XATTRTERM:
a0d0e21e 3162 case XTERMBLOCK:
3280af22
NIS
3163 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3164 PL_expect = XSTATE;
a0d0e21e
LW
3165 break;
3166 default: {
3167 char *t;
3280af22
NIS
3168 if (PL_oldoldbufptr == PL_last_lop)
3169 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3170 else
3280af22 3171 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3172 s = skipspace(s);
8452ff4b
SB
3173 if (*s == '}') {
3174 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3175 PL_expect = XTERM;
3176 /* This hack is to get the ${} in the message. */
3177 PL_bufptr = s+1;
3178 yyerror("syntax error");
3179 break;
3180 }
a0d0e21e 3181 OPERATOR(HASHBRACK);
8452ff4b 3182 }
b8a4b1be
GS
3183 /* This hack serves to disambiguate a pair of curlies
3184 * as being a block or an anon hash. Normally, expectation
3185 * determines that, but in cases where we're not in a
3186 * position to expect anything in particular (like inside
3187 * eval"") we have to resolve the ambiguity. This code
3188 * covers the case where the first term in the curlies is a
3189 * quoted string. Most other cases need to be explicitly
3190 * disambiguated by prepending a `+' before the opening
3191 * curly in order to force resolution as an anon hash.
3192 *
3193 * XXX should probably propagate the outer expectation
3194 * into eval"" to rely less on this hack, but that could
3195 * potentially break current behavior of eval"".
3196 * GSAR 97-07-21
3197 */
3198 t = s;
3199 if (*s == '\'' || *s == '"' || *s == '`') {
3200 /* common case: get past first string, handling escapes */
3280af22 3201 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3202 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3203 t++;
3204 t++;
a0d0e21e 3205 }
b8a4b1be 3206 else if (*s == 'q') {
3280af22 3207 if (++t < PL_bufend
b8a4b1be 3208 && (!isALNUM(*t)
3280af22 3209 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3210 && !isALNUM(*t))))
3211 {
b8a4b1be
GS
3212 char *tmps;
3213 char open, close, term;
3214 I32 brackets = 1;
3215
3280af22 3216 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3217 t++;
3218 term = *t;
3219 open = term;
3220 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3221 term = tmps[5];
3222 close = term;
3223 if (open == close)
3280af22
NIS
3224 for (t++; t < PL_bufend; t++) {
3225 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3226 t++;
6d07e5e9 3227 else if (*t == open)
b8a4b1be
GS
3228 break;
3229 }
3230 else
3280af22
NIS
3231 for (t++; t < PL_bufend; t++) {
3232 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3233 t++;
6d07e5e9 3234 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3235 break;
3236 else if (*t == open)
3237 brackets++;
3238 }
3239 }
3240 t++;
a0d0e21e 3241 }
7e2040f0 3242 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3243 t += UTF8SKIP(t);
7e2040f0 3244 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3245 t += UTF8SKIP(t);
a0d0e21e 3246 }
3280af22 3247 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3248 t++;
b8a4b1be
GS
3249 /* if comma follows first term, call it an anon hash */
3250 /* XXX it could be a comma expression with loop modifiers */
3280af22 3251 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3252 || (*t == '=' && t[1] == '>')))
a0d0e21e 3253 OPERATOR(HASHBRACK);
3280af22 3254 if (PL_expect == XREF)
4e4e412b 3255 PL_expect = XTERM;
a0d0e21e 3256 else {
3280af22
NIS
3257 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3258 PL_expect = XSTATE;
a0d0e21e 3259 }
8990e307 3260 }
a0d0e21e 3261 break;
463ee0b2 3262 }
57843af0 3263 yylval.ival = CopLINE(PL_curcop);
79072805 3264 if (isSPACE(*s) || *s == '#')
3280af22 3265 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3266 TOKEN('{');
378cc40b 3267 case '}':
79072805
LW
3268 rightbracket:
3269 s++;
3280af22 3270 if (PL_lex_brackets <= 0)
d98d5fff 3271 yyerror("Unmatched right curly bracket");
463ee0b2 3272 else
3280af22 3273 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3274 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3275 PL_lex_formbrack = 0;
3276 if (PL_lex_state == LEX_INTERPNORMAL) {
3277 if (PL_lex_brackets == 0) {
9059aa12
LW
3278 if (PL_expect & XFAKEBRACK) {
3279 PL_expect &= XENUMMASK;
3280af22
NIS
3280 PL_lex_state = LEX_INTERPEND;
3281 PL_bufptr = s;
cea2e8a9 3282 return yylex(); /* ignore fake brackets */
79072805 3283 }
fa83b5b6 3284 if (*s == '-' && s[1] == '>')
3280af22 3285 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3286 else if (*s != '[' && *s != '{')
3280af22 3287 PL_lex_state = LEX_INTERPEND;
79072805
LW
3288 }
3289 }
9059aa12
LW
3290 if (PL_expect & XFAKEBRACK) {
3291 PL_expect &= XENUMMASK;
3280af22 3292 PL_bufptr = s;
cea2e8a9 3293 return yylex(); /* ignore fake brackets */
748a9306 3294 }
79072805
LW
3295 force_next('}');
3296 TOKEN(';');
378cc40b
LW
3297 case '&':
3298 s++;
3299 tmp = *s++;
3300 if (tmp == '&')
a0d0e21e 3301 AOPERATOR(ANDAND);
378cc40b 3302 s--;
3280af22 3303 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3304 if (ckWARN(WARN_SEMICOLON)
3305 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3306 {
57843af0 3307 CopLINE_dec(PL_curcop);
9014280d 3308 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3309 CopLINE_inc(PL_curcop);
463ee0b2 3310 }
79072805 3311 BAop(OP_BIT_AND);
463ee0b2 3312 }
79072805 3313
3280af22
NIS
3314 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3315 if (*PL_tokenbuf) {
3316 PL_expect = XOPERATOR;
3317 force_ident(PL_tokenbuf, '&');
463ee0b2 3318 }
79072805
LW
3319 else
3320 PREREF('&');
c07a80fd 3321 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3322 TERM('&');
3323
378cc40b
LW
3324 case '|':
3325 s++;
3326 tmp = *s++;
3327 if (tmp == '|')
a0d0e21e 3328 AOPERATOR(OROR);
378cc40b 3329 s--;
79072805 3330 BOop(OP_BIT_OR);
378cc40b
LW
3331 case '=':
3332 s++;
3333 tmp = *s++;
3334 if (tmp == '=')
79072805
LW
3335 Eop(OP_EQ);
3336 if (tmp == '>')
3337 OPERATOR(',');
378cc40b 3338 if (tmp == '~')
79072805 3339 PMop(OP_MATCH);
599cee73 3340 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3341 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3342 s--;
3280af22
NIS
3343 if (PL_expect == XSTATE && isALPHA(tmp) &&
3344 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3345 {
3280af22
NIS
3346 if (PL_in_eval && !PL_rsfp) {
3347 d = PL_bufend;
a5f75d66
AD
3348 while (s < d) {
3349 if (*s++ == '\n') {
3350 incline(s);
3351 if (strnEQ(s,"=cut",4)) {
3352 s = strchr(s,'\n');
3353 if (s)
3354 s++;
3355 else
3356 s = d;
3357 incline(s);
3358 goto retry;
3359 }
3360 }
3361 }
3362 goto retry;
3363 }
3280af22
NIS
3364 s = PL_bufend;
3365 PL_doextract = TRUE;
a0d0e21e
LW
3366 goto retry;
3367 }
3280af22 3368 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3369 char *t;
51882d45 3370#ifdef PERL_STRICT_CR
bf4acbe4 3371 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3372#else
bf4acbe4 3373 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3374#endif
a0d0e21e
LW
3375 if (*t == '\n' || *t == '#') {
3376 s--;
3280af22 3377 PL_expect = XBLOCK;
a0d0e21e
LW
3378 goto leftbracket;
3379 }
79072805 3380 }
a0d0e21e
LW
3381 yylval.ival = 0;
3382 OPERATOR(ASSIGNOP);
378cc40b
LW
3383 case '!':
3384 s++;
3385 tmp = *s++;
3386 if (tmp == '=')
79072805 3387 Eop(OP_NE);
378cc40b 3388 if (tmp == '~')
79072805 3389 PMop(OP_NOT);
378cc40b
LW
3390 s--;
3391 OPERATOR('!');
3392 case '<':
3280af22 3393 if (PL_expect != XOPERATOR) {
93a17b20 3394 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3395 check_uni();
79072805
LW
3396 if (s[1] == '<')
3397 s = scan_heredoc(s);
3398 else
3399 s = scan_inputsymbol(s);
3400 TERM(sublex_start());
378cc40b
LW
3401 }
3402 s++;
3403 tmp = *s++;
3404 if (tmp == '<')
79072805 3405 SHop(OP_LEFT_SHIFT);
395c3793
LW
3406 if (tmp == '=') {
3407 tmp = *s++;
3408 if (tmp == '>')
79072805 3409 Eop(OP_NCMP);
395c3793 3410 s--;
79072805 3411 Rop(OP_LE);
395c3793 3412 }
378cc40b 3413 s--;
79072805 3414 Rop(OP_LT);
378cc40b
LW
3415 case '>':
3416 s++;
3417 tmp = *s++;
3418 if (tmp == '>')
79072805 3419 SHop(OP_RIGHT_SHIFT);
378cc40b 3420 if (tmp == '=')
79072805 3421 Rop(OP_GE);
378cc40b 3422 s--;
79072805 3423 Rop(OP_GT);
378cc40b
LW
3424
3425 case '$':
bbce6d69 3426 CLINE;
3427
3280af22
NIS
3428 if (PL_expect == XOPERATOR) {
3429 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3430 PL_expect = XTERM;
a0d0e21e 3431 depcom();
bbce6d69 3432 return ','; /* grandfather non-comma-format format */
a0d0e21e 3433 }
8990e307 3434 }
a0d0e21e 3435
7e2040f0 3436 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3437 PL_tokenbuf[0] = '@';
376b8730
SM
3438 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3439 sizeof PL_tokenbuf - 1, FALSE);
3440 if (PL_expect == XOPERATOR)
3441 no_op("Array length", s);
3280af22 3442 if (!PL_tokenbuf[1])
a0d0e21e 3443 PREREF(DOLSHARP);
3280af22
NIS
3444 PL_expect = XOPERATOR;
3445 PL_pending_ident = '#';
463ee0b2 3446 TOKEN(DOLSHARP);
79072805 3447 }
bbce6d69 3448
3280af22 3449 PL_tokenbuf[0] = '$';
376b8730
SM
3450 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3451 sizeof PL_tokenbuf - 1, FALSE);
3452 if (PL_expect == XOPERATOR)
3453 no_op("Scalar", s);
3280af22
NIS
3454 if (!PL_tokenbuf[1]) {
3455 if (s == PL_bufend)
bbce6d69 3456 yyerror("Final $ should be \\$ or $name");
3457 PREREF('$');
8990e307 3458 }
a0d0e21e 3459
bbce6d69 3460 /* This kludge not intended to be bulletproof. */
3280af22 3461 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3462 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3463 newSViv(PL_compiling.cop_arybase));
bbce6d69 3464 yylval.opval->op_private = OPpCONST_ARYBASE;
3465 TERM(THING);
3466 }
3467
ff68c719 3468 d = s;
69d2bceb 3469 tmp = (I32)*s;
3280af22 3470 if (PL_lex_state == LEX_NORMAL)
ff68c719 3471 s = skipspace(s);
3472
3280af22 3473 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3474 char *t;
3475 if (*s == '[') {
3280af22 3476 PL_tokenbuf[0] = '@';
599cee73 3477 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3478 for(t = s + 1;
7e2040f0 3479 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3480 t++) ;
a0d0e21e 3481 if (*t++ == ',') {
3280af22
NIS
3482 PL_bufptr = skipspace(PL_bufptr);
3483 while (t < PL_bufend && *t != ']')
bbce6d69 3484 t++;
9014280d 3485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3486 "Multidimensional syntax %.*s not supported",
3487 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3488 }
3489 }
bbce6d69 3490 }
3491 else if (*s == '{') {
3280af22 3492 PL_tokenbuf[0] = '%';
599cee73 3493 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3494 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3495 {
3280af22 3496 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3497 STRLEN len;
3498 for (t++; isSPACE(*t); t++) ;
7e2040f0 3499 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3500 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3501 for (; isSPACE(*t); t++) ;
864dbfa3 3502 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3503 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3504 "You need to quote \"%s\"", tmpbuf);
748a9306 3505 }
93a17b20
LW
3506 }
3507 }
2f3197b3 3508 }