This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Implement "my $_".
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
371fce9b 4 * 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
d3b6f988
GS
26#define yychar PL_yychar
27#define yylval PL_yylval
28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a
PP
64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff
PP
66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e 82#ifdef USE_PURE_BISON
dba4d153
JH
83# ifndef YYMAXLEVEL
84# define YYMAXLEVEL 100
85# endif
20141f0e
RI
86YYSTYPE* yylval_pointer[YYMAXLEVEL];
87int* yychar_pointer[YYMAXLEVEL];
6f202aea 88int yyactlevel = -1;
22c35a8c
GS
89# undef yylval
90# undef yychar
20141f0e
RI
91# define yylval (*yylval_pointer[yyactlevel])
92# define yychar (*yychar_pointer[yyactlevel])
93# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 94# undef yylex
dba4d153 95# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
075953c3
JH
135/* Note that REPORT() and REPORT2() will be expressions that supply
136 * their own trailing comma, not suitable for statements as such. */
998054bd 137#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
138# define REPORT(x,retval) tokereport(x,s,(int)retval),
139# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 140#else
075953c3
JH
141# define REPORT(x,retval)
142# define REPORT2(x,retval)
998054bd
SC
143#endif
144
075953c3
JH
145#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
146#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
147#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
148#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
149#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
150#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
151#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
152#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 153#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
154#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
155#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
156#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
157#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
158#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
159#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
160#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
161#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
162#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
163#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
164#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 165
a687059c
LW
166/* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
a687059c 170 */
6f33ba73 171#define UNI2(f,x) return(yylval.ival = f, \
075953c3 172 REPORT("uni",f) \
6f33ba73 173 PL_expect = x, \
3280af22
NIS
174 PL_bufptr = s, \
175 PL_last_uni = PL_oldbufptr, \
176 PL_last_lop_op = f, \
a687059c 177 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
178#define UNI(f) UNI2(f,XTERM)
179#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 180
79072805 181#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 182 REPORT("uni",f) \
3280af22
NIS
183 PL_bufptr = s, \
184 PL_last_uni = PL_oldbufptr, \
79072805
LW
185 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
2d00ba3b 192STATIC void
61b2116b 193S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 194{
998054bd 195 DEBUG_T({
9c5ffd7c 196 SV* report = newSVpv(thing, 0);
29b291f7
RB
197 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
198 (IV)rv);
998054bd
SC
199
200 if (s - PL_bufptr > 0)
201 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
202 else {
203 if (PL_oldbufptr && *PL_oldbufptr)
204 sv_catpv(report, PL_tokenbuf);
205 }
206 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 207 });
998054bd
SC
208}
209
8fa7f367
JH
210#endif
211
ffb4593c
NT
212/*
213 * S_ao
214 *
c963b151
BD
215 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
216 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
217 */
218
76e3520e 219STATIC int
cea2e8a9 220S_ao(pTHX_ int toketype)
a0d0e21e 221{
3280af22
NIS
222 if (*PL_bufptr == '=') {
223 PL_bufptr++;
a0d0e21e
LW
224 if (toketype == ANDAND)
225 yylval.ival = OP_ANDASSIGN;
226 else if (toketype == OROR)
227 yylval.ival = OP_ORASSIGN;
c963b151
BD
228 else if (toketype == DORDOR)
229 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
230 toketype = ASSIGNOP;
231 }
232 return toketype;
233}
234
ffb4593c
NT
235/*
236 * S_no_op
237 * When Perl expects an operator and finds something else, no_op
238 * prints the warning. It always prints "<something> found where
239 * operator expected. It prints "Missing semicolon on previous line?"
240 * if the surprise occurs at the start of the line. "do you need to
241 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
242 * where the compiler doesn't know if foo is a method call or a function.
243 * It prints "Missing operator before end of line" if there's nothing
244 * after the missing operator, or "... before <...>" if there is something
245 * after the missing operator.
246 */
247
76e3520e 248STATIC void
cea2e8a9 249S_no_op(pTHX_ char *what, char *s)
463ee0b2 250{
3280af22
NIS
251 char *oldbp = PL_bufptr;
252 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 253
1189a94a
GS
254 if (!s)
255 s = oldbp;
07c798fb 256 else
1189a94a 257 PL_bufptr = s;
cea2e8a9 258 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
259 if (ckWARN_d(WARN_SYNTAX)) {
260 if (is_first)
261 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
262 "\t(Missing semicolon on previous line?)\n");
263 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
264 char *t;
265 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
266 if (t < PL_bufptr && isSPACE(*t))
267 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
268 "\t(Do you need to predeclare %.*s?)\n",
269 t - PL_oldoldbufptr, PL_oldoldbufptr);
270 }
271 else {
272 assert(s >= oldbp);
273 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
274 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
275 }
07c798fb 276 }
3280af22 277 PL_bufptr = oldbp;
8990e307
LW
278}
279
ffb4593c
NT
280/*
281 * S_missingterm
282 * Complain about missing quote/regexp/heredoc terminator.
283 * If it's called with (char *)NULL then it cauterizes the line buffer.
284 * If we're in a delimited string and the delimiter is a control
285 * character, it's reformatted into a two-char sequence like ^C.
286 * This is fatal.
287 */
288
76e3520e 289STATIC void
cea2e8a9 290S_missingterm(pTHX_ char *s)
8990e307
LW
291{
292 char tmpbuf[3];
293 char q;
294 if (s) {
295 char *nl = strrchr(s,'\n');
d2719217 296 if (nl)
8990e307
LW
297 *nl = '\0';
298 }
9d116dd7
JH
299 else if (
300#ifdef EBCDIC
301 iscntrl(PL_multi_close)
302#else
303 PL_multi_close < 32 || PL_multi_close == 127
304#endif
305 ) {
8990e307 306 *tmpbuf = '^';
3280af22 307 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
308 s = "\\n";
309 tmpbuf[2] = '\0';
310 s = tmpbuf;
311 }
312 else {
eb160463 313 *tmpbuf = (char)PL_multi_close;
8990e307
LW
314 tmpbuf[1] = '\0';
315 s = tmpbuf;
316 }
317 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 318 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 319}
79072805 320
ffb4593c
NT
321/*
322 * Perl_deprecate
ffb4593c
NT
323 */
324
79072805 325void
864dbfa3 326Perl_deprecate(pTHX_ char *s)
a0d0e21e 327{
599cee73 328 if (ckWARN(WARN_DEPRECATED))
9014280d 329 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
330}
331
12bcd1a6
PM
332void
333Perl_deprecate_old(pTHX_ char *s)
334{
335 /* This function should NOT be called for any new deprecated warnings */
336 /* Use Perl_deprecate instead */
337 /* */
338 /* It is here to maintain backward compatibility with the pre-5.8 */
339 /* warnings category hierarchy. The "deprecated" category used to */
340 /* live under the "syntax" category. It is now a top-level category */
341 /* in its own right. */
342
343 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
344 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
345 "Use of %s is deprecated", s);
346}
347
ffb4593c
NT
348/*
349 * depcom
9cbb5ea2 350 * Deprecate a comma-less variable list.
ffb4593c
NT
351 */
352
76e3520e 353STATIC void
cea2e8a9 354S_depcom(pTHX)
a0d0e21e 355{
12bcd1a6 356 deprecate_old("comma-less variable list");
a0d0e21e
LW
357}
358
ffb4593c 359/*
9cbb5ea2
GS
360 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
361 * utf16-to-utf8-reversed.
ffb4593c
NT
362 */
363
c39cd008
GS
364#ifdef PERL_CR_FILTER
365static void
366strip_return(SV *sv)
367{
368 register char *s = SvPVX(sv);
369 register char *e = s + SvCUR(sv);
370 /* outer loop optimized to do nothing if there are no CR-LFs */
371 while (s < e) {
372 if (*s++ == '\r' && *s == '\n') {
373 /* hit a CR-LF, need to copy the rest */
374 register char *d = s - 1;
375 *d++ = *s++;
376 while (s < e) {
377 if (*s == '\r' && s[1] == '\n')
378 s++;
379 *d++ = *s++;
380 }
381 SvCUR(sv) -= s - d;
382 return;
383 }
384 }
385}
a868473f 386
76e3520e 387STATIC I32
c39cd008 388S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 389{
c39cd008
GS
390 I32 count = FILTER_READ(idx+1, sv, maxlen);
391 if (count > 0 && !maxlen)
392 strip_return(sv);
393 return count;
a868473f
NIS
394}
395#endif
396
ffb4593c
NT
397/*
398 * Perl_lex_start
9cbb5ea2
GS
399 * Initialize variables. Uses the Perl save_stack to save its state (for
400 * recursive calls to the parser).
ffb4593c
NT
401 */
402
a0d0e21e 403void
864dbfa3 404Perl_lex_start(pTHX_ SV *line)
79072805 405{
8990e307
LW
406 char *s;
407 STRLEN len;
408
3280af22
NIS
409 SAVEI32(PL_lex_dojoin);
410 SAVEI32(PL_lex_brackets);
3280af22
NIS
411 SAVEI32(PL_lex_casemods);
412 SAVEI32(PL_lex_starts);
413 SAVEI32(PL_lex_state);
7766f137 414 SAVEVPTR(PL_lex_inpat);
3280af22 415 SAVEI32(PL_lex_inwhat);
18b09519
GS
416 if (PL_lex_state == LEX_KNOWNEXT) {
417 I32 toke = PL_nexttoke;
418 while (--toke >= 0) {
419 SAVEI32(PL_nexttype[toke]);
420 SAVEVPTR(PL_nextval[toke]);
421 }
422 SAVEI32(PL_nexttoke);
18b09519 423 }
57843af0 424 SAVECOPLINE(PL_curcop);
3280af22
NIS
425 SAVEPPTR(PL_bufptr);
426 SAVEPPTR(PL_bufend);
427 SAVEPPTR(PL_oldbufptr);
428 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
429 SAVEPPTR(PL_last_lop);
430 SAVEPPTR(PL_last_uni);
3280af22
NIS
431 SAVEPPTR(PL_linestart);
432 SAVESPTR(PL_linestr);
8edd5f42
RGS
433 SAVEGENERICPV(PL_lex_brackstack);
434 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 435 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
436 SAVESPTR(PL_lex_stuff);
437 SAVEI32(PL_lex_defer);
09bef843 438 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 439 SAVESPTR(PL_lex_repl);
bebdddfc
GS
440 SAVEINT(PL_expect);
441 SAVEINT(PL_lex_expect);
3280af22
NIS
442
443 PL_lex_state = LEX_NORMAL;
444 PL_lex_defer = 0;
445 PL_expect = XSTATE;
446 PL_lex_brackets = 0;
3280af22
NIS
447 New(899, PL_lex_brackstack, 120, char);
448 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
449 PL_lex_casemods = 0;
450 *PL_lex_casestack = '\0';
451 PL_lex_dojoin = 0;
452 PL_lex_starts = 0;
453 PL_lex_stuff = Nullsv;
454 PL_lex_repl = Nullsv;
455 PL_lex_inpat = 0;
76be56bc 456 PL_nexttoke = 0;
3280af22 457 PL_lex_inwhat = 0;
09bef843 458 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
459 PL_linestr = line;
460 if (SvREADONLY(PL_linestr))
461 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
462 s = SvPV(PL_linestr, len);
6f27f9a7 463 if (!len || s[len-1] != ';') {
3280af22
NIS
464 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
465 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
466 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 467 }
3280af22
NIS
468 SvTEMP_off(PL_linestr);
469 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
470 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 471 PL_last_lop = PL_last_uni = Nullch;
3280af22 472 PL_rsfp = 0;
79072805 473}
a687059c 474
ffb4593c
NT
475/*
476 * Perl_lex_end
9cbb5ea2
GS
477 * Finalizer for lexing operations. Must be called when the parser is
478 * done with the lexer.
ffb4593c
NT
479 */
480
463ee0b2 481void
864dbfa3 482Perl_lex_end(pTHX)
463ee0b2 483{
3280af22 484 PL_doextract = FALSE;
463ee0b2
LW
485}
486
ffb4593c
NT
487/*
488 * S_incline
489 * This subroutine has nothing to do with tilting, whether at windmills
490 * or pinball tables. Its name is short for "increment line". It
57843af0 491 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 492 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
493 * # line 500 "foo.pm"
494 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
495 */
496
76e3520e 497STATIC void
cea2e8a9 498S_incline(pTHX_ char *s)
463ee0b2
LW
499{
500 char *t;
501 char *n;
73659bf1 502 char *e;
463ee0b2 503 char ch;
463ee0b2 504
57843af0 505 CopLINE_inc(PL_curcop);
463ee0b2
LW
506 if (*s++ != '#')
507 return;
bf4acbe4 508 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
509 if (strnEQ(s, "line", 4))
510 s += 4;
511 else
512 return;
084592ab 513 if (SPACE_OR_TAB(*s))
73659bf1 514 s++;
4e553d73 515 else
73659bf1 516 return;
bf4acbe4 517 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
518 if (!isDIGIT(*s))
519 return;
520 n = s;
521 while (isDIGIT(*s))
522 s++;
bf4acbe4 523 while (SPACE_OR_TAB(*s))
463ee0b2 524 s++;
73659bf1 525 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 526 s++;
73659bf1
GS
527 e = t + 1;
528 }
463ee0b2 529 else {
463ee0b2 530 for (t = s; !isSPACE(*t); t++) ;
73659bf1 531 e = t;
463ee0b2 532 }
bf4acbe4 533 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
534 e++;
535 if (*e != '\n' && *e != '\0')
536 return; /* false alarm */
537
463ee0b2
LW
538 ch = *t;
539 *t = '\0';
f4dd75d9 540 if (t - s > 0) {
05ec9bb3 541 CopFILE_free(PL_curcop);
57843af0 542 CopFILE_set(PL_curcop, s);
f4dd75d9 543 }
463ee0b2 544 *t = ch;
57843af0 545 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
546}
547
ffb4593c
NT
548/*
549 * S_skipspace
550 * Called to gobble the appropriate amount and type of whitespace.
551 * Skips comments as well.
552 */
553
76e3520e 554STATIC char *
cea2e8a9 555S_skipspace(pTHX_ register char *s)
a687059c 556{
3280af22 557 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 558 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
559 s++;
560 return s;
561 }
562 for (;;) {
fd049845 563 STRLEN prevlen;
09bef843 564 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 565 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
566 while (s < PL_bufend && isSPACE(*s)) {
567 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
568 incline(s);
569 }
ffb4593c
NT
570
571 /* comment */
3280af22
NIS
572 if (s < PL_bufend && *s == '#') {
573 while (s < PL_bufend && *s != '\n')
463ee0b2 574 s++;
60e6418e 575 if (s < PL_bufend) {
463ee0b2 576 s++;
60e6418e
GS
577 if (PL_in_eval && !PL_rsfp) {
578 incline(s);
579 continue;
580 }
581 }
463ee0b2 582 }
ffb4593c
NT
583
584 /* only continue to recharge the buffer if we're at the end
585 * of the buffer, we're not reading from a source filter, and
586 * we're in normal lexing mode
587 */
09bef843
SB
588 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
589 PL_lex_state == LEX_FORMLINE)
463ee0b2 590 return s;
ffb4593c
NT
591
592 /* try to recharge the buffer */
9cbb5ea2
GS
593 if ((s = filter_gets(PL_linestr, PL_rsfp,
594 (prevlen = SvCUR(PL_linestr)))) == Nullch)
595 {
596 /* end of file. Add on the -p or -n magic */
3280af22
NIS
597 if (PL_minus_n || PL_minus_p) {
598 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
599 ";}continue{print or die qq(-p destination: $!\\n)" :
600 "");
3280af22
NIS
601 sv_catpv(PL_linestr,";}");
602 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
603 }
604 else
3280af22 605 sv_setpv(PL_linestr,";");
ffb4593c
NT
606
607 /* reset variables for next time we lex */
9cbb5ea2
GS
608 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
609 = SvPVX(PL_linestr);
3280af22 610 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 611 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
612
613 /* Close the filehandle. Could be from -P preprocessor,
614 * STDIN, or a regular file. If we were reading code from
615 * STDIN (because the commandline held no -e or filename)
616 * then we don't close it, we reset it so the code can
617 * read from STDIN too.
618 */
619
3280af22
NIS
620 if (PL_preprocess && !PL_in_eval)
621 (void)PerlProc_pclose(PL_rsfp);
622 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
623 PerlIO_clearerr(PL_rsfp);
8990e307 624 else
3280af22
NIS
625 (void)PerlIO_close(PL_rsfp);
626 PL_rsfp = Nullfp;
463ee0b2
LW
627 return s;
628 }
ffb4593c
NT
629
630 /* not at end of file, so we only read another line */
09bef843
SB
631 /* make corresponding updates to old pointers, for yyerror() */
632 oldprevlen = PL_oldbufptr - PL_bufend;
633 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
634 if (PL_last_uni)
635 oldunilen = PL_last_uni - PL_bufend;
636 if (PL_last_lop)
637 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
638 PL_linestart = PL_bufptr = s + prevlen;
639 PL_bufend = s + SvCUR(PL_linestr);
640 s = PL_bufptr;
09bef843
SB
641 PL_oldbufptr = s + oldprevlen;
642 PL_oldoldbufptr = s + oldoldprevlen;
643 if (PL_last_uni)
644 PL_last_uni = s + oldunilen;
645 if (PL_last_lop)
646 PL_last_lop = s + oldloplen;
a0d0e21e 647 incline(s);
ffb4593c
NT
648
649 /* debugger active and we're not compiling the debugger code,
650 * so store the line into the debugger's array of lines
651 */
3280af22 652 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
653 SV *sv = NEWSV(85,0);
654
655 sv_upgrade(sv, SVt_PVMG);
3280af22 656 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
657 (void)SvIOK_on(sv);
658 SvIVX(sv) = 0;
57843af0 659 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 660 }
463ee0b2 661 }
a687059c 662}
378cc40b 663
ffb4593c
NT
664/*
665 * S_check_uni
666 * Check the unary operators to ensure there's no ambiguity in how they're
667 * used. An ambiguous piece of code would be:
668 * rand + 5
669 * This doesn't mean rand() + 5. Because rand() is a unary operator,
670 * the +5 is its argument.
671 */
672
76e3520e 673STATIC void
cea2e8a9 674S_check_uni(pTHX)
ba106d47 675{
2f3197b3 676 char *s;
a0d0e21e 677 char *t;
2f3197b3 678
3280af22 679 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 680 return;
3280af22
NIS
681 while (isSPACE(*PL_last_uni))
682 PL_last_uni++;
7e2040f0 683 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 684 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 685 return;
0453d815 686 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 687 char ch = *s;
0453d815 688 *s = '\0';
9014280d 689 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 690 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
691 PL_last_uni);
692 *s = ch;
693 }
2f3197b3
LW
694}
695
ffb4593c
NT
696/*
697 * LOP : macro to build a list operator. Its behaviour has been replaced
698 * with a subroutine, S_lop() for which LOP is just another name.
699 */
700
a0d0e21e
LW
701#define LOP(f,x) return lop(f,x,s)
702
ffb4593c
NT
703/*
704 * S_lop
705 * Build a list operator (or something that might be one). The rules:
706 * - if we have a next token, then it's a list operator [why?]
707 * - if the next thing is an opening paren, then it's a function
708 * - else it's a list operator
709 */
710
76e3520e 711STATIC I32
a0be28da 712S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 713{
79072805 714 yylval.ival = f;
35c8bce7 715 CLINE;
075953c3 716 REPORT("lop", f)
3280af22
NIS
717 PL_expect = x;
718 PL_bufptr = s;
719 PL_last_lop = PL_oldbufptr;
eb160463 720 PL_last_lop_op = (OPCODE)f;
3280af22 721 if (PL_nexttoke)
a0d0e21e 722 return LSTOP;
79072805
LW
723 if (*s == '(')
724 return FUNC;
725 s = skipspace(s);
726 if (*s == '(')
727 return FUNC;
728 else
729 return LSTOP;
730}
731
ffb4593c
NT
732/*
733 * S_force_next
9cbb5ea2 734 * When the lexer realizes it knows the next token (for instance,
ffb4593c 735 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
736 * to know what token to return the next time the lexer is called. Caller
737 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
738 * handles the token correctly.
ffb4593c
NT
739 */
740
4e553d73 741STATIC void
cea2e8a9 742S_force_next(pTHX_ I32 type)
79072805 743{
3280af22
NIS
744 PL_nexttype[PL_nexttoke] = type;
745 PL_nexttoke++;
746 if (PL_lex_state != LEX_KNOWNEXT) {
747 PL_lex_defer = PL_lex_state;
748 PL_lex_expect = PL_expect;
749 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
750 }
751}
752
ffb4593c
NT
753/*
754 * S_force_word
755 * When the lexer knows the next thing is a word (for instance, it has
756 * just seen -> and it knows that the next char is a word char, then
757 * it calls S_force_word to stick the next word into the PL_next lookahead.
758 *
759 * Arguments:
b1b65b59 760 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
761 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
762 * int check_keyword : if true, Perl checks to make sure the word isn't
763 * a keyword (do this if the word is a label, e.g. goto FOO)
764 * int allow_pack : if true, : characters will also be allowed (require,
765 * use, etc. do this)
9cbb5ea2 766 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
767 */
768
76e3520e 769STATIC char *
cea2e8a9 770S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 771{
463ee0b2
LW
772 register char *s;
773 STRLEN len;
4e553d73 774
463ee0b2
LW
775 start = skipspace(start);
776 s = start;
7e2040f0 777 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 778 (allow_pack && *s == ':') ||
15f0808c 779 (allow_initial_tick && *s == '\'') )
a0d0e21e 780 {
3280af22
NIS
781 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
782 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
783 return start;
784 if (token == METHOD) {
785 s = skipspace(s);
786 if (*s == '(')
3280af22 787 PL_expect = XTERM;
463ee0b2 788 else {
3280af22 789 PL_expect = XOPERATOR;
463ee0b2 790 }
79072805 791 }
3280af22
NIS
792 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
793 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
5464dbd2
RGS
794 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
795 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
79072805
LW
796 force_next(token);
797 }
798 return s;
799}
800
ffb4593c
NT
801/*
802 * S_force_ident
9cbb5ea2 803 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
804 * text only contains the "foo" portion. The first argument is a pointer
805 * to the "foo", and the second argument is the type symbol to prefix.
806 * Forces the next token to be a "WORD".
9cbb5ea2 807 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
808 */
809
76e3520e 810STATIC void
cea2e8a9 811S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
812{
813 if (s && *s) {
11343788 814 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 815 PL_nextval[PL_nexttoke].opval = o;
79072805 816 force_next(WORD);
748a9306 817 if (kind) {
11343788 818 o->op_private = OPpCONST_ENTERED;
55497cff
PP
819 /* XXX see note in pp_entereval() for why we forgo typo
820 warnings if the symbol must be introduced in an eval.
821 GSAR 96-10-12 */
3280af22 822 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
823 kind == '$' ? SVt_PV :
824 kind == '@' ? SVt_PVAV :
825 kind == '%' ? SVt_PVHV :
826 SVt_PVGV
827 );
748a9306 828 }
79072805
LW
829 }
830}
831
1571675a
GS
832NV
833Perl_str_to_version(pTHX_ SV *sv)
834{
835 NV retval = 0.0;
836 NV nshift = 1.0;
837 STRLEN len;
838 char *start = SvPVx(sv,len);
3aa33fe5 839 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
840 char *end = start + len;
841 while (start < end) {
ba210ebe 842 STRLEN skip;
1571675a
GS
843 UV n;
844 if (utf)
9041c2e3 845 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
846 else {
847 n = *(U8*)start;
848 skip = 1;
849 }
850 retval += ((NV)n)/nshift;
851 start += skip;
852 nshift *= 1000;
853 }
854 return retval;
855}
856
4e553d73 857/*
ffb4593c
NT
858 * S_force_version
859 * Forces the next token to be a version number.
e759cc13
RGS
860 * If the next token appears to be an invalid version number, (e.g. "v2b"),
861 * and if "guessing" is TRUE, then no new token is created (and the caller
862 * must use an alternative parsing method).
ffb4593c
NT
863 */
864
76e3520e 865STATIC char *
e759cc13 866S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
867{
868 OP *version = Nullop;
44dcb63b 869 char *d;
89bfa8cd
PP
870
871 s = skipspace(s);
872
44dcb63b 873 d = s;
dd629d5b 874 if (*d == 'v')
44dcb63b 875 d++;
44dcb63b 876 if (isDIGIT(*d)) {
e759cc13
RGS
877 while (isDIGIT(*d) || *d == '_' || *d == '.')
878 d++;
9f3d182e 879 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 880 SV *ver;
b73d6f50 881 s = scan_num(s, &yylval);
89bfa8cd 882 version = yylval.opval;
dd629d5b
GS
883 ver = cSVOPx(version)->op_sv;
884 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 885 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
886 SvNVX(ver) = str_to_version(ver);
887 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 888 }
89bfa8cd 889 }
e759cc13
RGS
890 else if (guessing)
891 return s;
89bfa8cd
PP
892 }
893
894 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 895 PL_nextval[PL_nexttoke].opval = version;
4e553d73 896 force_next(WORD);
89bfa8cd 897
e759cc13 898 return s;
89bfa8cd
PP
899}
900
ffb4593c
NT
901/*
902 * S_tokeq
903 * Tokenize a quoted string passed in as an SV. It finds the next
904 * chunk, up to end of string or a backslash. It may make a new
905 * SV containing that chunk (if HINT_NEW_STRING is on). It also
906 * turns \\ into \.
907 */
908
76e3520e 909STATIC SV *
cea2e8a9 910S_tokeq(pTHX_ SV *sv)
79072805
LW
911{
912 register char *s;
913 register char *send;
914 register char *d;
b3ac6de7
IZ
915 STRLEN len = 0;
916 SV *pv = sv;
79072805
LW
917
918 if (!SvLEN(sv))
b3ac6de7 919 goto finish;
79072805 920
a0d0e21e 921 s = SvPV_force(sv, len);
21a311ee 922 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 923 goto finish;
463ee0b2 924 send = s + len;
79072805
LW
925 while (s < send && *s != '\\')
926 s++;
927 if (s == send)
b3ac6de7 928 goto finish;
79072805 929 d = s;
be4731d2 930 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 931 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
932 if (SvUTF8(sv))
933 SvUTF8_on(pv);
934 }
79072805
LW
935 while (s < send) {
936 if (*s == '\\') {
a0d0e21e 937 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
938 s++; /* all that, just for this */
939 }
940 *d++ = *s++;
941 }
942 *d = '\0';
463ee0b2 943 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 944 finish:
3280af22 945 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 946 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
947 return sv;
948}
949
ffb4593c
NT
950/*
951 * Now come three functions related to double-quote context,
952 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
953 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
954 * interact with PL_lex_state, and create fake ( ... ) argument lists
955 * to handle functions and concatenation.
956 * They assume that whoever calls them will be setting up a fake
957 * join call, because each subthing puts a ',' after it. This lets
958 * "lower \luPpEr"
959 * become
960 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
961 *
962 * (I'm not sure whether the spurious commas at the end of lcfirst's
963 * arguments and join's arguments are created or not).
964 */
965
966/*
967 * S_sublex_start
968 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
969 *
970 * Pattern matching will set PL_lex_op to the pattern-matching op to
971 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
972 *
973 * OP_CONST and OP_READLINE are easy--just make the new op and return.
974 *
975 * Everything else becomes a FUNC.
976 *
977 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
978 * had an OP_CONST or OP_READLINE). This just sets us up for a
979 * call to S_sublex_push().
980 */
981
76e3520e 982STATIC I32
cea2e8a9 983S_sublex_start(pTHX)
79072805
LW
984{
985 register I32 op_type = yylval.ival;
79072805
LW
986
987 if (op_type == OP_NULL) {
3280af22
NIS
988 yylval.opval = PL_lex_op;
989 PL_lex_op = Nullop;
79072805
LW
990 return THING;
991 }
992 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 993 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
994
995 if (SvTYPE(sv) == SVt_PVIV) {
996 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
997 STRLEN len;
998 char *p;
999 SV *nsv;
1000
1001 p = SvPV(sv, len);
79cb57f6 1002 nsv = newSVpvn(p, len);
01ec43d0
GS
1003 if (SvUTF8(sv))
1004 SvUTF8_on(nsv);
b3ac6de7
IZ
1005 SvREFCNT_dec(sv);
1006 sv = nsv;
4e553d73 1007 }
b3ac6de7 1008 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1009 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1010 /* Allow <FH> // "foo" */
1011 if (op_type == OP_READLINE)
1012 PL_expect = XTERMORDORDOR;
79072805
LW
1013 return THING;
1014 }
1015
3280af22
NIS
1016 PL_sublex_info.super_state = PL_lex_state;
1017 PL_sublex_info.sub_inwhat = op_type;
1018 PL_sublex_info.sub_op = PL_lex_op;
1019 PL_lex_state = LEX_INTERPPUSH;
55497cff 1020
3280af22
NIS
1021 PL_expect = XTERM;
1022 if (PL_lex_op) {
1023 yylval.opval = PL_lex_op;
1024 PL_lex_op = Nullop;
55497cff
PP
1025 return PMFUNC;
1026 }
1027 else
1028 return FUNC;
1029}
1030
ffb4593c
NT
1031/*
1032 * S_sublex_push
1033 * Create a new scope to save the lexing state. The scope will be
1034 * ended in S_sublex_done. Returns a '(', starting the function arguments
1035 * to the uc, lc, etc. found before.
1036 * Sets PL_lex_state to LEX_INTERPCONCAT.
1037 */
1038
76e3520e 1039STATIC I32
cea2e8a9 1040S_sublex_push(pTHX)
55497cff 1041{
f46d017c 1042 ENTER;
55497cff 1043
3280af22
NIS
1044 PL_lex_state = PL_sublex_info.super_state;
1045 SAVEI32(PL_lex_dojoin);
1046 SAVEI32(PL_lex_brackets);
3280af22
NIS
1047 SAVEI32(PL_lex_casemods);
1048 SAVEI32(PL_lex_starts);
1049 SAVEI32(PL_lex_state);
7766f137 1050 SAVEVPTR(PL_lex_inpat);
3280af22 1051 SAVEI32(PL_lex_inwhat);
57843af0 1052 SAVECOPLINE(PL_curcop);
3280af22 1053 SAVEPPTR(PL_bufptr);
8452ff4b 1054 SAVEPPTR(PL_bufend);
3280af22
NIS
1055 SAVEPPTR(PL_oldbufptr);
1056 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1057 SAVEPPTR(PL_last_lop);
1058 SAVEPPTR(PL_last_uni);
3280af22
NIS
1059 SAVEPPTR(PL_linestart);
1060 SAVESPTR(PL_linestr);
8edd5f42
RGS
1061 SAVEGENERICPV(PL_lex_brackstack);
1062 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1063
1064 PL_linestr = PL_lex_stuff;
1065 PL_lex_stuff = Nullsv;
1066
9cbb5ea2
GS
1067 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1068 = SvPVX(PL_linestr);
3280af22 1069 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1070 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1071 SAVEFREESV(PL_linestr);
1072
1073 PL_lex_dojoin = FALSE;
1074 PL_lex_brackets = 0;
3280af22
NIS
1075 New(899, PL_lex_brackstack, 120, char);
1076 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1077 PL_lex_casemods = 0;
1078 *PL_lex_casestack = '\0';
1079 PL_lex_starts = 0;
1080 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1081 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1082
1083 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1084 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1085 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1086 else
3280af22 1087 PL_lex_inpat = Nullop;
79072805 1088
55497cff 1089 return '(';
79072805
LW
1090}
1091
ffb4593c
NT
1092/*
1093 * S_sublex_done
1094 * Restores lexer state after a S_sublex_push.
1095 */
1096
76e3520e 1097STATIC I32
cea2e8a9 1098S_sublex_done(pTHX)
79072805 1099{
3280af22 1100 if (!PL_lex_starts++) {
9aa983d2
JH
1101 SV *sv = newSVpvn("",0);
1102 if (SvUTF8(PL_linestr))
1103 SvUTF8_on(sv);
3280af22 1104 PL_expect = XOPERATOR;
9aa983d2 1105 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1106 return THING;
1107 }
1108
3280af22
NIS
1109 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1110 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1111 return yylex();
79072805
LW
1112 }
1113
ffb4593c 1114 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1115 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1116 PL_linestr = PL_lex_repl;
1117 PL_lex_inpat = 0;
1118 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1119 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1120 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1121 SAVEFREESV(PL_linestr);
1122 PL_lex_dojoin = FALSE;
1123 PL_lex_brackets = 0;
3280af22
NIS
1124 PL_lex_casemods = 0;
1125 *PL_lex_casestack = '\0';
1126 PL_lex_starts = 0;
25da4f38 1127 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1128 PL_lex_state = LEX_INTERPNORMAL;
1129 PL_lex_starts++;
e9fa98b2
HS
1130 /* we don't clear PL_lex_repl here, so that we can check later
1131 whether this is an evalled subst; that means we rely on the
1132 logic to ensure sublex_done() is called again only via the
1133 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1134 }
e9fa98b2 1135 else {
3280af22 1136 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1137 PL_lex_repl = Nullsv;
1138 }
79072805 1139 return ',';
ffed7fef
LW
1140 }
1141 else {
f46d017c 1142 LEAVE;
3280af22
NIS
1143 PL_bufend = SvPVX(PL_linestr);
1144 PL_bufend += SvCUR(PL_linestr);
1145 PL_expect = XOPERATOR;
09bef843 1146 PL_sublex_info.sub_inwhat = 0;
79072805 1147 return ')';
ffed7fef
LW
1148 }
1149}
1150
02aa26ce
NT
1151/*
1152 scan_const
1153
1154 Extracts a pattern, double-quoted string, or transliteration. This
1155 is terrifying code.
1156
3280af22
NIS
1157 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1158 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1159 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1160
9b599b2a
GS
1161 Returns a pointer to the character scanned up to. Iff this is
1162 advanced from the start pointer supplied (ie if anything was
1163 successfully parsed), will leave an OP for the substring scanned
1164 in yylval. Caller must intuit reason for not parsing further
1165 by looking at the next characters herself.
1166
02aa26ce
NT
1167 In patterns:
1168 backslashes:
1169 double-quoted style: \r and \n
1170 regexp special ones: \D \s
1171 constants: \x3
1172 backrefs: \1 (deprecated in substitution replacements)
1173 case and quoting: \U \Q \E
1174 stops on @ and $, but not for $ as tail anchor
1175
1176 In transliterations:
1177 characters are VERY literal, except for - not at the start or end
1178 of the string, which indicates a range. scan_const expands the
1179 range to the full set of intermediate characters.
1180
1181 In double-quoted strings:
1182 backslashes:
1183 double-quoted style: \r and \n
1184 constants: \x3
1185 backrefs: \1 (deprecated)
1186 case and quoting: \U \Q \E
1187 stops on @ and $
1188
1189 scan_const does *not* construct ops to handle interpolated strings.
1190 It stops processing as soon as it finds an embedded $ or @ variable
1191 and leaves it to the caller to work out what's going on.
1192
da6eedaa 1193 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1194
1195 $ in pattern could be $foo or could be tail anchor. Assumption:
1196 it's a tail anchor if $ is the last thing in the string, or if it's
1197 followed by one of ")| \n\t"
1198
1199 \1 (backreferences) are turned into $1
1200
1201 The structure of the code is
1202 while (there's a character to process) {
1203 handle transliteration ranges
1204 skip regexp comments
1205 skip # initiated comments in //x patterns
1206 check for embedded @foo
1207 check for embedded scalars
1208 if (backslash) {
1209 leave intact backslashes from leave (below)
1210 deprecate \1 in strings and sub replacements
1211 handle string-changing backslashes \l \U \Q \E, etc.
1212 switch (what was escaped) {
1213 handle - in a transliteration (becomes a literal -)
1214 handle \132 octal characters
1215 handle 0x15 hex characters
1216 handle \cV (control V)
1217 handle printf backslashes (\f, \r, \n, etc)
1218 } (end switch)
1219 } (end if backslash)
1220 } (end while character to read)
4e553d73 1221
02aa26ce
NT
1222*/
1223
76e3520e 1224STATIC char *
cea2e8a9 1225S_scan_const(pTHX_ char *start)
79072805 1226{
3280af22 1227 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1228 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1229 register char *s = start; /* start of the constant */
1230 register char *d = SvPVX(sv); /* destination for copies */
1231 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1232 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1233 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1234 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1235 UV uv;
1236
dff6d3cd 1237 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1238 PL_lex_inpat
4a2d328f 1239 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1240 : "";
79072805 1241
2b9d42f0
NIS
1242 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1243 /* If we are doing a trans and we know we want UTF8 set expectation */
1244 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1245 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1246 }
1247
1248
79072805 1249 while (s < send || dorange) {
02aa26ce 1250 /* get transliterations out of the way (they're most literal) */
3280af22 1251 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1252 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1253 if (dorange) {
1ba5c669
JH
1254 I32 i; /* current expanded character */
1255 I32 min; /* first character in range */
1256 I32 max; /* last character in range */
02aa26ce 1257
2b9d42f0 1258 if (has_utf8) {
8973db79
JH
1259 char *c = (char*)utf8_hop((U8*)d, -1);
1260 char *e = d++;
1261 while (e-- > c)
1262 *(e + 1) = *e;
25716404 1263 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1264 /* mark the range as done, and continue */
1265 dorange = FALSE;
1266 didrange = TRUE;
1267 continue;
1268 }
2b9d42f0 1269
02aa26ce 1270 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1271 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1272 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1273 d -= 2; /* eat the first char and the - */
1274
8ada0baa
JH
1275 min = (U8)*d; /* first char in range */
1276 max = (U8)d[1]; /* last char in range */
1277
c2e66d9e 1278 if (min > max) {
01ec43d0 1279 Perl_croak(aTHX_
d1573ac7 1280 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1281 (char)min, (char)max);
c2e66d9e
GS
1282 }
1283
c7f1f016 1284#ifdef EBCDIC
8ada0baa
JH
1285 if ((isLOWER(min) && isLOWER(max)) ||
1286 (isUPPER(min) && isUPPER(max))) {
1287 if (isLOWER(min)) {
1288 for (i = min; i <= max; i++)
1289 if (isLOWER(i))
db42d148 1290 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1291 } else {
1292 for (i = min; i <= max; i++)
1293 if (isUPPER(i))
db42d148 1294 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1295 }
1296 }
1297 else
1298#endif
1299 for (i = min; i <= max; i++)
eb160463 1300 *d++ = (char)i;
02aa26ce
NT
1301
1302 /* mark the range as done, and continue */
79072805 1303 dorange = FALSE;
01ec43d0 1304 didrange = TRUE;
79072805 1305 continue;
4e553d73 1306 }
02aa26ce
NT
1307
1308 /* range begins (ignore - as first or last char) */
79072805 1309 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1310 if (didrange) {
1fafa243 1311 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1312 }
2b9d42f0 1313 if (has_utf8) {
25716404 1314 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1315 s++;
1316 continue;
1317 }
79072805
LW
1318 dorange = TRUE;
1319 s++;
01ec43d0
GS
1320 }
1321 else {
1322 didrange = FALSE;
1323 }
79072805 1324 }
02aa26ce
NT
1325
1326 /* if we get here, we're not doing a transliteration */
1327
0f5d15d6
IZ
1328 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1329 except for the last char, which will be done separately. */
3280af22 1330 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1331 if (s[2] == '#') {
e994fd66 1332 while (s+1 < send && *s != ')')
db42d148 1333 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1334 }
1335 else if (s[2] == '{' /* This should match regcomp.c */
1336 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1337 {
cc6b7395 1338 I32 count = 1;
0f5d15d6 1339 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1340 char c;
1341
d9f97599
GS
1342 while (count && (c = *regparse)) {
1343 if (c == '\\' && regparse[1])
1344 regparse++;
4e553d73 1345 else if (c == '{')
cc6b7395 1346 count++;
4e553d73 1347 else if (c == '}')
cc6b7395 1348 count--;
d9f97599 1349 regparse++;
cc6b7395 1350 }
e994fd66 1351 if (*regparse != ')')
5bdf89e7 1352 regparse--; /* Leave one char for continuation. */
0f5d15d6 1353 while (s < regparse)
db42d148 1354 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1355 }
748a9306 1356 }
02aa26ce
NT
1357
1358 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1359 else if (*s == '#' && PL_lex_inpat &&
1360 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1361 while (s+1 < send && *s != '\n')
db42d148 1362 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1363 }
02aa26ce 1364
5d1d4326 1365 /* check for embedded arrays
da6eedaa 1366 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1367 */
7e2040f0 1368 else if (*s == '@' && s[1]
5d1d4326 1369 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1370 break;
02aa26ce
NT
1371
1372 /* check for embedded scalars. only stop if we're sure it's a
1373 variable.
1374 */
79072805 1375 else if (*s == '$') {
3280af22 1376 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1377 break;
6002328a 1378 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1379 break; /* in regexp, $ might be tail anchor */
1380 }
02aa26ce 1381
2b9d42f0
NIS
1382 /* End of else if chain - OP_TRANS rejoin rest */
1383
02aa26ce 1384 /* backslashes */
79072805
LW
1385 if (*s == '\\' && s+1 < send) {
1386 s++;
02aa26ce
NT
1387
1388 /* some backslashes we leave behind */
c9f97d15 1389 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1390 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1391 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1392 continue;
1393 }
02aa26ce
NT
1394
1395 /* deprecate \1 in strings and substitution replacements */
3280af22 1396 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1397 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1398 {
599cee73 1399 if (ckWARN(WARN_SYNTAX))
9014280d 1400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1401 *--s = '$';
1402 break;
1403 }
02aa26ce
NT
1404
1405 /* string-change backslash escapes */
3280af22 1406 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1407 --s;
1408 break;
1409 }
02aa26ce
NT
1410
1411 /* if we get here, it's either a quoted -, or a digit */
79072805 1412 switch (*s) {
02aa26ce
NT
1413
1414 /* quoted - in transliterations */
79072805 1415 case '-':
3280af22 1416 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1417 *d++ = *s++;
1418 continue;
1419 }
1420 /* FALL THROUGH */
1421 default:
11b8faa4 1422 {
707afd92
MS
1423 if (ckWARN(WARN_MISC) &&
1424 isALNUM(*s) &&
1425 *s != '_')
9014280d 1426 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1427 "Unrecognized escape \\%c passed through",
1428 *s);
1429 /* default action is to copy the quoted character */
f9a63242 1430 goto default_action;
11b8faa4 1431 }
02aa26ce
NT
1432
1433 /* \132 indicates an octal constant */
79072805
LW
1434 case '0': case '1': case '2': case '3':
1435 case '4': case '5': case '6': case '7':
ba210ebe 1436 {
53305cf1
NC
1437 I32 flags = 0;
1438 STRLEN len = 3;
1439 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1440 s += len;
1441 }
012bcf8d 1442 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1443
1444 /* \x24 indicates a hex constant */
79072805 1445 case 'x':
a0ed51b3
LW
1446 ++s;
1447 if (*s == '{') {
1448 char* e = strchr(s, '}');
a4c04bdc
NC
1449 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1450 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1451 STRLEN len;
355860ce 1452
53305cf1 1453 ++s;
adaeee49 1454 if (!e) {
a0ed51b3 1455 yyerror("Missing right brace on \\x{}");
355860ce 1456 continue;
ba210ebe 1457 }
53305cf1
NC
1458 len = e - s;
1459 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1460 s = e + 1;
a0ed51b3
LW
1461 }
1462 else {
ba210ebe 1463 {
53305cf1 1464 STRLEN len = 2;
a4c04bdc 1465 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1466 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1467 s += len;
1468 }
012bcf8d
GS
1469 }
1470
1471 NUM_ESCAPE_INSERT:
1472 /* Insert oct or hex escaped character.
301d3d20 1473 * There will always enough room in sv since such
db42d148 1474 * escapes will be longer than any UTF-8 sequence
301d3d20 1475 * they can end up as. */
ba7cea30 1476
c7f1f016
NIS
1477 /* We need to map to chars to ASCII before doing the tests
1478 to cover EBCDIC
1479 */
c4d5f83a 1480 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1481 if (!has_utf8 && uv > 255) {
301d3d20
JH
1482 /* Might need to recode whatever we have
1483 * accumulated so far if it contains any
1484 * hibit chars.
1485 *
1486 * (Can't we keep track of that and avoid
1487 * this rescan? --jhi)
012bcf8d 1488 */
c7f1f016 1489 int hicount = 0;
63cd0674
NIS
1490 U8 *c;
1491 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1492 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1493 hicount++;
db42d148 1494 }
012bcf8d 1495 }
63cd0674 1496 if (hicount) {
db42d148
NIS
1497 STRLEN offset = d - SvPVX(sv);
1498 U8 *src, *dst;
1499 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1500 src = (U8 *)d - 1;
1501 dst = src+hicount;
1502 d += hicount;
1503 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1504 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1505 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1506 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1507 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1508 }
1509 else {
63cd0674 1510 *dst-- = *src;
012bcf8d 1511 }
c7f1f016 1512 src--;
012bcf8d
GS
1513 }
1514 }
1515 }
1516
9aa983d2 1517 if (has_utf8 || uv > 255) {
9041c2e3 1518 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1519 has_utf8 = TRUE;
f9a63242
JH
1520 if (PL_lex_inwhat == OP_TRANS &&
1521 PL_sublex_info.sub_op) {
1522 PL_sublex_info.sub_op->op_private |=
1523 (PL_lex_repl ? OPpTRANS_FROM_UTF
1524 : OPpTRANS_TO_UTF);
f9a63242 1525 }
012bcf8d 1526 }
a0ed51b3 1527 else {
012bcf8d 1528 *d++ = (char)uv;
a0ed51b3 1529 }
012bcf8d
GS
1530 }
1531 else {
c4d5f83a 1532 *d++ = (char) uv;
a0ed51b3 1533 }
79072805 1534 continue;
02aa26ce 1535
b239daa5 1536 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1537 case 'N':
55eda711 1538 ++s;
423cee85
JH
1539 if (*s == '{') {
1540 char* e = strchr(s, '}');
155aba94 1541 SV *res;
423cee85
JH
1542 STRLEN len;
1543 char *str;
4e553d73 1544
423cee85 1545 if (!e) {
5777a3f7 1546 yyerror("Missing right brace on \\N{}");
423cee85
JH
1547 e = s - 1;
1548 goto cont_scan;
1549 }
dbc0d4f2
JH
1550 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1551 /* \N{U+...} */
1552 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1553 PERL_SCAN_DISALLOW_PREFIX;
1554 s += 3;
1555 len = e - s;
1556 uv = grok_hex(s, &len, &flags, NULL);
1557 s = e + 1;
1558 goto NUM_ESCAPE_INSERT;
1559 }
55eda711
JH
1560 res = newSVpvn(s + 1, e - s - 1);
1561 res = new_constant( Nullch, 0, "charnames",
1562 res, Nullsv, "\\N{...}" );
f9a63242
JH
1563 if (has_utf8)
1564 sv_utf8_upgrade(res);
423cee85 1565 str = SvPV(res,len);
1c47067b
JH
1566#ifdef EBCDIC_NEVER_MIND
1567 /* charnames uses pack U and that has been
1568 * recently changed to do the below uni->native
1569 * mapping, so this would be redundant (and wrong,
1570 * the code point would be doubly converted).
1571 * But leave this in just in case the pack U change
1572 * gets revoked, but the semantics is still
1573 * desireable for charnames. --jhi */
cddc7ef4
JH
1574 {
1575 UV uv = utf8_to_uvchr((U8*)str, 0);
1576
1577 if (uv < 0x100) {
1578 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1579
1580 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1581 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1582 str = SvPV(res, len);
1583 }
1584 }
1585#endif
89491803 1586 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1587 char *ostart = SvPVX(sv);
1588 SvCUR_set(sv, d - ostart);
1589 SvPOK_on(sv);
e4f3eed8 1590 *d = '\0';
f08d6ad9 1591 sv_utf8_upgrade(sv);
d2f449dd 1592 /* this just broke our allocation above... */
eb160463 1593 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1594 d = SvPVX(sv) + SvCUR(sv);
89491803 1595 has_utf8 = TRUE;
f08d6ad9 1596 }
eb160463 1597 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1598 char *odest = SvPVX(sv);
1599
8973db79 1600 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1601 d = SvPVX(sv) + (d - odest);
1602 }
1603 Copy(str, d, len, char);
1604 d += len;
1605 SvREFCNT_dec(res);
1606 cont_scan:
1607 s = e + 1;
1608 }
1609 else
5777a3f7 1610 yyerror("Missing braces on \\N{}");
423cee85
JH
1611 continue;
1612
02aa26ce 1613 /* \c is a control character */
79072805
LW
1614 case 'c':
1615 s++;
961ce445 1616 if (s < send) {
ba210ebe 1617 U8 c = *s++;
c7f1f016
NIS
1618#ifdef EBCDIC
1619 if (isLOWER(c))
1620 c = toUPPER(c);
1621#endif
db42d148 1622 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1623 }
961ce445
RGS
1624 else {
1625 yyerror("Missing control char name in \\c");
1626 }
79072805 1627 continue;
02aa26ce
NT
1628
1629 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1630 case 'b':
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1632 break;
1633 case 'n':
db42d148 1634 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1635 break;
1636 case 'r':
db42d148 1637 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1638 break;
1639 case 'f':
db42d148 1640 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1641 break;
1642 case 't':
db42d148 1643 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1644 break;
34a3fe2a 1645 case 'e':
db42d148 1646 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1647 break;
1648 case 'a':
db42d148 1649 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1650 break;
02aa26ce
NT
1651 } /* end switch */
1652
79072805
LW
1653 s++;
1654 continue;
02aa26ce
NT
1655 } /* end if (backslash) */
1656
f9a63242 1657 default_action:
2b9d42f0
NIS
1658 /* If we started with encoded form, or already know we want it
1659 and then encode the next character */
1660 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1661 STRLEN len = 1;
1662 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1663 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1664 s += len;
1665 if (need > len) {
1666 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1667 STRLEN off = d - SvPVX(sv);
1668 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1669 }
1670 d = (char*)uvchr_to_utf8((U8*)d, uv);
1671 has_utf8 = TRUE;
1672 }
1673 else {
1674 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1675 }
02aa26ce
NT
1676 } /* while loop to process each character */
1677
1678 /* terminate the string and set up the sv */
79072805 1679 *d = '\0';
463ee0b2 1680 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1681 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1682 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1683
79072805 1684 SvPOK_on(sv);
9f4817db 1685 if (PL_encoding && !has_utf8) {
d0063567
DK
1686 sv_recode_to_utf8(sv, PL_encoding);
1687 if (SvUTF8(sv))
1688 has_utf8 = TRUE;
9f4817db 1689 }
2b9d42f0 1690 if (has_utf8) {
7e2040f0 1691 SvUTF8_on(sv);
2b9d42f0 1692 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1693 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1694 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1695 }
1696 }
79072805 1697
02aa26ce 1698 /* shrink the sv if we allocated more than we used */
79072805
LW
1699 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1700 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1701 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1702 }
02aa26ce 1703
9b599b2a 1704 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1705 if (s > PL_bufptr) {
1706 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1707 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1708 sv, Nullsv,
4e553d73 1709 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1710 ? "tr"
3280af22 1711 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1712 ? "s"
1713 : "qq")));
79072805 1714 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1715 } else
8990e307 1716 SvREFCNT_dec(sv);
79072805
LW
1717 return s;
1718}
1719
ffb4593c
NT
1720/* S_intuit_more
1721 * Returns TRUE if there's more to the expression (e.g., a subscript),
1722 * FALSE otherwise.
ffb4593c
NT
1723 *
1724 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1725 *
1726 * ->[ and ->{ return TRUE
1727 * { and [ outside a pattern are always subscripts, so return TRUE
1728 * if we're outside a pattern and it's not { or [, then return FALSE
1729 * if we're in a pattern and the first char is a {
1730 * {4,5} (any digits around the comma) returns FALSE
1731 * if we're in a pattern and the first char is a [
1732 * [] returns FALSE
1733 * [SOMETHING] has a funky algorithm to decide whether it's a
1734 * character class or not. It has to deal with things like
1735 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1736 * anything else returns TRUE
1737 */
1738
9cbb5ea2
GS
1739/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1740
76e3520e 1741STATIC int
cea2e8a9 1742S_intuit_more(pTHX_ register char *s)
79072805 1743{
3280af22 1744 if (PL_lex_brackets)
79072805
LW
1745 return TRUE;
1746 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1747 return TRUE;
1748 if (*s != '{' && *s != '[')
1749 return FALSE;
3280af22 1750 if (!PL_lex_inpat)
79072805
LW
1751 return TRUE;
1752
1753 /* In a pattern, so maybe we have {n,m}. */
1754 if (*s == '{') {
1755 s++;
1756 if (!isDIGIT(*s))
1757 return TRUE;
1758 while (isDIGIT(*s))
1759 s++;
1760 if (*s == ',')
1761 s++;
1762 while (isDIGIT(*s))
1763 s++;
1764 if (*s == '}')
1765 return FALSE;
1766 return TRUE;
1767
1768 }
1769
1770 /* On the other hand, maybe we have a character class */
1771
1772 s++;
1773 if (*s == ']' || *s == '^')
1774 return FALSE;
1775 else {
ffb4593c 1776 /* this is terrifying, and it works */
79072805
LW
1777 int weight = 2; /* let's weigh the evidence */
1778 char seen[256];
f27ffc4a 1779 unsigned char un_char = 255, last_un_char;
93a17b20 1780 char *send = strchr(s,']');
3280af22 1781 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1782
1783 if (!send) /* has to be an expression */
1784 return TRUE;
1785
1786 Zero(seen,256,char);
1787 if (*s == '$')
1788 weight -= 3;
1789 else if (isDIGIT(*s)) {
1790 if (s[1] != ']') {
1791 if (isDIGIT(s[1]) && s[2] == ']')
1792 weight -= 10;
1793 }
1794 else
1795 weight -= 100;
1796 }
1797 for (; s < send; s++) {
1798 last_un_char = un_char;
1799 un_char = (unsigned char)*s;
1800 switch (*s) {
1801 case '@':
1802 case '&':
1803 case '$':
1804 weight -= seen[un_char] * 10;
7e2040f0 1805 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1806 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1807 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1808 weight -= 100;
1809 else
1810 weight -= 10;
1811 }
1812 else if (*s == '$' && s[1] &&
93a17b20
LW
1813 strchr("[#!%*<>()-=",s[1])) {
1814 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1815 weight -= 10;
1816 else
1817 weight -= 1;
1818 }
1819 break;
1820 case '\\':
1821 un_char = 254;
1822 if (s[1]) {
93a17b20 1823 if (strchr("wds]",s[1]))
79072805
LW
1824 weight += 100;
1825 else if (seen['\''] || seen['"'])
1826 weight += 1;
93a17b20 1827 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1828 weight += 40;
1829 else if (isDIGIT(s[1])) {
1830 weight += 40;
1831 while (s[1] && isDIGIT(s[1]))
1832 s++;
1833 }
1834 }
1835 else
1836 weight += 100;
1837 break;
1838 case '-':
1839 if (s[1] == '\\')
1840 weight += 50;
93a17b20 1841 if (strchr("aA01! ",last_un_char))
79072805 1842 weight += 30;
93a17b20 1843 if (strchr("zZ79~",s[1]))
79072805 1844 weight += 30;
f27ffc4a
GS
1845 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1846 weight -= 5; /* cope with negative subscript */
79072805
LW
1847 break;
1848 default:
93a17b20 1849 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1850 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1851 char *d = tmpbuf;
1852 while (isALPHA(*s))
1853 *d++ = *s++;
1854 *d = '\0';
1855 if (keyword(tmpbuf, d - tmpbuf))
1856 weight -= 150;
1857 }
1858 if (un_char == last_un_char + 1)
1859 weight += 5;
1860 weight -= seen[un_char];
1861 break;
1862 }
1863 seen[un_char]++;
1864 }
1865 if (weight >= 0) /* probably a character class */
1866 return FALSE;
1867 }
1868
1869 return TRUE;
1870}
ffed7fef 1871
ffb4593c
NT
1872/*
1873 * S_intuit_method
1874 *
1875 * Does all the checking to disambiguate
1876 * foo bar
1877 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1878 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1879 *
1880 * First argument is the stuff after the first token, e.g. "bar".
1881 *
1882 * Not a method if bar is a filehandle.
1883 * Not a method if foo is a subroutine prototyped to take a filehandle.
1884 * Not a method if it's really "Foo $bar"
1885 * Method if it's "foo $bar"
1886 * Not a method if it's really "print foo $bar"
1887 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1888 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1889 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1890 * =>
1891 */
1892
76e3520e 1893STATIC int
cea2e8a9 1894S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1895{
1896 char *s = start + (*start == '$');
3280af22 1897 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1898 STRLEN len;
1899 GV* indirgv;
1900
1901 if (gv) {
b6c543e3 1902 CV *cv;
a0d0e21e
LW
1903 if (GvIO(gv))
1904 return 0;
b6c543e3
IZ
1905 if ((cv = GvCVu(gv))) {
1906 char *proto = SvPVX(cv);
1907 if (proto) {
1908 if (*proto == ';')
1909 proto++;
1910 if (*proto == '*')
1911 return 0;
1912 }
1913 } else
a0d0e21e
LW
1914 gv = 0;
1915 }
8903cb82 1916 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1917 /* start is the beginning of the possible filehandle/object,
1918 * and s is the end of it
1919 * tmpbuf is a copy of it
1920 */
1921
a0d0e21e 1922 if (*start == '$') {
3280af22 1923 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1924 return 0;
1925 s = skipspace(s);
3280af22
NIS
1926 PL_bufptr = start;
1927 PL_expect = XREF;
a0d0e21e
LW
1928 return *s == '(' ? FUNCMETH : METHOD;
1929 }
1930 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1931 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1932 len -= 2;
1933 tmpbuf[len] = '\0';
1934 goto bare_package;
1935 }
1936 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1937 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1938 return 0;
1939 /* filehandle or package name makes it a method */
89bfa8cd 1940 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1941 s = skipspace(s);
3280af22 1942 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1943 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1944 bare_package:
3280af22 1945 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1946 newSVpvn(tmpbuf,len));
3280af22
NIS
1947 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1948 PL_expect = XTERM;
a0d0e21e 1949 force_next(WORD);
3280af22 1950 PL_bufptr = s;
a0d0e21e
LW
1951 return *s == '(' ? FUNCMETH : METHOD;
1952 }
1953 }
1954 return 0;
1955}
1956
ffb4593c
NT
1957/*
1958 * S_incl_perldb
1959 * Return a string of Perl code to load the debugger. If PERL5DB
1960 * is set, it will return the contents of that, otherwise a
1961 * compile-time require of perl5db.pl.
1962 */
1963
76e3520e 1964STATIC char*
cea2e8a9 1965S_incl_perldb(pTHX)
a0d0e21e 1966{
3280af22 1967 if (PL_perldb) {
76e3520e 1968 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1969
1970 if (pdb)
1971 return pdb;
93189314 1972 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1973 return "BEGIN { require 'perl5db.pl' }";
1974 }
1975 return "";
1976}
1977
1978
16d20bd9 1979/* Encoded script support. filter_add() effectively inserts a
4e553d73 1980 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1981 * Note that the filter function only applies to the current source file
1982 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1983 *
1984 * The datasv parameter (which may be NULL) can be used to pass
1985 * private data to this instance of the filter. The filter function
1986 * can recover the SV using the FILTER_DATA macro and use it to
1987 * store private buffers and state information.
1988 *
1989 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1990 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1991 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1992 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1993 * private use must be set using malloc'd pointers.
1994 */
16d20bd9
AD
1995
1996SV *
864dbfa3 1997Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1998{
f4c556ac
GS
1999 if (!funcp)
2000 return Nullsv;
2001
3280af22
NIS
2002 if (!PL_rsfp_filters)
2003 PL_rsfp_filters = newAV();
16d20bd9 2004 if (!datasv)
8c52afec 2005 datasv = NEWSV(255,0);
16d20bd9 2006 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2007 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2008 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2009 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2010 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2011 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2012 av_unshift(PL_rsfp_filters, 1);
2013 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2014 return(datasv);
2015}
4e553d73 2016
16d20bd9
AD
2017
2018/* Delete most recently added instance of this filter function. */
a0d0e21e 2019void
864dbfa3 2020Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2021{
e0c19803 2022 SV *datasv;
fe5a182c 2023 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2024 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2025 return;
2026 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2027 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2028 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2029 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2030 IoANY(datasv) = (void *)NULL;
3280af22 2031 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2032
16d20bd9
AD
2033 return;
2034 }
2035 /* we need to search for the correct entry and clear it */
cea2e8a9 2036 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2037}
2038
2039
2040/* Invoke the n'th filter function for the current rsfp. */
2041I32
864dbfa3 2042Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2043
2044
8ac85365 2045 /* 0 = read one text line */
a0d0e21e 2046{
16d20bd9
AD
2047 filter_t funcp;
2048 SV *datasv = NULL;
e50aee73 2049
3280af22 2050 if (!PL_rsfp_filters)
16d20bd9 2051 return -1;
3280af22 2052 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2053 /* Provide a default input filter to make life easy. */
2054 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2055 DEBUG_P(PerlIO_printf(Perl_debug_log,
2056 "filter_read %d: from rsfp\n", idx));
4e553d73 2057 if (maxlen) {
16d20bd9
AD
2058 /* Want a block */
2059 int len ;
2060 int old_len = SvCUR(buf_sv) ;
2061
2062 /* ensure buf_sv is large enough */
eb160463 2063 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2064 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2065 if (PerlIO_error(PL_rsfp))
37120919
AD
2066 return -1; /* error */
2067 else
2068 return 0 ; /* end of file */
2069 }
16d20bd9
AD
2070 SvCUR_set(buf_sv, old_len + len) ;
2071 } else {
2072 /* Want a line */
3280af22
NIS
2073 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2074 if (PerlIO_error(PL_rsfp))
37120919
AD
2075 return -1; /* error */
2076 else
2077 return 0 ; /* end of file */
2078 }
16d20bd9
AD
2079 }
2080 return SvCUR(buf_sv);
2081 }
2082 /* Skip this filter slot if filter has been deleted */
3280af22 2083 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2084 DEBUG_P(PerlIO_printf(Perl_debug_log,
2085 "filter_read %d: skipped (filter deleted)\n",
2086 idx));
16d20bd9
AD
2087 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2088 }
2089 /* Get function pointer hidden within datasv */
4755096e 2090 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2091 DEBUG_P(PerlIO_printf(Perl_debug_log,
2092 "filter_read %d: via function %p (%s)\n",
fe5a182c 2093 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2094 /* Call function. The function is expected to */
2095 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2096 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2097 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2098}
2099
76e3520e 2100STATIC char *
cea2e8a9 2101S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2102{
c39cd008 2103#ifdef PERL_CR_FILTER
3280af22 2104 if (!PL_rsfp_filters) {
c39cd008 2105 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2106 }
2107#endif
3280af22 2108 if (PL_rsfp_filters) {
16d20bd9 2109
55497cff
PP
2110 if (!append)
2111 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2112 if (FILTER_READ(0, sv, 0) > 0)
2113 return ( SvPVX(sv) ) ;
2114 else
2115 return Nullch ;
2116 }
9d116dd7 2117 else
fd049845 2118 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2119}
2120
01ec43d0
GS
2121STATIC HV *
2122S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2123{
2124 GV *gv;
2125
01ec43d0 2126 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2127 return PL_curstash;
2128
2129 if (len > 2 &&
2130 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2131 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2132 {
2133 return GvHV(gv); /* Foo:: */
def3634b
GS
2134 }
2135
2136 /* use constant CLASS => 'MyClass' */
2137 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2138 SV *sv;
2139 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2140 pkgname = SvPV_nolen(sv);
2141 }
2142 }
2143
2144 return gv_stashpv(pkgname, FALSE);
2145}
a0d0e21e 2146
748a9306
LW
2147#ifdef DEBUGGING
2148 static char* exp_name[] =
09bef843 2149 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2150 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2151 };
748a9306 2152#endif
463ee0b2 2153
02aa26ce
NT
2154/*
2155 yylex
2156
2157 Works out what to call the token just pulled out of the input
2158 stream. The yacc parser takes care of taking the ops we return and
2159 stitching them into a tree.
2160
2161 Returns:
2162 PRIVATEREF
2163
2164 Structure:
2165 if read an identifier
2166 if we're in a my declaration
2167 croak if they tried to say my($foo::bar)
2168 build the ops for a my() declaration
2169 if it's an access to a my() variable
2170 are we in a sort block?
2171 croak if my($a); $a <=> $b
2172 build ops for access to a my() variable
2173 if in a dq string, and they've said @foo and we can't find @foo
2174 croak
2175 build ops for a bareword
2176 if we already built the token before, use it.
2177*/
2178
dba4d153 2179#ifdef USE_PURE_BISON
864dbfa3 2180int
dba4d153 2181Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2182{
20141f0e
RI
2183 int r;
2184
6f202aea 2185 yyactlevel++;
20141f0e
RI
2186 yylval_pointer[yyactlevel] = lvalp;
2187 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2188 if (yyactlevel >= YYMAXLEVEL)
2189 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2190
dba4d153 2191 r = Perl_yylex(aTHX);
20141f0e 2192
d8ae6756
RI
2193 if (yyactlevel > 0)
2194 yyactlevel--;
20141f0e
RI
2195
2196 return r;
2197}
dba4d153 2198#endif
20141f0e 2199
dba4d153
JH
2200#ifdef __SC__
2201#pragma segment Perl_yylex
2202#endif
dba4d153 2203int
dba4d153 2204Perl_yylex(pTHX)
20141f0e 2205{
79072805 2206 register char *s;
378cc40b 2207 register char *d;
79072805 2208 register I32 tmp;
463ee0b2 2209 STRLEN len;
161b471a
NIS
2210 GV *gv = Nullgv;
2211 GV **gvp = 0;
aa7440fb 2212 bool bof = FALSE;
1d239bbb 2213 I32 orig_keyword = 0;
a687059c 2214
02aa26ce 2215 /* check if there's an identifier for us to look at */
ba979b31 2216 if (PL_pending_ident)
e930465f 2217 return S_pending_ident(aTHX);
bbce6d69 2218
02aa26ce
NT
2219 /* no identifier pending identification */
2220
3280af22 2221 switch (PL_lex_state) {
79072805
LW
2222#ifdef COMMENTARY
2223 case LEX_NORMAL: /* Some compilers will produce faster */
2224 case LEX_INTERPNORMAL: /* code if we comment these out. */
2225 break;
2226#endif
2227
09bef843 2228 /* when we've already built the next token, just pull it out of the queue */
79072805 2229 case LEX_KNOWNEXT:
3280af22
NIS
2230 PL_nexttoke--;
2231 yylval = PL_nextval[PL_nexttoke];
2232 if (!PL_nexttoke) {
2233 PL_lex_state = PL_lex_defer;
2234 PL_expect = PL_lex_expect;
2235 PL_lex_defer = LEX_NORMAL;
463ee0b2 2236 }
607df283 2237 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2238 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2239 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2240
3280af22 2241 return(PL_nexttype[PL_nexttoke]);
79072805 2242
02aa26ce 2243 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2244 when we get here, PL_bufptr is at the \
02aa26ce 2245 */
79072805
LW
2246 case LEX_INTERPCASEMOD:
2247#ifdef DEBUGGING
3280af22 2248 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2249 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2250#endif
02aa26ce 2251 /* handle \E or end of string */
3280af22 2252 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2253 char oldmod;
02aa26ce
NT
2254
2255 /* if at a \E */
3280af22
NIS
2256 if (PL_lex_casemods) {
2257 oldmod = PL_lex_casestack[--PL_lex_casemods];
2258 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2259
3280af22
NIS
2260 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2261 PL_bufptr += 2;
2262 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2263 }
79072805
LW
2264 return ')';
2265 }
3280af22
NIS
2266 if (PL_bufptr != PL_bufend)
2267 PL_bufptr += 2;
2268 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2269 return yylex();
79072805
LW
2270 }
2271 else {
607df283 2272 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2273 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2274 s = PL_bufptr + 1;
6e909404
JH
2275 if (s[1] == '\\' && s[2] == 'E') {
2276 PL_bufptr = s + 3;
2277 PL_lex_state = LEX_INTERPCONCAT;
2278 return yylex();
a0d0e21e 2279 }
6e909404
JH
2280 else {
2281 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2282 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2283 if (strchr("LU", *s) &&
2284 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2285 PL_lex_casestack[--PL_lex_casemods] = '\0';
2286 return ')';
2287 }
2288 if (PL_lex_casemods > 10)
2289 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2290 PL_lex_casestack[PL_lex_casemods++] = *s;
2291 PL_lex_casestack[PL_lex_casemods] = '\0';
2292 PL_lex_state = LEX_INTERPCONCAT;
2293 PL_nextval[PL_nexttoke].ival = 0;
2294 force_next('(');
2295 if (*s == 'l')
2296 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2297 else if (*s == 'u')
2298 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2299 else if (*s == 'L')
2300 PL_nextval[PL_nexttoke].ival = OP_LC;
2301 else if (*s == 'U')
2302 PL_nextval[PL_nexttoke].ival = OP_UC;
2303 else if (*s == 'Q')
2304 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2305 else
2306 Perl_croak(aTHX_ "panic: yylex");
2307 PL_bufptr = s + 1;
a0d0e21e 2308 }
79072805 2309 force_next(FUNC);
3280af22
NIS
2310 if (PL_lex_starts) {
2311 s = PL_bufptr;
2312 PL_lex_starts = 0;
79072805
LW
2313 Aop(OP_CONCAT);
2314 }
2315 else
cea2e8a9 2316 return yylex();
79072805
LW
2317 }
2318
55497cff
PP
2319 case LEX_INTERPPUSH:
2320 return sublex_push();
2321
79072805 2322 case LEX_INTERPSTART:
3280af22 2323 if (PL_bufptr == PL_bufend)
79072805 2324 return sublex_done();
607df283 2325 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2326 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2327 PL_expect = XTERM;
2328 PL_lex_dojoin = (*PL_bufptr == '@');
2329 PL_lex_state = LEX_INTERPNORMAL;
2330 if (PL_lex_dojoin) {
2331 PL_nextval[PL_nexttoke].ival = 0;
79072805 2332 force_next(',');
a0d0e21e 2333 force_ident("\"", '$');
3280af22 2334 PL_nextval[PL_nexttoke].ival = 0;
79072805 2335 force_next('$');
3280af22 2336 PL_nextval[PL_nexttoke].ival = 0;
79072805 2337 force_next('(');
3280af22 2338 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2339 force_next(FUNC);
2340 }
3280af22
NIS
2341 if (PL_lex_starts++) {
2342 s = PL_bufptr;
79072805
LW
2343 Aop(OP_CONCAT);
2344 }
cea2e8a9 2345 return yylex();
79072805
LW
2346
2347 case LEX_INTERPENDMAYBE:
3280af22
NIS
2348 if (intuit_more(PL_bufptr)) {
2349 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2350 break;
2351 }
2352 /* FALL THROUGH */
2353
2354 case LEX_INTERPEND:
3280af22
NIS
2355 if (PL_lex_dojoin) {
2356 PL_lex_dojoin = FALSE;
2357 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2358 return ')';
2359 }
43a16006 2360 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2361 && SvEVALED(PL_lex_repl))
43a16006 2362 {
e9fa98b2 2363 if (PL_bufptr != PL_bufend)
cea2e8a9 2364 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2365 PL_lex_repl = Nullsv;
2366 }
79072805
LW
2367 /* FALLTHROUGH */
2368 case LEX_INTERPCONCAT:
2369#ifdef DEBUGGING
3280af22 2370 if (PL_lex_brackets)
cea2e8a9 2371 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2372#endif
3280af22 2373 if (PL_bufptr == PL_bufend)
79072805
LW
2374 return sublex_done();
2375
3280af22
NIS
2376 if (SvIVX(PL_linestr) == '\'') {
2377 SV *sv = newSVsv(PL_linestr);
2378 if (!PL_lex_inpat)
76e3520e 2379 sv = tokeq(sv);
3280af22 2380 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2381 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2382 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2383 s = PL_bufend;
79072805
LW
2384 }
2385 else {
3280af22 2386 s = scan_const(PL_bufptr);
79072805 2387 if (*s == '\\')
3280af22 2388 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2389 else
3280af22 2390 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2391 }
2392
3280af22
NIS
2393 if (s != PL_bufptr) {
2394 PL_nextval[PL_nexttoke] = yylval;
2395 PL_expect = XTERM;
79072805 2396 force_next(THING);
3280af22 2397 if (PL_lex_starts++)
79072805
LW
2398 Aop(OP_CONCAT);
2399 else {
3280af22 2400 PL_bufptr = s;
cea2e8a9 2401 return yylex();
79072805
LW
2402 }
2403 }
2404
cea2e8a9 2405 return yylex();
a0d0e21e 2406 case LEX_FORMLINE:
3280af22
NIS
2407 PL_lex_state = LEX_NORMAL;
2408 s = scan_formline(PL_bufptr);
2409 if (!PL_lex_formbrack)
a0d0e21e
LW
2410 goto rightbracket;
2411 OPERATOR(';');
79072805
LW
2412 }
2413
3280af22
NIS
2414 s = PL_bufptr;
2415 PL_oldoldbufptr = PL_oldbufptr;
2416 PL_oldbufptr = s;
607df283 2417 DEBUG_T( {
bf49b057
GS
2418 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2419 exp_name[PL_expect], s);
5f80b19c 2420 } );
463ee0b2
LW
2421
2422 retry:
378cc40b
LW
2423 switch (*s) {
2424 default:
7e2040f0 2425 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2426 goto keylookup;
cea2e8a9 2427 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2428 case 4:
2429 case 26:
2430 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2431 case 0:
3280af22
NIS
2432 if (!PL_rsfp) {
2433 PL_last_uni = 0;
2434 PL_last_lop = 0;
c5ee2135
LW
2435 if (PL_lex_brackets) {
2436 if (PL_lex_formbrack)
2437 yyerror("Format not terminated");
2438 else
2439 yyerror("Missing right curly or square bracket");
2440 }
4e553d73 2441 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2442 "### Tokener got EOF\n");
5f80b19c 2443 } );
79072805 2444 TOKEN(0);
463ee0b2 2445 }
3280af22 2446 if (s++ < PL_bufend)
a687059c 2447 goto retry; /* ignore stray nulls */
3280af22
NIS
2448 PL_last_uni = 0;
2449 PL_last_lop = 0;
2450 if (!PL_in_eval && !PL_preambled) {
2451 PL_preambled = TRUE;
2452 sv_setpv(PL_linestr,incl_perldb());
2453 if (SvCUR(PL_linestr))
2454 sv_catpv(PL_linestr,";");
2455 if (PL_preambleav){
2456 while(AvFILLp(PL_preambleav) >= 0) {
2457 SV *tmpsv = av_shift(PL_preambleav);
2458 sv_catsv(PL_linestr, tmpsv);
2459 sv_catpv(PL_linestr, ";");
91b7def8
PP
2460 sv_free(tmpsv);
2461 }
3280af22
NIS
2462 sv_free((SV*)PL_preambleav);
2463 PL_preambleav = NULL;
91b7def8 2464 }
3280af22
NIS
2465 if (PL_minus_n || PL_minus_p) {
2466 sv_catpv(PL_linestr, "LINE: while (<>) {");
2467 if (PL_minus_l)
2468 sv_catpv(PL_linestr,"chomp;");
2469 if (PL_minus_a) {
3280af22
NIS
2470 if (PL_minus_F) {
2471 if (strchr("/'\"", *PL_splitstr)
2472 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2473 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121
PP
2474 else {
2475 char delim;
2476 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2477 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2478 delim = *s;
75c72d73 2479 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2480 "q" + (delim == '\''), delim);
3280af22 2481 for (s = PL_splitstr; *s; s++) {
54310121 2482 if (*s == '\\')
3280af22
NIS
2483 sv_catpvn(PL_linestr, "\\", 1);
2484 sv_catpvn(PL_linestr, s, 1);
54310121 2485 }
cea2e8a9 2486 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2487 }
2304df62
AD
2488 }
2489 else
75c72d73 2490 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2491 }
79072805 2492 }
3280af22
NIS
2493 sv_catpv(PL_linestr, "\n");
2494 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2495 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2496 PL_last_lop = PL_last_uni = Nullch;
3280af22 2497 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2498 SV *sv = NEWSV(85,0);
2499
2500 sv_upgrade(sv, SVt_PVMG);
3280af22 2501 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2502 (void)SvIOK_on(sv);
2503 SvIVX(sv) = 0;
57843af0 2504 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2505 }
79072805 2506 goto retry;
a687059c 2507 }
e929a76b 2508 do {
aa7440fb 2509 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2510 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2511 fake_eof:
2512 if (PL_rsfp) {
2513 if (PL_preprocess && !PL_in_eval)
2514 (void)PerlProc_pclose(PL_rsfp);
2515 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2516 PerlIO_clearerr(PL_rsfp);
2517 else
2518 (void)PerlIO_close(PL_rsfp);
2519 PL_rsfp = Nullfp;
2520 PL_doextract = FALSE;
2521 }
2522 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2523 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2524 sv_catpv(PL_linestr,";}");
2525 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2526 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2527 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2528 PL_minus_n = PL_minus_p = 0;
2529 goto retry;
2530 }
2531 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2532 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2533 sv_setpv(PL_linestr,"");
2534 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2535 }
2536 /* if it looks like the start of a BOM, check if it in fact is */
2537 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2538#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2539# ifdef __GNU_LIBRARY__
2540# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2541# define FTELL_FOR_PIPE_IS_BROKEN
2542# endif
e3f494f1
JH
2543# else
2544# ifdef __GLIBC__
2545# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2546# define FTELL_FOR_PIPE_IS_BROKEN
2547# endif
2548# endif
226017aa
DD
2549# endif
2550#endif
2551#ifdef FTELL_FOR_PIPE_IS_BROKEN
2552 /* This loses the possibility to detect the bof
2553 * situation on perl -P when the libc5 is being used.
2554 * Workaround? Maybe attach some extra state to PL_rsfp?
2555 */
2556 if (!PL_preprocess)
7e28d3af 2557 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2558#else
eb160463 2559 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2560#endif
7e28d3af 2561 if (bof) {
3280af22 2562 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2563 s = swallow_bom((U8*)s);
e929a76b 2564 }
378cc40b 2565 }
3280af22 2566 if (PL_doextract) {
a0d0e21e
LW
2567 /* Incest with pod. */
2568 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2569 sv_setpv(PL_linestr, "");
2570 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2571 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2572 PL_last_lop = PL_last_uni = Nullch;
3280af22 2573 PL_doextract = FALSE;
a0d0e21e 2574 }
4e553d73 2575 }
463ee0b2 2576 incline(s);
3280af22
NIS
2577 } while (PL_doextract);
2578 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2579 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2580 SV *sv = NEWSV(85,0);
a687059c 2581
93a17b20 2582 sv_upgrade(sv, SVt_PVMG);
3280af22 2583 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2584 (void)SvIOK_on(sv);
2585 SvIVX(sv) = 0;
57843af0 2586 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2587 }
3280af22 2588 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2589 PL_last_lop = PL_last_uni = Nullch;
57843af0 2590 if (CopLINE(PL_curcop) == 1) {
3280af22 2591 while (s < PL_bufend && isSPACE(*s))
79072805 2592 s++;
a0d0e21e 2593 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2594 s++;
44a8e56a 2595 d = Nullch;
3280af22 2596 if (!PL_in_eval) {
44a8e56a
PP
2597 if (*s == '#' && *(s+1) == '!')
2598 d = s + 2;
2599#ifdef ALTERNATE_SHEBANG
2600 else {
2601 static char as[] = ALTERNATE_SHEBANG;
2602 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2603 d = s + (sizeof(as) - 1);
2604 }
2605#endif /* ALTERNATE_SHEBANG */
2606 }
2607 if (d) {
b8378b72 2608 char *ipath;
774d564b 2609 char *ipathend;
b8378b72 2610
774d564b 2611 while (isSPACE(*d))
b8378b72
CS
2612 d++;
2613 ipath = d;
774d564b
PP
2614 while (*d && !isSPACE(*d))
2615 d++;
2616 ipathend = d;
2617
2618#ifdef ARG_ZERO_IS_SCRIPT
2619 if (ipathend > ipath) {
2620 /*
2621 * HP-UX (at least) sets argv[0] to the script name,
2622 * which makes $^X incorrect. And Digital UNIX and Linux,
2623 * at least, set argv[0] to the basename of the Perl
2624 * interpreter. So, having found "#!", we'll set it right.
2625 */
ee2f7564 2626 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2627 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2628 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2629 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2630 SvSETMAGIC(x);
2631 }
556c1dec
JH
2632 else {
2633 STRLEN blen;
2634 STRLEN llen;
2635 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2636 char *lstart = SvPV(x,llen);
2637 if (llen < blen) {
2638 bstart += blen - llen;
2639 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2640 sv_setpvn(x, ipath, ipathend - ipath);
2641 SvSETMAGIC(x);
2642 }
2643 }
2644 }
774d564b 2645 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2646 }
774d564b 2647#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2648
2649 /*
2650 * Look for options.
2651 */
748a9306 2652 d = instr(s,"perl -");
84e30d1a 2653 if (!d) {
748a9306 2654 d = instr(s,"perl");
84e30d1a
GS
2655#if defined(DOSISH)
2656 /* avoid getting into infinite loops when shebang
2657 * line contains "Perl" rather than "perl" */
2658 if (!d) {
2659 for (d = ipathend-4; d >= ipath; --d) {
2660 if ((*d == 'p' || *d == 'P')
2661 && !ibcmp(d, "perl", 4))
2662 {
2663 break;
2664 }
2665 }
2666 if (d < ipath)
2667 d = Nullch;
2668 }
2669#endif
2670 }
44a8e56a
PP
2671#ifdef ALTERNATE_SHEBANG
2672 /*
2673 * If the ALTERNATE_SHEBANG on this system starts with a
2674 * character that can be part of a Perl expression, then if
2675 * we see it but not "perl", we're probably looking at the
2676 * start of Perl code, not a request to hand off to some
2677 * other interpreter. Similarly, if "perl" is there, but
2678 * not in the first 'word' of the line, we assume the line
2679 * contains the start of the Perl program.
44a8e56a
PP
2680 */
2681 if (d && *s != '#') {
774d564b 2682 char *c = ipath;
44a8e56a
PP
2683 while (*c && !strchr("; \t\r\n\f\v#", *c))
2684 c++;
2685 if (c < d)
2686 d = Nullch; /* "perl" not in first word; ignore */
2687 else
2688 *s = '#'; /* Don't try to parse shebang line */
2689 }
774d564b 2690#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2691#ifndef MACOS_TRADITIONAL
748a9306 2692 if (!d &&
44a8e56a 2693 *s == '#' &&
774d564b 2694 ipathend > ipath &&
3280af22 2695 !PL_minus_c &&
748a9306 2696 !instr(s,"indir") &&
3280af22 2697 instr(PL_origargv[0],"perl"))
748a9306 2698 {
9f68db38 2699 char **newargv;
9f68db38 2700
774d564b
PP
2701 *ipathend = '\0';
2702 s = ipathend + 1;
3280af22 2703 while (s < PL_bufend && isSPACE(*s))
9f68db38 2704 s++;
3280af22
NIS
2705 if (s < PL_bufend) {
2706 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2707 newargv[1] = s;
3280af22 2708 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2709 s++;
2710 *s = '\0';
3280af22 2711 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2712 }
2713 else
3280af22 2714 newargv = PL_origargv;
774d564b 2715 newargv[0] = ipath;
b35112e7 2716 PERL_FPU_PRE_EXEC
b4748376 2717 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2718 PERL_FPU_POST_EXEC
cea2e8a9 2719 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2720 }
bf4acbe4 2721#endif
748a9306 2722 if (d) {
3280af22
NIS
2723 U32 oldpdb = PL_perldb;
2724 bool oldn = PL_minus_n;
2725 bool oldp = PL_minus_p;
748a9306
LW
2726
2727 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2728 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2729
2730 if (*d++ == '-') {
a11ec5a9 2731 bool switches_done = PL_doswitches;
8cc95fdb
PP
2732 do {
2733 if (*d == 'M' || *d == 'm') {
2734 char *m = d;
2735 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2736 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2737 (int)(d - m), m);
2738 }
2739 d = moreswitches(d);
2740 } while (d);
f0b2cf55
YST
2741 if (PL_doswitches && !switches_done) {
2742 int argc = PL_origargc;
2743 char **argv = PL_origargv;
2744 do {
2745 argc--,argv++;
2746 } while (argc && argv[0][0] == '-' && argv[0][1]);
2747 init_argv_symbols(argc,argv);
2748 }
155aba94
GS
2749 if ((PERLDB_LINE && !oldpdb) ||
2750 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2751 /* if we have already added "LINE: while (<>) {",
2752 we must not do it again */
748a9306 2753 {
3280af22
NIS
2754 sv_setpv(PL_linestr, "");
2755 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2756 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2757 PL_last_lop = PL_last_uni = Nullch;
3280af22 2758 PL_preambled = FALSE;
84902520 2759 if (PERLDB_LINE)
3280af22 2760 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2761 goto retry;
2762 }
a11ec5a9
RGS
2763 if (PL_doswitches && !switches_done) {
2764 int argc = PL_origargc;
2765 char **argv = PL_origargv;
2766 do {
2767 argc--,argv++;
2768 } while (argc && argv[0][0] == '-' && argv[0][1]);
2769 init_argv_symbols(argc,argv);
2770 }
a0d0e21e 2771 }
79072805 2772 }
9f68db38 2773 }
79072805 2774 }
3280af22
NIS
2775 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2776 PL_bufptr = s;
2777 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2778 return yylex();
ae986130 2779 }
378cc40b 2780 goto retry;
4fdae800 2781 case '\r':
6a27c188 2782#ifdef PERL_STRICT_CR
cea2e8a9 2783 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2784 Perl_croak(aTHX_
cc507455 2785 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2786#endif
4fdae800 2787 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2788#ifdef MACOS_TRADITIONAL
2789 case '\312':
2790#endif
378cc40b
LW
2791 s++;
2792 goto retry;
378cc40b 2793 case '#':
e929a76b 2794 case '\n':
3280af22 2795 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2796 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2797 /* handle eval qq[#line 1 "foo"\n ...] */
2798 CopLINE_dec(PL_curcop);
2799 incline(s);
2800 }
3280af22 2801 d = PL_bufend;
a687059c 2802 while (s < d && *s != '\n')
378cc40b 2803 s++;
0f85fab0 2804 if (s < d)
378cc40b 2805 s++;
78c267c1 2806 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2807 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2808 incline(s);
3280af22
NIS
2809 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2810 PL_bufptr = s;
2811 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2812 return yylex();
a687059c 2813 }
378cc40b 2814 }
a687059c 2815 else {
378cc40b 2816 *s = '\0';
3280af22 2817 PL_bufend = s;
a687059c 2818 }
378cc40b
LW
2819 goto retry;
2820 case '-':
79072805 2821 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2822 I32 ftst = 0;
2823
378cc40b 2824 s++;
3280af22 2825 PL_bufptr = s;
748a9306
LW
2826 tmp = *s++;
2827
bf4acbe4 2828 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2829 s++;
2830
2831 if (strnEQ(s,"=>",2)) {
3280af22 2832 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2833 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2834 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2835 } );
748a9306
LW
2836 OPERATOR('-'); /* unary minus */
2837 }
3280af22 2838 PL_last_uni = PL_oldbufptr;
748a9306 2839 switch (tmp) {
e5edeb50
JH
2840 case 'r': ftst = OP_FTEREAD; break;
2841 case 'w': ftst = OP_FTEWRITE; break;
2842 case 'x': ftst = OP_FTEEXEC; break;
2843 case 'o': ftst = OP_FTEOWNED; break;
2844 case 'R': ftst = OP_FTRREAD; break;
2845 case 'W': ftst = OP_FTRWRITE; break;
2846 case 'X': ftst = OP_FTREXEC; break;
2847 case 'O': ftst = OP_FTROWNED; break;
2848 case 'e': ftst = OP_FTIS; break;
2849 case 'z': ftst = OP_FTZERO; break;
2850 case 's': ftst = OP_FTSIZE; break;
2851 case 'f': ftst = OP_FTFILE; break;
2852 case 'd': ftst = OP_FTDIR; break;
2853 case 'l': ftst = OP_FTLINK; break;
2854 case 'p': ftst = OP_FTPIPE; break;
2855 case 'S': ftst = OP_FTSOCK; break;
2856 case 'u': ftst = OP_FTSUID; break;
2857 case 'g': ftst = OP_FTSGID; break;
2858 case 'k': ftst = OP_FTSVTX; break;
2859 case 'b': ftst = OP_FTBLK; break;
2860 case 'c': ftst = OP_FTCHR; break;
2861 case 't': ftst = OP_FTTTY; break;
2862 case 'T': ftst = OP_FTTEXT; break;
2863 case 'B': ftst = OP_FTBINARY; break;
2864 case 'M': case 'A': case 'C':
2865 gv_fetchpv("\024",TRUE, SVt_PV);
2866 switch (tmp) {
2867 case 'M': ftst = OP_FTMTIME; break;
2868 case 'A': ftst = OP_FTATIME; break;
2869 case 'C': ftst = OP_FTCTIME; break;
2870 default: break;
2871 }
2872 break;
378cc40b 2873 default:
378cc40b
LW
2874 break;
2875 }
e5edeb50 2876 if (ftst) {
eb160463 2877 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2878 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2879 "### Saw file test %c\n", (int)ftst);
5f80b19c 2880 } );
e5edeb50
JH
2881 FTST(ftst);
2882 }
2883 else {
2884 /* Assume it was a minus followed by a one-letter named
2885 * subroutine call (or a -bareword), then. */
95c31fe3 2886 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0
RGS
2887 "### '-%c' looked like a file test but was not\n",
2888 tmp);
5f80b19c 2889 } );
3cf7b4c4 2890 s = --PL_bufptr;
e5edeb50 2891 }
378cc40b 2892 }
a687059c
LW
2893 tmp = *s++;
2894 if (*s == tmp) {
2895 s++;
3280af22 2896 if (PL_expect == XOPERATOR)
79072805
LW
2897 TERM(POSTDEC);
2898 else
2899 OPERATOR(PREDEC);
2900 }
2901 else if (*s == '>') {
2902 s++;
2903 s = skipspace(s);
7e2040f0 2904 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2905 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2906 TOKEN(ARROW);
79072805 2907 }
748a9306
LW
2908 else if (*s == '$')
2909 OPERATOR(ARROW);
463ee0b2 2910 else
748a9306 2911 TERM(ARROW);
a687059c 2912 }
3280af22 2913 if (PL_expect == XOPERATOR)
79072805
LW
2914 Aop(OP_SUBTRACT);
2915 else {
3280af22 2916 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2917 check_uni();
79072805 2918 OPERATOR('-'); /* unary minus */
2f3197b3 2919 }
79072805 2920
378cc40b 2921 case '+':
a687059c
LW
2922 tmp = *s++;
2923 if (*s == tmp) {
378cc40b 2924 s++;
3280af22 2925 if (PL_expect == XOPERATOR)
79072805
LW
2926 TERM(POSTINC);
2927 else
2928 OPERATOR(PREINC);
378cc40b 2929 }
3280af22 2930 if (PL_expect == XOPERATOR)
79072805
LW
2931 Aop(OP_ADD);
2932 else {
3280af22 2933 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2934 check_uni();
a687059c 2935 OPERATOR('+');
2f3197b3 2936 }
a687059c 2937
378cc40b 2938 case '*':
3280af22
NIS
2939 if (PL_expect != XOPERATOR) {
2940 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2941 PL_expect = XOPERATOR;
2942 force_ident(PL_tokenbuf, '*');
2943 if (!*PL_tokenbuf)
a0d0e21e 2944 PREREF('*');
79072805 2945 TERM('*');
a687059c 2946 }
79072805
LW
2947 s++;
2948 if (*s == '*') {
a687059c 2949 s++;
79072805 2950 PWop(OP_POW);
a687059c 2951 }
79072805
LW
2952 Mop(OP_MULTIPLY);
2953
378cc40b 2954 case '%':
3280af22 2955 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2956 ++s;
2957 Mop(OP_MODULO);
a687059c 2958 }
3280af22
NIS
2959 PL_tokenbuf[0] = '%';
2960 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2961 if (!PL_tokenbuf[1]) {
bbce6d69 2962 PREREF('%');
a687059c 2963 }
3280af22 2964 PL_pending_ident = '%';
bbce6d69 2965 TERM('%');
a687059c 2966
378cc40b 2967 case '^':
79072805 2968 s++;
a0d0e21e 2969 BOop(OP_BIT_XOR);
79072805 2970 case '[':
3280af22 2971 PL_lex_brackets++;
79072805 2972 /* FALL THROUGH */
378cc40b 2973 case '~':
378cc40b 2974 case ',':
378cc40b
LW
2975 tmp = *s++;
2976 OPERATOR(tmp);
a0d0e21e
LW
2977 case ':':
2978 if (s[1] == ':') {
2979 len = 0;
2980 goto just_a_word;
2981 }
2982 s++;
09bef843
SB
2983 switch (PL_expect) {
2984 OP *attrs;
2985 case XOPERATOR:
2986 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2987 break;
2988 PL_bufptr = s; /* update in case we back off */
2989 goto grabattrs;
2990 case XATTRBLOCK:
2991 PL_expect = XBLOCK;
2992 goto grabattrs;
2993 case XATTRTERM:
2994 PL_expect = XTERMBLOCK;
2995 grabattrs:
2996 s = skipspace(s);
2997 attrs = Nullop;
7e2040f0 2998 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2999 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3000 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3001 if (tmp < 0) tmp = -tmp;
3002 switch (tmp) {
3003 case KEY_or:
3004 case KEY_and:
c963b151 3005 case KEY_err:
f9829d6b
GS
3006 case KEY_for:
3007 case KEY_unless:
3008 case KEY_if:
3009 case KEY_while:
3010 case KEY_until:
3011 goto got_attrs;
3012 default:
3013 break;
3014 }
3015 }
09bef843
SB
3016 if (*d == '(') {
3017 d = scan_str(d,TRUE,TRUE);
3018 if (!d) {
09bef843
SB
3019 /* MUST advance bufptr here to avoid bogus
3020 "at end of line" context messages from yyerror().
3021 */
3022 PL_bufptr = s + len;
3023 yyerror("Unterminated attribute parameter in attribute list");
3024 if (attrs)
3025 op_free(attrs);
3026 return 0; /* EOF indicator */
3027 }
3028 }
3029 if (PL_lex_stuff) {
3030 SV *sv = newSVpvn(s, len);
3031 sv_catsv(sv, PL_lex_stuff);
3032 attrs = append_elem(OP_LIST, attrs,
3033 newSVOP(OP_CONST, 0, sv));
3034 SvREFCNT_dec(PL_lex_stuff);
3035 PL_lex_stuff = Nullsv;
3036 }
3037 else {
371fce9b
DM
3038 if (len == 6 && strnEQ(s, "unique", len)) {
3039 if (PL_in_my == KEY_our)
3040#ifdef USE_ITHREADS
3041 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3042#else
3043 ; /* skip to avoid loading attributes.pm */
3044#endif
3045 else
3046 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3047 }
3048
d3cea301
SB
3049 /* NOTE: any CV attrs applied here need to be part of
3050 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3051 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3052 CvLVALUE_on(PL_compcv);
3053 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3054 CvLOCKED_on(PL_compcv);
3055 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3056 CvMETHOD_on(PL_compcv);
06492da6
SF
3057 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3058 CvASSERTION_on(PL_compcv);
78f9721b
SM
3059 /* After we've set the flags, it could be argued that
3060 we don't need to do the attributes.pm-based setting
3061 process, and shouldn't bother appending recognized
d3cea301
SB
3062 flags. To experiment with that, uncomment the
3063 following "else". (Note that's already been
3064 uncommented. That keeps the above-applied built-in
3065 attributes from being intercepted (and possibly
3066 rejected) by a package's attribute routines, but is
3067 justified by the performance win for the common case
3068 of applying only built-in attributes.) */
0256094b 3069 else
78f9721b
SM
3070 attrs = append_elem(OP_LIST, attrs,
3071 newSVOP(OP_CONST, 0,
3072 newSVpvn(s, len)));
09bef843
SB
3073 }
3074 s = skipspace(d);
0120eecf 3075 if (*s == ':' && s[1] != ':')
09bef843 3076 s = skipspace(s+1);
0120eecf
GS
3077 else if (s == d)
3078 break; /* require real whitespace or :'s */
09bef843 3079 }
f9829d6b 3080 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3081 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3082 char q = ((*s == '\'') ? '"' : '\'');
3083 /* If here for an expression, and parsed no attrs, back off. */
3084 if (tmp == '=' && !attrs) {
3085 s = PL_bufptr;
3086 break;
3087 }
3088 /* MUST advance bufptr here to avoid bogus "at end of line"
3089 context messages from yyerror().
3090 */
3091 PL_bufptr = s;
3092 if (!*s)
3093 yyerror("Unterminated attribute list");
3094 else
3095 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3096 q, *s, q));
3097 if (attrs)
3098 op_free(attrs);
3099 OPERATOR(':');
3100 }
f9829d6b 3101 got_attrs:
09bef843
SB
3102 if (attrs) {
3103 PL_nextval[PL_nexttoke].opval = attrs;
3104 force_next(THING);
3105 }
3106 TOKEN(COLONATTR);
3107 }
a0d0e21e 3108 OPERATOR(':');
8990e307
LW
3109 case '(':
3110 s++;
3280af22
NIS
3111 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3112 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3113 else
3280af22 3114 PL_expect = XTERM;
4a202259 3115 s = skipspace(s);
a0d0e21e 3116 TOKEN('(');
378cc40b 3117 case ';':
f4dd75d9 3118 CLINE;
378cc40b
LW
3119 tmp = *s++;
3120 OPERATOR(tmp);
3121 case ')':
378cc40b 3122 tmp = *s++;
16d20bd9
AD
3123 s = skipspace(s);
3124 if (*s == '{')
3125 PREBLOCK(tmp);
378cc40b 3126 TERM(tmp);
79072805
LW
3127 case ']':
3128 s++;
3280af22 3129 if (PL_lex_brackets <= 0)
d98d5fff 3130 yyerror("Unmatched right square bracket");
463ee0b2 3131 else
3280af22
NIS
3132 --PL_lex_brackets;
3133 if (PL_lex_state == LEX_INTERPNORMAL) {
3134 if (PL_lex_brackets == 0) {
a0d0e21e 3135 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3136 PL_lex_state = LEX_INTERPEND;
79072805
LW
3137 }
3138 }
4633a7c4 3139 TERM(']');
79072805
LW
3140 case '{':
3141 leftbracket:
79072805 3142 s++;
3280af22 3143 if (PL_lex_brackets > 100) {
8edd5f42 3144 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3145 }
3280af22 3146 switch (PL_expect) {
a0d0e21e 3147 case XTERM:
3280af22 3148 if (PL_lex_formbrack) {
a0d0e21e
LW
3149 s--;
3150 PRETERMBLOCK(DO);
3151 }
3280af22
NIS
3152 if (PL_oldoldbufptr == PL_last_lop)
3153 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3154 else
3280af22 3155 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3156 OPERATOR(HASHBRACK);
a0d0e21e 3157 case XOPERATOR:
bf4acbe4 3158 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3159 s++;
44a8e56a 3160 d = s;
3280af22
NIS
3161 PL_tokenbuf[0] = '\0';
3162 if (d < PL_bufend && *d == '-') {
3163 PL_tokenbuf[0] = '-';
44a8e56a 3164 d++;
bf4acbe4 3165 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3166 d++;
3167 }
7e2040f0 3168 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3169 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3170 FALSE, &len);
bf4acbe4 3171 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3172 d++;
3173 if (*d == '}') {
3280af22 3174 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3175 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3176 if (minus)
3177 force_next('-');
748a9306
LW
3178 }
3179 }
3180 /* FALL THROUGH */
09bef843 3181 case XATTRBLOCK:
748a9306 3182 case XBLOCK:
3280af22
NIS
3183 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3184 PL_expect = XSTATE;
a0d0e21e 3185 break;
09bef843 3186 case XATTRTERM:
a0d0e21e 3187 case XTERMBLOCK:
3280af22
NIS
3188 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3189 PL_expect = XSTATE;
a0d0e21e
LW
3190 break;
3191 default: {
3192 char *t;
3280af22
NIS
3193 if (PL_oldoldbufptr == PL_last_lop)
3194 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3195 else
3280af22 3196 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3197 s = skipspace(s);
8452ff4b
SB
3198 if (*s == '}') {
3199 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3200 PL_expect = XTERM;
3201 /* This hack is to get the ${} in the message. */
3202 PL_bufptr = s+1;
3203 yyerror("syntax error");
3204 break;
3205 }
a0d0e21e 3206 OPERATOR(HASHBRACK);
8452ff4b 3207 }
b8a4b1be
GS
3208 /* This hack serves to disambiguate a pair of curlies
3209 * as being a block or an anon hash. Normally, expectation
3210 * determines that, but in cases where we're not in a
3211 * position to expect anything in particular (like inside
3212 * eval"") we have to resolve the ambiguity. This code
3213 * covers the case where the first term in the curlies is a
3214 * quoted string. Most other cases need to be explicitly
3215 * disambiguated by prepending a `+' before the opening
3216 * curly in order to force resolution as an anon hash.
3217 *
3218 * XXX should probably propagate the outer expectation
3219 * into eval"" to rely less on this hack, but that could
3220 * potentially break current behavior of eval"".
3221 * GSAR 97-07-21
3222 */
3223 t = s;
3224 if (*s == '\'' || *s == '"' || *s == '`') {
3225 /* common case: get past first string, handling escapes */
3280af22 3226 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3227 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3228 t++;
3229 t++;
a0d0e21e 3230 }
b8a4b1be 3231 else if (*s == 'q') {
3280af22 3232 if (++t < PL_bufend
b8a4b1be 3233 && (!isALNUM(*t)
3280af22 3234 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3235 && !isALNUM(*t))))
3236 {
abc667d1 3237 /* skip q//-like construct */
b8a4b1be
GS
3238 char *tmps;
3239 char open, close, term;
3240 I32 brackets = 1;
3241
3280af22 3242 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3243 t++;
abc667d1
DM
3244 /* check for q => */
3245 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3246 OPERATOR(HASHBRACK);
3247 }
b8a4b1be
GS
3248 term = *t;
3249 open = term;
3250 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3251 term = tmps[5];
3252 close = term;
3253 if (open == close)
3280af22
NIS
3254 for (t++; t < PL_bufend; t++) {
3255 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3256 t++;
6d07e5e9 3257 else if (*t == open)
b8a4b1be
GS
3258 break;
3259 }
abc667d1 3260 else {
3280af22
NIS
3261 for (t++; t < PL_bufend; t++) {
3262 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3263 t++;
6d07e5e9 3264 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3265 break;
3266 else if (*t == open)
3267 brackets++;
3268 }
abc667d1
DM
3269 }
3270 t++;
b8a4b1be 3271 }
abc667d1
DM
3272 else
3273 /* skip plain q word */
3274 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3275 t += UTF8SKIP(t);
a0d0e21e 3276 }
7e2040f0 3277 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3278 t += UTF8SKIP(t);
7e2040f0 3279 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3280 t += UTF8SKIP(t);
a0d0e21e 3281 }
3280af22 3282 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3283 t++;
b8a4b1be
GS
3284 /* if comma follows first term, call it an anon hash */
3285 /* XXX it could be a comma expression with loop modifiers */
3280af22 3286 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3287 || (*t == '=' && t[1] == '>')))
a0d0e21e 3288 OPERATOR(HASHBRACK);
3280af22 3289 if (PL_expect == XREF)
4e4e412b 3290 PL_expect = XTERM;
a0d0e21e 3291 else {
3280af22
NIS
3292 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3293 PL_expect = XSTATE;
a0d0e21e 3294 }
8990e307 3295 }
a0d0e21e 3296 break;
463ee0b2 3297 }
57843af0 3298 yylval.ival = CopLINE(PL_curcop);
79072805 3299 if (isSPACE(*s) || *s == '#')
3280af22 3300 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3301 TOKEN('{');
378cc40b 3302 case '}':
79072805
LW
3303 rightbracket:
3304 s++;
3280af22 3305 if (PL_lex_brackets <= 0)
d98d5fff 3306 yyerror("Unmatched right curly bracket");
463ee0b2 3307 else
3280af22 3308 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3309 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3310 PL_lex_formbrack = 0;
3311 if (PL_lex_state == LEX_INTERPNORMAL) {
3312 if (PL_lex_brackets == 0) {
9059aa12
LW
3313 if (PL_expect & XFAKEBRACK) {
3314 PL_expect &= XENUMMASK;
3280af22
NIS
3315 PL_lex_state = LEX_INTERPEND;
3316 PL_bufptr = s;
cea2e8a9 3317 return yylex(); /* ignore fake brackets */
79072805 3318 }
fa83b5b6 3319 if (*s == '-' && s[1] == '>')
3280af22 3320 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3321 else if (*s != '[' && *s != '{')
3280af22 3322 PL_lex_state = LEX_INTERPEND;
79072805
LW
3323 }
3324 }
9059aa12
LW
3325 if (PL_expect & XFAKEBRACK) {
3326 PL_expect &= XENUMMASK;
3280af22 3327 PL_bufptr = s;
cea2e8a9 3328 return yylex(); /* ignore fake brackets */
748a9306 3329 }
79072805
LW
3330 force_next('}');
3331 TOKEN(';');
378cc40b
LW
3332 case '&':
3333 s++;
3334 tmp = *s++;
3335 if (tmp == '&')
a0d0e21e 3336 AOPERATOR(ANDAND);
378cc40b 3337 s--;
3280af22 3338 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3339 if (ckWARN(WARN_SEMICOLON)
3340 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3341 {
57843af0 3342 CopLINE_dec(PL_curcop);
9014280d 3343 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3344 CopLINE_inc(PL_curcop);
463ee0b2 3345 }
79072805 3346 BAop(OP_BIT_AND);
463ee0b2 3347 }
79072805 3348
3280af22
NIS
3349 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3350 if (*PL_tokenbuf) {
3351 PL_expect = XOPERATOR;
3352 force_ident(PL_tokenbuf, '&');
463ee0b2 3353 }
79072805
LW
3354 else
3355 PREREF('&');
c07a80fd 3356 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3357 TERM('&');
3358
378cc40b
LW
3359 case '|':
3360 s++;
3361 tmp = *s++;
3362 if (tmp == '|')
a0d0e21e 3363 AOPERATOR(OROR);
378cc40b 3364 s--;
79072805 3365 BOop(OP_BIT_OR);
378cc40b
LW
3366 case '=':
3367 s++;
3368 tmp = *s++;
3369 if (tmp == '=')
79072805
LW
3370 Eop(OP_EQ);
3371 if (tmp == '>')
3372 OPERATOR(',');
378cc40b 3373 if (tmp == '~')
79072805 3374 PMop(OP_MATCH);
599cee73 3375 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3376 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3377 s--;
3280af22
NIS
3378 if (PL_expect == XSTATE && isALPHA(tmp) &&
3379 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3380 {
3280af22
NIS
3381 if (PL_in_eval && !PL_rsfp) {
3382 d = PL_bufend;
a5f75d66
AD
3383 while (s < d) {
3384 if (*s++ == '\n') {
3385 incline(s);
3386 if (strnEQ(s,"=cut",4)) {
3387 s = strchr(s,'\n');
3388 if (s)
3389 s++;
3390 else
3391 s = d;
3392 incline(s);
3393 goto retry;
3394 }
3395 }
3396 }
3397 goto retry;
3398 }
3280af22
NIS
3399 s = PL_bufend;
3400 PL_doextract = TRUE;
a0d0e21e
LW
3401 goto retry;
3402 }
3280af22 3403 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3404 char *t;
51882d45 3405#ifdef PERL_STRICT_CR
bf4acbe4 3406 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3407#else
bf4acbe4 3408 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3409#endif
a0d0e21e
LW
3410 if (*t == '\n' || *t == '#') {
3411 s--;
3280af22 3412 PL_expect = XBLOCK;
a0d0e21e
LW
3413 goto leftbracket;
3414 }
79072805 3415 }
a0d0e21e
LW
3416 yylval.ival = 0;
3417 OPERATOR(ASSIGNOP);
378cc40b
LW
3418 case '!':
3419 s++;
3420 tmp = *s++;
3421 if (tmp == '=')
79072805 3422 Eop(OP_NE);
378cc40b 3423 if (tmp == '~')
79072805 3424 PMop(OP_NOT);
378cc40b
LW
3425 s--;
3426 OPERATOR('!');
3427 case '<':
3280af22 3428 if (PL_expect != XOPERATOR) {
93a17b20 3429 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3430 check_uni();
79072805
LW
3431 if (s[1] == '<')
3432 s = scan_heredoc(s);
3433 else
3434 s = scan_inputsymbol(s);
3435 TERM(sublex_start());
378cc40b
LW
3436 }
3437 s++;
3438 tmp = *s++;
3439 if (tmp == '<')
79072805 3440 SHop(OP_LEFT_SHIFT);
395c3793
LW
3441 if (tmp == '=') {
3442 tmp = *s++;
3443 if (tmp == '>')
79072805 3444 Eop(OP_NCMP);
395c3793 3445 s--;
79072805 3446 Rop(OP_LE);
395c3793 3447 }
378cc40b 3448 s--;
79072805 3449 Rop(OP_LT);
378cc40b
LW
3450 case '>':
3451 s++;
3452 tmp = *s++;
3453 if (tmp == '>')
79072805 3454 SHop(OP_RIGHT_SHIFT);
378cc40b 3455 if (tmp == '=')
79072805 3456 Rop(OP_GE);
378cc40b 3457 s--;
79072805 3458 Rop(OP_GT);
378cc40b
LW
3459
3460 case '$':
bbce6d69
PP
3461 CLINE;
3462
3280af22
NIS
3463 if (PL_expect == XOPERATOR) {
3464 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3465 PL_expect = XTERM;
a0d0e21e 3466 depcom();
bbce6d69 3467 return ','; /* grandfather non-comma-format format */
a0d0e21e 3468 }
8990e307 3469 }
a0d0e21e 3470
7e2040f0 3471 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3472 PL_tokenbuf[0] = '@';
376b8730
SM
3473 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3474 sizeof PL_tokenbuf - 1, FALSE);
3475 if (PL_expect == XOPERATOR)
3476 no_op("Array length", s);
3280af22 3477 if (!PL_tokenbuf[1])
a0d0e21e 3478 PREREF(DOLSHARP);
3280af22
NIS
3479 PL_expect = XOPERATOR;
3480 PL_pending_ident = '#';
463ee0b2 3481 TOKEN(DOLSHARP);
79072805 3482 }
bbce6d69 3483
3280af22 3484 PL_tokenbuf[0] = '$';
376b8730
SM
3485 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3486 sizeof PL_tokenbuf - 1, FALSE);
3487 if (PL_expect == XOPERATOR)
3488 no_op("Scalar", s);
3280af22
NIS
3489 if (!PL_tokenbuf[1]) {
3490 if (s == PL_bufend)
bbce6d69
PP
3491 yyerror("Final $ should be \\$ or $name");
3492 PREREF('$');
8990e307 3493 }
a0d0e21e 3494
bbce6d69 3495 /* This kludge not intended to be bulletproof. */
3280af22 3496 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3497 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3498 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3499 yylval.opval->op_private = OPpCONST_ARYBASE;
3500 TERM(THING);
3501 }
3502
ff68c719 3503 d = s;
69d2bceb 3504 tmp = (I32)*s;
3280af22 3505 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3506 s = skipspace(s);
3507
3280af22 3508 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3509 char *t;
3510 if (*s == '[') {
3280af22 3511 PL_tokenbuf[0] = '@';
599cee73 3512 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3513 for(t = s + 1;
7e2040f0 3514 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3515 t++) ;
a0d0e21e 3516 if (*t++ == ',') {
3280af22
NIS
3517 PL_bufptr = skipspace(PL_bufptr);
3518 while (t < PL_bufend && *t != ']')
bbce6d69 3519 t++;
9014280d 3520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3521 "Multidimensional syntax %.*s not suppor