This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix bug #18573 : in a double-quoted string, a \c not followed
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a
PP
63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff
PP
65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
RI
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
RI
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
6f33ba73 170#define UNI2(f,x) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c 176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
177#define UNI(f) UNI2(f,XTERM)
178#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 179
79072805 180#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 181 REPORT("uni",f) \
3280af22
NIS
182 PL_bufptr = s, \
183 PL_last_uni = PL_oldbufptr, \
79072805
LW
184 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
185
9f68db38 186/* grandfather return to old style */
3280af22 187#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 188
8fa7f367
JH
189#ifdef DEBUGGING
190
2d00ba3b 191STATIC void
61b2116b 192S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 193{
998054bd 194 DEBUG_T({
9c5ffd7c 195 SV* report = newSVpv(thing, 0);
29b291f7
RB
196 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
197 (IV)rv);
998054bd
SC
198
199 if (s - PL_bufptr > 0)
200 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
201 else {
202 if (PL_oldbufptr && *PL_oldbufptr)
203 sv_catpv(report, PL_tokenbuf);
204 }
205 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 206 });
998054bd
SC
207}
208
8fa7f367
JH
209#endif
210
ffb4593c
NT
211/*
212 * S_ao
213 *
c963b151
BD
214 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
215 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
216 */
217
76e3520e 218STATIC int
cea2e8a9 219S_ao(pTHX_ int toketype)
a0d0e21e 220{
3280af22
NIS
221 if (*PL_bufptr == '=') {
222 PL_bufptr++;
a0d0e21e
LW
223 if (toketype == ANDAND)
224 yylval.ival = OP_ANDASSIGN;
225 else if (toketype == OROR)
226 yylval.ival = OP_ORASSIGN;
c963b151
BD
227 else if (toketype == DORDOR)
228 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
229 toketype = ASSIGNOP;
230 }
231 return toketype;
232}
233
ffb4593c
NT
234/*
235 * S_no_op
236 * When Perl expects an operator and finds something else, no_op
237 * prints the warning. It always prints "<something> found where
238 * operator expected. It prints "Missing semicolon on previous line?"
239 * if the surprise occurs at the start of the line. "do you need to
240 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
241 * where the compiler doesn't know if foo is a method call or a function.
242 * It prints "Missing operator before end of line" if there's nothing
243 * after the missing operator, or "... before <...>" if there is something
244 * after the missing operator.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_no_op(pTHX_ char *what, char *s)
463ee0b2 249{
3280af22
NIS
250 char *oldbp = PL_bufptr;
251 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 252
1189a94a
GS
253 if (!s)
254 s = oldbp;
07c798fb 255 else
1189a94a 256 PL_bufptr = s;
cea2e8a9 257 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 258 if (is_first)
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 260 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 261 char *t;
7e2040f0 262 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 263 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 264 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 265 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 266 }
07c798fb
HS
267 else {
268 assert(s >= oldbp);
cea2e8a9 269 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 270 }
3280af22 271 PL_bufptr = oldbp;
8990e307
LW
272}
273
ffb4593c
NT
274/*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
76e3520e 283STATIC void
cea2e8a9 284S_missingterm(pTHX_ char *s)
8990e307
LW
285{
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
d2719217 290 if (nl)
8990e307
LW
291 *nl = '\0';
292 }
9d116dd7
JH
293 else if (
294#ifdef EBCDIC
295 iscntrl(PL_multi_close)
296#else
297 PL_multi_close < 32 || PL_multi_close == 127
298#endif
299 ) {
8990e307 300 *tmpbuf = '^';
3280af22 301 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
eb160463 307 *tmpbuf = (char)PL_multi_close;
8990e307
LW
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 313}
79072805 314
ffb4593c
NT
315/*
316 * Perl_deprecate
ffb4593c
NT
317 */
318
79072805 319void
864dbfa3 320Perl_deprecate(pTHX_ char *s)
a0d0e21e 321{
599cee73 322 if (ckWARN(WARN_DEPRECATED))
9014280d 323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
324}
325
12bcd1a6
PM
326void
327Perl_deprecate_old(pTHX_ char *s)
328{
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340}
341
ffb4593c
NT
342/*
343 * depcom
9cbb5ea2 344 * Deprecate a comma-less variable list.
ffb4593c
NT
345 */
346
76e3520e 347STATIC void
cea2e8a9 348S_depcom(pTHX)
a0d0e21e 349{
12bcd1a6 350 deprecate_old("comma-less variable list");
a0d0e21e
LW
351}
352
ffb4593c 353/*
9cbb5ea2
GS
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
ffb4593c
NT
356 */
357
c39cd008
GS
358#ifdef PERL_CR_FILTER
359static void
360strip_return(SV *sv)
361{
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379}
a868473f 380
76e3520e 381STATIC I32
c39cd008 382S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 383{
c39cd008
GS
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
a868473f
NIS
388}
389#endif
390
ffb4593c
NT
391/*
392 * Perl_lex_start
9cbb5ea2
GS
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
ffb4593c
NT
395 */
396
a0d0e21e 397void
864dbfa3 398Perl_lex_start(pTHX_ SV *line)
79072805 399{
8990e307
LW
400 char *s;
401 STRLEN len;
402
3280af22
NIS
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
3280af22
NIS
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
7766f137 408 SAVEVPTR(PL_lex_inpat);
3280af22 409 SAVEI32(PL_lex_inwhat);
18b09519
GS
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
18b09519 417 }
57843af0 418 SAVECOPLINE(PL_curcop);
3280af22
NIS
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
3280af22
NIS
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
427 SAVEPPTR(PL_lex_brackstack);
428 SAVEPPTR(PL_lex_casestack);
c76ac1ee 429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
09bef843 432 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 433 SAVESPTR(PL_lex_repl);
bebdddfc
GS
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
3280af22
NIS
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
3280af22
NIS
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
443 SAVEFREEPV(PL_lex_brackstack);
444 SAVEFREEPV(PL_lex_casestack);
445 PL_lex_casemods = 0;
446 *PL_lex_casestack = '\0';
447 PL_lex_dojoin = 0;
448 PL_lex_starts = 0;
449 PL_lex_stuff = Nullsv;
450 PL_lex_repl = Nullsv;
451 PL_lex_inpat = 0;
76be56bc 452 PL_nexttoke = 0;
3280af22 453 PL_lex_inwhat = 0;
09bef843 454 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
455 PL_linestr = line;
456 if (SvREADONLY(PL_linestr))
457 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
458 s = SvPV(PL_linestr, len);
6f27f9a7 459 if (!len || s[len-1] != ';') {
3280af22
NIS
460 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
461 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
462 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 463 }
3280af22
NIS
464 SvTEMP_off(PL_linestr);
465 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
466 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 467 PL_last_lop = PL_last_uni = Nullch;
3280af22 468 PL_rsfp = 0;
79072805 469}
a687059c 470
ffb4593c
NT
471/*
472 * Perl_lex_end
9cbb5ea2
GS
473 * Finalizer for lexing operations. Must be called when the parser is
474 * done with the lexer.
ffb4593c
NT
475 */
476
463ee0b2 477void
864dbfa3 478Perl_lex_end(pTHX)
463ee0b2 479{
3280af22 480 PL_doextract = FALSE;
463ee0b2
LW
481}
482
ffb4593c
NT
483/*
484 * S_incline
485 * This subroutine has nothing to do with tilting, whether at windmills
486 * or pinball tables. Its name is short for "increment line". It
57843af0 487 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 488 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
489 * # line 500 "foo.pm"
490 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
491 */
492
76e3520e 493STATIC void
cea2e8a9 494S_incline(pTHX_ char *s)
463ee0b2
LW
495{
496 char *t;
497 char *n;
73659bf1 498 char *e;
463ee0b2 499 char ch;
463ee0b2 500
57843af0 501 CopLINE_inc(PL_curcop);
463ee0b2
LW
502 if (*s++ != '#')
503 return;
bf4acbe4 504 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
505 if (strnEQ(s, "line", 4))
506 s += 4;
507 else
508 return;
084592ab 509 if (SPACE_OR_TAB(*s))
73659bf1 510 s++;
4e553d73 511 else
73659bf1 512 return;
bf4acbe4 513 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
514 if (!isDIGIT(*s))
515 return;
516 n = s;
517 while (isDIGIT(*s))
518 s++;
bf4acbe4 519 while (SPACE_OR_TAB(*s))
463ee0b2 520 s++;
73659bf1 521 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 522 s++;
73659bf1
GS
523 e = t + 1;
524 }
463ee0b2 525 else {
463ee0b2 526 for (t = s; !isSPACE(*t); t++) ;
73659bf1 527 e = t;
463ee0b2 528 }
bf4acbe4 529 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
530 e++;
531 if (*e != '\n' && *e != '\0')
532 return; /* false alarm */
533
463ee0b2
LW
534 ch = *t;
535 *t = '\0';
f4dd75d9 536 if (t - s > 0) {
05ec9bb3 537 CopFILE_free(PL_curcop);
57843af0 538 CopFILE_set(PL_curcop, s);
f4dd75d9 539 }
463ee0b2 540 *t = ch;
57843af0 541 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
542}
543
ffb4593c
NT
544/*
545 * S_skipspace
546 * Called to gobble the appropriate amount and type of whitespace.
547 * Skips comments as well.
548 */
549
76e3520e 550STATIC char *
cea2e8a9 551S_skipspace(pTHX_ register char *s)
a687059c 552{
3280af22 553 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 554 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
555 s++;
556 return s;
557 }
558 for (;;) {
fd049845 559 STRLEN prevlen;
09bef843 560 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 561 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
562 while (s < PL_bufend && isSPACE(*s)) {
563 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
564 incline(s);
565 }
ffb4593c
NT
566
567 /* comment */
3280af22
NIS
568 if (s < PL_bufend && *s == '#') {
569 while (s < PL_bufend && *s != '\n')
463ee0b2 570 s++;
60e6418e 571 if (s < PL_bufend) {
463ee0b2 572 s++;
60e6418e
GS
573 if (PL_in_eval && !PL_rsfp) {
574 incline(s);
575 continue;
576 }
577 }
463ee0b2 578 }
ffb4593c
NT
579
580 /* only continue to recharge the buffer if we're at the end
581 * of the buffer, we're not reading from a source filter, and
582 * we're in normal lexing mode
583 */
09bef843
SB
584 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
585 PL_lex_state == LEX_FORMLINE)
463ee0b2 586 return s;
ffb4593c
NT
587
588 /* try to recharge the buffer */
9cbb5ea2
GS
589 if ((s = filter_gets(PL_linestr, PL_rsfp,
590 (prevlen = SvCUR(PL_linestr)))) == Nullch)
591 {
592 /* end of file. Add on the -p or -n magic */
3280af22
NIS
593 if (PL_minus_n || PL_minus_p) {
594 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
595 ";}continue{print or die qq(-p destination: $!\\n)" :
596 "");
3280af22
NIS
597 sv_catpv(PL_linestr,";}");
598 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
599 }
600 else
3280af22 601 sv_setpv(PL_linestr,";");
ffb4593c
NT
602
603 /* reset variables for next time we lex */
9cbb5ea2
GS
604 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
605 = SvPVX(PL_linestr);
3280af22 606 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 607 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
608
609 /* Close the filehandle. Could be from -P preprocessor,
610 * STDIN, or a regular file. If we were reading code from
611 * STDIN (because the commandline held no -e or filename)
612 * then we don't close it, we reset it so the code can
613 * read from STDIN too.
614 */
615
3280af22
NIS
616 if (PL_preprocess && !PL_in_eval)
617 (void)PerlProc_pclose(PL_rsfp);
618 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
619 PerlIO_clearerr(PL_rsfp);
8990e307 620 else
3280af22
NIS
621 (void)PerlIO_close(PL_rsfp);
622 PL_rsfp = Nullfp;
463ee0b2
LW
623 return s;
624 }
ffb4593c
NT
625
626 /* not at end of file, so we only read another line */
09bef843
SB
627 /* make corresponding updates to old pointers, for yyerror() */
628 oldprevlen = PL_oldbufptr - PL_bufend;
629 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
630 if (PL_last_uni)
631 oldunilen = PL_last_uni - PL_bufend;
632 if (PL_last_lop)
633 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
634 PL_linestart = PL_bufptr = s + prevlen;
635 PL_bufend = s + SvCUR(PL_linestr);
636 s = PL_bufptr;
09bef843
SB
637 PL_oldbufptr = s + oldprevlen;
638 PL_oldoldbufptr = s + oldoldprevlen;
639 if (PL_last_uni)
640 PL_last_uni = s + oldunilen;
641 if (PL_last_lop)
642 PL_last_lop = s + oldloplen;
a0d0e21e 643 incline(s);
ffb4593c
NT
644
645 /* debugger active and we're not compiling the debugger code,
646 * so store the line into the debugger's array of lines
647 */
3280af22 648 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
649 SV *sv = NEWSV(85,0);
650
651 sv_upgrade(sv, SVt_PVMG);
3280af22 652 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
653 (void)SvIOK_on(sv);
654 SvIVX(sv) = 0;
57843af0 655 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 656 }
463ee0b2 657 }
a687059c 658}
378cc40b 659
ffb4593c
NT
660/*
661 * S_check_uni
662 * Check the unary operators to ensure there's no ambiguity in how they're
663 * used. An ambiguous piece of code would be:
664 * rand + 5
665 * This doesn't mean rand() + 5. Because rand() is a unary operator,
666 * the +5 is its argument.
667 */
668
76e3520e 669STATIC void
cea2e8a9 670S_check_uni(pTHX)
ba106d47 671{
2f3197b3 672 char *s;
a0d0e21e 673 char *t;
2f3197b3 674
3280af22 675 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 676 return;
3280af22
NIS
677 while (isSPACE(*PL_last_uni))
678 PL_last_uni++;
7e2040f0 679 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 680 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 681 return;
0453d815 682 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 683 char ch = *s;
0453d815 684 *s = '\0';
9014280d 685 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 686 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
687 PL_last_uni);
688 *s = ch;
689 }
2f3197b3
LW
690}
691
ffb4593c
NT
692/*
693 * LOP : macro to build a list operator. Its behaviour has been replaced
694 * with a subroutine, S_lop() for which LOP is just another name.
695 */
696
a0d0e21e
LW
697#define LOP(f,x) return lop(f,x,s)
698
ffb4593c
NT
699/*
700 * S_lop
701 * Build a list operator (or something that might be one). The rules:
702 * - if we have a next token, then it's a list operator [why?]
703 * - if the next thing is an opening paren, then it's a function
704 * - else it's a list operator
705 */
706
76e3520e 707STATIC I32
a0be28da 708S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 709{
79072805 710 yylval.ival = f;
35c8bce7 711 CLINE;
075953c3 712 REPORT("lop", f)
3280af22
NIS
713 PL_expect = x;
714 PL_bufptr = s;
715 PL_last_lop = PL_oldbufptr;
eb160463 716 PL_last_lop_op = (OPCODE)f;
3280af22 717 if (PL_nexttoke)
a0d0e21e 718 return LSTOP;
79072805
LW
719 if (*s == '(')
720 return FUNC;
721 s = skipspace(s);
722 if (*s == '(')
723 return FUNC;
724 else
725 return LSTOP;
726}
727
ffb4593c
NT
728/*
729 * S_force_next
9cbb5ea2 730 * When the lexer realizes it knows the next token (for instance,
ffb4593c 731 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
732 * to know what token to return the next time the lexer is called. Caller
733 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
734 * handles the token correctly.
ffb4593c
NT
735 */
736
4e553d73 737STATIC void
cea2e8a9 738S_force_next(pTHX_ I32 type)
79072805 739{
3280af22
NIS
740 PL_nexttype[PL_nexttoke] = type;
741 PL_nexttoke++;
742 if (PL_lex_state != LEX_KNOWNEXT) {
743 PL_lex_defer = PL_lex_state;
744 PL_lex_expect = PL_expect;
745 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
746 }
747}
748
ffb4593c
NT
749/*
750 * S_force_word
751 * When the lexer knows the next thing is a word (for instance, it has
752 * just seen -> and it knows that the next char is a word char, then
753 * it calls S_force_word to stick the next word into the PL_next lookahead.
754 *
755 * Arguments:
b1b65b59 756 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
757 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
758 * int check_keyword : if true, Perl checks to make sure the word isn't
759 * a keyword (do this if the word is a label, e.g. goto FOO)
760 * int allow_pack : if true, : characters will also be allowed (require,
761 * use, etc. do this)
9cbb5ea2 762 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
763 */
764
76e3520e 765STATIC char *
cea2e8a9 766S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 767{
463ee0b2
LW
768 register char *s;
769 STRLEN len;
4e553d73 770
463ee0b2
LW
771 start = skipspace(start);
772 s = start;
7e2040f0 773 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 774 (allow_pack && *s == ':') ||
15f0808c 775 (allow_initial_tick && *s == '\'') )
a0d0e21e 776 {
3280af22
NIS
777 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
778 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
779 return start;
780 if (token == METHOD) {
781 s = skipspace(s);
782 if (*s == '(')
3280af22 783 PL_expect = XTERM;
463ee0b2 784 else {
3280af22 785 PL_expect = XOPERATOR;
463ee0b2 786 }
79072805 787 }
3280af22
NIS
788 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
789 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
790 force_next(token);
791 }
792 return s;
793}
794
ffb4593c
NT
795/*
796 * S_force_ident
9cbb5ea2 797 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
798 * text only contains the "foo" portion. The first argument is a pointer
799 * to the "foo", and the second argument is the type symbol to prefix.
800 * Forces the next token to be a "WORD".
9cbb5ea2 801 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
802 */
803
76e3520e 804STATIC void
cea2e8a9 805S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
806{
807 if (s && *s) {
11343788 808 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 809 PL_nextval[PL_nexttoke].opval = o;
79072805 810 force_next(WORD);
748a9306 811 if (kind) {
11343788 812 o->op_private = OPpCONST_ENTERED;
55497cff
PP
813 /* XXX see note in pp_entereval() for why we forgo typo
814 warnings if the symbol must be introduced in an eval.
815 GSAR 96-10-12 */
3280af22 816 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
817 kind == '$' ? SVt_PV :
818 kind == '@' ? SVt_PVAV :
819 kind == '%' ? SVt_PVHV :
820 SVt_PVGV
821 );
748a9306 822 }
79072805
LW
823 }
824}
825
1571675a
GS
826NV
827Perl_str_to_version(pTHX_ SV *sv)
828{
829 NV retval = 0.0;
830 NV nshift = 1.0;
831 STRLEN len;
832 char *start = SvPVx(sv,len);
3aa33fe5 833 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
834 char *end = start + len;
835 while (start < end) {
ba210ebe 836 STRLEN skip;
1571675a
GS
837 UV n;
838 if (utf)
9041c2e3 839 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
840 else {
841 n = *(U8*)start;
842 skip = 1;
843 }
844 retval += ((NV)n)/nshift;
845 start += skip;
846 nshift *= 1000;
847 }
848 return retval;
849}
850
4e553d73 851/*
ffb4593c
NT
852 * S_force_version
853 * Forces the next token to be a version number.
e759cc13
RGS
854 * If the next token appears to be an invalid version number, (e.g. "v2b"),
855 * and if "guessing" is TRUE, then no new token is created (and the caller
856 * must use an alternative parsing method).
ffb4593c
NT
857 */
858
76e3520e 859STATIC char *
e759cc13 860S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
861{
862 OP *version = Nullop;
44dcb63b 863 char *d;
89bfa8cd
PP
864
865 s = skipspace(s);
866
44dcb63b 867 d = s;
dd629d5b 868 if (*d == 'v')
44dcb63b 869 d++;
44dcb63b 870 if (isDIGIT(*d)) {
e759cc13
RGS
871 while (isDIGIT(*d) || *d == '_' || *d == '.')
872 d++;
9f3d182e 873 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 874 SV *ver;
b73d6f50 875 s = scan_num(s, &yylval);
89bfa8cd 876 version = yylval.opval;
dd629d5b
GS
877 ver = cSVOPx(version)->op_sv;
878 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 879 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
880 SvNVX(ver) = str_to_version(ver);
881 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 882 }
89bfa8cd 883 }
e759cc13
RGS
884 else if (guessing)
885 return s;
89bfa8cd
PP
886 }
887
888 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 889 PL_nextval[PL_nexttoke].opval = version;
4e553d73 890 force_next(WORD);
89bfa8cd 891
e759cc13 892 return s;
89bfa8cd
PP
893}
894
ffb4593c
NT
895/*
896 * S_tokeq
897 * Tokenize a quoted string passed in as an SV. It finds the next
898 * chunk, up to end of string or a backslash. It may make a new
899 * SV containing that chunk (if HINT_NEW_STRING is on). It also
900 * turns \\ into \.
901 */
902
76e3520e 903STATIC SV *
cea2e8a9 904S_tokeq(pTHX_ SV *sv)
79072805
LW
905{
906 register char *s;
907 register char *send;
908 register char *d;
b3ac6de7
IZ
909 STRLEN len = 0;
910 SV *pv = sv;
79072805
LW
911
912 if (!SvLEN(sv))
b3ac6de7 913 goto finish;
79072805 914
a0d0e21e 915 s = SvPV_force(sv, len);
21a311ee 916 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 917 goto finish;
463ee0b2 918 send = s + len;
79072805
LW
919 while (s < send && *s != '\\')
920 s++;
921 if (s == send)
b3ac6de7 922 goto finish;
79072805 923 d = s;
be4731d2 924 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 925 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
926 if (SvUTF8(sv))
927 SvUTF8_on(pv);
928 }
79072805
LW
929 while (s < send) {
930 if (*s == '\\') {
a0d0e21e 931 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
932 s++; /* all that, just for this */
933 }
934 *d++ = *s++;
935 }
936 *d = '\0';
463ee0b2 937 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 938 finish:
3280af22 939 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 940 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
941 return sv;
942}
943
ffb4593c
NT
944/*
945 * Now come three functions related to double-quote context,
946 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
947 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
948 * interact with PL_lex_state, and create fake ( ... ) argument lists
949 * to handle functions and concatenation.
950 * They assume that whoever calls them will be setting up a fake
951 * join call, because each subthing puts a ',' after it. This lets
952 * "lower \luPpEr"
953 * become
954 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
955 *
956 * (I'm not sure whether the spurious commas at the end of lcfirst's
957 * arguments and join's arguments are created or not).
958 */
959
960/*
961 * S_sublex_start
962 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
963 *
964 * Pattern matching will set PL_lex_op to the pattern-matching op to
965 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
966 *
967 * OP_CONST and OP_READLINE are easy--just make the new op and return.
968 *
969 * Everything else becomes a FUNC.
970 *
971 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
972 * had an OP_CONST or OP_READLINE). This just sets us up for a
973 * call to S_sublex_push().
974 */
975
76e3520e 976STATIC I32
cea2e8a9 977S_sublex_start(pTHX)
79072805
LW
978{
979 register I32 op_type = yylval.ival;
79072805
LW
980
981 if (op_type == OP_NULL) {
3280af22
NIS
982 yylval.opval = PL_lex_op;
983 PL_lex_op = Nullop;
79072805
LW
984 return THING;
985 }
986 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 987 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
988
989 if (SvTYPE(sv) == SVt_PVIV) {
990 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
991 STRLEN len;
992 char *p;
993 SV *nsv;
994
995 p = SvPV(sv, len);
79cb57f6 996 nsv = newSVpvn(p, len);
01ec43d0
GS
997 if (SvUTF8(sv))
998 SvUTF8_on(nsv);
b3ac6de7
IZ
999 SvREFCNT_dec(sv);
1000 sv = nsv;
4e553d73 1001 }
b3ac6de7 1002 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1003 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1004 /* Allow <FH> // "foo" */
1005 if (op_type == OP_READLINE)
1006 PL_expect = XTERMORDORDOR;
79072805
LW
1007 return THING;
1008 }
1009
3280af22
NIS
1010 PL_sublex_info.super_state = PL_lex_state;
1011 PL_sublex_info.sub_inwhat = op_type;
1012 PL_sublex_info.sub_op = PL_lex_op;
1013 PL_lex_state = LEX_INTERPPUSH;
55497cff 1014
3280af22
NIS
1015 PL_expect = XTERM;
1016 if (PL_lex_op) {
1017 yylval.opval = PL_lex_op;
1018 PL_lex_op = Nullop;
55497cff
PP
1019 return PMFUNC;
1020 }
1021 else
1022 return FUNC;
1023}
1024
ffb4593c
NT
1025/*
1026 * S_sublex_push
1027 * Create a new scope to save the lexing state. The scope will be
1028 * ended in S_sublex_done. Returns a '(', starting the function arguments
1029 * to the uc, lc, etc. found before.
1030 * Sets PL_lex_state to LEX_INTERPCONCAT.
1031 */
1032
76e3520e 1033STATIC I32
cea2e8a9 1034S_sublex_push(pTHX)
55497cff 1035{
f46d017c 1036 ENTER;
55497cff 1037
3280af22
NIS
1038 PL_lex_state = PL_sublex_info.super_state;
1039 SAVEI32(PL_lex_dojoin);
1040 SAVEI32(PL_lex_brackets);
3280af22
NIS
1041 SAVEI32(PL_lex_casemods);
1042 SAVEI32(PL_lex_starts);
1043 SAVEI32(PL_lex_state);
7766f137 1044 SAVEVPTR(PL_lex_inpat);
3280af22 1045 SAVEI32(PL_lex_inwhat);
57843af0 1046 SAVECOPLINE(PL_curcop);
3280af22 1047 SAVEPPTR(PL_bufptr);
8452ff4b 1048 SAVEPPTR(PL_bufend);
3280af22
NIS
1049 SAVEPPTR(PL_oldbufptr);
1050 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1051 SAVEPPTR(PL_last_lop);
1052 SAVEPPTR(PL_last_uni);
3280af22
NIS
1053 SAVEPPTR(PL_linestart);
1054 SAVESPTR(PL_linestr);
1055 SAVEPPTR(PL_lex_brackstack);
1056 SAVEPPTR(PL_lex_casestack);
1057
1058 PL_linestr = PL_lex_stuff;
1059 PL_lex_stuff = Nullsv;
1060
9cbb5ea2
GS
1061 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1062 = SvPVX(PL_linestr);
3280af22 1063 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1064 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1065 SAVEFREESV(PL_linestr);
1066
1067 PL_lex_dojoin = FALSE;
1068 PL_lex_brackets = 0;
3280af22
NIS
1069 New(899, PL_lex_brackstack, 120, char);
1070 New(899, PL_lex_casestack, 12, char);
1071 SAVEFREEPV(PL_lex_brackstack);
1072 SAVEFREEPV(PL_lex_casestack);
1073 PL_lex_casemods = 0;
1074 *PL_lex_casestack = '\0';
1075 PL_lex_starts = 0;
1076 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1077 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1078
1079 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1080 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1081 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1082 else
3280af22 1083 PL_lex_inpat = Nullop;
79072805 1084
55497cff 1085 return '(';
79072805
LW
1086}
1087
ffb4593c
NT
1088/*
1089 * S_sublex_done
1090 * Restores lexer state after a S_sublex_push.
1091 */
1092
76e3520e 1093STATIC I32
cea2e8a9 1094S_sublex_done(pTHX)
79072805 1095{
3280af22 1096 if (!PL_lex_starts++) {
9aa983d2
JH
1097 SV *sv = newSVpvn("",0);
1098 if (SvUTF8(PL_linestr))
1099 SvUTF8_on(sv);
3280af22 1100 PL_expect = XOPERATOR;
9aa983d2 1101 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1102 return THING;
1103 }
1104
3280af22
NIS
1105 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1106 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1107 return yylex();
79072805
LW
1108 }
1109
ffb4593c 1110 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1111 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1112 PL_linestr = PL_lex_repl;
1113 PL_lex_inpat = 0;
1114 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1115 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1116 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1117 SAVEFREESV(PL_linestr);
1118 PL_lex_dojoin = FALSE;
1119 PL_lex_brackets = 0;
3280af22
NIS
1120 PL_lex_casemods = 0;
1121 *PL_lex_casestack = '\0';
1122 PL_lex_starts = 0;
25da4f38 1123 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1124 PL_lex_state = LEX_INTERPNORMAL;
1125 PL_lex_starts++;
e9fa98b2
HS
1126 /* we don't clear PL_lex_repl here, so that we can check later
1127 whether this is an evalled subst; that means we rely on the
1128 logic to ensure sublex_done() is called again only via the
1129 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1130 }
e9fa98b2 1131 else {
3280af22 1132 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1133 PL_lex_repl = Nullsv;
1134 }
79072805 1135 return ',';
ffed7fef
LW
1136 }
1137 else {
f46d017c 1138 LEAVE;
3280af22
NIS
1139 PL_bufend = SvPVX(PL_linestr);
1140 PL_bufend += SvCUR(PL_linestr);
1141 PL_expect = XOPERATOR;
09bef843 1142 PL_sublex_info.sub_inwhat = 0;
79072805 1143 return ')';
ffed7fef
LW
1144 }
1145}
1146
02aa26ce
NT
1147/*
1148 scan_const
1149
1150 Extracts a pattern, double-quoted string, or transliteration. This
1151 is terrifying code.
1152
3280af22
NIS
1153 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1154 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1155 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1156
9b599b2a
GS
1157 Returns a pointer to the character scanned up to. Iff this is
1158 advanced from the start pointer supplied (ie if anything was
1159 successfully parsed), will leave an OP for the substring scanned
1160 in yylval. Caller must intuit reason for not parsing further
1161 by looking at the next characters herself.
1162
02aa26ce
NT
1163 In patterns:
1164 backslashes:
1165 double-quoted style: \r and \n
1166 regexp special ones: \D \s
1167 constants: \x3
1168 backrefs: \1 (deprecated in substitution replacements)
1169 case and quoting: \U \Q \E
1170 stops on @ and $, but not for $ as tail anchor
1171
1172 In transliterations:
1173 characters are VERY literal, except for - not at the start or end
1174 of the string, which indicates a range. scan_const expands the
1175 range to the full set of intermediate characters.
1176
1177 In double-quoted strings:
1178 backslashes:
1179 double-quoted style: \r and \n
1180 constants: \x3
1181 backrefs: \1 (deprecated)
1182 case and quoting: \U \Q \E
1183 stops on @ and $
1184
1185 scan_const does *not* construct ops to handle interpolated strings.
1186 It stops processing as soon as it finds an embedded $ or @ variable
1187 and leaves it to the caller to work out what's going on.
1188
da6eedaa 1189 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1190
1191 $ in pattern could be $foo or could be tail anchor. Assumption:
1192 it's a tail anchor if $ is the last thing in the string, or if it's
1193 followed by one of ")| \n\t"
1194
1195 \1 (backreferences) are turned into $1
1196
1197 The structure of the code is
1198 while (there's a character to process) {
1199 handle transliteration ranges
1200 skip regexp comments
1201 skip # initiated comments in //x patterns
1202 check for embedded @foo
1203 check for embedded scalars
1204 if (backslash) {
1205 leave intact backslashes from leave (below)
1206 deprecate \1 in strings and sub replacements
1207 handle string-changing backslashes \l \U \Q \E, etc.
1208 switch (what was escaped) {
1209 handle - in a transliteration (becomes a literal -)
1210 handle \132 octal characters
1211 handle 0x15 hex characters
1212 handle \cV (control V)
1213 handle printf backslashes (\f, \r, \n, etc)
1214 } (end switch)
1215 } (end if backslash)
1216 } (end while character to read)
4e553d73 1217
02aa26ce
NT
1218*/
1219
76e3520e 1220STATIC char *
cea2e8a9 1221S_scan_const(pTHX_ char *start)
79072805 1222{
3280af22 1223 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1224 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1225 register char *s = start; /* start of the constant */
1226 register char *d = SvPVX(sv); /* destination for copies */
1227 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1228 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1229 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1230 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1231 UV uv;
1232
dff6d3cd 1233 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1234 PL_lex_inpat
4a2d328f 1235 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1236 : "";
79072805 1237
2b9d42f0
NIS
1238 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1239 /* If we are doing a trans and we know we want UTF8 set expectation */
1240 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1241 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1242 }
1243
1244
79072805 1245 while (s < send || dorange) {
02aa26ce 1246 /* get transliterations out of the way (they're most literal) */
3280af22 1247 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1248 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1249 if (dorange) {
1ba5c669
JH
1250 I32 i; /* current expanded character */
1251 I32 min; /* first character in range */
1252 I32 max; /* last character in range */
02aa26ce 1253
2b9d42f0 1254 if (has_utf8) {
8973db79
JH
1255 char *c = (char*)utf8_hop((U8*)d, -1);
1256 char *e = d++;
1257 while (e-- > c)
1258 *(e + 1) = *e;
25716404 1259 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1260 /* mark the range as done, and continue */
1261 dorange = FALSE;
1262 didrange = TRUE;
1263 continue;
1264 }
2b9d42f0 1265
02aa26ce 1266 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1267 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1268 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1269 d -= 2; /* eat the first char and the - */
1270
8ada0baa
JH
1271 min = (U8)*d; /* first char in range */
1272 max = (U8)d[1]; /* last char in range */
1273
c2e66d9e 1274 if (min > max) {
01ec43d0 1275 Perl_croak(aTHX_
d1573ac7 1276 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1277 (char)min, (char)max);
c2e66d9e
GS
1278 }
1279
c7f1f016 1280#ifdef EBCDIC
8ada0baa
JH
1281 if ((isLOWER(min) && isLOWER(max)) ||
1282 (isUPPER(min) && isUPPER(max))) {
1283 if (isLOWER(min)) {
1284 for (i = min; i <= max; i++)
1285 if (isLOWER(i))
db42d148 1286 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1287 } else {
1288 for (i = min; i <= max; i++)
1289 if (isUPPER(i))
db42d148 1290 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1291 }
1292 }
1293 else
1294#endif
1295 for (i = min; i <= max; i++)
eb160463 1296 *d++ = (char)i;
02aa26ce
NT
1297
1298 /* mark the range as done, and continue */
79072805 1299 dorange = FALSE;
01ec43d0 1300 didrange = TRUE;
79072805 1301 continue;
4e553d73 1302 }
02aa26ce
NT
1303
1304 /* range begins (ignore - as first or last char) */
79072805 1305 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1306 if (didrange) {
1fafa243 1307 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1308 }
2b9d42f0 1309 if (has_utf8) {
25716404 1310 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1311 s++;
1312 continue;
1313 }
79072805
LW
1314 dorange = TRUE;
1315 s++;
01ec43d0
GS
1316 }
1317 else {
1318 didrange = FALSE;
1319 }
79072805 1320 }
02aa26ce
NT
1321
1322 /* if we get here, we're not doing a transliteration */
1323
0f5d15d6
IZ
1324 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1325 except for the last char, which will be done separately. */
3280af22 1326 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1327 if (s[2] == '#') {
1328 while (s < send && *s != ')')
db42d148 1329 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1330 }
1331 else if (s[2] == '{' /* This should match regcomp.c */
1332 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1333 {
cc6b7395 1334 I32 count = 1;
0f5d15d6 1335 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1336 char c;
1337
d9f97599
GS
1338 while (count && (c = *regparse)) {
1339 if (c == '\\' && regparse[1])
1340 regparse++;
4e553d73 1341 else if (c == '{')
cc6b7395 1342 count++;
4e553d73 1343 else if (c == '}')
cc6b7395 1344 count--;
d9f97599 1345 regparse++;
cc6b7395 1346 }
5bdf89e7
IZ
1347 if (*regparse != ')') {
1348 regparse--; /* Leave one char for continuation. */
cc6b7395 1349 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1350 }
0f5d15d6 1351 while (s < regparse)
db42d148 1352 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1353 }
748a9306 1354 }
02aa26ce
NT
1355
1356 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1357 else if (*s == '#' && PL_lex_inpat &&
1358 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1359 while (s+1 < send && *s != '\n')
db42d148 1360 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1361 }
02aa26ce 1362
5d1d4326 1363 /* check for embedded arrays
da6eedaa 1364 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1365 */
7e2040f0 1366 else if (*s == '@' && s[1]
5d1d4326 1367 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1368 break;
02aa26ce
NT
1369
1370 /* check for embedded scalars. only stop if we're sure it's a
1371 variable.
1372 */
79072805 1373 else if (*s == '$') {
3280af22 1374 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1375 break;
6002328a 1376 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1377 break; /* in regexp, $ might be tail anchor */
1378 }
02aa26ce 1379
2b9d42f0
NIS
1380 /* End of else if chain - OP_TRANS rejoin rest */
1381
02aa26ce 1382 /* backslashes */
79072805
LW
1383 if (*s == '\\' && s+1 < send) {
1384 s++;
02aa26ce
NT
1385
1386 /* some backslashes we leave behind */
c9f97d15 1387 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1388 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1389 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1390 continue;
1391 }
02aa26ce
NT
1392
1393 /* deprecate \1 in strings and substitution replacements */
3280af22 1394 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1395 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1396 {
599cee73 1397 if (ckWARN(WARN_SYNTAX))
9014280d 1398 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1399 *--s = '$';
1400 break;
1401 }
02aa26ce
NT
1402
1403 /* string-change backslash escapes */
3280af22 1404 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1405 --s;
1406 break;
1407 }
02aa26ce
NT
1408
1409 /* if we get here, it's either a quoted -, or a digit */
79072805 1410 switch (*s) {
02aa26ce
NT
1411
1412 /* quoted - in transliterations */
79072805 1413 case '-':
3280af22 1414 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1415 *d++ = *s++;
1416 continue;
1417 }
1418 /* FALL THROUGH */
1419 default:
11b8faa4 1420 {
707afd92
MS
1421 if (ckWARN(WARN_MISC) &&
1422 isALNUM(*s) &&
1423 *s != '_')
9014280d 1424 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1425 "Unrecognized escape \\%c passed through",
1426 *s);
1427 /* default action is to copy the quoted character */
f9a63242 1428 goto default_action;
11b8faa4 1429 }
02aa26ce
NT
1430
1431 /* \132 indicates an octal constant */
79072805
LW
1432 case '0': case '1': case '2': case '3':
1433 case '4': case '5': case '6': case '7':
ba210ebe 1434 {
53305cf1
NC
1435 I32 flags = 0;
1436 STRLEN len = 3;
1437 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1438 s += len;
1439 }
012bcf8d 1440 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1441
1442 /* \x24 indicates a hex constant */
79072805 1443 case 'x':
a0ed51b3
LW
1444 ++s;
1445 if (*s == '{') {
1446 char* e = strchr(s, '}');
a4c04bdc
NC
1447 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1448 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1449 STRLEN len;
355860ce 1450
53305cf1 1451 ++s;
adaeee49 1452 if (!e) {
a0ed51b3 1453 yyerror("Missing right brace on \\x{}");
355860ce 1454 continue;
ba210ebe 1455 }
53305cf1
NC
1456 len = e - s;
1457 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1458 s = e + 1;
a0ed51b3
LW
1459 }
1460 else {
ba210ebe 1461 {
53305cf1 1462 STRLEN len = 2;
a4c04bdc 1463 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1464 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1465 s += len;
1466 }
012bcf8d
GS
1467 }
1468
1469 NUM_ESCAPE_INSERT:
1470 /* Insert oct or hex escaped character.
301d3d20 1471 * There will always enough room in sv since such
db42d148 1472 * escapes will be longer than any UTF-8 sequence
301d3d20 1473 * they can end up as. */
ba7cea30 1474
c7f1f016
NIS
1475 /* We need to map to chars to ASCII before doing the tests
1476 to cover EBCDIC
1477 */
c4d5f83a 1478 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1479 if (!has_utf8 && uv > 255) {
301d3d20
JH
1480 /* Might need to recode whatever we have
1481 * accumulated so far if it contains any
1482 * hibit chars.
1483 *
1484 * (Can't we keep track of that and avoid
1485 * this rescan? --jhi)
012bcf8d 1486 */
c7f1f016 1487 int hicount = 0;
63cd0674
NIS
1488 U8 *c;
1489 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1490 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1491 hicount++;
db42d148 1492 }
012bcf8d 1493 }
63cd0674 1494 if (hicount) {
db42d148
NIS
1495 STRLEN offset = d - SvPVX(sv);
1496 U8 *src, *dst;
1497 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1498 src = (U8 *)d - 1;
1499 dst = src+hicount;
1500 d += hicount;
1501 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1502 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1503 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1504 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1505 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1506 }
1507 else {
63cd0674 1508 *dst-- = *src;
012bcf8d 1509 }
c7f1f016 1510 src--;
012bcf8d
GS
1511 }
1512 }
1513 }
1514
9aa983d2 1515 if (has_utf8 || uv > 255) {
9041c2e3 1516 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1517 has_utf8 = TRUE;
f9a63242
JH
1518 if (PL_lex_inwhat == OP_TRANS &&
1519 PL_sublex_info.sub_op) {
1520 PL_sublex_info.sub_op->op_private |=
1521 (PL_lex_repl ? OPpTRANS_FROM_UTF
1522 : OPpTRANS_TO_UTF);
f9a63242 1523 }
012bcf8d 1524 }
a0ed51b3 1525 else {
012bcf8d 1526 *d++ = (char)uv;
a0ed51b3 1527 }
012bcf8d
GS
1528 }
1529 else {
c4d5f83a 1530 *d++ = (char) uv;
a0ed51b3 1531 }
79072805 1532 continue;
02aa26ce 1533
b239daa5 1534 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1535 case 'N':
55eda711 1536 ++s;
423cee85
JH
1537 if (*s == '{') {
1538 char* e = strchr(s, '}');
155aba94 1539 SV *res;
423cee85
JH
1540 STRLEN len;
1541 char *str;
4e553d73 1542
423cee85 1543 if (!e) {
5777a3f7 1544 yyerror("Missing right brace on \\N{}");
423cee85
JH
1545 e = s - 1;
1546 goto cont_scan;
1547 }
dbc0d4f2
JH
1548 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1549 /* \N{U+...} */
1550 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1551 PERL_SCAN_DISALLOW_PREFIX;
1552 s += 3;
1553 len = e - s;
1554 uv = grok_hex(s, &len, &flags, NULL);
1555 s = e + 1;
1556 goto NUM_ESCAPE_INSERT;
1557 }
55eda711
JH
1558 res = newSVpvn(s + 1, e - s - 1);
1559 res = new_constant( Nullch, 0, "charnames",
1560 res, Nullsv, "\\N{...}" );
f9a63242
JH
1561 if (has_utf8)
1562 sv_utf8_upgrade(res);
423cee85 1563 str = SvPV(res,len);
1c47067b
JH
1564#ifdef EBCDIC_NEVER_MIND
1565 /* charnames uses pack U and that has been
1566 * recently changed to do the below uni->native
1567 * mapping, so this would be redundant (and wrong,
1568 * the code point would be doubly converted).
1569 * But leave this in just in case the pack U change
1570 * gets revoked, but the semantics is still
1571 * desireable for charnames. --jhi */
cddc7ef4
JH
1572 {
1573 UV uv = utf8_to_uvchr((U8*)str, 0);
1574
1575 if (uv < 0x100) {
1576 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1577
1578 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1579 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1580 str = SvPV(res, len);
1581 }
1582 }
1583#endif
89491803 1584 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1585 char *ostart = SvPVX(sv);
1586 SvCUR_set(sv, d - ostart);
1587 SvPOK_on(sv);
e4f3eed8 1588 *d = '\0';
f08d6ad9 1589 sv_utf8_upgrade(sv);
d2f449dd 1590 /* this just broke our allocation above... */
eb160463 1591 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1592 d = SvPVX(sv) + SvCUR(sv);
89491803 1593 has_utf8 = TRUE;
f08d6ad9 1594 }
eb160463 1595 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1596 char *odest = SvPVX(sv);
1597
8973db79 1598 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1599 d = SvPVX(sv) + (d - odest);
1600 }
1601 Copy(str, d, len, char);
1602 d += len;
1603 SvREFCNT_dec(res);
1604 cont_scan:
1605 s = e + 1;
1606 }
1607 else
5777a3f7 1608 yyerror("Missing braces on \\N{}");
423cee85
JH
1609 continue;
1610
02aa26ce 1611 /* \c is a control character */
79072805
LW
1612 case 'c':
1613 s++;
961ce445 1614 if (s < send) {
ba210ebe 1615 U8 c = *s++;
c7f1f016
NIS
1616#ifdef EBCDIC
1617 if (isLOWER(c))
1618 c = toUPPER(c);
1619#endif
db42d148 1620 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1621 }
961ce445
RGS
1622 else {
1623 yyerror("Missing control char name in \\c");
1624 }
79072805 1625 continue;
02aa26ce
NT
1626
1627 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1628 case 'b':
db42d148 1629 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1630 break;
1631 case 'n':
db42d148 1632 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1633 break;
1634 case 'r':
db42d148 1635 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1636 break;
1637 case 'f':
db42d148 1638 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1639 break;
1640 case 't':
db42d148 1641 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1642 break;
34a3fe2a 1643 case 'e':
db42d148 1644 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1645 break;
1646 case 'a':
db42d148 1647 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1648 break;
02aa26ce
NT
1649 } /* end switch */
1650
79072805
LW
1651 s++;
1652 continue;
02aa26ce
NT
1653 } /* end if (backslash) */
1654
f9a63242 1655 default_action:
2b9d42f0
NIS
1656 /* If we started with encoded form, or already know we want it
1657 and then encode the next character */
1658 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1659 STRLEN len = 1;
1660 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1661 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1662 s += len;
1663 if (need > len) {
1664 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1665 STRLEN off = d - SvPVX(sv);
1666 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1667 }
1668 d = (char*)uvchr_to_utf8((U8*)d, uv);
1669 has_utf8 = TRUE;
1670 }
1671 else {
1672 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1673 }
02aa26ce
NT
1674 } /* while loop to process each character */
1675
1676 /* terminate the string and set up the sv */
79072805 1677 *d = '\0';
463ee0b2 1678 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1679 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1680 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1681
79072805 1682 SvPOK_on(sv);
9f4817db 1683 if (PL_encoding && !has_utf8) {
d0063567
DK
1684 sv_recode_to_utf8(sv, PL_encoding);
1685 if (SvUTF8(sv))
1686 has_utf8 = TRUE;
9f4817db 1687 }
2b9d42f0 1688 if (has_utf8) {
7e2040f0 1689 SvUTF8_on(sv);
2b9d42f0 1690 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1691 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1692 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1693 }
1694 }
79072805 1695
02aa26ce 1696 /* shrink the sv if we allocated more than we used */
79072805
LW
1697 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1698 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1699 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1700 }
02aa26ce 1701
9b599b2a 1702 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1703 if (s > PL_bufptr) {
1704 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1705 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1706 sv, Nullsv,
4e553d73 1707 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1708 ? "tr"
3280af22 1709 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1710 ? "s"
1711 : "qq")));
79072805 1712 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1713 } else
8990e307 1714 SvREFCNT_dec(sv);
79072805
LW
1715 return s;
1716}
1717
ffb4593c
NT
1718/* S_intuit_more
1719 * Returns TRUE if there's more to the expression (e.g., a subscript),
1720 * FALSE otherwise.
ffb4593c
NT
1721 *
1722 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1723 *
1724 * ->[ and ->{ return TRUE
1725 * { and [ outside a pattern are always subscripts, so return TRUE
1726 * if we're outside a pattern and it's not { or [, then return FALSE
1727 * if we're in a pattern and the first char is a {
1728 * {4,5} (any digits around the comma) returns FALSE
1729 * if we're in a pattern and the first char is a [
1730 * [] returns FALSE
1731 * [SOMETHING] has a funky algorithm to decide whether it's a
1732 * character class or not. It has to deal with things like
1733 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1734 * anything else returns TRUE
1735 */
1736
9cbb5ea2
GS
1737/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1738
76e3520e 1739STATIC int
cea2e8a9 1740S_intuit_more(pTHX_ register char *s)
79072805 1741{
3280af22 1742 if (PL_lex_brackets)
79072805
LW
1743 return TRUE;
1744 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1745 return TRUE;
1746 if (*s != '{' && *s != '[')
1747 return FALSE;
3280af22 1748 if (!PL_lex_inpat)
79072805
LW
1749 return TRUE;
1750
1751 /* In a pattern, so maybe we have {n,m}. */
1752 if (*s == '{') {
1753 s++;
1754 if (!isDIGIT(*s))
1755 return TRUE;
1756 while (isDIGIT(*s))
1757 s++;
1758 if (*s == ',')
1759 s++;
1760 while (isDIGIT(*s))
1761 s++;
1762 if (*s == '}')
1763 return FALSE;
1764 return TRUE;
1765
1766 }
1767
1768 /* On the other hand, maybe we have a character class */
1769
1770 s++;
1771 if (*s == ']' || *s == '^')
1772 return FALSE;
1773 else {
ffb4593c 1774 /* this is terrifying, and it works */
79072805
LW
1775 int weight = 2; /* let's weigh the evidence */
1776 char seen[256];
f27ffc4a 1777 unsigned char un_char = 255, last_un_char;
93a17b20 1778 char *send = strchr(s,']');
3280af22 1779 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1780
1781 if (!send) /* has to be an expression */
1782 return TRUE;
1783
1784 Zero(seen,256,char);
1785 if (*s == '$')
1786 weight -= 3;
1787 else if (isDIGIT(*s)) {
1788 if (s[1] != ']') {
1789 if (isDIGIT(s[1]) && s[2] == ']')
1790 weight -= 10;
1791 }
1792 else
1793 weight -= 100;
1794 }
1795 for (; s < send; s++) {
1796 last_un_char = un_char;
1797 un_char = (unsigned char)*s;
1798 switch (*s) {
1799 case '@':
1800 case '&':
1801 case '$':
1802 weight -= seen[un_char] * 10;
7e2040f0 1803 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1804 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1805 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1806 weight -= 100;
1807 else
1808 weight -= 10;
1809 }
1810 else if (*s == '$' && s[1] &&
93a17b20
LW
1811 strchr("[#!%*<>()-=",s[1])) {
1812 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1813 weight -= 10;
1814 else
1815 weight -= 1;
1816 }
1817 break;
1818 case '\\':
1819 un_char = 254;
1820 if (s[1]) {
93a17b20 1821 if (strchr("wds]",s[1]))
79072805
LW
1822 weight += 100;
1823 else if (seen['\''] || seen['"'])
1824 weight += 1;
93a17b20 1825 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1826 weight += 40;
1827 else if (isDIGIT(s[1])) {
1828 weight += 40;
1829 while (s[1] && isDIGIT(s[1]))
1830 s++;
1831 }
1832 }
1833 else
1834 weight += 100;
1835 break;
1836 case '-':
1837 if (s[1] == '\\')
1838 weight += 50;
93a17b20 1839 if (strchr("aA01! ",last_un_char))
79072805 1840 weight += 30;
93a17b20 1841 if (strchr("zZ79~",s[1]))
79072805 1842 weight += 30;
f27ffc4a
GS
1843 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1844 weight -= 5; /* cope with negative subscript */
79072805
LW
1845 break;
1846 default:
93a17b20 1847 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1848 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1849 char *d = tmpbuf;
1850 while (isALPHA(*s))
1851 *d++ = *s++;
1852 *d = '\0';
1853 if (keyword(tmpbuf, d - tmpbuf))
1854 weight -= 150;
1855 }
1856 if (un_char == last_un_char + 1)
1857 weight += 5;
1858 weight -= seen[un_char];
1859 break;
1860 }
1861 seen[un_char]++;
1862 }
1863 if (weight >= 0) /* probably a character class */
1864 return FALSE;
1865 }
1866
1867 return TRUE;
1868}
ffed7fef 1869
ffb4593c
NT
1870/*
1871 * S_intuit_method
1872 *
1873 * Does all the checking to disambiguate
1874 * foo bar
1875 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1876 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1877 *
1878 * First argument is the stuff after the first token, e.g. "bar".
1879 *
1880 * Not a method if bar is a filehandle.
1881 * Not a method if foo is a subroutine prototyped to take a filehandle.
1882 * Not a method if it's really "Foo $bar"
1883 * Method if it's "foo $bar"
1884 * Not a method if it's really "print foo $bar"
1885 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1886 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1887 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1888 * =>
1889 */
1890
76e3520e 1891STATIC int
cea2e8a9 1892S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1893{
1894 char *s = start + (*start == '$');
3280af22 1895 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1896 STRLEN len;
1897 GV* indirgv;
1898
1899 if (gv) {
b6c543e3 1900 CV *cv;
a0d0e21e
LW
1901 if (GvIO(gv))
1902 return 0;
b6c543e3
IZ
1903 if ((cv = GvCVu(gv))) {
1904 char *proto = SvPVX(cv);
1905 if (proto) {
1906 if (*proto == ';')
1907 proto++;
1908 if (*proto == '*')
1909 return 0;
1910 }
1911 } else
a0d0e21e
LW
1912 gv = 0;
1913 }
8903cb82 1914 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1915 /* start is the beginning of the possible filehandle/object,
1916 * and s is the end of it
1917 * tmpbuf is a copy of it
1918 */
1919
a0d0e21e 1920 if (*start == '$') {
3280af22 1921 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1922 return 0;
1923 s = skipspace(s);
3280af22
NIS
1924 PL_bufptr = start;
1925 PL_expect = XREF;
a0d0e21e
LW
1926 return *s == '(' ? FUNCMETH : METHOD;
1927 }
1928 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1929 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1930 len -= 2;
1931 tmpbuf[len] = '\0';
1932 goto bare_package;
1933 }
1934 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1935 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1936 return 0;
1937 /* filehandle or package name makes it a method */
89bfa8cd 1938 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1939 s = skipspace(s);
3280af22 1940 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1941 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1942 bare_package:
3280af22 1943 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1944 newSVpvn(tmpbuf,len));
3280af22
NIS
1945 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1946 PL_expect = XTERM;
a0d0e21e 1947 force_next(WORD);
3280af22 1948 PL_bufptr = s;
a0d0e21e
LW
1949 return *s == '(' ? FUNCMETH : METHOD;
1950 }
1951 }
1952 return 0;
1953}
1954
ffb4593c
NT
1955/*
1956 * S_incl_perldb
1957 * Return a string of Perl code to load the debugger. If PERL5DB
1958 * is set, it will return the contents of that, otherwise a
1959 * compile-time require of perl5db.pl.
1960 */
1961
76e3520e 1962STATIC char*
cea2e8a9 1963S_incl_perldb(pTHX)
a0d0e21e 1964{
3280af22 1965 if (PL_perldb) {
76e3520e 1966 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1967
1968 if (pdb)
1969 return pdb;
93189314 1970 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1971 return "BEGIN { require 'perl5db.pl' }";
1972 }
1973 return "";
1974}
1975
1976
16d20bd9 1977/* Encoded script support. filter_add() effectively inserts a
4e553d73 1978 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1979 * Note that the filter function only applies to the current source file
1980 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1981 *
1982 * The datasv parameter (which may be NULL) can be used to pass
1983 * private data to this instance of the filter. The filter function
1984 * can recover the SV using the FILTER_DATA macro and use it to
1985 * store private buffers and state information.
1986 *
1987 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1988 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1989 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1990 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1991 * private use must be set using malloc'd pointers.
1992 */
16d20bd9
AD
1993
1994SV *
864dbfa3 1995Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1996{
f4c556ac
GS
1997 if (!funcp)
1998 return Nullsv;
1999
3280af22
NIS
2000 if (!PL_rsfp_filters)
2001 PL_rsfp_filters = newAV();
16d20bd9 2002 if (!datasv)
8c52afec 2003 datasv = NEWSV(255,0);
16d20bd9 2004 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2005 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2006 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2007 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2008 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2009 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2010 av_unshift(PL_rsfp_filters, 1);
2011 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2012 return(datasv);
2013}
4e553d73 2014
16d20bd9
AD
2015
2016/* Delete most recently added instance of this filter function. */
a0d0e21e 2017void
864dbfa3 2018Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2019{
e0c19803 2020 SV *datasv;
fe5a182c 2021 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2022 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2023 return;
2024 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2025 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2026 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2027 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2028 IoANY(datasv) = (void *)NULL;
3280af22 2029 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2030
16d20bd9
AD
2031 return;
2032 }
2033 /* we need to search for the correct entry and clear it */
cea2e8a9 2034 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2035}
2036
2037
2038/* Invoke the n'th filter function for the current rsfp. */
2039I32
864dbfa3 2040Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2041
2042
8ac85365 2043 /* 0 = read one text line */
a0d0e21e 2044{
16d20bd9
AD
2045 filter_t funcp;
2046 SV *datasv = NULL;
e50aee73 2047
3280af22 2048 if (!PL_rsfp_filters)
16d20bd9 2049 return -1;
3280af22 2050 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2051 /* Provide a default input filter to make life easy. */
2052 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2053 DEBUG_P(PerlIO_printf(Perl_debug_log,
2054 "filter_read %d: from rsfp\n", idx));
4e553d73 2055 if (maxlen) {
16d20bd9
AD
2056 /* Want a block */
2057 int len ;
2058 int old_len = SvCUR(buf_sv) ;
2059
2060 /* ensure buf_sv is large enough */
eb160463 2061 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2062 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2063 if (PerlIO_error(PL_rsfp))
37120919
AD
2064 return -1; /* error */
2065 else
2066 return 0 ; /* end of file */
2067 }
16d20bd9
AD
2068 SvCUR_set(buf_sv, old_len + len) ;
2069 } else {
2070 /* Want a line */
3280af22
NIS
2071 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2072 if (PerlIO_error(PL_rsfp))
37120919
AD
2073 return -1; /* error */
2074 else
2075 return 0 ; /* end of file */
2076 }
16d20bd9
AD
2077 }
2078 return SvCUR(buf_sv);
2079 }
2080 /* Skip this filter slot if filter has been deleted */
3280af22 2081 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2082 DEBUG_P(PerlIO_printf(Perl_debug_log,
2083 "filter_read %d: skipped (filter deleted)\n",
2084 idx));
16d20bd9
AD
2085 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2086 }
2087 /* Get function pointer hidden within datasv */
4755096e 2088 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2089 DEBUG_P(PerlIO_printf(Perl_debug_log,
2090 "filter_read %d: via function %p (%s)\n",
fe5a182c 2091 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2092 /* Call function. The function is expected to */
2093 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2094 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2095 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2096}
2097
76e3520e 2098STATIC char *
cea2e8a9 2099S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2100{
c39cd008 2101#ifdef PERL_CR_FILTER
3280af22 2102 if (!PL_rsfp_filters) {
c39cd008 2103 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2104 }
2105#endif
3280af22 2106 if (PL_rsfp_filters) {
16d20bd9 2107
55497cff
PP
2108 if (!append)
2109 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2110 if (FILTER_READ(0, sv, 0) > 0)
2111 return ( SvPVX(sv) ) ;
2112 else
2113 return Nullch ;
2114 }
9d116dd7 2115 else
fd049845 2116 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2117}
2118
01ec43d0
GS
2119STATIC HV *
2120S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2121{
2122 GV *gv;
2123
01ec43d0 2124 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2125 return PL_curstash;
2126
2127 if (len > 2 &&
2128 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2129 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2130 {
2131 return GvHV(gv); /* Foo:: */
def3634b
GS
2132 }
2133
2134 /* use constant CLASS => 'MyClass' */
2135 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2136 SV *sv;
2137 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2138 pkgname = SvPV_nolen(sv);
2139 }
2140 }
2141
2142 return gv_stashpv(pkgname, FALSE);
2143}
a0d0e21e 2144
748a9306
LW
2145#ifdef DEBUGGING
2146 static char* exp_name[] =
09bef843
SB
2147 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2148 "ATTRTERM", "TERMBLOCK"
2149 };
748a9306 2150#endif
463ee0b2 2151
02aa26ce
NT
2152/*
2153 yylex
2154
2155 Works out what to call the token just pulled out of the input
2156 stream. The yacc parser takes care of taking the ops we return and
2157 stitching them into a tree.
2158
2159 Returns:
2160 PRIVATEREF
2161
2162 Structure:
2163 if read an identifier
2164 if we're in a my declaration
2165 croak if they tried to say my($foo::bar)
2166 build the ops for a my() declaration
2167 if it's an access to a my() variable
2168 are we in a sort block?
2169 croak if my($a); $a <=> $b
2170 build ops for access to a my() variable
2171 if in a dq string, and they've said @foo and we can't find @foo
2172 croak
2173 build ops for a bareword
2174 if we already built the token before, use it.
2175*/
2176
dba4d153 2177#ifdef USE_PURE_BISON
864dbfa3 2178int
dba4d153 2179Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2180{
20141f0e
RI
2181 int r;
2182
6f202aea 2183 yyactlevel++;
20141f0e
RI
2184 yylval_pointer[yyactlevel] = lvalp;
2185 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2186 if (yyactlevel >= YYMAXLEVEL)
2187 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2188
dba4d153 2189 r = Perl_yylex(aTHX);
20141f0e 2190
d8ae6756
RI
2191 if (yyactlevel > 0)
2192 yyactlevel--;
20141f0e
RI
2193
2194 return r;
2195}
dba4d153 2196#endif
20141f0e 2197
dba4d153
JH
2198#ifdef __SC__
2199#pragma segment Perl_yylex
2200#endif
dba4d153 2201int
dba4d153 2202Perl_yylex(pTHX)
20141f0e 2203{
79072805 2204 register char *s;
378cc40b 2205 register char *d;
79072805 2206 register I32 tmp;
463ee0b2 2207 STRLEN len;
161b471a
NIS
2208 GV *gv = Nullgv;
2209 GV **gvp = 0;
aa7440fb 2210 bool bof = FALSE;
a687059c 2211
02aa26ce 2212 /* check if there's an identifier for us to look at */
ba979b31 2213 if (PL_pending_ident)
e930465f 2214 return S_pending_ident(aTHX);
bbce6d69 2215
02aa26ce
NT
2216 /* no identifier pending identification */
2217
3280af22 2218 switch (PL_lex_state) {
79072805
LW
2219#ifdef COMMENTARY
2220 case LEX_NORMAL: /* Some compilers will produce faster */
2221 case LEX_INTERPNORMAL: /* code if we comment these out. */
2222 break;
2223#endif
2224
09bef843 2225 /* when we've already built the next token, just pull it out of the queue */
79072805 2226 case LEX_KNOWNEXT:
3280af22
NIS
2227 PL_nexttoke--;
2228 yylval = PL_nextval[PL_nexttoke];
2229 if (!PL_nexttoke) {
2230 PL_lex_state = PL_lex_defer;
2231 PL_expect = PL_lex_expect;
2232 PL_lex_defer = LEX_NORMAL;
463ee0b2 2233 }
607df283 2234 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2235 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2236 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2237
3280af22 2238 return(PL_nexttype[PL_nexttoke]);
79072805 2239
02aa26ce 2240 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2241 when we get here, PL_bufptr is at the \
02aa26ce 2242 */
79072805
LW
2243 case LEX_INTERPCASEMOD:
2244#ifdef DEBUGGING
3280af22 2245 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2246 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2247#endif
02aa26ce 2248 /* handle \E or end of string */
3280af22 2249 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2250 char oldmod;
02aa26ce
NT
2251
2252 /* if at a \E */
3280af22
NIS
2253 if (PL_lex_casemods) {
2254 oldmod = PL_lex_casestack[--PL_lex_casemods];
2255 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2256
3280af22
NIS
2257 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2258 PL_bufptr += 2;
2259 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2260 }
79072805
LW
2261 return ')';
2262 }
3280af22
NIS
2263 if (PL_bufptr != PL_bufend)
2264 PL_bufptr += 2;
2265 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2266 return yylex();
79072805
LW
2267 }
2268 else {
607df283 2269 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2270 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2271 s = PL_bufptr + 1;
79072805 2272 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
eb160463 2273 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
a0d0e21e 2274 if (strchr("LU", *s) &&
3280af22 2275 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2276 {
3280af22 2277 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2278 return ')';
2279 }
3280af22
NIS
2280 if (PL_lex_casemods > 10) {
2281 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2282 if (newlb != PL_lex_casestack) {
a0d0e21e 2283 SAVEFREEPV(newlb);
3280af22 2284 PL_lex_casestack = newlb;
a0d0e21e
LW
2285 }
2286 }
3280af22
NIS
2287 PL_lex_casestack[PL_lex_casemods++] = *s;
2288 PL_lex_casestack[PL_lex_casemods] = '\0';
2289 PL_lex_state = LEX_INTERPCONCAT;
2290 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2291 force_next('(');
2292 if (*s == 'l')
3280af22 2293 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2294 else if (*s == 'u')
3280af22 2295 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2296 else if (*s == 'L')
3280af22 2297 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2298 else if (*s == 'U')
3280af22 2299 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2300 else if (*s == 'Q')
3280af22 2301 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2302 else
cea2e8a9 2303 Perl_croak(aTHX_ "panic: yylex");
3280af22 2304 PL_bufptr = s + 1;
79072805 2305 force_next(FUNC);
3280af22
NIS
2306 if (PL_lex_starts) {
2307 s = PL_bufptr;
2308 PL_lex_starts = 0;
79072805
LW
2309 Aop(OP_CONCAT);
2310 }
2311 else
cea2e8a9 2312 return yylex();
79072805
LW
2313 }
2314
55497cff
PP
2315 case LEX_INTERPPUSH:
2316 return sublex_push();
2317
79072805 2318 case LEX_INTERPSTART:
3280af22 2319 if (PL_bufptr == PL_bufend)
79072805 2320 return sublex_done();
607df283 2321 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2322 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2323 PL_expect = XTERM;
2324 PL_lex_dojoin = (*PL_bufptr == '@');
2325 PL_lex_state = LEX_INTERPNORMAL;
2326 if (PL_lex_dojoin) {
2327 PL_nextval[PL_nexttoke].ival = 0;
79072805 2328 force_next(',');
a0d0e21e 2329 force_ident("\"", '$');
3280af22 2330 PL_nextval[PL_nexttoke].ival = 0;
79072805 2331 force_next('$');
3280af22 2332 PL_nextval[PL_nexttoke].ival = 0;
79072805 2333 force_next('(');
3280af22 2334 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2335 force_next(FUNC);
2336 }
3280af22
NIS
2337 if (PL_lex_starts++) {
2338 s = PL_bufptr;
79072805
LW
2339 Aop(OP_CONCAT);
2340 }
cea2e8a9 2341 return yylex();
79072805
LW
2342
2343 case LEX_INTERPENDMAYBE:
3280af22
NIS
2344 if (intuit_more(PL_bufptr)) {
2345 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2346 break;
2347 }
2348 /* FALL THROUGH */
2349
2350 case LEX_INTERPEND:
3280af22
NIS
2351 if (PL_lex_dojoin) {
2352 PL_lex_dojoin = FALSE;
2353 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2354 return ')';
2355 }
43a16006 2356 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2357 && SvEVALED(PL_lex_repl))
43a16006 2358 {
e9fa98b2 2359 if (PL_bufptr != PL_bufend)
cea2e8a9 2360 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2361 PL_lex_repl = Nullsv;
2362 }
79072805
LW
2363 /* FALLTHROUGH */
2364 case LEX_INTERPCONCAT:
2365#ifdef DEBUGGING
3280af22 2366 if (PL_lex_brackets)
cea2e8a9 2367 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2368#endif
3280af22 2369 if (PL_bufptr == PL_bufend)
79072805
LW
2370 return sublex_done();
2371
3280af22
NIS
2372 if (SvIVX(PL_linestr) == '\'') {
2373 SV *sv = newSVsv(PL_linestr);
2374 if (!PL_lex_inpat)
76e3520e 2375 sv = tokeq(sv);
3280af22 2376 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2377 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2378 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2379 s = PL_bufend;
79072805
LW
2380 }
2381 else {
3280af22 2382 s = scan_const(PL_bufptr);
79072805 2383 if (*s == '\\')
3280af22 2384 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2385 else
3280af22 2386 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2387 }
2388
3280af22
NIS
2389 if (s != PL_bufptr) {
2390 PL_nextval[PL_nexttoke] = yylval;
2391 PL_expect = XTERM;
79072805 2392 force_next(THING);
3280af22 2393 if (PL_lex_starts++)
79072805
LW
2394 Aop(OP_CONCAT);
2395 else {
3280af22 2396 PL_bufptr = s;
cea2e8a9 2397 return yylex();
79072805
LW
2398 }
2399 }
2400
cea2e8a9 2401 return yylex();
a0d0e21e 2402 case LEX_FORMLINE:
3280af22
NIS
2403 PL_lex_state = LEX_NORMAL;
2404 s = scan_formline(PL_bufptr);
2405 if (!PL_lex_formbrack)
a0d0e21e
LW
2406 goto rightbracket;
2407 OPERATOR(';');
79072805
LW
2408 }
2409
3280af22
NIS
2410 s = PL_bufptr;
2411 PL_oldoldbufptr = PL_oldbufptr;
2412 PL_oldbufptr = s;
607df283 2413 DEBUG_T( {
bf49b057
GS
2414 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2415 exp_name[PL_expect], s);
5f80b19c 2416 } );
463ee0b2
LW
2417
2418 retry:
378cc40b
LW
2419 switch (*s) {
2420 default:
7e2040f0 2421 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2422 goto keylookup;
cea2e8a9 2423 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2424 case 4:
2425 case 26:
2426 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2427 case 0:
3280af22
NIS
2428 if (!PL_rsfp) {
2429 PL_last_uni = 0;
2430 PL_last_lop = 0;
2431 if (PL_lex_brackets)
d98d5fff 2432 yyerror("Missing right curly or square bracket");
4e553d73 2433 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2434 "### Tokener got EOF\n");
5f80b19c 2435 } );
79072805 2436 TOKEN(0);
463ee0b2 2437 }
3280af22 2438 if (s++ < PL_bufend)
a687059c 2439 goto retry; /* ignore stray nulls */
3280af22
NIS
2440 PL_last_uni = 0;
2441 PL_last_lop = 0;
2442 if (!PL_in_eval && !PL_preambled) {
2443 PL_preambled = TRUE;
2444 sv_setpv(PL_linestr,incl_perldb());
2445 if (SvCUR(PL_linestr))
2446 sv_catpv(PL_linestr,";");
2447 if (PL_preambleav){
2448 while(AvFILLp(PL_preambleav) >= 0) {
2449 SV *tmpsv = av_shift(PL_preambleav);
2450 sv_catsv(PL_linestr, tmpsv);
2451 sv_catpv(PL_linestr, ";");
91b7def8
PP
2452 sv_free(tmpsv);
2453 }
3280af22
NIS
2454 sv_free((SV*)PL_preambleav);
2455 PL_preambleav = NULL;
91b7def8 2456 }
3280af22
NIS
2457 if (PL_minus_n || PL_minus_p) {
2458 sv_catpv(PL_linestr, "LINE: while (<>) {");
2459 if (PL_minus_l)
2460 sv_catpv(PL_linestr,"chomp;");
2461 if (PL_minus_a) {
3280af22
NIS
2462 if (PL_minus_F) {
2463 if (strchr("/'\"", *PL_splitstr)
2464 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2465 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121
PP
2466 else {
2467 char delim;
2468 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2469 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2470 delim = *s;
75c72d73 2471 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2472 "q" + (delim == '\''), delim);
3280af22 2473 for (s = PL_splitstr; *s; s++) {
54310121 2474 if (*s == '\\')
3280af22
NIS
2475 sv_catpvn(PL_linestr, "\\", 1);
2476 sv_catpvn(PL_linestr, s, 1);
54310121 2477 }
cea2e8a9 2478 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2479 }
2304df62
AD
2480 }
2481 else
75c72d73 2482 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2483 }
79072805 2484 }
3280af22
NIS
2485 sv_catpv(PL_linestr, "\n");
2486 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2487 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2488 PL_last_lop = PL_last_uni = Nullch;
3280af22 2489 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2490 SV *sv = NEWSV(85,0);
2491
2492 sv_upgrade(sv, SVt_PVMG);
3280af22 2493 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2494 (void)SvIOK_on(sv);
2495 SvIVX(sv) = 0;
57843af0 2496 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2497 }
79072805 2498 goto retry;
a687059c 2499 }
e929a76b 2500 do {
aa7440fb 2501 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2502 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2503 fake_eof:
2504 if (PL_rsfp) {
2505 if (PL_preprocess && !PL_in_eval)
2506 (void)PerlProc_pclose(PL_rsfp);
2507 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2508 PerlIO_clearerr(PL_rsfp);
2509 else
2510 (void)PerlIO_close(PL_rsfp);
2511 PL_rsfp = Nullfp;
2512 PL_doextract = FALSE;
2513 }
2514 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2515 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2516 sv_catpv(PL_linestr,";}");
2517 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2518 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2519 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2520 PL_minus_n = PL_minus_p = 0;
2521 goto retry;
2522 }
2523 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2524 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2525 sv_setpv(PL_linestr,"");
2526 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2527 }
2528 /* if it looks like the start of a BOM, check if it in fact is */
2529 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2530#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2531# ifdef __GNU_LIBRARY__
2532# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2533# define FTELL_FOR_PIPE_IS_BROKEN
2534# endif
e3f494f1
JH
2535# else
2536# ifdef __GLIBC__
2537# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2538# define FTELL_FOR_PIPE_IS_BROKEN
2539# endif
2540# endif
226017aa
DD
2541# endif
2542#endif
2543#ifdef FTELL_FOR_PIPE_IS_BROKEN
2544 /* This loses the possibility to detect the bof
2545 * situation on perl -P when the libc5 is being used.
2546 * Workaround? Maybe attach some extra state to PL_rsfp?
2547 */
2548 if (!PL_preprocess)
7e28d3af 2549 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2550#else
eb160463 2551 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2552#endif
7e28d3af 2553 if (bof) {
3280af22 2554 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2555 s = swallow_bom((U8*)s);
e929a76b 2556 }
378cc40b 2557 }
3280af22 2558 if (PL_doextract) {
a0d0e21e
LW
2559 /* Incest with pod. */
2560 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2561 sv_setpv(PL_linestr, "");
2562 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2563 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2564 PL_last_lop = PL_last_uni = Nullch;
3280af22 2565 PL_doextract = FALSE;
a0d0e21e 2566 }
4e553d73 2567 }
463ee0b2 2568 incline(s);
3280af22
NIS
2569 } while (PL_doextract);
2570 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2571 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2572 SV *sv = NEWSV(85,0);
a687059c 2573
93a17b20 2574 sv_upgrade(sv, SVt_PVMG);
3280af22 2575 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2576 (void)SvIOK_on(sv);
2577 SvIVX(sv) = 0;
57843af0 2578 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2579 }
3280af22 2580 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2581 PL_last_lop = PL_last_uni = Nullch;
57843af0 2582 if (CopLINE(PL_curcop) == 1) {
3280af22 2583 while (s < PL_bufend && isSPACE(*s))
79072805 2584 s++;
a0d0e21e 2585 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2586 s++;
44a8e56a 2587 d = Nullch;
3280af22 2588 if (!PL_in_eval) {
44a8e56a
PP
2589 if (*s == '#' && *(s+1) == '!')
2590 d = s + 2;
2591#ifdef ALTERNATE_SHEBANG
2592 else {
2593 static char as[] = ALTERNATE_SHEBANG;
2594 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2595 d = s + (sizeof(as) - 1);
2596 }
2597#endif /* ALTERNATE_SHEBANG */
2598 }
2599 if (d) {
b8378b72 2600 char *ipath;
774d564b 2601 char *ipathend;
b8378b72 2602
774d564b 2603 while (isSPACE(*d))
b8378b72
CS
2604 d++;
2605 ipath = d;
774d564b
PP
2606 while (*d && !isSPACE(*d))
2607 d++;
2608 ipathend = d;
2609
2610#ifdef ARG_ZERO_IS_SCRIPT
2611 if (ipathend > ipath) {
2612 /*
2613 * HP-UX (at least) sets argv[0] to the script name,
2614 * which makes $^X incorrect. And Digital UNIX and Linux,
2615 * at least, set argv[0] to the basename of the Perl
2616 * interpreter. So, having found "#!", we'll set it right.
2617 */
ee2f7564 2618 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2619 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2620 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2621 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2622 SvSETMAGIC(x);
2623 }
556c1dec
JH
2624 else {
2625 STRLEN blen;
2626 STRLEN llen;
2627 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2628 char *lstart = SvPV(x,llen);
2629 if (llen < blen) {
2630 bstart += blen - llen;
2631 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2632 sv_setpvn(x, ipath, ipathend - ipath);
2633 SvSETMAGIC(x);
2634 }
2635 }
2636 }
774d564b 2637 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2638 }
774d564b 2639#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2640
2641 /*
2642 * Look for options.
2643 */
748a9306 2644 d = instr(s,"perl -");
84e30d1a 2645 if (!d) {
748a9306 2646 d = instr(s,"perl");
84e30d1a
GS
2647#if defined(DOSISH)
2648 /* avoid getting into infinite loops when shebang
2649 * line contains "Perl" rather than "perl" */
2650 if (!d) {
2651 for (d = ipathend-4; d >= ipath; --d) {
2652 if ((*d == 'p' || *d == 'P')
2653 && !ibcmp(d, "perl", 4))
2654 {
2655 break;
2656 }
2657 }
2658 if (d < ipath)
2659 d = Nullch;
2660 }
2661#endif
2662 }
44a8e56a
PP
2663#ifdef ALTERNATE_SHEBANG
2664 /*
2665 * If the ALTERNATE_SHEBANG on this system starts with a
2666 * character that can be part of a Perl expression, then if
2667 * we see it but not "perl", we're probably looking at the
2668 * start of Perl code, not a request to hand off to some
2669 * other interpreter. Similarly, if "perl" is there, but
2670 * not in the first 'word' of the line, we assume the line
2671 * contains the start of the Perl program.
44a8e56a
PP
2672 */
2673 if (d && *s != '#') {
774d564b 2674 char *c = ipath;
44a8e56a
PP
2675 while (*c && !strchr("; \t\r\n\f\v#", *c))
2676 c++;
2677 if (c < d)
2678 d = Nullch; /* "perl" not in first word; ignore */
2679 else
2680 *s = '#'; /* Don't try to parse shebang line */
2681 }
774d564b 2682#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2683#ifndef MACOS_TRADITIONAL
748a9306 2684 if (!d &&
44a8e56a 2685 *s == '#' &&
774d564b 2686 ipathend > ipath &&
3280af22 2687 !PL_minus_c &&
748a9306 2688 !instr(s,"indir") &&
3280af22 2689 instr(PL_origargv[0],"perl"))
748a9306 2690 {
9f68db38 2691 char **newargv;
9f68db38 2692
774d564b
PP
2693 *ipathend = '\0';
2694 s = ipathend + 1;
3280af22 2695 while (s < PL_bufend && isSPACE(*s))
9f68db38 2696 s++;
3280af22
NIS
2697 if (s < PL_bufend) {
2698 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2699 newargv[1] = s;
3280af22 2700 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2701 s++;
2702 *s = '\0';
3280af22 2703 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2704 }
2705 else
3280af22 2706 newargv = PL_origargv;
774d564b 2707 newargv[0] = ipath;
b4748376 2708 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2709 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2710 }
bf4acbe4 2711#endif
748a9306 2712 if (d) {
3280af22
NIS
2713 U32 oldpdb = PL_perldb;
2714 bool oldn = PL_minus_n;
2715 bool oldp = PL_minus_p;
748a9306
LW
2716
2717 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2718 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2719
2720 if (*d++ == '-') {
a11ec5a9 2721 bool switches_done = PL_doswitches;
8cc95fdb
PP
2722 do {
2723 if (*d == 'M' || *d == 'm') {
2724 char *m = d;
2725 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2726 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2727 (int)(d - m), m);
2728 }
2729 d = moreswitches(d);
2730 } while (d);
155aba94
GS
2731 if ((PERLDB_LINE && !oldpdb) ||
2732 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2733 /* if we have already added "LINE: while (<>) {",
2734 we must not do it again */
748a9306 2735 {
3280af22
NIS
2736 sv_setpv(PL_linestr, "");
2737 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2739 PL_last_lop = PL_last_uni = Nullch;
3280af22 2740 PL_preambled = FALSE;
84902520 2741 if (PERLDB_LINE)
3280af22 2742 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2743 goto retry;
2744 }
a11ec5a9
RGS
2745 if (PL_doswitches && !switches_done) {
2746 int argc = PL_origargc;
2747 char **argv = PL_origargv;
2748 do {
2749 argc--,argv++;
2750 } while (argc && argv[0][0] == '-' && argv[0][1]);
2751 init_argv_symbols(argc,argv);
2752 }
a0d0e21e 2753 }
79072805 2754 }
9f68db38 2755 }
79072805 2756 }
3280af22
NIS
2757 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2758 PL_bufptr = s;
2759 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2760 return yylex();
ae986130 2761 }
378cc40b 2762 goto retry;
4fdae800 2763 case '\r':
6a27c188 2764#ifdef PERL_STRICT_CR
cea2e8a9 2765 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2766 Perl_croak(aTHX_
cc507455 2767 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2768#endif
4fdae800 2769 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2770#ifdef MACOS_TRADITIONAL
2771 case '\312':
2772#endif
378cc40b
LW
2773 s++;
2774 goto retry;
378cc40b 2775 case '#':
e929a76b 2776 case '\n':
3280af22 2777 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2778 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2779 /* handle eval qq[#line 1 "foo"\n ...] */
2780 CopLINE_dec(PL_curcop);
2781 incline(s);
2782 }
3280af22 2783 d = PL_bufend;
a687059c 2784 while (s < d && *s != '\n')
378cc40b 2785 s++;
0f85fab0 2786 if (s < d)
378cc40b 2787 s++;
78c267c1 2788 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2789 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2790 incline(s);
3280af22
NIS
2791 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2792 PL_bufptr = s;
2793 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2794 return yylex();
a687059c 2795 }
378cc40b 2796 }
a687059c 2797 else {
378cc40b 2798 *s = '\0';
3280af22 2799 PL_bufend = s;
a687059c 2800 }
378cc40b
LW
2801 goto retry;
2802 case '-':
79072805 2803 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2804 I32 ftst = 0;
2805
378cc40b 2806 s++;
3280af22 2807 PL_bufptr = s;
748a9306
LW
2808 tmp = *s++;
2809
bf4acbe4 2810 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2811 s++;
2812
2813 if (strnEQ(s,"=>",2)) {
3280af22 2814 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2815 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2816 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2817 } );
748a9306
LW
2818 OPERATOR('-'); /* unary minus */
2819 }
3280af22 2820 PL_last_uni = PL_oldbufptr;
748a9306 2821 switch (tmp) {
e5edeb50
JH
2822 case 'r': ftst = OP_FTEREAD; break;
2823 case 'w': ftst = OP_FTEWRITE; break;
2824 case 'x': ftst = OP_FTEEXEC; break;
2825 case 'o': ftst = OP_FTEOWNED; break;
2826 case 'R': ftst = OP_FTRREAD; break;
2827 case 'W': ftst = OP_FTRWRITE; break;
2828 case 'X': ftst = OP_FTREXEC; break;
2829 case 'O': ftst = OP_FTROWNED; break;
2830 case 'e': ftst = OP_FTIS; break;
2831 case 'z': ftst = OP_FTZERO; break;
2832 case 's': ftst = OP_FTSIZE; break;
2833 case 'f': ftst = OP_FTFILE; break;
2834 case 'd': ftst = OP_FTDIR; break;
2835 case 'l': ftst = OP_FTLINK; break;
2836 case 'p': ftst = OP_FTPIPE; break;
2837 case 'S': ftst = OP_FTSOCK; break;
2838 case 'u': ftst = OP_FTSUID; break;
2839 case 'g': ftst = OP_FTSGID; break;
2840 case 'k': ftst = OP_FTSVTX; break;
2841 case 'b': ftst = OP_FTBLK; break;
2842 case 'c': ftst = OP_FTCHR; break;
2843 case 't': ftst = OP_FTTTY; break;
2844 case 'T': ftst = OP_FTTEXT; break;
2845 case 'B': ftst = OP_FTBINARY; break;
2846 case 'M': case 'A': case 'C':
2847 gv_fetchpv("\024",TRUE, SVt_PV);
2848 switch (tmp) {
2849 case 'M': ftst = OP_FTMTIME; break;
2850 case 'A': ftst = OP_FTATIME; break;
2851 case 'C': ftst = OP_FTCTIME; break;
2852 default: break;
2853 }
2854 break;
378cc40b 2855 default:
378cc40b
LW
2856 break;
2857 }
e5edeb50 2858 if (ftst) {
eb160463 2859 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2860 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2861 "### Saw file test %c\n", (int)ftst);
5f80b19c 2862 } );
e5edeb50
JH
2863 FTST(ftst);
2864 }
2865 else {
2866 /* Assume it was a minus followed by a one-letter named
2867 * subroutine call (or a -bareword), then. */
95c31fe3 2868 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2869 "### %c looked like a file test but was not\n",
2870 (int)ftst);
5f80b19c 2871 } );
e5edeb50
JH
2872 s -= 2;
2873 }
378cc40b 2874 }
a687059c
LW
2875 tmp = *s++;
2876 if (*s == tmp) {
2877 s++;
3280af22 2878 if (PL_expect == XOPERATOR)
79072805
LW
2879 TERM(POSTDEC);
2880 else
2881 OPERATOR(PREDEC);
2882 }
2883 else if (*s == '>') {
2884 s++;
2885 s = skipspace(s);
7e2040f0 2886 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2887 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2888 TOKEN(ARROW);
79072805 2889 }
748a9306
LW
2890 else if (*s == '$')
2891 OPERATOR(ARROW);
463ee0b2 2892 else
748a9306 2893 TERM(ARROW);
a687059c 2894 }
3280af22 2895 if (PL_expect == XOPERATOR)
79072805
LW
2896 Aop(OP_SUBTRACT);
2897 else {
3280af22 2898 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2899 check_uni();
79072805 2900 OPERATOR('-'); /* unary minus */
2f3197b3 2901 }
79072805 2902
378cc40b 2903 case '+':
a687059c
LW
2904 tmp = *s++;
2905 if (*s == tmp) {
378cc40b 2906 s++;
3280af22 2907 if (PL_expect == XOPERATOR)
79072805
LW
2908 TERM(POSTINC);
2909 else
2910 OPERATOR(PREINC);
378cc40b 2911 }
3280af22 2912 if (PL_expect == XOPERATOR)
79072805
LW
2913 Aop(OP_ADD);
2914 else {
3280af22 2915 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2916 check_uni();
a687059c 2917 OPERATOR('+');
2f3197b3 2918 }
a687059c 2919
378cc40b 2920 case '*':
3280af22
NIS
2921 if (PL_expect != XOPERATOR) {
2922 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2923 PL_expect = XOPERATOR;
2924 force_ident(PL_tokenbuf, '*');
2925 if (!*PL_tokenbuf)
a0d0e21e 2926 PREREF('*');
79072805 2927 TERM('*');
a687059c 2928 }
79072805
LW
2929 s++;
2930 if (*s == '*') {
a687059c 2931 s++;
79072805 2932 PWop(OP_POW);
a687059c 2933 }
79072805
LW
2934 Mop(OP_MULTIPLY);
2935
378cc40b 2936 case '%':
3280af22 2937 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2938 ++s;
2939 Mop(OP_MODULO);
a687059c 2940 }
3280af22
NIS
2941 PL_tokenbuf[0] = '%';
2942 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2943 if (!PL_tokenbuf[1]) {
2944 if (s == PL_bufend)
bbce6d69
PP
2945 yyerror("Final % should be \\% or %name");
2946 PREREF('%');
a687059c 2947 }
3280af22 2948 PL_pending_ident = '%';
bbce6d69 2949 TERM('%');
a687059c 2950
378cc40b 2951 case '^':
79072805 2952 s++;
a0d0e21e 2953 BOop(OP_BIT_XOR);
79072805 2954 case '[':
3280af22 2955 PL_lex_brackets++;
79072805 2956 /* FALL THROUGH */
378cc40b 2957 case '~':
378cc40b 2958 case ',':
378cc40b
LW
2959 tmp = *s++;
2960 OPERATOR(tmp);
a0d0e21e
LW
2961 case ':':
2962 if (s[1] == ':') {
2963 len = 0;
2964 goto just_a_word;
2965 }
2966 s++;
09bef843
SB
2967 switch (PL_expect) {
2968 OP *attrs;
2969 case XOPERATOR:
2970 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2971 break;
2972 PL_bufptr = s; /* update in case we back off */
2973 goto grabattrs;
2974 case XATTRBLOCK:
2975 PL_expect = XBLOCK;
2976 goto grabattrs;
2977 case XATTRTERM:
2978 PL_expect = XTERMBLOCK;
2979 grabattrs:
2980 s = skipspace(s);
2981 attrs = Nullop;
7e2040f0 2982 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2983 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2984 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2985 if (tmp < 0) tmp = -tmp;
2986 switch (tmp) {
2987 case KEY_or:
2988 case KEY_and:
c963b151 2989 case KEY_err:
f9829d6b
GS
2990 case KEY_for:
2991 case KEY_unless:
2992 case KEY_if:
2993 case KEY_while:
2994 case KEY_until:
2995 goto got_attrs;
2996 default:
2997 break;
2998 }
2999 }
09bef843
SB
3000 if (*d == '(') {
3001 d = scan_str(d,TRUE,TRUE);
3002 if (!d) {
09bef843
SB
3003 /* MUST advance bufptr here to avoid bogus
3004 "at end of line" context messages from yyerror().
3005 */
3006 PL_bufptr = s + len;
3007 yyerror("Unterminated attribute parameter in attribute list");
3008 if (attrs)
3009 op_free(attrs);
3010 return 0; /* EOF indicator */
3011 }
3012 }
3013 if (PL_lex_stuff) {
3014 SV *sv = newSVpvn(s, len);
3015 sv_catsv(sv, PL_lex_stuff);
3016 attrs = append_elem(OP_LIST, attrs,
3017 newSVOP(OP_CONST, 0, sv));
3018 SvREFCNT_dec(PL_lex_stuff);
3019 PL_lex_stuff = Nullsv;
3020 }
3021 else {
d3cea301
SB
3022 /* NOTE: any CV attrs applied here need to be part of
3023 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3024 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3025 CvLVALUE_on(PL_compcv);
3026 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3027 CvLOCKED_on(PL_compcv);
3028 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3029 CvMETHOD_on(PL_compcv);
87ecf892 3030#ifdef USE_ITHREADS
d3cea301
SB
3031 else if (PL_in_my == KEY_our && len == 6 &&
3032 strnEQ(s, "unique", len))
7fb37951 3033 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3034#endif
78f9721b
SM
3035 /* After we've set the flags, it could be argued that
3036 we don't need to do the attributes.pm-based setting
3037 process, and shouldn't bother appending recognized
d3cea301
SB
3038 flags. To experiment with that, uncomment the
3039 following "else". (Note that's already been
3040 uncommented. That keeps the above-applied built-in
3041 attributes from being intercepted (and possibly
3042 rejected) by a package's attribute routines, but is
3043 justified by the performance win for the common case
3044 of applying only built-in attributes.) */
0256094b 3045 else
78f9721b
SM
3046 attrs = append_elem(OP_LIST, attrs,
3047 newSVOP(OP_CONST, 0,
3048 newSVpvn(s, len)));
09bef843
SB
3049 }
3050 s = skipspace(d);
0120eecf 3051 if (*s == ':' && s[1] != ':')
09bef843 3052 s = skipspace(s+1);
0120eecf
GS
3053 else if (s == d)
3054 break; /* require real whitespace or :'s */
09bef843 3055 }
f9829d6b 3056 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3057 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3058 char q = ((*s == '\'') ? '"' : '\'');
3059 /* If here for an expression, and parsed no attrs, back off. */
3060 if (tmp == '=' && !attrs) {
3061 s = PL_bufptr;
3062 break;
3063 }
3064 /* MUST advance bufptr here to avoid bogus "at end of line"
3065 context messages from yyerror().
3066 */
3067 PL_bufptr = s;
3068 if (!*s)
3069 yyerror("Unterminated attribute list");
3070 else
3071 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3072 q, *s, q));
3073 if (attrs)
3074 op_free(attrs);
3075 OPERATOR(':');
3076 }
f9829d6b 3077 got_attrs:
09bef843
SB
3078 if (attrs) {
3079 PL_nextval[PL_nexttoke].opval = attrs;
3080 force_next(THING);
3081 }
3082 TOKEN(COLONATTR);
3083 }
a0d0e21e 3084 OPERATOR(':');
8990e307
LW
3085 case '(':
3086 s++;
3280af22
NIS
3087 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3088 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3089 else
3280af22 3090 PL_expect = XTERM;
a0d0e21e 3091 TOKEN('(');
378cc40b 3092 case ';':
f4dd75d9 3093 CLINE;
378cc40b
LW
3094 tmp = *s++;
3095 OPERATOR(tmp);
3096 case ')':
378cc40b 3097 tmp = *s++;
16d20bd9
AD
3098 s = skipspace(s);
3099 if (*s == '{')
3100 PREBLOCK(tmp);
378cc40b 3101 TERM(tmp);
79072805
LW
3102 case ']':
3103 s++;
3280af22 3104 if (PL_lex_brackets <= 0)
d98d5fff 3105 yyerror("Unmatched right square bracket");
463ee0b2 3106 else
3280af22
NIS
3107 --PL_lex_brackets;
3108 if (PL_lex_state == LEX_INTERPNORMAL) {
3109 if (PL_lex_brackets == 0) {
a0d0e21e 3110 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3111 PL_lex_state = LEX_INTERPEND;
79072805
LW
3112 }
3113 }
4633a7c4 3114 TERM(']');
79072805
LW
3115 case '{':
3116 leftbracket:
79072805 3117 s++;
3280af22
NIS
3118 if (PL_lex_brackets > 100) {
3119 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3120 if (newlb != PL_lex_brackstack) {
8990e307 3121 SAVEFREEPV(newlb);
3280af22 3122 PL_lex_brackstack = newlb;
8990e307
LW
3123 }
3124 }
3280af22 3125 switch (PL_expect) {
a0d0e21e 3126 case XTERM:
3280af22 3127 if (PL_lex_formbrack) {
a0d0e21e
LW
3128 s--;
3129 PRETERMBLOCK(DO);
3130 }
3280af22
NIS
3131 if (PL_oldoldbufptr == PL_last_lop)
3132 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3133 else
3280af22 3134 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3135 OPERATOR(HASHBRACK);
a0d0e21e 3136 case XOPERATOR:
bf4acbe4 3137 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3138 s++;
44a8e56a 3139 d = s;
3280af22
NIS
3140 PL_tokenbuf[0] = '\0';
3141 if (d < PL_bufend && *d == '-') {
3142 PL_tokenbuf[0] = '-';
44a8e56a 3143 d++;
bf4acbe4 3144 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3145 d++;
3146 }
7e2040f0 3147 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3148 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3149 FALSE, &len);
bf4acbe4 3150 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3151 d++;
3152 if (*d == '}') {
3280af22 3153 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3154 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3155 if (minus)
3156 force_next('-');
748a9306
LW
3157 }
3158 }
3159 /* FALL THROUGH */
09bef843 3160 case XATTRBLOCK:
748a9306 3161 case XBLOCK:
3280af22
NIS
3162 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3163 PL_expect = XSTATE;
a0d0e21e 3164 break;
09bef843 3165 case XATTRTERM:
a0d0e21e 3166 case XTERMBLOCK:
3280af22
NIS
3167 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3168 PL_expect = XSTATE;
a0d0e21e
LW
3169 break;
3170 default: {
3171 char *t;
3280af22
NIS
3172 if (PL_oldoldbufptr == PL_last_lop)
3173 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3174 else
3280af22 3175 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3176 s = skipspace(s);
8452ff4b
SB
3177 if (*s == '}') {
3178 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3179 PL_expect = XTERM;
3180 /* This hack is to get the ${} in the message. */
3181 PL_bufptr = s+1;
3182 yyerror("syntax error");
3183 break;
3184 }
a0d0e21e 3185 OPERATOR(HASHBRACK);
8452ff4b 3186 }
b8a4b1be
GS
3187 /* This hack serves to disambiguate a pair of curlies
3188 * as being a block or an anon hash. Normally, expectation
3189 * determines that, but in cases where we're not in a
3190 * position to expect anything in particular (like inside
3191 * eval"") we have to resolve the ambiguity. This code
3192 * covers the case where the first term in the curlies is a
3193 * quoted string. Most other cases need to be explicitly
3194 * disambiguated by prepending a `+' before the opening
3195 * curly in order to force resolution as an anon hash.
3196 *
3197 * XXX should probably propagate the outer expectation
3198 * into eval"" to rely less on this hack, but that could
3199 * potentially break current behavior of eval"".
3200 * GSAR 97-07-21
3201 */
3202 t = s;
3203 if (*s == '\'' || *s == '"' || *s == '`') {
3204 /* common case: get past first string, handling escapes */
3280af22 3205 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3206 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3207 t++;
3208 t++;
a0d0e21e 3209 }
b8a4b1be 3210 else if (*s == 'q') {
3280af22 3211 if (++t < PL_bufend
b8a4b1be 3212 && (!isALNUM(*t)
3280af22 3213 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3214 && !isALNUM(*t))))
3215 {
b8a4b1be
GS
3216 char *tmps;
3217 char open, close, term;
3218 I32 brackets = 1;
3219
3280af22 3220 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3221 t++;
3222 term = *t;
3223 open = term;
3224 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3225 term = tmps[5];
3226 close = term;
3227 if (open == close)
3280af22
NIS
3228 for (t++; t < PL_bufend; t++) {
3229 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3230 t++;
6d07e5e9 3231 else if (*t == open)
b8a4b1be
GS
3232 break;
3233 }
3234 else
3280af22
NIS
3235 for (t++; t < PL_bufend; t++) {
3236 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3237 t++;
6d07e5e9 3238 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3239 break;
3240 else if (*t == open)
3241 brackets++;
3242 }
3243 }
3244 t++;
a0d0e21e 3245 }
7e2040f0 3246 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3247 t += UTF8SKIP(t);
7e2040f0 3248 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3249 t += UTF8SKIP(t);
a0d0e21e 3250 }
3280af22 3251 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3252 t++;
b8a4b1be
GS
3253 /* if comma follows first term, call it an anon hash */
3254 /* XXX it could be a comma expression with loop modifiers */
3280af22 3255 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3256 || (*t == '=' && t[1] == '>')))
a0d0e21e 3257 OPERATOR(HASHBRACK);
3280af22 3258 if (PL_expect == XREF)
4e4e412b 3259 PL_expect = XTERM;
a0d0e21e 3260 else {
3280af22
NIS
3261 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3262 PL_expect = XSTATE;
a0d0e21e 3263 }
8990e307 3264 }
a0d0e21e 3265 break;
463ee0b2 3266 }
57843af0 3267 yylval.ival = CopLINE(PL_curcop);
79072805 3268 if (isSPACE(*s) || *s == '#')
3280af22 3269 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3270 TOKEN('{');
378cc40b 3271 case '}':
79072805
LW
3272 rightbracket:
3273 s++;
3280af22 3274 if (PL_lex_brackets <= 0)
d98d5fff 3275 yyerror("Unmatched right curly bracket");
463ee0b2 3276 else
3280af22 3277 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3278 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3279 PL_lex_formbrack = 0;
3280 if (PL_lex_state == LEX_INTERPNORMAL) {
3281 if (PL_lex_brackets == 0) {
9059aa12
LW
3282 if (PL_expect & XFAKEBRACK) {
3283 PL_expect &= XENUMMASK;
3280af22
NIS
3284 PL_lex_state = LEX_INTERPEND;
3285 PL_bufptr = s;
cea2e8a9 3286 return yylex(); /* ignore fake brackets */
79072805 3287 }
fa83b5b6 3288 if (*s == '-' && s[1] == '>')
3280af22 3289 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3290 else if (*s != '[' && *s != '{')
3280af22 3291 PL_lex_state = LEX_INTERPEND;
79072805
LW
3292 }
3293 }
9059aa12
LW
3294 if (PL_expect & XFAKEBRACK) {
3295 PL_expect &= XENUMMASK;
3280af22 3296 PL_bufptr = s;
cea2e8a9 3297 return yylex(); /* ignore fake brackets */
748a9306 3298 }
79072805
LW
3299 force_next('}');
3300 TOKEN(';');
378cc40b
LW
3301 case '&':
3302 s++;
3303 tmp = *s++;
3304 if (tmp == '&')
a0d0e21e 3305 AOPERATOR(ANDAND);
378cc40b 3306 s--;
3280af22 3307 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3308 if (ckWARN(WARN_SEMICOLON)
3309 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3310 {
57843af0 3311 CopLINE_dec(PL_curcop);
9014280d 3312 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3313 CopLINE_inc(PL_curcop);
463ee0b2 3314 }
79072805 3315 BAop(OP_BIT_AND);
463ee0b2 3316 }
79072805 3317
3280af22
NIS
3318 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3319 if (*PL_tokenbuf) {
3320 PL_expect = XOPERATOR;
3321 force_ident(PL_tokenbuf, '&');
463ee0b2 3322 }
79072805
LW
3323 else
3324 PREREF('&');
c07a80fd 3325 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3326 TERM('&');
3327
378cc40b
LW
3328 case '|':
3329 s++;
3330 tmp = *s++;
3331 if (tmp == '|')
a0d0e21e 3332 AOPERATOR(OROR);
378cc40b 3333 s--;
79072805 3334 BOop(OP_BIT_OR);
378cc40b
LW
3335 case '=':
3336 s++;
3337 tmp = *s++;
3338 if (tmp == '=')
79072805
LW
3339 Eop(OP_EQ);
3340 if (tmp == '>')
3341 OPERATOR(',');
378cc40b 3342 if (tmp == '~')
79072805 3343 PMop(OP_MATCH);
599cee73 3344 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3345 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3346 s--;
3280af22
NIS
3347 if (PL_expect == XSTATE && isALPHA(tmp) &&
3348 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3349 {
3280af22
NIS
3350 if (PL_in_eval && !PL_rsfp) {
3351 d = PL_bufend;
a5f75d66
AD
3352 while (s < d) {
3353 if (*s++ == '\n') {
3354 incline(s);
3355 if (strnEQ(s,"=cut",4)) {
3356 s = strchr(s,'\n');
3357 if (s)
3358 s++;
3359 else
3360 s = d;
3361 incline(s);
3362 goto retry;
3363 }
3364 }
3365 }
3366 goto retry;
3367 }
3280af22
NIS
3368 s = PL_bufend;
3369 PL_doextract = TRUE;
a0d0e21e
LW
3370 goto retry;
3371 }
3280af22 3372 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3373 char *t;
51882d45 3374#ifdef PERL_STRICT_CR
bf4acbe4 3375 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3376#else
bf4acbe4 3377 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3378#endif
a0d0e21e
LW
3379 if (*t == '\n' || *t == '#') {
3380 s--;
3280af22 3381 PL_expect = XBLOCK;
a0d0e21e
LW
3382 goto leftbracket;
3383 }
79072805 3384 }
a0d0e21e
LW
3385 yylval.ival = 0;
3386 OPERATOR(ASSIGNOP);
378cc40b
LW
3387 case '!':
3388 s++;
3389 tmp = *s++;
3390 if (tmp == '=')
79072805 3391 Eop(OP_NE);
378cc40b 3392 if (tmp == '~')
79072805 3393 PMop(OP_NOT);
378cc40b
LW
3394 s--;
3395 OPERATOR('!');
3396 case '<':
3280af22 3397 if (PL_expect != XOPERATOR) {
93a17b20 3398 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3399 check_uni();
79072805
LW
3400 if (s[1] == '<')
3401 s = scan_heredoc(s);
3402 else
3403 s = scan_inputsymbol(s);
3404 TERM(sublex_start());
378cc40b
LW
3405 }
3406 s++;
3407 tmp = *s++;
3408 if (tmp == '<')
79072805 3409 SHop(OP_LEFT_SHIFT);
395c3793
LW
3410 if (tmp == '=') {
3411 tmp = *s++;
3412 if (tmp == '>')
79072805 3413 Eop(OP_NCMP);
395c3793 3414 s--;
79072805 3415 Rop(OP_LE);
395c3793 3416 }
378cc40b 3417 s--;
79072805 3418 Rop(OP_LT);
378cc40b
LW
3419 case '>':
3420 s++;
3421 tmp = *s++;
3422 if (tmp == '>')
79072805 3423 SHop(OP_RIGHT_SHIFT);
378cc40b 3424 if (tmp == '=')
79072805 3425 Rop(OP_GE);
378cc40b 3426 s--;
79072805 3427 Rop(OP_GT);
378cc40b
LW
3428
3429 case '$':
bbce6d69
PP
3430 CLINE;
3431
3280af22
NIS
3432 if (PL_expect == XOPERATOR) {
3433 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3434 PL_expect = XTERM;
a0d0e21e 3435 depcom();
bbce6d69 3436 return ','; /* grandfather non-comma-format format */
a0d0e21e 3437 }
8990e307 3438 }
a0d0e21e 3439
7e2040f0 3440 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3441 PL_tokenbuf[0] = '@';
376b8730
SM
3442 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3443 sizeof PL_tokenbuf - 1, FALSE);
3444 if (PL_expect == XOPERATOR)
3445 no_op("Array length", s);
3280af22 3446 if (!PL_tokenbuf[1])
a0d0e21e 3447 PREREF(DOLSHARP);
3280af22
NIS
3448 PL_expect = XOPERATOR;
3449 PL_pending_ident = '#';
463ee0b2 3450 TOKEN(DOLSHARP);
79072805 3451 }
bbce6d69 3452
3280af22 3453 PL_tokenbuf[0] = '$';
376b8730
SM
3454 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3455 sizeof PL_tokenbuf - 1, FALSE);
3456 if (PL_expect == XOPERATOR)
3457 no_op("Scalar", s);
3280af22
NIS
3458 if (!PL_tokenbuf[1]) {
3459 if (s == PL_bufend)
bbce6d69
PP
3460 yyerror("Final $ should be \\$ or $name");
3461 PREREF('$');
8990e307 3462 }
a0d0e21e 3463
bbce6d69 3464 /* This kludge not intended to be bulletproof. */
3280af22 3465 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3466 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3467 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3468 yylval.opval->op_private = OPpCONST_ARYBASE;
3469 TERM(THING);
3470 }
3471
ff68c719 3472 d = s;
69d2bceb 3473 tmp = (I32)*s;
3280af22 3474 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3475 s = skipspace(s);
3476
3280af22 3477 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3478 char *t;
3479 if (*s == '[') {
3280af22 3480 PL_tokenbuf[0] = '@';
599cee73 3481 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3482 for(t = s + 1;
7e2040f0 3483 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3484 t++) ;
a0d0e21e 3485 if (*t++ == ',') {
3280af22
NIS
3486 PL_bufptr = skipspace(PL_bufptr);
3487 while (t < PL_bufend && *t != ']')
bbce6d69 3488 t++;
9014280d 3489 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3490 "Multidimensional syntax %.*s not supported",
3491 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3492 }
3493 }
bbce6d69
PP
3494 }
3495 else if (*s == '{') {
3280af22 3496 PL_tokenbuf[0] = '%';
599cee73 3497 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3498 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3499 {
3280af22 3500 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3501 STRLEN len;
3502 for (t++; isSPACE(*t); t++) ;
7e2040f0 3503 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3504 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3505 for (; isSPACE(*t); t++) ;
864dbfa3</