This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
ExtUtils::Constant 0.13
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff 65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
IRC
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
IRC
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
167 */
2f3197b3 168#define UNI(f) return(yylval.ival = f, \
075953c3 169 REPORT("uni",f) \
3280af22
NIS
170 PL_expect = XTERM, \
171 PL_bufptr = s, \
172 PL_last_uni = PL_oldbufptr, \
173 PL_last_lop_op = f, \
a687059c
LW
174 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
175
79072805 176#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 177 REPORT("uni",f) \
3280af22
NIS
178 PL_bufptr = s, \
179 PL_last_uni = PL_oldbufptr, \
79072805
LW
180 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
181
9f68db38 182/* grandfather return to old style */
3280af22 183#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 184
8fa7f367
JH
185#ifdef DEBUGGING
186
2d00ba3b 187STATIC void
61b2116b 188S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 189{
998054bd 190 DEBUG_T({
9c5ffd7c 191 SV* report = newSVpv(thing, 0);
29b291f7
RB
192 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
193 (IV)rv);
998054bd
SC
194
195 if (s - PL_bufptr > 0)
196 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
197 else {
198 if (PL_oldbufptr && *PL_oldbufptr)
199 sv_catpv(report, PL_tokenbuf);
200 }
201 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 202 });
998054bd
SC
203}
204
8fa7f367
JH
205#endif
206
ffb4593c
NT
207/*
208 * S_ao
209 *
c963b151
BD
210 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
211 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
212 */
213
76e3520e 214STATIC int
cea2e8a9 215S_ao(pTHX_ int toketype)
a0d0e21e 216{
3280af22
NIS
217 if (*PL_bufptr == '=') {
218 PL_bufptr++;
a0d0e21e
LW
219 if (toketype == ANDAND)
220 yylval.ival = OP_ANDASSIGN;
221 else if (toketype == OROR)
222 yylval.ival = OP_ORASSIGN;
c963b151
BD
223 else if (toketype == DORDOR)
224 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
225 toketype = ASSIGNOP;
226 }
227 return toketype;
228}
229
ffb4593c
NT
230/*
231 * S_no_op
232 * When Perl expects an operator and finds something else, no_op
233 * prints the warning. It always prints "<something> found where
234 * operator expected. It prints "Missing semicolon on previous line?"
235 * if the surprise occurs at the start of the line. "do you need to
236 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
237 * where the compiler doesn't know if foo is a method call or a function.
238 * It prints "Missing operator before end of line" if there's nothing
239 * after the missing operator, or "... before <...>" if there is something
240 * after the missing operator.
241 */
242
76e3520e 243STATIC void
cea2e8a9 244S_no_op(pTHX_ char *what, char *s)
463ee0b2 245{
3280af22
NIS
246 char *oldbp = PL_bufptr;
247 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 248
1189a94a
GS
249 if (!s)
250 s = oldbp;
07c798fb 251 else
1189a94a 252 PL_bufptr = s;
cea2e8a9 253 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 254 if (is_first)
cea2e8a9 255 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 256 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 257 char *t;
7e2040f0 258 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 259 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 260 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 261 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 262 }
07c798fb
HS
263 else {
264 assert(s >= oldbp);
cea2e8a9 265 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 266 }
3280af22 267 PL_bufptr = oldbp;
8990e307
LW
268}
269
ffb4593c
NT
270/*
271 * S_missingterm
272 * Complain about missing quote/regexp/heredoc terminator.
273 * If it's called with (char *)NULL then it cauterizes the line buffer.
274 * If we're in a delimited string and the delimiter is a control
275 * character, it's reformatted into a two-char sequence like ^C.
276 * This is fatal.
277 */
278
76e3520e 279STATIC void
cea2e8a9 280S_missingterm(pTHX_ char *s)
8990e307
LW
281{
282 char tmpbuf[3];
283 char q;
284 if (s) {
285 char *nl = strrchr(s,'\n');
d2719217 286 if (nl)
8990e307
LW
287 *nl = '\0';
288 }
9d116dd7
JH
289 else if (
290#ifdef EBCDIC
291 iscntrl(PL_multi_close)
292#else
293 PL_multi_close < 32 || PL_multi_close == 127
294#endif
295 ) {
8990e307 296 *tmpbuf = '^';
3280af22 297 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
298 s = "\\n";
299 tmpbuf[2] = '\0';
300 s = tmpbuf;
301 }
302 else {
eb160463 303 *tmpbuf = (char)PL_multi_close;
8990e307
LW
304 tmpbuf[1] = '\0';
305 s = tmpbuf;
306 }
307 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 308 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 309}
79072805 310
ffb4593c
NT
311/*
312 * Perl_deprecate
ffb4593c
NT
313 */
314
79072805 315void
864dbfa3 316Perl_deprecate(pTHX_ char *s)
a0d0e21e 317{
599cee73 318 if (ckWARN(WARN_DEPRECATED))
9014280d 319 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
320}
321
12bcd1a6
PM
322void
323Perl_deprecate_old(pTHX_ char *s)
324{
325 /* This function should NOT be called for any new deprecated warnings */
326 /* Use Perl_deprecate instead */
327 /* */
328 /* It is here to maintain backward compatibility with the pre-5.8 */
329 /* warnings category hierarchy. The "deprecated" category used to */
330 /* live under the "syntax" category. It is now a top-level category */
331 /* in its own right. */
332
333 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
334 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
335 "Use of %s is deprecated", s);
336}
337
ffb4593c
NT
338/*
339 * depcom
9cbb5ea2 340 * Deprecate a comma-less variable list.
ffb4593c
NT
341 */
342
76e3520e 343STATIC void
cea2e8a9 344S_depcom(pTHX)
a0d0e21e 345{
12bcd1a6 346 deprecate_old("comma-less variable list");
a0d0e21e
LW
347}
348
ffb4593c 349/*
9cbb5ea2
GS
350 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
351 * utf16-to-utf8-reversed.
ffb4593c
NT
352 */
353
c39cd008
GS
354#ifdef PERL_CR_FILTER
355static void
356strip_return(SV *sv)
357{
358 register char *s = SvPVX(sv);
359 register char *e = s + SvCUR(sv);
360 /* outer loop optimized to do nothing if there are no CR-LFs */
361 while (s < e) {
362 if (*s++ == '\r' && *s == '\n') {
363 /* hit a CR-LF, need to copy the rest */
364 register char *d = s - 1;
365 *d++ = *s++;
366 while (s < e) {
367 if (*s == '\r' && s[1] == '\n')
368 s++;
369 *d++ = *s++;
370 }
371 SvCUR(sv) -= s - d;
372 return;
373 }
374 }
375}
a868473f 376
76e3520e 377STATIC I32
c39cd008 378S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 379{
c39cd008
GS
380 I32 count = FILTER_READ(idx+1, sv, maxlen);
381 if (count > 0 && !maxlen)
382 strip_return(sv);
383 return count;
a868473f
NIS
384}
385#endif
386
ffb4593c
NT
387/*
388 * Perl_lex_start
9cbb5ea2
GS
389 * Initialize variables. Uses the Perl save_stack to save its state (for
390 * recursive calls to the parser).
ffb4593c
NT
391 */
392
a0d0e21e 393void
864dbfa3 394Perl_lex_start(pTHX_ SV *line)
79072805 395{
8990e307
LW
396 char *s;
397 STRLEN len;
398
3280af22
NIS
399 SAVEI32(PL_lex_dojoin);
400 SAVEI32(PL_lex_brackets);
3280af22
NIS
401 SAVEI32(PL_lex_casemods);
402 SAVEI32(PL_lex_starts);
403 SAVEI32(PL_lex_state);
7766f137 404 SAVEVPTR(PL_lex_inpat);
3280af22 405 SAVEI32(PL_lex_inwhat);
18b09519
GS
406 if (PL_lex_state == LEX_KNOWNEXT) {
407 I32 toke = PL_nexttoke;
408 while (--toke >= 0) {
409 SAVEI32(PL_nexttype[toke]);
410 SAVEVPTR(PL_nextval[toke]);
411 }
412 SAVEI32(PL_nexttoke);
18b09519 413 }
57843af0 414 SAVECOPLINE(PL_curcop);
3280af22
NIS
415 SAVEPPTR(PL_bufptr);
416 SAVEPPTR(PL_bufend);
417 SAVEPPTR(PL_oldbufptr);
418 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
419 SAVEPPTR(PL_last_lop);
420 SAVEPPTR(PL_last_uni);
3280af22
NIS
421 SAVEPPTR(PL_linestart);
422 SAVESPTR(PL_linestr);
423 SAVEPPTR(PL_lex_brackstack);
424 SAVEPPTR(PL_lex_casestack);
c76ac1ee 425 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
426 SAVESPTR(PL_lex_stuff);
427 SAVEI32(PL_lex_defer);
09bef843 428 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 429 SAVESPTR(PL_lex_repl);
bebdddfc
GS
430 SAVEINT(PL_expect);
431 SAVEINT(PL_lex_expect);
3280af22
NIS
432
433 PL_lex_state = LEX_NORMAL;
434 PL_lex_defer = 0;
435 PL_expect = XSTATE;
436 PL_lex_brackets = 0;
3280af22
NIS
437 New(899, PL_lex_brackstack, 120, char);
438 New(899, PL_lex_casestack, 12, char);
439 SAVEFREEPV(PL_lex_brackstack);
440 SAVEFREEPV(PL_lex_casestack);
441 PL_lex_casemods = 0;
442 *PL_lex_casestack = '\0';
443 PL_lex_dojoin = 0;
444 PL_lex_starts = 0;
445 PL_lex_stuff = Nullsv;
446 PL_lex_repl = Nullsv;
447 PL_lex_inpat = 0;
76be56bc 448 PL_nexttoke = 0;
3280af22 449 PL_lex_inwhat = 0;
09bef843 450 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
451 PL_linestr = line;
452 if (SvREADONLY(PL_linestr))
453 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
454 s = SvPV(PL_linestr, len);
6f27f9a7 455 if (!len || s[len-1] != ';') {
3280af22
NIS
456 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
457 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
458 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 459 }
3280af22
NIS
460 SvTEMP_off(PL_linestr);
461 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
462 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 463 PL_last_lop = PL_last_uni = Nullch;
3280af22 464 PL_rsfp = 0;
79072805 465}
a687059c 466
ffb4593c
NT
467/*
468 * Perl_lex_end
9cbb5ea2
GS
469 * Finalizer for lexing operations. Must be called when the parser is
470 * done with the lexer.
ffb4593c
NT
471 */
472
463ee0b2 473void
864dbfa3 474Perl_lex_end(pTHX)
463ee0b2 475{
3280af22 476 PL_doextract = FALSE;
463ee0b2
LW
477}
478
ffb4593c
NT
479/*
480 * S_incline
481 * This subroutine has nothing to do with tilting, whether at windmills
482 * or pinball tables. Its name is short for "increment line". It
57843af0 483 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 484 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
485 * # line 500 "foo.pm"
486 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
487 */
488
76e3520e 489STATIC void
cea2e8a9 490S_incline(pTHX_ char *s)
463ee0b2
LW
491{
492 char *t;
493 char *n;
73659bf1 494 char *e;
463ee0b2 495 char ch;
463ee0b2 496
57843af0 497 CopLINE_inc(PL_curcop);
463ee0b2
LW
498 if (*s++ != '#')
499 return;
bf4acbe4 500 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
501 if (strnEQ(s, "line", 4))
502 s += 4;
503 else
504 return;
084592ab 505 if (SPACE_OR_TAB(*s))
73659bf1 506 s++;
4e553d73 507 else
73659bf1 508 return;
bf4acbe4 509 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
510 if (!isDIGIT(*s))
511 return;
512 n = s;
513 while (isDIGIT(*s))
514 s++;
bf4acbe4 515 while (SPACE_OR_TAB(*s))
463ee0b2 516 s++;
73659bf1 517 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 518 s++;
73659bf1
GS
519 e = t + 1;
520 }
463ee0b2 521 else {
463ee0b2 522 for (t = s; !isSPACE(*t); t++) ;
73659bf1 523 e = t;
463ee0b2 524 }
bf4acbe4 525 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
526 e++;
527 if (*e != '\n' && *e != '\0')
528 return; /* false alarm */
529
463ee0b2
LW
530 ch = *t;
531 *t = '\0';
f4dd75d9 532 if (t - s > 0) {
05ec9bb3 533 CopFILE_free(PL_curcop);
57843af0 534 CopFILE_set(PL_curcop, s);
f4dd75d9 535 }
463ee0b2 536 *t = ch;
57843af0 537 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
538}
539
ffb4593c
NT
540/*
541 * S_skipspace
542 * Called to gobble the appropriate amount and type of whitespace.
543 * Skips comments as well.
544 */
545
76e3520e 546STATIC char *
cea2e8a9 547S_skipspace(pTHX_ register char *s)
a687059c 548{
3280af22 549 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 550 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
551 s++;
552 return s;
553 }
554 for (;;) {
fd049845 555 STRLEN prevlen;
09bef843 556 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 557 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
558 while (s < PL_bufend && isSPACE(*s)) {
559 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
560 incline(s);
561 }
ffb4593c
NT
562
563 /* comment */
3280af22
NIS
564 if (s < PL_bufend && *s == '#') {
565 while (s < PL_bufend && *s != '\n')
463ee0b2 566 s++;
60e6418e 567 if (s < PL_bufend) {
463ee0b2 568 s++;
60e6418e
GS
569 if (PL_in_eval && !PL_rsfp) {
570 incline(s);
571 continue;
572 }
573 }
463ee0b2 574 }
ffb4593c
NT
575
576 /* only continue to recharge the buffer if we're at the end
577 * of the buffer, we're not reading from a source filter, and
578 * we're in normal lexing mode
579 */
09bef843
SB
580 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
581 PL_lex_state == LEX_FORMLINE)
463ee0b2 582 return s;
ffb4593c
NT
583
584 /* try to recharge the buffer */
9cbb5ea2
GS
585 if ((s = filter_gets(PL_linestr, PL_rsfp,
586 (prevlen = SvCUR(PL_linestr)))) == Nullch)
587 {
588 /* end of file. Add on the -p or -n magic */
3280af22
NIS
589 if (PL_minus_n || PL_minus_p) {
590 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
591 ";}continue{print or die qq(-p destination: $!\\n)" :
592 "");
3280af22
NIS
593 sv_catpv(PL_linestr,";}");
594 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
595 }
596 else
3280af22 597 sv_setpv(PL_linestr,";");
ffb4593c
NT
598
599 /* reset variables for next time we lex */
9cbb5ea2
GS
600 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
601 = SvPVX(PL_linestr);
3280af22 602 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 603 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
604
605 /* Close the filehandle. Could be from -P preprocessor,
606 * STDIN, or a regular file. If we were reading code from
607 * STDIN (because the commandline held no -e or filename)
608 * then we don't close it, we reset it so the code can
609 * read from STDIN too.
610 */
611
3280af22
NIS
612 if (PL_preprocess && !PL_in_eval)
613 (void)PerlProc_pclose(PL_rsfp);
614 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
615 PerlIO_clearerr(PL_rsfp);
8990e307 616 else
3280af22
NIS
617 (void)PerlIO_close(PL_rsfp);
618 PL_rsfp = Nullfp;
463ee0b2
LW
619 return s;
620 }
ffb4593c
NT
621
622 /* not at end of file, so we only read another line */
09bef843
SB
623 /* make corresponding updates to old pointers, for yyerror() */
624 oldprevlen = PL_oldbufptr - PL_bufend;
625 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
626 if (PL_last_uni)
627 oldunilen = PL_last_uni - PL_bufend;
628 if (PL_last_lop)
629 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
630 PL_linestart = PL_bufptr = s + prevlen;
631 PL_bufend = s + SvCUR(PL_linestr);
632 s = PL_bufptr;
09bef843
SB
633 PL_oldbufptr = s + oldprevlen;
634 PL_oldoldbufptr = s + oldoldprevlen;
635 if (PL_last_uni)
636 PL_last_uni = s + oldunilen;
637 if (PL_last_lop)
638 PL_last_lop = s + oldloplen;
a0d0e21e 639 incline(s);
ffb4593c
NT
640
641 /* debugger active and we're not compiling the debugger code,
642 * so store the line into the debugger's array of lines
643 */
3280af22 644 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
645 SV *sv = NEWSV(85,0);
646
647 sv_upgrade(sv, SVt_PVMG);
3280af22 648 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
649 (void)SvIOK_on(sv);
650 SvIVX(sv) = 0;
57843af0 651 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 652 }
463ee0b2 653 }
a687059c 654}
378cc40b 655
ffb4593c
NT
656/*
657 * S_check_uni
658 * Check the unary operators to ensure there's no ambiguity in how they're
659 * used. An ambiguous piece of code would be:
660 * rand + 5
661 * This doesn't mean rand() + 5. Because rand() is a unary operator,
662 * the +5 is its argument.
663 */
664
76e3520e 665STATIC void
cea2e8a9 666S_check_uni(pTHX)
ba106d47 667{
2f3197b3 668 char *s;
a0d0e21e 669 char *t;
2f3197b3 670
3280af22 671 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 672 return;
3280af22
NIS
673 while (isSPACE(*PL_last_uni))
674 PL_last_uni++;
7e2040f0 675 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 676 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 677 return;
0453d815 678 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 679 char ch = *s;
0453d815 680 *s = '\0';
9014280d 681 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 682 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
683 PL_last_uni);
684 *s = ch;
685 }
2f3197b3
LW
686}
687
ffb4593c
NT
688/*
689 * LOP : macro to build a list operator. Its behaviour has been replaced
690 * with a subroutine, S_lop() for which LOP is just another name.
691 */
692
a0d0e21e
LW
693#define LOP(f,x) return lop(f,x,s)
694
ffb4593c
NT
695/*
696 * S_lop
697 * Build a list operator (or something that might be one). The rules:
698 * - if we have a next token, then it's a list operator [why?]
699 * - if the next thing is an opening paren, then it's a function
700 * - else it's a list operator
701 */
702
76e3520e 703STATIC I32
a0be28da 704S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 705{
79072805 706 yylval.ival = f;
35c8bce7 707 CLINE;
075953c3 708 REPORT("lop", f)
3280af22
NIS
709 PL_expect = x;
710 PL_bufptr = s;
711 PL_last_lop = PL_oldbufptr;
eb160463 712 PL_last_lop_op = (OPCODE)f;
3280af22 713 if (PL_nexttoke)
a0d0e21e 714 return LSTOP;
79072805
LW
715 if (*s == '(')
716 return FUNC;
717 s = skipspace(s);
718 if (*s == '(')
719 return FUNC;
720 else
721 return LSTOP;
722}
723
ffb4593c
NT
724/*
725 * S_force_next
9cbb5ea2 726 * When the lexer realizes it knows the next token (for instance,
ffb4593c 727 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
728 * to know what token to return the next time the lexer is called. Caller
729 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
730 * handles the token correctly.
ffb4593c
NT
731 */
732
4e553d73 733STATIC void
cea2e8a9 734S_force_next(pTHX_ I32 type)
79072805 735{
3280af22
NIS
736 PL_nexttype[PL_nexttoke] = type;
737 PL_nexttoke++;
738 if (PL_lex_state != LEX_KNOWNEXT) {
739 PL_lex_defer = PL_lex_state;
740 PL_lex_expect = PL_expect;
741 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
742 }
743}
744
ffb4593c
NT
745/*
746 * S_force_word
747 * When the lexer knows the next thing is a word (for instance, it has
748 * just seen -> and it knows that the next char is a word char, then
749 * it calls S_force_word to stick the next word into the PL_next lookahead.
750 *
751 * Arguments:
b1b65b59 752 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
753 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
754 * int check_keyword : if true, Perl checks to make sure the word isn't
755 * a keyword (do this if the word is a label, e.g. goto FOO)
756 * int allow_pack : if true, : characters will also be allowed (require,
757 * use, etc. do this)
9cbb5ea2 758 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
759 */
760
76e3520e 761STATIC char *
cea2e8a9 762S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 763{
463ee0b2
LW
764 register char *s;
765 STRLEN len;
4e553d73 766
463ee0b2
LW
767 start = skipspace(start);
768 s = start;
7e2040f0 769 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 770 (allow_pack && *s == ':') ||
15f0808c 771 (allow_initial_tick && *s == '\'') )
a0d0e21e 772 {
3280af22
NIS
773 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
774 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
775 return start;
776 if (token == METHOD) {
777 s = skipspace(s);
778 if (*s == '(')
3280af22 779 PL_expect = XTERM;
463ee0b2 780 else {
3280af22 781 PL_expect = XOPERATOR;
463ee0b2 782 }
79072805 783 }
3280af22
NIS
784 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
785 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
786 force_next(token);
787 }
788 return s;
789}
790
ffb4593c
NT
791/*
792 * S_force_ident
9cbb5ea2 793 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
794 * text only contains the "foo" portion. The first argument is a pointer
795 * to the "foo", and the second argument is the type symbol to prefix.
796 * Forces the next token to be a "WORD".
9cbb5ea2 797 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
798 */
799
76e3520e 800STATIC void
cea2e8a9 801S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
802{
803 if (s && *s) {
11343788 804 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 805 PL_nextval[PL_nexttoke].opval = o;
79072805 806 force_next(WORD);
748a9306 807 if (kind) {
11343788 808 o->op_private = OPpCONST_ENTERED;
55497cff 809 /* XXX see note in pp_entereval() for why we forgo typo
810 warnings if the symbol must be introduced in an eval.
811 GSAR 96-10-12 */
3280af22 812 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
813 kind == '$' ? SVt_PV :
814 kind == '@' ? SVt_PVAV :
815 kind == '%' ? SVt_PVHV :
816 SVt_PVGV
817 );
748a9306 818 }
79072805
LW
819 }
820}
821
1571675a
GS
822NV
823Perl_str_to_version(pTHX_ SV *sv)
824{
825 NV retval = 0.0;
826 NV nshift = 1.0;
827 STRLEN len;
828 char *start = SvPVx(sv,len);
3aa33fe5 829 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
830 char *end = start + len;
831 while (start < end) {
ba210ebe 832 STRLEN skip;
1571675a
GS
833 UV n;
834 if (utf)
9041c2e3 835 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
836 else {
837 n = *(U8*)start;
838 skip = 1;
839 }
840 retval += ((NV)n)/nshift;
841 start += skip;
842 nshift *= 1000;
843 }
844 return retval;
845}
846
4e553d73 847/*
ffb4593c
NT
848 * S_force_version
849 * Forces the next token to be a version number.
e759cc13
RGS
850 * If the next token appears to be an invalid version number, (e.g. "v2b"),
851 * and if "guessing" is TRUE, then no new token is created (and the caller
852 * must use an alternative parsing method).
ffb4593c
NT
853 */
854
76e3520e 855STATIC char *
e759cc13 856S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 857{
858 OP *version = Nullop;
44dcb63b 859 char *d;
89bfa8cd 860
861 s = skipspace(s);
862
44dcb63b 863 d = s;
dd629d5b 864 if (*d == 'v')
44dcb63b 865 d++;
44dcb63b 866 if (isDIGIT(*d)) {
e759cc13
RGS
867 while (isDIGIT(*d) || *d == '_' || *d == '.')
868 d++;
9f3d182e 869 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 870 SV *ver;
b73d6f50 871 s = scan_num(s, &yylval);
89bfa8cd 872 version = yylval.opval;
dd629d5b
GS
873 ver = cSVOPx(version)->op_sv;
874 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 875 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
876 SvNVX(ver) = str_to_version(ver);
877 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 878 }
89bfa8cd 879 }
e759cc13
RGS
880 else if (guessing)
881 return s;
89bfa8cd 882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
4e553d73 886 force_next(WORD);
89bfa8cd 887
e759cc13 888 return s;
89bfa8cd 889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
be4731d2 920 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
922 if (SvUTF8(sv))
923 SvUTF8_on(pv);
924 }
79072805
LW
925 while (s < send) {
926 if (*s == '\\') {
a0d0e21e 927 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
928 s++; /* all that, just for this */
929 }
930 *d++ = *s++;
931 }
932 *d = '\0';
463ee0b2 933 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 934 finish:
3280af22 935 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 936 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
937 return sv;
938}
939
ffb4593c
NT
940/*
941 * Now come three functions related to double-quote context,
942 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
943 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
944 * interact with PL_lex_state, and create fake ( ... ) argument lists
945 * to handle functions and concatenation.
946 * They assume that whoever calls them will be setting up a fake
947 * join call, because each subthing puts a ',' after it. This lets
948 * "lower \luPpEr"
949 * become
950 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
951 *
952 * (I'm not sure whether the spurious commas at the end of lcfirst's
953 * arguments and join's arguments are created or not).
954 */
955
956/*
957 * S_sublex_start
958 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
959 *
960 * Pattern matching will set PL_lex_op to the pattern-matching op to
961 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
962 *
963 * OP_CONST and OP_READLINE are easy--just make the new op and return.
964 *
965 * Everything else becomes a FUNC.
966 *
967 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
968 * had an OP_CONST or OP_READLINE). This just sets us up for a
969 * call to S_sublex_push().
970 */
971
76e3520e 972STATIC I32
cea2e8a9 973S_sublex_start(pTHX)
79072805
LW
974{
975 register I32 op_type = yylval.ival;
79072805
LW
976
977 if (op_type == OP_NULL) {
3280af22
NIS
978 yylval.opval = PL_lex_op;
979 PL_lex_op = Nullop;
79072805
LW
980 return THING;
981 }
982 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 983 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
984
985 if (SvTYPE(sv) == SVt_PVIV) {
986 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
987 STRLEN len;
988 char *p;
989 SV *nsv;
990
991 p = SvPV(sv, len);
79cb57f6 992 nsv = newSVpvn(p, len);
01ec43d0
GS
993 if (SvUTF8(sv))
994 SvUTF8_on(nsv);
b3ac6de7
IZ
995 SvREFCNT_dec(sv);
996 sv = nsv;
4e553d73 997 }
b3ac6de7 998 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 999 PL_lex_stuff = Nullsv;
79072805
LW
1000 return THING;
1001 }
1002
3280af22
NIS
1003 PL_sublex_info.super_state = PL_lex_state;
1004 PL_sublex_info.sub_inwhat = op_type;
1005 PL_sublex_info.sub_op = PL_lex_op;
1006 PL_lex_state = LEX_INTERPPUSH;
55497cff 1007
3280af22
NIS
1008 PL_expect = XTERM;
1009 if (PL_lex_op) {
1010 yylval.opval = PL_lex_op;
1011 PL_lex_op = Nullop;
55497cff 1012 return PMFUNC;
1013 }
1014 else
1015 return FUNC;
1016}
1017
ffb4593c
NT
1018/*
1019 * S_sublex_push
1020 * Create a new scope to save the lexing state. The scope will be
1021 * ended in S_sublex_done. Returns a '(', starting the function arguments
1022 * to the uc, lc, etc. found before.
1023 * Sets PL_lex_state to LEX_INTERPCONCAT.
1024 */
1025
76e3520e 1026STATIC I32
cea2e8a9 1027S_sublex_push(pTHX)
55497cff 1028{
f46d017c 1029 ENTER;
55497cff 1030
3280af22
NIS
1031 PL_lex_state = PL_sublex_info.super_state;
1032 SAVEI32(PL_lex_dojoin);
1033 SAVEI32(PL_lex_brackets);
3280af22
NIS
1034 SAVEI32(PL_lex_casemods);
1035 SAVEI32(PL_lex_starts);
1036 SAVEI32(PL_lex_state);
7766f137 1037 SAVEVPTR(PL_lex_inpat);
3280af22 1038 SAVEI32(PL_lex_inwhat);
57843af0 1039 SAVECOPLINE(PL_curcop);
3280af22 1040 SAVEPPTR(PL_bufptr);
8452ff4b 1041 SAVEPPTR(PL_bufend);
3280af22
NIS
1042 SAVEPPTR(PL_oldbufptr);
1043 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1044 SAVEPPTR(PL_last_lop);
1045 SAVEPPTR(PL_last_uni);
3280af22
NIS
1046 SAVEPPTR(PL_linestart);
1047 SAVESPTR(PL_linestr);
1048 SAVEPPTR(PL_lex_brackstack);
1049 SAVEPPTR(PL_lex_casestack);
1050
1051 PL_linestr = PL_lex_stuff;
1052 PL_lex_stuff = Nullsv;
1053
9cbb5ea2
GS
1054 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1055 = SvPVX(PL_linestr);
3280af22 1056 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1057 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1058 SAVEFREESV(PL_linestr);
1059
1060 PL_lex_dojoin = FALSE;
1061 PL_lex_brackets = 0;
3280af22
NIS
1062 New(899, PL_lex_brackstack, 120, char);
1063 New(899, PL_lex_casestack, 12, char);
1064 SAVEFREEPV(PL_lex_brackstack);
1065 SAVEFREEPV(PL_lex_casestack);
1066 PL_lex_casemods = 0;
1067 *PL_lex_casestack = '\0';
1068 PL_lex_starts = 0;
1069 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1070 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1071
1072 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1073 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1074 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1075 else
3280af22 1076 PL_lex_inpat = Nullop;
79072805 1077
55497cff 1078 return '(';
79072805
LW
1079}
1080
ffb4593c
NT
1081/*
1082 * S_sublex_done
1083 * Restores lexer state after a S_sublex_push.
1084 */
1085
76e3520e 1086STATIC I32
cea2e8a9 1087S_sublex_done(pTHX)
79072805 1088{
3280af22 1089 if (!PL_lex_starts++) {
9aa983d2
JH
1090 SV *sv = newSVpvn("",0);
1091 if (SvUTF8(PL_linestr))
1092 SvUTF8_on(sv);
3280af22 1093 PL_expect = XOPERATOR;
9aa983d2 1094 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1095 return THING;
1096 }
1097
3280af22
NIS
1098 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1099 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1100 return yylex();
79072805
LW
1101 }
1102
ffb4593c 1103 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1104 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1105 PL_linestr = PL_lex_repl;
1106 PL_lex_inpat = 0;
1107 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1108 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1109 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1110 SAVEFREESV(PL_linestr);
1111 PL_lex_dojoin = FALSE;
1112 PL_lex_brackets = 0;
3280af22
NIS
1113 PL_lex_casemods = 0;
1114 *PL_lex_casestack = '\0';
1115 PL_lex_starts = 0;
25da4f38 1116 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1117 PL_lex_state = LEX_INTERPNORMAL;
1118 PL_lex_starts++;
e9fa98b2
HS
1119 /* we don't clear PL_lex_repl here, so that we can check later
1120 whether this is an evalled subst; that means we rely on the
1121 logic to ensure sublex_done() is called again only via the
1122 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1123 }
e9fa98b2 1124 else {
3280af22 1125 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1126 PL_lex_repl = Nullsv;
1127 }
79072805 1128 return ',';
ffed7fef
LW
1129 }
1130 else {
f46d017c 1131 LEAVE;
3280af22
NIS
1132 PL_bufend = SvPVX(PL_linestr);
1133 PL_bufend += SvCUR(PL_linestr);
1134 PL_expect = XOPERATOR;
09bef843 1135 PL_sublex_info.sub_inwhat = 0;
79072805 1136 return ')';
ffed7fef
LW
1137 }
1138}
1139
02aa26ce
NT
1140/*
1141 scan_const
1142
1143 Extracts a pattern, double-quoted string, or transliteration. This
1144 is terrifying code.
1145
3280af22
NIS
1146 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1147 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1148 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1149
9b599b2a
GS
1150 Returns a pointer to the character scanned up to. Iff this is
1151 advanced from the start pointer supplied (ie if anything was
1152 successfully parsed), will leave an OP for the substring scanned
1153 in yylval. Caller must intuit reason for not parsing further
1154 by looking at the next characters herself.
1155
02aa26ce
NT
1156 In patterns:
1157 backslashes:
1158 double-quoted style: \r and \n
1159 regexp special ones: \D \s
1160 constants: \x3
1161 backrefs: \1 (deprecated in substitution replacements)
1162 case and quoting: \U \Q \E
1163 stops on @ and $, but not for $ as tail anchor
1164
1165 In transliterations:
1166 characters are VERY literal, except for - not at the start or end
1167 of the string, which indicates a range. scan_const expands the
1168 range to the full set of intermediate characters.
1169
1170 In double-quoted strings:
1171 backslashes:
1172 double-quoted style: \r and \n
1173 constants: \x3
1174 backrefs: \1 (deprecated)
1175 case and quoting: \U \Q \E
1176 stops on @ and $
1177
1178 scan_const does *not* construct ops to handle interpolated strings.
1179 It stops processing as soon as it finds an embedded $ or @ variable
1180 and leaves it to the caller to work out what's going on.
1181
da6eedaa 1182 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1183
1184 $ in pattern could be $foo or could be tail anchor. Assumption:
1185 it's a tail anchor if $ is the last thing in the string, or if it's
1186 followed by one of ")| \n\t"
1187
1188 \1 (backreferences) are turned into $1
1189
1190 The structure of the code is
1191 while (there's a character to process) {
1192 handle transliteration ranges
1193 skip regexp comments
1194 skip # initiated comments in //x patterns
1195 check for embedded @foo
1196 check for embedded scalars
1197 if (backslash) {
1198 leave intact backslashes from leave (below)
1199 deprecate \1 in strings and sub replacements
1200 handle string-changing backslashes \l \U \Q \E, etc.
1201 switch (what was escaped) {
1202 handle - in a transliteration (becomes a literal -)
1203 handle \132 octal characters
1204 handle 0x15 hex characters
1205 handle \cV (control V)
1206 handle printf backslashes (\f, \r, \n, etc)
1207 } (end switch)
1208 } (end if backslash)
1209 } (end while character to read)
4e553d73 1210
02aa26ce
NT
1211*/
1212
76e3520e 1213STATIC char *
cea2e8a9 1214S_scan_const(pTHX_ char *start)
79072805 1215{
3280af22 1216 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1217 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1218 register char *s = start; /* start of the constant */
1219 register char *d = SvPVX(sv); /* destination for copies */
1220 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1221 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1222 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1223 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1224 UV uv;
1225
dff6d3cd 1226 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1227 PL_lex_inpat
4a2d328f 1228 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1229 : "";
79072805 1230
2b9d42f0
NIS
1231 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1232 /* If we are doing a trans and we know we want UTF8 set expectation */
1233 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1234 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1235 }
1236
1237
79072805 1238 while (s < send || dorange) {
02aa26ce 1239 /* get transliterations out of the way (they're most literal) */
3280af22 1240 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1241 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1242 if (dorange) {
1ba5c669
JH
1243 I32 i; /* current expanded character */
1244 I32 min; /* first character in range */
1245 I32 max; /* last character in range */
02aa26ce 1246
2b9d42f0 1247 if (has_utf8) {
8973db79
JH
1248 char *c = (char*)utf8_hop((U8*)d, -1);
1249 char *e = d++;
1250 while (e-- > c)
1251 *(e + 1) = *e;
25716404 1252 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1253 /* mark the range as done, and continue */
1254 dorange = FALSE;
1255 didrange = TRUE;
1256 continue;
1257 }
2b9d42f0 1258
02aa26ce 1259 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1260 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1261 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1262 d -= 2; /* eat the first char and the - */
1263
8ada0baa
JH
1264 min = (U8)*d; /* first char in range */
1265 max = (U8)d[1]; /* last char in range */
1266
c2e66d9e 1267 if (min > max) {
01ec43d0 1268 Perl_croak(aTHX_
1ba5c669
JH
1269 "Invalid [] range \"%c-%c\" in transliteration operator",
1270 (char)min, (char)max);
c2e66d9e
GS
1271 }
1272
c7f1f016 1273#ifdef EBCDIC
8ada0baa
JH
1274 if ((isLOWER(min) && isLOWER(max)) ||
1275 (isUPPER(min) && isUPPER(max))) {
1276 if (isLOWER(min)) {
1277 for (i = min; i <= max; i++)
1278 if (isLOWER(i))
db42d148 1279 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1280 } else {
1281 for (i = min; i <= max; i++)
1282 if (isUPPER(i))
db42d148 1283 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1284 }
1285 }
1286 else
1287#endif
1288 for (i = min; i <= max; i++)
eb160463 1289 *d++ = (char)i;
02aa26ce
NT
1290
1291 /* mark the range as done, and continue */
79072805 1292 dorange = FALSE;
01ec43d0 1293 didrange = TRUE;
79072805 1294 continue;
4e553d73 1295 }
02aa26ce
NT
1296
1297 /* range begins (ignore - as first or last char) */
79072805 1298 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1299 if (didrange) {
1fafa243 1300 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1301 }
2b9d42f0 1302 if (has_utf8) {
25716404 1303 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1304 s++;
1305 continue;
1306 }
79072805
LW
1307 dorange = TRUE;
1308 s++;
01ec43d0
GS
1309 }
1310 else {
1311 didrange = FALSE;
1312 }
79072805 1313 }
02aa26ce
NT
1314
1315 /* if we get here, we're not doing a transliteration */
1316
0f5d15d6
IZ
1317 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1318 except for the last char, which will be done separately. */
3280af22 1319 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1320 if (s[2] == '#') {
1321 while (s < send && *s != ')')
db42d148 1322 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1323 }
1324 else if (s[2] == '{' /* This should match regcomp.c */
1325 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1326 {
cc6b7395 1327 I32 count = 1;
0f5d15d6 1328 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1329 char c;
1330
d9f97599
GS
1331 while (count && (c = *regparse)) {
1332 if (c == '\\' && regparse[1])
1333 regparse++;
4e553d73 1334 else if (c == '{')
cc6b7395 1335 count++;
4e553d73 1336 else if (c == '}')
cc6b7395 1337 count--;
d9f97599 1338 regparse++;
cc6b7395 1339 }
5bdf89e7
IZ
1340 if (*regparse != ')') {
1341 regparse--; /* Leave one char for continuation. */
cc6b7395 1342 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1343 }
0f5d15d6 1344 while (s < regparse)
db42d148 1345 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1346 }
748a9306 1347 }
02aa26ce
NT
1348
1349 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1350 else if (*s == '#' && PL_lex_inpat &&
1351 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1352 while (s+1 < send && *s != '\n')
db42d148 1353 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1354 }
02aa26ce 1355
5d1d4326 1356 /* check for embedded arrays
da6eedaa 1357 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1358 */
7e2040f0 1359 else if (*s == '@' && s[1]
5d1d4326 1360 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1361 break;
02aa26ce
NT
1362
1363 /* check for embedded scalars. only stop if we're sure it's a
1364 variable.
1365 */
79072805 1366 else if (*s == '$') {
3280af22 1367 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1368 break;
6002328a 1369 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1370 break; /* in regexp, $ might be tail anchor */
1371 }
02aa26ce 1372
2b9d42f0
NIS
1373 /* End of else if chain - OP_TRANS rejoin rest */
1374
02aa26ce 1375 /* backslashes */
79072805
LW
1376 if (*s == '\\' && s+1 < send) {
1377 s++;
02aa26ce
NT
1378
1379 /* some backslashes we leave behind */
c9f97d15 1380 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1381 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1382 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1383 continue;
1384 }
02aa26ce
NT
1385
1386 /* deprecate \1 in strings and substitution replacements */
3280af22 1387 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1388 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1389 {
599cee73 1390 if (ckWARN(WARN_SYNTAX))
9014280d 1391 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1392 *--s = '$';
1393 break;
1394 }
02aa26ce
NT
1395
1396 /* string-change backslash escapes */
3280af22 1397 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1398 --s;
1399 break;
1400 }
02aa26ce
NT
1401
1402 /* if we get here, it's either a quoted -, or a digit */
79072805 1403 switch (*s) {
02aa26ce
NT
1404
1405 /* quoted - in transliterations */
79072805 1406 case '-':
3280af22 1407 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1408 *d++ = *s++;
1409 continue;
1410 }
1411 /* FALL THROUGH */
1412 default:
11b8faa4 1413 {
707afd92
MS
1414 if (ckWARN(WARN_MISC) &&
1415 isALNUM(*s) &&
1416 *s != '_')
9014280d 1417 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1418 "Unrecognized escape \\%c passed through",
1419 *s);
1420 /* default action is to copy the quoted character */
f9a63242 1421 goto default_action;
11b8faa4 1422 }
02aa26ce
NT
1423
1424 /* \132 indicates an octal constant */
79072805
LW
1425 case '0': case '1': case '2': case '3':
1426 case '4': case '5': case '6': case '7':
ba210ebe 1427 {
53305cf1
NC
1428 I32 flags = 0;
1429 STRLEN len = 3;
1430 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1431 s += len;
1432 }
012bcf8d 1433 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1434
1435 /* \x24 indicates a hex constant */
79072805 1436 case 'x':
a0ed51b3
LW
1437 ++s;
1438 if (*s == '{') {
1439 char* e = strchr(s, '}');
a4c04bdc
NC
1440 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1441 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1442 STRLEN len;
355860ce 1443
53305cf1 1444 ++s;
adaeee49 1445 if (!e) {
a0ed51b3 1446 yyerror("Missing right brace on \\x{}");
355860ce 1447 continue;
ba210ebe 1448 }
53305cf1
NC
1449 len = e - s;
1450 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1451 s = e + 1;
a0ed51b3
LW
1452 }
1453 else {
ba210ebe 1454 {
53305cf1 1455 STRLEN len = 2;
a4c04bdc 1456 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1457 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1458 s += len;
1459 }
012bcf8d
GS
1460 }
1461
1462 NUM_ESCAPE_INSERT:
1463 /* Insert oct or hex escaped character.
301d3d20 1464 * There will always enough room in sv since such
db42d148 1465 * escapes will be longer than any UTF-8 sequence
301d3d20 1466 * they can end up as. */
ba7cea30 1467
c7f1f016
NIS
1468 /* We need to map to chars to ASCII before doing the tests
1469 to cover EBCDIC
1470 */
c4d5f83a 1471 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1472 if (!has_utf8 && uv > 255) {
301d3d20
JH
1473 /* Might need to recode whatever we have
1474 * accumulated so far if it contains any
1475 * hibit chars.
1476 *
1477 * (Can't we keep track of that and avoid
1478 * this rescan? --jhi)
012bcf8d 1479 */
c7f1f016 1480 int hicount = 0;
63cd0674
NIS
1481 U8 *c;
1482 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1483 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1484 hicount++;
db42d148 1485 }
012bcf8d 1486 }
63cd0674 1487 if (hicount) {
db42d148
NIS
1488 STRLEN offset = d - SvPVX(sv);
1489 U8 *src, *dst;
1490 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1491 src = (U8 *)d - 1;
1492 dst = src+hicount;
1493 d += hicount;
1494 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1495 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1496 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1497 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1498 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1499 }
1500 else {
63cd0674 1501 *dst-- = *src;
012bcf8d 1502 }
c7f1f016 1503 src--;
012bcf8d
GS
1504 }
1505 }
1506 }
1507
9aa983d2 1508 if (has_utf8 || uv > 255) {
9041c2e3 1509 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1510 has_utf8 = TRUE;
f9a63242
JH
1511 if (PL_lex_inwhat == OP_TRANS &&
1512 PL_sublex_info.sub_op) {
1513 PL_sublex_info.sub_op->op_private |=
1514 (PL_lex_repl ? OPpTRANS_FROM_UTF
1515 : OPpTRANS_TO_UTF);
f9a63242 1516 }
012bcf8d 1517 }
a0ed51b3 1518 else {
012bcf8d 1519 *d++ = (char)uv;
a0ed51b3 1520 }
012bcf8d
GS
1521 }
1522 else {
c4d5f83a 1523 *d++ = (char) uv;
a0ed51b3 1524 }
79072805 1525 continue;
02aa26ce 1526
b239daa5 1527 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1528 case 'N':
55eda711 1529 ++s;
423cee85
JH
1530 if (*s == '{') {
1531 char* e = strchr(s, '}');
155aba94 1532 SV *res;
423cee85
JH
1533 STRLEN len;
1534 char *str;
4e553d73 1535
423cee85 1536 if (!e) {
5777a3f7 1537 yyerror("Missing right brace on \\N{}");
423cee85
JH
1538 e = s - 1;
1539 goto cont_scan;
1540 }
dbc0d4f2
JH
1541 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1542 /* \N{U+...} */
1543 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1544 PERL_SCAN_DISALLOW_PREFIX;
1545 s += 3;
1546 len = e - s;
1547 uv = grok_hex(s, &len, &flags, NULL);
1548 s = e + 1;
1549 goto NUM_ESCAPE_INSERT;
1550 }
55eda711
JH
1551 res = newSVpvn(s + 1, e - s - 1);
1552 res = new_constant( Nullch, 0, "charnames",
1553 res, Nullsv, "\\N{...}" );
f9a63242
JH
1554 if (has_utf8)
1555 sv_utf8_upgrade(res);
423cee85 1556 str = SvPV(res,len);
1c47067b
JH
1557#ifdef EBCDIC_NEVER_MIND
1558 /* charnames uses pack U and that has been
1559 * recently changed to do the below uni->native
1560 * mapping, so this would be redundant (and wrong,
1561 * the code point would be doubly converted).
1562 * But leave this in just in case the pack U change
1563 * gets revoked, but the semantics is still
1564 * desireable for charnames. --jhi */
cddc7ef4
JH
1565 {
1566 UV uv = utf8_to_uvchr((U8*)str, 0);
1567
1568 if (uv < 0x100) {
1569 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1570
1571 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1572 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1573 str = SvPV(res, len);
1574 }
1575 }
1576#endif
89491803 1577 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1578 char *ostart = SvPVX(sv);
1579 SvCUR_set(sv, d - ostart);
1580 SvPOK_on(sv);
e4f3eed8 1581 *d = '\0';
f08d6ad9 1582 sv_utf8_upgrade(sv);
d2f449dd 1583 /* this just broke our allocation above... */
eb160463 1584 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1585 d = SvPVX(sv) + SvCUR(sv);
89491803 1586 has_utf8 = TRUE;
f08d6ad9 1587 }
eb160463 1588 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1589 char *odest = SvPVX(sv);
1590
8973db79 1591 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1592 d = SvPVX(sv) + (d - odest);
1593 }
1594 Copy(str, d, len, char);
1595 d += len;
1596 SvREFCNT_dec(res);
1597 cont_scan:
1598 s = e + 1;
1599 }
1600 else
5777a3f7 1601 yyerror("Missing braces on \\N{}");
423cee85
JH
1602 continue;
1603
02aa26ce 1604 /* \c is a control character */
79072805
LW
1605 case 'c':
1606 s++;
ba210ebe
JH
1607 {
1608 U8 c = *s++;
c7f1f016
NIS
1609#ifdef EBCDIC
1610 if (isLOWER(c))
1611 c = toUPPER(c);
1612#endif
db42d148 1613 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1614 }
79072805 1615 continue;
02aa26ce
NT
1616
1617 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1618 case 'b':
db42d148 1619 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1620 break;
1621 case 'n':
db42d148 1622 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1623 break;
1624 case 'r':
db42d148 1625 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1626 break;
1627 case 'f':
db42d148 1628 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1629 break;
1630 case 't':
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1632 break;
34a3fe2a 1633 case 'e':
db42d148 1634 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1635 break;
1636 case 'a':
db42d148 1637 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1638 break;
02aa26ce
NT
1639 } /* end switch */
1640
79072805
LW
1641 s++;
1642 continue;
02aa26ce
NT
1643 } /* end if (backslash) */
1644
f9a63242 1645 default_action:
2b9d42f0
NIS
1646 /* If we started with encoded form, or already know we want it
1647 and then encode the next character */
1648 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1649 STRLEN len = 1;
1650 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1651 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1652 s += len;
1653 if (need > len) {
1654 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1655 STRLEN off = d - SvPVX(sv);
1656 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1657 }
1658 d = (char*)uvchr_to_utf8((U8*)d, uv);
1659 has_utf8 = TRUE;
1660 }
1661 else {
1662 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1663 }
02aa26ce
NT
1664 } /* while loop to process each character */
1665
1666 /* terminate the string and set up the sv */
79072805 1667 *d = '\0';
463ee0b2 1668 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1669 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1670 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1671
79072805 1672 SvPOK_on(sv);
9f4817db 1673 if (PL_encoding && !has_utf8) {
799ef3cb 1674 sv_recode_to_utf8(sv, PL_encoding);
9f4817db
JH
1675 has_utf8 = TRUE;
1676 }
2b9d42f0 1677 if (has_utf8) {
7e2040f0 1678 SvUTF8_on(sv);
2b9d42f0
NIS
1679 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1680 PL_sublex_info.sub_op->op_private |=
1681 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1682 }
1683 }
79072805 1684
02aa26ce 1685 /* shrink the sv if we allocated more than we used */
79072805
LW
1686 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1687 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1688 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1689 }
02aa26ce 1690
9b599b2a 1691 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1692 if (s > PL_bufptr) {
1693 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1694 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1695 sv, Nullsv,
4e553d73 1696 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1697 ? "tr"
3280af22 1698 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1699 ? "s"
1700 : "qq")));
79072805 1701 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1702 } else
8990e307 1703 SvREFCNT_dec(sv);
79072805
LW
1704 return s;
1705}
1706
ffb4593c
NT
1707/* S_intuit_more
1708 * Returns TRUE if there's more to the expression (e.g., a subscript),
1709 * FALSE otherwise.
ffb4593c
NT
1710 *
1711 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1712 *
1713 * ->[ and ->{ return TRUE
1714 * { and [ outside a pattern are always subscripts, so return TRUE
1715 * if we're outside a pattern and it's not { or [, then return FALSE
1716 * if we're in a pattern and the first char is a {
1717 * {4,5} (any digits around the comma) returns FALSE
1718 * if we're in a pattern and the first char is a [
1719 * [] returns FALSE
1720 * [SOMETHING] has a funky algorithm to decide whether it's a
1721 * character class or not. It has to deal with things like
1722 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1723 * anything else returns TRUE
1724 */
1725
9cbb5ea2
GS
1726/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1727
76e3520e 1728STATIC int
cea2e8a9 1729S_intuit_more(pTHX_ register char *s)
79072805 1730{
3280af22 1731 if (PL_lex_brackets)
79072805
LW
1732 return TRUE;
1733 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1734 return TRUE;
1735 if (*s != '{' && *s != '[')
1736 return FALSE;
3280af22 1737 if (!PL_lex_inpat)
79072805
LW
1738 return TRUE;
1739
1740 /* In a pattern, so maybe we have {n,m}. */
1741 if (*s == '{') {
1742 s++;
1743 if (!isDIGIT(*s))
1744 return TRUE;
1745 while (isDIGIT(*s))
1746 s++;
1747 if (*s == ',')
1748 s++;
1749 while (isDIGIT(*s))
1750 s++;
1751 if (*s == '}')
1752 return FALSE;
1753 return TRUE;
1754
1755 }
1756
1757 /* On the other hand, maybe we have a character class */
1758
1759 s++;
1760 if (*s == ']' || *s == '^')
1761 return FALSE;
1762 else {
ffb4593c 1763 /* this is terrifying, and it works */
79072805
LW
1764 int weight = 2; /* let's weigh the evidence */
1765 char seen[256];
f27ffc4a 1766 unsigned char un_char = 255, last_un_char;
93a17b20 1767 char *send = strchr(s,']');
3280af22 1768 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1769
1770 if (!send) /* has to be an expression */
1771 return TRUE;
1772
1773 Zero(seen,256,char);
1774 if (*s == '$')
1775 weight -= 3;
1776 else if (isDIGIT(*s)) {
1777 if (s[1] != ']') {
1778 if (isDIGIT(s[1]) && s[2] == ']')
1779 weight -= 10;
1780 }
1781 else
1782 weight -= 100;
1783 }
1784 for (; s < send; s++) {
1785 last_un_char = un_char;
1786 un_char = (unsigned char)*s;
1787 switch (*s) {
1788 case '@':
1789 case '&':
1790 case '$':
1791 weight -= seen[un_char] * 10;
7e2040f0 1792 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1793 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1794 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1795 weight -= 100;
1796 else
1797 weight -= 10;
1798 }
1799 else if (*s == '$' && s[1] &&
93a17b20
LW
1800 strchr("[#!%*<>()-=",s[1])) {
1801 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1802 weight -= 10;
1803 else
1804 weight -= 1;
1805 }
1806 break;
1807 case '\\':
1808 un_char = 254;
1809 if (s[1]) {
93a17b20 1810 if (strchr("wds]",s[1]))
79072805
LW
1811 weight += 100;
1812 else if (seen['\''] || seen['"'])
1813 weight += 1;
93a17b20 1814 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1815 weight += 40;
1816 else if (isDIGIT(s[1])) {
1817 weight += 40;
1818 while (s[1] && isDIGIT(s[1]))
1819 s++;
1820 }
1821 }
1822 else
1823 weight += 100;
1824 break;
1825 case '-':
1826 if (s[1] == '\\')
1827 weight += 50;
93a17b20 1828 if (strchr("aA01! ",last_un_char))
79072805 1829 weight += 30;
93a17b20 1830 if (strchr("zZ79~",s[1]))
79072805 1831 weight += 30;
f27ffc4a
GS
1832 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1833 weight -= 5; /* cope with negative subscript */
79072805
LW
1834 break;
1835 default:
93a17b20 1836 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1837 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1838 char *d = tmpbuf;
1839 while (isALPHA(*s))
1840 *d++ = *s++;
1841 *d = '\0';
1842 if (keyword(tmpbuf, d - tmpbuf))
1843 weight -= 150;
1844 }
1845 if (un_char == last_un_char + 1)
1846 weight += 5;
1847 weight -= seen[un_char];
1848 break;
1849 }
1850 seen[un_char]++;
1851 }
1852 if (weight >= 0) /* probably a character class */
1853 return FALSE;
1854 }
1855
1856 return TRUE;
1857}
ffed7fef 1858
ffb4593c
NT
1859/*
1860 * S_intuit_method
1861 *
1862 * Does all the checking to disambiguate
1863 * foo bar
1864 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1865 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1866 *
1867 * First argument is the stuff after the first token, e.g. "bar".
1868 *
1869 * Not a method if bar is a filehandle.
1870 * Not a method if foo is a subroutine prototyped to take a filehandle.
1871 * Not a method if it's really "Foo $bar"
1872 * Method if it's "foo $bar"
1873 * Not a method if it's really "print foo $bar"
1874 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1875 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1876 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1877 * =>
1878 */
1879
76e3520e 1880STATIC int
cea2e8a9 1881S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1882{
1883 char *s = start + (*start == '$');
3280af22 1884 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1885 STRLEN len;
1886 GV* indirgv;
1887
1888 if (gv) {
b6c543e3 1889 CV *cv;
a0d0e21e
LW
1890 if (GvIO(gv))
1891 return 0;
b6c543e3
IZ
1892 if ((cv = GvCVu(gv))) {
1893 char *proto = SvPVX(cv);
1894 if (proto) {
1895 if (*proto == ';')
1896 proto++;
1897 if (*proto == '*')
1898 return 0;
1899 }
1900 } else
a0d0e21e
LW
1901 gv = 0;
1902 }
8903cb82 1903 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1904 /* start is the beginning of the possible filehandle/object,
1905 * and s is the end of it
1906 * tmpbuf is a copy of it
1907 */
1908
a0d0e21e 1909 if (*start == '$') {
3280af22 1910 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1911 return 0;
1912 s = skipspace(s);
3280af22
NIS
1913 PL_bufptr = start;
1914 PL_expect = XREF;
a0d0e21e
LW
1915 return *s == '(' ? FUNCMETH : METHOD;
1916 }
1917 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1918 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1919 len -= 2;
1920 tmpbuf[len] = '\0';
1921 goto bare_package;
1922 }
1923 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1924 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1925 return 0;
1926 /* filehandle or package name makes it a method */
89bfa8cd 1927 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1928 s = skipspace(s);
3280af22 1929 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1930 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1931 bare_package:
3280af22 1932 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1933 newSVpvn(tmpbuf,len));
3280af22
NIS
1934 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1935 PL_expect = XTERM;
a0d0e21e 1936 force_next(WORD);
3280af22 1937 PL_bufptr = s;
a0d0e21e
LW
1938 return *s == '(' ? FUNCMETH : METHOD;
1939 }
1940 }
1941 return 0;
1942}
1943
ffb4593c
NT
1944/*
1945 * S_incl_perldb
1946 * Return a string of Perl code to load the debugger. If PERL5DB
1947 * is set, it will return the contents of that, otherwise a
1948 * compile-time require of perl5db.pl.
1949 */
1950
76e3520e 1951STATIC char*
cea2e8a9 1952S_incl_perldb(pTHX)
a0d0e21e 1953{
3280af22 1954 if (PL_perldb) {
76e3520e 1955 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1956
1957 if (pdb)
1958 return pdb;
91487cfc 1959 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1960 return "BEGIN { require 'perl5db.pl' }";
1961 }
1962 return "";
1963}
1964
1965
16d20bd9 1966/* Encoded script support. filter_add() effectively inserts a
4e553d73 1967 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1968 * Note that the filter function only applies to the current source file
1969 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1970 *
1971 * The datasv parameter (which may be NULL) can be used to pass
1972 * private data to this instance of the filter. The filter function
1973 * can recover the SV using the FILTER_DATA macro and use it to
1974 * store private buffers and state information.
1975 *
1976 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1977 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1978 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1979 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1980 * private use must be set using malloc'd pointers.
1981 */
16d20bd9
AD
1982
1983SV *
864dbfa3 1984Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1985{
f4c556ac
GS
1986 if (!funcp)
1987 return Nullsv;
1988
3280af22
NIS
1989 if (!PL_rsfp_filters)
1990 PL_rsfp_filters = newAV();
16d20bd9 1991 if (!datasv)
8c52afec 1992 datasv = NEWSV(255,0);
16d20bd9 1993 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1994 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1995 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1996 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 1997 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 1998 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
1999 av_unshift(PL_rsfp_filters, 1);
2000 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2001 return(datasv);
2002}
4e553d73 2003
16d20bd9
AD
2004
2005/* Delete most recently added instance of this filter function. */
a0d0e21e 2006void
864dbfa3 2007Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2008{
e0c19803 2009 SV *datasv;
fe5a182c 2010 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2011 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2012 return;
2013 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2014 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2015 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2016 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2017 IoANY(datasv) = (void *)NULL;
3280af22 2018 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2019
16d20bd9
AD
2020 return;
2021 }
2022 /* we need to search for the correct entry and clear it */
cea2e8a9 2023 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2024}
2025
2026
2027/* Invoke the n'th filter function for the current rsfp. */
2028I32
864dbfa3 2029Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2030
2031
8ac85365 2032 /* 0 = read one text line */
a0d0e21e 2033{
16d20bd9
AD
2034 filter_t funcp;
2035 SV *datasv = NULL;
e50aee73 2036
3280af22 2037 if (!PL_rsfp_filters)
16d20bd9 2038 return -1;
3280af22 2039 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2040 /* Provide a default input filter to make life easy. */
2041 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2042 DEBUG_P(PerlIO_printf(Perl_debug_log,
2043 "filter_read %d: from rsfp\n", idx));
4e553d73 2044 if (maxlen) {
16d20bd9
AD
2045 /* Want a block */
2046 int len ;
2047 int old_len = SvCUR(buf_sv) ;
2048
2049 /* ensure buf_sv is large enough */
eb160463 2050 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2051 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2052 if (PerlIO_error(PL_rsfp))
37120919
AD
2053 return -1; /* error */
2054 else
2055 return 0 ; /* end of file */
2056 }
16d20bd9
AD
2057 SvCUR_set(buf_sv, old_len + len) ;
2058 } else {
2059 /* Want a line */
3280af22
NIS
2060 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2061 if (PerlIO_error(PL_rsfp))
37120919
AD
2062 return -1; /* error */
2063 else
2064 return 0 ; /* end of file */
2065 }
16d20bd9
AD
2066 }
2067 return SvCUR(buf_sv);
2068 }
2069 /* Skip this filter slot if filter has been deleted */
3280af22 2070 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2071 DEBUG_P(PerlIO_printf(Perl_debug_log,
2072 "filter_read %d: skipped (filter deleted)\n",
2073 idx));
16d20bd9
AD
2074 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2075 }
2076 /* Get function pointer hidden within datasv */
4755096e 2077 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2078 DEBUG_P(PerlIO_printf(Perl_debug_log,
2079 "filter_read %d: via function %p (%s)\n",
fe5a182c 2080 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2081 /* Call function. The function is expected to */
2082 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2083 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2084 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2085}
2086
76e3520e 2087STATIC char *
cea2e8a9 2088S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2089{
c39cd008 2090#ifdef PERL_CR_FILTER
3280af22 2091 if (!PL_rsfp_filters) {
c39cd008 2092 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2093 }
2094#endif
3280af22 2095 if (PL_rsfp_filters) {
16d20bd9 2096
55497cff 2097 if (!append)
2098 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2099 if (FILTER_READ(0, sv, 0) > 0)
2100 return ( SvPVX(sv) ) ;
2101 else
2102 return Nullch ;
2103 }
9d116dd7 2104 else
fd049845 2105 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2106}
2107
01ec43d0
GS
2108STATIC HV *
2109S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2110{
2111 GV *gv;
2112
01ec43d0 2113 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2114 return PL_curstash;
2115
2116 if (len > 2 &&
2117 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2118 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2119 {
2120 return GvHV(gv); /* Foo:: */
def3634b
GS
2121 }
2122
2123 /* use constant CLASS => 'MyClass' */
2124 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2125 SV *sv;
2126 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2127 pkgname = SvPV_nolen(sv);
2128 }
2129 }
2130
2131 return gv_stashpv(pkgname, FALSE);
2132}
a0d0e21e 2133
748a9306
LW
2134#ifdef DEBUGGING
2135 static char* exp_name[] =
09bef843
SB
2136 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2137 "ATTRTERM", "TERMBLOCK"
2138 };
748a9306 2139#endif
463ee0b2 2140
02aa26ce
NT
2141/*
2142 yylex
2143
2144 Works out what to call the token just pulled out of the input
2145 stream. The yacc parser takes care of taking the ops we return and
2146 stitching them into a tree.
2147
2148 Returns:
2149 PRIVATEREF
2150
2151 Structure:
2152 if read an identifier
2153 if we're in a my declaration
2154 croak if they tried to say my($foo::bar)
2155 build the ops for a my() declaration
2156 if it's an access to a my() variable
2157 are we in a sort block?
2158 croak if my($a); $a <=> $b
2159 build ops for access to a my() variable
2160 if in a dq string, and they've said @foo and we can't find @foo
2161 croak
2162 build ops for a bareword
2163 if we already built the token before, use it.
2164*/
2165
dba4d153 2166#ifdef USE_PURE_BISON
864dbfa3 2167int
dba4d153 2168Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2169{
20141f0e
IRC
2170 int r;
2171
6f202aea 2172 yyactlevel++;
20141f0e
IRC
2173 yylval_pointer[yyactlevel] = lvalp;
2174 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2175 if (yyactlevel >= YYMAXLEVEL)
2176 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2177
dba4d153 2178 r = Perl_yylex(aTHX);
20141f0e 2179
d8ae6756
IRC
2180 if (yyactlevel > 0)
2181 yyactlevel--;
20141f0e
IRC
2182
2183 return r;
2184}
dba4d153 2185#endif
20141f0e 2186
dba4d153
JH
2187#ifdef __SC__
2188#pragma segment Perl_yylex
2189#endif
dba4d153 2190int
dba4d153 2191Perl_yylex(pTHX)
20141f0e 2192{
79072805 2193 register char *s;
378cc40b 2194 register char *d;
79072805 2195 register I32 tmp;
463ee0b2 2196 STRLEN len;
161b471a
NIS
2197 GV *gv = Nullgv;
2198 GV **gvp = 0;
aa7440fb 2199 bool bof = FALSE;
a687059c 2200
02aa26ce 2201 /* check if there's an identifier for us to look at */
ba979b31 2202 if (PL_pending_ident)
e930465f 2203 return S_pending_ident(aTHX);
bbce6d69 2204
02aa26ce
NT
2205 /* no identifier pending identification */
2206
3280af22 2207 switch (PL_lex_state) {
79072805
LW
2208#ifdef COMMENTARY
2209 case LEX_NORMAL: /* Some compilers will produce faster */
2210 case LEX_INTERPNORMAL: /* code if we comment these out. */
2211 break;
2212#endif
2213
09bef843 2214 /* when we've already built the next token, just pull it out of the queue */
79072805 2215 case LEX_KNOWNEXT:
3280af22
NIS
2216 PL_nexttoke--;
2217 yylval = PL_nextval[PL_nexttoke];
2218 if (!PL_nexttoke) {
2219 PL_lex_state = PL_lex_defer;
2220 PL_expect = PL_lex_expect;
2221 PL_lex_defer = LEX_NORMAL;
463ee0b2 2222 }
607df283 2223 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2224 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2225 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2226
3280af22 2227 return(PL_nexttype[PL_nexttoke]);
79072805 2228
02aa26ce 2229 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2230 when we get here, PL_bufptr is at the \
02aa26ce 2231 */
79072805
LW
2232 case LEX_INTERPCASEMOD:
2233#ifdef DEBUGGING
3280af22 2234 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2235 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2236#endif
02aa26ce 2237 /* handle \E or end of string */
3280af22 2238 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2239 char oldmod;
02aa26ce
NT
2240
2241 /* if at a \E */
3280af22
NIS
2242 if (PL_lex_casemods) {
2243 oldmod = PL_lex_casestack[--PL_lex_casemods];
2244 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2245
3280af22
NIS
2246 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2247 PL_bufptr += 2;
2248 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2249 }
79072805
LW
2250 return ')';
2251 }
3280af22
NIS
2252 if (PL_bufptr != PL_bufend)
2253 PL_bufptr += 2;
2254 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2255 return yylex();
79072805
LW
2256 }
2257 else {
607df283 2258 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2259 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2260 s = PL_bufptr + 1;
79072805 2261 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
eb160463 2262 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
a0d0e21e 2263 if (strchr("LU", *s) &&
3280af22 2264 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2265 {
3280af22 2266 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2267 return ')';
2268 }
3280af22
NIS
2269 if (PL_lex_casemods > 10) {
2270 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2271 if (newlb != PL_lex_casestack) {
a0d0e21e 2272 SAVEFREEPV(newlb);
3280af22 2273 PL_lex_casestack = newlb;
a0d0e21e
LW
2274 }
2275 }
3280af22
NIS
2276 PL_lex_casestack[PL_lex_casemods++] = *s;
2277 PL_lex_casestack[PL_lex_casemods] = '\0';
2278 PL_lex_state = LEX_INTERPCONCAT;
2279 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2280 force_next('(');
2281 if (*s == 'l')
3280af22 2282 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2283 else if (*s == 'u')
3280af22 2284 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2285 else if (*s == 'L')
3280af22 2286 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2287 else if (*s == 'U')
3280af22 2288 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2289 else if (*s == 'Q')
3280af22 2290 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2291 else
cea2e8a9 2292 Perl_croak(aTHX_ "panic: yylex");
3280af22 2293 PL_bufptr = s + 1;
79072805 2294 force_next(FUNC);
3280af22
NIS
2295 if (PL_lex_starts) {
2296 s = PL_bufptr;
2297 PL_lex_starts = 0;
79072805
LW
2298 Aop(OP_CONCAT);
2299 }
2300 else
cea2e8a9 2301 return yylex();
79072805
LW
2302 }
2303
55497cff 2304 case LEX_INTERPPUSH:
2305 return sublex_push();
2306
79072805 2307 case LEX_INTERPSTART:
3280af22 2308 if (PL_bufptr == PL_bufend)
79072805 2309 return sublex_done();
607df283 2310 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2311 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2312 PL_expect = XTERM;
2313 PL_lex_dojoin = (*PL_bufptr == '@');
2314 PL_lex_state = LEX_INTERPNORMAL;
2315 if (PL_lex_dojoin) {
2316 PL_nextval[PL_nexttoke].ival = 0;
79072805 2317 force_next(',');
4d1ff10f 2318#ifdef USE_5005THREADS
533c011a
NIS
2319 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2320 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2321 force_next(PRIVATEREF);
2322#else
a0d0e21e 2323 force_ident("\"", '$');
4d1ff10f 2324#endif /* USE_5005THREADS */
3280af22 2325 PL_nextval[PL_nexttoke].ival = 0;
79072805 2326 force_next('$');
3280af22 2327 PL_nextval[PL_nexttoke].ival = 0;
79072805 2328 force_next('(');
3280af22 2329 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2330 force_next(FUNC);
2331 }
3280af22
NIS
2332 if (PL_lex_starts++) {
2333 s = PL_bufptr;
79072805
LW
2334 Aop(OP_CONCAT);
2335 }
cea2e8a9 2336 return yylex();
79072805
LW
2337
2338 case LEX_INTERPENDMAYBE:
3280af22
NIS
2339 if (intuit_more(PL_bufptr)) {
2340 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2341 break;
2342 }
2343 /* FALL THROUGH */
2344
2345 case LEX_INTERPEND:
3280af22
NIS
2346 if (PL_lex_dojoin) {
2347 PL_lex_dojoin = FALSE;
2348 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2349 return ')';
2350 }
43a16006 2351 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2352 && SvEVALED(PL_lex_repl))
43a16006 2353 {
e9fa98b2 2354 if (PL_bufptr != PL_bufend)
cea2e8a9 2355 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2356 PL_lex_repl = Nullsv;
2357 }
79072805
LW
2358 /* FALLTHROUGH */
2359 case LEX_INTERPCONCAT:
2360#ifdef DEBUGGING
3280af22 2361 if (PL_lex_brackets)
cea2e8a9 2362 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2363#endif
3280af22 2364 if (PL_bufptr == PL_bufend)
79072805
LW
2365 return sublex_done();
2366
3280af22
NIS
2367 if (SvIVX(PL_linestr) == '\'') {
2368 SV *sv = newSVsv(PL_linestr);
2369 if (!PL_lex_inpat)
76e3520e 2370 sv = tokeq(sv);
3280af22 2371 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2372 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2373 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2374 s = PL_bufend;
79072805
LW
2375 }
2376 else {
3280af22 2377 s = scan_const(PL_bufptr);
79072805 2378 if (*s == '\\')
3280af22 2379 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2380 else
3280af22 2381 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2382 }
2383
3280af22
NIS
2384 if (s != PL_bufptr) {
2385 PL_nextval[PL_nexttoke] = yylval;
2386 PL_expect = XTERM;
79072805 2387 force_next(THING);
3280af22 2388 if (PL_lex_starts++)
79072805
LW
2389 Aop(OP_CONCAT);
2390 else {
3280af22 2391 PL_bufptr = s;
cea2e8a9 2392 return yylex();
79072805
LW
2393 }
2394 }
2395
cea2e8a9 2396 return yylex();
a0d0e21e 2397 case LEX_FORMLINE:
3280af22
NIS
2398 PL_lex_state = LEX_NORMAL;
2399 s = scan_formline(PL_bufptr);
2400 if (!PL_lex_formbrack)
a0d0e21e
LW
2401 goto rightbracket;
2402 OPERATOR(';');
79072805
LW
2403 }
2404
3280af22
NIS
2405 s = PL_bufptr;
2406 PL_oldoldbufptr = PL_oldbufptr;
2407 PL_oldbufptr = s;
607df283 2408 DEBUG_T( {
bf49b057
GS
2409 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2410 exp_name[PL_expect], s);
5f80b19c 2411 } );
463ee0b2
LW
2412
2413 retry:
378cc40b
LW
2414 switch (*s) {
2415 default:
7e2040f0 2416 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2417 goto keylookup;
cea2e8a9 2418 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2419 case 4:
2420 case 26:
2421 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2422 case 0:
3280af22
NIS
2423 if (!PL_rsfp) {
2424 PL_last_uni = 0;
2425 PL_last_lop = 0;
2426 if (PL_lex_brackets)
d98d5fff 2427 yyerror("Missing right curly or square bracket");
4e553d73 2428 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2429 "### Tokener got EOF\n");
5f80b19c 2430 } );
79072805 2431 TOKEN(0);
463ee0b2 2432 }
3280af22 2433 if (s++ < PL_bufend)
a687059c 2434 goto retry; /* ignore stray nulls */
3280af22
NIS
2435 PL_last_uni = 0;
2436 PL_last_lop = 0;
2437 if (!PL_in_eval && !PL_preambled) {
2438 PL_preambled = TRUE;
2439 sv_setpv(PL_linestr,incl_perldb());
2440 if (SvCUR(PL_linestr))
2441 sv_catpv(PL_linestr,";");
2442 if (PL_preambleav){
2443 while(AvFILLp(PL_preambleav) >= 0) {
2444 SV *tmpsv = av_shift(PL_preambleav);
2445 sv_catsv(PL_linestr, tmpsv);
2446 sv_catpv(PL_linestr, ";");
91b7def8 2447 sv_free(tmpsv);
2448 }
3280af22
NIS
2449 sv_free((SV*)PL_preambleav);
2450 PL_preambleav = NULL;
91b7def8 2451 }
3280af22
NIS
2452 if (PL_minus_n || PL_minus_p) {
2453 sv_catpv(PL_linestr, "LINE: while (<>) {");
2454 if (PL_minus_l)
2455 sv_catpv(PL_linestr,"chomp;");
2456 if (PL_minus_a) {
3280af22
NIS
2457 if (PL_minus_F) {
2458 if (strchr("/'\"", *PL_splitstr)
2459 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2460 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2461 else {
2462 char delim;
2463 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2464 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2465 delim = *s;
75c72d73 2466 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2467 "q" + (delim == '\''), delim);
3280af22 2468 for (s = PL_splitstr; *s; s++) {
54310121 2469 if (*s == '\\')
3280af22
NIS
2470 sv_catpvn(PL_linestr, "\\", 1);
2471 sv_catpvn(PL_linestr, s, 1);
54310121 2472 }
cea2e8a9 2473 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2474 }
2304df62
AD
2475 }
2476 else
75c72d73 2477 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2478 }
79072805 2479 }
3280af22
NIS
2480 sv_catpv(PL_linestr, "\n");
2481 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2482 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2483 PL_last_lop = PL_last_uni = Nullch;
3280af22 2484 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2485 SV *sv = NEWSV(85,0);
2486
2487 sv_upgrade(sv, SVt_PVMG);
3280af22 2488 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2489 (void)SvIOK_on(sv);
2490 SvIVX(sv) = 0;
57843af0 2491 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2492 }
79072805 2493 goto retry;
a687059c 2494 }
e929a76b 2495 do {
aa7440fb 2496 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2497 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2498 fake_eof:
2499 if (PL_rsfp) {
2500 if (PL_preprocess && !PL_in_eval)
2501 (void)PerlProc_pclose(PL_rsfp);
2502 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2503 PerlIO_clearerr(PL_rsfp);
2504 else
2505 (void)PerlIO_close(PL_rsfp);
2506 PL_rsfp = Nullfp;
2507 PL_doextract = FALSE;
2508 }
2509 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2510 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2511 sv_catpv(PL_linestr,";}");
2512 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2513 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2514 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2515 PL_minus_n = PL_minus_p = 0;
2516 goto retry;
2517 }
2518 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2519 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2520 sv_setpv(PL_linestr,"");
2521 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2522 }
2523 /* if it looks like the start of a BOM, check if it in fact is */
2524 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2525#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2526# ifdef __GNU_LIBRARY__
2527# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2528# define FTELL_FOR_PIPE_IS_BROKEN
2529# endif
e3f494f1
JH
2530# else
2531# ifdef __GLIBC__
2532# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2533# define FTELL_FOR_PIPE_IS_BROKEN
2534# endif
2535# endif
226017aa
DD
2536# endif
2537#endif
2538#ifdef FTELL_FOR_PIPE_IS_BROKEN
2539 /* This loses the possibility to detect the bof
2540 * situation on perl -P when the libc5 is being used.
2541 * Workaround? Maybe attach some extra state to PL_rsfp?
2542 */
2543 if (!PL_preprocess)
7e28d3af 2544 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2545#else
eb160463 2546 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2547#endif
7e28d3af 2548 if (bof) {
3280af22 2549 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2550 s = swallow_bom((U8*)s);
e929a76b 2551 }
378cc40b 2552 }
3280af22 2553 if (PL_doextract) {
a0d0e21e
LW
2554 /* Incest with pod. */
2555 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2556 sv_setpv(PL_linestr, "");
2557 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2559 PL_last_lop = PL_last_uni = Nullch;
3280af22 2560 PL_doextract = FALSE;
a0d0e21e 2561 }
4e553d73 2562 }
463ee0b2 2563 incline(s);
3280af22
NIS
2564 } while (PL_doextract);
2565 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2566 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2567 SV *sv = NEWSV(85,0);
a687059c 2568
93a17b20 2569 sv_upgrade(sv, SVt_PVMG);
3280af22 2570 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2571 (void)SvIOK_on(sv);
2572 SvIVX(sv) = 0;
57843af0 2573 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2574 }
3280af22 2575 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2576 PL_last_lop = PL_last_uni = Nullch;
57843af0 2577 if (CopLINE(PL_curcop) == 1) {
3280af22 2578 while (s < PL_bufend && isSPACE(*s))
79072805 2579 s++;
a0d0e21e 2580 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2581 s++;
44a8e56a 2582 d = Nullch;
3280af22 2583 if (!PL_in_eval) {
44a8e56a 2584 if (*s == '#' && *(s+1) == '!')
2585 d = s + 2;
2586#ifdef ALTERNATE_SHEBANG
2587 else {
2588 static char as[] = ALTERNATE_SHEBANG;
2589 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2590 d = s + (sizeof(as) - 1);
2591 }
2592#endif /* ALTERNATE_SHEBANG */
2593 }
2594 if (d) {
b8378b72 2595 char *ipath;
774d564b 2596 char *ipathend;
b8378b72 2597
774d564b 2598 while (isSPACE(*d))
b8378b72
CS
2599 d++;
2600 ipath = d;
774d564b 2601 while (*d && !isSPACE(*d))
2602 d++;
2603 ipathend = d;
2604
2605#ifdef ARG_ZERO_IS_SCRIPT
2606 if (ipathend > ipath) {
2607 /*
2608 * HP-UX (at least) sets argv[0] to the script name,
2609 * which makes $^X incorrect. And Digital UNIX and Linux,
2610 * at least, set argv[0] to the basename of the Perl
2611 * interpreter. So, having found "#!", we'll set it right.
2612 */
ee2f7564 2613 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2614 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2615 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2616 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2617 SvSETMAGIC(x);
2618 }
556c1dec
JH
2619 else {
2620 STRLEN blen;
2621 STRLEN llen;
2622 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2623 char *lstart = SvPV(x,llen);
2624 if (llen < blen) {
2625 bstart += blen - llen;
2626 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2627 sv_setpvn(x, ipath, ipathend - ipath);
2628 SvSETMAGIC(x);
2629 }
2630 }
2631 }
774d564b 2632 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2633 }
774d564b 2634#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2635
2636 /*
2637 * Look for options.
2638 */
748a9306 2639 d = instr(s,"perl -");
84e30d1a 2640 if (!d) {
748a9306 2641 d = instr(s,"perl");
84e30d1a
GS
2642#if defined(DOSISH)
2643 /* avoid getting into infinite loops when shebang
2644 * line contains "Perl" rather than "perl" */
2645 if (!d) {
2646 for (d = ipathend-4; d >= ipath; --d) {
2647 if ((*d == 'p' || *d == 'P')
2648 && !ibcmp(d, "perl", 4))
2649 {
2650 break;
2651 }
2652 }
2653 if (d < ipath)
2654 d = Nullch;
2655 }
2656#endif
2657 }
44a8e56a 2658#ifdef ALTERNATE_SHEBANG
2659 /*
2660 * If the ALTERNATE_SHEBANG on this system starts with a
2661 * character that can be part of a Perl expression, then if
2662 * we see it but not "perl", we're probably looking at the
2663 * start of Perl code, not a request to hand off to some
2664 * other interpreter. Similarly, if "perl" is there, but
2665 * not in the first 'word' of the line, we assume the line
2666 * contains the start of the Perl program.
44a8e56a 2667 */
2668 if (d && *s != '#') {
774d564b 2669 char *c = ipath;
44a8e56a 2670 while (*c && !strchr("; \t\r\n\f\v#", *c))
2671 c++;
2672 if (c < d)
2673 d = Nullch; /* "perl" not in first word; ignore */
2674 else
2675 *s = '#'; /* Don't try to parse shebang line */
2676 }
774d564b 2677#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2678#ifndef MACOS_TRADITIONAL
748a9306 2679 if (!d &&
44a8e56a 2680 *s == '#' &&
774d564b 2681 ipathend > ipath &&
3280af22 2682 !PL_minus_c &&
748a9306 2683 !instr(s,"indir") &&
3280af22 2684 instr(PL_origargv[0],"perl"))
748a9306 2685 {
9f68db38 2686 char **newargv;
9f68db38 2687
774d564b 2688 *ipathend = '\0';
2689 s = ipathend + 1;
3280af22 2690 while (s < PL_bufend && isSPACE(*s))
9f68db38 2691 s++;
3280af22
NIS
2692 if (s < PL_bufend) {
2693 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2694 newargv[1] = s;
3280af22 2695 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2696 s++;
2697 *s = '\0';
3280af22 2698 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2699 }
2700 else
3280af22 2701 newargv = PL_origargv;
774d564b 2702 newargv[0] = ipath;
b4748376 2703 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2704 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2705 }
bf4acbe4 2706#endif
748a9306 2707 if (d) {
3280af22
NIS
2708 U32 oldpdb = PL_perldb;
2709 bool oldn = PL_minus_n;
2710 bool oldp = PL_minus_p;
748a9306
LW
2711
2712 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2713 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2714
2715 if (*d++ == '-') {
a11ec5a9 2716 bool switches_done = PL_doswitches;
8cc95fdb 2717 do {
2718 if (*d == 'M' || *d == 'm') {
2719 char *m = d;
2720 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2721 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2722 (int)(d - m), m);
2723 }
2724 d = moreswitches(d);
2725 } while (d);
155aba94
GS
2726 if ((PERLDB_LINE && !oldpdb) ||
2727 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2728 /* if we have already added "LINE: while (<>) {",
2729 we must not do it again */
748a9306 2730 {
3280af22
NIS
2731 sv_setpv(PL_linestr, "");
2732 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2733 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2734 PL_last_lop = PL_last_uni = Nullch;
3280af22 2735 PL_preambled = FALSE;
84902520 2736 if (PERLDB_LINE)
3280af22 2737 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2738 goto retry;
2739 }
a11ec5a9
RGS
2740 if (PL_doswitches && !switches_done) {
2741 int argc = PL_origargc;
2742 char **argv = PL_origargv;
2743 do {
2744 argc--,argv++;
2745 } while (argc && argv[0][0] == '-' && argv[0][1]);
2746 init_argv_symbols(argc,argv);
2747 }
a0d0e21e 2748 }
79072805 2749 }
9f68db38 2750 }
79072805 2751 }
3280af22
NIS
2752 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2753 PL_bufptr = s;
2754 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2755 return yylex();
ae986130 2756 }
378cc40b 2757 goto retry;
4fdae800 2758 case '\r':
6a27c188 2759#ifdef PERL_STRICT_CR
cea2e8a9 2760 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2761 Perl_croak(aTHX_
cc507455 2762 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2763#endif
4fdae800 2764 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2765#ifdef MACOS_TRADITIONAL
2766 case '\312':
2767#endif
378cc40b
LW
2768 s++;
2769 goto retry;
378cc40b 2770 case '#':
e929a76b 2771 case '\n':
3280af22 2772 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2773 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2774 /* handle eval qq[#line 1 "foo"\n ...] */
2775 CopLINE_dec(PL_curcop);
2776 incline(s);
2777 }
3280af22 2778 d = PL_bufend;
a687059c 2779 while (s < d && *s != '\n')
378cc40b 2780 s++;
0f85fab0 2781 if (s < d)
378cc40b 2782 s++;
78c267c1 2783 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2784 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2785 incline(s);
3280af22
NIS
2786 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2787 PL_bufptr = s;
2788 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2789 return yylex();
a687059c 2790 }
378cc40b 2791 }
a687059c 2792 else {
378cc40b 2793 *s = '\0';
3280af22 2794 PL_bufend = s;
a687059c 2795 }
378cc40b
LW
2796 goto retry;
2797 case '-':
79072805 2798 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2799 I32 ftst = 0;
2800
378cc40b 2801 s++;
3280af22 2802 PL_bufptr = s;
748a9306
LW
2803 tmp = *s++;
2804
bf4acbe4 2805 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2806 s++;
2807
2808 if (strnEQ(s,"=>",2)) {
3280af22 2809 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2810 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2811 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2812 } );
748a9306
LW
2813 OPERATOR('-'); /* unary minus */
2814 }
3280af22 2815 PL_last_uni = PL_oldbufptr;
748a9306 2816 switch (tmp) {
e5edeb50
JH
2817 case 'r': ftst = OP_FTEREAD; break;
2818 case 'w': ftst = OP_FTEWRITE; break;
2819 case 'x': ftst = OP_FTEEXEC; break;
2820 case 'o': ftst = OP_FTEOWNED; break;
2821 case 'R': ftst = OP_FTRREAD; break;
2822 case 'W': ftst = OP_FTRWRITE; break;
2823 case 'X': ftst = OP_FTREXEC; break;
2824 case 'O': ftst = OP_FTROWNED; break;
2825 case 'e': ftst = OP_FTIS; break;
2826 case 'z': ftst = OP_FTZERO; break;
2827 case 's': ftst = OP_FTSIZE; break;
2828 case 'f': ftst = OP_FTFILE; break;
2829 case 'd': ftst = OP_FTDIR; break;
2830 case 'l': ftst = OP_FTLINK; break;
2831 case 'p': ftst = OP_FTPIPE; break;
2832 case 'S': ftst = OP_FTSOCK; break;
2833 case 'u': ftst = OP_FTSUID; break;
2834 case 'g': ftst = OP_FTSGID; break;
2835 case 'k': ftst = OP_FTSVTX; break;
2836 case 'b': ftst = OP_FTBLK; break;
2837 case 'c': ftst = OP_FTCHR; break;
2838 case 't': ftst = OP_FTTTY; break;
2839 case 'T': ftst = OP_FTTEXT; break;
2840 case 'B': ftst = OP_FTBINARY; break;
2841 case 'M': case 'A': case 'C':
2842 gv_fetchpv("\024",TRUE, SVt_PV);
2843 switch (tmp) {
2844 case 'M': ftst = OP_FTMTIME; break;
2845 case 'A': ftst = OP_FTATIME; break;
2846 case 'C': ftst = OP_FTCTIME; break;
2847 default: break;
2848 }
2849 break;
378cc40b 2850 default:
378cc40b
LW
2851 break;
2852 }
e5edeb50 2853 if (ftst) {
eb160463 2854 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2855 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2856 "### Saw file test %c\n", (int)ftst);
5f80b19c 2857 } );
e5edeb50
JH
2858 FTST(ftst);
2859 }
2860 else {
2861 /* Assume it was a minus followed by a one-letter named
2862 * subroutine call (or a -bareword), then. */
95c31fe3 2863 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2864 "### %c looked like a file test but was not\n",
2865 (int)ftst);
5f80b19c 2866 } );
e5edeb50
JH
2867 s -= 2;
2868 }
378cc40b 2869 }
a687059c
LW
2870 tmp = *s++;
2871 if (*s == tmp) {
2872 s++;
3280af22 2873 if (PL_expect == XOPERATOR)
79072805
LW
2874 TERM(POSTDEC);
2875 else
2876 OPERATOR(PREDEC);
2877 }
2878 else if (*s == '>') {
2879 s++;
2880 s = skipspace(s);
7e2040f0 2881 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2882 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2883 TOKEN(ARROW);
79072805 2884 }
748a9306
LW
2885 else if (*s == '$')
2886 OPERATOR(ARROW);
463ee0b2 2887 else
748a9306 2888 TERM(ARROW);
a687059c 2889 }
3280af22 2890 if (PL_expect == XOPERATOR)
79072805
LW
2891 Aop(OP_SUBTRACT);
2892 else {
3280af22 2893 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2894 check_uni();
79072805 2895 OPERATOR('-'); /* unary minus */
2f3197b3 2896 }
79072805 2897
378cc40b 2898 case '+':
a687059c
LW
2899 tmp = *s++;
2900 if (*s == tmp) {
378cc40b 2901 s++;
3280af22 2902 if (PL_expect == XOPERATOR)
79072805
LW
2903 TERM(POSTINC);
2904 else
2905 OPERATOR(PREINC);
378cc40b 2906 }
3280af22 2907 if (PL_expect == XOPERATOR)
79072805
LW
2908 Aop(OP_ADD);
2909 else {
3280af22 2910 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2911 check_uni();
a687059c 2912 OPERATOR('+');
2f3197b3 2913 }
a687059c 2914
378cc40b 2915 case '*':
3280af22
NIS
2916 if (PL_expect != XOPERATOR) {
2917 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2918 PL_expect = XOPERATOR;
2919 force_ident(PL_tokenbuf, '*');
2920 if (!*PL_tokenbuf)
a0d0e21e 2921 PREREF('*');
79072805 2922 TERM('*');
a687059c 2923 }
79072805
LW
2924 s++;
2925 if (*s == '*') {
a687059c 2926 s++;
79072805 2927 PWop(OP_POW);
a687059c 2928 }
79072805
LW
2929 Mop(OP_MULTIPLY);
2930
378cc40b 2931 case '%':
3280af22 2932 if (PL_expect == XOPERATOR) {
bbce6d69 2933 ++s;
2934 Mop(OP_MODULO);
a687059c 2935 }
3280af22
NIS
2936 PL_tokenbuf[0] = '%';
2937 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2938 if (!PL_tokenbuf[1]) {
2939 if (s == PL_bufend)
bbce6d69 2940 yyerror("Final % should be \\% or %name");
2941 PREREF('%');
a687059c 2942 }
3280af22 2943 PL_pending_ident = '%';
bbce6d69 2944 TERM('%');
a687059c 2945
378cc40b 2946 case '^':
79072805 2947 s++;
a0d0e21e 2948 BOop(OP_BIT_XOR);
79072805 2949 case '[':
3280af22 2950 PL_lex_brackets++;
79072805 2951 /* FALL THROUGH */
378cc40b 2952 case '~':
378cc40b 2953 case ',':
378cc40b
LW
2954 tmp = *s++;
2955 OPERATOR(tmp);
a0d0e21e
LW
2956 case ':':
2957 if (s[1] == ':') {
2958 len = 0;
2959 goto just_a_word;
2960 }
2961 s++;
09bef843
SB
2962 switch (PL_expect) {
2963 OP *attrs;
2964 case XOPERATOR:
2965 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2966 break;
2967 PL_bufptr = s; /* update in case we back off */
2968 goto grabattrs;
2969 case XATTRBLOCK:
2970 PL_expect = XBLOCK;
2971 goto grabattrs;
2972 case XATTRTERM:
2973 PL_expect = XTERMBLOCK;
2974 grabattrs:
2975 s = skipspace(s);
2976 attrs = Nullop;
7e2040f0 2977 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2978 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2979 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2980 if (tmp < 0) tmp = -tmp;
2981 switch (tmp) {
2982 case KEY_or:
2983 case KEY_and:
c963b151 2984 case KEY_err:
f9829d6b
GS
2985 case KEY_for:
2986 case KEY_unless:
2987 case KEY_if:
2988 case KEY_while:
2989 case KEY_until:
2990 goto got_attrs;
2991 default:
2992 break;
2993 }
2994 }
09bef843
SB
2995 if (*d == '(') {
2996 d = scan_str(d,TRUE,TRUE);
2997 if (!d) {
09bef843
SB
2998 /* MUST advance bufptr here to avoid bogus
2999 "at end of line" context messages from yyerror().
3000 */
3001 PL_bufptr = s + len;
3002 yyerror("Unterminated attribute parameter in attribute list");
3003 if (attrs)
3004 op_free(attrs);
3005 return 0; /* EOF indicator */
3006 }
3007 }
3008 if (PL_lex_stuff) {
3009 SV *sv = newSVpvn(s, len);
3010 sv_catsv(sv, PL_lex_stuff);
3011 attrs = append_elem(OP_LIST, attrs,
3012 newSVOP(OP_CONST, 0, sv));
3013 SvREFCNT_dec(PL_lex_stuff);
3014 PL_lex_stuff = Nullsv;
3015 }
3016 else {
d3cea301
SB
3017 /* NOTE: any CV attrs applied here need to be part of
3018 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3019 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3020 CvLVALUE_on(PL_compcv);
3021 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3022 CvLOCKED_on(PL_compcv);
3023 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3024 CvMETHOD_on(PL_compcv);
87ecf892 3025#ifdef USE_ITHREADS
d3cea301
SB
3026 else if (PL_in_my == KEY_our && len == 6 &&
3027 strnEQ(s, "unique", len))
7fb37951 3028 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3029#endif
78f9721b
SM
3030 /* After we've set the flags, it could be argued that
3031 we don't need to do the attributes.pm-based setting
3032 process, and shouldn't bother appending recognized
d3cea301
SB
3033 flags. To experiment with that, uncomment the
3034 following "else". (Note that's already been
3035 uncommented. That keeps the above-applied built-in
3036 attributes from being intercepted (and possibly
3037 rejected) by a package's attribute routines, but is
3038 justified by the performance win for the common case
3039 of applying only built-in attributes.) */
0256094b 3040 else
78f9721b
SM
3041 attrs = append_elem(OP_LIST, attrs,
3042 newSVOP(OP_CONST, 0,
3043 newSVpvn(s, len)));
09bef843
SB
3044 }
3045 s = skipspace(d);
0120eecf 3046 if (*s == ':' && s[1] != ':')
09bef843 3047 s = skipspace(s+1);
0120eecf
GS
3048 else if (s == d)
3049 break; /* require real whitespace or :'s */
09bef843 3050 }
f9829d6b 3051 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3052 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3053 char q = ((*s == '\'') ? '"' : '\'');
3054 /* If here for an expression, and parsed no attrs, back off. */
3055 if (tmp == '=' && !attrs) {
3056 s = PL_bufptr;
3057 break;
3058 }
3059 /* MUST advance bufptr here to avoid bogus "at end of line"
3060 context messages from yyerror().
3061 */
3062 PL_bufptr = s;
3063 if (!*s)
3064 yyerror("Unterminated attribute list");
3065 else
3066 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3067 q, *s, q));
3068 if (attrs)
3069 op_free(attrs);
3070 OPERATOR(':');
3071 }
f9829d6b 3072 got_attrs:
09bef843
SB
3073 if (attrs) {
3074 PL_nextval[PL_nexttoke].opval = attrs;
3075 force_next(THING);
3076 }
3077 TOKEN(COLONATTR);
3078 }
a0d0e21e 3079 OPERATOR(':');
8990e307
LW
3080 case '(':
3081 s++;
3280af22
NIS
3082 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3083 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3084 else
3280af22 3085 PL_expect = XTERM;
a0d0e21e 3086 TOKEN('(');
378cc40b 3087 case ';':
f4dd75d9 3088 CLINE;
378cc40b
LW
3089 tmp = *s++;
3090 OPERATOR(tmp);
3091 case ')':
378cc40b 3092 tmp = *s++;
16d20bd9
AD
3093 s = skipspace(s);
3094 if (*s == '{')
3095 PREBLOCK(tmp);
378cc40b 3096 TERM(tmp);
79072805
LW
3097 case ']':
3098 s++;
3280af22 3099 if (PL_lex_brackets <= 0)
d98d5fff 3100 yyerror("Unmatched right square bracket");
463ee0b2 3101 else
3280af22
NIS
3102 --PL_lex_brackets;
3103 if (PL_lex_state == LEX_INTERPNORMAL) {
3104 if (PL_lex_brackets == 0) {
a0d0e21e 3105 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3106 PL_lex_state = LEX_INTERPEND;
79072805
LW
3107 }
3108 }
4633a7c4 3109 TERM(']');
79072805
LW
3110 case '{':
3111 leftbracket:
79072805 3112 s++;
3280af22
NIS
3113 if (PL_lex_brackets > 100) {
3114 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3115 if (newlb != PL_lex_brackstack) {
8990e307 3116 SAVEFREEPV(newlb);
3280af22 3117 PL_lex_brackstack = newlb;
8990e307
LW
3118 }
3119 }
3280af22 3120 switch (PL_expect) {
a0d0e21e 3121 case XTERM:
3280af22 3122 if (PL_lex_formbrack) {
a0d0e21e
LW
3123 s--;
3124 PRETERMBLOCK(DO);
3125 }
3280af22
NIS
3126 if (PL_oldoldbufptr == PL_last_lop)
3127 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3128 else
3280af22 3129 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3130 OPERATOR(HASHBRACK);
a0d0e21e 3131 case XOPERATOR:
bf4acbe4 3132 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3133 s++;
44a8e56a 3134 d = s;
3280af22
NIS
3135 PL_tokenbuf[0] = '\0';
3136 if (d < PL_bufend && *d == '-') {
3137 PL_tokenbuf[0] = '-';
44a8e56a 3138 d++;
bf4acbe4 3139 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3140 d++;
3141 }
7e2040f0 3142 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3143 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3144 FALSE, &len);
bf4acbe4 3145 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3146 d++;
3147 if (*d == '}') {
3280af22 3148 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3149 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3150 if (minus)
3151 force_next('-');
748a9306
LW
3152 }
3153 }
3154 /* FALL THROUGH */
09bef843 3155 case XATTRBLOCK:
748a9306 3156 case XBLOCK:
3280af22
NIS
3157 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3158 PL_expect = XSTATE;
a0d0e21e 3159 break;
09bef843 3160 case XATTRTERM:
a0d0e21e 3161 case XTERMBLOCK:
3280af22
NIS
3162 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3163 PL_expect = XSTATE;
a0d0e21e
LW
3164 break;
3165 default: {
3166 char *t;
3280af22
NIS
3167 if (PL_oldoldbufptr == PL_last_lop)
3168 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3169 else
3280af22 3170 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3171 s = skipspace(s);
8452ff4b
SB
3172 if (*s == '}') {
3173 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3174 PL_expect = XTERM;
3175 /* This hack is to get the ${} in the message. */
3176 PL_bufptr = s+1;
3177 yyerror("syntax error");
3178 break;
3179 }
a0d0e21e 3180 OPERATOR(HASHBRACK);
8452ff4b 3181 }
b8a4b1be
GS
3182 /* This hack serves to disambiguate a pair of curlies
3183 * as being a block or an anon hash. Normally, expectation
3184 * determines that, but in cases where we're not in a
3185 * position to expect anything in particular (like inside
3186 * eval"") we have to resolve the ambiguity. This code
3187 * covers the case where the first term in the curlies is a
3188 * quoted string. Most other cases need to be explicitly
3189 * disambiguated by prepending a `+' before the opening
3190 * curly in order to force resolution as an anon hash.
3191 *
3192 * XXX should probably propagate the outer expectation
3193 * into eval"" to rely less on this hack, but that could
3194 * potentially break current behavior of eval"".
3195 * GSAR 97-07-21
3196 */
3197 t = s;
3198 if (*s == '\'' || *s == '"' || *s == '`') {
3199 /* common case: get past first string, handling escapes */
3280af22 3200 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3201 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3202 t++;
3203 t++;
a0d0e21e 3204 }
b8a4b1be 3205 else if (*s == 'q') {
3280af22 3206 if (++t < PL_bufend
b8a4b1be 3207 && (!isALNUM(*t)
3280af22 3208 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3209 && !isALNUM(*t))))
3210 {
b8a4b1be
GS
3211 char *tmps;
3212 char open, close, term;
3213 I32 brackets = 1;
3214
3280af22 3215 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3216 t++;
3217 term = *t;
3218 open = term;
3219 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3220 term = tmps[5];
3221 close = term;
3222 if (open == close)
3280af22
NIS
3223 for (t++; t < PL_bufend; t++) {
3224 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3225 t++;
6d07e5e9 3226 else if (*t == open)
b8a4b1be
GS
3227 break;
3228 }
3229 else
3280af22
NIS
3230 for (t++; t < PL_bufend; t++) {
3231 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3232 t++;
6d07e5e9 3233 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3234 break;
3235 else if (*t == open)
3236 brackets++;
3237 }
3238 }
3239 t++;
a0d0e21e 3240 }
7e2040f0 3241 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3242 t += UTF8SKIP(t);
7e2040f0 3243 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3244 t += UTF8SKIP(t);
a0d0e21e 3245 }
3280af22 3246 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3247 t++;
b8a4b1be
GS
3248 /* if comma follows first term, call it an anon hash */
3249 /* XXX it could be a comma expression with loop modifiers */
3280af22 3250 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3251 || (*t == '=' && t[1] == '>')))
a0d0e21e 3252 OPERATOR(HASHBRACK);
3280af22 3253 if (PL_expect == XREF)
4e4e412b 3254 PL_expect = XTERM;
a0d0e21e 3255 else {
3280af22
NIS
3256 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3257 PL_expect = XSTATE;
a0d0e21e 3258 }
8990e307 3259 }
a0d0e21e 3260 break;
463ee0b2 3261 }
57843af0 3262 yylval.ival = CopLINE(PL_curcop);
79072805 3263 if (isSPACE(*s) || *s == '#')
3280af22 3264 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3265 TOKEN('{');
378cc40b 3266 case '}':
79072805
LW
3267 rightbracket:
3268 s++;
3280af22 3269 if (PL_lex_brackets <= 0)
d98d5fff 3270 yyerror("Unmatched right curly bracket");
463ee0b2 3271 else
3280af22 3272 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3273 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3274 PL_lex_formbrack = 0;
3275 if (PL_lex_state == LEX_INTERPNORMAL) {
3276 if (PL_lex_brackets == 0) {
9059aa12
LW
3277 if (PL_expect & XFAKEBRACK) {
3278 PL_expect &= XENUMMASK;
3280af22
NIS
3279 PL_lex_state = LEX_INTERPEND;
3280 PL_bufptr = s;
cea2e8a9 3281 return yylex(); /* ignore fake brackets */
79072805 3282 }
fa83b5b6 3283 if (*s == '-' && s[1] == '>')
3280af22 3284 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3285 else if (*s != '[' && *s != '{')
3280af22 3286 PL_lex_state = LEX_INTERPEND;
79072805
LW
3287 }
3288 }
9059aa12
LW
3289 if (PL_expect & XFAKEBRACK) {
3290 PL_expect &= XENUMMASK;
3280af22 3291 PL_bufptr = s;
cea2e8a9 3292 return yylex(); /* ignore fake brackets */
748a9306 3293 }
79072805
LW
3294 force_next('}');
3295 TOKEN(';');
378cc40b
LW
3296 case '&':
3297 s++;
3298 tmp = *s++;
3299 if (tmp == '&')
a0d0e21e 3300 AOPERATOR(ANDAND);
378cc40b 3301 s--;
3280af22 3302 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3303 if (ckWARN(WARN_SEMICOLON)
3304 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3305 {
57843af0 3306 CopLINE_dec(PL_curcop);
9014280d 3307 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3308 CopLINE_inc(PL_curcop);
463ee0b2 3309 }
79072805 3310 BAop(OP_BIT_AND);
463ee0b2 3311 }
79072805 3312
3280af22
NIS
3313 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3314 if (*PL_tokenbuf) {
3315 PL_expect = XOPERATOR;
3316 force_ident(PL_tokenbuf, '&');
463ee0b2 3317 }
79072805
LW
3318 else
3319 PREREF('&');
c07a80fd 3320 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3321 TERM('&');
3322
378cc40b
LW
3323 case '|':
3324 s++;
3325 tmp = *s++;
3326 if (tmp == '|')
a0d0e21e 3327 AOPERATOR(OROR);
378cc40b 3328 s--;
79072805 3329 BOop(OP_BIT_OR);
378cc40b
LW
3330 case '=':
3331 s++;
3332 tmp = *s++;
3333 if (tmp == '=')
79072805
LW
3334 Eop(OP_EQ);
3335 if (tmp == '>')
3336 OPERATOR(',');
378cc40b 3337 if (tmp == '~')
79072805 3338 PMop(OP_MATCH);
599cee73 3339 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3340 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3341 s--;
3280af22
NIS
3342 if (PL_expect == XSTATE && isALPHA(tmp) &&
3343 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3344 {
3280af22
NIS
3345 if (PL_in_eval && !PL_rsfp) {
3346 d = PL_bufend;
a5f75d66
AD
3347 while (s < d) {
3348 if (*s++ == '\n') {
3349 incline(s);
3350 if (strnEQ(s,"=cut",4)) {
3351 s = strchr(s,'\n');
3352 if (s)
3353 s++;
3354 else
3355 s = d;
3356 incline(s);
3357 goto retry;
3358 }
3359 }
3360 }
3361 goto retry;
3362 }
3280af22
NIS
3363 s = PL_bufend;
3364 PL_doextract = TRUE;
a0d0e21e
LW
3365 goto retry;
3366 }
3280af22 3367 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3368 char *t;
51882d45 3369#ifdef PERL_STRICT_CR
bf4acbe4 3370 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3371#else
bf4acbe4 3372 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3373#endif
a0d0e21e
LW
3374 if (*t == '\n' || *t == '#') {
3375 s--;
3280af22 3376 PL_expect = XBLOCK;
a0d0e21e
LW
3377 goto leftbracket;
3378 }
79072805 3379 }
a0d0e21e
LW
3380 yylval.ival = 0;
3381 OPERATOR(ASSIGNOP);
378cc40b
LW
3382 case '!':
3383 s++;
3384 tmp = *s++;
3385 if (tmp == '=')
79072805 3386 Eop(OP_NE);
378cc40b 3387 if (tmp == '~')
79072805 3388 PMop(OP_NOT);
378cc40b
LW
3389 s--;
3390 OPERATOR('!');
3391 case '<':
3280af22 3392 if (PL_expect != XOPERATOR) {
93a17b20 3393 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3394 check_uni();
79072805
LW
3395 if (s[1] == '<')
3396 s = scan_heredoc(s);
3397 else
3398 s = scan_inputsymbol(s);
3399 TERM(sublex_start());
378cc40b
LW
3400 }
3401 s++;
3402 tmp = *s++;
3403 if (tmp == '<')
79072805 3404 SHop(OP_LEFT_SHIFT);
395c3793
LW
3405 if (tmp == '=') {
3406 tmp = *s++;
3407 if (tmp == '>')
79072805 3408 Eop(OP_NCMP);
395c3793 3409 s--;
79072805 3410 Rop(OP_LE);
395c3793 3411 }
378cc40b 3412 s--;
79072805 3413 Rop(OP_LT);
378cc40b
LW
3414 case '>':
3415 s++;
3416 tmp = *s++;
3417 if (tmp == '>')
79072805 3418 SHop(OP_RIGHT_SHIFT);
378cc40b 3419 if (tmp == '=')
79072805 3420 Rop(OP_GE);
378cc40b 3421 s--;
79072805 3422 Rop(OP_GT);
378cc40b
LW
3423
3424 case '$':
bbce6d69 3425 CLINE;
3426
3280af22
NIS
3427 if (PL_expect == XOPERATOR) {
3428 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3429 PL_expect = XTERM;
a0d0e21e 3430 depcom();
bbce6d69 3431 return ','; /* grandfather non-comma-format format */
a0d0e21e 3432 }
8990e307 3433 }
a0d0e21e 3434
7e2040f0 3435 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3436 PL_tokenbuf[0] = '@';
376b8730
SM
3437 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3438 sizeof PL_tokenbuf - 1, FALSE);
3439 if (PL_expect == XOPERATOR)
3440 no_op("Array length", s);
3280af22 3441 if (!PL_tokenbuf[1])
a0d0e21e 3442 PREREF(DOLSHARP);
3280af22
NIS
3443 PL_expect = XOPERATOR;
3444 PL_pending_ident = '#';
463ee0b2 3445 TOKEN(DOLSHARP);
79072805 3446 }
bbce6d69 3447
3280af22 3448 PL_tokenbuf[0] = '$';
376b8730
SM
3449 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3450 sizeof PL_tokenbuf - 1, FALSE);
3451 if (PL_expect == XOPERATOR)
3452 no_op("Scalar", s);
3280af22
NIS
3453 if (!PL_tokenbuf[1]) {
3454 if (s == PL_bufend)
bbce6d69 3455 yyerror("Final $ should be \\$ or $name");
3456 PREREF('$');
8990e307 3457 }
a0d0e21e 3458
bbce6d69 3459 /* This kludge not intended to be bulletproof. */
3280af22 3460 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3461 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3462 newSViv(PL_compiling.cop_arybase));
bbce6d69 3463 yylval.opval->op_private = OPpCONST_ARYBASE;
3464 TERM(THING);
3465 }
3466
ff68c719 3467 d = s;
69d2bceb 3468 tmp = (I32)*s;
3280af22 3469 if (PL_lex_state == LEX_NORMAL)
ff68c719 3470 s = skipspace(s);
3471
3280af22 3472 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3473 char *t;
3474 if (*s == '[') {
3280af22 3475 PL_tokenbuf[0] = '@';
599cee73 3476 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3477 for(t = s + 1;
7e2040f0 3478 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3479 t++) ;
a0d0e21e 3480 if (*t++ == ',') {
3280af22
NIS
3481 PL_bufptr = skipspace(PL_bufptr);
3482 while (t < PL_bufend && *t != ']')
bbce6d69 3483 t++;
9014280d 3484 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3485 "Multidimensional syntax %.*s not supported",
3486 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3487 }
3488 }
bbce6d69 3489 }
3490 else if (*s == '{') {
3280af22 3491 PL_tokenbuf[0] = '%';
599cee73 3492 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3493 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3494 {
3280af22 3495 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3496 STRLEN len;
3497 for (t++; isSPACE(*t); t++) ;
7e2040f0 3498 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3499 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3500 for (; isSPACE(*t); t++) ;
864dbfa3 3501 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3502 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3503 "You need to quote \"%s\"", tmpbuf);
748a9306 3504 }
93a17b20
LW
3505 }
3506 }
2f3197b3 3507 }
bbce6d69 3508
3280af22 3509 PL_expect = XOPERATOR;
69d2bceb 3510