This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: Smoking patch 8898 for perl v5.7.0 on aix 4.3.0.0
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
075953c3
JH
129/* Note that REPORT() and REPORT2() will be expressions that supply
130 * their own trailing comma, not suitable for statements as such. */
998054bd 131#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
132# define REPORT(x,retval) tokereport(x,s,(int)retval),
133# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 134#else
075953c3
JH
135# define REPORT(x,retval)
136# define REPORT2(x,retval)
998054bd
SC
137#endif
138
075953c3
JH
139#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
140#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
141#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
142#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
143#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
144#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
145#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
146#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
147#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
148#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
149#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
150#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
151#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
152#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
153#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
154#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
155#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
156#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
157#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
158#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 159
a687059c
LW
160/* This bit of chicanery makes a unary function followed by
161 * a parenthesis into a function with one argument, highest precedence.
162 */
2f3197b3 163#define UNI(f) return(yylval.ival = f, \
075953c3 164 REPORT("uni",f) \
3280af22
NIS
165 PL_expect = XTERM, \
166 PL_bufptr = s, \
167 PL_last_uni = PL_oldbufptr, \
168 PL_last_lop_op = f, \
a687059c
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
79072805 171#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 172 REPORT("uni",f) \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
79072805
LW
175 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
176
9f68db38 177/* grandfather return to old style */
3280af22 178#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 179
2d00ba3b 180STATIC void
61b2116b 181S_tokereport(pTHX_ char *thing, char* s, I32 rv)
998054bd
SC
182{
183 SV *report;
184 DEBUG_T({
185 report = newSVpv(thing, 0);
61b2116b 186 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
187
188 if (s - PL_bufptr > 0)
189 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
190 else {
191 if (PL_oldbufptr && *PL_oldbufptr)
192 sv_catpv(report, PL_tokenbuf);
193 }
194 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
195 })
196}
197
ffb4593c
NT
198/*
199 * S_ao
200 *
201 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
202 * into an OP_ANDASSIGN or OP_ORASSIGN
203 */
204
76e3520e 205STATIC int
cea2e8a9 206S_ao(pTHX_ int toketype)
a0d0e21e 207{
3280af22
NIS
208 if (*PL_bufptr == '=') {
209 PL_bufptr++;
a0d0e21e
LW
210 if (toketype == ANDAND)
211 yylval.ival = OP_ANDASSIGN;
212 else if (toketype == OROR)
213 yylval.ival = OP_ORASSIGN;
214 toketype = ASSIGNOP;
215 }
216 return toketype;
217}
218
ffb4593c
NT
219/*
220 * S_no_op
221 * When Perl expects an operator and finds something else, no_op
222 * prints the warning. It always prints "<something> found where
223 * operator expected. It prints "Missing semicolon on previous line?"
224 * if the surprise occurs at the start of the line. "do you need to
225 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
226 * where the compiler doesn't know if foo is a method call or a function.
227 * It prints "Missing operator before end of line" if there's nothing
228 * after the missing operator, or "... before <...>" if there is something
229 * after the missing operator.
230 */
231
76e3520e 232STATIC void
cea2e8a9 233S_no_op(pTHX_ char *what, char *s)
463ee0b2 234{
3280af22
NIS
235 char *oldbp = PL_bufptr;
236 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 237
1189a94a
GS
238 if (!s)
239 s = oldbp;
07c798fb 240 else
1189a94a 241 PL_bufptr = s;
cea2e8a9 242 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 243 if (is_first)
cea2e8a9 244 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 245 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 246 char *t;
7e2040f0 247 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 248 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 249 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 250 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 251 }
07c798fb
HS
252 else {
253 assert(s >= oldbp);
cea2e8a9 254 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 255 }
3280af22 256 PL_bufptr = oldbp;
8990e307
LW
257}
258
ffb4593c
NT
259/*
260 * S_missingterm
261 * Complain about missing quote/regexp/heredoc terminator.
262 * If it's called with (char *)NULL then it cauterizes the line buffer.
263 * If we're in a delimited string and the delimiter is a control
264 * character, it's reformatted into a two-char sequence like ^C.
265 * This is fatal.
266 */
267
76e3520e 268STATIC void
cea2e8a9 269S_missingterm(pTHX_ char *s)
8990e307
LW
270{
271 char tmpbuf[3];
272 char q;
273 if (s) {
274 char *nl = strrchr(s,'\n');
d2719217 275 if (nl)
8990e307
LW
276 *nl = '\0';
277 }
9d116dd7
JH
278 else if (
279#ifdef EBCDIC
280 iscntrl(PL_multi_close)
281#else
282 PL_multi_close < 32 || PL_multi_close == 127
283#endif
284 ) {
8990e307 285 *tmpbuf = '^';
3280af22 286 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
287 s = "\\n";
288 tmpbuf[2] = '\0';
289 s = tmpbuf;
290 }
291 else {
3280af22 292 *tmpbuf = PL_multi_close;
8990e307
LW
293 tmpbuf[1] = '\0';
294 s = tmpbuf;
295 }
296 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 297 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 298}
79072805 299
ffb4593c
NT
300/*
301 * Perl_deprecate
ffb4593c
NT
302 */
303
79072805 304void
864dbfa3 305Perl_deprecate(pTHX_ char *s)
a0d0e21e 306{
599cee73 307 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 308 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
309}
310
ffb4593c
NT
311/*
312 * depcom
9cbb5ea2 313 * Deprecate a comma-less variable list.
ffb4593c
NT
314 */
315
76e3520e 316STATIC void
cea2e8a9 317S_depcom(pTHX)
a0d0e21e
LW
318{
319 deprecate("comma-less variable list");
320}
321
ffb4593c 322/*
9cbb5ea2
GS
323 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
324 * utf16-to-utf8-reversed.
ffb4593c
NT
325 */
326
c39cd008
GS
327#ifdef PERL_CR_FILTER
328static void
329strip_return(SV *sv)
330{
331 register char *s = SvPVX(sv);
332 register char *e = s + SvCUR(sv);
333 /* outer loop optimized to do nothing if there are no CR-LFs */
334 while (s < e) {
335 if (*s++ == '\r' && *s == '\n') {
336 /* hit a CR-LF, need to copy the rest */
337 register char *d = s - 1;
338 *d++ = *s++;
339 while (s < e) {
340 if (*s == '\r' && s[1] == '\n')
341 s++;
342 *d++ = *s++;
343 }
344 SvCUR(sv) -= s - d;
345 return;
346 }
347 }
348}
a868473f 349
76e3520e 350STATIC I32
c39cd008 351S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 352{
c39cd008
GS
353 I32 count = FILTER_READ(idx+1, sv, maxlen);
354 if (count > 0 && !maxlen)
355 strip_return(sv);
356 return count;
a868473f
NIS
357}
358#endif
359
ffb4593c
NT
360/*
361 * Perl_lex_start
9cbb5ea2
GS
362 * Initialize variables. Uses the Perl save_stack to save its state (for
363 * recursive calls to the parser).
ffb4593c
NT
364 */
365
a0d0e21e 366void
864dbfa3 367Perl_lex_start(pTHX_ SV *line)
79072805 368{
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
18b09519 386 }
57843af0 387 SAVECOPLINE(PL_curcop);
3280af22
NIS
388 SAVEPPTR(PL_bufptr);
389 SAVEPPTR(PL_bufend);
390 SAVEPPTR(PL_oldbufptr);
391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
392 SAVEPPTR(PL_last_lop);
393 SAVEPPTR(PL_last_uni);
3280af22
NIS
394 SAVEPPTR(PL_linestart);
395 SAVESPTR(PL_linestr);
396 SAVEPPTR(PL_lex_brackstack);
397 SAVEPPTR(PL_lex_casestack);
c76ac1ee 398 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
399 SAVESPTR(PL_lex_stuff);
400 SAVEI32(PL_lex_defer);
09bef843 401 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 402 SAVESPTR(PL_lex_repl);
bebdddfc
GS
403 SAVEINT(PL_expect);
404 SAVEINT(PL_lex_expect);
3280af22
NIS
405
406 PL_lex_state = LEX_NORMAL;
407 PL_lex_defer = 0;
408 PL_expect = XSTATE;
409 PL_lex_brackets = 0;
3280af22
NIS
410 New(899, PL_lex_brackstack, 120, char);
411 New(899, PL_lex_casestack, 12, char);
412 SAVEFREEPV(PL_lex_brackstack);
413 SAVEFREEPV(PL_lex_casestack);
414 PL_lex_casemods = 0;
415 *PL_lex_casestack = '\0';
416 PL_lex_dojoin = 0;
417 PL_lex_starts = 0;
418 PL_lex_stuff = Nullsv;
419 PL_lex_repl = Nullsv;
420 PL_lex_inpat = 0;
76be56bc 421 PL_nexttoke = 0;
3280af22 422 PL_lex_inwhat = 0;
09bef843 423 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
424 PL_linestr = line;
425 if (SvREADONLY(PL_linestr))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 s = SvPV(PL_linestr, len);
8990e307 428 if (len && s[len-1] != ';') {
3280af22
NIS
429 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
430 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
431 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 432 }
3280af22
NIS
433 SvTEMP_off(PL_linestr);
434 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
435 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 436 PL_last_lop = PL_last_uni = Nullch;
3280af22 437 SvREFCNT_dec(PL_rs);
79cb57f6 438 PL_rs = newSVpvn("\n", 1);
3280af22 439 PL_rsfp = 0;
79072805 440}
a687059c 441
ffb4593c
NT
442/*
443 * Perl_lex_end
9cbb5ea2
GS
444 * Finalizer for lexing operations. Must be called when the parser is
445 * done with the lexer.
ffb4593c
NT
446 */
447
463ee0b2 448void
864dbfa3 449Perl_lex_end(pTHX)
463ee0b2 450{
3280af22 451 PL_doextract = FALSE;
463ee0b2
LW
452}
453
ffb4593c
NT
454/*
455 * S_incline
456 * This subroutine has nothing to do with tilting, whether at windmills
457 * or pinball tables. Its name is short for "increment line". It
57843af0 458 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 459 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
460 * # line 500 "foo.pm"
461 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
462 */
463
76e3520e 464STATIC void
cea2e8a9 465S_incline(pTHX_ char *s)
463ee0b2
LW
466{
467 char *t;
468 char *n;
73659bf1 469 char *e;
463ee0b2 470 char ch;
463ee0b2 471
57843af0 472 CopLINE_inc(PL_curcop);
463ee0b2
LW
473 if (*s++ != '#')
474 return;
bf4acbe4 475 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
476 if (strnEQ(s, "line", 4))
477 s += 4;
478 else
479 return;
084592ab 480 if (SPACE_OR_TAB(*s))
73659bf1 481 s++;
4e553d73 482 else
73659bf1 483 return;
bf4acbe4 484 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
485 if (!isDIGIT(*s))
486 return;
487 n = s;
488 while (isDIGIT(*s))
489 s++;
bf4acbe4 490 while (SPACE_OR_TAB(*s))
463ee0b2 491 s++;
73659bf1 492 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 493 s++;
73659bf1
GS
494 e = t + 1;
495 }
463ee0b2 496 else {
463ee0b2 497 for (t = s; !isSPACE(*t); t++) ;
73659bf1 498 e = t;
463ee0b2 499 }
bf4acbe4 500 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
501 e++;
502 if (*e != '\n' && *e != '\0')
503 return; /* false alarm */
504
463ee0b2
LW
505 ch = *t;
506 *t = '\0';
f4dd75d9
GS
507 if (t - s > 0) {
508#ifdef USE_ITHREADS
509 Safefree(CopFILE(PL_curcop));
510#else
511 SvREFCNT_dec(CopFILEGV(PL_curcop));
512#endif
57843af0 513 CopFILE_set(PL_curcop, s);
f4dd75d9 514 }
463ee0b2 515 *t = ch;
57843af0 516 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
517}
518
ffb4593c
NT
519/*
520 * S_skipspace
521 * Called to gobble the appropriate amount and type of whitespace.
522 * Skips comments as well.
523 */
524
76e3520e 525STATIC char *
cea2e8a9 526S_skipspace(pTHX_ register char *s)
a687059c 527{
3280af22 528 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 529 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
530 s++;
531 return s;
532 }
533 for (;;) {
fd049845 534 STRLEN prevlen;
09bef843
SB
535 SSize_t oldprevlen, oldoldprevlen;
536 SSize_t oldloplen, oldunilen;
60e6418e
GS
537 while (s < PL_bufend && isSPACE(*s)) {
538 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
539 incline(s);
540 }
ffb4593c
NT
541
542 /* comment */
3280af22
NIS
543 if (s < PL_bufend && *s == '#') {
544 while (s < PL_bufend && *s != '\n')
463ee0b2 545 s++;
60e6418e 546 if (s < PL_bufend) {
463ee0b2 547 s++;
60e6418e
GS
548 if (PL_in_eval && !PL_rsfp) {
549 incline(s);
550 continue;
551 }
552 }
463ee0b2 553 }
ffb4593c
NT
554
555 /* only continue to recharge the buffer if we're at the end
556 * of the buffer, we're not reading from a source filter, and
557 * we're in normal lexing mode
558 */
09bef843
SB
559 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
560 PL_lex_state == LEX_FORMLINE)
463ee0b2 561 return s;
ffb4593c
NT
562
563 /* try to recharge the buffer */
9cbb5ea2
GS
564 if ((s = filter_gets(PL_linestr, PL_rsfp,
565 (prevlen = SvCUR(PL_linestr)))) == Nullch)
566 {
567 /* end of file. Add on the -p or -n magic */
3280af22
NIS
568 if (PL_minus_n || PL_minus_p) {
569 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
570 ";}continue{print or die qq(-p destination: $!\\n)" :
571 "");
3280af22
NIS
572 sv_catpv(PL_linestr,";}");
573 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
574 }
575 else
3280af22 576 sv_setpv(PL_linestr,";");
ffb4593c
NT
577
578 /* reset variables for next time we lex */
9cbb5ea2
GS
579 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
580 = SvPVX(PL_linestr);
3280af22 581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 582 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
583
584 /* Close the filehandle. Could be from -P preprocessor,
585 * STDIN, or a regular file. If we were reading code from
586 * STDIN (because the commandline held no -e or filename)
587 * then we don't close it, we reset it so the code can
588 * read from STDIN too.
589 */
590
3280af22
NIS
591 if (PL_preprocess && !PL_in_eval)
592 (void)PerlProc_pclose(PL_rsfp);
593 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
594 PerlIO_clearerr(PL_rsfp);
8990e307 595 else
3280af22
NIS
596 (void)PerlIO_close(PL_rsfp);
597 PL_rsfp = Nullfp;
463ee0b2
LW
598 return s;
599 }
ffb4593c
NT
600
601 /* not at end of file, so we only read another line */
09bef843
SB
602 /* make corresponding updates to old pointers, for yyerror() */
603 oldprevlen = PL_oldbufptr - PL_bufend;
604 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
605 if (PL_last_uni)
606 oldunilen = PL_last_uni - PL_bufend;
607 if (PL_last_lop)
608 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
609 PL_linestart = PL_bufptr = s + prevlen;
610 PL_bufend = s + SvCUR(PL_linestr);
611 s = PL_bufptr;
09bef843
SB
612 PL_oldbufptr = s + oldprevlen;
613 PL_oldoldbufptr = s + oldoldprevlen;
614 if (PL_last_uni)
615 PL_last_uni = s + oldunilen;
616 if (PL_last_lop)
617 PL_last_lop = s + oldloplen;
a0d0e21e 618 incline(s);
ffb4593c
NT
619
620 /* debugger active and we're not compiling the debugger code,
621 * so store the line into the debugger's array of lines
622 */
3280af22 623 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
624 SV *sv = NEWSV(85,0);
625
626 sv_upgrade(sv, SVt_PVMG);
3280af22 627 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 628 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 629 }
463ee0b2 630 }
a687059c 631}
378cc40b 632
ffb4593c
NT
633/*
634 * S_check_uni
635 * Check the unary operators to ensure there's no ambiguity in how they're
636 * used. An ambiguous piece of code would be:
637 * rand + 5
638 * This doesn't mean rand() + 5. Because rand() is a unary operator,
639 * the +5 is its argument.
640 */
641
76e3520e 642STATIC void
cea2e8a9 643S_check_uni(pTHX)
ba106d47 644{
2f3197b3 645 char *s;
a0d0e21e 646 char *t;
2f3197b3 647
3280af22 648 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 649 return;
3280af22
NIS
650 while (isSPACE(*PL_last_uni))
651 PL_last_uni++;
7e2040f0 652 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 653 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 654 return;
0453d815 655 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 656 char ch = *s;
0453d815 657 *s = '\0';
4e553d73
NIS
658 Perl_warner(aTHX_ WARN_AMBIGUOUS,
659 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
660 PL_last_uni);
661 *s = ch;
662 }
2f3197b3
LW
663}
664
ffb4593c
NT
665/* workaround to replace the UNI() macro with a function. Only the
666 * hints/uts.sh file mentions this. Other comments elsewhere in the
667 * source indicate Microport Unix might need it too.
668 */
669
ffed7fef
LW
670#ifdef CRIPPLED_CC
671
672#undef UNI
ffed7fef 673#define UNI(f) return uni(f,s)
ffed7fef 674
76e3520e 675STATIC int
cea2e8a9 676S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
677{
678 yylval.ival = f;
3280af22
NIS
679 PL_expect = XTERM;
680 PL_bufptr = s;
8f872242
NIS
681 PL_last_uni = PL_oldbufptr;
682 PL_last_lop_op = f;
ffed7fef
LW
683 if (*s == '(')
684 return FUNC1;
685 s = skipspace(s);
686 if (*s == '(')
687 return FUNC1;
688 else
689 return UNIOP;
690}
691
a0d0e21e
LW
692#endif /* CRIPPLED_CC */
693
ffb4593c
NT
694/*
695 * LOP : macro to build a list operator. Its behaviour has been replaced
696 * with a subroutine, S_lop() for which LOP is just another name.
697 */
698
a0d0e21e
LW
699#define LOP(f,x) return lop(f,x,s)
700
ffb4593c
NT
701/*
702 * S_lop
703 * Build a list operator (or something that might be one). The rules:
704 * - if we have a next token, then it's a list operator [why?]
705 * - if the next thing is an opening paren, then it's a function
706 * - else it's a list operator
707 */
708
76e3520e 709STATIC I32
a0be28da 710S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 711{
79072805 712 yylval.ival = f;
35c8bce7 713 CLINE;
075953c3 714 REPORT("lop", f)
3280af22
NIS
715 PL_expect = x;
716 PL_bufptr = s;
717 PL_last_lop = PL_oldbufptr;
718 PL_last_lop_op = f;
719 if (PL_nexttoke)
a0d0e21e 720 return LSTOP;
79072805
LW
721 if (*s == '(')
722 return FUNC;
723 s = skipspace(s);
724 if (*s == '(')
725 return FUNC;
726 else
727 return LSTOP;
728}
729
ffb4593c
NT
730/*
731 * S_force_next
9cbb5ea2 732 * When the lexer realizes it knows the next token (for instance,
ffb4593c 733 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
734 * to know what token to return the next time the lexer is called. Caller
735 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
736 * handles the token correctly.
ffb4593c
NT
737 */
738
4e553d73 739STATIC void
cea2e8a9 740S_force_next(pTHX_ I32 type)
79072805 741{
3280af22
NIS
742 PL_nexttype[PL_nexttoke] = type;
743 PL_nexttoke++;
744 if (PL_lex_state != LEX_KNOWNEXT) {
745 PL_lex_defer = PL_lex_state;
746 PL_lex_expect = PL_expect;
747 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
748 }
749}
750
ffb4593c
NT
751/*
752 * S_force_word
753 * When the lexer knows the next thing is a word (for instance, it has
754 * just seen -> and it knows that the next char is a word char, then
755 * it calls S_force_word to stick the next word into the PL_next lookahead.
756 *
757 * Arguments:
b1b65b59 758 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
759 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
760 * int check_keyword : if true, Perl checks to make sure the word isn't
761 * a keyword (do this if the word is a label, e.g. goto FOO)
762 * int allow_pack : if true, : characters will also be allowed (require,
763 * use, etc. do this)
9cbb5ea2 764 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
765 */
766
76e3520e 767STATIC char *
cea2e8a9 768S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 769{
463ee0b2
LW
770 register char *s;
771 STRLEN len;
4e553d73 772
463ee0b2
LW
773 start = skipspace(start);
774 s = start;
7e2040f0 775 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 776 (allow_pack && *s == ':') ||
15f0808c 777 (allow_initial_tick && *s == '\'') )
a0d0e21e 778 {
3280af22
NIS
779 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
780 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
781 return start;
782 if (token == METHOD) {
783 s = skipspace(s);
784 if (*s == '(')
3280af22 785 PL_expect = XTERM;
463ee0b2 786 else {
3280af22 787 PL_expect = XOPERATOR;
463ee0b2 788 }
79072805 789 }
3280af22
NIS
790 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
791 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
792 force_next(token);
793 }
794 return s;
795}
796
ffb4593c
NT
797/*
798 * S_force_ident
9cbb5ea2 799 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
800 * text only contains the "foo" portion. The first argument is a pointer
801 * to the "foo", and the second argument is the type symbol to prefix.
802 * Forces the next token to be a "WORD".
9cbb5ea2 803 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
804 */
805
76e3520e 806STATIC void
cea2e8a9 807S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
808{
809 if (s && *s) {
11343788 810 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 811 PL_nextval[PL_nexttoke].opval = o;
79072805 812 force_next(WORD);
748a9306 813 if (kind) {
11343788 814 o->op_private = OPpCONST_ENTERED;
55497cff
PP
815 /* XXX see note in pp_entereval() for why we forgo typo
816 warnings if the symbol must be introduced in an eval.
817 GSAR 96-10-12 */
3280af22 818 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
819 kind == '$' ? SVt_PV :
820 kind == '@' ? SVt_PVAV :
821 kind == '%' ? SVt_PVHV :
822 SVt_PVGV
823 );
748a9306 824 }
79072805
LW
825 }
826}
827
1571675a
GS
828NV
829Perl_str_to_version(pTHX_ SV *sv)
830{
831 NV retval = 0.0;
832 NV nshift = 1.0;
833 STRLEN len;
834 char *start = SvPVx(sv,len);
3aa33fe5 835 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
836 char *end = start + len;
837 while (start < end) {
ba210ebe 838 STRLEN skip;
1571675a
GS
839 UV n;
840 if (utf)
dcad2880 841 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
842 else {
843 n = *(U8*)start;
844 skip = 1;
845 }
846 retval += ((NV)n)/nshift;
847 start += skip;
848 nshift *= 1000;
849 }
850 return retval;
851}
852
4e553d73 853/*
ffb4593c
NT
854 * S_force_version
855 * Forces the next token to be a version number.
856 */
857
76e3520e 858STATIC char *
cea2e8a9 859S_force_version(pTHX_ char *s)
89bfa8cd
PP
860{
861 OP *version = Nullop;
44dcb63b 862 char *d;
89bfa8cd
PP
863
864 s = skipspace(s);
865
44dcb63b 866 d = s;
dd629d5b 867 if (*d == 'v')
44dcb63b 868 d++;
44dcb63b 869 if (isDIGIT(*d)) {
a7cb1f99 870 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd
PP
881 }
882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
4e553d73 886 force_next(WORD);
89bfa8cd
PP
887
888 return (s);
889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
3280af22 920 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
922 while (s < send) {
923 if (*s == '\\') {
a0d0e21e 924 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
925 s++; /* all that, just for this */
926 }
927 *d++ = *s++;
928 }
929 *d = '\0';
463ee0b2 930 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 931 finish:
3280af22 932 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 933 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
934 return sv;
935}
936
ffb4593c
NT
937/*
938 * Now come three functions related to double-quote context,
939 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
940 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
941 * interact with PL_lex_state, and create fake ( ... ) argument lists
942 * to handle functions and concatenation.
943 * They assume that whoever calls them will be setting up a fake
944 * join call, because each subthing puts a ',' after it. This lets
945 * "lower \luPpEr"
946 * become
947 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
948 *
949 * (I'm not sure whether the spurious commas at the end of lcfirst's
950 * arguments and join's arguments are created or not).
951 */
952
953/*
954 * S_sublex_start
955 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
956 *
957 * Pattern matching will set PL_lex_op to the pattern-matching op to
958 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
959 *
960 * OP_CONST and OP_READLINE are easy--just make the new op and return.
961 *
962 * Everything else becomes a FUNC.
963 *
964 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
965 * had an OP_CONST or OP_READLINE). This just sets us up for a
966 * call to S_sublex_push().
967 */
968
76e3520e 969STATIC I32
cea2e8a9 970S_sublex_start(pTHX)
79072805
LW
971{
972 register I32 op_type = yylval.ival;
79072805
LW
973
974 if (op_type == OP_NULL) {
3280af22
NIS
975 yylval.opval = PL_lex_op;
976 PL_lex_op = Nullop;
79072805
LW
977 return THING;
978 }
979 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 980 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
981
982 if (SvTYPE(sv) == SVt_PVIV) {
983 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
984 STRLEN len;
985 char *p;
986 SV *nsv;
987
988 p = SvPV(sv, len);
79cb57f6 989 nsv = newSVpvn(p, len);
01ec43d0
GS
990 if (SvUTF8(sv))
991 SvUTF8_on(nsv);
b3ac6de7
IZ
992 SvREFCNT_dec(sv);
993 sv = nsv;
4e553d73 994 }
b3ac6de7 995 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 996 PL_lex_stuff = Nullsv;
79072805
LW
997 return THING;
998 }
999
3280af22
NIS
1000 PL_sublex_info.super_state = PL_lex_state;
1001 PL_sublex_info.sub_inwhat = op_type;
1002 PL_sublex_info.sub_op = PL_lex_op;
1003 PL_lex_state = LEX_INTERPPUSH;
55497cff 1004
3280af22
NIS
1005 PL_expect = XTERM;
1006 if (PL_lex_op) {
1007 yylval.opval = PL_lex_op;
1008 PL_lex_op = Nullop;
55497cff
PP
1009 return PMFUNC;
1010 }
1011 else
1012 return FUNC;
1013}
1014
ffb4593c
NT
1015/*
1016 * S_sublex_push
1017 * Create a new scope to save the lexing state. The scope will be
1018 * ended in S_sublex_done. Returns a '(', starting the function arguments
1019 * to the uc, lc, etc. found before.
1020 * Sets PL_lex_state to LEX_INTERPCONCAT.
1021 */
1022
76e3520e 1023STATIC I32
cea2e8a9 1024S_sublex_push(pTHX)
55497cff 1025{
f46d017c 1026 ENTER;
55497cff 1027
3280af22
NIS
1028 PL_lex_state = PL_sublex_info.super_state;
1029 SAVEI32(PL_lex_dojoin);
1030 SAVEI32(PL_lex_brackets);
3280af22
NIS
1031 SAVEI32(PL_lex_casemods);
1032 SAVEI32(PL_lex_starts);
1033 SAVEI32(PL_lex_state);
7766f137 1034 SAVEVPTR(PL_lex_inpat);
3280af22 1035 SAVEI32(PL_lex_inwhat);
57843af0 1036 SAVECOPLINE(PL_curcop);
3280af22
NIS
1037 SAVEPPTR(PL_bufptr);
1038 SAVEPPTR(PL_oldbufptr);
1039 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1040 SAVEPPTR(PL_last_lop);
1041 SAVEPPTR(PL_last_uni);
3280af22
NIS
1042 SAVEPPTR(PL_linestart);
1043 SAVESPTR(PL_linestr);
1044 SAVEPPTR(PL_lex_brackstack);
1045 SAVEPPTR(PL_lex_casestack);
1046
1047 PL_linestr = PL_lex_stuff;
1048 PL_lex_stuff = Nullsv;
1049
9cbb5ea2
GS
1050 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1051 = SvPVX(PL_linestr);
3280af22 1052 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1053 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1054 SAVEFREESV(PL_linestr);
1055
1056 PL_lex_dojoin = FALSE;
1057 PL_lex_brackets = 0;
3280af22
NIS
1058 New(899, PL_lex_brackstack, 120, char);
1059 New(899, PL_lex_casestack, 12, char);
1060 SAVEFREEPV(PL_lex_brackstack);
1061 SAVEFREEPV(PL_lex_casestack);
1062 PL_lex_casemods = 0;
1063 *PL_lex_casestack = '\0';
1064 PL_lex_starts = 0;
1065 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1066 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1067
1068 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1069 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1070 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1071 else
3280af22 1072 PL_lex_inpat = Nullop;
79072805 1073
55497cff 1074 return '(';
79072805
LW
1075}
1076
ffb4593c
NT
1077/*
1078 * S_sublex_done
1079 * Restores lexer state after a S_sublex_push.
1080 */
1081
76e3520e 1082STATIC I32
cea2e8a9 1083S_sublex_done(pTHX)
79072805 1084{
3280af22 1085 if (!PL_lex_starts++) {
9aa983d2
JH
1086 SV *sv = newSVpvn("",0);
1087 if (SvUTF8(PL_linestr))
1088 SvUTF8_on(sv);
3280af22 1089 PL_expect = XOPERATOR;
9aa983d2 1090 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1091 return THING;
1092 }
1093
3280af22
NIS
1094 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1095 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1096 return yylex();
79072805
LW
1097 }
1098
ffb4593c 1099 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1100 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1101 PL_linestr = PL_lex_repl;
1102 PL_lex_inpat = 0;
1103 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1104 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1105 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1106 SAVEFREESV(PL_linestr);
1107 PL_lex_dojoin = FALSE;
1108 PL_lex_brackets = 0;
3280af22
NIS
1109 PL_lex_casemods = 0;
1110 *PL_lex_casestack = '\0';
1111 PL_lex_starts = 0;
25da4f38 1112 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1113 PL_lex_state = LEX_INTERPNORMAL;
1114 PL_lex_starts++;
e9fa98b2
HS
1115 /* we don't clear PL_lex_repl here, so that we can check later
1116 whether this is an evalled subst; that means we rely on the
1117 logic to ensure sublex_done() is called again only via the
1118 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1119 }
e9fa98b2 1120 else {
3280af22 1121 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1122 PL_lex_repl = Nullsv;
1123 }
79072805 1124 return ',';
ffed7fef
LW
1125 }
1126 else {
f46d017c 1127 LEAVE;
3280af22
NIS
1128 PL_bufend = SvPVX(PL_linestr);
1129 PL_bufend += SvCUR(PL_linestr);
1130 PL_expect = XOPERATOR;
09bef843 1131 PL_sublex_info.sub_inwhat = 0;
79072805 1132 return ')';
ffed7fef
LW
1133 }
1134}
1135
02aa26ce
NT
1136/*
1137 scan_const
1138
1139 Extracts a pattern, double-quoted string, or transliteration. This
1140 is terrifying code.
1141
3280af22
NIS
1142 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1143 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1144 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1145
9b599b2a
GS
1146 Returns a pointer to the character scanned up to. Iff this is
1147 advanced from the start pointer supplied (ie if anything was
1148 successfully parsed), will leave an OP for the substring scanned
1149 in yylval. Caller must intuit reason for not parsing further
1150 by looking at the next characters herself.
1151
02aa26ce
NT
1152 In patterns:
1153 backslashes:
1154 double-quoted style: \r and \n
1155 regexp special ones: \D \s
1156 constants: \x3
1157 backrefs: \1 (deprecated in substitution replacements)
1158 case and quoting: \U \Q \E
1159 stops on @ and $, but not for $ as tail anchor
1160
1161 In transliterations:
1162 characters are VERY literal, except for - not at the start or end
1163 of the string, which indicates a range. scan_const expands the
1164 range to the full set of intermediate characters.
1165
1166 In double-quoted strings:
1167 backslashes:
1168 double-quoted style: \r and \n
1169 constants: \x3
1170 backrefs: \1 (deprecated)
1171 case and quoting: \U \Q \E
1172 stops on @ and $
1173
1174 scan_const does *not* construct ops to handle interpolated strings.
1175 It stops processing as soon as it finds an embedded $ or @ variable
1176 and leaves it to the caller to work out what's going on.
1177
1178 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1179
1180 $ in pattern could be $foo or could be tail anchor. Assumption:
1181 it's a tail anchor if $ is the last thing in the string, or if it's
1182 followed by one of ")| \n\t"
1183
1184 \1 (backreferences) are turned into $1
1185
1186 The structure of the code is
1187 while (there's a character to process) {
1188 handle transliteration ranges
1189 skip regexp comments
1190 skip # initiated comments in //x patterns
1191 check for embedded @foo
1192 check for embedded scalars
1193 if (backslash) {
1194 leave intact backslashes from leave (below)
1195 deprecate \1 in strings and sub replacements
1196 handle string-changing backslashes \l \U \Q \E, etc.
1197 switch (what was escaped) {
1198 handle - in a transliteration (becomes a literal -)
1199 handle \132 octal characters
1200 handle 0x15 hex characters
1201 handle \cV (control V)
1202 handle printf backslashes (\f, \r, \n, etc)
1203 } (end switch)
1204 } (end if backslash)
1205 } (end while character to read)
4e553d73 1206
02aa26ce
NT
1207*/
1208
76e3520e 1209STATIC char *
cea2e8a9 1210S_scan_const(pTHX_ char *start)
79072805 1211{
3280af22 1212 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1213 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1214 register char *s = start; /* start of the constant */
1215 register char *d = SvPVX(sv); /* destination for copies */
1216 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1217 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1218 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1219 /* the constant is UTF8 */
012bcf8d
GS
1220 UV uv;
1221
ac2262e3 1222 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1223 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1224 : UTF;
89491803 1225 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1226 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1227 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1228 : UTF;
dff6d3cd 1229 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1230 PL_lex_inpat
4a2d328f 1231 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1232 : "";
79072805
LW
1233
1234 while (s < send || dorange) {
02aa26ce 1235 /* get transliterations out of the way (they're most literal) */
3280af22 1236 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1237 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1238 if (dorange) {
1ba5c669
JH
1239 I32 i; /* current expanded character */
1240 I32 min; /* first character in range */
1241 I32 max; /* last character in range */
02aa26ce 1242
8973db79
JH
1243 if (utf) {
1244 char *c = (char*)utf8_hop((U8*)d, -1);
1245 char *e = d++;
1246 while (e-- > c)
1247 *(e + 1) = *e;
8b107d6c 1248 *c = (char)0xff;
8973db79
JH
1249 /* mark the range as done, and continue */
1250 dorange = FALSE;
1251 didrange = TRUE;
1252 continue;
1253 }
02aa26ce 1254 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1255 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1256 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1257 d -= 2; /* eat the first char and the - */
1258
8ada0baa
JH
1259 min = (U8)*d; /* first char in range */
1260 max = (U8)d[1]; /* last char in range */
1261
c2e66d9e 1262 if (min > max) {
01ec43d0 1263 Perl_croak(aTHX_
1ba5c669
JH
1264 "Invalid [] range \"%c-%c\" in transliteration operator",
1265 (char)min, (char)max);
c2e66d9e
GS
1266 }
1267
1ba5c669 1268#ifndef ASCIIish
8ada0baa
JH
1269 if ((isLOWER(min) && isLOWER(max)) ||
1270 (isUPPER(min) && isUPPER(max))) {
1271 if (isLOWER(min)) {
1272 for (i = min; i <= max; i++)
1273 if (isLOWER(i))
1274 *d++ = i;
1275 } else {
1276 for (i = min; i <= max; i++)
1277 if (isUPPER(i))
1278 *d++ = i;
1279 }
1280 }
1281 else
1282#endif
1283 for (i = min; i <= max; i++)
1284 *d++ = i;
02aa26ce
NT
1285
1286 /* mark the range as done, and continue */
79072805 1287 dorange = FALSE;
01ec43d0 1288 didrange = TRUE;
79072805 1289 continue;
4e553d73 1290 }
02aa26ce
NT
1291
1292 /* range begins (ignore - as first or last char) */
79072805 1293 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1294 if (didrange) {
1fafa243 1295 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1296 }
a0ed51b3 1297 if (utf) {
a176fa2a 1298 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1299 s++;
1300 continue;
1301 }
79072805
LW
1302 dorange = TRUE;
1303 s++;
01ec43d0
GS
1304 }
1305 else {
1306 didrange = FALSE;
1307 }
79072805 1308 }
02aa26ce
NT
1309
1310 /* if we get here, we're not doing a transliteration */
1311
0f5d15d6
IZ
1312 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1313 except for the last char, which will be done separately. */
3280af22 1314 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1315 if (s[2] == '#') {
1316 while (s < send && *s != ')')
1317 *d++ = *s++;
155aba94
GS
1318 }
1319 else if (s[2] == '{' /* This should match regcomp.c */
1320 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1321 {
cc6b7395 1322 I32 count = 1;
0f5d15d6 1323 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1324 char c;
1325
d9f97599
GS
1326 while (count && (c = *regparse)) {
1327 if (c == '\\' && regparse[1])
1328 regparse++;
4e553d73 1329 else if (c == '{')
cc6b7395 1330 count++;
4e553d73 1331 else if (c == '}')
cc6b7395 1332 count--;
d9f97599 1333 regparse++;
cc6b7395 1334 }
5bdf89e7
IZ
1335 if (*regparse != ')') {
1336 regparse--; /* Leave one char for continuation. */
cc6b7395 1337 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1338 }
0f5d15d6 1339 while (s < regparse)
cc6b7395
IZ
1340 *d++ = *s++;
1341 }
748a9306 1342 }
02aa26ce
NT
1343
1344 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1345 else if (*s == '#' && PL_lex_inpat &&
1346 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1347 while (s+1 < send && *s != '\n')
1348 *d++ = *s++;
1349 }
02aa26ce 1350
5d1d4326
JH
1351 /* check for embedded arrays
1352 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1353 */
7e2040f0 1354 else if (*s == '@' && s[1]
5d1d4326 1355 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1356 break;
02aa26ce
NT
1357
1358 /* check for embedded scalars. only stop if we're sure it's a
1359 variable.
1360 */
79072805 1361 else if (*s == '$') {
3280af22 1362 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1363 break;
c277df42 1364 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1365 break; /* in regexp, $ might be tail anchor */
1366 }
02aa26ce
NT
1367
1368 /* backslashes */
79072805
LW
1369 if (*s == '\\' && s+1 < send) {
1370 s++;
02aa26ce
NT
1371
1372 /* some backslashes we leave behind */
c9f97d15 1373 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1374 *d++ = '\\';
1375 *d++ = *s++;
1376 continue;
1377 }
02aa26ce
NT
1378
1379 /* deprecate \1 in strings and substitution replacements */
3280af22 1380 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1381 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1382 {
599cee73 1383 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1384 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1385 *--s = '$';
1386 break;
1387 }
02aa26ce
NT
1388
1389 /* string-change backslash escapes */
3280af22 1390 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1391 --s;
1392 break;
1393 }
02aa26ce
NT
1394
1395 /* if we get here, it's either a quoted -, or a digit */
79072805 1396 switch (*s) {
02aa26ce
NT
1397
1398 /* quoted - in transliterations */
79072805 1399 case '-':
3280af22 1400 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1401 *d++ = *s++;
1402 continue;
1403 }
1404 /* FALL THROUGH */
1405 default:
11b8faa4 1406 {
7e84c16c 1407 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1408 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1409 "Unrecognized escape \\%c passed through",
1410 *s);
1411 /* default action is to copy the quoted character */
f9a63242 1412 goto default_action;
11b8faa4 1413 }
02aa26ce
NT
1414
1415 /* \132 indicates an octal constant */
79072805
LW
1416 case '0': case '1': case '2': case '3':
1417 case '4': case '5': case '6': case '7':
ba210ebe
JH
1418 {
1419 STRLEN len = 0; /* disallow underscores */
1420 uv = (UV)scan_oct(s, 3, &len);
1421 s += len;
1422 }
012bcf8d 1423 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1424
1425 /* \x24 indicates a hex constant */
79072805 1426 case 'x':
a0ed51b3
LW
1427 ++s;
1428 if (*s == '{') {
1429 char* e = strchr(s, '}');
adaeee49 1430 if (!e) {
a0ed51b3 1431 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1432 e = s;
1433 }
89491803 1434 else {
ba210ebe
JH
1435 STRLEN len = 1; /* allow underscores */
1436 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1437 }
1438 s = e + 1;
a0ed51b3
LW
1439 }
1440 else {
ba210ebe
JH
1441 {
1442 STRLEN len = 0; /* disallow underscores */
1443 uv = (UV)scan_hex(s, 2, &len);
1444 s += len;
1445 }
012bcf8d
GS
1446 }
1447
1448 NUM_ESCAPE_INSERT:
1449 /* Insert oct or hex escaped character.
301d3d20
JH
1450 * There will always enough room in sv since such
1451 * escapes will be longer than any UT-F8 sequence
1452 * they can end up as. */
ba7cea30
JH
1453
1454 /* This spot is wrong for EBCDIC. Characters like
1455 * the lowercase letters and digits are >127 in EBCDIC,
1456 * so here they would need to be mapped to the Unicode
1457 * repertoire. --jhi */
1458
7948272d 1459 if (uv > 127) {
9aa983d2 1460 if (!has_utf8 && uv > 255) {
301d3d20
JH
1461 /* Might need to recode whatever we have
1462 * accumulated so far if it contains any
1463 * hibit chars.
1464 *
1465 * (Can't we keep track of that and avoid
1466 * this rescan? --jhi)
012bcf8d
GS
1467 */
1468 int hicount = 0;
1469 char *c;
301d3d20 1470
012bcf8d 1471 for (c = SvPVX(sv); c < d; c++) {
fd400ab9 1472 if (UTF8_IS_CONTINUED(*c))
012bcf8d
GS
1473 hicount++;
1474 }
1475 if (hicount) {
1476 char *old_pvx = SvPVX(sv);
1477 char *src, *dst;
301d3d20
JH
1478
1479 d = SvGROW(sv,
8973db79 1480 SvLEN(sv) + hicount + 1) +
301d3d20 1481 (d - old_pvx);
012bcf8d
GS
1482
1483 src = d - 1;
1484 d += hicount;
1485 dst = d - 1;
1486
1487 while (src < dst) {
fd400ab9 1488 if (UTF8_IS_CONTINUED(*src)) {
9b877dbb
IH
1489 *dst-- = UTF8_EIGHT_BIT_LO(*src);
1490 *dst-- = UTF8_EIGHT_BIT_HI(*src--);
012bcf8d
GS
1491 }
1492 else {
1493 *dst-- = *src--;
1494 }
1495 }
1496 }
1497 }
1498
9aa983d2 1499 if (has_utf8 || uv > 255) {
012bcf8d 1500 d = (char*)uv_to_utf8((U8*)d, uv);
4e553d73 1501 has_utf8 = TRUE;
f9a63242
JH
1502 if (PL_lex_inwhat == OP_TRANS &&
1503 PL_sublex_info.sub_op) {
1504 PL_sublex_info.sub_op->op_private |=
1505 (PL_lex_repl ? OPpTRANS_FROM_UTF
1506 : OPpTRANS_TO_UTF);
1507 utf = TRUE;
1508 }
012bcf8d 1509 }
a0ed51b3 1510 else {
012bcf8d 1511 *d++ = (char)uv;
a0ed51b3 1512 }
012bcf8d
GS
1513 }
1514 else {
1515 *d++ = (char)uv;
a0ed51b3 1516 }
79072805 1517 continue;
02aa26ce 1518
4a2d328f
IZ
1519 /* \N{latin small letter a} is a named character */
1520 case 'N':
e0a47bd2 1521 s++;
423cee85
JH
1522 if (*s == '{') {
1523 char* e = strchr(s, '}');
155aba94 1524 SV *res;
423cee85
JH
1525 STRLEN len;
1526 char *str;
4e553d73 1527
423cee85 1528 if (!e) {
5777a3f7 1529 yyerror("Missing right brace on \\N{}");
423cee85
JH
1530 e = s - 1;
1531 goto cont_scan;
1532 }
e0a47bd2
JH
1533 if (s[1] == 'U' && s[2] == '+') { /* \N{U+HHHH} */
1534 STRLEN alen = e - s - 3;
1535 STRLEN blen;
1536 UV uv = (UV)scan_hex(s + 3, alen, &blen);
1537
1538 if (blen == alen) {
1539 res = newSVpvn(s, (uv >> 8) + 1); /* filler */
1540 str = (char *)uv_to_utf8((U8*)SvPVX(res), uv);
1541 SvCUR_set(res, str - SvPVX(res));
7c95f590 1542 *str = '\0';
e0a47bd2 1543 if (uv > 0x7f)
7c95f590 1544 SvUTF8_on(res);
e0a47bd2
JH
1545 }
1546 else {
1547 yyerror("Illegal hexadecimal code on \\N{U+...}");
1548 e = s - 1;
1549 goto cont_scan;
1550 }
1551 }
1552 else {
1553 res = newSVpvn(s + 1, e - s - 1);
1554 res = new_constant( Nullch, 0, "charnames",
1555 res, Nullsv, "\\N{...}" );
1556 }
f9a63242
JH
1557 if (has_utf8)
1558 sv_utf8_upgrade(res);
423cee85 1559 str = SvPV(res,len);
89491803 1560 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1561 char *ostart = SvPVX(sv);
1562 SvCUR_set(sv, d - ostart);
1563 SvPOK_on(sv);
e4f3eed8 1564 *d = '\0';
f08d6ad9 1565 sv_utf8_upgrade(sv);
d2f449dd
SB
1566 /* this just broke our allocation above... */
1567 SvGROW(sv, send - start);
f08d6ad9 1568 d = SvPVX(sv) + SvCUR(sv);
89491803 1569 has_utf8 = TRUE;
f08d6ad9 1570 }
423cee85
JH
1571 if (len > e - s + 4) {
1572 char *odest = SvPVX(sv);
1573
8973db79 1574 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1575 d = SvPVX(sv) + (d - odest);
1576 }
1577 Copy(str, d, len, char);
1578 d += len;
1579 SvREFCNT_dec(res);
1580 cont_scan:
1581 s = e + 1;
1582 }
1583 else
5777a3f7 1584 yyerror("Missing braces on \\N{}");
423cee85
JH
1585 continue;
1586
02aa26ce 1587 /* \c is a control character */
79072805
LW
1588 case 'c':
1589 s++;
9d116dd7
JH
1590#ifdef EBCDIC
1591 *d = *s++;
1592 if (isLOWER(*d))
1593 *d = toUPPER(*d);
4e553d73 1594 *d = toCTRL(*d);
774a9426 1595 d++;
9d116dd7 1596#else
ba210ebe
JH
1597 {
1598 U8 c = *s++;
1599 *d++ = toCTRL(c);
1600 }
9d116dd7 1601#endif
79072805 1602 continue;
02aa26ce
NT
1603
1604 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1605 case 'b':
1606 *d++ = '\b';
1607 break;
1608 case 'n':
1609 *d++ = '\n';
1610 break;
1611 case 'r':
1612 *d++ = '\r';
1613 break;
1614 case 'f':
1615 *d++ = '\f';
1616 break;
1617 case 't':
1618 *d++ = '\t';
1619 break;
34a3fe2a
PP
1620#ifdef EBCDIC
1621 case 'e':
1622 *d++ = '\047'; /* CP 1047 */
1623 break;
1624 case 'a':
1625 *d++ = '\057'; /* CP 1047 */
1626 break;
1627#else
79072805
LW
1628 case 'e':
1629 *d++ = '\033';
1630 break;
1631 case 'a':
1632 *d++ = '\007';
1633 break;
34a3fe2a 1634#endif
02aa26ce
NT
1635 } /* end switch */
1636
79072805
LW
1637 s++;
1638 continue;
02aa26ce
NT
1639 } /* end if (backslash) */
1640
f9a63242 1641 default_action:
fd400ab9 1642 if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
a5a960be
RI
1643 STRLEN len = (STRLEN) -1;
1644 UV uv;
1645 if (this_utf8) {
a0dbb045 1646 uv = utf8_to_uv((U8*)s, send - s, &len, 0);
a5a960be
RI
1647 }
1648 if (len == (STRLEN)-1) {
1649 /* Illegal UTF8 (a high-bit byte), make it valid. */
1650 char *old_pvx = SvPVX(sv);
1651 /* need space for one extra char (NOTE: SvCUR() not set here) */
1652 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1653 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1654 }
1655 else {
1656 while (len--)
1657 *d++ = *s++;
1658 }
1659 has_utf8 = TRUE;
f9a63242
JH
1660 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1661 PL_sublex_info.sub_op->op_private |=
1662 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1663 utf = TRUE;
1664 }
a5a960be
RI
1665 continue;
1666 }
1667
f9a63242 1668 *d++ = *s++;
02aa26ce
NT
1669 } /* while loop to process each character */
1670
1671 /* terminate the string and set up the sv */
79072805 1672 *d = '\0';
463ee0b2 1673 SvCUR_set(sv, d - SvPVX(sv));
79072805 1674 SvPOK_on(sv);
89491803 1675 if (has_utf8)
7e2040f0 1676 SvUTF8_on(sv);
79072805 1677
02aa26ce 1678 /* shrink the sv if we allocated more than we used */
79072805
LW
1679 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1680 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1681 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1682 }
02aa26ce 1683
9b599b2a 1684 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1685 if (s > PL_bufptr) {
1686 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1687 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1688 sv, Nullsv,
4e553d73 1689 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1690 ? "tr"
3280af22 1691 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1692 ? "s"
1693 : "qq")));
79072805 1694 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1695 } else
8990e307 1696 SvREFCNT_dec(sv);
79072805
LW
1697 return s;
1698}
1699
ffb4593c
NT
1700/* S_intuit_more
1701 * Returns TRUE if there's more to the expression (e.g., a subscript),
1702 * FALSE otherwise.
ffb4593c
NT
1703 *
1704 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1705 *
1706 * ->[ and ->{ return TRUE
1707 * { and [ outside a pattern are always subscripts, so return TRUE
1708 * if we're outside a pattern and it's not { or [, then return FALSE
1709 * if we're in a pattern and the first char is a {
1710 * {4,5} (any digits around the comma) returns FALSE
1711 * if we're in a pattern and the first char is a [
1712 * [] returns FALSE
1713 * [SOMETHING] has a funky algorithm to decide whether it's a
1714 * character class or not. It has to deal with things like
1715 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1716 * anything else returns TRUE
1717 */
1718
9cbb5ea2
GS
1719/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1720
76e3520e 1721STATIC int
cea2e8a9 1722S_intuit_more(pTHX_ register char *s)
79072805 1723{
3280af22 1724 if (PL_lex_brackets)
79072805
LW
1725 return TRUE;
1726 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1727 return TRUE;
1728 if (*s != '{' && *s != '[')
1729 return FALSE;
3280af22 1730 if (!PL_lex_inpat)
79072805
LW
1731 return TRUE;
1732
1733 /* In a pattern, so maybe we have {n,m}. */
1734 if (*s == '{') {
1735 s++;
1736 if (!isDIGIT(*s))
1737 return TRUE;
1738 while (isDIGIT(*s))
1739 s++;
1740 if (*s == ',')
1741 s++;
1742 while (isDIGIT(*s))
1743 s++;
1744 if (*s == '}')
1745 return FALSE;
1746 return TRUE;
1747
1748 }
1749
1750 /* On the other hand, maybe we have a character class */
1751
1752 s++;
1753 if (*s == ']' || *s == '^')
1754 return FALSE;
1755 else {
ffb4593c 1756 /* this is terrifying, and it works */
79072805
LW
1757 int weight = 2; /* let's weigh the evidence */
1758 char seen[256];
f27ffc4a 1759 unsigned char un_char = 255, last_un_char;
93a17b20 1760 char *send = strchr(s,']');
3280af22 1761 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1762
1763 if (!send) /* has to be an expression */
1764 return TRUE;
1765
1766 Zero(seen,256,char);
1767 if (*s == '$')
1768 weight -= 3;
1769 else if (isDIGIT(*s)) {
1770 if (s[1] != ']') {
1771 if (isDIGIT(s[1]) && s[2] == ']')
1772 weight -= 10;
1773 }
1774 else
1775 weight -= 100;
1776 }
1777 for (; s < send; s++) {
1778 last_un_char = un_char;
1779 un_char = (unsigned char)*s;
1780 switch (*s) {
1781 case '@':
1782 case '&':
1783 case '$':
1784 weight -= seen[un_char] * 10;
7e2040f0 1785 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1786 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1787 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1788 weight -= 100;
1789 else
1790 weight -= 10;
1791 }
1792 else if (*s == '$' && s[1] &&
93a17b20
LW
1793 strchr("[#!%*<>()-=",s[1])) {
1794 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1795 weight -= 10;
1796 else
1797 weight -= 1;
1798 }
1799 break;
1800 case '\\':
1801 un_char = 254;
1802 if (s[1]) {
93a17b20 1803 if (strchr("wds]",s[1]))
79072805
LW
1804 weight += 100;
1805 else if (seen['\''] || seen['"'])
1806 weight += 1;
93a17b20 1807 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1808 weight += 40;
1809 else if (isDIGIT(s[1])) {
1810 weight += 40;
1811 while (s[1] && isDIGIT(s[1]))
1812 s++;
1813 }
1814 }
1815 else
1816 weight += 100;
1817 break;
1818 case '-':
1819 if (s[1] == '\\')
1820 weight += 50;
93a17b20 1821 if (strchr("aA01! ",last_un_char))
79072805 1822 weight += 30;
93a17b20 1823 if (strchr("zZ79~",s[1]))
79072805 1824 weight += 30;
f27ffc4a
GS
1825 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1826 weight -= 5; /* cope with negative subscript */
79072805
LW
1827 break;
1828 default:
93a17b20 1829 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1830 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1831 char *d = tmpbuf;
1832 while (isALPHA(*s))
1833 *d++ = *s++;
1834 *d = '\0';
1835 if (keyword(tmpbuf, d - tmpbuf))
1836 weight -= 150;
1837 }
1838 if (un_char == last_un_char + 1)
1839 weight += 5;
1840 weight -= seen[un_char];
1841 break;
1842 }
1843 seen[un_char]++;
1844 }
1845 if (weight >= 0) /* probably a character class */
1846 return FALSE;
1847 }
1848
1849 return TRUE;
1850}
ffed7fef 1851
ffb4593c
NT
1852/*
1853 * S_intuit_method
1854 *
1855 * Does all the checking to disambiguate
1856 * foo bar
1857 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1858 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1859 *
1860 * First argument is the stuff after the first token, e.g. "bar".
1861 *
1862 * Not a method if bar is a filehandle.
1863 * Not a method if foo is a subroutine prototyped to take a filehandle.
1864 * Not a method if it's really "Foo $bar"
1865 * Method if it's "foo $bar"
1866 * Not a method if it's really "print foo $bar"
1867 * Method if it's really "foo package::" (interpreted as package->foo)
1868 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1869 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1870 * =>
1871 */
1872
76e3520e 1873STATIC int
cea2e8a9 1874S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1875{
1876 char *s = start + (*start == '$');
3280af22 1877 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1878 STRLEN len;
1879 GV* indirgv;
1880
1881 if (gv) {
b6c543e3 1882 CV *cv;
a0d0e21e
LW
1883 if (GvIO(gv))
1884 return 0;
b6c543e3
IZ
1885 if ((cv = GvCVu(gv))) {
1886 char *proto = SvPVX(cv);
1887 if (proto) {
1888 if (*proto == ';')
1889 proto++;
1890 if (*proto == '*')
1891 return 0;
1892 }
1893 } else
a0d0e21e
LW
1894 gv = 0;
1895 }
8903cb82 1896 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1897 /* start is the beginning of the possible filehandle/object,
1898 * and s is the end of it
1899 * tmpbuf is a copy of it
1900 */
1901
a0d0e21e 1902 if (*start == '$') {
3280af22 1903 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1904 return 0;
1905 s = skipspace(s);
3280af22
NIS
1906 PL_bufptr = start;
1907 PL_expect = XREF;
a0d0e21e
LW
1908 return *s == '(' ? FUNCMETH : METHOD;
1909 }
1910 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1911 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1912 len -= 2;
1913 tmpbuf[len] = '\0';
1914 goto bare_package;
1915 }
1916 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1917 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1918 return 0;
1919 /* filehandle or package name makes it a method */
89bfa8cd 1920 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1921 s = skipspace(s);
3280af22 1922 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1923 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1924 bare_package:
3280af22 1925 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1926 newSVpvn(tmpbuf,len));
3280af22
NIS
1927 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1928 PL_expect = XTERM;
a0d0e21e 1929 force_next(WORD);
3280af22 1930 PL_bufptr = s;
a0d0e21e
LW
1931 return *s == '(' ? FUNCMETH : METHOD;
1932 }
1933 }
1934 return 0;
1935}
1936
ffb4593c
NT
1937/*
1938 * S_incl_perldb
1939 * Return a string of Perl code to load the debugger. If PERL5DB
1940 * is set, it will return the contents of that, otherwise a
1941 * compile-time require of perl5db.pl.
1942 */
1943
76e3520e 1944STATIC char*
cea2e8a9 1945S_incl_perldb(pTHX)
a0d0e21e 1946{
3280af22 1947 if (PL_perldb) {
76e3520e 1948 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1949
1950 if (pdb)
1951 return pdb;
61bb5906 1952 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1953 return "BEGIN { require 'perl5db.pl' }";
1954 }
1955 return "";
1956}
1957
1958
16d20bd9 1959/* Encoded script support. filter_add() effectively inserts a
4e553d73 1960 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1961 * Note that the filter function only applies to the current source file
1962 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1963 *
1964 * The datasv parameter (which may be NULL) can be used to pass
1965 * private data to this instance of the filter. The filter function
1966 * can recover the SV using the FILTER_DATA macro and use it to
1967 * store private buffers and state information.
1968 *
1969 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1970 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1971 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1972 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1973 * private use must be set using malloc'd pointers.
1974 */
16d20bd9
AD
1975
1976SV *
864dbfa3 1977Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1978{
f4c556ac
GS
1979 if (!funcp)
1980 return Nullsv;
1981
3280af22
NIS
1982 if (!PL_rsfp_filters)
1983 PL_rsfp_filters = newAV();
16d20bd9 1984 if (!datasv)
8c52afec 1985 datasv = NEWSV(255,0);
16d20bd9 1986 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1987 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1988 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1989 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1990 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1991 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1992 av_unshift(PL_rsfp_filters, 1);
1993 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1994 return(datasv);
1995}
4e553d73 1996
16d20bd9
AD
1997
1998/* Delete most recently added instance of this filter function. */
a0d0e21e 1999void
864dbfa3 2000Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2001{
e0c19803 2002 SV *datasv;
f4c556ac 2003 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 2004 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2005 return;
2006 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2007 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2008 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2009 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2010 IoANY(datasv) = (void *)NULL;
3280af22 2011 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2012
16d20bd9
AD
2013 return;
2014 }
2015 /* we need to search for the correct entry and clear it */
cea2e8a9 2016 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2017}
2018
2019
2020/* Invoke the n'th filter function for the current rsfp. */
2021I32
864dbfa3 2022Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2023
2024
8ac85365 2025 /* 0 = read one text line */
a0d0e21e 2026{
16d20bd9
AD
2027 filter_t funcp;
2028 SV *datasv = NULL;
e50aee73 2029
3280af22 2030 if (!PL_rsfp_filters)
16d20bd9 2031 return -1;
3280af22 2032 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2033 /* Provide a default input filter to make life easy. */
2034 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2035 DEBUG_P(PerlIO_printf(Perl_debug_log,
2036 "filter_read %d: from rsfp\n", idx));
4e553d73 2037 if (maxlen) {
16d20bd9
AD
2038 /* Want a block */
2039 int len ;
2040 int old_len = SvCUR(buf_sv) ;
2041
2042 /* ensure buf_sv is large enough */
2043 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2044 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2045 if (PerlIO_error(PL_rsfp))
37120919
AD
2046 return -1; /* error */
2047 else
2048 return 0 ; /* end of file */
2049 }
16d20bd9
AD
2050 SvCUR_set(buf_sv, old_len + len) ;
2051 } else {
2052 /* Want a line */
3280af22
NIS
2053 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2054 if (PerlIO_error(PL_rsfp))
37120919
AD
2055 return -1; /* error */
2056 else
2057 return 0 ; /* end of file */
2058 }
16d20bd9
AD
2059 }
2060 return SvCUR(buf_sv);
2061 }
2062 /* Skip this filter slot if filter has been deleted */
3280af22 2063 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2064 DEBUG_P(PerlIO_printf(Perl_debug_log,
2065 "filter_read %d: skipped (filter deleted)\n",
2066 idx));
16d20bd9
AD
2067 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2068 }
2069 /* Get function pointer hidden within datasv */
4755096e 2070 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2071 DEBUG_P(PerlIO_printf(Perl_debug_log,
2072 "filter_read %d: via function %p (%s)\n",
2073 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2074 /* Call function. The function is expected to */
2075 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2076 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2077 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2078}
2079
76e3520e 2080STATIC char *
cea2e8a9 2081S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2082{
c39cd008 2083#ifdef PERL_CR_FILTER
3280af22 2084 if (!PL_rsfp_filters) {
c39cd008 2085 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2086 }
2087#endif
3280af22 2088 if (PL_rsfp_filters) {
16d20bd9 2089
55497cff
PP
2090 if (!append)
2091 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2092 if (FILTER_READ(0, sv, 0) > 0)
2093 return ( SvPVX(sv) ) ;
2094 else
2095 return Nullch ;
2096 }
9d116dd7 2097 else
fd049845 2098 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2099}
2100
01ec43d0
GS
2101STATIC HV *
2102S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2103{
2104 GV *gv;
2105
01ec43d0 2106 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2107 return PL_curstash;
2108
2109 if (len > 2 &&
2110 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2111 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2112 {
2113 return GvHV(gv); /* Foo:: */
def3634b
GS
2114 }
2115
2116 /* use constant CLASS => 'MyClass' */
2117 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2118 SV *sv;
2119 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2120 pkgname = SvPV_nolen(sv);
2121 }
2122 }
2123
2124 return gv_stashpv(pkgname, FALSE);
2125}
a0d0e21e 2126
748a9306
LW
2127#ifdef DEBUGGING
2128 static char* exp_name[] =
09bef843
SB
2129 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2130 "ATTRTERM", "TERMBLOCK"
2131 };
748a9306 2132#endif
463ee0b2 2133
02aa26ce
NT
2134/*
2135 yylex
2136
2137 Works out what to call the token just pulled out of the input
2138 stream. The yacc parser takes care of taking the ops we return and
2139 stitching them into a tree.
2140
2141 Returns:
2142 PRIVATEREF
2143
2144 Structure:
2145 if read an identifier
2146 if we're in a my declaration
2147 croak if they tried to say my($foo::bar)
2148 build the ops for a my() declaration
2149 if it's an access to a my() variable
2150 are we in a sort block?
2151 croak if my($a); $a <=> $b
2152 build ops for access to a my() variable
2153 if in a dq string, and they've said @foo and we can't find @foo
2154 croak
2155 build ops for a bareword
2156 if we already built the token before, use it.
2157*/
2158
dba4d153 2159#ifdef USE_PURE_BISON
864dbfa3 2160int
dba4d153 2161Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2162{
20141f0e
RI
2163 int r;
2164
6f202aea 2165 yyactlevel++;
20141f0e
RI
2166 yylval_pointer[yyactlevel] = lvalp;
2167 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2168 if (yyactlevel >= YYMAXLEVEL)
2169 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2170
dba4d153 2171 r = Perl_yylex(aTHX);
20141f0e 2172
d8ae6756
RI
2173 if (yyactlevel > 0)
2174 yyactlevel--;
20141f0e
RI
2175
2176 return r;
2177}
dba4d153 2178#endif
20141f0e 2179
dba4d153
JH
2180#ifdef __SC__
2181#pragma segment Perl_yylex
2182#endif
dba4d153 2183int
dba4d153 2184Perl_yylex(pTHX)
20141f0e 2185{
79072805 2186 register char *s;
378cc40b 2187 register char *d;
79072805 2188 register I32 tmp;
463ee0b2 2189 STRLEN len;
161b471a
NIS
2190 GV *gv = Nullgv;
2191 GV **gvp = 0;
aa7440fb 2192 bool bof = FALSE;
a687059c 2193
02aa26ce 2194 /* check if there's an identifier for us to look at */
3280af22 2195 if (PL_pending_ident) {
02aa26ce 2196 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2197 char pit = PL_pending_ident;
2198 PL_pending_ident = 0;
bbce6d69 2199
607df283
SC
2200 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2201 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2202
02aa26ce
NT
2203 /* if we're in a my(), we can't allow dynamics here.
2204 $foo'bar has already been turned into $foo::bar, so
2205 just check for colons.
2206
2207 if it's a legal name, the OP is a PADANY.
2208 */
3280af22 2209 if (PL_in_my) {
77ca0c92 2210 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2211 if (strchr(PL_tokenbuf,':'))
2212 yyerror(Perl_form(aTHX_ "No package name allowed for "
2213 "variable %s in \"our\"",
2214 PL_tokenbuf));
77ca0c92
LW
2215 tmp = pad_allocmy(PL_tokenbuf);
2216 }
2217 else {
2218 if (strchr(PL_tokenbuf,':'))
2219 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2220
77ca0c92
LW
2221 yylval.opval = newOP(OP_PADANY, 0);
2222 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2223 return PRIVATEREF;
2224 }
bbce6d69
PP
2225 }
2226
4e553d73 2227 /*
02aa26ce
NT
2228 build the ops for accesses to a my() variable.
2229
2230 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2231 then used in a comparison. This catches most, but not
2232 all cases. For instance, it catches
2233 sort { my($a); $a <=> $b }
2234 but not
2235 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2236 (although why you'd do that is anyone's guess).
2237 */
2238
3280af22 2239 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2240#ifdef USE_THREADS
54b9620d 2241 /* Check for single character per-thread SVs */
3280af22
NIS
2242 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2243 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2244 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2245 {
2faa37cc 2246 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2247 yylval.opval->op_targ = tmp;
2248 return PRIVATEREF;
2249 }
2250#endif /* USE_THREADS */
3280af22 2251 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2252 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2253 /* might be an "our" variable" */
f472eb5c 2254 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2255 /* build ops for a bareword */
f472eb5c
GS
2256 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2257 sv_catpvn(sym, "::", 2);
2258 sv_catpv(sym, PL_tokenbuf+1);
2259 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2260 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2261 gv_fetchpv(SvPVX(sym),
77ca0c92 2262 (PL_in_eval
f472eb5c
GS
2263 ? (GV_ADDMULTI | GV_ADDINEVAL)
2264 : TRUE
77ca0c92
LW
2265 ),
2266 ((PL_tokenbuf[0] == '$') ? SVt_PV
2267 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2268 : SVt_PVHV));
2269 return WORD;
2270 }
2271
02aa26ce 2272 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2273 if (PL_last_lop_op == OP_SORT &&
2274 PL_tokenbuf[0] == '$' &&
2275 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2276 && !PL_tokenbuf[2])
bbce6d69 2277 {
3280af22
NIS
2278 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2279 d < PL_bufend && *d != '\n';
a863c7d1
MB
2280 d++)
2281 {
2282 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2283 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2284 PL_tokenbuf);
a863c7d1 2285 }
bbce6d69
PP
2286 }
2287 }
bbce6d69 2288
a863c7d1
MB
2289 yylval.opval = newOP(OP_PADANY, 0);
2290 yylval.opval->op_targ = tmp;
2291 return PRIVATEREF;
2292 }
bbce6d69
PP
2293 }
2294
02aa26ce
NT
2295 /*
2296 Whine if they've said @foo in a doublequoted string,
2297 and @foo isn't a variable we can find in the symbol
2298 table.
2299 */
3280af22
NIS
2300 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2301 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2302 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2303 && ckWARN(WARN_AMBIGUOUS))
2304 {
2305 /* Downgraded from fatal to warning 20000522 mjd */
2306 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2307 "Possible unintended interpolation of %s in string",
2308 PL_tokenbuf);
2309 }
bbce6d69
PP
2310 }
2311
02aa26ce 2312 /* build ops for a bareword */
3280af22 2313 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2314 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2315 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2316 ((PL_tokenbuf[0] == '$') ? SVt_PV
2317 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2318 : SVt_PVHV));
2319 return WORD;
2320 }
2321
02aa26ce
NT
2322 /* no identifier pending identification */
2323
3280af22 2324 switch (PL_lex_state) {
79072805
LW
2325#ifdef COMMENTARY
2326 case LEX_NORMAL: /* Some compilers will produce faster */
2327 case LEX_INTERPNORMAL: /* code if we comment these out. */
2328 break;
2329#endif
2330
09bef843 2331 /* when we've already built the next token, just pull it out of the queue */
79072805 2332 case LEX_KNOWNEXT:
3280af22
NIS
2333 PL_nexttoke--;
2334 yylval = PL_nextval[PL_nexttoke];
2335 if (!PL_nexttoke) {
2336 PL_lex_state = PL_lex_defer;
2337 PL_expect = PL_lex_expect;
2338 PL_lex_defer = LEX_NORMAL;
463ee0b2 2339 }
607df283 2340 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2341 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2342 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2343
3280af22 2344 return(PL_nexttype[PL_nexttoke]);
79072805 2345
02aa26ce 2346 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2347 when we get here, PL_bufptr is at the \
02aa26ce 2348 */
79072805
LW
2349 case LEX_INTERPCASEMOD:
2350#ifdef DEBUGGING
3280af22 2351 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2352 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2353#endif
02aa26ce 2354 /* handle \E or end of string */
3280af22 2355 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2356 char oldmod;
02aa26ce
NT
2357
2358 /* if at a \E */
3280af22
NIS
2359 if (PL_lex_casemods) {
2360 oldmod = PL_lex_casestack[--PL_lex_casemods];
2361 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2362
3280af22
NIS
2363 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2364 PL_bufptr += 2;
2365 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2366 }
79072805
LW
2367 return ')';
2368 }
3280af22
NIS
2369 if (PL_bufptr != PL_bufend)
2370 PL_bufptr += 2;
2371 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2372 return yylex();
79072805
LW
2373 }
2374 else {
607df283
SC
2375 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2376 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2377 s = PL_bufptr + 1;
79072805
LW
2378 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2379 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2380 if (strchr("LU", *s) &&
3280af22 2381 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2382 {
3280af22 2383 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2384 return ')';
2385 }
3280af22
NIS
2386 if (PL_lex_casemods > 10) {
2387 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2388 if (newlb != PL_lex_casestack) {
a0d0e21e 2389 SAVEFREEPV(newlb);
3280af22 2390 PL_lex_casestack = newlb;
a0d0e21e
LW
2391 }
2392 }
3280af22
NIS
2393 PL_lex_casestack[PL_lex_casemods++] = *s;
2394 PL_lex_casestack[PL_lex_casemods] = '\0';
2395 PL_lex_state = LEX_INTERPCONCAT;
2396 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2397 force_next('(');
2398 if (*s == 'l')
3280af22 2399 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2400 else if (*s == 'u')
3280af22 2401 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2402 else if (*s == 'L')
3280af22 2403 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2404 else if (*s == 'U')
3280af22 2405 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2406 else if (*s == 'Q')
3280af22 2407 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2408 else
cea2e8a9 2409 Perl_croak(aTHX_ "panic: yylex");
3280af22 2410 PL_bufptr = s + 1;
79072805 2411 force_next(FUNC);
3280af22
NIS
2412 if (PL_lex_starts) {
2413 s = PL_bufptr;
2414 PL_lex_starts = 0;
79072805
LW
2415 Aop(OP_CONCAT);
2416 }
2417 else
cea2e8a9 2418 return yylex();
79072805
LW
2419 }
2420
55497cff
PP
2421 case LEX_INTERPPUSH:
2422 return sublex_push();
2423
79072805 2424 case LEX_INTERPSTART:
3280af22 2425 if (PL_bufptr == PL_bufend)
79072805 2426 return sublex_done();
607df283
SC
2427 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2428 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2429 PL_expect = XTERM;
2430 PL_lex_dojoin = (*PL_bufptr == '@');
2431 PL_lex_state = LEX_INTERPNORMAL;
2432 if (PL_lex_dojoin) {
2433 PL_nextval[PL_nexttoke].ival = 0;
79072805 2434 force_next(',');
554b3eca 2435#ifdef USE_THREADS
533c011a
NIS
2436 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2437 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2438 force_next(PRIVATEREF);
2439#else
a0d0e21e 2440 force_ident("\"", '$');
554b3eca 2441#endif /* USE_THREADS */
3280af22 2442 PL_nextval[PL_nexttoke].ival = 0;
79072805 2443 force_next('$');
3280af22 2444 PL_nextval[PL_nexttoke].ival = 0;
79072805 2445 force_next('(');
3280af22 2446 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2447 force_next(FUNC);
2448 }
3280af22
NIS
2449 if (PL_lex_starts++) {
2450 s = PL_bufptr;
79072805
LW
2451 Aop(OP_CONCAT);
2452 }
cea2e8a9 2453 return yylex();
79072805
LW
2454
2455 case LEX_INTERPENDMAYBE:
3280af22
NIS
2456 if (intuit_more(PL_bufptr)) {
2457 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2458 break;
2459 }
2460 /* FALL THROUGH */
2461
2462 case LEX_INTERPEND:
3280af22
NIS
2463 if (PL_lex_dojoin) {
2464 PL_lex_dojoin = FALSE;
2465 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2466 return ')';
2467 }
43a16006 2468 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2469 && SvEVALED(PL_lex_repl))
43a16006 2470 {
e9fa98b2 2471 if (PL_bufptr != PL_bufend)
cea2e8a9 2472 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2473 PL_lex_repl = Nullsv;
2474 }
79072805
LW
2475 /* FALLTHROUGH */
2476 case LEX_INTERPCONCAT:
2477#ifdef DEBUGGING
3280af22 2478 if (PL_lex_brackets)
cea2e8a9 2479 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2480#endif
3280af22 2481 if (PL_bufptr == PL_bufend)
79072805
LW
2482 return sublex_done();
2483
3280af22
NIS
2484 if (SvIVX(PL_linestr) == '\'') {
2485 SV *sv = newSVsv(PL_linestr);
2486 if (!PL_lex_inpat)
76e3520e 2487 sv = tokeq(sv);
3280af22 2488 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2489 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2490 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2491 s = PL_bufend;
79072805
LW
2492 }
2493 else {
3280af22 2494 s = scan_const(PL_bufptr);
79072805 2495 if (*s == '\\')
3280af22 2496 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2497 else
3280af22 2498 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2499 }
2500
3280af22
NIS
2501 if (s != PL_bufptr) {
2502 PL_nextval[PL_nexttoke] = yylval;
2503 PL_expect = XTERM;
79072805 2504 force_next(THING);
3280af22 2505 if (PL_lex_starts++)
79072805
LW
2506 Aop(OP_CONCAT);
2507 else {
3280af22 2508 PL_bufptr = s;
cea2e8a9 2509 return yylex();
79072805
LW
2510 }
2511 }
2512
cea2e8a9 2513 return yylex();
a0d0e21e 2514 case LEX_FORMLINE:
3280af22
NIS
2515 PL_lex_state = LEX_NORMAL;
2516 s = scan_formline(PL_bufptr);
2517 if (!PL_lex_formbrack)
a0d0e21e
LW
2518 goto rightbracket;
2519 OPERATOR(';');
79072805
LW
2520 }
2521
3280af22
NIS
2522 s = PL_bufptr;
2523 PL_oldoldbufptr = PL_oldbufptr;
2524 PL_oldbufptr = s;
607df283 2525 DEBUG_T( {
bf49b057
GS
2526 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2527 exp_name[PL_expect], s);
79072805 2528 } )
463ee0b2
LW
2529
2530 retry:
378cc40b
LW
2531 switch (*s) {
2532 default:
7e2040f0 2533 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2534 goto keylookup;
cea2e8a9 2535 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2536 case 4:
2537 case 26:
2538 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2539 case 0:
3280af22
NIS
2540 if (!PL_rsfp) {
2541 PL_last_uni = 0;
2542 PL_last_lop = 0;
2543 if (PL_lex_brackets)
d98d5fff 2544 yyerror("Missing right curly or square bracket");
4e553d73 2545 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2546 "### Tokener got EOF\n");
2547 } )
79072805 2548 TOKEN(0);
463ee0b2 2549 }
3280af22 2550 if (s++ < PL_bufend)
a687059c 2551 goto retry; /* ignore stray nulls */
3280af22
NIS
2552 PL_last_uni = 0;
2553 PL_last_lop = 0;
2554 if (!PL_in_eval && !PL_preambled) {
2555 PL_preambled = TRUE;
2556 sv_setpv(PL_linestr,incl_perldb());
2557 if (SvCUR(PL_linestr))
2558 sv_catpv(PL_linestr,";");
2559 if (PL_preambleav){
2560 while(AvFILLp(PL_preambleav) >= 0) {
2561 SV *tmpsv = av_shift(PL_preambleav);
2562 sv_catsv(PL_linestr, tmpsv);
2563 sv_catpv(PL_linestr, ";");
91b7def8
PP
2564 sv_free(tmpsv);
2565 }
3280af22
NIS
2566 sv_free((SV*)PL_preambleav);
2567 PL_preambleav = NULL;
91b7def8 2568 }
3280af22
NIS
2569 if (PL_minus_n || PL_minus_p) {
2570 sv_catpv(PL_linestr, "LINE: while (<>) {");
2571 if (PL_minus_l)
2572 sv_catpv(PL_linestr,"chomp;");
2573 if (PL_minus_a) {
8fd239a7
CS
2574 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2575 if (gv)
2576 GvIMPORTED_AV_on(gv);
3280af22
NIS
2577 if (PL_minus_F) {
2578 if (strchr("/'\"", *PL_splitstr)
2579 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2580 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2581 else {
2582 char delim;
2583 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2584 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2585 delim = *s;
cea2e8a9 2586 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2587 "q" + (delim == '\''), delim);
3280af22 2588 for (s = PL_splitstr; *s; s++) {
54310121 2589 if (*s == '\\')
3280af22
NIS
2590 sv_catpvn(PL_linestr, "\\", 1);
2591 sv_catpvn(PL_linestr, s, 1);
54310121 2592 }
cea2e8a9 2593 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2594 }
2304df62
AD
2595 }
2596 else
3280af22 2597 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2598 }
79072805 2599 }
3280af22
NIS
2600 sv_catpv(PL_linestr, "\n");
2601 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2602 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2603 PL_last_lop = PL_last_uni = Nullch;
3280af22 2604 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2605 SV *sv = NEWSV(85,0);
2606
2607 sv_upgrade(sv, SVt_PVMG);
3280af22 2608 sv_setsv(sv,PL_linestr);
57843af0 2609 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2610 }
79072805 2611 goto retry;
a687059c 2612 }
e929a76b 2613 do {
aa7440fb 2614 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2615 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2616 fake_eof:
2617 if (PL_rsfp) {
2618 if (PL_preprocess && !PL_in_eval)
2619 (void)PerlProc_pclose(PL_rsfp);
2620 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2621 PerlIO_clearerr(PL_rsfp);
2622 else
2623 (void)PerlIO_close(PL_rsfp);
2624 PL_rsfp = Nullfp;
2625 PL_doextract = FALSE;
2626 }
2627 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2628 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2629 sv_catpv(PL_linestr,";}");
2630 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2631 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2632 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2633 PL_minus_n = PL_minus_p = 0;
2634 goto retry;
2635 }
2636 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2637 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2638 sv_setpv(PL_linestr,"");
2639 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2640 }
2641 /* if it looks like the start of a BOM, check if it in fact is */
2642 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2643#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2644# ifdef __GNU_LIBRARY__
2645# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2646# define FTELL_FOR_PIPE_IS_BROKEN
2647# endif
e3f494f1
JH
2648# else
2649# ifdef __GLIBC__
2650# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2651# define FTELL_FOR_PIPE_IS_BROKEN
2652# endif
2653# endif
226017aa
DD
2654# endif
2655#endif
2656#ifdef FTELL_FOR_PIPE_IS_BROKEN
2657 /* This loses the possibility to detect the bof
2658 * situation on perl -P when the libc5 is being used.
2659 * Workaround? Maybe attach some extra state to PL_rsfp?
2660 */
2661 if (!PL_preprocess)
7e28d3af 2662 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2663#else
7e28d3af 2664 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2665#endif
7e28d3af 2666 if (bof) {
3280af22 2667 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2668 s = swallow_bom((U8*)s);
e929a76b 2669 }
378cc40b 2670 }
3280af22 2671 if (PL_doextract) {
a0d0e21e 2672 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2673 PL_doextract = FALSE;
a0d0e21e
LW
2674
2675 /* Incest with pod. */
2676 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2677 sv_setpv(PL_linestr, "");
2678 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2679 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2680 PL_last_lop = PL_last_uni = Nullch;
3280af22 2681 PL_doextract = FALSE;
a0d0e21e 2682 }
4e553d73 2683 }
463ee0b2 2684 incline(s);
3280af22
NIS
2685 } while (PL_doextract);
2686 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2687 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2688 SV *sv = NEWSV(85,0);
a687059c 2689
93a17b20 2690 sv_upgrade(sv, SVt_PVMG);
3280af22 2691 sv_setsv(sv,PL_linestr);
57843af0 2692 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2693 }
3280af22 2694 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2695 PL_last_lop = PL_last_uni = Nullch;
57843af0 2696 if (CopLINE(PL_curcop) == 1) {
3280af22 2697 while (s < PL_bufend && isSPACE(*s))
79072805 2698 s++;
a0d0e21e 2699 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2700 s++;
44a8e56a 2701 d = Nullch;
3280af22 2702 if (!PL_in_eval) {
44a8e56a
PP
2703 if (*s == '#' && *(s+1) == '!')
2704 d = s + 2;
2705#ifdef ALTERNATE_SHEBANG
2706 else {
2707 static char as[] = ALTERNATE_SHEBANG;
2708 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2709 d = s + (sizeof(as) - 1);
2710 }
2711#endif /* ALTERNATE_SHEBANG */
2712 }
2713 if (d) {
b8378b72 2714 char *ipath;
774d564b 2715 char *ipathend;
b8378b72 2716
774d564b 2717 while (isSPACE(*d))
b8378b72
CS
2718 d++;
2719 ipath = d;
774d564b
PP
2720 while (*d && !isSPACE(*d))
2721 d++;
2722 ipathend = d;
2723
2724#ifdef ARG_ZERO_IS_SCRIPT
2725 if (ipathend > ipath) {
2726 /*
2727 * HP-UX (at least) sets argv[0] to the script name,
2728 * which makes $^X incorrect. And Digital UNIX and Linux,
2729 * at least, set argv[0] to the basename of the Perl
2730 * interpreter. So, having found "#!", we'll set it right.
2731 */
2732 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2733 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2734 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2735 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2736 SvSETMAGIC(x);
2737 }
774d564b 2738 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2739 }
774d564b 2740#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2741
2742 /*
2743 * Look for options.
2744 */
748a9306 2745 d = instr(s,"perl -");
84e30d1a 2746 if (!d) {
748a9306 2747 d = instr(s,"perl");
84e30d1a
GS
2748#if defined(DOSISH)
2749 /* avoid getting into infinite loops when shebang
2750 * line contains "Perl" rather than "perl" */
2751 if (!d) {
2752 for (d = ipathend-4; d >= ipath; --d) {
2753 if ((*d == 'p' || *d == 'P')
2754 && !ibcmp(d, "perl", 4))
2755 {
2756 break;
2757 }
2758 }
2759 if (d < ipath)
2760 d = Nullch;
2761 }
2762#endif
2763 }
44a8e56a
PP
2764#ifdef ALTERNATE_SHEBANG
2765 /*
2766 * If the ALTERNATE_SHEBANG on this system starts with a
2767 * character that can be part of a Perl expression, then if
2768 * we see it but not "perl", we're probably looking at the
2769 * start of Perl code, not a request to hand off to some
2770 * other interpreter. Similarly, if "perl" is there, but
2771 * not in the first 'word' of the line, we assume the line
2772 * contains the start of the Perl program.
44a8e56a
PP
2773 */
2774 if (d && *s != '#') {
774d564b 2775 char *c = ipath;
44a8e56a
PP
2776 while (*c && !strchr("; \t\r\n\f\v#", *c))
2777 c++;
2778 if (c < d)
2779 d = Nullch; /* "perl" not in first word; ignore */
2780 else
2781 *s = '#'; /* Don't try to parse shebang line */
2782 }
774d564b 2783#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2784#ifndef MACOS_TRADITIONAL
748a9306 2785 if (!d &&
44a8e56a 2786 *s == '#' &&
774d564b 2787 ipathend > ipath &&
3280af22 2788 !PL_minus_c &&
748a9306 2789 !instr(s,"indir") &&
3280af22 2790 instr(PL_origargv[0],"perl"))
748a9306 2791 {
9f68db38 2792 char **newargv;
9f68db38 2793
774d564b
PP
2794 *ipathend = '\0';
2795 s = ipathend + 1;
3280af22 2796 while (s < PL_bufend && isSPACE(*s))
9f68db38 2797 s++;
3280af22
NIS
2798 if (s < PL_bufend) {
2799 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2800 newargv[1] = s;
3280af22 2801 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2802 s++;
2803 *s = '\0';
3280af22 2804 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2805 }
2806 else
3280af22 2807 newargv = PL_origargv;
774d564b 2808 newargv[0] = ipath;
b4748376 2809 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2810 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2811 }
bf4acbe4 2812#endif
748a9306 2813 if (d) {
3280af22
NIS
2814 U32 oldpdb = PL_perldb;
2815 bool oldn = PL_minus_n;
2816 bool oldp = PL_minus_p;
748a9306
LW
2817
2818 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2819 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2820
2821 if (*d++ == '-') {
8cc95fdb
PP
2822 do {
2823 if (*d == 'M' || *d == 'm') {
2824 char *m = d;
2825 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2826 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2827 (int)(d - m), m);
2828 }
2829 d = moreswitches(d);
2830 } while (d);
155aba94
GS
2831 if ((PERLDB_LINE && !oldpdb) ||
2832 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2833 /* if we have already added "LINE: while (<>) {",
2834 we must not do it again */
748a9306 2835 {
3280af22
NIS
2836 sv_setpv(PL_linestr, "");
2837 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2838 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2839 PL_last_lop = PL_last_uni = Nullch;
3280af22 2840 PL_preambled = FALSE;
84902520 2841 if (PERLDB_LINE)
3280af22 2842 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2843 goto retry;
2844 }
a0d0e21e 2845 }
79072805 2846 }
9f68db38 2847 }
79072805 2848 }
3280af22
NIS
2849 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2850 PL_bufptr = s;
2851 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2852 return yylex();
ae986130 2853 }
378cc40b 2854 goto retry;
4fdae800 2855 case '\r':
6a27c188 2856#ifdef PERL_STRICT_CR
cea2e8a9 2857 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2858 Perl_croak(aTHX_
cc507455 2859 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2860#endif
4fdae800 2861 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2862#ifdef MACOS_TRADITIONAL
2863 case '\312':
2864#endif
378cc40b
LW
2865 s++;
2866 goto retry;
378cc40b 2867 case '#':
e929a76b 2868 case '\n':
3280af22 2869 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2870 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2871 /* handle eval qq[#line 1 "foo"\n ...] */
2872 CopLINE_dec(PL_curcop);
2873 incline(s);
2874 }
3280af22 2875 d = PL_bufend;
a687059c 2876 while (s < d && *s != '\n')
378cc40b 2877 s++;
0f85fab0 2878 if (s < d)
378cc40b 2879 s++;
463ee0b2 2880 incline(s);
3280af22
NIS
2881 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2882 PL_bufptr = s;
2883 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2884 return yylex();
a687059c 2885 }
378cc40b 2886 }
a687059c 2887 else {
378cc40b 2888 *s = '\0';
3280af22 2889 PL_bufend = s;
a687059c 2890 }
378cc40b
LW
2891 goto retry;
2892 case '-':
79072805 2893 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2894 I32 ftst = 0;
2895
378cc40b 2896 s++;
3280af22 2897 PL_bufptr = s;
748a9306
LW
2898 tmp = *s++;
2899
bf4acbe4 2900 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2901 s++;
2902
2903 if (strnEQ(s,"=>",2)) {
3280af22 2904 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2905 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2906 "### Saw unary minus before =>, forcing word '%s'\n", s);
2907 } )
748a9306
LW
2908 OPERATOR('-'); /* unary minus */
2909 }
3280af22 2910 PL_last_uni = PL_oldbufptr;
748a9306 2911 switch (tmp) {
e5edeb50
JH
2912 case 'r': ftst = OP_FTEREAD; break;
2913 case 'w': ftst = OP_FTEWRITE; break;
2914 case 'x': ftst = OP_FTEEXEC; break;
2915 case 'o': ftst = OP_FTEOWNED; break;
2916 case 'R': ftst = OP_FTRREAD; break;
2917 case 'W': ftst = OP_FTRWRITE; break;
2918 case 'X': ftst = OP_FTREXEC; break;
2919 case 'O': ftst = OP_FTROWNED; break;
2920 case 'e': ftst = OP_FTIS; break;
2921 case 'z': ftst = OP_FTZERO; break;
2922 case 's': ftst = OP_FTSIZE; break;
2923 case 'f': ftst = OP_FTFILE; break;
2924 case 'd': ftst = OP_FTDIR; break;
2925 case 'l': ftst = OP_FTLINK; break;
2926 case 'p': ftst = OP_FTPIPE; break;
2927 case 'S': ftst = OP_FTSOCK; break;
2928 case 'u': ftst = OP_FTSUID; break;
2929 case 'g': ftst = OP_FTSGID; break;
2930 case 'k': ftst = OP_FTSVTX; break;
2931 case 'b': ftst = OP_FTBLK; break;
2932 case 'c': ftst = OP_FTCHR; break;
2933 case 't': ftst = OP_FTTTY; break;
2934 case 'T': ftst = OP_FTTEXT; break;
2935 case 'B': ftst = OP_FTBINARY; break;
2936 case 'M': case 'A': case 'C':
2937 gv_fetchpv("\024",TRUE, SVt_PV);
2938 switch (tmp) {
2939 case 'M': ftst = OP_FTMTIME; break;
2940 case 'A': ftst = OP_FTATIME; break;
2941 case 'C': ftst = OP_FTCTIME; break;
2942 default: break;
2943 }
2944 break;
378cc40b 2945 default:
378cc40b
LW
2946 break;
2947 }
e5edeb50
JH
2948 if (ftst) {
2949 PL_last_lop_op = ftst;
4e553d73 2950 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2951 "### Saw file test %c\n", (int)ftst);
e5edeb50 2952 } )
e5edeb50
JH
2953 FTST(ftst);
2954 }
2955 else {
2956 /* Assume it was a minus followed by a one-letter named
2957 * subroutine call (or a -bareword), then. */
95c31fe3 2958 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2959 "### %c looked like a file test but was not\n",
2960 (int)ftst);
95c31fe3 2961 } )
e5edeb50
JH
2962 s -= 2;
2963 }
378cc40b 2964 }
a687059c
LW
2965 tmp = *s++;
2966 if (*s == tmp) {
2967 s++;
3280af22 2968 if (PL_expect == XOPERATOR)
79072805
LW
2969 TERM(POSTDEC);
2970 else
2971 OPERATOR(PREDEC);
2972 }
2973 else if (*s == '>') {
2974 s++;
2975 s = skipspace(s);
7e2040f0 2976 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2977 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2978 TOKEN(ARROW);
79072805 2979 }
748a9306
LW
2980 else if (*s == '$')
2981 OPERATOR(ARROW);
463ee0b2 2982 else
748a9306 2983 TERM(ARROW);
a687059c 2984 }
3280af22 2985 if (PL_expect == XOPERATOR)
79072805
LW
2986 Aop(OP_SUBTRACT);
2987 else {
3280af22 2988 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2989 check_uni();
79072805 2990 OPERATOR('-'); /* unary minus */
2f3197b3 2991 }
79072805 2992
378cc40b 2993 case '+':
a687059c
LW
2994 tmp = *s++;
2995 if (*s == tmp) {
378cc40b 2996 s++;
3280af22 2997 if (PL_expect == XOPERATOR)
79072805
LW
2998 TERM(POSTINC);
2999 else
3000 OPERATOR(PREINC);
378cc40b 3001 }
3280af22 3002 if (PL_expect == XOPERATOR)
79072805
LW
3003 Aop(OP_ADD);
3004 else {
3280af22 3005 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3006 check_uni();
a687059c 3007 OPERATOR('+');
2f3197b3 3008 }
a687059c 3009
378cc40b 3010 case '*':
3280af22
NIS
3011 if (PL_expect != XOPERATOR) {
3012 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3013 PL_expect = XOPERATOR;
3014 force_ident(PL_tokenbuf, '*');
3015 if (!*PL_tokenbuf)
a0d0e21e 3016 PREREF('*');
79072805 3017 TERM('*');
a687059c 3018 }
79072805
LW
3019 s++;
3020 if (*s == '*') {
a687059c 3021 s++;
79072805 3022 PWop(OP_POW);
a687059c 3023 }
79072805
LW
3024 Mop(OP_MULTIPLY);
3025
378cc40b 3026 case '%':
3280af22 3027 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3028 ++s;
3029 Mop(OP_MODULO);
a687059c 3030 }
3280af22
NIS
3031 PL_tokenbuf[0] = '%';
3032 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3033 if (!PL_tokenbuf[1]) {
3034 if (s == PL_bufend)
bbce6d69
PP
3035 yyerror("Final % should be \\% or %name");
3036 PREREF('%');
a687059c 3037 }
3280af22 3038 PL_pending_ident = '%';
bbce6d69 3039 TERM('%');
a687059c 3040
378cc40b 3041 case '^':
79072805 3042 s++;
a0d0e21e 3043 BOop(OP_BIT_XOR);
79072805 3044 case '[':
3280af22 3045 PL_lex_brackets++;
79072805 3046 /* FALL THROUGH */
378cc40b 3047 case '~':
378cc40b 3048 case ',':
378cc40b
LW
3049 tmp = *s++;
3050 OPERATOR(tmp);
a0d0e21e
LW
3051 case ':':
3052 if (s[1] == ':') {
3053 len = 0;
3054 goto just_a_word;
3055 }
3056 s++;
09bef843
SB
3057 switch (PL_expect) {
3058 OP *attrs;
3059 case XOPERATOR:
3060 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3061 break;
3062 PL_bufptr = s; /* update in case we back off */
3063 goto grabattrs;
3064 case XATTRBLOCK:
3065 PL_expect = XBLOCK;
3066 goto grabattrs;
3067 case XATTRTERM:
3068 PL_expect = XTERMBLOCK;
3069 grabattrs:
3070 s = skipspace(s);
3071 attrs = Nullop;
7e2040f0 3072 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3073 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3074 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3075 if (tmp < 0) tmp = -tmp;
3076 switch (tmp) {
3077 case KEY_or:
3078 case KEY_and:
3079 case KEY_for:
3080 case KEY_unless:
3081 case KEY_if:
3082 case KEY_while:
3083 case KEY_until:
3084 goto got_attrs;
3085 default:
3086 break;
3087 }
3088 }
09bef843
SB
3089 if (*d == '(') {
3090 d = scan_str(d,TRUE,TRUE);
3091 if (!d) {
09bef843
SB
3092 /* MUST advance bufptr here to avoid bogus
3093 "at end of line" context messages from yyerror().
3094 */
3095 PL_bufptr = s + len;
3096 yyerror("Unterminated attribute parameter in attribute list");
3097 if (attrs)
3098 op_free(attrs);
3099 return 0; /* EOF indicator */
3100 }
3101 }
3102 if (PL_lex_stuff) {
3103 SV *sv = newSVpvn(s, len);
3104 sv_catsv(sv, PL_lex_stuff);
3105 attrs = append_elem(OP_LIST, attrs,
3106 newSVOP(OP_CONST, 0, sv));
3107 SvREFCNT_dec(PL_lex_stuff);
3108 PL_lex_stuff = Nullsv;
3109 }
3110 else {
78f9721b
SM
3111 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3112 CvLVALUE_on(PL_compcv);
3113 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3114 CvLOCKED_on(PL_compcv);
3115 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3116 CvMETHOD_on(PL_compcv);
87ecf892
DM
3117#ifdef USE_ITHREADS
3118 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3119 GvSHARED_on(cGVOPx_gv(yylval.opval));
3120#endif
78f9721b
SM
3121 /* After we've set the flags, it could be argued that
3122 we don't need to do the attributes.pm-based setting
3123 process, and shouldn't bother appending recognized
3124 flags. To experiment with that, uncomment the
3125 following "else": */
0256094b 3126 else
78f9721b
SM
3127 attrs = append_elem(OP_LIST, attrs,
3128 newSVOP(OP_CONST, 0,
3129 newSVpvn(s, len)));
09bef843
SB
3130 }
3131 s = skipspace(d);
0120eecf 3132 if (*s == ':' && s[1] != ':')
09bef843 3133 s = skipspace(s+1);
0120eecf
GS
3134 else if (s == d)
3135 break; /* require real whitespace or :'s */
09bef843 3136 }
f9829d6b
GS
3137 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3138 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3139 char q = ((*s == '\'') ? '"' : '\'');
3140 /* If here for an expression, and parsed no attrs, back off. */
3141 if (tmp == '=' && !attrs) {
3142 s = PL_bufptr;
3143 break;
3144 }
3145 /* MUST advance bufptr here to avoid bogus "at end of line"
3146 context messages from yyerror().
3147 */
3148 PL_bufptr = s;
3149 if (!*s)
3150 yyerror("Unterminated attribute list");
3151 else
3152 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3153 q, *s, q));
3154 if (attrs)
3155 op_free(attrs);
3156 OPERATOR(':');
3157 }
f9829d6b 3158 got_attrs:
09bef843
SB
3159 if (attrs) {
3160 PL_nextval[PL_nexttoke].opval = attrs;
3161 force_next(THING);
3162 }
3163 TOKEN(COLONATTR);
3164 }
a0d0e21e 3165 OPERATOR(':');
8990e307
LW
3166 case '(':
3167 s++;
3280af22
NIS
3168 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3169 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3170 else
3280af22 3171 PL_expect = XTERM;
a0d0e21e 3172 TOKEN('(');
378cc40b 3173 case ';':
f4dd75d9 3174 CLINE;
378cc40b
LW
3175 tmp = *s++;
3176 OPERATOR(tmp);
3177 case ')':
378cc40b 3178 tmp = *s++;
16d20bd9
AD
3179 s = skipspace(s);
3180 if (*s == '{')
3181 PREBLOCK(tmp);
378cc40b 3182 TERM(tmp);
79072805
LW
3183 case ']':
3184 s++;
3280af22 3185 if (PL_lex_brackets <= 0)
d98d5fff 3186 yyerror("Unmatched right square bracket");
463ee0b2 3187 else
3280af22
NIS
3188 --PL_lex_brackets;
3189 if (PL_lex_state == LEX_INTERPNORMAL) {
3190 if (PL_lex_brackets == 0) {
a0d0e21e 3191 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3192 PL_lex_state = LEX_INTERPEND;
79072805
LW
3193 }
3194 }
4633a7c4 3195 TERM(']');
79072805
LW
3196 case '{':
3197 leftbracket:
79072805 3198 s++;
3280af22
NIS
3199 if (PL_lex_brackets > 100) {
3200 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3201 if (newlb != PL_lex_brackstack) {
8990e307 3202 SAVEFREEPV(newlb);
3280af22 3203 PL_lex_brackstack = newlb;
8990e307
LW
3204 }
3205 }
3280af22 3206 switch (PL_expect) {
a0d0e21e 3207 case XTERM:
3280af22 3208 if (PL_lex_formbrack) {
a0d0e21e
LW
3209 s--;
3210 PRETERMBLOCK(DO);
3211 }
3280af22
NIS
3212 if (PL_oldoldbufptr == PL_last_lop)
3213 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3214 else
3280af22 3215 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3216 OPERATOR(HASHBRACK);
a0d0e21e 3217 case XOPERATOR:
bf4acbe4 3218 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3219 s++;
44a8e56a 3220 d = s;
3280af22
NIS
3221 PL_tokenbuf[0] = '\0';
3222 if (d < PL_bufend && *d == '-') {
3223 PL_tokenbuf[0] = '-';
44a8e56a 3224 d++;
bf4acbe4 3225 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3226 d++;
3227 }
7e2040f0 3228 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3229 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3230 FALSE, &len);
bf4acbe4 3231 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3232 d++;
3233 if (*d == '}') {
3280af22 3234 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3235 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3236 if (minus)
3237 force_next('-');
748a9306
LW
3238 }
3239 }
3240 /* FALL THROUGH */
09bef843 3241 case XATTRBLOCK:
748a9306 3242 case XBLOCK:
3280af22
NIS
3243 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3244 PL_expect = XSTATE;
a0d0e21e 3245 break;
09bef843 3246 case XATTRTERM:
a0d0e21e 3247 case XTERMBLOCK:
3280af22
NIS
3248 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3249 PL_expect = XSTATE;
a0d0e21e
LW
3250 break;
3251 default: {
3252 char *t;
3280af22
NIS
3253 if (PL_oldoldbufptr == PL_last_lop)
3254 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3255 else
3280af22 3256 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3257 s = skipspace(s);
09ecc4b6 3258 if (*s == '}')
a0d0e21e 3259 OPERATOR(HASHBRACK);
b8a4b1be
GS
3260 /* This hack serves to disambiguate a pair of curlies
3261 * as being a block or an anon hash. Normally, expectation
3262 * determines that, but in cases where we're not in a
3263 * position to expect anything in particular (like inside
3264 * eval"") we have to resolve the ambiguity. This code
3265 * covers the case where the first term in the curlies is a
3266 * quoted string. Most other cases need to be explicitly
3267 * disambiguated by prepending a `+' before the opening
3268 * curly in order to force resolution as an anon hash.
3269 *
3270 * XXX should probably propagate the outer expectation
3271 * into eval"" to rely less on this hack, but that could
3272 * potentially break current behavior of eval"".
3273 * GSAR 97-07-21
3274 */
3275 t = s;
3276 if (*s == '\'' || *s == '"' || *s == '`') {
3277 /* common case: get past first string, handling escapes */
3280af22 3278 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3279 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3280 t++;
3281 t++;
a0d0e21e 3282 }
b8a4b1be 3283 else if (*s == 'q') {
3280af22 3284 if (++t < PL_bufend
b8a4b1be 3285 && (!isALNUM(*t)
3280af22 3286 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3287 && !isALNUM(*t))))
3288 {
b8a4b1be
GS
3289 char *tmps;
3290 char open, close, term;
3291 I32 brackets = 1;
3292
3280af22 3293 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3294 t++;
3295 term = *t;
3296 open = term;
3297 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3298 term = tmps[5];
3299 close = term;
3300 if (open == close)
3280af22
NIS
3301 for (t++; t < PL_bufend; t++) {
3302 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3303 t++;
6d07e5e9 3304 else if (*t == open)
b8a4b1be
GS
3305 break;
3306 }
3307 else
3280af22
NIS
3308 for (t++; t < PL_bufend; t++) {
3309 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3310 t++;
6d07e5e9 3311 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3312 break;
3313 else if (*t == open)
3314 brackets++;
3315 }
3316 }
3317 t++;
a0d0e21e 3318 }
7e2040f0 3319 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3320 t += UTF8SKIP(t);
7e2040f0 3321 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3322 t += UTF8SKIP(t);
a0d0e21e 3323 }
3280af22 3324 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3325 t++;
b8a4b1be
GS
3326 /* if comma follows first term, call it an anon hash */
3327 /* XXX it could be a comma expression with loop modifiers */
3280af22 3328 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3329 || (*t == '=' && t[1] == '>')))
a0d0e21e 3330 OPERATOR(HASHBRACK);
3280af22 3331 if (PL_expect == XREF)
4e4e412b 3332 PL_expect = XTERM;
a0d0e21e 3333 else {
3280af22
NIS
3334 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3335 PL_expect = XSTATE;
a0d0e21e 3336 }
8990e307 3337 }
a0d0e21e 3338 break;
463ee0b2 3339 }
57843af0 3340 yylval.ival = CopLINE(PL_curcop);
79072805 3341 if (isSPACE(*s) || *s == '#')
3280af22 3342 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3343 TOKEN('{');
378cc40b 3344 case '}':
79072805
LW
3345 rightbracket:
3346 s++;
3280af22 3347 if (PL_lex_brackets <= 0)
d98d5fff 3348 yyerror("Unmatched right curly bracket");
463ee0b2 3349 else
3280af22 3350 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3351 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3352 PL_lex_formbrack = 0;
3353 if (PL_lex_state == LEX_INTERPNORMAL) {
3354 if (PL_lex_brackets == 0) {
9059aa12
LW
3355 if (PL_expect & XFAKEBRACK) {
3356 PL_expect &= XENUMMASK;
3280af22
NIS
3357 PL_lex_state = LEX_INTERPEND;
3358 PL_bufptr = s;
cea2e8a9 3359 return yylex(); /* ignore fake brackets */
79072805 3360 }
fa83b5b6 3361 if (*s == '-' && s[1] == '>')
3280af22 3362 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3363 else if (*s != '[' && *s != '{')
3280af22 3364 PL_lex_state = LEX_INTERPEND;
79072805
LW
3365 }
3366 }
9059aa12
LW
3367 if (PL_expect & XFAKEBRACK) {
3368 PL_expect &= XENUMMASK;
3280af22 3369 PL_bufptr = s;
cea2e8a9 3370 return yylex(); /* ignore fake brackets */
748a9306 3371 }
79072805
LW
3372 force_next('}');
3373 TOKEN(';');
378cc40b
LW
3374 case '&':
3375 s++;
3376 tmp = *s++;
3377 if (tmp == '&')
a0d0e21e 3378 AOPERATOR(ANDAND);
378cc40b 3379 s--;
3280af22 3380 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3381 if (ckWARN(WARN_SEMICOLON)
3382 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3383 {
57843af0 3384 CopLINE_dec(PL_curcop);
cea2e8a9 3385 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3386 CopLINE_inc(PL_curcop);
463ee0b2 3387 }
79072805 3388 BAop(OP_BIT_AND);
463ee0b2 3389 }
79072805 3390
3280af22
NIS
3391 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3392 if (*PL_tokenbuf) {
3393 PL_expect = XOPERATOR;
3394 force_ident(PL_tokenbuf, '&');
463ee0b2 3395 }
79072805
LW
3396 else
3397 PREREF('&');
c07a80fd 3398 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3399 TERM('&');
3400
378cc40b
LW
3401 case '|':
3402 s++;
3403 tmp = *s++;
3404 if (tmp == '|')
a0d0e21e 3405 AOPERATOR(OROR);
378cc40b 3406 s--;
79072805 3407 BOop(OP_BIT_OR);
378cc40b
LW
3408 case '=':
3409 s++;
3410 tmp = *s++;
3411 if (tmp == '=')
79072805
LW
3412 Eop(OP_EQ);
3413 if (tmp == '>')
3414 OPERATOR(',');
378cc40b 3415 if (tmp == '~')
79072805 3416 PMop(OP_MATCH);
599cee73 3417 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3418 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3419 s--;
3280af22
NIS
3420 if (PL_expect == XSTATE && isALPHA(tmp) &&
3421 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3422 {
3280af22
NIS
3423 if (PL_in_eval && !PL_rsfp) {
3424 d = PL_bufend;
a5f75d66
AD
3425 while (s < d) {
3426 if (*s++ == '\n') {
3427 incline(s);
3428 if (strnEQ(s,"=cut",4)) {
3429 s = strchr(s,'\n');
3430 if (s)
3431 s++;
3432 else
3433 s = d;
3434 incline(s);
3435 goto retry;
3436 }
3437 }
3438 }
3439 goto retry;
3440 }
3280af22
NIS
3441 s = PL_bufend;
3442 PL_doextract = TRUE;
a0d0e21e
LW
3443 goto retry;
3444 }
3280af22 3445 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3446 char *t;
51882d45 3447#ifdef PERL_STRICT_CR
bf4acbe4 3448 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3449#else
bf4acbe4 3450 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3451#endif
a0d0e21e
LW
3452 if (*t == '\n' || *t == '#') {
3453 s--;
3280af22 3454 PL_expect = XBLOCK;
a0d0e21e
LW
3455 goto leftbracket;
3456 }
79072805 3457 }
a0d0e21e
LW
3458 yylval.ival = 0;
3459 OPERATOR(ASSIGNOP);
378cc40b
LW
3460 case '!':
3461 s++;
3462 tmp = *s++;
3463 if (tmp == '=')
79072805 3464 Eop(OP_NE);
378cc40b 3465 if (tmp == '~')
79072805 3466 PMop(OP_NOT);
378cc40b
LW
3467 s--;
3468 OPERATOR('!');
3469 case '<':
3280af22 3470 if (PL_expect != XOPERATOR) {
93a17b20 3471 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3472 check_uni();
79072805
LW
3473 if (s[1] == '<')
3474 s = scan_heredoc(s);
3475 else
3476 s = scan_inputsymbol(s);
3477 TERM(sublex_start());
378cc40b
LW
3478 }
3479 s++;
3480 tmp = *s++;
3481 if (tmp == '<')
79072805 3482 SHop(OP_LEFT_SHIFT);
395c3793
LW
3483 if (tmp == '=') {
3484 tmp = *s++;
3485 if (tmp == '>')
79072805 3486 Eop(OP_NCMP);
395c3793 3487 s--;
79072805 3488 Rop(OP_LE);
395c3793 3489 }
378cc40b 3490 s--;
79072805 3491 Rop(OP_LT);
378cc40b
LW
3492 case '>':
3493 s++;
3494 tmp = *s++;
3495 if (tmp == '>')
79072805 3496 SHop(OP_RIGHT_SHIFT);
378cc40b 3497 if (tmp == '=')
79072805 3498 Rop(OP_GE);
378cc40b 3499 s--;
79072805 3500 Rop(OP_GT);
378cc40b
LW
3501
3502 case '$':
bbce6d69
PP
3503 CLINE;
3504
3280af22
NIS
3505 if (PL_expect == XOPERATOR) {
3506 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3507 PL_expect = XTERM;
a0d0e21e 3508 depcom();
bbce6d69 3509 return ','; /* grandfather non-comma-format format */
a0d0e21e 3510 }
8990e307 3511 }
a0d0e21e 3512
7e2040f0 3513 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3514 PL_tokenbuf[0] = '@';
376b8730
SM
3515 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3516 sizeof PL_tokenbuf - 1, FALSE);
3517 if (PL_expect == XOPERATOR)
3518 no_op("Array length", s);
3280af22 3519 if (!PL_tokenbuf[1])
a0d0e21e 3520 PREREF(DOLSHARP);
3280af22
NIS
3521 PL_expect = XOPERATOR;
3522 PL_pending_ident = '#';
463ee0b2 3523 TOKEN(DOLSHARP);
79072805 3524 }
bbce6d69 3525
3280af22 3526 PL_tokenbuf[0] = '$';
376b8730
SM
3527 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3528 sizeof PL_tokenbuf - 1, FALSE);
3529 if (PL_expect == XOPERATOR)
3530 no_op("Scalar", s);
3280af22
NIS
3531 if (!PL_tokenbuf[1]) {
3532 if (s == PL_bufend)
bbce6d69
PP
3533 yyerror("Final $ should be \\$ or $name");
3534 PREREF('$');
8990e307 3535 }
a0d0e21e 3536
bbce6d69 3537 /* This kludge not intended to be bulletproof. */
3280af22 3538 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3539 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3540 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3541 yylval.opval->op_private = OPpCONST_ARYBASE;
3542 TERM(THING);
3543 }
3544
ff68c719 3545 d = s;
69d2bceb 3546 tmp = (I32)*s;
3280af22 3547 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3548 s = skipspace(s);
3549
3280af22 3550 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3551 char *t;
3552 if (*s == '[') {
3280af22 3553 PL_tokenbuf[0] = '@';
599cee73 3554 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3555 for(t = s + 1;
7e2040f0 3556 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3557 t++) ;
a0d0e21e 3558 if (*t++ == ',') {
3280af22
NIS
3559 PL_bufptr = skipspace(PL_bufptr);
3560 while (t < PL_bufend && *t != ']')
bbce6d69 3561 t++;
cea2e8a9 3562 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM