This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate mainline.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
be4731d2 40#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || PL_hints & HINT_UTF8)
a0ed51b3 41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
075953c3
JH
129/* Note that REPORT() and REPORT2() will be expressions that supply
130 * their own trailing comma, not suitable for statements as such. */
998054bd 131#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
132# define REPORT(x,retval) tokereport(x,s,(int)retval),
133# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 134#else
075953c3
JH
135# define REPORT(x,retval)
136# define REPORT2(x,retval)
998054bd
SC
137#endif
138
075953c3
JH
139#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
140#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
141#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
142#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
143#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
144#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
145#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
146#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
147#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
148#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
149#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
150#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
151#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
152#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
153#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
154#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
155#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
156#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
157#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
158#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 159
a687059c
LW
160/* This bit of chicanery makes a unary function followed by
161 * a parenthesis into a function with one argument, highest precedence.
162 */
2f3197b3 163#define UNI(f) return(yylval.ival = f, \
075953c3 164 REPORT("uni",f) \
3280af22
NIS
165 PL_expect = XTERM, \
166 PL_bufptr = s, \
167 PL_last_uni = PL_oldbufptr, \
168 PL_last_lop_op = f, \
a687059c
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
79072805 171#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 172 REPORT("uni",f) \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
79072805
LW
175 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
176
9f68db38 177/* grandfather return to old style */
3280af22 178#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 179
2d00ba3b 180STATIC void
61b2116b 181S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 182{
998054bd
SC
183 SV *report;
184 DEBUG_T({
185 report = newSVpv(thing, 0);
61b2116b 186 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
187
188 if (s - PL_bufptr > 0)
189 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
190 else {
191 if (PL_oldbufptr && *PL_oldbufptr)
192 sv_catpv(report, PL_tokenbuf);
193 }
194 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
195 })
196}
197
ffb4593c
NT
198/*
199 * S_ao
200 *
201 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
202 * into an OP_ANDASSIGN or OP_ORASSIGN
203 */
204
76e3520e 205STATIC int
cea2e8a9 206S_ao(pTHX_ int toketype)
a0d0e21e 207{
3280af22
NIS
208 if (*PL_bufptr == '=') {
209 PL_bufptr++;
a0d0e21e
LW
210 if (toketype == ANDAND)
211 yylval.ival = OP_ANDASSIGN;
212 else if (toketype == OROR)
213 yylval.ival = OP_ORASSIGN;
214 toketype = ASSIGNOP;
215 }
216 return toketype;
217}
218
ffb4593c
NT
219/*
220 * S_no_op
221 * When Perl expects an operator and finds something else, no_op
222 * prints the warning. It always prints "<something> found where
223 * operator expected. It prints "Missing semicolon on previous line?"
224 * if the surprise occurs at the start of the line. "do you need to
225 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
226 * where the compiler doesn't know if foo is a method call or a function.
227 * It prints "Missing operator before end of line" if there's nothing
228 * after the missing operator, or "... before <...>" if there is something
229 * after the missing operator.
230 */
231
76e3520e 232STATIC void
cea2e8a9 233S_no_op(pTHX_ char *what, char *s)
463ee0b2 234{
3280af22
NIS
235 char *oldbp = PL_bufptr;
236 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 237
1189a94a
GS
238 if (!s)
239 s = oldbp;
07c798fb 240 else
1189a94a 241 PL_bufptr = s;
cea2e8a9 242 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 243 if (is_first)
cea2e8a9 244 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 245 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 246 char *t;
7e2040f0 247 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 248 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 249 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 250 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 251 }
07c798fb
HS
252 else {
253 assert(s >= oldbp);
cea2e8a9 254 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 255 }
3280af22 256 PL_bufptr = oldbp;
8990e307
LW
257}
258
ffb4593c
NT
259/*
260 * S_missingterm
261 * Complain about missing quote/regexp/heredoc terminator.
262 * If it's called with (char *)NULL then it cauterizes the line buffer.
263 * If we're in a delimited string and the delimiter is a control
264 * character, it's reformatted into a two-char sequence like ^C.
265 * This is fatal.
266 */
267
76e3520e 268STATIC void
cea2e8a9 269S_missingterm(pTHX_ char *s)
8990e307
LW
270{
271 char tmpbuf[3];
272 char q;
273 if (s) {
274 char *nl = strrchr(s,'\n');
d2719217 275 if (nl)
8990e307
LW
276 *nl = '\0';
277 }
9d116dd7
JH
278 else if (
279#ifdef EBCDIC
280 iscntrl(PL_multi_close)
281#else
282 PL_multi_close < 32 || PL_multi_close == 127
283#endif
284 ) {
8990e307 285 *tmpbuf = '^';
3280af22 286 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
287 s = "\\n";
288 tmpbuf[2] = '\0';
289 s = tmpbuf;
290 }
291 else {
3280af22 292 *tmpbuf = PL_multi_close;
8990e307
LW
293 tmpbuf[1] = '\0';
294 s = tmpbuf;
295 }
296 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 297 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 298}
79072805 299
ffb4593c
NT
300/*
301 * Perl_deprecate
ffb4593c
NT
302 */
303
79072805 304void
864dbfa3 305Perl_deprecate(pTHX_ char *s)
a0d0e21e 306{
599cee73 307 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 308 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
309}
310
ffb4593c
NT
311/*
312 * depcom
9cbb5ea2 313 * Deprecate a comma-less variable list.
ffb4593c
NT
314 */
315
76e3520e 316STATIC void
cea2e8a9 317S_depcom(pTHX)
a0d0e21e
LW
318{
319 deprecate("comma-less variable list");
320}
321
ffb4593c 322/*
9cbb5ea2
GS
323 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
324 * utf16-to-utf8-reversed.
ffb4593c
NT
325 */
326
c39cd008
GS
327#ifdef PERL_CR_FILTER
328static void
329strip_return(SV *sv)
330{
331 register char *s = SvPVX(sv);
332 register char *e = s + SvCUR(sv);
333 /* outer loop optimized to do nothing if there are no CR-LFs */
334 while (s < e) {
335 if (*s++ == '\r' && *s == '\n') {
336 /* hit a CR-LF, need to copy the rest */
337 register char *d = s - 1;
338 *d++ = *s++;
339 while (s < e) {
340 if (*s == '\r' && s[1] == '\n')
341 s++;
342 *d++ = *s++;
343 }
344 SvCUR(sv) -= s - d;
345 return;
346 }
347 }
348}
a868473f 349
76e3520e 350STATIC I32
c39cd008 351S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 352{
c39cd008
GS
353 I32 count = FILTER_READ(idx+1, sv, maxlen);
354 if (count > 0 && !maxlen)
355 strip_return(sv);
356 return count;
a868473f
NIS
357}
358#endif
359
ffb4593c
NT
360/*
361 * Perl_lex_start
9cbb5ea2
GS
362 * Initialize variables. Uses the Perl save_stack to save its state (for
363 * recursive calls to the parser).
ffb4593c
NT
364 */
365
a0d0e21e 366void
864dbfa3 367Perl_lex_start(pTHX_ SV *line)
79072805 368{
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
18b09519 386 }
57843af0 387 SAVECOPLINE(PL_curcop);
3280af22
NIS
388 SAVEPPTR(PL_bufptr);
389 SAVEPPTR(PL_bufend);
390 SAVEPPTR(PL_oldbufptr);
391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
392 SAVEPPTR(PL_last_lop);
393 SAVEPPTR(PL_last_uni);
3280af22
NIS
394 SAVEPPTR(PL_linestart);
395 SAVESPTR(PL_linestr);
396 SAVEPPTR(PL_lex_brackstack);
397 SAVEPPTR(PL_lex_casestack);
c76ac1ee 398 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
399 SAVESPTR(PL_lex_stuff);
400 SAVEI32(PL_lex_defer);
09bef843 401 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 402 SAVESPTR(PL_lex_repl);
bebdddfc
GS
403 SAVEINT(PL_expect);
404 SAVEINT(PL_lex_expect);
3280af22
NIS
405
406 PL_lex_state = LEX_NORMAL;
407 PL_lex_defer = 0;
408 PL_expect = XSTATE;
409 PL_lex_brackets = 0;
3280af22
NIS
410 New(899, PL_lex_brackstack, 120, char);
411 New(899, PL_lex_casestack, 12, char);
412 SAVEFREEPV(PL_lex_brackstack);
413 SAVEFREEPV(PL_lex_casestack);
414 PL_lex_casemods = 0;
415 *PL_lex_casestack = '\0';
416 PL_lex_dojoin = 0;
417 PL_lex_starts = 0;
418 PL_lex_stuff = Nullsv;
419 PL_lex_repl = Nullsv;
420 PL_lex_inpat = 0;
76be56bc 421 PL_nexttoke = 0;
3280af22 422 PL_lex_inwhat = 0;
09bef843 423 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
424 PL_linestr = line;
425 if (SvREADONLY(PL_linestr))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 s = SvPV(PL_linestr, len);
8990e307 428 if (len && s[len-1] != ';') {
3280af22
NIS
429 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
430 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
431 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 432 }
3280af22
NIS
433 SvTEMP_off(PL_linestr);
434 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
435 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 436 PL_last_lop = PL_last_uni = Nullch;
3280af22 437 SvREFCNT_dec(PL_rs);
79cb57f6 438 PL_rs = newSVpvn("\n", 1);
3280af22 439 PL_rsfp = 0;
79072805 440}
a687059c 441
ffb4593c
NT
442/*
443 * Perl_lex_end
9cbb5ea2
GS
444 * Finalizer for lexing operations. Must be called when the parser is
445 * done with the lexer.
ffb4593c
NT
446 */
447
463ee0b2 448void
864dbfa3 449Perl_lex_end(pTHX)
463ee0b2 450{
3280af22 451 PL_doextract = FALSE;
463ee0b2
LW
452}
453
ffb4593c
NT
454/*
455 * S_incline
456 * This subroutine has nothing to do with tilting, whether at windmills
457 * or pinball tables. Its name is short for "increment line". It
57843af0 458 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 459 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
460 * # line 500 "foo.pm"
461 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
462 */
463
76e3520e 464STATIC void
cea2e8a9 465S_incline(pTHX_ char *s)
463ee0b2
LW
466{
467 char *t;
468 char *n;
73659bf1 469 char *e;
463ee0b2 470 char ch;
463ee0b2 471
57843af0 472 CopLINE_inc(PL_curcop);
463ee0b2
LW
473 if (*s++ != '#')
474 return;
bf4acbe4 475 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
476 if (strnEQ(s, "line", 4))
477 s += 4;
478 else
479 return;
084592ab 480 if (SPACE_OR_TAB(*s))
73659bf1 481 s++;
4e553d73 482 else
73659bf1 483 return;
bf4acbe4 484 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
485 if (!isDIGIT(*s))
486 return;
487 n = s;
488 while (isDIGIT(*s))
489 s++;
bf4acbe4 490 while (SPACE_OR_TAB(*s))
463ee0b2 491 s++;
73659bf1 492 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 493 s++;
73659bf1
GS
494 e = t + 1;
495 }
463ee0b2 496 else {
463ee0b2 497 for (t = s; !isSPACE(*t); t++) ;
73659bf1 498 e = t;
463ee0b2 499 }
bf4acbe4 500 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
501 e++;
502 if (*e != '\n' && *e != '\0')
503 return; /* false alarm */
504
463ee0b2
LW
505 ch = *t;
506 *t = '\0';
f4dd75d9
GS
507 if (t - s > 0) {
508#ifdef USE_ITHREADS
509 Safefree(CopFILE(PL_curcop));
510#else
511 SvREFCNT_dec(CopFILEGV(PL_curcop));
512#endif
57843af0 513 CopFILE_set(PL_curcop, s);
f4dd75d9 514 }
463ee0b2 515 *t = ch;
57843af0 516 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
517}
518
ffb4593c
NT
519/*
520 * S_skipspace
521 * Called to gobble the appropriate amount and type of whitespace.
522 * Skips comments as well.
523 */
524
76e3520e 525STATIC char *
cea2e8a9 526S_skipspace(pTHX_ register char *s)
a687059c 527{
3280af22 528 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 529 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
530 s++;
531 return s;
532 }
533 for (;;) {
fd049845 534 STRLEN prevlen;
09bef843
SB
535 SSize_t oldprevlen, oldoldprevlen;
536 SSize_t oldloplen, oldunilen;
60e6418e
GS
537 while (s < PL_bufend && isSPACE(*s)) {
538 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
539 incline(s);
540 }
ffb4593c
NT
541
542 /* comment */
3280af22
NIS
543 if (s < PL_bufend && *s == '#') {
544 while (s < PL_bufend && *s != '\n')
463ee0b2 545 s++;
60e6418e 546 if (s < PL_bufend) {
463ee0b2 547 s++;
60e6418e
GS
548 if (PL_in_eval && !PL_rsfp) {
549 incline(s);
550 continue;
551 }
552 }
463ee0b2 553 }
ffb4593c
NT
554
555 /* only continue to recharge the buffer if we're at the end
556 * of the buffer, we're not reading from a source filter, and
557 * we're in normal lexing mode
558 */
09bef843
SB
559 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
560 PL_lex_state == LEX_FORMLINE)
463ee0b2 561 return s;
ffb4593c
NT
562
563 /* try to recharge the buffer */
9cbb5ea2
GS
564 if ((s = filter_gets(PL_linestr, PL_rsfp,
565 (prevlen = SvCUR(PL_linestr)))) == Nullch)
566 {
567 /* end of file. Add on the -p or -n magic */
3280af22
NIS
568 if (PL_minus_n || PL_minus_p) {
569 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
570 ";}continue{print or die qq(-p destination: $!\\n)" :
571 "");
3280af22
NIS
572 sv_catpv(PL_linestr,";}");
573 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
574 }
575 else
3280af22 576 sv_setpv(PL_linestr,";");
ffb4593c
NT
577
578 /* reset variables for next time we lex */
9cbb5ea2
GS
579 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
580 = SvPVX(PL_linestr);
3280af22 581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 582 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
583
584 /* Close the filehandle. Could be from -P preprocessor,
585 * STDIN, or a regular file. If we were reading code from
586 * STDIN (because the commandline held no -e or filename)
587 * then we don't close it, we reset it so the code can
588 * read from STDIN too.
589 */
590
3280af22
NIS
591 if (PL_preprocess && !PL_in_eval)
592 (void)PerlProc_pclose(PL_rsfp);
593 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
594 PerlIO_clearerr(PL_rsfp);
8990e307 595 else
3280af22
NIS
596 (void)PerlIO_close(PL_rsfp);
597 PL_rsfp = Nullfp;
463ee0b2
LW
598 return s;
599 }
ffb4593c
NT
600
601 /* not at end of file, so we only read another line */
09bef843
SB
602 /* make corresponding updates to old pointers, for yyerror() */
603 oldprevlen = PL_oldbufptr - PL_bufend;
604 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
605 if (PL_last_uni)
606 oldunilen = PL_last_uni - PL_bufend;
607 if (PL_last_lop)
608 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
609 PL_linestart = PL_bufptr = s + prevlen;
610 PL_bufend = s + SvCUR(PL_linestr);
611 s = PL_bufptr;
09bef843
SB
612 PL_oldbufptr = s + oldprevlen;
613 PL_oldoldbufptr = s + oldoldprevlen;
614 if (PL_last_uni)
615 PL_last_uni = s + oldunilen;
616 if (PL_last_lop)
617 PL_last_lop = s + oldloplen;
a0d0e21e 618 incline(s);
ffb4593c
NT
619
620 /* debugger active and we're not compiling the debugger code,
621 * so store the line into the debugger's array of lines
622 */
3280af22 623 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
624 SV *sv = NEWSV(85,0);
625
626 sv_upgrade(sv, SVt_PVMG);
3280af22 627 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 628 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 629 }
463ee0b2 630 }
a687059c 631}
378cc40b 632
ffb4593c
NT
633/*
634 * S_check_uni
635 * Check the unary operators to ensure there's no ambiguity in how they're
636 * used. An ambiguous piece of code would be:
637 * rand + 5
638 * This doesn't mean rand() + 5. Because rand() is a unary operator,
639 * the +5 is its argument.
640 */
641
76e3520e 642STATIC void
cea2e8a9 643S_check_uni(pTHX)
ba106d47 644{
2f3197b3 645 char *s;
a0d0e21e 646 char *t;
2f3197b3 647
3280af22 648 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 649 return;
3280af22
NIS
650 while (isSPACE(*PL_last_uni))
651 PL_last_uni++;
7e2040f0 652 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 653 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 654 return;
0453d815 655 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 656 char ch = *s;
0453d815 657 *s = '\0';
4e553d73
NIS
658 Perl_warner(aTHX_ WARN_AMBIGUOUS,
659 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
660 PL_last_uni);
661 *s = ch;
662 }
2f3197b3
LW
663}
664
ffb4593c
NT
665/* workaround to replace the UNI() macro with a function. Only the
666 * hints/uts.sh file mentions this. Other comments elsewhere in the
667 * source indicate Microport Unix might need it too.
668 */
669
ffed7fef
LW
670#ifdef CRIPPLED_CC
671
672#undef UNI
ffed7fef 673#define UNI(f) return uni(f,s)
ffed7fef 674
76e3520e 675STATIC int
cea2e8a9 676S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
677{
678 yylval.ival = f;
3280af22
NIS
679 PL_expect = XTERM;
680 PL_bufptr = s;
8f872242
NIS
681 PL_last_uni = PL_oldbufptr;
682 PL_last_lop_op = f;
ffed7fef
LW
683 if (*s == '(')
684 return FUNC1;
685 s = skipspace(s);
686 if (*s == '(')
687 return FUNC1;
688 else
689 return UNIOP;
690}
691
a0d0e21e
LW
692#endif /* CRIPPLED_CC */
693
ffb4593c
NT
694/*
695 * LOP : macro to build a list operator. Its behaviour has been replaced
696 * with a subroutine, S_lop() for which LOP is just another name.
697 */
698
a0d0e21e
LW
699#define LOP(f,x) return lop(f,x,s)
700
ffb4593c
NT
701/*
702 * S_lop
703 * Build a list operator (or something that might be one). The rules:
704 * - if we have a next token, then it's a list operator [why?]
705 * - if the next thing is an opening paren, then it's a function
706 * - else it's a list operator
707 */
708
76e3520e 709STATIC I32
a0be28da 710S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 711{
79072805 712 yylval.ival = f;
35c8bce7 713 CLINE;
075953c3 714 REPORT("lop", f)
3280af22
NIS
715 PL_expect = x;
716 PL_bufptr = s;
717 PL_last_lop = PL_oldbufptr;
718 PL_last_lop_op = f;
719 if (PL_nexttoke)
a0d0e21e 720 return LSTOP;
79072805
LW
721 if (*s == '(')
722 return FUNC;
723 s = skipspace(s);
724 if (*s == '(')
725 return FUNC;
726 else
727 return LSTOP;
728}
729
ffb4593c
NT
730/*
731 * S_force_next
9cbb5ea2 732 * When the lexer realizes it knows the next token (for instance,
ffb4593c 733 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
734 * to know what token to return the next time the lexer is called. Caller
735 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
736 * handles the token correctly.
ffb4593c
NT
737 */
738
4e553d73 739STATIC void
cea2e8a9 740S_force_next(pTHX_ I32 type)
79072805 741{
3280af22
NIS
742 PL_nexttype[PL_nexttoke] = type;
743 PL_nexttoke++;
744 if (PL_lex_state != LEX_KNOWNEXT) {
745 PL_lex_defer = PL_lex_state;
746 PL_lex_expect = PL_expect;
747 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
748 }
749}
750
ffb4593c
NT
751/*
752 * S_force_word
753 * When the lexer knows the next thing is a word (for instance, it has
754 * just seen -> and it knows that the next char is a word char, then
755 * it calls S_force_word to stick the next word into the PL_next lookahead.
756 *
757 * Arguments:
b1b65b59 758 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
759 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
760 * int check_keyword : if true, Perl checks to make sure the word isn't
761 * a keyword (do this if the word is a label, e.g. goto FOO)
762 * int allow_pack : if true, : characters will also be allowed (require,
763 * use, etc. do this)
9cbb5ea2 764 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
765 */
766
76e3520e 767STATIC char *
cea2e8a9 768S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 769{
463ee0b2
LW
770 register char *s;
771 STRLEN len;
4e553d73 772
463ee0b2
LW
773 start = skipspace(start);
774 s = start;
7e2040f0 775 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 776 (allow_pack && *s == ':') ||
15f0808c 777 (allow_initial_tick && *s == '\'') )
a0d0e21e 778 {
3280af22
NIS
779 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
780 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
781 return start;
782 if (token == METHOD) {
783 s = skipspace(s);
784 if (*s == '(')
3280af22 785 PL_expect = XTERM;
463ee0b2 786 else {
3280af22 787 PL_expect = XOPERATOR;
463ee0b2 788 }
79072805 789 }
3280af22
NIS
790 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
791 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
792 force_next(token);
793 }
794 return s;
795}
796
ffb4593c
NT
797/*
798 * S_force_ident
9cbb5ea2 799 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
800 * text only contains the "foo" portion. The first argument is a pointer
801 * to the "foo", and the second argument is the type symbol to prefix.
802 * Forces the next token to be a "WORD".
9cbb5ea2 803 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
804 */
805
76e3520e 806STATIC void
cea2e8a9 807S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
808{
809 if (s && *s) {
11343788 810 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 811 PL_nextval[PL_nexttoke].opval = o;
79072805 812 force_next(WORD);
748a9306 813 if (kind) {
11343788 814 o->op_private = OPpCONST_ENTERED;
55497cff
PP
815 /* XXX see note in pp_entereval() for why we forgo typo
816 warnings if the symbol must be introduced in an eval.
817 GSAR 96-10-12 */
3280af22 818 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
819 kind == '$' ? SVt_PV :
820 kind == '@' ? SVt_PVAV :
821 kind == '%' ? SVt_PVHV :
822 SVt_PVGV
823 );
748a9306 824 }
79072805
LW
825 }
826}
827
1571675a
GS
828NV
829Perl_str_to_version(pTHX_ SV *sv)
830{
831 NV retval = 0.0;
832 NV nshift = 1.0;
833 STRLEN len;
834 char *start = SvPVx(sv,len);
3aa33fe5 835 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
836 char *end = start + len;
837 while (start < end) {
ba210ebe 838 STRLEN skip;
1571675a
GS
839 UV n;
840 if (utf)
9041c2e3 841 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
842 else {
843 n = *(U8*)start;
844 skip = 1;
845 }
846 retval += ((NV)n)/nshift;
847 start += skip;
848 nshift *= 1000;
849 }
850 return retval;
851}
852
4e553d73 853/*
ffb4593c
NT
854 * S_force_version
855 * Forces the next token to be a version number.
856 */
857
76e3520e 858STATIC char *
cea2e8a9 859S_force_version(pTHX_ char *s)
89bfa8cd
PP
860{
861 OP *version = Nullop;
44dcb63b 862 char *d;
89bfa8cd
PP
863
864 s = skipspace(s);
865
44dcb63b 866 d = s;
dd629d5b 867 if (*d == 'v')
44dcb63b 868 d++;
44dcb63b 869 if (isDIGIT(*d)) {
a7cb1f99 870 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd
PP
881 }
882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
4e553d73 886 force_next(WORD);
89bfa8cd
PP
887
888 return (s);
889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
be4731d2 920 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
922 if (SvUTF8(sv))
923 SvUTF8_on(pv);
924 }
79072805
LW
925 while (s < send) {
926 if (*s == '\\') {
a0d0e21e 927 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
928 s++; /* all that, just for this */
929 }
930 *d++ = *s++;
931 }
932 *d = '\0';
463ee0b2 933 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 934 finish:
3280af22 935 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 936 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
937 return sv;
938}
939
ffb4593c
NT
940/*
941 * Now come three functions related to double-quote context,
942 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
943 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
944 * interact with PL_lex_state, and create fake ( ... ) argument lists
945 * to handle functions and concatenation.
946 * They assume that whoever calls them will be setting up a fake
947 * join call, because each subthing puts a ',' after it. This lets
948 * "lower \luPpEr"
949 * become
950 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
951 *
952 * (I'm not sure whether the spurious commas at the end of lcfirst's
953 * arguments and join's arguments are created or not).
954 */
955
956/*
957 * S_sublex_start
958 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
959 *
960 * Pattern matching will set PL_lex_op to the pattern-matching op to
961 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
962 *
963 * OP_CONST and OP_READLINE are easy--just make the new op and return.
964 *
965 * Everything else becomes a FUNC.
966 *
967 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
968 * had an OP_CONST or OP_READLINE). This just sets us up for a
969 * call to S_sublex_push().
970 */
971
76e3520e 972STATIC I32
cea2e8a9 973S_sublex_start(pTHX)
79072805
LW
974{
975 register I32 op_type = yylval.ival;
79072805
LW
976
977 if (op_type == OP_NULL) {
3280af22
NIS
978 yylval.opval = PL_lex_op;
979 PL_lex_op = Nullop;
79072805
LW
980 return THING;
981 }
982 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 983 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
984
985 if (SvTYPE(sv) == SVt_PVIV) {
986 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
987 STRLEN len;
988 char *p;
989 SV *nsv;
990
991 p = SvPV(sv, len);
79cb57f6 992 nsv = newSVpvn(p, len);
01ec43d0
GS
993 if (SvUTF8(sv))
994 SvUTF8_on(nsv);
b3ac6de7
IZ
995 SvREFCNT_dec(sv);
996 sv = nsv;
4e553d73 997 }
b3ac6de7 998 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 999 PL_lex_stuff = Nullsv;
79072805
LW
1000 return THING;
1001 }
1002
3280af22
NIS
1003 PL_sublex_info.super_state = PL_lex_state;
1004 PL_sublex_info.sub_inwhat = op_type;
1005 PL_sublex_info.sub_op = PL_lex_op;
1006 PL_lex_state = LEX_INTERPPUSH;
55497cff 1007
3280af22
NIS
1008 PL_expect = XTERM;
1009 if (PL_lex_op) {
1010 yylval.opval = PL_lex_op;
1011 PL_lex_op = Nullop;
55497cff
PP
1012 return PMFUNC;
1013 }
1014 else
1015 return FUNC;
1016}
1017
ffb4593c
NT
1018/*
1019 * S_sublex_push
1020 * Create a new scope to save the lexing state. The scope will be
1021 * ended in S_sublex_done. Returns a '(', starting the function arguments
1022 * to the uc, lc, etc. found before.
1023 * Sets PL_lex_state to LEX_INTERPCONCAT.
1024 */
1025
76e3520e 1026STATIC I32
cea2e8a9 1027S_sublex_push(pTHX)
55497cff 1028{
f46d017c 1029 ENTER;
55497cff 1030
3280af22
NIS
1031 PL_lex_state = PL_sublex_info.super_state;
1032 SAVEI32(PL_lex_dojoin);
1033 SAVEI32(PL_lex_brackets);
3280af22
NIS
1034 SAVEI32(PL_lex_casemods);
1035 SAVEI32(PL_lex_starts);
1036 SAVEI32(PL_lex_state);
7766f137 1037 SAVEVPTR(PL_lex_inpat);
3280af22 1038 SAVEI32(PL_lex_inwhat);
57843af0 1039 SAVECOPLINE(PL_curcop);
3280af22
NIS
1040 SAVEPPTR(PL_bufptr);
1041 SAVEPPTR(PL_oldbufptr);
1042 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1043 SAVEPPTR(PL_last_lop);
1044 SAVEPPTR(PL_last_uni);
3280af22
NIS
1045 SAVEPPTR(PL_linestart);
1046 SAVESPTR(PL_linestr);
1047 SAVEPPTR(PL_lex_brackstack);
1048 SAVEPPTR(PL_lex_casestack);
1049
1050 PL_linestr = PL_lex_stuff;
1051 PL_lex_stuff = Nullsv;
1052
9cbb5ea2
GS
1053 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1054 = SvPVX(PL_linestr);
3280af22 1055 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1056 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1057 SAVEFREESV(PL_linestr);
1058
1059 PL_lex_dojoin = FALSE;
1060 PL_lex_brackets = 0;
3280af22
NIS
1061 New(899, PL_lex_brackstack, 120, char);
1062 New(899, PL_lex_casestack, 12, char);
1063 SAVEFREEPV(PL_lex_brackstack);
1064 SAVEFREEPV(PL_lex_casestack);
1065 PL_lex_casemods = 0;
1066 *PL_lex_casestack = '\0';
1067 PL_lex_starts = 0;
1068 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1069 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1070
1071 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1072 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1073 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1074 else
3280af22 1075 PL_lex_inpat = Nullop;
79072805 1076
55497cff 1077 return '(';
79072805
LW
1078}
1079
ffb4593c
NT
1080/*
1081 * S_sublex_done
1082 * Restores lexer state after a S_sublex_push.
1083 */
1084
76e3520e 1085STATIC I32
cea2e8a9 1086S_sublex_done(pTHX)
79072805 1087{
3280af22 1088 if (!PL_lex_starts++) {
9aa983d2
JH
1089 SV *sv = newSVpvn("",0);
1090 if (SvUTF8(PL_linestr))
1091 SvUTF8_on(sv);
3280af22 1092 PL_expect = XOPERATOR;
9aa983d2 1093 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1094 return THING;
1095 }
1096
3280af22
NIS
1097 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1098 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1099 return yylex();
79072805
LW
1100 }
1101
ffb4593c 1102 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1103 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1104 PL_linestr = PL_lex_repl;
1105 PL_lex_inpat = 0;
1106 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1107 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1108 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1109 SAVEFREESV(PL_linestr);
1110 PL_lex_dojoin = FALSE;
1111 PL_lex_brackets = 0;
3280af22
NIS
1112 PL_lex_casemods = 0;
1113 *PL_lex_casestack = '\0';
1114 PL_lex_starts = 0;
25da4f38 1115 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1116 PL_lex_state = LEX_INTERPNORMAL;
1117 PL_lex_starts++;
e9fa98b2
HS
1118 /* we don't clear PL_lex_repl here, so that we can check later
1119 whether this is an evalled subst; that means we rely on the
1120 logic to ensure sublex_done() is called again only via the
1121 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1122 }
e9fa98b2 1123 else {
3280af22 1124 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1125 PL_lex_repl = Nullsv;
1126 }
79072805 1127 return ',';
ffed7fef
LW
1128 }
1129 else {
f46d017c 1130 LEAVE;
3280af22
NIS
1131 PL_bufend = SvPVX(PL_linestr);
1132 PL_bufend += SvCUR(PL_linestr);
1133 PL_expect = XOPERATOR;
09bef843 1134 PL_sublex_info.sub_inwhat = 0;
79072805 1135 return ')';
ffed7fef
LW
1136 }
1137}
1138
02aa26ce
NT
1139/*
1140 scan_const
1141
1142 Extracts a pattern, double-quoted string, or transliteration. This
1143 is terrifying code.
1144
3280af22
NIS
1145 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1146 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1147 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1148
9b599b2a
GS
1149 Returns a pointer to the character scanned up to. Iff this is
1150 advanced from the start pointer supplied (ie if anything was
1151 successfully parsed), will leave an OP for the substring scanned
1152 in yylval. Caller must intuit reason for not parsing further
1153 by looking at the next characters herself.
1154
02aa26ce
NT
1155 In patterns:
1156 backslashes:
1157 double-quoted style: \r and \n
1158 regexp special ones: \D \s
1159 constants: \x3
1160 backrefs: \1 (deprecated in substitution replacements)
1161 case and quoting: \U \Q \E
1162 stops on @ and $, but not for $ as tail anchor
1163
1164 In transliterations:
1165 characters are VERY literal, except for - not at the start or end
1166 of the string, which indicates a range. scan_const expands the
1167 range to the full set of intermediate characters.
1168
1169 In double-quoted strings:
1170 backslashes:
1171 double-quoted style: \r and \n
1172 constants: \x3
1173 backrefs: \1 (deprecated)
1174 case and quoting: \U \Q \E
1175 stops on @ and $
1176
1177 scan_const does *not* construct ops to handle interpolated strings.
1178 It stops processing as soon as it finds an embedded $ or @ variable
1179 and leaves it to the caller to work out what's going on.
1180
1181 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1182
1183 $ in pattern could be $foo or could be tail anchor. Assumption:
1184 it's a tail anchor if $ is the last thing in the string, or if it's
1185 followed by one of ")| \n\t"
1186
1187 \1 (backreferences) are turned into $1
1188
1189 The structure of the code is
1190 while (there's a character to process) {
1191 handle transliteration ranges
1192 skip regexp comments
1193 skip # initiated comments in //x patterns
1194 check for embedded @foo
1195 check for embedded scalars
1196 if (backslash) {
1197 leave intact backslashes from leave (below)
1198 deprecate \1 in strings and sub replacements
1199 handle string-changing backslashes \l \U \Q \E, etc.
1200 switch (what was escaped) {
1201 handle - in a transliteration (becomes a literal -)
1202 handle \132 octal characters
1203 handle 0x15 hex characters
1204 handle \cV (control V)
1205 handle printf backslashes (\f, \r, \n, etc)
1206 } (end switch)
1207 } (end if backslash)
1208 } (end while character to read)
4e553d73 1209
02aa26ce
NT
1210*/
1211
76e3520e 1212STATIC char *
cea2e8a9 1213S_scan_const(pTHX_ char *start)
79072805 1214{
3280af22 1215 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1216 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1217 register char *s = start; /* start of the constant */
1218 register char *d = SvPVX(sv); /* destination for copies */
1219 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1220 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1221 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1222 /* the constant is UTF8 */
012bcf8d
GS
1223 UV uv;
1224
ac2262e3 1225 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1226 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1227 : UTF;
89491803 1228 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1229 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1230 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1231 : UTF;
dff6d3cd 1232 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1233 PL_lex_inpat
4a2d328f 1234 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1235 : "";
79072805
LW
1236
1237 while (s < send || dorange) {
02aa26ce 1238 /* get transliterations out of the way (they're most literal) */
3280af22 1239 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1240 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1241 if (dorange) {
1ba5c669
JH
1242 I32 i; /* current expanded character */
1243 I32 min; /* first character in range */
1244 I32 max; /* last character in range */
02aa26ce 1245
8973db79
JH
1246 if (utf) {
1247 char *c = (char*)utf8_hop((U8*)d, -1);
1248 char *e = d++;
1249 while (e-- > c)
1250 *(e + 1) = *e;
8b107d6c 1251 *c = (char)0xff;
8973db79
JH
1252 /* mark the range as done, and continue */
1253 dorange = FALSE;
1254 didrange = TRUE;
1255 continue;
1256 }
02aa26ce 1257 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1258 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1259 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1260 d -= 2; /* eat the first char and the - */
1261
8ada0baa
JH
1262 min = (U8)*d; /* first char in range */
1263 max = (U8)d[1]; /* last char in range */
1264
c2e66d9e 1265 if (min > max) {
01ec43d0 1266 Perl_croak(aTHX_
1ba5c669
JH
1267 "Invalid [] range \"%c-%c\" in transliteration operator",
1268 (char)min, (char)max);
c2e66d9e
GS
1269 }
1270
c7f1f016 1271#ifdef EBCDIC
8ada0baa
JH
1272 if ((isLOWER(min) && isLOWER(max)) ||
1273 (isUPPER(min) && isUPPER(max))) {
1274 if (isLOWER(min)) {
1275 for (i = min; i <= max; i++)
1276 if (isLOWER(i))
db42d148 1277 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1278 } else {
1279 for (i = min; i <= max; i++)
1280 if (isUPPER(i))
db42d148 1281 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1282 }
1283 }
1284 else
1285#endif
1286 for (i = min; i <= max; i++)
1287 *d++ = i;
02aa26ce
NT
1288
1289 /* mark the range as done, and continue */
79072805 1290 dorange = FALSE;
01ec43d0 1291 didrange = TRUE;
79072805 1292 continue;
4e553d73 1293 }
02aa26ce
NT
1294
1295 /* range begins (ignore - as first or last char) */
79072805 1296 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1297 if (didrange) {
1fafa243 1298 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1299 }
a0ed51b3 1300 if (utf) {
a176fa2a 1301 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1302 s++;
1303 continue;
1304 }
79072805
LW
1305 dorange = TRUE;
1306 s++;
01ec43d0
GS
1307 }
1308 else {
1309 didrange = FALSE;
1310 }
79072805 1311 }
02aa26ce
NT
1312
1313 /* if we get here, we're not doing a transliteration */
1314
0f5d15d6
IZ
1315 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1316 except for the last char, which will be done separately. */
3280af22 1317 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1318 if (s[2] == '#') {
1319 while (s < send && *s != ')')
db42d148 1320 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1321 }
1322 else if (s[2] == '{' /* This should match regcomp.c */
1323 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1324 {
cc6b7395 1325 I32 count = 1;
0f5d15d6 1326 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1327 char c;
1328
d9f97599
GS
1329 while (count && (c = *regparse)) {
1330 if (c == '\\' && regparse[1])
1331 regparse++;
4e553d73 1332 else if (c == '{')
cc6b7395 1333 count++;
4e553d73 1334 else if (c == '}')
cc6b7395 1335 count--;
d9f97599 1336 regparse++;
cc6b7395 1337 }
5bdf89e7
IZ
1338 if (*regparse != ')') {
1339 regparse--; /* Leave one char for continuation. */
cc6b7395 1340 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1341 }
0f5d15d6 1342 while (s < regparse)
db42d148 1343 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1344 }
748a9306 1345 }
02aa26ce
NT
1346
1347 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1348 else if (*s == '#' && PL_lex_inpat &&
1349 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1350 while (s+1 < send && *s != '\n')
db42d148 1351 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1352 }
02aa26ce 1353
5d1d4326
JH
1354 /* check for embedded arrays
1355 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1356 */
7e2040f0 1357 else if (*s == '@' && s[1]
5d1d4326 1358 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1359 break;
02aa26ce
NT
1360
1361 /* check for embedded scalars. only stop if we're sure it's a
1362 variable.
1363 */
79072805 1364 else if (*s == '$') {
3280af22 1365 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1366 break;
c277df42 1367 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1368 break; /* in regexp, $ might be tail anchor */
1369 }
02aa26ce
NT
1370
1371 /* backslashes */
79072805
LW
1372 if (*s == '\\' && s+1 < send) {
1373 s++;
02aa26ce
NT
1374
1375 /* some backslashes we leave behind */
c9f97d15 1376 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1377 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1378 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1379 continue;
1380 }
02aa26ce
NT
1381
1382 /* deprecate \1 in strings and substitution replacements */
3280af22 1383 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1384 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1385 {
599cee73 1386 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1387 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1388 *--s = '$';
1389 break;
1390 }
02aa26ce
NT
1391
1392 /* string-change backslash escapes */
3280af22 1393 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1394 --s;
1395 break;
1396 }
02aa26ce
NT
1397
1398 /* if we get here, it's either a quoted -, or a digit */
79072805 1399 switch (*s) {
02aa26ce
NT
1400
1401 /* quoted - in transliterations */
79072805 1402 case '-':
3280af22 1403 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1404 *d++ = *s++;
1405 continue;
1406 }
1407 /* FALL THROUGH */
1408 default:
11b8faa4 1409 {
7e84c16c 1410 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1411 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1412 "Unrecognized escape \\%c passed through",
1413 *s);
1414 /* default action is to copy the quoted character */
f9a63242 1415 goto default_action;
11b8faa4 1416 }
02aa26ce
NT
1417
1418 /* \132 indicates an octal constant */
79072805
LW
1419 case '0': case '1': case '2': case '3':
1420 case '4': case '5': case '6': case '7':
ba210ebe
JH
1421 {
1422 STRLEN len = 0; /* disallow underscores */
1423 uv = (UV)scan_oct(s, 3, &len);
1424 s += len;
1425 }
012bcf8d 1426 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1427
1428 /* \x24 indicates a hex constant */
79072805 1429 case 'x':
a0ed51b3
LW
1430 ++s;
1431 if (*s == '{') {
1432 char* e = strchr(s, '}');
adaeee49 1433 if (!e) {
a0ed51b3 1434 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1435 e = s;
1436 }
89491803 1437 else {
ba210ebe
JH
1438 STRLEN len = 1; /* allow underscores */
1439 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1440 }
1441 s = e + 1;
a0ed51b3
LW
1442 }
1443 else {
ba210ebe
JH
1444 {
1445 STRLEN len = 0; /* disallow underscores */
1446 uv = (UV)scan_hex(s, 2, &len);
1447 s += len;
1448 }
012bcf8d
GS
1449 }
1450
1451 NUM_ESCAPE_INSERT:
1452 /* Insert oct or hex escaped character.
301d3d20 1453 * There will always enough room in sv since such
db42d148 1454 * escapes will be longer than any UTF-8 sequence
301d3d20 1455 * they can end up as. */
ba7cea30 1456
c7f1f016
NIS
1457 /* We need to map to chars to ASCII before doing the tests
1458 to cover EBCDIC
1459 */
c4d5f83a 1460 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1461 if (!has_utf8 && uv > 255) {
301d3d20
JH
1462 /* Might need to recode whatever we have
1463 * accumulated so far if it contains any
1464 * hibit chars.
1465 *
1466 * (Can't we keep track of that and avoid
1467 * this rescan? --jhi)
012bcf8d 1468 */
c7f1f016 1469 int hicount = 0;
63cd0674
NIS
1470 U8 *c;
1471 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1472 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1473 hicount++;
db42d148 1474 }
012bcf8d 1475 }
63cd0674 1476 if (hicount) {
db42d148
NIS
1477 STRLEN offset = d - SvPVX(sv);
1478 U8 *src, *dst;
1479 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1480 src = (U8 *)d - 1;
1481 dst = src+hicount;
1482 d += hicount;
1483 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1484 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1485 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1486 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1487 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1488 }
1489 else {
63cd0674 1490 *dst-- = *src;
012bcf8d 1491 }
c7f1f016 1492 src--;
012bcf8d
GS
1493 }
1494 }
1495 }
1496
9aa983d2 1497 if (has_utf8 || uv > 255) {
9041c2e3 1498 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1499 has_utf8 = TRUE;
f9a63242
JH
1500 if (PL_lex_inwhat == OP_TRANS &&
1501 PL_sublex_info.sub_op) {
1502 PL_sublex_info.sub_op->op_private |=
1503 (PL_lex_repl ? OPpTRANS_FROM_UTF
1504 : OPpTRANS_TO_UTF);
1505 utf = TRUE;
1506 }
012bcf8d 1507 }
a0ed51b3 1508 else {
012bcf8d 1509 *d++ = (char)uv;
a0ed51b3 1510 }
012bcf8d
GS
1511 }
1512 else {
c4d5f83a 1513 *d++ = (char) uv;
a0ed51b3 1514 }
79072805 1515 continue;
02aa26ce 1516
4a2d328f
IZ
1517 /* \N{latin small letter a} is a named character */
1518 case 'N':
55eda711 1519 ++s;
423cee85
JH
1520 if (*s == '{') {
1521 char* e = strchr(s, '}');
155aba94 1522 SV *res;
423cee85
JH
1523 STRLEN len;
1524 char *str;
4e553d73 1525
423cee85 1526 if (!e) {
5777a3f7 1527 yyerror("Missing right brace on \\N{}");
423cee85
JH
1528 e = s - 1;
1529 goto cont_scan;
1530 }
55eda711
JH
1531 res = newSVpvn(s + 1, e - s - 1);
1532 res = new_constant( Nullch, 0, "charnames",
1533 res, Nullsv, "\\N{...}" );
f9a63242
JH
1534 if (has_utf8)
1535 sv_utf8_upgrade(res);
423cee85 1536 str = SvPV(res,len);
89491803 1537 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1538 char *ostart = SvPVX(sv);
1539 SvCUR_set(sv, d - ostart);
1540 SvPOK_on(sv);
e4f3eed8 1541 *d = '\0';
f08d6ad9 1542 sv_utf8_upgrade(sv);
d2f449dd
SB
1543 /* this just broke our allocation above... */
1544 SvGROW(sv, send - start);
f08d6ad9 1545 d = SvPVX(sv) + SvCUR(sv);
89491803 1546 has_utf8 = TRUE;
f08d6ad9 1547 }
423cee85
JH
1548 if (len > e - s + 4) {
1549 char *odest = SvPVX(sv);
1550
8973db79 1551 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1552 d = SvPVX(sv) + (d - odest);
1553 }
1554 Copy(str, d, len, char);
1555 d += len;
1556 SvREFCNT_dec(res);
1557 cont_scan:
1558 s = e + 1;
1559 }
1560 else
5777a3f7 1561 yyerror("Missing braces on \\N{}");
423cee85
JH
1562 continue;
1563
02aa26ce 1564 /* \c is a control character */
79072805
LW
1565 case 'c':
1566 s++;
ba210ebe
JH
1567 {
1568 U8 c = *s++;
c7f1f016
NIS
1569#ifdef EBCDIC
1570 if (isLOWER(c))
1571 c = toUPPER(c);
1572#endif
db42d148 1573 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1574 }
79072805 1575 continue;
02aa26ce
NT
1576
1577 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1578 case 'b':
db42d148 1579 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1580 break;
1581 case 'n':
db42d148 1582 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1583 break;
1584 case 'r':
db42d148 1585 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1586 break;
1587 case 'f':
db42d148 1588 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1589 break;
1590 case 't':
db42d148 1591 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1592 break;
34a3fe2a 1593 case 'e':
db42d148 1594 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1595 break;
1596 case 'a':
db42d148 1597 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1598 break;
02aa26ce
NT
1599 } /* end switch */
1600
79072805
LW
1601 s++;
1602 continue;
02aa26ce
NT
1603 } /* end if (backslash) */
1604
f9a63242 1605 default_action:
63cd0674 1606 if (!UTF8_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
a5a960be
RI
1607 STRLEN len = (STRLEN) -1;
1608 UV uv;
1609 if (this_utf8) {
9041c2e3 1610 uv = utf8n_to_uvchr((U8*)s, send - s, &len, 0);
a5a960be
RI
1611 }
1612 if (len == (STRLEN)-1) {
1613 /* Illegal UTF8 (a high-bit byte), make it valid. */
1614 char *old_pvx = SvPVX(sv);
1615 /* need space for one extra char (NOTE: SvCUR() not set here) */
1616 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
9041c2e3 1617 d = (char*)uvchr_to_utf8((U8*)d, (U8)*s++);
a5a960be
RI
1618 }
1619 else {
1620 while (len--)
1621 *d++ = *s++;
1622 }
1623 has_utf8 = TRUE;
f9a63242
JH
1624 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1625 PL_sublex_info.sub_op->op_private |=
1626 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1627 utf = TRUE;
1628 }
a5a960be
RI
1629 continue;
1630 }
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
02aa26ce
NT
1632 } /* while loop to process each character */
1633
1634 /* terminate the string and set up the sv */
79072805 1635 *d = '\0';
463ee0b2 1636 SvCUR_set(sv, d - SvPVX(sv));
79072805 1637 SvPOK_on(sv);
89491803 1638 if (has_utf8)
7e2040f0 1639 SvUTF8_on(sv);
79072805 1640
02aa26ce 1641 /* shrink the sv if we allocated more than we used */
79072805
LW
1642 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1643 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1644 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1645 }
02aa26ce 1646
9b599b2a 1647 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1648 if (s > PL_bufptr) {
1649 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1650 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1651 sv, Nullsv,
4e553d73 1652 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1653 ? "tr"
3280af22 1654 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1655 ? "s"
1656 : "qq")));
79072805 1657 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1658 } else
8990e307 1659 SvREFCNT_dec(sv);
79072805
LW
1660 return s;
1661}
1662
ffb4593c
NT
1663/* S_intuit_more
1664 * Returns TRUE if there's more to the expression (e.g., a subscript),
1665 * FALSE otherwise.
ffb4593c
NT
1666 *
1667 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1668 *
1669 * ->[ and ->{ return TRUE
1670 * { and [ outside a pattern are always subscripts, so return TRUE
1671 * if we're outside a pattern and it's not { or [, then return FALSE
1672 * if we're in a pattern and the first char is a {
1673 * {4,5} (any digits around the comma) returns FALSE
1674 * if we're in a pattern and the first char is a [
1675 * [] returns FALSE
1676 * [SOMETHING] has a funky algorithm to decide whether it's a
1677 * character class or not. It has to deal with things like
1678 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1679 * anything else returns TRUE
1680 */
1681
9cbb5ea2
GS
1682/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1683
76e3520e 1684STATIC int
cea2e8a9 1685S_intuit_more(pTHX_ register char *s)
79072805 1686{
3280af22 1687 if (PL_lex_brackets)
79072805
LW
1688 return TRUE;
1689 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1690 return TRUE;
1691 if (*s != '{' && *s != '[')
1692 return FALSE;
3280af22 1693 if (!PL_lex_inpat)
79072805
LW
1694 return TRUE;
1695
1696 /* In a pattern, so maybe we have {n,m}. */
1697 if (*s == '{') {
1698 s++;
1699 if (!isDIGIT(*s))
1700 return TRUE;
1701 while (isDIGIT(*s))
1702 s++;
1703 if (*s == ',')
1704 s++;
1705 while (isDIGIT(*s))
1706 s++;
1707 if (*s == '}')
1708 return FALSE;
1709 return TRUE;
1710
1711 }
1712
1713 /* On the other hand, maybe we have a character class */
1714
1715 s++;
1716 if (*s == ']' || *s == '^')
1717 return FALSE;
1718 else {
ffb4593c 1719 /* this is terrifying, and it works */
79072805
LW
1720 int weight = 2; /* let's weigh the evidence */
1721 char seen[256];
f27ffc4a 1722 unsigned char un_char = 255, last_un_char;
93a17b20 1723 char *send = strchr(s,']');
3280af22 1724 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1725
1726 if (!send) /* has to be an expression */
1727 return TRUE;
1728
1729 Zero(seen,256,char);
1730 if (*s == '$')
1731 weight -= 3;
1732 else if (isDIGIT(*s)) {
1733 if (s[1] != ']') {
1734 if (isDIGIT(s[1]) && s[2] == ']')
1735 weight -= 10;
1736 }
1737 else
1738 weight -= 100;
1739 }
1740 for (; s < send; s++) {
1741 last_un_char = un_char;
1742 un_char = (unsigned char)*s;
1743 switch (*s) {
1744 case '@':
1745 case '&':
1746 case '$':
1747 weight -= seen[un_char] * 10;
7e2040f0 1748 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1749 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1750 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1751 weight -= 100;
1752 else
1753 weight -= 10;
1754 }
1755 else if (*s == '$' && s[1] &&
93a17b20
LW
1756 strchr("[#!%*<>()-=",s[1])) {
1757 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1758 weight -= 10;
1759 else
1760 weight -= 1;
1761 }
1762 break;
1763 case '\\':
1764 un_char = 254;
1765 if (s[1]) {
93a17b20 1766 if (strchr("wds]",s[1]))
79072805
LW
1767 weight += 100;
1768 else if (seen['\''] || seen['"'])
1769 weight += 1;
93a17b20 1770 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1771 weight += 40;
1772 else if (isDIGIT(s[1])) {
1773 weight += 40;
1774 while (s[1] && isDIGIT(s[1]))
1775 s++;
1776 }
1777 }
1778 else
1779 weight += 100;
1780 break;
1781 case '-':
1782 if (s[1] == '\\')
1783 weight += 50;
93a17b20 1784 if (strchr("aA01! ",last_un_char))
79072805 1785 weight += 30;
93a17b20 1786 if (strchr("zZ79~",s[1]))
79072805 1787 weight += 30;
f27ffc4a
GS
1788 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1789 weight -= 5; /* cope with negative subscript */
79072805
LW
1790 break;
1791 default:
93a17b20 1792 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1793 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1794 char *d = tmpbuf;
1795 while (isALPHA(*s))
1796 *d++ = *s++;
1797 *d = '\0';
1798 if (keyword(tmpbuf, d - tmpbuf))
1799 weight -= 150;
1800 }
1801 if (un_char == last_un_char + 1)
1802 weight += 5;
1803 weight -= seen[un_char];
1804 break;
1805 }
1806 seen[un_char]++;
1807 }
1808 if (weight >= 0) /* probably a character class */
1809 return FALSE;
1810 }
1811
1812 return TRUE;
1813}
ffed7fef 1814
ffb4593c
NT
1815/*
1816 * S_intuit_method
1817 *
1818 * Does all the checking to disambiguate
1819 * foo bar
1820 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1821 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1822 *
1823 * First argument is the stuff after the first token, e.g. "bar".
1824 *
1825 * Not a method if bar is a filehandle.
1826 * Not a method if foo is a subroutine prototyped to take a filehandle.
1827 * Not a method if it's really "Foo $bar"
1828 * Method if it's "foo $bar"
1829 * Not a method if it's really "print foo $bar"
1830 * Method if it's really "foo package::" (interpreted as package->foo)
1831 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1832 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1833 * =>
1834 */
1835
76e3520e 1836STATIC int
cea2e8a9 1837S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1838{
1839 char *s = start + (*start == '$');
3280af22 1840 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1841 STRLEN len;
1842 GV* indirgv;
1843
1844 if (gv) {
b6c543e3 1845 CV *cv;
a0d0e21e
LW
1846 if (GvIO(gv))
1847 return 0;
b6c543e3
IZ
1848 if ((cv = GvCVu(gv))) {
1849 char *proto = SvPVX(cv);
1850 if (proto) {
1851 if (*proto == ';')
1852 proto++;
1853 if (*proto == '*')
1854 return 0;
1855 }
1856 } else
a0d0e21e
LW
1857 gv = 0;
1858 }
8903cb82 1859 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1860 /* start is the beginning of the possible filehandle/object,
1861 * and s is the end of it
1862 * tmpbuf is a copy of it
1863 */
1864
a0d0e21e 1865 if (*start == '$') {
3280af22 1866 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1867 return 0;
1868 s = skipspace(s);
3280af22
NIS
1869 PL_bufptr = start;
1870 PL_expect = XREF;
a0d0e21e
LW
1871 return *s == '(' ? FUNCMETH : METHOD;
1872 }
1873 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1874 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1875 len -= 2;
1876 tmpbuf[len] = '\0';
1877 goto bare_package;
1878 }
1879 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1880 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1881 return 0;
1882 /* filehandle or package name makes it a method */
89bfa8cd 1883 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1884 s = skipspace(s);
3280af22 1885 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1886 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1887 bare_package:
3280af22 1888 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1889 newSVpvn(tmpbuf,len));
3280af22
NIS
1890 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1891 PL_expect = XTERM;
a0d0e21e 1892 force_next(WORD);
3280af22 1893 PL_bufptr = s;
a0d0e21e
LW
1894 return *s == '(' ? FUNCMETH : METHOD;
1895 }
1896 }
1897 return 0;
1898}
1899
ffb4593c
NT
1900/*
1901 * S_incl_perldb
1902 * Return a string of Perl code to load the debugger. If PERL5DB
1903 * is set, it will return the contents of that, otherwise a
1904 * compile-time require of perl5db.pl.
1905 */
1906
76e3520e 1907STATIC char*
cea2e8a9 1908S_incl_perldb(pTHX)
a0d0e21e 1909{
3280af22 1910 if (PL_perldb) {
76e3520e 1911 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1912
1913 if (pdb)
1914 return pdb;
61bb5906 1915 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1916 return "BEGIN { require 'perl5db.pl' }";
1917 }
1918 return "";
1919}
1920
1921
16d20bd9 1922/* Encoded script support. filter_add() effectively inserts a
4e553d73 1923 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1924 * Note that the filter function only applies to the current source file
1925 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1926 *
1927 * The datasv parameter (which may be NULL) can be used to pass
1928 * private data to this instance of the filter. The filter function
1929 * can recover the SV using the FILTER_DATA macro and use it to
1930 * store private buffers and state information.
1931 *
1932 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1933 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1934 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1935 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1936 * private use must be set using malloc'd pointers.
1937 */
16d20bd9
AD
1938
1939SV *
864dbfa3 1940Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1941{
f4c556ac
GS
1942 if (!funcp)
1943 return Nullsv;
1944
3280af22
NIS
1945 if (!PL_rsfp_filters)
1946 PL_rsfp_filters = newAV();
16d20bd9 1947 if (!datasv)
8c52afec 1948 datasv = NEWSV(255,0);
16d20bd9 1949 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1950 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1951 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1952 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1953 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1954 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1955 av_unshift(PL_rsfp_filters, 1);
1956 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1957 return(datasv);
1958}
4e553d73 1959
16d20bd9
AD
1960
1961/* Delete most recently added instance of this filter function. */
a0d0e21e 1962void
864dbfa3 1963Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1964{
e0c19803 1965 SV *datasv;
f4c556ac 1966 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1967 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1968 return;
1969 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1970 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1971 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1972 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1973 IoANY(datasv) = (void *)NULL;
3280af22 1974 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1975
16d20bd9
AD
1976 return;
1977 }
1978 /* we need to search for the correct entry and clear it */
cea2e8a9 1979 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1980}
1981
1982
1983/* Invoke the n'th filter function for the current rsfp. */
1984I32
864dbfa3 1985Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1986
1987
8ac85365 1988 /* 0 = read one text line */
a0d0e21e 1989{
16d20bd9
AD
1990 filter_t funcp;
1991 SV *datasv = NULL;
e50aee73 1992
3280af22 1993 if (!PL_rsfp_filters)
16d20bd9 1994 return -1;
3280af22 1995 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1996 /* Provide a default input filter to make life easy. */
1997 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1998 DEBUG_P(PerlIO_printf(Perl_debug_log,
1999 "filter_read %d: from rsfp\n", idx));
4e553d73 2000 if (maxlen) {
16d20bd9
AD
2001 /* Want a block */
2002 int len ;
2003 int old_len = SvCUR(buf_sv) ;
2004
2005 /* ensure buf_sv is large enough */
2006 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2007 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2008 if (PerlIO_error(PL_rsfp))
37120919
AD
2009 return -1; /* error */
2010 else
2011 return 0 ; /* end of file */
2012 }
16d20bd9
AD
2013 SvCUR_set(buf_sv, old_len + len) ;
2014 } else {
2015 /* Want a line */
3280af22
NIS
2016 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2017 if (PerlIO_error(PL_rsfp))
37120919
AD
2018 return -1; /* error */
2019 else
2020 return 0 ; /* end of file */
2021 }
16d20bd9
AD
2022 }
2023 return SvCUR(buf_sv);
2024 }
2025 /* Skip this filter slot if filter has been deleted */
3280af22 2026 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2027 DEBUG_P(PerlIO_printf(Perl_debug_log,
2028 "filter_read %d: skipped (filter deleted)\n",
2029 idx));
16d20bd9
AD
2030 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2031 }
2032 /* Get function pointer hidden within datasv */
4755096e 2033 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2034 DEBUG_P(PerlIO_printf(Perl_debug_log,
2035 "filter_read %d: via function %p (%s)\n",
2036 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2037 /* Call function. The function is expected to */
2038 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2039 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2040 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2041}
2042
76e3520e 2043STATIC char *
cea2e8a9 2044S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2045{
c39cd008 2046#ifdef PERL_CR_FILTER
3280af22 2047 if (!PL_rsfp_filters) {
c39cd008 2048 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2049 }
2050#endif
3280af22 2051 if (PL_rsfp_filters) {
16d20bd9 2052
55497cff
PP
2053 if (!append)
2054 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2055 if (FILTER_READ(0, sv, 0) > 0)
2056 return ( SvPVX(sv) ) ;
2057 else
2058 return Nullch ;
2059 }
9d116dd7 2060 else
fd049845 2061 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2062}
2063
01ec43d0
GS
2064STATIC HV *
2065S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2066{
2067 GV *gv;
2068
01ec43d0 2069 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2070 return PL_curstash;
2071
2072 if (len > 2 &&
2073 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2074 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2075 {
2076 return GvHV(gv); /* Foo:: */
def3634b
GS
2077 }
2078
2079 /* use constant CLASS => 'MyClass' */
2080 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2081 SV *sv;
2082 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2083 pkgname = SvPV_nolen(sv);
2084 }
2085 }
2086
2087 return gv_stashpv(pkgname, FALSE);
2088}
a0d0e21e 2089
748a9306
LW
2090#ifdef DEBUGGING
2091 static char* exp_name[] =
09bef843
SB
2092 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2093 "ATTRTERM", "TERMBLOCK"
2094 };
748a9306 2095#endif
463ee0b2 2096
02aa26ce
NT
2097/*
2098 yylex
2099
2100 Works out what to call the token just pulled out of the input
2101 stream. The yacc parser takes care of taking the ops we return and
2102 stitching them into a tree.
2103
2104 Returns:
2105 PRIVATEREF
2106
2107 Structure:
2108 if read an identifier
2109 if we're in a my declaration
2110 croak if they tried to say my($foo::bar)
2111 build the ops for a my() declaration
2112 if it's an access to a my() variable
2113 are we in a sort block?
2114 croak if my($a); $a <=> $b
2115 build ops for access to a my() variable
2116 if in a dq string, and they've said @foo and we can't find @foo
2117 croak
2118 build ops for a bareword
2119 if we already built the token before, use it.
2120*/
2121
dba4d153 2122#ifdef USE_PURE_BISON
864dbfa3 2123int
dba4d153 2124Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2125{
20141f0e
RI
2126 int r;
2127
6f202aea 2128 yyactlevel++;
20141f0e
RI
2129 yylval_pointer[yyactlevel] = lvalp;
2130 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2131 if (yyactlevel >= YYMAXLEVEL)
2132 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2133
dba4d153 2134 r = Perl_yylex(aTHX);
20141f0e 2135
d8ae6756
RI
2136 if (yyactlevel > 0)
2137 yyactlevel--;
20141f0e
RI
2138
2139 return r;
2140}
dba4d153 2141#endif
20141f0e 2142
dba4d153
JH
2143#ifdef __SC__
2144#pragma segment Perl_yylex
2145#endif
dba4d153 2146int
dba4d153 2147Perl_yylex(pTHX)
20141f0e 2148{
79072805 2149 register char *s;
378cc40b 2150 register char *d;
79072805 2151 register I32 tmp;
463ee0b2 2152 STRLEN len;
161b471a
NIS
2153 GV *gv = Nullgv;
2154 GV **gvp = 0;
aa7440fb 2155 bool bof = FALSE;
a687059c 2156
02aa26ce 2157 /* check if there's an identifier for us to look at */
3280af22 2158 if (PL_pending_ident) {
02aa26ce 2159 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2160 char pit = PL_pending_ident;
2161 PL_pending_ident = 0;
bbce6d69 2162
607df283
SC
2163 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2164 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2165
02aa26ce
NT
2166 /* if we're in a my(), we can't allow dynamics here.
2167 $foo'bar has already been turned into $foo::bar, so
2168 just check for colons.
2169
2170 if it's a legal name, the OP is a PADANY.
2171 */
3280af22 2172 if (PL_in_my) {
77ca0c92 2173 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2174 if (strchr(PL_tokenbuf,':'))
2175 yyerror(Perl_form(aTHX_ "No package name allowed for "
2176 "variable %s in \"our\"",
2177 PL_tokenbuf));
77ca0c92
LW
2178 tmp = pad_allocmy(PL_tokenbuf);
2179 }
2180 else {
2181 if (strchr(PL_tokenbuf,':'))
2182 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2183
77ca0c92
LW
2184 yylval.opval = newOP(OP_PADANY, 0);
2185 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2186 return PRIVATEREF;
2187 }
bbce6d69
PP
2188 }
2189
4e553d73 2190 /*
02aa26ce
NT
2191 build the ops for accesses to a my() variable.
2192
2193 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2194 then used in a comparison. This catches most, but not
2195 all cases. For instance, it catches
2196 sort { my($a); $a <=> $b }
2197 but not
2198 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2199 (although why you'd do that is anyone's guess).
2200 */
2201
3280af22 2202 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2203#ifdef USE_THREADS
54b9620d 2204 /* Check for single character per-thread SVs */
3280af22
NIS
2205 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2206 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2207 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2208 {
2faa37cc 2209 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2210 yylval.opval->op_targ = tmp;
2211 return PRIVATEREF;
2212 }
2213#endif /* USE_THREADS */
3280af22 2214 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2215 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2216 /* might be an "our" variable" */
f472eb5c 2217 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2218 /* build ops for a bareword */
f472eb5c
GS
2219 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2220 sv_catpvn(sym, "::", 2);
2221 sv_catpv(sym, PL_tokenbuf+1);
2222 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2223 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2224 gv_fetchpv(SvPVX(sym),
77ca0c92 2225 (PL_in_eval
f472eb5c
GS
2226 ? (GV_ADDMULTI | GV_ADDINEVAL)
2227 : TRUE
77ca0c92
LW
2228 ),
2229 ((PL_tokenbuf[0] == '$') ? SVt_PV
2230 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2231 : SVt_PVHV));
2232 return WORD;
2233 }
2234
02aa26ce 2235 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2236 if (PL_last_lop_op == OP_SORT &&
2237 PL_tokenbuf[0] == '$' &&
2238 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2239 && !PL_tokenbuf[2])
bbce6d69 2240 {
3280af22
NIS
2241 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2242 d < PL_bufend && *d != '\n';
a863c7d1
MB
2243 d++)
2244 {
2245 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2246 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2247 PL_tokenbuf);
a863c7d1 2248 }
bbce6d69
PP
2249 }
2250 }
bbce6d69 2251
a863c7d1
MB
2252 yylval.opval = newOP(OP_PADANY, 0);
2253 yylval.opval->op_targ = tmp;
2254 return PRIVATEREF;
2255 }
bbce6d69
PP
2256 }
2257
02aa26ce
NT
2258 /*
2259 Whine if they've said @foo in a doublequoted string,
2260 and @foo isn't a variable we can find in the symbol
2261 table.
2262 */
3280af22
NIS
2263 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2264 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2265 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2266 && ckWARN(WARN_AMBIGUOUS))
2267 {
2268 /* Downgraded from fatal to warning 20000522 mjd */
2269 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2270 "Possible unintended interpolation of %s in string",
2271 PL_tokenbuf);
2272 }
bbce6d69
PP
2273 }
2274
02aa26ce 2275 /* build ops for a bareword */
3280af22 2276 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2277 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2278 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2279 ((PL_tokenbuf[0] == '$') ? SVt_PV
2280 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2281 : SVt_PVHV));
2282 return WORD;
2283 }
2284
02aa26ce
NT
2285 /* no identifier pending identification */
2286
3280af22 2287 switch (PL_lex_state) {
79072805
LW
2288#ifdef COMMENTARY
2289 case LEX_NORMAL: /* Some compilers will produce faster */
2290 case LEX_INTERPNORMAL: /* code if we comment these out. */
2291 break;
2292#endif
2293
09bef843 2294 /* when we've already built the next token, just pull it out of the queue */
79072805 2295 case LEX_KNOWNEXT:
3280af22
NIS
2296 PL_nexttoke--;
2297 yylval = PL_nextval[PL_nexttoke];
2298 if (!PL_nexttoke) {
2299 PL_lex_state = PL_lex_defer;
2300 PL_expect = PL_lex_expect;
2301 PL_lex_defer = LEX_NORMAL;
463ee0b2 2302 }
607df283 2303 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2304 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2305 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2306
3280af22 2307 return(PL_nexttype[PL_nexttoke]);
79072805 2308
02aa26ce 2309 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2310 when we get here, PL_bufptr is at the \
02aa26ce 2311 */
79072805
LW
2312 case LEX_INTERPCASEMOD:
2313#ifdef DEBUGGING
3280af22 2314 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2315 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2316#endif
02aa26ce 2317 /* handle \E or end of string */
3280af22 2318 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2319 char oldmod;
02aa26ce
NT
2320
2321 /* if at a \E */
3280af22
NIS
2322 if (PL_lex_casemods) {
2323 oldmod = PL_lex_casestack[--PL_lex_casemods];
2324 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2325
3280af22
NIS
2326 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2327 PL_bufptr += 2;
2328 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2329 }
79072805
LW
2330 return ')';
2331 }
3280af22
NIS
2332 if (PL_bufptr != PL_bufend)
2333 PL_bufptr += 2;
2334 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2335 return yylex();
79072805
LW
2336 }
2337 else {
607df283
SC
2338 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2339 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2340 s = PL_bufptr + 1;
79072805
LW
2341 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2342 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2343 if (strchr("LU", *s) &&
3280af22 2344 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2345 {
3280af22 2346 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2347 return ')';
2348 }
3280af22
NIS
2349 if (PL_lex_casemods > 10) {
2350 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2351 if (newlb != PL_lex_casestack) {
a0d0e21e 2352 SAVEFREEPV(newlb);
3280af22 2353 PL_lex_casestack = newlb;
a0d0e21e
LW
2354 }
2355 }
3280af22
NIS
2356 PL_lex_casestack[PL_lex_casemods++] = *s;
2357 PL_lex_casestack[PL_lex_casemods] = '\0';
2358 PL_lex_state = LEX_INTERPCONCAT;
2359 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2360 force_next('(');
2361 if (*s == 'l')
3280af22 2362 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2363 else if (*s == 'u')
3280af22 2364 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2365 else if (*s == 'L')
3280af22 2366 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2367 else if (*s == 'U')
3280af22 2368 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2369 else if (*s == 'Q')
3280af22 2370 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2371 else
cea2e8a9 2372 Perl_croak(aTHX_ "panic: yylex");
3280af22 2373 PL_bufptr = s + 1;
79072805 2374 force_next(FUNC);
3280af22
NIS
2375 if (PL_lex_starts) {
2376 s = PL_bufptr;
2377 PL_lex_starts = 0;
79072805
LW
2378 Aop(OP_CONCAT);
2379 }
2380 else
cea2e8a9 2381 return yylex();
79072805
LW
2382 }
2383
55497cff
PP
2384 case LEX_INTERPPUSH:
2385 return sublex_push();
2386
79072805 2387 case LEX_INTERPSTART:
3280af22 2388 if (PL_bufptr == PL_bufend)
79072805 2389 return sublex_done();
607df283
SC
2390 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2391 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2392 PL_expect = XTERM;
2393 PL_lex_dojoin = (*PL_bufptr == '@');
2394 PL_lex_state = LEX_INTERPNORMAL;
2395 if (PL_lex_dojoin) {
2396 PL_nextval[PL_nexttoke].ival = 0;
79072805 2397 force_next(',');
554b3eca 2398#ifdef USE_THREADS
533c011a
NIS
2399 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2400 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2401 force_next(PRIVATEREF);
2402#else
a0d0e21e 2403 force_ident("\"", '$');
554b3eca 2404#endif /* USE_THREADS */
3280af22 2405 PL_nextval[PL_nexttoke].ival = 0;
79072805 2406 force_next('$');
3280af22 2407 PL_nextval[PL_nexttoke].ival = 0;
79072805 2408 force_next('(');
3280af22 2409 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2410 force_next(FUNC);
2411 }
3280af22
NIS
2412 if (PL_lex_starts++) {
2413 s = PL_bufptr;
79072805
LW
2414 Aop(OP_CONCAT);
2415 }
cea2e8a9 2416 return yylex();
79072805
LW
2417
2418 case LEX_INTERPENDMAYBE:
3280af22
NIS
2419 if (intuit_more(PL_bufptr)) {
2420 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2421 break;
2422 }
2423 /* FALL THROUGH */
2424
2425 case LEX_INTERPEND:
3280af22
NIS
2426 if (PL_lex_dojoin) {
2427 PL_lex_dojoin = FALSE;
2428 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2429 return ')';
2430 }
43a16006 2431 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2432 && SvEVALED(PL_lex_repl))
43a16006 2433 {
e9fa98b2 2434 if (PL_bufptr != PL_bufend)
cea2e8a9 2435 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2436 PL_lex_repl = Nullsv;
2437 }
79072805
LW
2438 /* FALLTHROUGH */
2439 case LEX_INTERPCONCAT:
2440#ifdef DEBUGGING
3280af22 2441 if (PL_lex_brackets)
cea2e8a9 2442 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2443#endif
3280af22 2444 if (PL_bufptr == PL_bufend)
79072805
LW
2445 return sublex_done();
2446
3280af22
NIS
2447 if (SvIVX(PL_linestr) == '\'') {
2448 SV *sv = newSVsv(PL_linestr);
2449 if (!PL_lex_inpat)
76e3520e 2450 sv = tokeq(sv);
3280af22 2451 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2452 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2453 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2454 s = PL_bufend;
79072805
LW
2455 }
2456 else {
3280af22 2457 s = scan_const(PL_bufptr);
79072805 2458 if (*s == '\\')
3280af22 2459 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2460 else
3280af22 2461 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2462 }
2463
3280af22
NIS
2464 if (s != PL_bufptr) {
2465 PL_nextval[PL_nexttoke] = yylval;
2466 PL_expect = XTERM;
79072805 2467 force_next(THING);
3280af22 2468 if (PL_lex_starts++)
79072805
LW
2469 Aop(OP_CONCAT);
2470 else {
3280af22 2471 PL_bufptr = s;
cea2e8a9 2472 return yylex();
79072805
LW
2473 }
2474 }
2475
cea2e8a9 2476 return yylex();
a0d0e21e 2477 case LEX_FORMLINE:
3280af22
NIS
2478 PL_lex_state = LEX_NORMAL;
2479 s = scan_formline(PL_bufptr);
2480 if (!PL_lex_formbrack)
a0d0e21e
LW
2481 goto rightbracket;
2482 OPERATOR(';');
79072805
LW
2483 }
2484
3280af22
NIS
2485 s = PL_bufptr;
2486 PL_oldoldbufptr = PL_oldbufptr;
2487 PL_oldbufptr = s;
607df283 2488 DEBUG_T( {
bf49b057
GS
2489 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2490 exp_name[PL_expect], s);
79072805 2491 } )
463ee0b2
LW
2492
2493 retry:
378cc40b
LW
2494 switch (*s) {
2495 default:
7e2040f0 2496 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2497 goto keylookup;
cea2e8a9 2498 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2499 case 4:
2500 case 26:
2501 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2502 case 0:
3280af22
NIS
2503 if (!PL_rsfp) {
2504 PL_last_uni = 0;
2505 PL_last_lop = 0;
2506 if (PL_lex_brackets)
d98d5fff 2507 yyerror("Missing right curly or square bracket");
4e553d73 2508 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2509 "### Tokener got EOF\n");
2510 } )
79072805 2511 TOKEN(0);
463ee0b2 2512 }
3280af22 2513 if (s++ < PL_bufend)
a687059c 2514 goto retry; /* ignore stray nulls */
3280af22
NIS
2515 PL_last_uni = 0;
2516 PL_last_lop = 0;
2517 if (!PL_in_eval && !PL_preambled) {
2518 PL_preambled = TRUE;
2519 sv_setpv(PL_linestr,incl_perldb());
2520 if (SvCUR(PL_linestr))
2521 sv_catpv(PL_linestr,";");
2522 if (PL_preambleav){
2523 while(AvFILLp(PL_preambleav) >= 0) {
2524 SV *tmpsv = av_shift(PL_preambleav);
2525 sv_catsv(PL_linestr, tmpsv);
2526 sv_catpv(PL_linestr, ";");
91b7def8
PP
2527 sv_free(tmpsv);
2528 }
3280af22
NIS
2529 sv_free((SV*)PL_preambleav);
2530 PL_preambleav = NULL;
91b7def8 2531 }
3280af22
NIS
2532 if (PL_minus_n || PL_minus_p) {
2533 sv_catpv(PL_linestr, "LINE: while (<>) {");
2534 if (PL_minus_l)
2535 sv_catpv(PL_linestr,"chomp;");
2536 if (PL_minus_a) {
8fd239a7
CS
2537 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2538 if (gv)
2539 GvIMPORTED_AV_on(gv);
3280af22
NIS
2540 if (PL_minus_F) {
2541 if (strchr("/'\"", *PL_splitstr)
2542 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2543 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2544 else {
2545 char delim;
2546 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2547 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2548 delim = *s;
cea2e8a9 2549 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2550 "q" + (delim == '\''), delim);
3280af22 2551 for (s = PL_splitstr; *s; s++) {
54310121 2552 if (*s == '\\')
3280af22
NIS
2553 sv_catpvn(PL_linestr, "\\", 1);
2554 sv_catpvn(PL_linestr, s, 1);
54310121 2555 }
cea2e8a9 2556 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2557 }
2304df62
AD
2558 }
2559 else
3280af22 2560 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2561 }
79072805 2562 }
3280af22
NIS
2563 sv_catpv(PL_linestr, "\n");
2564 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2565 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2566 PL_last_lop = PL_last_uni = Nullch;
3280af22 2567 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2568 SV *sv = NEWSV(85,0);
2569
2570 sv_upgrade(sv, SVt_PVMG);
3280af22 2571 sv_setsv(sv,PL_linestr);
57843af0 2572 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2573 }
79072805 2574 goto retry;
a687059c 2575 }
e929a76b 2576 do {
aa7440fb 2577 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2578 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2579 fake_eof:
2580 if (PL_rsfp) {
2581 if (PL_preprocess && !PL_in_eval)
2582 (void)PerlProc_pclose(PL_rsfp);
2583 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2584 PerlIO_clearerr(PL_rsfp);
2585 else
2586 (void)PerlIO_close(PL_rsfp);
2587 PL_rsfp = Nullfp;
2588 PL_doextract = FALSE;
2589 }
2590 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2591 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2592 sv_catpv(PL_linestr,";}");
2593 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2594 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2595 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2596 PL_minus_n = PL_minus_p = 0;
2597 goto retry;
2598 }
2599 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2600 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2601 sv_setpv(PL_linestr,"");
2602 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2603 }
2604 /* if it looks like the start of a BOM, check if it in fact is */
2605 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2606#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2607# ifdef __GNU_LIBRARY__
2608# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2609# define FTELL_FOR_PIPE_IS_BROKEN
2610# endif
e3f494f1
JH
2611# else
2612# ifdef __GLIBC__
2613# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2614# define FTELL_FOR_PIPE_IS_BROKEN
2615# endif
2616# endif
226017aa
DD
2617# endif
2618#endif
2619#ifdef FTELL_FOR_PIPE_IS_BROKEN
2620 /* This loses the possibility to detect the bof
2621 * situation on perl -P when the libc5 is being used.
2622 * Workaround? Maybe attach some extra state to PL_rsfp?
2623 */
2624 if (!PL_preprocess)
7e28d3af 2625 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2626#else
7e28d3af 2627 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2628#endif
7e28d3af 2629 if (bof) {
3280af22 2630 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2631 s = swallow_bom((U8*)s);
e929a76b 2632 }
378cc40b 2633 }
3280af22 2634 if (PL_doextract) {
a0d0e21e 2635 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2636 PL_doextract = FALSE;
a0d0e21e
LW
2637
2638 /* Incest with pod. */
2639 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2640 sv_setpv(PL_linestr, "");
2641 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2642 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2643 PL_last_lop = PL_last_uni = Nullch;
3280af22 2644 PL_doextract = FALSE;
a0d0e21e 2645 }
4e553d73 2646 }
463ee0b2 2647 incline(s);
3280af22
NIS
2648 } while (PL_doextract);
2649 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2650 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2651 SV *sv = NEWSV(85,0);
a687059c 2652
93a17b20 2653 sv_upgrade(sv, SVt_PVMG);
3280af22 2654 sv_setsv(sv,PL_linestr);
57843af0 2655 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2656 }
3280af22 2657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2658 PL_last_lop = PL_last_uni = Nullch;
57843af0 2659 if (CopLINE(PL_curcop) == 1) {
3280af22 2660 while (s < PL_bufend && isSPACE(*s))
79072805 2661 s++;
a0d0e21e 2662 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2663 s++;
44a8e56a 2664 d = Nullch;
3280af22 2665 if (!PL_in_eval) {
44a8e56a
PP
2666 if (*s == '#' && *(s+1) == '!')
2667 d = s + 2;
2668#ifdef ALTERNATE_SHEBANG
2669 else {
2670 static char as[] = ALTERNATE_SHEBANG;
2671 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2672 d = s + (sizeof(as) - 1);
2673 }
2674#endif /* ALTERNATE_SHEBANG */
2675 }
2676 if (d) {
b8378b72 2677 char *ipath;
774d564b 2678 char *ipathend;
b8378b72 2679
774d564b 2680 while (isSPACE(*d))
b8378b72
CS
2681 d++;
2682 ipath = d;
774d564b
PP
2683 while (*d && !isSPACE(*d))
2684 d++;
2685 ipathend = d;
2686
2687#ifdef ARG_ZERO_IS_SCRIPT
2688 if (ipathend > ipath) {
2689 /*
2690 * HP-UX (at least) sets argv[0] to the script name,
2691 * which makes $^X incorrect. And Digital UNIX and Linux,
2692 * at least, set argv[0] to the basename of the Perl
2693 * interpreter. So, having found "#!", we'll set it right.
2694 */
2695 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2696 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2697 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2698 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2699 SvSETMAGIC(x);
2700 }
774d564b 2701 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2702 }
774d564b 2703#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2704
2705 /*
2706 * Look for options.
2707 */
748a9306 2708 d = instr(s,"perl -");
84e30d1a 2709 if (!d) {
748a9306 2710 d = instr(s,"perl");
84e30d1a
GS
2711#if defined(DOSISH)
2712 /* avoid getting into infinite loops when shebang
2713 * line contains "Perl" rather than "perl" */
2714 if (!d) {
2715 for (d = ipathend-4; d >= ipath; --d) {
2716 if ((*d == 'p' || *d == 'P')
2717 && !ibcmp(d, "perl", 4))
2718 {
2719 break;
2720 }
2721 }
2722 if (d < ipath)
2723 d = Nullch;
2724 }
2725#endif
2726 }
44a8e56a
PP
2727#ifdef ALTERNATE_SHEBANG
2728 /*
2729 * If the ALTERNATE_SHEBANG on this system starts with a
2730 * character that can be part of a Perl expression, then if
2731 * we see it but not "perl", we're probably looking at the
2732 * start of Perl code, not a request to hand off to some
2733 * other interpreter. Similarly, if "perl" is there, but
2734 * not in the first 'word' of the line, we assume the line
2735 * contains the start of the Perl program.
44a8e56a
PP
2736 */
2737 if (d && *s != '#') {
774d564b 2738 char *c = ipath;
44a8e56a
PP
2739 while (*c && !strchr("; \t\r\n\f\v#", *c))
2740 c++;
2741 if (c < d)
2742 d = Nullch; /* "perl" not in first word; ignore */
2743 else
2744 *s = '#'; /* Don't try to parse shebang line */
2745 }
774d564b 2746#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2747#ifndef MACOS_TRADITIONAL
748a9306 2748 if (!d &&
44a8e56a 2749 *s == '#' &&
774d564b 2750 ipathend > ipath &&
3280af22 2751 !PL_minus_c &&
748a9306 2752 !instr(s,"indir") &&
3280af22 2753 instr(PL_origargv[0],"perl"))
748a9306 2754 {
9f68db38 2755 char **newargv;
9f68db38 2756
774d564b
PP
2757 *ipathend = '\0';
2758 s = ipathend + 1;
3280af22 2759 while (s < PL_bufend && isSPACE(*s))
9f68db38 2760 s++;
3280af22
NIS
2761 if (s < PL_bufend) {
2762 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2763 newargv[1] = s;
3280af22 2764 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2765 s++;
2766 *s = '\0';
3280af22 2767 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2768 }
2769 else
3280af22 2770 newargv = PL_origargv;
774d564b 2771 newargv[0] = ipath;
b4748376 2772 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2773 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2774 }
bf4acbe4 2775#endif
748a9306 2776 if (d) {
3280af22
NIS
2777 U32 oldpdb = PL_perldb;
2778 bool oldn = PL_minus_n;
2779 bool oldp = PL_minus_p;
748a9306
LW
2780
2781 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2782 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2783
2784 if (*d++ == '-') {
8cc95fdb
PP
2785 do {
2786 if (*d == 'M' || *d == 'm') {
2787 char *m = d;
2788 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2789 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2790 (int)(d - m), m);
2791 }
2792 d = moreswitches(d);
2793 } while (d);
155aba94
GS
2794 if ((PERLDB_LINE && !oldpdb) ||
2795 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2796 /* if we have already added "LINE: while (<>) {",
2797 we must not do it again */
748a9306 2798 {
3280af22
NIS
2799 sv_setpv(PL_linestr, "");
2800 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2801 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2802 PL_last_lop = PL_last_uni = Nullch;
3280af22 2803 PL_preambled = FALSE;
84902520 2804 if (PERLDB_LINE)
3280af22 2805 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2806 goto retry;
2807 }
a0d0e21e 2808 }
79072805 2809 }
9f68db38 2810 }
79072805 2811 }
3280af22
NIS
2812 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2813 PL_bufptr = s;
2814 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2815 return yylex();
ae986130 2816 }
378cc40b 2817 goto retry;
4fdae800 2818 case '\r':
6a27c188 2819#ifdef PERL_STRICT_CR
cea2e8a9 2820 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2821 Perl_croak(aTHX_
cc507455 2822 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2823#endif
4fdae800 2824 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2825#ifdef MACOS_TRADITIONAL
2826 case '\312':
2827#endif
378cc40b
LW
2828 s++;
2829 goto retry;
378cc40b 2830 case '#':
e929a76b 2831 case '\n':
3280af22 2832 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2833 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2834 /* handle eval qq[#line 1 "foo"\n ...] */
2835 CopLINE_dec(PL_curcop);
2836 incline(s);
2837 }
3280af22 2838 d = PL_bufend;
a687059c 2839 while (s < d && *s != '\n')
378cc40b 2840 s++;
0f85fab0 2841 if (s < d)
378cc40b 2842 s++;
463ee0b2 2843 incline(s);
3280af22
NIS
2844 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2845 PL_bufptr = s;
2846 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2847 return yylex();
a687059c 2848 }
378cc40b 2849 }
a687059c 2850 else {
378cc40b 2851 *s = '\0';
3280af22 2852 PL_bufend = s;
a687059c 2853 }
378cc40b
LW
2854 goto retry;
2855 case '-':
79072805 2856 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2857 I32 ftst = 0;
2858
378cc40b 2859 s++;
3280af22 2860 PL_bufptr = s;
748a9306
LW
2861 tmp = *s++;
2862
bf4acbe4 2863 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2864 s++;
2865
2866 if (strnEQ(s,"=>",2)) {
3280af22 2867 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2868 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2869 "### Saw unary minus before =>, forcing word '%s'\n", s);
2870 } )
748a9306
LW
2871 OPERATOR('-'); /* unary minus */
2872 }
3280af22 2873 PL_last_uni = PL_oldbufptr;
748a9306 2874 switch (tmp) {
e5edeb50
JH
2875 case 'r': ftst = OP_FTEREAD; break;
2876 case 'w': ftst = OP_FTEWRITE; break;
2877 case 'x': ftst = OP_FTEEXEC; break;
2878 case 'o': ftst = OP_FTEOWNED; break;
2879 case 'R': ftst = OP_FTRREAD; break;
2880 case 'W': ftst = OP_FTRWRITE; break;
2881 case 'X': ftst = OP_FTREXEC; break;
2882 case 'O': ftst = OP_FTROWNED; break;
2883 case 'e': ftst = OP_FTIS; break;
2884 case 'z': ftst = OP_FTZERO; break;
2885 case 's': ftst = OP_FTSIZE; break;
2886 case 'f': ftst = OP_FTFILE; break;
2887 case 'd': ftst = OP_FTDIR; break;
2888 case 'l': ftst = OP_FTLINK; break;
2889 case 'p': ftst = OP_FTPIPE; break;
2890 case 'S': ftst = OP_FTSOCK; break;
2891 case 'u': ftst = OP_FTSUID; break;
2892 case 'g': ftst = OP_FTSGID; break;
2893 case 'k': ftst = OP_FTSVTX; break;
2894 case 'b': ftst = OP_FTBLK; break;
2895 case 'c': ftst = OP_FTCHR; break;
2896 case 't': ftst = OP_FTTTY; break;
2897 case 'T': ftst = OP_FTTEXT; break;
2898 case 'B': ftst = OP_FTBINARY; break;
2899 case 'M': case 'A': case 'C':
2900 gv_fetchpv("\024",TRUE, SVt_PV);
2901 switch (tmp) {
2902 case 'M': ftst = OP_FTMTIME; break;
2903 case 'A': ftst = OP_FTATIME; break;
2904 case 'C': ftst = OP_FTCTIME; break;
2905 default: break;
2906 }
2907 break;
378cc40b 2908 default:
378cc40b
LW
2909 break;
2910 }
e5edeb50
JH
2911 if (ftst) {
2912 PL_last_lop_op = ftst;
4e553d73 2913 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2914 "### Saw file test %c\n", (int)ftst);
e5edeb50 2915 } )
e5edeb50
JH
2916 FTST(ftst);
2917 }
2918 else {
2919 /* Assume it was a minus followed by a one-letter named
2920 * subroutine call (or a -bareword), then. */
95c31fe3 2921 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2922 "### %c looked like a file test but was not\n",
2923 (int)ftst);
95c31fe3 2924 } )
e5edeb50
JH
2925 s -= 2;
2926 }
378cc40b 2927 }
a687059c
LW
2928 tmp = *s++;
2929 if (*s == tmp) {
2930 s++;
3280af22 2931 if (PL_expect == XOPERATOR)
79072805
LW
2932 TERM(POSTDEC);
2933 else
2934 OPERATOR(PREDEC);
2935 }
2936 else if (*s == '>') {
2937 s++;
2938 s = skipspace(s);
7e2040f0 2939 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2940 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2941 TOKEN(ARROW);
79072805 2942 }
748a9306
LW
2943 else if (*s == '$')
2944 OPERATOR(ARROW);
463ee0b2 2945 else
748a9306 2946 TERM(ARROW);
a687059c 2947 }
3280af22 2948 if (PL_expect == XOPERATOR)
79072805
LW
2949 Aop(OP_SUBTRACT);
2950 else {
3280af22 2951 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2952 check_uni();
79072805 2953 OPERATOR('-'); /* unary minus */
2f3197b3 2954 }
79072805 2955
378cc40b 2956 case '+':
a687059c
LW
2957 tmp = *s++;
2958 if (*s == tmp) {
378cc40b 2959 s++;
3280af22 2960 if (PL_expect == XOPERATOR)
79072805
LW
2961 TERM(POSTINC);
2962 else
2963 OPERATOR(PREINC);
378cc40b 2964 }
3280af22 2965 if (PL_expect == XOPERATOR)
79072805
LW
2966 Aop(OP_ADD);
2967 else {
3280af22 2968 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2969 check_uni();
a687059c 2970 OPERATOR('+');
2f3197b3 2971 }
a687059c 2972
378cc40b 2973 case '*':
3280af22
NIS
2974 if (PL_expect != XOPERATOR) {
2975 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2976 PL_expect = XOPERATOR;
2977 force_ident(PL_tokenbuf, '*');
2978 if (!*PL_tokenbuf)
a0d0e21e 2979 PREREF('*');
79072805 2980 TERM('*');
a687059c 2981 }
79072805
LW
2982 s++;
2983 if (*s == '*') {
a687059c 2984 s++;
79072805 2985 PWop(OP_POW);
a687059c 2986 }
79072805
LW
2987 Mop(OP_MULTIPLY);
2988
378cc40b 2989 case '%':
3280af22 2990 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2991 ++s;
2992 Mop(OP_MODULO);
a687059c 2993 }
3280af22
NIS
2994 PL_tokenbuf[0] = '%';
2995 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2996 if (!PL_tokenbuf[1]) {
2997 if (s == PL_bufend)
bbce6d69
PP
2998 yyerror("Final % should be \\% or %name");
2999 PREREF('%');
a687059c 3000 }
3280af22 3001 PL_pending_ident = '%';
bbce6d69 3002 TERM('%');
a687059c 3003
378cc40b 3004 case '^':
79072805 3005 s++;
a0d0e21e 3006 BOop(OP_BIT_XOR);
79072805 3007 case '[':
3280af22 3008 PL_lex_brackets++;
79072805 3009 /* FALL THROUGH */
378cc40b 3010 case '~':
378cc40b 3011 case ',':
378cc40b
LW
3012 tmp = *s++;
3013 OPERATOR(tmp);
a0d0e21e
LW
3014 case ':':
3015 if (s[1] == ':') {
3016 len = 0;
3017 goto just_a_word;
3018 }
3019 s++;
09bef843
SB
3020 switch (PL_expect) {
3021 OP *attrs;
3022 case XOPERATOR:
3023 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3024 break;
3025 PL_bufptr = s; /* update in case we back off */
3026 goto grabattrs;
3027 case XATTRBLOCK:
3028 PL_expect = XBLOCK;
3029 goto grabattrs;
3030 case XATTRTERM:
3031 PL_expect = XTERMBLOCK;
3032 grabattrs:
3033 s = skipspace(s);
3034 attrs = Nullop;
7e2040f0 3035 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3036 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3037 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3038 if (tmp < 0) tmp = -tmp;
3039 switch (tmp) {
3040 case KEY_or:
3041 case KEY_and:
3042 case KEY_for:
3043 case KEY_unless:
3044 case KEY_if:
3045 case KEY_while:
3046 case KEY_until:
3047 goto got_attrs;
3048 default:
3049 break;
3050 }
3051 }
09bef843
SB
3052 if (*d == '(') {
3053 d = scan_str(d,TRUE,TRUE);
3054 if (!d) {
09bef843
SB
3055 /* MUST advance bufptr here to avoid bogus
3056 "at end of line" context messages from yyerror().
3057 */
3058 PL_bufptr = s + len;
3059 yyerror("Unterminated attribute parameter in attribute list");
3060 if (attrs)
3061 op_free(attrs);
3062 return 0; /* EOF indicator */
3063 }
3064 }
3065 if (PL_lex_stuff) {
3066 SV *sv = newSVpvn(s, len);
3067 sv_catsv(sv, PL_lex_stuff);
3068 attrs = append_elem(OP_LIST, attrs,
3069 newSVOP(OP_CONST, 0, sv));
3070 SvREFCNT_dec(PL_lex_stuff);
3071 PL_lex_stuff = Nullsv;
3072 }
3073 else {
78f9721b
SM
3074 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3075 CvLVALUE_on(PL_compcv);
3076 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3077 CvLOCKED_on(PL_compcv);
3078 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3079 CvMETHOD_on(PL_compcv);
87ecf892
DM
3080#ifdef USE_ITHREADS
3081 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3082 GvSHARED_on(cGVOPx_gv(yylval.opval));
3083#endif
78f9721b
SM
3084 /* After we've set the flags, it could be argued that
3085 we don't need to do the attributes.pm-based setting
3086 process, and shouldn't bother appending recognized
3087 flags. To experiment with that, uncomment the
3088 following "else": */
0256094b 3089 else
78f9721b
SM
3090 attrs = append_elem(OP_LIST, attrs,
3091 newSVOP(OP_CONST, 0,
3092 newSVpvn(s, len)));
09bef843
SB
3093 }
3094 s = skipspace(d);
0120eecf 3095 if (*s == ':' && s[1] != ':')
09bef843 3096 s = skipspace(s+1);
0120eecf
GS
3097 else if (s == d)
3098 break; /* require real whitespace or :'s */
09bef843 3099 }
f9829d6b
GS
3100 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3101 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3102 char q = ((*s == '\'') ? '"' : '\'');
3103 /* If here for an expression, and parsed no attrs, back off. */
3104 if (tmp == '=' && !attrs) {
3105 s = PL_bufptr;
3106 break;
3107 }
3108 /* MUST advance bufptr here to avoid bogus "at end of line"
3109 context messages from yyerror().
3110 */
3111 PL_bufptr = s;
3112 if (!*s)
3113 yyerror("Unterminated attribute list");
3114 else
3115 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3116 q, *s, q));
3117 if (attrs)
3118 op_free(attrs);
3119 OPERATOR(':');
3120 }
f9829d6b 3121 got_attrs:
09bef843
SB
3122 if (attrs) {
3123 PL_nextval[PL_nexttoke].opval = attrs;
3124 force_next(THING);
3125 }
3126 TOKEN(COLONATTR);
3127 }
a0d0e21e 3128 OPERATOR(':');
8990e307
LW
3129 case '(':
3130 s++;
3280af22
NIS
3131 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3132 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3133 else
3280af22 3134 PL_expect = XTERM;
a0d0e21e 3135 TOKEN('(');
378cc40b 3136 case ';':
f4dd75d9 3137 CLINE;
378cc40b
LW
3138 tmp = *s++;
3139 OPERATOR(tmp);
3140 case ')':
378cc40b 3141 tmp = *s++;
16d20bd9
AD
3142 s = skipspace(s);
3143 if (*s == '{')
3144 PREBLOCK(tmp);
378cc40b 3145 TERM(tmp);
79072805
LW
3146 case ']':
3147 s++;
3280af22 3148 if (PL_lex_brackets <= 0)
d98d5fff 3149 yyerror("Unmatched right square bracket");
463ee0b2 3150 else
3280af22
NIS
3151 --PL_lex_brackets;
3152 if (PL_lex_state == LEX_INTERPNORMAL) {
3153 if (PL_lex_brackets == 0) {
a0d0e21e 3154 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3155 PL_lex_state = LEX_INTERPEND;
79072805
LW
3156 }
3157 }
4633a7c4 3158 TERM(']');
79072805
LW
3159 case '{':
3160 leftbracket:
79072805 3161 s++;
3280af22
NIS
3162 if (PL_lex_brackets > 100) {
3163 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3164 if (newlb != PL_lex_brackstack) {
8990e307 3165 SAVEFREEPV(newlb);
3280af22 3166 PL_lex_brackstack = newlb;
8990e307
LW
3167 }
3168 }
3280af22 3169 switch (PL_expect) {
a0d0e21e 3170 case XTERM:
3280af22 3171 if (PL_lex_formbrack) {
a0d0e21e
LW
3172 s--;
3173 PRETERMBLOCK(DO);
3174 }
3280af22
NIS
3175 if (PL_oldoldbufptr == PL_last_lop)
3176 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3177 else
3280af22 3178 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3179 OPERATOR(HASHBRACK);
a0d0e21e 3180 case XOPERATOR:
bf4acbe4 3181 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3182 s++;
44a8e56a 3183 d = s;
3280af22
NIS
3184 PL_tokenbuf[0] = '\0';
3185 if (d < PL_bufend && *d == '-') {
3186 PL_tokenbuf[0] = '-';
44a8e56a 3187 d++;
bf4acbe4 3188 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3189 d++;
3190 }
7e2040f0 3191 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3192 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3193 FALSE, &len);
bf4acbe4 3194 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3195 d++;
3196 if (*d == '}') {
3280af22 3197 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3198 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3199 if (minus)
3200 force_next('-');
748a9306
LW
3201 }
3202 }
3203 /* FALL THROUGH */
09bef843 3204 case XATTRBLOCK:
748a9306 3205 case XBLOCK:
3280af22
NIS
3206 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3207 PL_expect = XSTATE;
a0d0e21e 3208 break;
09bef843 3209 case XATTRTERM:
a0d0e21e 3210 case XTERMBLOCK:
3280af22
NIS
3211 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3212 PL_expect = XSTATE;
a0d0e21e
LW
3213 break;
3214 default: {
3215 char *t;
3280af22
NIS
3216 if (PL_oldoldbufptr == PL_last_lop)
3217 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3218 else
3280af22 3219 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3220 s = skipspace(s);
09ecc4b6 3221 if (*s == '}')
a0d0e21e 3222 OPERATOR(HASHBRACK);
b8a4b1be
GS
3223 /* This hack serves to disambiguate a pair of curlies
3224 * as being a block or an anon hash. Normally, expectation
3225 * determines that, but in cases where we're not in a
3226 * position to expect anything in particular (like inside
3227 * eval"") we have to resolve the ambiguity. This code
3228 * covers the case where the first term in the curlies is a
3229 * quoted string. Most other cases need to be explicitly
3230 * disambiguated by prepending a `+' before the opening
3231 * curly in order to force resolution as an anon hash.
3232 *
3233 * XXX should probably propagate the outer expectation
3234 * into eval"" to rely less on this hack, but that could
3235 * potentially break current behavior of eval"".
3236 * GSAR 97-07-21
3237 */
3238 t = s;
3239 if (*s == '\'' || *s == '"' || *s == '`') {
3240 /* common case: get past first string, handling escapes */
3280af22 3241 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3242 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3243 t++;
3244 t++;
a0d0e21e 3245 }
b8a4b1be 3246 else if (*s == 'q') {
3280af22 3247 if (++t < PL_bufend
b8a4b1be 3248 && (!isALNUM(*t)
3280af22 3249 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3250 && !isALNUM(*t))))
3251 {
b8a4b1be
GS
3252 char *tmps;
3253 char open, close, term;
3254 I32 brackets = 1;
3255
3280af22 3256 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3257 t++;
3258 term = *t;
3259 open = term;
3260 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3261 term = tmps[5];
3262 close = term;
3263 if (open == close)
3280af22
NIS
3264 for (t++; t < PL_bufend; t++) {
3265 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3266 t++;
6d07e5e9 3267 else if (*t == open)
b8a4b1be
GS
3268 break;
3269 }
3270 else
3280af22
NIS
3271 for (t++; t < PL_bufend; t++) {
3272 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3273 t++;
6d07e5e9 3274 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3275 break;
3276 else if (*t == open)
3277 brackets++;
3278 }
3279 }
3280 t++;
a0d0e21e 3281 }
7e2040f0 3282 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3283 t += UTF8SKIP(t);
7e2040f0 3284 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3285 t += UTF8SKIP(t);
a0d0e21e 3286 }
3280af22 3287 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3288 t++;
b8a4b1be
GS
3289 /* if comma follows first term, call it an anon hash */
3290 /* XXX it could be a comma expression with loop modifiers */
3280af22 3291 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3292 || (*t == '=' && t[1] == '>')))
a0d0e21e 3293 OPERATOR(HASHBRACK);
3280af22 3294 if (PL_expect == XREF)
4e4e412b 3295 PL_expect = XTERM;
a0d0e21e 3296 else {
3280af22
NIS
3297 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3298 PL_expect = XSTATE;
a0d0e21e 3299 }
8990e307 3300 }
a0d0e21e 3301 break;
463ee0b2 3302 }
57843af0 3303 yylval.ival = CopLINE(PL_curcop);
79072805 3304 if (isSPACE(*s) || *s == '#')
3280af22 3305 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3306 TOKEN('{');
378cc40b 3307 case '}':
79072805
LW
3308 rightbracket:
3309 s++;
3280af22 3310 if (PL_lex_brackets <= 0)
d98d5fff 3311 yyerror("Unmatched right curly bracket");
463ee0b2 3312 else
3280af22 3313 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3314 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3315 PL_lex_formbrack = 0;
3316 if (PL_lex_state == LEX_INTERPNORMAL) {
3317 if (PL_lex_brackets == 0) {
9059aa12
LW
3318 if (PL_expect & XFAKEBRACK) {
3319 PL_expect &= XENUMMASK;
3280af22
NIS
3320 PL_lex_state = LEX_INTERPEND;
3321 PL_bufptr = s;
cea2e8a9 3322 return yylex(); /* ignore fake brackets */
79072805 3323 }
fa83b5b6 3324 if (*s == '-' && s[1] == '>')
3280af22 3325 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3326 else if (*s != '[' && *s != '{')
3280af22 3327 PL_lex_state = LEX_INTERPEND;
79072805
LW
3328 }
3329 }
9059aa12
LW
3330 if (PL_expect & XFAKEBRACK) {
3331 PL_expect &= XENUMMASK;
3280af22 3332 PL_bufptr = s;
cea2e8a9 3333 return yylex(); /* ignore fake brackets */
748a9306 3334 }
79072805
LW
3335 force_next('}');
3336 TOKEN(';');
378cc40b
LW
3337 case '&':
3338 s++;
3339 tmp = *s++;
3340 if (tmp == '&')
a0d0e21e 3341 AOPERATOR(ANDAND);
378cc40b 3342 s--;
3280af22 3343 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3344 if (ckWARN(WARN_SEMICOLON)
3345 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3346 {
57843af0 3347 CopLINE_dec(PL_curcop);
cea2e8a9 3348 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3349 CopLINE_inc(PL_curcop);
463ee0b2 3350 }
79072805 3351 BAop(OP_BIT_AND);
463ee0b2 3352 }
79072805 3353
3280af22
NIS
3354 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3355 if (*PL_tokenbuf) {
3356 PL_expect = XOPERATOR;
3357 force_ident(PL_tokenbuf, '&');
463ee0b2 3358 }
79072805
LW
3359 else
3360 PREREF('&');
c07a80fd 3361 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3362 TERM('&');
3363
378cc40b
LW
3364 case '|':
3365 s++;
3366 tmp = *s++;
3367 if (tmp == '|')
a0d0e21e 3368 AOPERATOR(OROR);
378cc40b 3369 s--;
79072805 3370 BOop(OP_BIT_OR);
378cc40b
LW
3371 case '=':
3372 s++;
3373 tmp = *s++;
3374 if (tmp == '=')
79072805
LW
3375 Eop(OP_EQ);
3376 if (tmp == '>')
3377 OPERATOR(',');
378cc40b 3378 if (tmp == '~')
79072805 3379 PMop(OP_MATCH);
599cee73 3380 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3381 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3382 s--;
3280af22
NIS
3383 if (PL_expect == XSTATE && isALPHA(tmp) &&
3384 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3385 {
3280af22
NIS
3386 if (PL_in_eval && !PL_rsfp) {
3387 d = PL_bufend;
a5f75d66
AD
3388 while (s < d) {
3389 if (*s++ == '\n') {
3390 incline(s);
3391 if (strnEQ(s,"=cut",4)) {
3392 s = strchr(s,'\n');
3393 if (s)
3394 s++;
3395 else
3396 s = d;
3397 incline(s);
3398 goto retry;
3399 }
3400 }
3401 }
3402 goto retry;
3403 }
3280af22
NIS
3404 s = PL_bufend;
3405 PL_doextract = TRUE;
a0d0e21e
LW
3406 goto retry;
3407 }
3280af22 3408 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3409 char *t;
51882d45 3410#ifdef PERL_STRICT_CR
bf4acbe4 3411 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3412#else
bf4acbe4 3413 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3414#endif
a0d0e21e
LW
3415 if (*t == '\n' || *t == '#') {
3416 s--;
3280af22 3417 PL_expect = XBLOCK;
a0d0e21e
LW
3418 goto leftbracket;
3419 }
79072805 3420 }
a0d0e21e
LW
3421 yylval.ival = 0;
3422 OPERATOR(ASSIGNOP);
378cc40b
LW
3423 case '!':
3424 s++;
3425 tmp = *s++;
3426 if (tmp == '=')
79072805 3427 Eop(OP_NE);
378cc40b 3428 if (tmp == '~')
79072805 3429 PMop(OP_NOT);
378cc40b
LW
3430 s--;
3431 OPERATOR('!');
3432 case '<':
3280af22 3433 if (PL_expect != XOPERATOR) {
93a17b20 3434 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3435 check_uni();
79072805
LW
3436 if (s[1] == '<')
3437 s = scan_heredoc(s);
3438 else
3439 s = scan_inputsymbol(s);
3440 TERM(sublex_start());
378cc40b
LW
3441 }
3442 s++;
3443 tmp = *s++;
3444 if (tmp == '<')
79072805 3445 SHop(OP_LEFT_SHIFT);
395c3793
LW
3446 if (tmp == '=') {
3447 tmp = *s++;
3448 if (tmp == '>')
79072805 3449 Eop(OP_NCMP);
395c3793 3450 s--;
79072805 3451 Rop(OP_LE);
395c3793 3452 }
378cc40b 3453 s--;
79072805 3454 Rop(OP_LT);
378cc40b
LW
3455 case '>':
3456 s++;
3457 tmp = *s++;
3458 if (tmp == '>')
79072805 3459 SHop(OP_RIGHT_SHIFT);
378cc40b 3460 if (tmp == '=')
79072805 3461 Rop(OP_GE);
378cc40b 3462 s--;
79072805 3463 Rop(OP_GT);
378cc40b
LW
3464
3465 case '$':
bbce6d69
PP
3466 CLINE;
3467
3280af22
NIS
3468 if (PL_expect == XOPERATOR) {
3469 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3470 PL_expect = XTERM;
a0d0e21e 3471 depcom();
bbce6d69 3472 return ','; /* grandfather non-comma-format format */
a0d0e21e 3473 }
8990e307 3474 }
a0d0e21e 3475
7e2040f0 3476 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3477 PL_tokenbuf[0] = '@';
376b8730
SM
3478 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3479 sizeof PL_tokenbuf - 1, FALSE);
3480 if (PL_expect == XOPERATOR)
3481 no_op("Array length", s);
3280af22 3482 if (!PL_tokenbuf[1])
a0d0e21e 3483 PREREF(DOLSHARP);
3280af22
NIS
3484 PL_expect = XOPERATOR;
3485 PL_pending_ident = '#';
463ee0b2 3486 TOKEN(DOLSHARP);
79072805 3487 }
bbce6d69 3488
3280af22 3489 PL_tokenbuf[0] = '$';
376b8730
SM
3490 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3491 sizeof PL_tokenbuf - 1, FALSE);
3492 if (PL_expect == XOPERATOR)
3493 no_op("Scalar", s);
3280af22
NIS
3494 if (!PL_tokenbuf[1]) {
3495 if (s == PL_bufend)
bbce6d69
PP
3496 yyerror("Final $ should be \\$ or $name");
3497 PREREF('$');
8990e307 3498 }
a0d0e21e 3499
bbce6d69 3500 /* This kludge not intended to be bulletproof. */
3280af22 3501 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3502 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3503 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3504 yylval.opval->op_private = OPpCONST_ARYBASE;
3505 TERM(THING);
3506 }
3507
ff68c719 3508 d = s;
69d2bceb 3509 tmp = (I32)*s;
3280af22 3510 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3511 s = skipspace(s);
3512
3280af22 3513 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3514 char *t;
3515 if (*s == '[') {
3280af22 3516 PL_tokenbuf[0] = '@';
599cee73 3517 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3518 for(t = s + 1;
7e2040f0 3519 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3520 t++) ;
a0d0e21e 3521 if (*t++ == ',') {
3280af22
NIS
3522 PL_bufptr = skipspace(PL_bufptr);
3523 while (t < PL_bufend && *t != ']')
bbce6d69 3524 t++;
cea2e8a9 3525 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3526 "Multidimensional syntax %.*s not supported",
3527 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3528 }
3529 }
bbce6d69
PP
3530 }
3531 else if (*s == '{') {
3280af22 3532 PL_tokenbuf[0] = '%';
599cee73 3533 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&