This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Prefer !UTF8_IS_INVARIANT() over UTF8_IS_CONTINUED() when that
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
be4731d2 40#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || PL_hints & HINT_UTF8)
a0ed51b3 41
4e553d73 42/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
6f202aea 82int yyactlevel = -1;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
075953c3
JH
129/* Note that REPORT() and REPORT2() will be expressions that supply
130 * their own trailing comma, not suitable for statements as such. */
998054bd 131#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
132# define REPORT(x,retval) tokereport(x,s,(int)retval),
133# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 134#else
075953c3
JH
135# define REPORT(x,retval)
136# define REPORT2(x,retval)
998054bd
SC
137#endif
138
075953c3
JH
139#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
140#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
141#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
142#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
143#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
144#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
145#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
146#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
147#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
148#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
149#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
150#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
151#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
152#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
153#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
154#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
155#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
156#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
157#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
158#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 159
a687059c
LW
160/* This bit of chicanery makes a unary function followed by
161 * a parenthesis into a function with one argument, highest precedence.
162 */
2f3197b3 163#define UNI(f) return(yylval.ival = f, \
075953c3 164 REPORT("uni",f) \
3280af22
NIS
165 PL_expect = XTERM, \
166 PL_bufptr = s, \
167 PL_last_uni = PL_oldbufptr, \
168 PL_last_lop_op = f, \
a687059c
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
79072805 171#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 172 REPORT("uni",f) \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
79072805
LW
175 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
176
9f68db38 177/* grandfather return to old style */
3280af22 178#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 179
2d00ba3b 180STATIC void
61b2116b 181S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 182{
998054bd
SC
183 SV *report;
184 DEBUG_T({
185 report = newSVpv(thing, 0);
61b2116b 186 Perl_sv_catpvf(aTHX_ report, ":line %i:%i:", CopLINE(PL_curcop), rv);
998054bd
SC
187
188 if (s - PL_bufptr > 0)
189 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
190 else {
191 if (PL_oldbufptr && *PL_oldbufptr)
192 sv_catpv(report, PL_tokenbuf);
193 }
194 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
195 })
196}
197
ffb4593c
NT
198/*
199 * S_ao
200 *
201 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
202 * into an OP_ANDASSIGN or OP_ORASSIGN
203 */
204
76e3520e 205STATIC int
cea2e8a9 206S_ao(pTHX_ int toketype)
a0d0e21e 207{
3280af22
NIS
208 if (*PL_bufptr == '=') {
209 PL_bufptr++;
a0d0e21e
LW
210 if (toketype == ANDAND)
211 yylval.ival = OP_ANDASSIGN;
212 else if (toketype == OROR)
213 yylval.ival = OP_ORASSIGN;
214 toketype = ASSIGNOP;
215 }
216 return toketype;
217}
218
ffb4593c
NT
219/*
220 * S_no_op
221 * When Perl expects an operator and finds something else, no_op
222 * prints the warning. It always prints "<something> found where
223 * operator expected. It prints "Missing semicolon on previous line?"
224 * if the surprise occurs at the start of the line. "do you need to
225 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
226 * where the compiler doesn't know if foo is a method call or a function.
227 * It prints "Missing operator before end of line" if there's nothing
228 * after the missing operator, or "... before <...>" if there is something
229 * after the missing operator.
230 */
231
76e3520e 232STATIC void
cea2e8a9 233S_no_op(pTHX_ char *what, char *s)
463ee0b2 234{
3280af22
NIS
235 char *oldbp = PL_bufptr;
236 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 237
1189a94a
GS
238 if (!s)
239 s = oldbp;
07c798fb 240 else
1189a94a 241 PL_bufptr = s;
cea2e8a9 242 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 243 if (is_first)
cea2e8a9 244 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 245 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 246 char *t;
7e2040f0 247 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 248 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 249 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 250 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 251 }
07c798fb
HS
252 else {
253 assert(s >= oldbp);
cea2e8a9 254 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 255 }
3280af22 256 PL_bufptr = oldbp;
8990e307
LW
257}
258
ffb4593c
NT
259/*
260 * S_missingterm
261 * Complain about missing quote/regexp/heredoc terminator.
262 * If it's called with (char *)NULL then it cauterizes the line buffer.
263 * If we're in a delimited string and the delimiter is a control
264 * character, it's reformatted into a two-char sequence like ^C.
265 * This is fatal.
266 */
267
76e3520e 268STATIC void
cea2e8a9 269S_missingterm(pTHX_ char *s)
8990e307
LW
270{
271 char tmpbuf[3];
272 char q;
273 if (s) {
274 char *nl = strrchr(s,'\n');
d2719217 275 if (nl)
8990e307
LW
276 *nl = '\0';
277 }
9d116dd7
JH
278 else if (
279#ifdef EBCDIC
280 iscntrl(PL_multi_close)
281#else
282 PL_multi_close < 32 || PL_multi_close == 127
283#endif
284 ) {
8990e307 285 *tmpbuf = '^';
3280af22 286 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
287 s = "\\n";
288 tmpbuf[2] = '\0';
289 s = tmpbuf;
290 }
291 else {
3280af22 292 *tmpbuf = PL_multi_close;
8990e307
LW
293 tmpbuf[1] = '\0';
294 s = tmpbuf;
295 }
296 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 297 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 298}
79072805 299
ffb4593c
NT
300/*
301 * Perl_deprecate
ffb4593c
NT
302 */
303
79072805 304void
864dbfa3 305Perl_deprecate(pTHX_ char *s)
a0d0e21e 306{
599cee73 307 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 308 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
309}
310
ffb4593c
NT
311/*
312 * depcom
9cbb5ea2 313 * Deprecate a comma-less variable list.
ffb4593c
NT
314 */
315
76e3520e 316STATIC void
cea2e8a9 317S_depcom(pTHX)
a0d0e21e
LW
318{
319 deprecate("comma-less variable list");
320}
321
ffb4593c 322/*
9cbb5ea2
GS
323 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
324 * utf16-to-utf8-reversed.
ffb4593c
NT
325 */
326
c39cd008
GS
327#ifdef PERL_CR_FILTER
328static void
329strip_return(SV *sv)
330{
331 register char *s = SvPVX(sv);
332 register char *e = s + SvCUR(sv);
333 /* outer loop optimized to do nothing if there are no CR-LFs */
334 while (s < e) {
335 if (*s++ == '\r' && *s == '\n') {
336 /* hit a CR-LF, need to copy the rest */
337 register char *d = s - 1;
338 *d++ = *s++;
339 while (s < e) {
340 if (*s == '\r' && s[1] == '\n')
341 s++;
342 *d++ = *s++;
343 }
344 SvCUR(sv) -= s - d;
345 return;
346 }
347 }
348}
a868473f 349
76e3520e 350STATIC I32
c39cd008 351S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 352{
c39cd008
GS
353 I32 count = FILTER_READ(idx+1, sv, maxlen);
354 if (count > 0 && !maxlen)
355 strip_return(sv);
356 return count;
a868473f
NIS
357}
358#endif
359
ffb4593c
NT
360/*
361 * Perl_lex_start
9cbb5ea2
GS
362 * Initialize variables. Uses the Perl save_stack to save its state (for
363 * recursive calls to the parser).
ffb4593c
NT
364 */
365
a0d0e21e 366void
864dbfa3 367Perl_lex_start(pTHX_ SV *line)
79072805 368{
8990e307
LW
369 char *s;
370 STRLEN len;
371
3280af22
NIS
372 SAVEI32(PL_lex_dojoin);
373 SAVEI32(PL_lex_brackets);
3280af22
NIS
374 SAVEI32(PL_lex_casemods);
375 SAVEI32(PL_lex_starts);
376 SAVEI32(PL_lex_state);
7766f137 377 SAVEVPTR(PL_lex_inpat);
3280af22 378 SAVEI32(PL_lex_inwhat);
18b09519
GS
379 if (PL_lex_state == LEX_KNOWNEXT) {
380 I32 toke = PL_nexttoke;
381 while (--toke >= 0) {
382 SAVEI32(PL_nexttype[toke]);
383 SAVEVPTR(PL_nextval[toke]);
384 }
385 SAVEI32(PL_nexttoke);
18b09519 386 }
57843af0 387 SAVECOPLINE(PL_curcop);
3280af22
NIS
388 SAVEPPTR(PL_bufptr);
389 SAVEPPTR(PL_bufend);
390 SAVEPPTR(PL_oldbufptr);
391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
392 SAVEPPTR(PL_last_lop);
393 SAVEPPTR(PL_last_uni);
3280af22
NIS
394 SAVEPPTR(PL_linestart);
395 SAVESPTR(PL_linestr);
396 SAVEPPTR(PL_lex_brackstack);
397 SAVEPPTR(PL_lex_casestack);
c76ac1ee 398 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
399 SAVESPTR(PL_lex_stuff);
400 SAVEI32(PL_lex_defer);
09bef843 401 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 402 SAVESPTR(PL_lex_repl);
bebdddfc
GS
403 SAVEINT(PL_expect);
404 SAVEINT(PL_lex_expect);
3280af22
NIS
405
406 PL_lex_state = LEX_NORMAL;
407 PL_lex_defer = 0;
408 PL_expect = XSTATE;
409 PL_lex_brackets = 0;
3280af22
NIS
410 New(899, PL_lex_brackstack, 120, char);
411 New(899, PL_lex_casestack, 12, char);
412 SAVEFREEPV(PL_lex_brackstack);
413 SAVEFREEPV(PL_lex_casestack);
414 PL_lex_casemods = 0;
415 *PL_lex_casestack = '\0';
416 PL_lex_dojoin = 0;
417 PL_lex_starts = 0;
418 PL_lex_stuff = Nullsv;
419 PL_lex_repl = Nullsv;
420 PL_lex_inpat = 0;
76be56bc 421 PL_nexttoke = 0;
3280af22 422 PL_lex_inwhat = 0;
09bef843 423 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
424 PL_linestr = line;
425 if (SvREADONLY(PL_linestr))
426 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
427 s = SvPV(PL_linestr, len);
8990e307 428 if (len && s[len-1] != ';') {
3280af22
NIS
429 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
430 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
431 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 432 }
3280af22
NIS
433 SvTEMP_off(PL_linestr);
434 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
435 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 436 PL_last_lop = PL_last_uni = Nullch;
3280af22 437 SvREFCNT_dec(PL_rs);
79cb57f6 438 PL_rs = newSVpvn("\n", 1);
3280af22 439 PL_rsfp = 0;
79072805 440}
a687059c 441
ffb4593c
NT
442/*
443 * Perl_lex_end
9cbb5ea2
GS
444 * Finalizer for lexing operations. Must be called when the parser is
445 * done with the lexer.
ffb4593c
NT
446 */
447
463ee0b2 448void
864dbfa3 449Perl_lex_end(pTHX)
463ee0b2 450{
3280af22 451 PL_doextract = FALSE;
463ee0b2
LW
452}
453
ffb4593c
NT
454/*
455 * S_incline
456 * This subroutine has nothing to do with tilting, whether at windmills
457 * or pinball tables. Its name is short for "increment line". It
57843af0 458 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 459 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
460 * # line 500 "foo.pm"
461 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
462 */
463
76e3520e 464STATIC void
cea2e8a9 465S_incline(pTHX_ char *s)
463ee0b2
LW
466{
467 char *t;
468 char *n;
73659bf1 469 char *e;
463ee0b2 470 char ch;
463ee0b2 471
57843af0 472 CopLINE_inc(PL_curcop);
463ee0b2
LW
473 if (*s++ != '#')
474 return;
bf4acbe4 475 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
476 if (strnEQ(s, "line", 4))
477 s += 4;
478 else
479 return;
084592ab 480 if (SPACE_OR_TAB(*s))
73659bf1 481 s++;
4e553d73 482 else
73659bf1 483 return;
bf4acbe4 484 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
485 if (!isDIGIT(*s))
486 return;
487 n = s;
488 while (isDIGIT(*s))
489 s++;
bf4acbe4 490 while (SPACE_OR_TAB(*s))
463ee0b2 491 s++;
73659bf1 492 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 493 s++;
73659bf1
GS
494 e = t + 1;
495 }
463ee0b2 496 else {
463ee0b2 497 for (t = s; !isSPACE(*t); t++) ;
73659bf1 498 e = t;
463ee0b2 499 }
bf4acbe4 500 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
501 e++;
502 if (*e != '\n' && *e != '\0')
503 return; /* false alarm */
504
463ee0b2
LW
505 ch = *t;
506 *t = '\0';
f4dd75d9
GS
507 if (t - s > 0) {
508#ifdef USE_ITHREADS
509 Safefree(CopFILE(PL_curcop));
510#else
511 SvREFCNT_dec(CopFILEGV(PL_curcop));
512#endif
57843af0 513 CopFILE_set(PL_curcop, s);
f4dd75d9 514 }
463ee0b2 515 *t = ch;
57843af0 516 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
517}
518
ffb4593c
NT
519/*
520 * S_skipspace
521 * Called to gobble the appropriate amount and type of whitespace.
522 * Skips comments as well.
523 */
524
76e3520e 525STATIC char *
cea2e8a9 526S_skipspace(pTHX_ register char *s)
a687059c 527{
3280af22 528 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 529 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
530 s++;
531 return s;
532 }
533 for (;;) {
fd049845 534 STRLEN prevlen;
09bef843
SB
535 SSize_t oldprevlen, oldoldprevlen;
536 SSize_t oldloplen, oldunilen;
60e6418e
GS
537 while (s < PL_bufend && isSPACE(*s)) {
538 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
539 incline(s);
540 }
ffb4593c
NT
541
542 /* comment */
3280af22
NIS
543 if (s < PL_bufend && *s == '#') {
544 while (s < PL_bufend && *s != '\n')
463ee0b2 545 s++;
60e6418e 546 if (s < PL_bufend) {
463ee0b2 547 s++;
60e6418e
GS
548 if (PL_in_eval && !PL_rsfp) {
549 incline(s);
550 continue;
551 }
552 }
463ee0b2 553 }
ffb4593c
NT
554
555 /* only continue to recharge the buffer if we're at the end
556 * of the buffer, we're not reading from a source filter, and
557 * we're in normal lexing mode
558 */
09bef843
SB
559 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
560 PL_lex_state == LEX_FORMLINE)
463ee0b2 561 return s;
ffb4593c
NT
562
563 /* try to recharge the buffer */
9cbb5ea2
GS
564 if ((s = filter_gets(PL_linestr, PL_rsfp,
565 (prevlen = SvCUR(PL_linestr)))) == Nullch)
566 {
567 /* end of file. Add on the -p or -n magic */
3280af22
NIS
568 if (PL_minus_n || PL_minus_p) {
569 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
570 ";}continue{print or die qq(-p destination: $!\\n)" :
571 "");
3280af22
NIS
572 sv_catpv(PL_linestr,";}");
573 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
574 }
575 else
3280af22 576 sv_setpv(PL_linestr,";");
ffb4593c
NT
577
578 /* reset variables for next time we lex */
9cbb5ea2
GS
579 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
580 = SvPVX(PL_linestr);
3280af22 581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 582 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
583
584 /* Close the filehandle. Could be from -P preprocessor,
585 * STDIN, or a regular file. If we were reading code from
586 * STDIN (because the commandline held no -e or filename)
587 * then we don't close it, we reset it so the code can
588 * read from STDIN too.
589 */
590
3280af22
NIS
591 if (PL_preprocess && !PL_in_eval)
592 (void)PerlProc_pclose(PL_rsfp);
593 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
594 PerlIO_clearerr(PL_rsfp);
8990e307 595 else
3280af22
NIS
596 (void)PerlIO_close(PL_rsfp);
597 PL_rsfp = Nullfp;
463ee0b2
LW
598 return s;
599 }
ffb4593c
NT
600
601 /* not at end of file, so we only read another line */
09bef843
SB
602 /* make corresponding updates to old pointers, for yyerror() */
603 oldprevlen = PL_oldbufptr - PL_bufend;
604 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
605 if (PL_last_uni)
606 oldunilen = PL_last_uni - PL_bufend;
607 if (PL_last_lop)
608 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
609 PL_linestart = PL_bufptr = s + prevlen;
610 PL_bufend = s + SvCUR(PL_linestr);
611 s = PL_bufptr;
09bef843
SB
612 PL_oldbufptr = s + oldprevlen;
613 PL_oldoldbufptr = s + oldoldprevlen;
614 if (PL_last_uni)
615 PL_last_uni = s + oldunilen;
616 if (PL_last_lop)
617 PL_last_lop = s + oldloplen;
a0d0e21e 618 incline(s);
ffb4593c
NT
619
620 /* debugger active and we're not compiling the debugger code,
621 * so store the line into the debugger's array of lines
622 */
3280af22 623 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
624 SV *sv = NEWSV(85,0);
625
626 sv_upgrade(sv, SVt_PVMG);
3280af22 627 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 628 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 629 }
463ee0b2 630 }
a687059c 631}
378cc40b 632
ffb4593c
NT
633/*
634 * S_check_uni
635 * Check the unary operators to ensure there's no ambiguity in how they're
636 * used. An ambiguous piece of code would be:
637 * rand + 5
638 * This doesn't mean rand() + 5. Because rand() is a unary operator,
639 * the +5 is its argument.
640 */
641
76e3520e 642STATIC void
cea2e8a9 643S_check_uni(pTHX)
ba106d47 644{
2f3197b3 645 char *s;
a0d0e21e 646 char *t;
2f3197b3 647
3280af22 648 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 649 return;
3280af22
NIS
650 while (isSPACE(*PL_last_uni))
651 PL_last_uni++;
7e2040f0 652 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 653 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 654 return;
0453d815 655 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 656 char ch = *s;
0453d815 657 *s = '\0';
4e553d73
NIS
658 Perl_warner(aTHX_ WARN_AMBIGUOUS,
659 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
660 PL_last_uni);
661 *s = ch;
662 }
2f3197b3
LW
663}
664
ffb4593c
NT
665/* workaround to replace the UNI() macro with a function. Only the
666 * hints/uts.sh file mentions this. Other comments elsewhere in the
667 * source indicate Microport Unix might need it too.
668 */
669
ffed7fef
LW
670#ifdef CRIPPLED_CC
671
672#undef UNI
ffed7fef 673#define UNI(f) return uni(f,s)
ffed7fef 674
76e3520e 675STATIC int
cea2e8a9 676S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
677{
678 yylval.ival = f;
3280af22
NIS
679 PL_expect = XTERM;
680 PL_bufptr = s;
8f872242
NIS
681 PL_last_uni = PL_oldbufptr;
682 PL_last_lop_op = f;
ffed7fef
LW
683 if (*s == '(')
684 return FUNC1;
685 s = skipspace(s);
686 if (*s == '(')
687 return FUNC1;
688 else
689 return UNIOP;
690}
691
a0d0e21e
LW
692#endif /* CRIPPLED_CC */
693
ffb4593c
NT
694/*
695 * LOP : macro to build a list operator. Its behaviour has been replaced
696 * with a subroutine, S_lop() for which LOP is just another name.
697 */
698
a0d0e21e
LW
699#define LOP(f,x) return lop(f,x,s)
700
ffb4593c
NT
701/*
702 * S_lop
703 * Build a list operator (or something that might be one). The rules:
704 * - if we have a next token, then it's a list operator [why?]
705 * - if the next thing is an opening paren, then it's a function
706 * - else it's a list operator
707 */
708
76e3520e 709STATIC I32
a0be28da 710S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 711{
79072805 712 yylval.ival = f;
35c8bce7 713 CLINE;
075953c3 714 REPORT("lop", f)
3280af22
NIS
715 PL_expect = x;
716 PL_bufptr = s;
717 PL_last_lop = PL_oldbufptr;
718 PL_last_lop_op = f;
719 if (PL_nexttoke)
a0d0e21e 720 return LSTOP;
79072805
LW
721 if (*s == '(')
722 return FUNC;
723 s = skipspace(s);
724 if (*s == '(')
725 return FUNC;
726 else
727 return LSTOP;
728}
729
ffb4593c
NT
730/*
731 * S_force_next
9cbb5ea2 732 * When the lexer realizes it knows the next token (for instance,
ffb4593c 733 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
734 * to know what token to return the next time the lexer is called. Caller
735 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
736 * handles the token correctly.
ffb4593c
NT
737 */
738
4e553d73 739STATIC void
cea2e8a9 740S_force_next(pTHX_ I32 type)
79072805 741{
3280af22
NIS
742 PL_nexttype[PL_nexttoke] = type;
743 PL_nexttoke++;
744 if (PL_lex_state != LEX_KNOWNEXT) {
745 PL_lex_defer = PL_lex_state;
746 PL_lex_expect = PL_expect;
747 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
748 }
749}
750
ffb4593c
NT
751/*
752 * S_force_word
753 * When the lexer knows the next thing is a word (for instance, it has
754 * just seen -> and it knows that the next char is a word char, then
755 * it calls S_force_word to stick the next word into the PL_next lookahead.
756 *
757 * Arguments:
b1b65b59 758 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
759 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
760 * int check_keyword : if true, Perl checks to make sure the word isn't
761 * a keyword (do this if the word is a label, e.g. goto FOO)
762 * int allow_pack : if true, : characters will also be allowed (require,
763 * use, etc. do this)
9cbb5ea2 764 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
765 */
766
76e3520e 767STATIC char *
cea2e8a9 768S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 769{
463ee0b2
LW
770 register char *s;
771 STRLEN len;
4e553d73 772
463ee0b2
LW
773 start = skipspace(start);
774 s = start;
7e2040f0 775 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 776 (allow_pack && *s == ':') ||
15f0808c 777 (allow_initial_tick && *s == '\'') )
a0d0e21e 778 {
3280af22
NIS
779 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
780 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
781 return start;
782 if (token == METHOD) {
783 s = skipspace(s);
784 if (*s == '(')
3280af22 785 PL_expect = XTERM;
463ee0b2 786 else {
3280af22 787 PL_expect = XOPERATOR;
463ee0b2 788 }
79072805 789 }
3280af22
NIS
790 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
791 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
792 force_next(token);
793 }
794 return s;
795}
796
ffb4593c
NT
797/*
798 * S_force_ident
9cbb5ea2 799 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
800 * text only contains the "foo" portion. The first argument is a pointer
801 * to the "foo", and the second argument is the type symbol to prefix.
802 * Forces the next token to be a "WORD".
9cbb5ea2 803 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
804 */
805
76e3520e 806STATIC void
cea2e8a9 807S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
808{
809 if (s && *s) {
11343788 810 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 811 PL_nextval[PL_nexttoke].opval = o;
79072805 812 force_next(WORD);
748a9306 813 if (kind) {
11343788 814 o->op_private = OPpCONST_ENTERED;
55497cff
PP
815 /* XXX see note in pp_entereval() for why we forgo typo
816 warnings if the symbol must be introduced in an eval.
817 GSAR 96-10-12 */
3280af22 818 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
819 kind == '$' ? SVt_PV :
820 kind == '@' ? SVt_PVAV :
821 kind == '%' ? SVt_PVHV :
822 SVt_PVGV
823 );
748a9306 824 }
79072805
LW
825 }
826}
827
1571675a
GS
828NV
829Perl_str_to_version(pTHX_ SV *sv)
830{
831 NV retval = 0.0;
832 NV nshift = 1.0;
833 STRLEN len;
834 char *start = SvPVx(sv,len);
3aa33fe5 835 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
836 char *end = start + len;
837 while (start < end) {
ba210ebe 838 STRLEN skip;
1571675a
GS
839 UV n;
840 if (utf)
9041c2e3 841 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
842 else {
843 n = *(U8*)start;
844 skip = 1;
845 }
846 retval += ((NV)n)/nshift;
847 start += skip;
848 nshift *= 1000;
849 }
850 return retval;
851}
852
4e553d73 853/*
ffb4593c
NT
854 * S_force_version
855 * Forces the next token to be a version number.
856 */
857
76e3520e 858STATIC char *
cea2e8a9 859S_force_version(pTHX_ char *s)
89bfa8cd
PP
860{
861 OP *version = Nullop;
44dcb63b 862 char *d;
89bfa8cd
PP
863
864 s = skipspace(s);
865
44dcb63b 866 d = s;
dd629d5b 867 if (*d == 'v')
44dcb63b 868 d++;
44dcb63b 869 if (isDIGIT(*d)) {
a7cb1f99 870 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd
PP
881 }
882 }
883
884 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 885 PL_nextval[PL_nexttoke].opval = version;
4e553d73 886 force_next(WORD);
89bfa8cd
PP
887
888 return (s);
889}
890
ffb4593c
NT
891/*
892 * S_tokeq
893 * Tokenize a quoted string passed in as an SV. It finds the next
894 * chunk, up to end of string or a backslash. It may make a new
895 * SV containing that chunk (if HINT_NEW_STRING is on). It also
896 * turns \\ into \.
897 */
898
76e3520e 899STATIC SV *
cea2e8a9 900S_tokeq(pTHX_ SV *sv)
79072805
LW
901{
902 register char *s;
903 register char *send;
904 register char *d;
b3ac6de7
IZ
905 STRLEN len = 0;
906 SV *pv = sv;
79072805
LW
907
908 if (!SvLEN(sv))
b3ac6de7 909 goto finish;
79072805 910
a0d0e21e 911 s = SvPV_force(sv, len);
21a311ee 912 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 913 goto finish;
463ee0b2 914 send = s + len;
79072805
LW
915 while (s < send && *s != '\\')
916 s++;
917 if (s == send)
b3ac6de7 918 goto finish;
79072805 919 d = s;
be4731d2 920 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 921 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
922 if (SvUTF8(sv))
923 SvUTF8_on(pv);
924 }
79072805
LW
925 while (s < send) {
926 if (*s == '\\') {
a0d0e21e 927 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
928 s++; /* all that, just for this */
929 }
930 *d++ = *s++;
931 }
932 *d = '\0';
463ee0b2 933 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 934 finish:
3280af22 935 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 936 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
937 return sv;
938}
939
ffb4593c
NT
940/*
941 * Now come three functions related to double-quote context,
942 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
943 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
944 * interact with PL_lex_state, and create fake ( ... ) argument lists
945 * to handle functions and concatenation.
946 * They assume that whoever calls them will be setting up a fake
947 * join call, because each subthing puts a ',' after it. This lets
948 * "lower \luPpEr"
949 * become
950 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
951 *
952 * (I'm not sure whether the spurious commas at the end of lcfirst's
953 * arguments and join's arguments are created or not).
954 */
955
956/*
957 * S_sublex_start
958 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
959 *
960 * Pattern matching will set PL_lex_op to the pattern-matching op to
961 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
962 *
963 * OP_CONST and OP_READLINE are easy--just make the new op and return.
964 *
965 * Everything else becomes a FUNC.
966 *
967 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
968 * had an OP_CONST or OP_READLINE). This just sets us up for a
969 * call to S_sublex_push().
970 */
971
76e3520e 972STATIC I32
cea2e8a9 973S_sublex_start(pTHX)
79072805
LW
974{
975 register I32 op_type = yylval.ival;
79072805
LW
976
977 if (op_type == OP_NULL) {
3280af22
NIS
978 yylval.opval = PL_lex_op;
979 PL_lex_op = Nullop;
79072805
LW
980 return THING;
981 }
982 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 983 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
984
985 if (SvTYPE(sv) == SVt_PVIV) {
986 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
987 STRLEN len;
988 char *p;
989 SV *nsv;
990
991 p = SvPV(sv, len);
79cb57f6 992 nsv = newSVpvn(p, len);
01ec43d0
GS
993 if (SvUTF8(sv))
994 SvUTF8_on(nsv);
b3ac6de7
IZ
995 SvREFCNT_dec(sv);
996 sv = nsv;
4e553d73 997 }
b3ac6de7 998 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 999 PL_lex_stuff = Nullsv;
79072805
LW
1000 return THING;
1001 }
1002
3280af22
NIS
1003 PL_sublex_info.super_state = PL_lex_state;
1004 PL_sublex_info.sub_inwhat = op_type;
1005 PL_sublex_info.sub_op = PL_lex_op;
1006 PL_lex_state = LEX_INTERPPUSH;
55497cff 1007
3280af22
NIS
1008 PL_expect = XTERM;
1009 if (PL_lex_op) {
1010 yylval.opval = PL_lex_op;
1011 PL_lex_op = Nullop;
55497cff
PP
1012 return PMFUNC;
1013 }
1014 else
1015 return FUNC;
1016}
1017
ffb4593c
NT
1018/*
1019 * S_sublex_push
1020 * Create a new scope to save the lexing state. The scope will be
1021 * ended in S_sublex_done. Returns a '(', starting the function arguments
1022 * to the uc, lc, etc. found before.
1023 * Sets PL_lex_state to LEX_INTERPCONCAT.
1024 */
1025
76e3520e 1026STATIC I32
cea2e8a9 1027S_sublex_push(pTHX)
55497cff 1028{
f46d017c 1029 ENTER;
55497cff 1030
3280af22
NIS
1031 PL_lex_state = PL_sublex_info.super_state;
1032 SAVEI32(PL_lex_dojoin);
1033 SAVEI32(PL_lex_brackets);
3280af22
NIS
1034 SAVEI32(PL_lex_casemods);
1035 SAVEI32(PL_lex_starts);
1036 SAVEI32(PL_lex_state);
7766f137 1037 SAVEVPTR(PL_lex_inpat);
3280af22 1038 SAVEI32(PL_lex_inwhat);
57843af0 1039 SAVECOPLINE(PL_curcop);
3280af22
NIS
1040 SAVEPPTR(PL_bufptr);
1041 SAVEPPTR(PL_oldbufptr);
1042 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1043 SAVEPPTR(PL_last_lop);
1044 SAVEPPTR(PL_last_uni);
3280af22
NIS
1045 SAVEPPTR(PL_linestart);
1046 SAVESPTR(PL_linestr);
1047 SAVEPPTR(PL_lex_brackstack);
1048 SAVEPPTR(PL_lex_casestack);
1049
1050 PL_linestr = PL_lex_stuff;
1051 PL_lex_stuff = Nullsv;
1052
9cbb5ea2
GS
1053 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1054 = SvPVX(PL_linestr);
3280af22 1055 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1056 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1057 SAVEFREESV(PL_linestr);
1058
1059 PL_lex_dojoin = FALSE;
1060 PL_lex_brackets = 0;
3280af22
NIS
1061 New(899, PL_lex_brackstack, 120, char);
1062 New(899, PL_lex_casestack, 12, char);
1063 SAVEFREEPV(PL_lex_brackstack);
1064 SAVEFREEPV(PL_lex_casestack);
1065 PL_lex_casemods = 0;
1066 *PL_lex_casestack = '\0';
1067 PL_lex_starts = 0;
1068 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1069 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1070
1071 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1072 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1073 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1074 else
3280af22 1075 PL_lex_inpat = Nullop;
79072805 1076
55497cff 1077 return '(';
79072805
LW
1078}
1079
ffb4593c
NT
1080/*
1081 * S_sublex_done
1082 * Restores lexer state after a S_sublex_push.
1083 */
1084
76e3520e 1085STATIC I32
cea2e8a9 1086S_sublex_done(pTHX)
79072805 1087{
3280af22 1088 if (!PL_lex_starts++) {
9aa983d2
JH
1089 SV *sv = newSVpvn("",0);
1090 if (SvUTF8(PL_linestr))
1091 SvUTF8_on(sv);
3280af22 1092 PL_expect = XOPERATOR;
9aa983d2 1093 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1094 return THING;
1095 }
1096
3280af22
NIS
1097 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1098 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1099 return yylex();
79072805
LW
1100 }
1101
ffb4593c 1102 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1103 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1104 PL_linestr = PL_lex_repl;
1105 PL_lex_inpat = 0;
1106 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1107 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1108 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1109 SAVEFREESV(PL_linestr);
1110 PL_lex_dojoin = FALSE;
1111 PL_lex_brackets = 0;
3280af22
NIS
1112 PL_lex_casemods = 0;
1113 *PL_lex_casestack = '\0';
1114 PL_lex_starts = 0;
25da4f38 1115 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1116 PL_lex_state = LEX_INTERPNORMAL;
1117 PL_lex_starts++;
e9fa98b2
HS
1118 /* we don't clear PL_lex_repl here, so that we can check later
1119 whether this is an evalled subst; that means we rely on the
1120 logic to ensure sublex_done() is called again only via the
1121 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1122 }
e9fa98b2 1123 else {
3280af22 1124 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1125 PL_lex_repl = Nullsv;
1126 }
79072805 1127 return ',';
ffed7fef
LW
1128 }
1129 else {
f46d017c 1130 LEAVE;
3280af22
NIS
1131 PL_bufend = SvPVX(PL_linestr);
1132 PL_bufend += SvCUR(PL_linestr);
1133 PL_expect = XOPERATOR;
09bef843 1134 PL_sublex_info.sub_inwhat = 0;
79072805 1135 return ')';
ffed7fef
LW
1136 }
1137}
1138
02aa26ce
NT
1139/*
1140 scan_const
1141
1142 Extracts a pattern, double-quoted string, or transliteration. This
1143 is terrifying code.
1144
3280af22
NIS
1145 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1146 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1147 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1148
9b599b2a
GS
1149 Returns a pointer to the character scanned up to. Iff this is
1150 advanced from the start pointer supplied (ie if anything was
1151 successfully parsed), will leave an OP for the substring scanned
1152 in yylval. Caller must intuit reason for not parsing further
1153 by looking at the next characters herself.
1154
02aa26ce
NT
1155 In patterns:
1156 backslashes:
1157 double-quoted style: \r and \n
1158 regexp special ones: \D \s
1159 constants: \x3
1160 backrefs: \1 (deprecated in substitution replacements)
1161 case and quoting: \U \Q \E
1162 stops on @ and $, but not for $ as tail anchor
1163
1164 In transliterations:
1165 characters are VERY literal, except for - not at the start or end
1166 of the string, which indicates a range. scan_const expands the
1167 range to the full set of intermediate characters.
1168
1169 In double-quoted strings:
1170 backslashes:
1171 double-quoted style: \r and \n
1172 constants: \x3
1173 backrefs: \1 (deprecated)
1174 case and quoting: \U \Q \E
1175 stops on @ and $
1176
1177 scan_const does *not* construct ops to handle interpolated strings.
1178 It stops processing as soon as it finds an embedded $ or @ variable
1179 and leaves it to the caller to work out what's going on.
1180
1181 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1182
1183 $ in pattern could be $foo or could be tail anchor. Assumption:
1184 it's a tail anchor if $ is the last thing in the string, or if it's
1185 followed by one of ")| \n\t"
1186
1187 \1 (backreferences) are turned into $1
1188
1189 The structure of the code is
1190 while (there's a character to process) {
1191 handle transliteration ranges
1192 skip regexp comments
1193 skip # initiated comments in //x patterns
1194 check for embedded @foo
1195 check for embedded scalars
1196 if (backslash) {
1197 leave intact backslashes from leave (below)
1198 deprecate \1 in strings and sub replacements
1199 handle string-changing backslashes \l \U \Q \E, etc.
1200 switch (what was escaped) {
1201 handle - in a transliteration (becomes a literal -)
1202 handle \132 octal characters
1203 handle 0x15 hex characters
1204 handle \cV (control V)
1205 handle printf backslashes (\f, \r, \n, etc)
1206 } (end switch)
1207 } (end if backslash)
1208 } (end while character to read)
4e553d73 1209
02aa26ce
NT
1210*/
1211
76e3520e 1212STATIC char *
cea2e8a9 1213S_scan_const(pTHX_ char *start)
79072805 1214{
3280af22 1215 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1216 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1217 register char *s = start; /* start of the constant */
1218 register char *d = SvPVX(sv); /* destination for copies */
1219 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1220 bool didrange = FALSE; /* did we just finish a range? */
9aa983d2
JH
1221 bool has_utf8 = (PL_linestr && SvUTF8(PL_linestr));
1222 /* the constant is UTF8 */
012bcf8d
GS
1223 UV uv;
1224
ac2262e3 1225 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1226 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1227 : UTF;
89491803 1228 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1229 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1230 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1231 : UTF;
dff6d3cd 1232 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1233 PL_lex_inpat
4a2d328f 1234 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1235 : "";
79072805
LW
1236
1237 while (s < send || dorange) {
02aa26ce 1238 /* get transliterations out of the way (they're most literal) */
3280af22 1239 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1240 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1241 if (dorange) {
1ba5c669
JH
1242 I32 i; /* current expanded character */
1243 I32 min; /* first character in range */
1244 I32 max; /* last character in range */
02aa26ce 1245
8973db79
JH
1246 if (utf) {
1247 char *c = (char*)utf8_hop((U8*)d, -1);
1248 char *e = d++;
1249 while (e-- > c)
1250 *(e + 1) = *e;
8b107d6c 1251 *c = (char)0xff;
8973db79
JH
1252 /* mark the range as done, and continue */
1253 dorange = FALSE;
1254 didrange = TRUE;
1255 continue;
1256 }
02aa26ce 1257 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1258 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1259 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1260 d -= 2; /* eat the first char and the - */
1261
8ada0baa
JH
1262 min = (U8)*d; /* first char in range */
1263 max = (U8)d[1]; /* last char in range */
1264
c2e66d9e 1265 if (min > max) {
01ec43d0 1266 Perl_croak(aTHX_
1ba5c669
JH
1267 "Invalid [] range \"%c-%c\" in transliteration operator",
1268 (char)min, (char)max);
c2e66d9e
GS
1269 }
1270
c7f1f016 1271#ifdef EBCDIC
8ada0baa
JH
1272 if ((isLOWER(min) && isLOWER(max)) ||
1273 (isUPPER(min) && isUPPER(max))) {
1274 if (isLOWER(min)) {
1275 for (i = min; i <= max; i++)
1276 if (isLOWER(i))
db42d148 1277 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1278 } else {
1279 for (i = min; i <= max; i++)
1280 if (isUPPER(i))
db42d148 1281 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1282 }
1283 }
1284 else
1285#endif
1286 for (i = min; i <= max; i++)
1287 *d++ = i;
02aa26ce
NT
1288
1289 /* mark the range as done, and continue */
79072805 1290 dorange = FALSE;
01ec43d0 1291 didrange = TRUE;
79072805 1292 continue;
4e553d73 1293 }
02aa26ce
NT
1294
1295 /* range begins (ignore - as first or last char) */
79072805 1296 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1297 if (didrange) {
1fafa243 1298 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1299 }
a0ed51b3 1300 if (utf) {
a176fa2a 1301 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1302 s++;
1303 continue;
1304 }
79072805
LW
1305 dorange = TRUE;
1306 s++;
01ec43d0
GS
1307 }
1308 else {
1309 didrange = FALSE;
1310 }
79072805 1311 }
02aa26ce
NT
1312
1313 /* if we get here, we're not doing a transliteration */
1314
0f5d15d6
IZ
1315 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1316 except for the last char, which will be done separately. */
3280af22 1317 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1318 if (s[2] == '#') {
1319 while (s < send && *s != ')')
db42d148 1320 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1321 }
1322 else if (s[2] == '{' /* This should match regcomp.c */
1323 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1324 {
cc6b7395 1325 I32 count = 1;
0f5d15d6 1326 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1327 char c;
1328
d9f97599
GS
1329 while (count && (c = *regparse)) {
1330 if (c == '\\' && regparse[1])
1331 regparse++;
4e553d73 1332 else if (c == '{')
cc6b7395 1333 count++;
4e553d73 1334 else if (c == '}')
cc6b7395 1335 count--;
d9f97599 1336 regparse++;
cc6b7395 1337 }
5bdf89e7
IZ
1338 if (*regparse != ')') {
1339 regparse--; /* Leave one char for continuation. */
cc6b7395 1340 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1341 }
0f5d15d6 1342 while (s < regparse)
db42d148 1343 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1344 }
748a9306 1345 }
02aa26ce
NT
1346
1347 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1348 else if (*s == '#' && PL_lex_inpat &&
1349 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1350 while (s+1 < send && *s != '\n')
db42d148 1351 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1352 }
02aa26ce 1353
5d1d4326
JH
1354 /* check for embedded arrays
1355 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1356 */
7e2040f0 1357 else if (*s == '@' && s[1]
5d1d4326 1358 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1359 break;
02aa26ce
NT
1360
1361 /* check for embedded scalars. only stop if we're sure it's a
1362 variable.
1363 */
79072805 1364 else if (*s == '$') {
3280af22 1365 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1366 break;
c277df42 1367 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1368 break; /* in regexp, $ might be tail anchor */
1369 }
02aa26ce
NT
1370
1371 /* backslashes */
79072805
LW
1372 if (*s == '\\' && s+1 < send) {
1373 s++;
02aa26ce
NT
1374
1375 /* some backslashes we leave behind */
c9f97d15 1376 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1377 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1378 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1379 continue;
1380 }
02aa26ce
NT
1381
1382 /* deprecate \1 in strings and substitution replacements */
3280af22 1383 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1384 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1385 {
599cee73 1386 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1387 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1388 *--s = '$';
1389 break;
1390 }
02aa26ce
NT
1391
1392 /* string-change backslash escapes */
3280af22 1393 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1394 --s;
1395 break;
1396 }
02aa26ce
NT
1397
1398 /* if we get here, it's either a quoted -, or a digit */
79072805 1399 switch (*s) {
02aa26ce
NT
1400
1401 /* quoted - in transliterations */
79072805 1402 case '-':
3280af22 1403 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1404 *d++ = *s++;
1405 continue;
1406 }
1407 /* FALL THROUGH */
1408 default:
11b8faa4 1409 {
7e84c16c 1410 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1411 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1412 "Unrecognized escape \\%c passed through",
1413 *s);
1414 /* default action is to copy the quoted character */
f9a63242 1415 goto default_action;
11b8faa4 1416 }
02aa26ce
NT
1417
1418 /* \132 indicates an octal constant */
79072805
LW
1419 case '0': case '1': case '2': case '3':
1420 case '4': case '5': case '6': case '7':
ba210ebe
JH
1421 {
1422 STRLEN len = 0; /* disallow underscores */
1423 uv = (UV)scan_oct(s, 3, &len);
1424 s += len;
1425 }
012bcf8d 1426 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1427
1428 /* \x24 indicates a hex constant */
79072805 1429 case 'x':
a0ed51b3
LW
1430 ++s;
1431 if (*s == '{') {
1432 char* e = strchr(s, '}');
adaeee49 1433 if (!e) {
a0ed51b3 1434 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1435 e = s;
1436 }
89491803 1437 else {
ba210ebe
JH
1438 STRLEN len = 1; /* allow underscores */
1439 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1440 }
1441 s = e + 1;
a0ed51b3
LW
1442 }
1443 else {
ba210ebe
JH
1444 {
1445 STRLEN len = 0; /* disallow underscores */
1446 uv = (UV)scan_hex(s, 2, &len);
1447 s += len;
1448 }
012bcf8d
GS
1449 }
1450
1451 NUM_ESCAPE_INSERT:
1452 /* Insert oct or hex escaped character.
301d3d20 1453 * There will always enough room in sv since such
db42d148 1454 * escapes will be longer than any UTF-8 sequence
301d3d20 1455 * they can end up as. */
ba7cea30 1456
c7f1f016
NIS
1457 /* We need to map to chars to ASCII before doing the tests
1458 to cover EBCDIC
1459 */
63cd0674 1460 if (!UTF8_IS_INVARIANT(uv)) {
9aa983d2 1461 if (!has_utf8 && uv > 255) {
301d3d20
JH
1462 /* Might need to recode whatever we have
1463 * accumulated so far if it contains any
1464 * hibit chars.
1465 *
1466 * (Can't we keep track of that and avoid
1467 * this rescan? --jhi)
012bcf8d 1468 */
c7f1f016 1469 int hicount = 0;
63cd0674
NIS
1470 U8 *c;
1471 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
1472 if (!UTF8_IS_INVARIANT(*c)) {
012bcf8d 1473 hicount++;
db42d148 1474 }
012bcf8d 1475 }
63cd0674 1476 if (hicount) {
db42d148
NIS
1477 STRLEN offset = d - SvPVX(sv);
1478 U8 *src, *dst;
1479 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1480 src = (U8 *)d - 1;
1481 dst = src+hicount;
1482 d += hicount;
1483 while (src >= (U8 *)SvPVX(sv)) {
63cd0674
NIS
1484 if (!UTF8_IS_INVARIANT(*src)) {
1485 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1486 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1487 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1488 }
1489 else {
63cd0674 1490 *dst-- = *src;
012bcf8d 1491 }
c7f1f016 1492 src--;
012bcf8d
GS
1493 }
1494 }
1495 }
1496
9aa983d2 1497 if (has_utf8 || uv > 255) {
9041c2e3 1498 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1499 has_utf8 = TRUE;
f9a63242
JH
1500 if (PL_lex_inwhat == OP_TRANS &&
1501 PL_sublex_info.sub_op) {
1502 PL_sublex_info.sub_op->op_private |=
1503 (PL_lex_repl ? OPpTRANS_FROM_UTF
1504 : OPpTRANS_TO_UTF);
1505 utf = TRUE;
1506 }
012bcf8d 1507 }
a0ed51b3 1508 else {
012bcf8d 1509 *d++ = (char)uv;
a0ed51b3 1510 }
012bcf8d
GS
1511 }
1512 else {
db42d148 1513 *d++ = NATIVE_TO_NEED(has_utf8,uv);
a0ed51b3 1514 }
79072805 1515 continue;
02aa26ce 1516
4a2d328f
IZ
1517 /* \N{latin small letter a} is a named character */
1518 case 'N':
55eda711 1519 ++s;
423cee85
JH
1520 if (*s == '{') {
1521 char* e = strchr(s, '}');
155aba94 1522 SV *res;
423cee85
JH
1523 STRLEN len;
1524 char *str;
4e553d73 1525
423cee85 1526 if (!e) {
5777a3f7 1527 yyerror("Missing right brace on \\N{}");
423cee85
JH
1528 e = s - 1;
1529 goto cont_scan;
1530 }
55eda711
JH
1531 res = newSVpvn(s + 1, e - s - 1);
1532 res = new_constant( Nullch, 0, "charnames",
1533 res, Nullsv, "\\N{...}" );
f9a63242
JH
1534 if (has_utf8)
1535 sv_utf8_upgrade(res);
423cee85 1536 str = SvPV(res,len);
89491803 1537 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1538 char *ostart = SvPVX(sv);
1539 SvCUR_set(sv, d - ostart);
1540 SvPOK_on(sv);
e4f3eed8 1541 *d = '\0';
f08d6ad9 1542 sv_utf8_upgrade(sv);
d2f449dd
SB
1543 /* this just broke our allocation above... */
1544 SvGROW(sv, send - start);
f08d6ad9 1545 d = SvPVX(sv) + SvCUR(sv);
89491803 1546 has_utf8 = TRUE;
f08d6ad9 1547 }
423cee85
JH
1548 if (len > e - s + 4) {
1549 char *odest = SvPVX(sv);
1550
8973db79 1551 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1552 d = SvPVX(sv) + (d - odest);
1553 }
1554 Copy(str, d, len, char);
1555 d += len;
1556 SvREFCNT_dec(res);
1557 cont_scan:
1558 s = e + 1;
1559 }
1560 else
5777a3f7 1561 yyerror("Missing braces on \\N{}");
423cee85
JH
1562 continue;
1563
02aa26ce 1564 /* \c is a control character */
79072805
LW
1565 case 'c':
1566 s++;
ba210ebe
JH
1567 {
1568 U8 c = *s++;
c7f1f016
NIS
1569#ifdef EBCDIC
1570 if (isLOWER(c))
1571 c = toUPPER(c);
1572#endif
db42d148 1573 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1574 }
79072805 1575 continue;
02aa26ce
NT
1576
1577 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1578 case 'b':
db42d148 1579 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1580 break;
1581 case 'n':
db42d148 1582 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1583 break;
1584 case 'r':
db42d148 1585 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1586 break;
1587 case 'f':
db42d148 1588 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1589 break;
1590 case 't':
db42d148 1591 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1592 break;
34a3fe2a 1593 case 'e':
db42d148 1594 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1595 break;
1596 case 'a':
db42d148 1597 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1598 break;
02aa26ce
NT
1599 } /* end switch */
1600
79072805
LW
1601 s++;
1602 continue;
02aa26ce
NT
1603 } /* end if (backslash) */
1604
f9a63242 1605 default_action:
db42d148 1606 /* The 'has_utf8' here is very dubious */
63cd0674 1607 if (!UTF8_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
a5a960be
RI
1608 STRLEN len = (STRLEN) -1;
1609 UV uv;
1610 if (this_utf8) {
9041c2e3 1611 uv = utf8n_to_uvchr((U8*)s, send - s, &len, 0);
a5a960be
RI
1612 }
1613 if (len == (STRLEN)-1) {
1614 /* Illegal UTF8 (a high-bit byte), make it valid. */
1615 char *old_pvx = SvPVX(sv);
1616 /* need space for one extra char (NOTE: SvCUR() not set here) */
1617 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
9041c2e3 1618 d = (char*)uvchr_to_utf8((U8*)d, (U8)*s++);
a5a960be
RI
1619 }
1620 else {
1621 while (len--)
1622 *d++ = *s++;
1623 }
1624 has_utf8 = TRUE;
f9a63242
JH
1625 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1626 PL_sublex_info.sub_op->op_private |=
1627 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1628 utf = TRUE;
1629 }
a5a960be
RI
1630 continue;
1631 }
db42d148 1632 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
02aa26ce
NT
1633 } /* while loop to process each character */
1634
1635 /* terminate the string and set up the sv */
79072805 1636 *d = '\0';
463ee0b2 1637 SvCUR_set(sv, d - SvPVX(sv));
79072805 1638 SvPOK_on(sv);
89491803 1639 if (has_utf8)
7e2040f0 1640 SvUTF8_on(sv);
79072805 1641
02aa26ce 1642 /* shrink the sv if we allocated more than we used */
79072805
LW
1643 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1644 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1645 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1646 }
02aa26ce 1647
9b599b2a 1648 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1649 if (s > PL_bufptr) {
1650 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1651 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1652 sv, Nullsv,
4e553d73 1653 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1654 ? "tr"
3280af22 1655 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1656 ? "s"
1657 : "qq")));
79072805 1658 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1659 } else
8990e307 1660 SvREFCNT_dec(sv);
79072805
LW
1661 return s;
1662}
1663
ffb4593c
NT
1664/* S_intuit_more
1665 * Returns TRUE if there's more to the expression (e.g., a subscript),
1666 * FALSE otherwise.
ffb4593c
NT
1667 *
1668 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1669 *
1670 * ->[ and ->{ return TRUE
1671 * { and [ outside a pattern are always subscripts, so return TRUE
1672 * if we're outside a pattern and it's not { or [, then return FALSE
1673 * if we're in a pattern and the first char is a {
1674 * {4,5} (any digits around the comma) returns FALSE
1675 * if we're in a pattern and the first char is a [
1676 * [] returns FALSE
1677 * [SOMETHING] has a funky algorithm to decide whether it's a
1678 * character class or not. It has to deal with things like
1679 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1680 * anything else returns TRUE
1681 */
1682
9cbb5ea2
GS
1683/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1684
76e3520e 1685STATIC int
cea2e8a9 1686S_intuit_more(pTHX_ register char *s)
79072805 1687{
3280af22 1688 if (PL_lex_brackets)
79072805
LW
1689 return TRUE;
1690 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1691 return TRUE;
1692 if (*s != '{' && *s != '[')
1693 return FALSE;
3280af22 1694 if (!PL_lex_inpat)
79072805
LW
1695 return TRUE;
1696
1697 /* In a pattern, so maybe we have {n,m}. */
1698 if (*s == '{') {
1699 s++;
1700 if (!isDIGIT(*s))
1701 return TRUE;
1702 while (isDIGIT(*s))
1703 s++;
1704 if (*s == ',')
1705 s++;
1706 while (isDIGIT(*s))
1707 s++;
1708 if (*s == '}')
1709 return FALSE;
1710 return TRUE;
1711
1712 }
1713
1714 /* On the other hand, maybe we have a character class */
1715
1716 s++;
1717 if (*s == ']' || *s == '^')
1718 return FALSE;
1719 else {
ffb4593c 1720 /* this is terrifying, and it works */
79072805
LW
1721 int weight = 2; /* let's weigh the evidence */
1722 char seen[256];
f27ffc4a 1723 unsigned char un_char = 255, last_un_char;
93a17b20 1724 char *send = strchr(s,']');
3280af22 1725 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1726
1727 if (!send) /* has to be an expression */
1728 return TRUE;
1729
1730 Zero(seen,256,char);
1731 if (*s == '$')
1732 weight -= 3;
1733 else if (isDIGIT(*s)) {
1734 if (s[1] != ']') {
1735 if (isDIGIT(s[1]) && s[2] == ']')
1736 weight -= 10;
1737 }
1738 else
1739 weight -= 100;
1740 }
1741 for (; s < send; s++) {
1742 last_un_char = un_char;
1743 un_char = (unsigned char)*s;
1744 switch (*s) {
1745 case '@':
1746 case '&':
1747 case '$':
1748 weight -= seen[un_char] * 10;
7e2040f0 1749 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1750 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1751 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1752 weight -= 100;
1753 else
1754 weight -= 10;
1755 }
1756 else if (*s == '$' && s[1] &&
93a17b20
LW
1757 strchr("[#!%*<>()-=",s[1])) {
1758 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1759 weight -= 10;
1760 else
1761 weight -= 1;
1762 }
1763 break;
1764 case '\\':
1765 un_char = 254;
1766 if (s[1]) {
93a17b20 1767 if (strchr("wds]",s[1]))
79072805
LW
1768 weight += 100;
1769 else if (seen['\''] || seen['"'])
1770 weight += 1;
93a17b20 1771 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1772 weight += 40;
1773 else if (isDIGIT(s[1])) {
1774 weight += 40;
1775 while (s[1] && isDIGIT(s[1]))
1776 s++;
1777 }
1778 }
1779 else
1780 weight += 100;
1781 break;
1782 case '-':
1783 if (s[1] == '\\')
1784 weight += 50;
93a17b20 1785 if (strchr("aA01! ",last_un_char))
79072805 1786 weight += 30;
93a17b20 1787 if (strchr("zZ79~",s[1]))
79072805 1788 weight += 30;
f27ffc4a
GS
1789 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1790 weight -= 5; /* cope with negative subscript */
79072805
LW
1791 break;
1792 default:
93a17b20 1793 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1794 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1795 char *d = tmpbuf;
1796 while (isALPHA(*s))
1797 *d++ = *s++;
1798 *d = '\0';
1799 if (keyword(tmpbuf, d - tmpbuf))
1800 weight -= 150;
1801 }
1802 if (un_char == last_un_char + 1)
1803 weight += 5;
1804 weight -= seen[un_char];
1805 break;
1806 }
1807 seen[un_char]++;
1808 }
1809 if (weight >= 0) /* probably a character class */
1810 return FALSE;
1811 }
1812
1813 return TRUE;
1814}
ffed7fef 1815
ffb4593c
NT
1816/*
1817 * S_intuit_method
1818 *
1819 * Does all the checking to disambiguate
1820 * foo bar
1821 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1822 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1823 *
1824 * First argument is the stuff after the first token, e.g. "bar".
1825 *
1826 * Not a method if bar is a filehandle.
1827 * Not a method if foo is a subroutine prototyped to take a filehandle.
1828 * Not a method if it's really "Foo $bar"
1829 * Method if it's "foo $bar"
1830 * Not a method if it's really "print foo $bar"
1831 * Method if it's really "foo package::" (interpreted as package->foo)
1832 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1833 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1834 * =>
1835 */
1836
76e3520e 1837STATIC int
cea2e8a9 1838S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1839{
1840 char *s = start + (*start == '$');
3280af22 1841 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1842 STRLEN len;
1843 GV* indirgv;
1844
1845 if (gv) {
b6c543e3 1846 CV *cv;
a0d0e21e
LW
1847 if (GvIO(gv))
1848 return 0;
b6c543e3
IZ
1849 if ((cv = GvCVu(gv))) {
1850 char *proto = SvPVX(cv);
1851 if (proto) {
1852 if (*proto == ';')
1853 proto++;
1854 if (*proto == '*')
1855 return 0;
1856 }
1857 } else
a0d0e21e
LW
1858 gv = 0;
1859 }
8903cb82 1860 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1861 /* start is the beginning of the possible filehandle/object,
1862 * and s is the end of it
1863 * tmpbuf is a copy of it
1864 */
1865
a0d0e21e 1866 if (*start == '$') {
3280af22 1867 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1868 return 0;
1869 s = skipspace(s);
3280af22
NIS
1870 PL_bufptr = start;
1871 PL_expect = XREF;
a0d0e21e
LW
1872 return *s == '(' ? FUNCMETH : METHOD;
1873 }
1874 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1875 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1876 len -= 2;
1877 tmpbuf[len] = '\0';
1878 goto bare_package;
1879 }
1880 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1881 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1882 return 0;
1883 /* filehandle or package name makes it a method */
89bfa8cd 1884 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1885 s = skipspace(s);
3280af22 1886 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1887 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1888 bare_package:
3280af22 1889 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1890 newSVpvn(tmpbuf,len));
3280af22
NIS
1891 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1892 PL_expect = XTERM;
a0d0e21e 1893 force_next(WORD);
3280af22 1894 PL_bufptr = s;
a0d0e21e
LW
1895 return *s == '(' ? FUNCMETH : METHOD;
1896 }
1897 }
1898 return 0;
1899}
1900
ffb4593c
NT
1901/*
1902 * S_incl_perldb
1903 * Return a string of Perl code to load the debugger. If PERL5DB
1904 * is set, it will return the contents of that, otherwise a
1905 * compile-time require of perl5db.pl.
1906 */
1907
76e3520e 1908STATIC char*
cea2e8a9 1909S_incl_perldb(pTHX)
a0d0e21e 1910{
3280af22 1911 if (PL_perldb) {
76e3520e 1912 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1913
1914 if (pdb)
1915 return pdb;
61bb5906 1916 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1917 return "BEGIN { require 'perl5db.pl' }";
1918 }
1919 return "";
1920}
1921
1922
16d20bd9 1923/* Encoded script support. filter_add() effectively inserts a
4e553d73 1924 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1925 * Note that the filter function only applies to the current source file
1926 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1927 *
1928 * The datasv parameter (which may be NULL) can be used to pass
1929 * private data to this instance of the filter. The filter function
1930 * can recover the SV using the FILTER_DATA macro and use it to
1931 * store private buffers and state information.
1932 *
1933 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1934 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1935 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1936 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1937 * private use must be set using malloc'd pointers.
1938 */
16d20bd9
AD
1939
1940SV *
864dbfa3 1941Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1942{
f4c556ac
GS
1943 if (!funcp)
1944 return Nullsv;
1945
3280af22
NIS
1946 if (!PL_rsfp_filters)
1947 PL_rsfp_filters = newAV();
16d20bd9 1948 if (!datasv)
8c52afec 1949 datasv = NEWSV(255,0);
16d20bd9 1950 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1951 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1952 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1953 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1954 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1955 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1956 av_unshift(PL_rsfp_filters, 1);
1957 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1958 return(datasv);
1959}
4e553d73 1960
16d20bd9
AD
1961
1962/* Delete most recently added instance of this filter function. */
a0d0e21e 1963void
864dbfa3 1964Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1965{
e0c19803 1966 SV *datasv;
f4c556ac 1967 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1968 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1969 return;
1970 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1971 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1972 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1973 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1974 IoANY(datasv) = (void *)NULL;
3280af22 1975 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1976
16d20bd9
AD
1977 return;
1978 }
1979 /* we need to search for the correct entry and clear it */
cea2e8a9 1980 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1981}
1982
1983
1984/* Invoke the n'th filter function for the current rsfp. */
1985I32
864dbfa3 1986Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1987
1988
8ac85365 1989 /* 0 = read one text line */
a0d0e21e 1990{
16d20bd9
AD
1991 filter_t funcp;
1992 SV *datasv = NULL;
e50aee73 1993
3280af22 1994 if (!PL_rsfp_filters)
16d20bd9 1995 return -1;
3280af22 1996 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1997 /* Provide a default input filter to make life easy. */
1998 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1999 DEBUG_P(PerlIO_printf(Perl_debug_log,
2000 "filter_read %d: from rsfp\n", idx));
4e553d73 2001 if (maxlen) {
16d20bd9
AD
2002 /* Want a block */
2003 int len ;
2004 int old_len = SvCUR(buf_sv) ;
2005
2006 /* ensure buf_sv is large enough */
2007 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2008 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2009 if (PerlIO_error(PL_rsfp))
37120919
AD
2010 return -1; /* error */
2011 else
2012 return 0 ; /* end of file */
2013 }
16d20bd9
AD
2014 SvCUR_set(buf_sv, old_len + len) ;
2015 } else {
2016 /* Want a line */
3280af22
NIS
2017 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2018 if (PerlIO_error(PL_rsfp))
37120919
AD
2019 return -1; /* error */
2020 else
2021 return 0 ; /* end of file */
2022 }
16d20bd9
AD
2023 }
2024 return SvCUR(buf_sv);
2025 }
2026 /* Skip this filter slot if filter has been deleted */
3280af22 2027 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2028 DEBUG_P(PerlIO_printf(Perl_debug_log,
2029 "filter_read %d: skipped (filter deleted)\n",
2030 idx));
16d20bd9
AD
2031 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2032 }
2033 /* Get function pointer hidden within datasv */
4755096e 2034 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2035 DEBUG_P(PerlIO_printf(Perl_debug_log,
2036 "filter_read %d: via function %p (%s)\n",
2037 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2038 /* Call function. The function is expected to */
2039 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2040 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2041 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2042}
2043
76e3520e 2044STATIC char *
cea2e8a9 2045S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2046{
c39cd008 2047#ifdef PERL_CR_FILTER
3280af22 2048 if (!PL_rsfp_filters) {
c39cd008 2049 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2050 }
2051#endif
3280af22 2052 if (PL_rsfp_filters) {
16d20bd9 2053
55497cff
PP
2054 if (!append)
2055 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2056 if (FILTER_READ(0, sv, 0) > 0)
2057 return ( SvPVX(sv) ) ;
2058 else
2059 return Nullch ;
2060 }
9d116dd7 2061 else
fd049845 2062 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2063}
2064
01ec43d0
GS
2065STATIC HV *
2066S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2067{
2068 GV *gv;
2069
01ec43d0 2070 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2071 return PL_curstash;
2072
2073 if (len > 2 &&
2074 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2075 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2076 {
2077 return GvHV(gv); /* Foo:: */
def3634b
GS
2078 }
2079
2080 /* use constant CLASS => 'MyClass' */
2081 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2082 SV *sv;
2083 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2084 pkgname = SvPV_nolen(sv);
2085 }
2086 }
2087
2088 return gv_stashpv(pkgname, FALSE);
2089}
a0d0e21e 2090
748a9306
LW
2091#ifdef DEBUGGING
2092 static char* exp_name[] =
09bef843
SB
2093 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2094 "ATTRTERM", "TERMBLOCK"
2095 };
748a9306 2096#endif
463ee0b2 2097
02aa26ce
NT
2098/*
2099 yylex
2100
2101 Works out what to call the token just pulled out of the input
2102 stream. The yacc parser takes care of taking the ops we return and
2103 stitching them into a tree.
2104
2105 Returns:
2106 PRIVATEREF
2107
2108 Structure:
2109 if read an identifier
2110 if we're in a my declaration
2111 croak if they tried to say my($foo::bar)
2112 build the ops for a my() declaration
2113 if it's an access to a my() variable
2114 are we in a sort block?
2115 croak if my($a); $a <=> $b
2116 build ops for access to a my() variable
2117 if in a dq string, and they've said @foo and we can't find @foo
2118 croak
2119 build ops for a bareword
2120 if we already built the token before, use it.
2121*/
2122
dba4d153 2123#ifdef USE_PURE_BISON
864dbfa3 2124int
dba4d153 2125Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2126{
20141f0e
RI
2127 int r;
2128
6f202aea 2129 yyactlevel++;
20141f0e
RI
2130 yylval_pointer[yyactlevel] = lvalp;
2131 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2132 if (yyactlevel >= YYMAXLEVEL)
2133 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2134
dba4d153 2135 r = Perl_yylex(aTHX);
20141f0e 2136
d8ae6756
RI
2137 if (yyactlevel > 0)
2138 yyactlevel--;
20141f0e
RI
2139
2140 return r;
2141}
dba4d153 2142#endif
20141f0e 2143
dba4d153
JH
2144#ifdef __SC__
2145#pragma segment Perl_yylex
2146#endif
dba4d153 2147int
dba4d153 2148Perl_yylex(pTHX)
20141f0e 2149{
79072805 2150 register char *s;
378cc40b 2151 register char *d;
79072805 2152 register I32 tmp;
463ee0b2 2153 STRLEN len;
161b471a
NIS
2154 GV *gv = Nullgv;
2155 GV **gvp = 0;
aa7440fb 2156 bool bof = FALSE;
a687059c 2157
02aa26ce 2158 /* check if there's an identifier for us to look at */
3280af22 2159 if (PL_pending_ident) {
02aa26ce 2160 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2161 char pit = PL_pending_ident;
2162 PL_pending_ident = 0;
bbce6d69 2163
607df283
SC
2164 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2165 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2166
02aa26ce
NT
2167 /* if we're in a my(), we can't allow dynamics here.
2168 $foo'bar has already been turned into $foo::bar, so
2169 just check for colons.
2170
2171 if it's a legal name, the OP is a PADANY.
2172 */
3280af22 2173 if (PL_in_my) {
77ca0c92 2174 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2175 if (strchr(PL_tokenbuf,':'))
2176 yyerror(Perl_form(aTHX_ "No package name allowed for "
2177 "variable %s in \"our\"",
2178 PL_tokenbuf));
77ca0c92
LW
2179 tmp = pad_allocmy(PL_tokenbuf);
2180 }
2181 else {
2182 if (strchr(PL_tokenbuf,':'))
2183 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2184
77ca0c92
LW
2185 yylval.opval = newOP(OP_PADANY, 0);
2186 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2187 return PRIVATEREF;
2188 }
bbce6d69
PP
2189 }
2190
4e553d73 2191 /*
02aa26ce
NT
2192 build the ops for accesses to a my() variable.
2193
2194 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2195 then used in a comparison. This catches most, but not
2196 all cases. For instance, it catches
2197 sort { my($a); $a <=> $b }
2198 but not
2199 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2200 (although why you'd do that is anyone's guess).
2201 */
2202
3280af22 2203 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2204#ifdef USE_THREADS
54b9620d 2205 /* Check for single character per-thread SVs */
3280af22
NIS
2206 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2207 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2208 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2209 {
2faa37cc 2210 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2211 yylval.opval->op_targ = tmp;
2212 return PRIVATEREF;
2213 }
2214#endif /* USE_THREADS */
3280af22 2215 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2216 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2217 /* might be an "our" variable" */
f472eb5c 2218 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2219 /* build ops for a bareword */
f472eb5c
GS
2220 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2221 sv_catpvn(sym, "::", 2);
2222 sv_catpv(sym, PL_tokenbuf+1);
2223 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2224 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2225 gv_fetchpv(SvPVX(sym),
77ca0c92 2226 (PL_in_eval
f472eb5c
GS
2227 ? (GV_ADDMULTI | GV_ADDINEVAL)
2228 : TRUE
77ca0c92
LW
2229 ),
2230 ((PL_tokenbuf[0] == '$') ? SVt_PV
2231 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2232 : SVt_PVHV));
2233 return WORD;
2234 }
2235
02aa26ce 2236 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2237 if (PL_last_lop_op == OP_SORT &&
2238 PL_tokenbuf[0] == '$' &&
2239 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2240 && !PL_tokenbuf[2])
bbce6d69 2241 {
3280af22
NIS
2242 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2243 d < PL_bufend && *d != '\n';
a863c7d1
MB
2244 d++)
2245 {
2246 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2247 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2248 PL_tokenbuf);
a863c7d1 2249 }
bbce6d69
PP
2250 }
2251 }
bbce6d69 2252
a863c7d1
MB
2253 yylval.opval = newOP(OP_PADANY, 0);
2254 yylval.opval->op_targ = tmp;
2255 return PRIVATEREF;
2256 }
bbce6d69
PP
2257 }
2258
02aa26ce
NT
2259 /*
2260 Whine if they've said @foo in a doublequoted string,
2261 and @foo isn't a variable we can find in the symbol
2262 table.
2263 */
3280af22
NIS
2264 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2265 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2266 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2267 && ckWARN(WARN_AMBIGUOUS))
2268 {
2269 /* Downgraded from fatal to warning 20000522 mjd */
2270 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2271 "Possible unintended interpolation of %s in string",
2272 PL_tokenbuf);
2273 }
bbce6d69
PP
2274 }
2275
02aa26ce 2276 /* build ops for a bareword */
3280af22 2277 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2278 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2279 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2280 ((PL_tokenbuf[0] == '$') ? SVt_PV
2281 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2282 : SVt_PVHV));
2283 return WORD;
2284 }
2285
02aa26ce
NT
2286 /* no identifier pending identification */
2287
3280af22 2288 switch (PL_lex_state) {
79072805
LW
2289#ifdef COMMENTARY
2290 case LEX_NORMAL: /* Some compilers will produce faster */
2291 case LEX_INTERPNORMAL: /* code if we comment these out. */
2292 break;
2293#endif
2294
09bef843 2295 /* when we've already built the next token, just pull it out of the queue */
79072805 2296 case LEX_KNOWNEXT:
3280af22
NIS
2297 PL_nexttoke--;
2298 yylval = PL_nextval[PL_nexttoke];
2299 if (!PL_nexttoke) {
2300 PL_lex_state = PL_lex_defer;
2301 PL_expect = PL_lex_expect;
2302 PL_lex_defer = LEX_NORMAL;
463ee0b2 2303 }
607df283 2304 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2305 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2306 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2307
3280af22 2308 return(PL_nexttype[PL_nexttoke]);
79072805 2309
02aa26ce 2310 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2311 when we get here, PL_bufptr is at the \
02aa26ce 2312 */
79072805
LW
2313 case LEX_INTERPCASEMOD:
2314#ifdef DEBUGGING
3280af22 2315 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2316 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2317#endif
02aa26ce 2318 /* handle \E or end of string */
3280af22 2319 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2320 char oldmod;
02aa26ce
NT
2321
2322 /* if at a \E */
3280af22
NIS
2323 if (PL_lex_casemods) {
2324 oldmod = PL_lex_casestack[--PL_lex_casemods];
2325 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2326
3280af22
NIS
2327 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2328 PL_bufptr += 2;
2329 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2330 }
79072805
LW
2331 return ')';
2332 }
3280af22
NIS
2333 if (PL_bufptr != PL_bufend)
2334 PL_bufptr += 2;
2335 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2336 return yylex();
79072805
LW
2337 }
2338 else {
607df283
SC
2339 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2340 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2341 s = PL_bufptr + 1;
79072805
LW
2342 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2343 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2344 if (strchr("LU", *s) &&
3280af22 2345 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2346 {
3280af22 2347 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2348 return ')';
2349 }
3280af22
NIS
2350 if (PL_lex_casemods > 10) {
2351 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2352 if (newlb != PL_lex_casestack) {
a0d0e21e 2353 SAVEFREEPV(newlb);
3280af22 2354 PL_lex_casestack = newlb;
a0d0e21e
LW
2355 }
2356 }
3280af22
NIS
2357 PL_lex_casestack[PL_lex_casemods++] = *s;
2358 PL_lex_casestack[PL_lex_casemods] = '\0';
2359 PL_lex_state = LEX_INTERPCONCAT;
2360 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2361 force_next('(');
2362 if (*s == 'l')
3280af22 2363 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2364 else if (*s == 'u')
3280af22 2365 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2366 else if (*s == 'L')
3280af22 2367 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2368 else if (*s == 'U')
3280af22 2369 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2370 else if (*s == 'Q')
3280af22 2371 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2372 else
cea2e8a9 2373 Perl_croak(aTHX_ "panic: yylex");
3280af22 2374 PL_bufptr = s + 1;
79072805 2375 force_next(FUNC);
3280af22
NIS
2376 if (PL_lex_starts) {
2377 s = PL_bufptr;
2378 PL_lex_starts = 0;
79072805
LW
2379 Aop(OP_CONCAT);
2380 }
2381 else
cea2e8a9 2382 return yylex();
79072805
LW
2383 }
2384
55497cff
PP
2385 case LEX_INTERPPUSH:
2386 return sublex_push();
2387
79072805 2388 case LEX_INTERPSTART:
3280af22 2389 if (PL_bufptr == PL_bufend)
79072805 2390 return sublex_done();
607df283
SC
2391 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2392 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2393 PL_expect = XTERM;
2394 PL_lex_dojoin = (*PL_bufptr == '@');
2395 PL_lex_state = LEX_INTERPNORMAL;
2396 if (PL_lex_dojoin) {
2397 PL_nextval[PL_nexttoke].ival = 0;
79072805 2398 force_next(',');
554b3eca 2399#ifdef USE_THREADS
533c011a
NIS
2400 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2401 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2402 force_next(PRIVATEREF);
2403#else
a0d0e21e 2404 force_ident("\"", '$');
554b3eca 2405#endif /* USE_THREADS */
3280af22 2406 PL_nextval[PL_nexttoke].ival = 0;
79072805 2407 force_next('$');
3280af22 2408 PL_nextval[PL_nexttoke].ival = 0;
79072805 2409 force_next('(');
3280af22 2410 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2411 force_next(FUNC);
2412 }
3280af22
NIS
2413 if (PL_lex_starts++) {
2414 s = PL_bufptr;
79072805
LW
2415 Aop(OP_CONCAT);
2416 }
cea2e8a9 2417 return yylex();
79072805
LW
2418
2419 case LEX_INTERPENDMAYBE:
3280af22
NIS
2420 if (intuit_more(PL_bufptr)) {
2421 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2422 break;
2423 }
2424 /* FALL THROUGH */
2425
2426 case LEX_INTERPEND:
3280af22
NIS
2427 if (PL_lex_dojoin) {
2428 PL_lex_dojoin = FALSE;
2429 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2430 return ')';
2431 }
43a16006 2432 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2433 && SvEVALED(PL_lex_repl))
43a16006 2434 {
e9fa98b2 2435 if (PL_bufptr != PL_bufend)
cea2e8a9 2436 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2437 PL_lex_repl = Nullsv;
2438 }
79072805
LW
2439 /* FALLTHROUGH */
2440 case LEX_INTERPCONCAT:
2441#ifdef DEBUGGING
3280af22 2442 if (PL_lex_brackets)
cea2e8a9 2443 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2444#endif
3280af22 2445 if (PL_bufptr == PL_bufend)
79072805
LW
2446 return sublex_done();
2447
3280af22
NIS
2448 if (SvIVX(PL_linestr) == '\'') {
2449 SV *sv = newSVsv(PL_linestr);
2450 if (!PL_lex_inpat)
76e3520e 2451 sv = tokeq(sv);
3280af22 2452 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2453 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2454 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2455 s = PL_bufend;
79072805
LW
2456 }
2457 else {
3280af22 2458 s = scan_const(PL_bufptr);
79072805 2459 if (*s == '\\')
3280af22 2460 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2461 else
3280af22 2462 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2463 }
2464
3280af22
NIS
2465 if (s != PL_bufptr) {
2466 PL_nextval[PL_nexttoke] = yylval;
2467 PL_expect = XTERM;
79072805 2468 force_next(THING);
3280af22 2469 if (PL_lex_starts++)
79072805
LW
2470 Aop(OP_CONCAT);
2471 else {
3280af22 2472 PL_bufptr = s;
cea2e8a9 2473 return yylex();
79072805
LW
2474 }
2475 }
2476
cea2e8a9 2477 return yylex();
a0d0e21e 2478 case LEX_FORMLINE:
3280af22
NIS
2479 PL_lex_state = LEX_NORMAL;
2480 s = scan_formline(PL_bufptr);
2481 if (!PL_lex_formbrack)
a0d0e21e
LW
2482 goto rightbracket;
2483 OPERATOR(';');
79072805
LW
2484 }
2485
3280af22
NIS
2486 s = PL_bufptr;
2487 PL_oldoldbufptr = PL_oldbufptr;
2488 PL_oldbufptr = s;
607df283 2489 DEBUG_T( {
bf49b057
GS
2490 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2491 exp_name[PL_expect], s);
79072805 2492 } )
463ee0b2
LW
2493
2494 retry:
378cc40b
LW
2495 switch (*s) {
2496 default:
7e2040f0 2497 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2498 goto keylookup;
cea2e8a9 2499 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2500 case 4:
2501 case 26:
2502 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2503 case 0:
3280af22
NIS
2504 if (!PL_rsfp) {
2505 PL_last_uni = 0;
2506 PL_last_lop = 0;
2507 if (PL_lex_brackets)
d98d5fff 2508 yyerror("Missing right curly or square bracket");
4e553d73 2509 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2510 "### Tokener got EOF\n");
2511 } )
79072805 2512 TOKEN(0);
463ee0b2 2513 }
3280af22 2514 if (s++ < PL_bufend)
a687059c 2515 goto retry; /* ignore stray nulls */
3280af22
NIS
2516 PL_last_uni = 0;
2517 PL_last_lop = 0;
2518 if (!PL_in_eval && !PL_preambled) {
2519 PL_preambled = TRUE;
2520 sv_setpv(PL_linestr,incl_perldb());
2521 if (SvCUR(PL_linestr))
2522 sv_catpv(PL_linestr,";");
2523 if (PL_preambleav){
2524 while(AvFILLp(PL_preambleav) >= 0) {
2525 SV *tmpsv = av_shift(PL_preambleav);
2526 sv_catsv(PL_linestr, tmpsv);
2527 sv_catpv(PL_linestr, ";");
91b7def8
PP
2528 sv_free(tmpsv);
2529 }
3280af22
NIS
2530 sv_free((SV*)PL_preambleav);
2531 PL_preambleav = NULL;
91b7def8 2532 }
3280af22
NIS
2533 if (PL_minus_n || PL_minus_p) {
2534 sv_catpv(PL_linestr, "LINE: while (<>) {");
2535 if (PL_minus_l)
2536 sv_catpv(PL_linestr,"chomp;");
2537 if (PL_minus_a) {
8fd239a7
CS
2538 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2539 if (gv)
2540 GvIMPORTED_AV_on(gv);
3280af22
NIS
2541 if (PL_minus_F) {
2542 if (strchr("/'\"", *PL_splitstr)
2543 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2544 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2545 else {
2546 char delim;
2547 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2548 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2549 delim = *s;
cea2e8a9 2550 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2551 "q" + (delim == '\''), delim);
3280af22 2552 for (s = PL_splitstr; *s; s++) {
54310121 2553 if (*s == '\\')
3280af22
NIS
2554 sv_catpvn(PL_linestr, "\\", 1);
2555 sv_catpvn(PL_linestr, s, 1);
54310121 2556 }
cea2e8a9 2557 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2558 }
2304df62
AD
2559 }
2560 else
3280af22 2561 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2562 }
79072805 2563 }
3280af22
NIS
2564 sv_catpv(PL_linestr, "\n");
2565 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2566 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2567 PL_last_lop = PL_last_uni = Nullch;
3280af22 2568 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2569 SV *sv = NEWSV(85,0);
2570
2571 sv_upgrade(sv, SVt_PVMG);
3280af22 2572 sv_setsv(sv,PL_linestr);
57843af0 2573 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2574 }
79072805 2575 goto retry;
a687059c 2576 }
e929a76b 2577 do {
aa7440fb 2578 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2579 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2580 fake_eof:
2581 if (PL_rsfp) {
2582 if (PL_preprocess && !PL_in_eval)
2583 (void)PerlProc_pclose(PL_rsfp);
2584 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2585 PerlIO_clearerr(PL_rsfp);
2586 else
2587 (void)PerlIO_close(PL_rsfp);
2588 PL_rsfp = Nullfp;
2589 PL_doextract = FALSE;
2590 }
2591 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2592 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2593 sv_catpv(PL_linestr,";}");
2594 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2595 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2596 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2597 PL_minus_n = PL_minus_p = 0;
2598 goto retry;
2599 }
2600 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2601 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2602 sv_setpv(PL_linestr,"");
2603 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2604 }
2605 /* if it looks like the start of a BOM, check if it in fact is */
2606 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2607#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2608# ifdef __GNU_LIBRARY__
2609# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2610# define FTELL_FOR_PIPE_IS_BROKEN
2611# endif
e3f494f1
JH
2612# else
2613# ifdef __GLIBC__
2614# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2615# define FTELL_FOR_PIPE_IS_BROKEN
2616# endif
2617# endif
226017aa
DD
2618# endif
2619#endif
2620#ifdef FTELL_FOR_PIPE_IS_BROKEN
2621 /* This loses the possibility to detect the bof
2622 * situation on perl -P when the libc5 is being used.
2623 * Workaround? Maybe attach some extra state to PL_rsfp?
2624 */
2625 if (!PL_preprocess)
7e28d3af 2626 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2627#else
7e28d3af 2628 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2629#endif
7e28d3af 2630 if (bof) {
3280af22 2631 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2632 s = swallow_bom((U8*)s);
e929a76b 2633 }
378cc40b 2634 }
3280af22 2635 if (PL_doextract) {
a0d0e21e 2636 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2637 PL_doextract = FALSE;
a0d0e21e
LW
2638
2639 /* Incest with pod. */
2640 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2641 sv_setpv(PL_linestr, "");
2642 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2643 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2644 PL_last_lop = PL_last_uni = Nullch;
3280af22 2645 PL_doextract = FALSE;
a0d0e21e 2646 }
4e553d73 2647 }
463ee0b2 2648 incline(s);
3280af22
NIS
2649 } while (PL_doextract);
2650 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2651 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2652 SV *sv = NEWSV(85,0);
a687059c 2653
93a17b20 2654 sv_upgrade(sv, SVt_PVMG);
3280af22 2655 sv_setsv(sv,PL_linestr);
57843af0 2656 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2657 }
3280af22 2658 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2659 PL_last_lop = PL_last_uni = Nullch;
57843af0 2660 if (CopLINE(PL_curcop) == 1) {
3280af22 2661 while (s < PL_bufend && isSPACE(*s))
79072805 2662 s++;
a0d0e21e 2663 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2664 s++;
44a8e56a 2665 d = Nullch;
3280af22 2666 if (!PL_in_eval) {
44a8e56a
PP
2667 if (*s == '#' && *(s+1) == '!')
2668 d = s + 2;
2669#ifdef ALTERNATE_SHEBANG
2670 else {
2671 static char as[] = ALTERNATE_SHEBANG;
2672 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2673 d = s + (sizeof(as) - 1);
2674 }
2675#endif /* ALTERNATE_SHEBANG */
2676 }
2677 if (d) {
b8378b72 2678 char *ipath;
774d564b 2679 char *ipathend;
b8378b72 2680
774d564b 2681 while (isSPACE(*d))
b8378b72
CS
2682 d++;
2683 ipath = d;
774d564b
PP
2684 while (*d && !isSPACE(*d))
2685 d++;
2686 ipathend = d;
2687
2688#ifdef ARG_ZERO_IS_SCRIPT
2689 if (ipathend > ipath) {
2690 /*
2691 * HP-UX (at least) sets argv[0] to the script name,
2692 * which makes $^X incorrect. And Digital UNIX and Linux,
2693 * at least, set argv[0] to the basename of the Perl
2694 * interpreter. So, having found "#!", we'll set it right.
2695 */
2696 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2697 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2698 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2699 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2700 SvSETMAGIC(x);
2701 }
774d564b 2702 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2703 }
774d564b 2704#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2705
2706 /*
2707 * Look for options.
2708 */
748a9306 2709 d = instr(s,"perl -");
84e30d1a 2710 if (!d) {
748a9306 2711 d = instr(s,"perl");
84e30d1a
GS
2712#if defined(DOSISH)
2713 /* avoid getting into infinite loops when shebang
2714 * line contains "Perl" rather than "perl" */
2715 if (!d) {
2716 for (d = ipathend-4; d >= ipath; --d) {
2717 if ((*d == 'p' || *d == 'P')
2718 && !ibcmp(d, "perl", 4))
2719 {
2720 break;
2721 }
2722 }
2723 if (d < ipath)
2724 d = Nullch;
2725 }
2726#endif
2727 }
44a8e56a
PP
2728#ifdef ALTERNATE_SHEBANG
2729 /*
2730 * If the ALTERNATE_SHEBANG on this system starts with a
2731 * character that can be part of a Perl expression, then if
2732 * we see it but not "perl", we're probably looking at the
2733 * start of Perl code, not a request to hand off to some
2734 * other interpreter. Similarly, if "perl" is there, but
2735 * not in the first 'word' of the line, we assume the line
2736 * contains the start of the Perl program.
44a8e56a
PP
2737 */
2738 if (d && *s != '#') {
774d564b 2739 char *c = ipath;
44a8e56a
PP
2740 while (*c && !strchr("; \t\r\n\f\v#", *c))
2741 c++;
2742 if (c < d)
2743 d = Nullch; /* "perl" not in first word; ignore */
2744 else
2745 *s = '#'; /* Don't try to parse shebang line */
2746 }
774d564b 2747#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2748#ifndef MACOS_TRADITIONAL
748a9306 2749 if (!d &&
44a8e56a 2750 *s == '#' &&
774d564b 2751 ipathend > ipath &&
3280af22 2752 !PL_minus_c &&
748a9306 2753 !instr(s,"indir") &&
3280af22 2754 instr(PL_origargv[0],"perl"))
748a9306 2755 {
9f68db38 2756 char **newargv;
9f68db38 2757
774d564b
PP
2758 *ipathend = '\0';
2759 s = ipathend + 1;
3280af22 2760 while (s < PL_bufend && isSPACE(*s))
9f68db38 2761 s++;
3280af22
NIS
2762 if (s < PL_bufend) {
2763 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2764 newargv[1] = s;
3280af22 2765 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2766 s++;
2767 *s = '\0';
3280af22 2768 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2769 }
2770 else
3280af22 2771 newargv = PL_origargv;
774d564b 2772 newargv[0] = ipath;
b4748376 2773 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2774 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2775 }
bf4acbe4 2776#endif
748a9306 2777 if (d) {
3280af22
NIS
2778 U32 oldpdb = PL_perldb;
2779 bool oldn = PL_minus_n;
2780 bool oldp = PL_minus_p;
748a9306
LW
2781
2782 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2783 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2784
2785 if (*d++ == '-') {
8cc95fdb
PP
2786 do {
2787 if (*d == 'M' || *d == 'm') {
2788 char *m = d;
2789 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2790 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2791 (int)(d - m), m);
2792 }
2793 d = moreswitches(d);
2794 } while (d);
155aba94
GS
2795 if ((PERLDB_LINE && !oldpdb) ||
2796 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2797 /* if we have already added "LINE: while (<>) {",
2798 we must not do it again */
748a9306 2799 {
3280af22
NIS
2800 sv_setpv(PL_linestr, "");
2801 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2802 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2803 PL_last_lop = PL_last_uni = Nullch;
3280af22 2804 PL_preambled = FALSE;
84902520 2805 if (PERLDB_LINE)
3280af22 2806 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2807 goto retry;
2808 }
a0d0e21e 2809 }
79072805 2810 }
9f68db38 2811 }
79072805 2812 }
3280af22
NIS
2813 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2814 PL_bufptr = s;
2815 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2816 return yylex();
ae986130 2817 }
378cc40b 2818 goto retry;
4fdae800 2819 case '\r':
6a27c188 2820#ifdef PERL_STRICT_CR
cea2e8a9 2821 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2822 Perl_croak(aTHX_
cc507455 2823 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2824#endif
4fdae800 2825 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2826#ifdef MACOS_TRADITIONAL
2827 case '\312':
2828#endif
378cc40b
LW
2829 s++;
2830 goto retry;
378cc40b 2831 case '#':
e929a76b 2832 case '\n':
3280af22 2833 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2834 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2835 /* handle eval qq[#line 1 "foo"\n ...] */
2836 CopLINE_dec(PL_curcop);
2837 incline(s);
2838 }
3280af22 2839 d = PL_bufend;
a687059c 2840 while (s < d && *s != '\n')
378cc40b 2841 s++;
0f85fab0 2842 if (s < d)
378cc40b 2843 s++;
463ee0b2 2844 incline(s);
3280af22
NIS
2845 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2846 PL_bufptr = s;
2847 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2848 return yylex();
a687059c 2849 }
378cc40b 2850 }
a687059c 2851 else {
378cc40b 2852 *s = '\0';
3280af22 2853 PL_bufend = s;
a687059c 2854 }
378cc40b
LW
2855 goto retry;
2856 case '-':
79072805 2857 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2858 I32 ftst = 0;
2859
378cc40b 2860 s++;
3280af22 2861 PL_bufptr = s;
748a9306
LW
2862 tmp = *s++;
2863
bf4acbe4 2864 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2865 s++;
2866
2867 if (strnEQ(s,"=>",2)) {
3280af22 2868 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2869 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2870 "### Saw unary minus before =>, forcing word '%s'\n", s);
2871 } )
748a9306
LW
2872 OPERATOR('-'); /* unary minus */
2873 }
3280af22 2874 PL_last_uni = PL_oldbufptr;
748a9306 2875 switch (tmp) {
e5edeb50
JH
2876 case 'r': ftst = OP_FTEREAD; break;
2877 case 'w': ftst = OP_FTEWRITE; break;
2878 case 'x': ftst = OP_FTEEXEC; break;
2879 case 'o': ftst = OP_FTEOWNED; break;
2880 case 'R': ftst = OP_FTRREAD; break;
2881 case 'W': ftst = OP_FTRWRITE; break;
2882 case 'X': ftst = OP_FTREXEC; break;
2883 case 'O': ftst = OP_FTROWNED; break;
2884 case 'e': ftst = OP_FTIS; break;
2885 case 'z': ftst = OP_FTZERO; break;
2886 case 's': ftst = OP_FTSIZE; break;
2887 case 'f': ftst = OP_FTFILE; break;
2888 case 'd': ftst = OP_FTDIR; break;
2889 case 'l': ftst = OP_FTLINK; break;
2890 case 'p': ftst = OP_FTPIPE; break;
2891 case 'S': ftst = OP_FTSOCK; break;
2892 case 'u': ftst = OP_FTSUID; break;
2893 case 'g': ftst = OP_FTSGID; break;
2894 case 'k': ftst = OP_FTSVTX; break;
2895 case 'b': ftst = OP_FTBLK; break;
2896 case 'c': ftst = OP_FTCHR; break;
2897 case 't': ftst = OP_FTTTY; break;
2898 case 'T': ftst = OP_FTTEXT; break;
2899 case 'B': ftst = OP_FTBINARY; break;
2900 case 'M': case 'A': case 'C':
2901 gv_fetchpv("\024",TRUE, SVt_PV);
2902 switch (tmp) {
2903 case 'M': ftst = OP_FTMTIME; break;
2904 case 'A': ftst = OP_FTATIME; break;
2905 case 'C': ftst = OP_FTCTIME; break;
2906 default: break;
2907 }
2908 break;
378cc40b 2909 default:
378cc40b
LW
2910 break;
2911 }
e5edeb50
JH
2912 if (ftst) {
2913 PL_last_lop_op = ftst;
4e553d73 2914 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2915 "### Saw file test %c\n", (int)ftst);
e5edeb50 2916 } )
e5edeb50
JH
2917 FTST(ftst);
2918 }
2919 else {
2920 /* Assume it was a minus followed by a one-letter named
2921 * subroutine call (or a -bareword), then. */
95c31fe3 2922 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2923 "### %c looked like a file test but was not\n",
2924 (int)ftst);
95c31fe3 2925 } )
e5edeb50
JH
2926 s -= 2;
2927 }
378cc40b 2928 }
a687059c
LW
2929 tmp = *s++;
2930 if (*s == tmp) {
2931 s++;
3280af22 2932 if (PL_expect == XOPERATOR)
79072805
LW
2933 TERM(POSTDEC);
2934 else
2935 OPERATOR(PREDEC);
2936 }
2937 else if (*s == '>') {
2938 s++;
2939 s = skipspace(s);
7e2040f0 2940 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2941 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2942 TOKEN(ARROW);
79072805 2943 }
748a9306
LW
2944 else if (*s == '$')
2945 OPERATOR(ARROW);
463ee0b2 2946 else
748a9306 2947 TERM(ARROW);
a687059c 2948 }
3280af22 2949 if (PL_expect == XOPERATOR)
79072805
LW
2950 Aop(OP_SUBTRACT);
2951 else {
3280af22 2952 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2953 check_uni();
79072805 2954 OPERATOR('-'); /* unary minus */
2f3197b3 2955 }
79072805 2956
378cc40b 2957 case '+':
a687059c
LW
2958 tmp = *s++;
2959 if (*s == tmp) {
378cc40b 2960 s++;
3280af22 2961 if (PL_expect == XOPERATOR)
79072805
LW
2962 TERM(POSTINC);
2963 else
2964 OPERATOR(PREINC);
378cc40b 2965 }
3280af22 2966 if (PL_expect == XOPERATOR)
79072805
LW
2967 Aop(OP_ADD);
2968 else {
3280af22 2969 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2970 check_uni();
a687059c 2971 OPERATOR('+');
2f3197b3 2972 }
a687059c 2973
378cc40b 2974 case '*':
3280af22
NIS
2975 if (PL_expect != XOPERATOR) {
2976 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2977 PL_expect = XOPERATOR;
2978 force_ident(PL_tokenbuf, '*');
2979 if (!*PL_tokenbuf)
a0d0e21e 2980 PREREF('*');
79072805 2981 TERM('*');
a687059c 2982 }
79072805
LW
2983 s++;
2984 if (*s == '*') {
a687059c 2985 s++;
79072805 2986 PWop(OP_POW);
a687059c 2987 }
79072805
LW
2988 Mop(OP_MULTIPLY);
2989
378cc40b 2990 case '%':
3280af22 2991 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2992 ++s;
2993 Mop(OP_MODULO);
a687059c 2994 }
3280af22
NIS
2995 PL_tokenbuf[0] = '%';
2996 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2997 if (!PL_tokenbuf[1]) {
2998 if (s == PL_bufend)
bbce6d69
PP
2999 yyerror("Final % should be \\% or %name");
3000 PREREF('%');
a687059c 3001 }
3280af22 3002 PL_pending_ident = '%';
bbce6d69 3003 TERM('%');
a687059c 3004
378cc40b 3005 case '^':
79072805 3006 s++;
a0d0e21e 3007 BOop(OP_BIT_XOR);
79072805 3008 case '[':
3280af22 3009 PL_lex_brackets++;
79072805 3010 /* FALL THROUGH */
378cc40b 3011 case '~':
378cc40b 3012 case ',':
378cc40b
LW
3013 tmp = *s++;
3014 OPERATOR(tmp);
a0d0e21e
LW
3015 case ':':
3016 if (s[1] == ':') {
3017 len = 0;
3018 goto just_a_word;
3019 }
3020 s++;
09bef843
SB
3021 switch (PL_expect) {
3022 OP *attrs;
3023 case XOPERATOR:
3024 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3025 break;
3026 PL_bufptr = s; /* update in case we back off */
3027 goto grabattrs;
3028 case XATTRBLOCK:
3029 PL_expect = XBLOCK;
3030 goto grabattrs;
3031 case XATTRTERM:
3032 PL_expect = XTERMBLOCK;
3033 grabattrs:
3034 s = skipspace(s);
3035 attrs = Nullop;
7e2040f0 3036 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3037 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3038 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3039 if (tmp < 0) tmp = -tmp;
3040 switch (tmp) {
3041 case KEY_or:
3042 case KEY_and:
3043 case KEY_for:
3044 case KEY_unless:
3045 case KEY_if:
3046 case KEY_while:
3047 case KEY_until:
3048 goto got_attrs;
3049 default:
3050 break;
3051 }
3052 }
09bef843
SB
3053 if (*d == '(') {
3054 d = scan_str(d,TRUE,TRUE);
3055 if (!d) {
09bef843
SB
3056 /* MUST advance bufptr here to avoid bogus
3057 "at end of line" context messages from yyerror().
3058 */
3059 PL_bufptr = s + len;
3060 yyerror("Unterminated attribute parameter in attribute list");
3061 if (attrs)
3062 op_free(attrs);
3063 return 0; /* EOF indicator */
3064 }
3065 }
3066 if (PL_lex_stuff) {
3067 SV *sv = newSVpvn(s, len);
3068 sv_catsv(sv, PL_lex_stuff);
3069 attrs = append_elem(OP_LIST, attrs,
3070 newSVOP(OP_CONST, 0, sv));
3071 SvREFCNT_dec(PL_lex_stuff);
3072 PL_lex_stuff = Nullsv;
3073 }
3074 else {
78f9721b
SM
3075 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3076 CvLVALUE_on(PL_compcv);
3077 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3078 CvLOCKED_on(PL_compcv);
3079 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3080 CvMETHOD_on(PL_compcv);
87ecf892
DM
3081#ifdef USE_ITHREADS
3082 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3083 GvSHARED_on(cGVOPx_gv(yylval.opval));
3084#endif
78f9721b
SM
3085 /* After we've set the flags, it could be argued that
3086 we don't need to do the attributes.pm-based setting
3087 process, and shouldn't bother appending recognized
3088 flags. To experiment with that, uncomment the
3089 following "else": */
0256094b 3090 else
78f9721b
SM
3091 attrs = append_elem(OP_LIST, attrs,
3092 newSVOP(OP_CONST, 0,
3093 newSVpvn(s, len)));
09bef843
SB
3094 }
3095 s = skipspace(d);
0120eecf 3096 if (*s == ':' && s[1] != ':')
09bef843 3097 s = skipspace(s+1);
0120eecf
GS
3098 else if (s == d)
3099 break; /* require real whitespace or :'s */
09bef843 3100 }
f9829d6b
GS
3101 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3102 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3103 char q = ((*s == '\'') ? '"' : '\'');
3104 /* If here for an expression, and parsed no attrs, back off. */
3105 if (tmp == '=' && !attrs) {
3106 s = PL_bufptr;
3107 break;
3108 }
3109 /* MUST advance bufptr here to avoid bogus "at end of line"
3110 context messages from yyerror().
3111 */
3112 PL_bufptr = s;
3113 if (!*s)
3114 yyerror("Unterminated attribute list");
3115 else
3116 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3117 q, *s, q));
3118 if (attrs)
3119 op_free(attrs);
3120 OPERATOR(':');
3121 }
f9829d6b 3122 got_attrs:
09bef843
SB
3123 if (attrs) {
3124 PL_nextval[PL_nexttoke].opval = attrs;
3125 force_next(THING);
3126 }
3127 TOKEN(COLONATTR);
3128 }
a0d0e21e 3129 OPERATOR(':');
8990e307
LW
3130 case '(':
3131 s++;
3280af22
NIS
3132 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3133 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3134 else
3280af22 3135 PL_expect = XTERM;
a0d0e21e 3136 TOKEN('(');
378cc40b 3137 case ';':
f4dd75d9 3138 CLINE;
378cc40b
LW
3139 tmp = *s++;
3140 OPERATOR(tmp);
3141 case ')':
378cc40b 3142 tmp = *s++;
16d20bd9
AD
3143 s = skipspace(s);
3144 if (*s == '{')
3145 PREBLOCK(tmp);
378cc40b 3146 TERM(tmp);
79072805
LW
3147 case ']':
3148 s++;
3280af22 3149 if (PL_lex_brackets <= 0)
d98d5fff 3150 yyerror("Unmatched right square bracket");
463ee0b2 3151 else
3280af22
NIS
3152 --PL_lex_brackets;
3153 if (PL_lex_state == LEX_INTERPNORMAL) {
3154 if (PL_lex_brackets == 0) {
a0d0e21e 3155 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3156 PL_lex_state = LEX_INTERPEND;
79072805
LW
3157 }
3158 }
4633a7c4 3159 TERM(']');
79072805
LW
3160 case '{':
3161 leftbracket:
79072805 3162 s++;
3280af22
NIS
3163 if (PL_lex_brackets > 100) {
3164 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3165 if (newlb != PL_lex_brackstack) {
8990e307 3166 SAVEFREEPV(newlb);
3280af22 3167 PL_lex_brackstack = newlb;
8990e307
LW
3168 }
3169 }
3280af22 3170 switch (PL_expect) {
a0d0e21e 3171 case XTERM:
3280af22 3172 if (PL_lex_formbrack) {
a0d0e21e
LW
3173 s--;
3174 PRETERMBLOCK(DO);
3175 }
3280af22
NIS
3176 if (PL_oldoldbufptr == PL_last_lop)
3177 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3178 else
3280af22 3179 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3180 OPERATOR(HASHBRACK);
a0d0e21e 3181 case XOPERATOR:
bf4acbe4 3182 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3183 s++;
44a8e56a 3184 d = s;
3280af22
NIS
3185 PL_tokenbuf[0] = '\0';
3186 if (d < PL_bufend && *d == '-') {
3187 PL_tokenbuf[0] = '-';
44a8e56a 3188 d++;
bf4acbe4 3189 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3190 d++;
3191 }
7e2040f0 3192 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3193 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3194 FALSE, &len);
bf4acbe4 3195 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3196 d++;
3197 if (*d == '}') {
3280af22 3198 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3199 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3200 if (minus)
3201 force_next('-');
748a9306
LW
3202 }
3203 }
3204 /* FALL THROUGH */
09bef843 3205 case XATTRBLOCK:
748a9306 3206 case XBLOCK:
3280af22
NIS
3207 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3208 PL_expect = XSTATE;
a0d0e21e 3209 break;
09bef843 3210 case XATTRTERM:
a0d0e21e 3211 case XTERMBLOCK:
3280af22
NIS
3212 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3213 PL_expect = XSTATE;
a0d0e21e
LW
3214 break;
3215 default: {
3216 char *t;
3280af22
NIS
3217 if (PL_oldoldbufptr == PL_last_lop)
3218 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3219 else
3280af22 3220 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3221 s = skipspace(s);
09ecc4b6 3222 if (*s == '}')
a0d0e21e 3223 OPERATOR(HASHBRACK);
b8a4b1be
GS
3224 /* This hack serves to disambiguate a pair of curlies
3225 * as being a block or an anon hash. Normally, expectation
3226 * determines that, but in cases where we're not in a
3227 * position to expect anything in particular (like inside
3228 * eval"") we have to resolve the ambiguity. This code
3229 * covers the case where the first term in the curlies is a
3230 * quoted string. Most other cases need to be explicitly
3231 * disambiguated by prepending a `+' before the opening
3232 * curly in order to force resolution as an anon hash.
3233 *
3234 * XXX should probably propagate the outer expectation
3235 * into eval"" to rely less on this hack, but that could
3236 * potentially break current behavior of eval"".
3237 * GSAR 97-07-21
3238 */
3239 t = s;
3240 if (*s == '\'' || *s == '"' || *s == '`') {
3241 /* common case: get past first string, handling escapes */
3280af22 3242 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3243 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3244 t++;
3245 t++;
a0d0e21e 3246 }
b8a4b1be 3247 else if (*s == 'q') {
3280af22 3248 if (++t < PL_bufend
b8a4b1be 3249 && (!isALNUM(*t)
3280af22 3250 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3251 && !isALNUM(*t))))
3252 {
b8a4b1be
GS
3253 char *tmps;
3254 char open, close, term;
3255 I32 brackets = 1;
3256
3280af22 3257 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3258 t++;
3259 term = *t;
3260 open = term;
3261 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3262 term = tmps[5];
3263 close = term;
3264 if (open == close)
3280af22
NIS
3265 for (t++; t < PL_bufend; t++) {
3266 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3267 t++;
6d07e5e9 3268 else if (*t == open)
b8a4b1be
GS
3269 break;
3270 }
3271 else
3280af22
NIS
3272 for (t++; t < PL_bufend; t++) {
3273 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3274 t++;
6d07e5e9 3275 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3276 break;
3277 else if (*t == open)
3278 brackets++;
3279 }
3280 }
3281 t++;
a0d0e21e 3282 }
7e2040f0 3283 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3284 t += UTF8SKIP(t);
7e2040f0 3285 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3286 t += UTF8SKIP(t);
a0d0e21e 3287 }
3280af22 3288 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3289 t++;
b8a4b1be
GS
3290 /* if comma follows first term, call it an anon hash */
3291 /* XXX it could be a comma expression with loop modifiers */
3280af22 3292 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3293 || (*t == '=' && t[1] == '>')))
a0d0e21e 3294 OPERATOR(HASHBRACK);
3280af22 3295 if (PL_expect == XREF)
4e4e412b 3296 PL_expect = XTERM;
a0d0e21e 3297 else {
3280af22
NIS
3298 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3299 PL_expect = XSTATE;
a0d0e21e 3300 }
8990e307 3301 }
a0d0e21e 3302 break;
463ee0b2 3303 }
57843af0 3304 yylval.ival = CopLINE(PL_curcop);
79072805 3305 if (isSPACE(*s) || *s == '#')
3280af22 3306 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3307 TOKEN('{');
378cc40b 3308 case '}':
79072805
LW
3309 rightbracket:
3310 s++;
3280af22 3311 if (PL_lex_brackets <= 0)
d98d5fff 3312 yyerror("Unmatched right curly bracket");
463ee0b2 3313 else
3280af22 3314 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3315 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3316 PL_lex_formbrack = 0;
3317 if (PL_lex_state == LEX_INTERPNORMAL) {
3318 if (PL_lex_brackets == 0) {
9059aa12
LW
3319 if (PL_expect & XFAKEBRACK) {
3320 PL_expect &= XENUMMASK;
3280af22
NIS
3321 PL_lex_state = LEX_INTERPEND;
3322 PL_bufptr = s;
cea2e8a9 3323 return yylex(); /* ignore fake brackets */
79072805 3324 }
fa83b5b6 3325 if (*s == '-' && s[1] == '>')
3280af22 3326 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3327 else if (*s != '[' && *s != '{')
3280af22 3328 PL_lex_state = LEX_INTERPEND;
79072805
LW
3329 }
3330 }
9059aa12
LW
3331 if (PL_expect & XFAKEBRACK) {
3332 PL_expect &= XENUMMASK;
3280af22 3333 PL_bufptr = s;
cea2e8a9 3334 return yylex(); /* ignore fake brackets */
748a9306 3335 }
79072805
LW
3336 force_next('}');
3337 TOKEN(';');
378cc40b
LW
3338 case '&':
3339 s++;
3340 tmp = *s++;
3341 if (tmp == '&')
a0d0e21e 3342 AOPERATOR(ANDAND);
378cc40b 3343 s--;
3280af22 3344 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3345 if (ckWARN(WARN_SEMICOLON)
3346 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3347 {
57843af0 3348 CopLINE_dec(PL_curcop);
cea2e8a9 3349 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3350 CopLINE_inc(PL_curcop);
463ee0b2 3351 }
79072805 3352 BAop(OP_BIT_AND);
463ee0b2 3353 }
79072805 3354
3280af22
NIS
3355 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3356 if (*PL_tokenbuf) {
3357 PL_expect = XOPERATOR;
3358 force_ident(PL_tokenbuf, '&');
463ee0b2 3359 }
79072805
LW
3360 else
3361 PREREF('&');
c07a80fd 3362 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3363 TERM('&');
3364
378cc40b
LW
3365 case '|':
3366 s++;
3367 tmp = *s++;
3368 if (tmp == '|')
a0d0e21e 3369 AOPERATOR(OROR);
378cc40b 3370 s--;
79072805 3371 BOop(OP_BIT_OR);
378cc40b
LW
3372 case '=':
3373 s++;
3374 tmp = *s++;
3375 if (tmp == '=')
79072805
LW
3376 Eop(OP_EQ);
3377 if (tmp == '>')
3378 OPERATOR(',');
378cc40b 3379 if (tmp == '~')
79072805 3380 PMop(OP_MATCH);
599cee73 3381 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3382 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3383 s--;
3280af22
NIS
3384 if (PL_expect == XSTATE && isALPHA(tmp) &&
3385 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3386 {
3280af22
NIS
3387 if (PL_in_eval && !PL_rsfp) {
3388 d = PL_bufend;
a5f75d66
AD
3389 while (s < d) {
3390 if (*s++ == '\n') {
3391 incline(s);
3392 if (strnEQ(s,"=cut",4)) {
3393 s = strchr(s,'\n');
3394 if (s)
3395 s++;
3396 else
3397 s = d;
3398 incline(s);
3399 goto retry;
3400 }
3401 }
3402 }
3403 goto retry;
3404 }
3280af22
NIS
3405 s = PL_bufend;
3406 PL_doextract = TRUE;
a0d0e21e
LW
3407 goto retry;
3408 }
3280af22 3409 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3410 char *t;
51882d45 3411#ifdef PERL_STRICT_CR
bf4acbe4 3412 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3413#else
bf4acbe4 3414 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3415#endif
a0d0e21e
LW
3416 if (*t == '\n' || *t == '#') {
3417 s--;
3280af22 3418 PL_expect = XBLOCK;
a0d0e21e
LW
3419 goto leftbracket;
3420 }
79072805 3421 }
a0d0e21e
LW
3422 yylval.ival = 0;
3423 OPERATOR(ASSIGNOP);
378cc40b
LW
3424 case '!':
3425 s++;
3426 tmp = *s++;
3427 if (tmp == '=')
79072805 3428 Eop(OP_NE);
378cc40b 3429 if (tmp == '~')
79072805 3430 PMop(OP_NOT);
378cc40b
LW
3431 s--;
3432 OPERATOR('!');
3433 case '<':
3280af22 3434 if (PL_expect != XOPERATOR) {
93a17b20 3435 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3436 check_uni();
79072805
LW
3437 if (s[1] == '<')
3438 s = scan_heredoc(s);
3439 else
3440 s = scan_inputsymbol(s);
3441 TERM(sublex_start());
378cc40b
LW
3442 }
3443 s++;
3444 tmp = *s++;
3445 if (tmp == '<')
79072805 3446 SHop(OP_LEFT_SHIFT);
395c3793
LW
3447 if (tmp == '=') {
3448 tmp = *s++;
3449 if (tmp == '>')
79072805 3450 Eop(OP_NCMP);
395c3793 3451 s--;
79072805 3452 Rop(OP_LE);
395c3793 3453 }
378cc40b 3454 s--;
79072805 3455 Rop(OP_LT);
378cc40b
LW
3456 case '>':
3457 s++;
3458 tmp = *s++;
3459 if (tmp == '>')
79072805 3460 SHop(OP_RIGHT_SHIFT);
378cc40b 3461 if (tmp == '=')
79072805 3462 Rop(OP_GE);
378cc40b 3463 s--;
79072805 3464 Rop(OP_GT);
378cc40b
LW
3465
3466 case '$':
bbce6d69
PP
3467 CLINE;
3468
3280af22
NIS
3469 if (PL_expect == XOPERATOR) {
3470 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3471 PL_expect = XTERM;
a0d0e21e 3472 depcom();
bbce6d69 3473 return ','; /* grandfather non-comma-format format */
a0d0e21e 3474 }
8990e307 3475 }
a0d0e21e 3476
7e2040f0 3477 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3478 PL_tokenbuf[0] = '@';
376b8730
SM
3479 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3480 sizeof PL_tokenbuf - 1, FALSE);
3481 if (PL_expect == XOPERATOR)
3482 no_op("Array length", s);
3280af22 3483 if (!PL_tokenbuf[1])
a0d0e21e 3484 PREREF(DOLSHARP);
3280af22
NIS
3485 PL_expect = XOPERATOR;
3486 PL_pending_ident = '#';
463ee0b2 3487 TOKEN(DOLSHARP);
79072805 3488 }
bbce6d69 3489
3280af22 3490 PL_tokenbuf[0] = '$';
376b8730
SM
3491 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3492 sizeof PL_tokenbuf - 1, FALSE);
3493 if (PL_expect == XOPERATOR)
3494 no_op("Scalar", s);
3280af22
NIS
3495 if (!PL_tokenbuf[1]) {
3496 if (s == PL_bufend)
bbce6d69
PP
3497 yyerror("Final $ should be \\$ or $name");
3498 PREREF('$');
8990e307 3499 }
a0d0e21e 3500
bbce6d69 3501 /* This kludge not intended to be bulletproof. */
3280af22 3502 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3503 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3504 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3505 yylval.opval->op_private = OPpCONST_ARYBASE;
3506 TERM(THING);
3507 }
3508
ff68c719 3509 d = s;
69d2bceb 3510 tmp = (I32)*s;
3280af22 3511 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3512 s = skipspace(s);
3513
3280af22 3514 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3515 char *t;
3516 if (*s == '[') {
3280af22 3517 PL_tokenbuf[0] = '@';
599cee73 3518 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3519 for(t = s + 1;
7e2040f0 3520 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3521 t++) ;
a0d0e21e 3522 if (*t++ == ',') {
3280af22
NIS
3523 PL_bufptr = skipspace(PL_bufptr);
3524 while (t < PL_bufend && *t != ']')
bbce6d69 3525 t++;
cea2e8a9 3526 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3527 "Multidimensional syntax %.*s not supported",
3528 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3529 }
3530 }
bbce6d69
PP
3531 }
3532 else if (*s == '{') {
3280af22 3533 PL_tokenbuf[0] = '%';
599cee73 3534 if (ckWARN(WARN_SYNTAX) &