This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
(Retracted by #11223.)
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
2b9d42f0
NIS
39#ifdef EBCDIC
40/* For now 'use utf8' does not affect tokenizer on EBCDIC */
41#define UTF (PL_linestr && DO_UTF8(PL_linestr))
42#else
43#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
44#endif
a0ed51b3 45
61f0cdd9 46/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
47 * 1999-02-27 mjd-perl-patch@plover.com */
48#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
49
bf4acbe4
GS
50/* On MacOS, respect nonbreaking spaces */
51#ifdef MACOS_TRADITIONAL
52#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
53#else
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
55#endif
56
ffb4593c
NT
57/* LEX_* are values for PL_lex_state, the state of the lexer.
58 * They are arranged oddly so that the guard on the switch statement
79072805
LW
59 * can get by with a single comparison (if the compiler is smart enough).
60 */
61
fb73857a
PP
62/* #define LEX_NOTPARSING 11 is done in perl.h. */
63
55497cff
PP
64#define LEX_NORMAL 10
65#define LEX_INTERPNORMAL 9
66#define LEX_INTERPCASEMOD 8
67#define LEX_INTERPPUSH 7
68#define LEX_INTERPSTART 6
69#define LEX_INTERPEND 5
70#define LEX_INTERPENDMAYBE 4
71#define LEX_INTERPCONCAT 3
72#define LEX_INTERPCONST 2
73#define LEX_FORMLINE 1
74#define LEX_KNOWNEXT 0
79072805 75
79072805
LW
76#ifdef ff_next
77#undef ff_next
d48672a2
LW
78#endif
79
a1a0e61e 80#ifdef USE_PURE_BISON
dba4d153
JH
81# ifndef YYMAXLEVEL
82# define YYMAXLEVEL 100
83# endif
20141f0e
RI
84YYSTYPE* yylval_pointer[YYMAXLEVEL];
85int* yychar_pointer[YYMAXLEVEL];
6f202aea 86int yyactlevel = -1;
22c35a8c
GS
87# undef yylval
88# undef yychar
20141f0e
RI
89# define yylval (*yylval_pointer[yyactlevel])
90# define yychar (*yychar_pointer[yyactlevel])
91# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 92# undef yylex
dba4d153 93# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
94#endif
95
79072805 96#include "keywords.h"
fe14fcc3 97
ffb4593c
NT
98/* CLINE is a macro that ensures PL_copline has a sane value */
99
ae986130
LW
100#ifdef CLINE
101#undef CLINE
102#endif
57843af0 103#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 104
ffb4593c
NT
105/*
106 * Convenience functions to return different tokens and prime the
9cbb5ea2 107 * lexer for the next token. They all take an argument.
ffb4593c
NT
108 *
109 * TOKEN : generic token (used for '(', DOLSHARP, etc)
110 * OPERATOR : generic operator
111 * AOPERATOR : assignment operator
112 * PREBLOCK : beginning the block after an if, while, foreach, ...
113 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
114 * PREREF : *EXPR where EXPR is not a simple identifier
115 * TERM : expression term
116 * LOOPX : loop exiting command (goto, last, dump, etc)
117 * FTST : file test operator
118 * FUN0 : zero-argument function
2d2e263d 119 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
120 * BOop : bitwise or or xor
121 * BAop : bitwise and
122 * SHop : shift operator
123 * PWop : power operator
9cbb5ea2 124 * PMop : pattern-matching operator
ffb4593c
NT
125 * Aop : addition-level operator
126 * Mop : multiplication-level operator
127 * Eop : equality-testing operator
e5edeb50 128 * Rop : relational operator <= != gt
ffb4593c
NT
129 *
130 * Also see LOP and lop() below.
131 */
132
075953c3
JH
133/* Note that REPORT() and REPORT2() will be expressions that supply
134 * their own trailing comma, not suitable for statements as such. */
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
136# define REPORT(x,retval) tokereport(x,s,(int)retval),
137# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 138#else
075953c3
JH
139# define REPORT(x,retval)
140# define REPORT2(x,retval)
998054bd
SC
141#endif
142
075953c3
JH
143#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
144#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
145#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
146#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
147#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
148#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
149#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
150#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
151#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
152#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
153#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
154#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
155#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
156#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
157#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
158#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
159#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
160#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
161#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
162#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 163
a687059c
LW
164/* This bit of chicanery makes a unary function followed by
165 * a parenthesis into a function with one argument, highest precedence.
166 */
2f3197b3 167#define UNI(f) return(yylval.ival = f, \
075953c3 168 REPORT("uni",f) \
3280af22
NIS
169 PL_expect = XTERM, \
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
a687059c
LW
173 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
174
79072805 175#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 176 REPORT("uni",f) \
3280af22
NIS
177 PL_bufptr = s, \
178 PL_last_uni = PL_oldbufptr, \
79072805
LW
179 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
180
9f68db38 181/* grandfather return to old style */
3280af22 182#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 183
8fa7f367
JH
184#ifdef DEBUGGING
185
2d00ba3b 186STATIC void
61b2116b 187S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 188{
998054bd 189 DEBUG_T({
9c5ffd7c 190 SV* report = newSVpv(thing, 0);
29b291f7
RB
191 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
192 (IV)rv);
998054bd
SC
193
194 if (s - PL_bufptr > 0)
195 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
196 else {
197 if (PL_oldbufptr && *PL_oldbufptr)
198 sv_catpv(report, PL_tokenbuf);
199 }
200 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 201 });
998054bd
SC
202}
203
8fa7f367
JH
204#endif
205
ffb4593c
NT
206/*
207 * S_ao
208 *
209 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
210 * into an OP_ANDASSIGN or OP_ORASSIGN
211 */
212
76e3520e 213STATIC int
cea2e8a9 214S_ao(pTHX_ int toketype)
a0d0e21e 215{
3280af22
NIS
216 if (*PL_bufptr == '=') {
217 PL_bufptr++;
a0d0e21e
LW
218 if (toketype == ANDAND)
219 yylval.ival = OP_ANDASSIGN;
220 else if (toketype == OROR)
221 yylval.ival = OP_ORASSIGN;
222 toketype = ASSIGNOP;
223 }
224 return toketype;
225}
226
ffb4593c
NT
227/*
228 * S_no_op
229 * When Perl expects an operator and finds something else, no_op
230 * prints the warning. It always prints "<something> found where
231 * operator expected. It prints "Missing semicolon on previous line?"
232 * if the surprise occurs at the start of the line. "do you need to
233 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
234 * where the compiler doesn't know if foo is a method call or a function.
235 * It prints "Missing operator before end of line" if there's nothing
236 * after the missing operator, or "... before <...>" if there is something
237 * after the missing operator.
238 */
239
76e3520e 240STATIC void
cea2e8a9 241S_no_op(pTHX_ char *what, char *s)
463ee0b2 242{
3280af22
NIS
243 char *oldbp = PL_bufptr;
244 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 245
1189a94a
GS
246 if (!s)
247 s = oldbp;
07c798fb 248 else
1189a94a 249 PL_bufptr = s;
cea2e8a9 250 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 251 if (is_first)
cea2e8a9 252 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 253 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 254 char *t;
7e2040f0 255 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 256 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 257 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 258 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 259 }
07c798fb
HS
260 else {
261 assert(s >= oldbp);
cea2e8a9 262 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 263 }
3280af22 264 PL_bufptr = oldbp;
8990e307
LW
265}
266
ffb4593c
NT
267/*
268 * S_missingterm
269 * Complain about missing quote/regexp/heredoc terminator.
270 * If it's called with (char *)NULL then it cauterizes the line buffer.
271 * If we're in a delimited string and the delimiter is a control
272 * character, it's reformatted into a two-char sequence like ^C.
273 * This is fatal.
274 */
275
76e3520e 276STATIC void
cea2e8a9 277S_missingterm(pTHX_ char *s)
8990e307
LW
278{
279 char tmpbuf[3];
280 char q;
281 if (s) {
282 char *nl = strrchr(s,'\n');
d2719217 283 if (nl)
8990e307
LW
284 *nl = '\0';
285 }
9d116dd7
JH
286 else if (
287#ifdef EBCDIC
288 iscntrl(PL_multi_close)
289#else
290 PL_multi_close < 32 || PL_multi_close == 127
291#endif
292 ) {
8990e307 293 *tmpbuf = '^';
3280af22 294 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
295 s = "\\n";
296 tmpbuf[2] = '\0';
297 s = tmpbuf;
298 }
299 else {
3280af22 300 *tmpbuf = PL_multi_close;
8990e307
LW
301 tmpbuf[1] = '\0';
302 s = tmpbuf;
303 }
304 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 305 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 306}
79072805 307
ffb4593c
NT
308/*
309 * Perl_deprecate
ffb4593c
NT
310 */
311
79072805 312void
864dbfa3 313Perl_deprecate(pTHX_ char *s)
a0d0e21e 314{
599cee73 315 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 316 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
317}
318
ffb4593c
NT
319/*
320 * depcom
9cbb5ea2 321 * Deprecate a comma-less variable list.
ffb4593c
NT
322 */
323
76e3520e 324STATIC void
cea2e8a9 325S_depcom(pTHX)
a0d0e21e
LW
326{
327 deprecate("comma-less variable list");
328}
329
ffb4593c 330/*
9cbb5ea2
GS
331 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
332 * utf16-to-utf8-reversed.
ffb4593c
NT
333 */
334
c39cd008
GS
335#ifdef PERL_CR_FILTER
336static void
337strip_return(SV *sv)
338{
339 register char *s = SvPVX(sv);
340 register char *e = s + SvCUR(sv);
341 /* outer loop optimized to do nothing if there are no CR-LFs */
342 while (s < e) {
343 if (*s++ == '\r' && *s == '\n') {
344 /* hit a CR-LF, need to copy the rest */
345 register char *d = s - 1;
346 *d++ = *s++;
347 while (s < e) {
348 if (*s == '\r' && s[1] == '\n')
349 s++;
350 *d++ = *s++;
351 }
352 SvCUR(sv) -= s - d;
353 return;
354 }
355 }
356}
a868473f 357
76e3520e 358STATIC I32
c39cd008 359S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 360{
c39cd008
GS
361 I32 count = FILTER_READ(idx+1, sv, maxlen);
362 if (count > 0 && !maxlen)
363 strip_return(sv);
364 return count;
a868473f
NIS
365}
366#endif
367
ffb4593c
NT
368/*
369 * Perl_lex_start
9cbb5ea2
GS
370 * Initialize variables. Uses the Perl save_stack to save its state (for
371 * recursive calls to the parser).
ffb4593c
NT
372 */
373
a0d0e21e 374void
864dbfa3 375Perl_lex_start(pTHX_ SV *line)
79072805 376{
8990e307
LW
377 char *s;
378 STRLEN len;
379
3280af22
NIS
380 SAVEI32(PL_lex_dojoin);
381 SAVEI32(PL_lex_brackets);
3280af22
NIS
382 SAVEI32(PL_lex_casemods);
383 SAVEI32(PL_lex_starts);
384 SAVEI32(PL_lex_state);
7766f137 385 SAVEVPTR(PL_lex_inpat);
3280af22 386 SAVEI32(PL_lex_inwhat);
18b09519
GS
387 if (PL_lex_state == LEX_KNOWNEXT) {
388 I32 toke = PL_nexttoke;
389 while (--toke >= 0) {
390 SAVEI32(PL_nexttype[toke]);
391 SAVEVPTR(PL_nextval[toke]);
392 }
393 SAVEI32(PL_nexttoke);
18b09519 394 }
57843af0 395 SAVECOPLINE(PL_curcop);
3280af22
NIS
396 SAVEPPTR(PL_bufptr);
397 SAVEPPTR(PL_bufend);
398 SAVEPPTR(PL_oldbufptr);
399 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
400 SAVEPPTR(PL_last_lop);
401 SAVEPPTR(PL_last_uni);
3280af22
NIS
402 SAVEPPTR(PL_linestart);
403 SAVESPTR(PL_linestr);
404 SAVEPPTR(PL_lex_brackstack);
405 SAVEPPTR(PL_lex_casestack);
c76ac1ee 406 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
407 SAVESPTR(PL_lex_stuff);
408 SAVEI32(PL_lex_defer);
09bef843 409 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 410 SAVESPTR(PL_lex_repl);
bebdddfc
GS
411 SAVEINT(PL_expect);
412 SAVEINT(PL_lex_expect);
3280af22
NIS
413
414 PL_lex_state = LEX_NORMAL;
415 PL_lex_defer = 0;
416 PL_expect = XSTATE;
417 PL_lex_brackets = 0;
3280af22
NIS
418 New(899, PL_lex_brackstack, 120, char);
419 New(899, PL_lex_casestack, 12, char);
420 SAVEFREEPV(PL_lex_brackstack);
421 SAVEFREEPV(PL_lex_casestack);
422 PL_lex_casemods = 0;
423 *PL_lex_casestack = '\0';
424 PL_lex_dojoin = 0;
425 PL_lex_starts = 0;
426 PL_lex_stuff = Nullsv;
427 PL_lex_repl = Nullsv;
428 PL_lex_inpat = 0;
76be56bc 429 PL_nexttoke = 0;
3280af22 430 PL_lex_inwhat = 0;
09bef843 431 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
432 PL_linestr = line;
433 if (SvREADONLY(PL_linestr))
434 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
435 s = SvPV(PL_linestr, len);
8990e307 436 if (len && s[len-1] != ';') {
3280af22
NIS
437 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
438 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
439 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 440 }
3280af22
NIS
441 SvTEMP_off(PL_linestr);
442 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
443 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 444 PL_last_lop = PL_last_uni = Nullch;
3280af22 445 SvREFCNT_dec(PL_rs);
79cb57f6 446 PL_rs = newSVpvn("\n", 1);
3280af22 447 PL_rsfp = 0;
79072805 448}
a687059c 449
ffb4593c
NT
450/*
451 * Perl_lex_end
9cbb5ea2
GS
452 * Finalizer for lexing operations. Must be called when the parser is
453 * done with the lexer.
ffb4593c
NT
454 */
455
463ee0b2 456void
864dbfa3 457Perl_lex_end(pTHX)
463ee0b2 458{
3280af22 459 PL_doextract = FALSE;
463ee0b2
LW
460}
461
ffb4593c
NT
462/*
463 * S_incline
464 * This subroutine has nothing to do with tilting, whether at windmills
465 * or pinball tables. Its name is short for "increment line". It
57843af0 466 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 467 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
468 * # line 500 "foo.pm"
469 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
470 */
471
76e3520e 472STATIC void
cea2e8a9 473S_incline(pTHX_ char *s)
463ee0b2
LW
474{
475 char *t;
476 char *n;
73659bf1 477 char *e;
463ee0b2 478 char ch;
463ee0b2 479
57843af0 480 CopLINE_inc(PL_curcop);
463ee0b2
LW
481 if (*s++ != '#')
482 return;
bf4acbe4 483 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
484 if (strnEQ(s, "line", 4))
485 s += 4;
486 else
487 return;
084592ab 488 if (SPACE_OR_TAB(*s))
73659bf1 489 s++;
4e553d73 490 else
73659bf1 491 return;
bf4acbe4 492 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
493 if (!isDIGIT(*s))
494 return;
495 n = s;
496 while (isDIGIT(*s))
497 s++;
bf4acbe4 498 while (SPACE_OR_TAB(*s))
463ee0b2 499 s++;
73659bf1 500 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 501 s++;
73659bf1
GS
502 e = t + 1;
503 }
463ee0b2 504 else {
463ee0b2 505 for (t = s; !isSPACE(*t); t++) ;
73659bf1 506 e = t;
463ee0b2 507 }
bf4acbe4 508 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
509 e++;
510 if (*e != '\n' && *e != '\0')
511 return; /* false alarm */
512
463ee0b2
LW
513 ch = *t;
514 *t = '\0';
f4dd75d9
GS
515 if (t - s > 0) {
516#ifdef USE_ITHREADS
517 Safefree(CopFILE(PL_curcop));
518#else
519 SvREFCNT_dec(CopFILEGV(PL_curcop));
520#endif
57843af0 521 CopFILE_set(PL_curcop, s);
f4dd75d9 522 }
463ee0b2 523 *t = ch;
57843af0 524 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
525}
526
ffb4593c
NT
527/*
528 * S_skipspace
529 * Called to gobble the appropriate amount and type of whitespace.
530 * Skips comments as well.
531 */
532
76e3520e 533STATIC char *
cea2e8a9 534S_skipspace(pTHX_ register char *s)
a687059c 535{
3280af22 536 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 537 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
538 s++;
539 return s;
540 }
541 for (;;) {
fd049845 542 STRLEN prevlen;
09bef843 543 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 544 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
545 while (s < PL_bufend && isSPACE(*s)) {
546 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
547 incline(s);
548 }
ffb4593c
NT
549
550 /* comment */
3280af22
NIS
551 if (s < PL_bufend && *s == '#') {
552 while (s < PL_bufend && *s != '\n')
463ee0b2 553 s++;
60e6418e 554 if (s < PL_bufend) {
463ee0b2 555 s++;
60e6418e
GS
556 if (PL_in_eval && !PL_rsfp) {
557 incline(s);
558 continue;
559 }
560 }
463ee0b2 561 }
ffb4593c
NT
562
563 /* only continue to recharge the buffer if we're at the end
564 * of the buffer, we're not reading from a source filter, and
565 * we're in normal lexing mode
566 */
09bef843
SB
567 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
568 PL_lex_state == LEX_FORMLINE)
463ee0b2 569 return s;
ffb4593c
NT
570
571 /* try to recharge the buffer */
9cbb5ea2
GS
572 if ((s = filter_gets(PL_linestr, PL_rsfp,
573 (prevlen = SvCUR(PL_linestr)))) == Nullch)
574 {
575 /* end of file. Add on the -p or -n magic */
3280af22
NIS
576 if (PL_minus_n || PL_minus_p) {
577 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
578 ";}continue{print or die qq(-p destination: $!\\n)" :
579 "");
3280af22
NIS
580 sv_catpv(PL_linestr,";}");
581 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
582 }
583 else
3280af22 584 sv_setpv(PL_linestr,";");
ffb4593c
NT
585
586 /* reset variables for next time we lex */
9cbb5ea2
GS
587 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
588 = SvPVX(PL_linestr);
3280af22 589 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 590 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
591
592 /* Close the filehandle. Could be from -P preprocessor,
593 * STDIN, or a regular file. If we were reading code from
594 * STDIN (because the commandline held no -e or filename)
595 * then we don't close it, we reset it so the code can
596 * read from STDIN too.
597 */
598
3280af22
NIS
599 if (PL_preprocess && !PL_in_eval)
600 (void)PerlProc_pclose(PL_rsfp);
601 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
602 PerlIO_clearerr(PL_rsfp);
8990e307 603 else
3280af22
NIS
604 (void)PerlIO_close(PL_rsfp);
605 PL_rsfp = Nullfp;
463ee0b2
LW
606 return s;
607 }
ffb4593c
NT
608
609 /* not at end of file, so we only read another line */
09bef843
SB
610 /* make corresponding updates to old pointers, for yyerror() */
611 oldprevlen = PL_oldbufptr - PL_bufend;
612 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
613 if (PL_last_uni)
614 oldunilen = PL_last_uni - PL_bufend;
615 if (PL_last_lop)
616 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
617 PL_linestart = PL_bufptr = s + prevlen;
618 PL_bufend = s + SvCUR(PL_linestr);
619 s = PL_bufptr;
09bef843
SB
620 PL_oldbufptr = s + oldprevlen;
621 PL_oldoldbufptr = s + oldoldprevlen;
622 if (PL_last_uni)
623 PL_last_uni = s + oldunilen;
624 if (PL_last_lop)
625 PL_last_lop = s + oldloplen;
a0d0e21e 626 incline(s);
ffb4593c
NT
627
628 /* debugger active and we're not compiling the debugger code,
629 * so store the line into the debugger's array of lines
630 */
3280af22 631 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
632 SV *sv = NEWSV(85,0);
633
634 sv_upgrade(sv, SVt_PVMG);
3280af22 635 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 637 }
463ee0b2 638 }
a687059c 639}
378cc40b 640
ffb4593c
NT
641/*
642 * S_check_uni
643 * Check the unary operators to ensure there's no ambiguity in how they're
644 * used. An ambiguous piece of code would be:
645 * rand + 5
646 * This doesn't mean rand() + 5. Because rand() is a unary operator,
647 * the +5 is its argument.
648 */
649
76e3520e 650STATIC void
cea2e8a9 651S_check_uni(pTHX)
ba106d47 652{
2f3197b3 653 char *s;
a0d0e21e 654 char *t;
2f3197b3 655
3280af22 656 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 657 return;
3280af22
NIS
658 while (isSPACE(*PL_last_uni))
659 PL_last_uni++;
7e2040f0 660 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 661 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 662 return;
0453d815 663 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 664 char ch = *s;
0453d815 665 *s = '\0';
4e553d73
NIS
666 Perl_warner(aTHX_ WARN_AMBIGUOUS,
667 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
668 PL_last_uni);
669 *s = ch;
670 }
2f3197b3
LW
671}
672
ffb4593c
NT
673/* workaround to replace the UNI() macro with a function. Only the
674 * hints/uts.sh file mentions this. Other comments elsewhere in the
675 * source indicate Microport Unix might need it too.
676 */
677
ffed7fef
LW
678#ifdef CRIPPLED_CC
679
680#undef UNI
ffed7fef 681#define UNI(f) return uni(f,s)
ffed7fef 682
76e3520e 683STATIC int
cea2e8a9 684S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
685{
686 yylval.ival = f;
3280af22
NIS
687 PL_expect = XTERM;
688 PL_bufptr = s;
8f872242
NIS
689 PL_last_uni = PL_oldbufptr;
690 PL_last_lop_op = f;
ffed7fef
LW
691 if (*s == '(')
692 return FUNC1;
693 s = skipspace(s);
694 if (*s == '(')
695 return FUNC1;
696 else
697 return UNIOP;
698}
699
a0d0e21e
LW
700#endif /* CRIPPLED_CC */
701
ffb4593c
NT
702/*
703 * LOP : macro to build a list operator. Its behaviour has been replaced
704 * with a subroutine, S_lop() for which LOP is just another name.
705 */
706
a0d0e21e
LW
707#define LOP(f,x) return lop(f,x,s)
708
ffb4593c
NT
709/*
710 * S_lop
711 * Build a list operator (or something that might be one). The rules:
712 * - if we have a next token, then it's a list operator [why?]
713 * - if the next thing is an opening paren, then it's a function
714 * - else it's a list operator
715 */
716
76e3520e 717STATIC I32
a0be28da 718S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 719{
79072805 720 yylval.ival = f;
35c8bce7 721 CLINE;
075953c3 722 REPORT("lop", f)
3280af22
NIS
723 PL_expect = x;
724 PL_bufptr = s;
725 PL_last_lop = PL_oldbufptr;
726 PL_last_lop_op = f;
727 if (PL_nexttoke)
a0d0e21e 728 return LSTOP;
79072805
LW
729 if (*s == '(')
730 return FUNC;
731 s = skipspace(s);
732 if (*s == '(')
733 return FUNC;
734 else
735 return LSTOP;
736}
737
ffb4593c
NT
738/*
739 * S_force_next
9cbb5ea2 740 * When the lexer realizes it knows the next token (for instance,
ffb4593c 741 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
742 * to know what token to return the next time the lexer is called. Caller
743 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
744 * handles the token correctly.
ffb4593c
NT
745 */
746
4e553d73 747STATIC void
cea2e8a9 748S_force_next(pTHX_ I32 type)
79072805 749{
3280af22
NIS
750 PL_nexttype[PL_nexttoke] = type;
751 PL_nexttoke++;
752 if (PL_lex_state != LEX_KNOWNEXT) {
753 PL_lex_defer = PL_lex_state;
754 PL_lex_expect = PL_expect;
755 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
756 }
757}
758
ffb4593c
NT
759/*
760 * S_force_word
761 * When the lexer knows the next thing is a word (for instance, it has
762 * just seen -> and it knows that the next char is a word char, then
763 * it calls S_force_word to stick the next word into the PL_next lookahead.
764 *
765 * Arguments:
b1b65b59 766 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
767 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
768 * int check_keyword : if true, Perl checks to make sure the word isn't
769 * a keyword (do this if the word is a label, e.g. goto FOO)
770 * int allow_pack : if true, : characters will also be allowed (require,
771 * use, etc. do this)
9cbb5ea2 772 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
773 */
774
76e3520e 775STATIC char *
cea2e8a9 776S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 777{
463ee0b2
LW
778 register char *s;
779 STRLEN len;
4e553d73 780
463ee0b2
LW
781 start = skipspace(start);
782 s = start;
7e2040f0 783 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 784 (allow_pack && *s == ':') ||
15f0808c 785 (allow_initial_tick && *s == '\'') )
a0d0e21e 786 {
3280af22
NIS
787 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
788 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
789 return start;
790 if (token == METHOD) {
791 s = skipspace(s);
792 if (*s == '(')
3280af22 793 PL_expect = XTERM;
463ee0b2 794 else {
3280af22 795 PL_expect = XOPERATOR;
463ee0b2 796 }
79072805 797 }
3280af22
NIS
798 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
799 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
800 force_next(token);
801 }
802 return s;
803}
804
ffb4593c
NT
805/*
806 * S_force_ident
9cbb5ea2 807 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
808 * text only contains the "foo" portion. The first argument is a pointer
809 * to the "foo", and the second argument is the type symbol to prefix.
810 * Forces the next token to be a "WORD".
9cbb5ea2 811 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
812 */
813
76e3520e 814STATIC void
cea2e8a9 815S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
816{
817 if (s && *s) {
11343788 818 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 819 PL_nextval[PL_nexttoke].opval = o;
79072805 820 force_next(WORD);
748a9306 821 if (kind) {
11343788 822 o->op_private = OPpCONST_ENTERED;
55497cff
PP
823 /* XXX see note in pp_entereval() for why we forgo typo
824 warnings if the symbol must be introduced in an eval.
825 GSAR 96-10-12 */
3280af22 826 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
827 kind == '$' ? SVt_PV :
828 kind == '@' ? SVt_PVAV :
829 kind == '%' ? SVt_PVHV :
830 SVt_PVGV
831 );
748a9306 832 }
79072805
LW
833 }
834}
835
1571675a
GS
836NV
837Perl_str_to_version(pTHX_ SV *sv)
838{
839 NV retval = 0.0;
840 NV nshift = 1.0;
841 STRLEN len;
842 char *start = SvPVx(sv,len);
3aa33fe5 843 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
844 char *end = start + len;
845 while (start < end) {
ba210ebe 846 STRLEN skip;
1571675a
GS
847 UV n;
848 if (utf)
9041c2e3 849 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
850 else {
851 n = *(U8*)start;
852 skip = 1;
853 }
854 retval += ((NV)n)/nshift;
855 start += skip;
856 nshift *= 1000;
857 }
858 return retval;
859}
860
4e553d73 861/*
ffb4593c
NT
862 * S_force_version
863 * Forces the next token to be a version number.
864 */
865
76e3520e 866STATIC char *
cea2e8a9 867S_force_version(pTHX_ char *s)
89bfa8cd
PP
868{
869 OP *version = Nullop;
44dcb63b 870 char *d;
89bfa8cd
PP
871
872 s = skipspace(s);
873
44dcb63b 874 d = s;
dd629d5b 875 if (*d == 'v')
44dcb63b 876 d++;
44dcb63b 877 if (isDIGIT(*d)) {
a7cb1f99 878 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 879 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 880 SV *ver;
b73d6f50 881 s = scan_num(s, &yylval);
89bfa8cd 882 version = yylval.opval;
dd629d5b
GS
883 ver = cSVOPx(version)->op_sv;
884 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 885 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
886 SvNVX(ver) = str_to_version(ver);
887 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 888 }
89bfa8cd
PP
889 }
890 }
891
892 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 893 PL_nextval[PL_nexttoke].opval = version;
4e553d73 894 force_next(WORD);
89bfa8cd
PP
895
896 return (s);
897}
898
ffb4593c
NT
899/*
900 * S_tokeq
901 * Tokenize a quoted string passed in as an SV. It finds the next
902 * chunk, up to end of string or a backslash. It may make a new
903 * SV containing that chunk (if HINT_NEW_STRING is on). It also
904 * turns \\ into \.
905 */
906
76e3520e 907STATIC SV *
cea2e8a9 908S_tokeq(pTHX_ SV *sv)
79072805
LW
909{
910 register char *s;
911 register char *send;
912 register char *d;
b3ac6de7
IZ
913 STRLEN len = 0;
914 SV *pv = sv;
79072805
LW
915
916 if (!SvLEN(sv))
b3ac6de7 917 goto finish;
79072805 918
a0d0e21e 919 s = SvPV_force(sv, len);
21a311ee 920 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 921 goto finish;
463ee0b2 922 send = s + len;
79072805
LW
923 while (s < send && *s != '\\')
924 s++;
925 if (s == send)
b3ac6de7 926 goto finish;
79072805 927 d = s;
be4731d2 928 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 929 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
930 if (SvUTF8(sv))
931 SvUTF8_on(pv);
932 }
79072805
LW
933 while (s < send) {
934 if (*s == '\\') {
a0d0e21e 935 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
936 s++; /* all that, just for this */
937 }
938 *d++ = *s++;
939 }
940 *d = '\0';
463ee0b2 941 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 942 finish:
3280af22 943 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 944 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
945 return sv;
946}
947
ffb4593c
NT
948/*
949 * Now come three functions related to double-quote context,
950 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
951 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
952 * interact with PL_lex_state, and create fake ( ... ) argument lists
953 * to handle functions and concatenation.
954 * They assume that whoever calls them will be setting up a fake
955 * join call, because each subthing puts a ',' after it. This lets
956 * "lower \luPpEr"
957 * become
958 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
959 *
960 * (I'm not sure whether the spurious commas at the end of lcfirst's
961 * arguments and join's arguments are created or not).
962 */
963
964/*
965 * S_sublex_start
966 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
967 *
968 * Pattern matching will set PL_lex_op to the pattern-matching op to
969 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
970 *
971 * OP_CONST and OP_READLINE are easy--just make the new op and return.
972 *
973 * Everything else becomes a FUNC.
974 *
975 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
976 * had an OP_CONST or OP_READLINE). This just sets us up for a
977 * call to S_sublex_push().
978 */
979
76e3520e 980STATIC I32
cea2e8a9 981S_sublex_start(pTHX)
79072805
LW
982{
983 register I32 op_type = yylval.ival;
79072805
LW
984
985 if (op_type == OP_NULL) {
3280af22
NIS
986 yylval.opval = PL_lex_op;
987 PL_lex_op = Nullop;
79072805
LW
988 return THING;
989 }
990 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 991 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
992
993 if (SvTYPE(sv) == SVt_PVIV) {
994 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
995 STRLEN len;
996 char *p;
997 SV *nsv;
998
999 p = SvPV(sv, len);
79cb57f6 1000 nsv = newSVpvn(p, len);
01ec43d0
GS
1001 if (SvUTF8(sv))
1002 SvUTF8_on(nsv);
b3ac6de7
IZ
1003 SvREFCNT_dec(sv);
1004 sv = nsv;
4e553d73 1005 }
b3ac6de7 1006 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1007 PL_lex_stuff = Nullsv;
79072805
LW
1008 return THING;
1009 }
1010
3280af22
NIS
1011 PL_sublex_info.super_state = PL_lex_state;
1012 PL_sublex_info.sub_inwhat = op_type;
1013 PL_sublex_info.sub_op = PL_lex_op;
1014 PL_lex_state = LEX_INTERPPUSH;
55497cff 1015
3280af22
NIS
1016 PL_expect = XTERM;
1017 if (PL_lex_op) {
1018 yylval.opval = PL_lex_op;
1019 PL_lex_op = Nullop;
55497cff
PP
1020 return PMFUNC;
1021 }
1022 else
1023 return FUNC;
1024}
1025
ffb4593c
NT
1026/*
1027 * S_sublex_push
1028 * Create a new scope to save the lexing state. The scope will be
1029 * ended in S_sublex_done. Returns a '(', starting the function arguments
1030 * to the uc, lc, etc. found before.
1031 * Sets PL_lex_state to LEX_INTERPCONCAT.
1032 */
1033
76e3520e 1034STATIC I32
cea2e8a9 1035S_sublex_push(pTHX)
55497cff 1036{
f46d017c 1037 ENTER;
55497cff 1038
3280af22
NIS
1039 PL_lex_state = PL_sublex_info.super_state;
1040 SAVEI32(PL_lex_dojoin);
1041 SAVEI32(PL_lex_brackets);
3280af22
NIS
1042 SAVEI32(PL_lex_casemods);
1043 SAVEI32(PL_lex_starts);
1044 SAVEI32(PL_lex_state);
7766f137 1045 SAVEVPTR(PL_lex_inpat);
3280af22 1046 SAVEI32(PL_lex_inwhat);
57843af0 1047 SAVECOPLINE(PL_curcop);
3280af22 1048 SAVEPPTR(PL_bufptr);
8452ff4b 1049 SAVEPPTR(PL_bufend);
3280af22
NIS
1050 SAVEPPTR(PL_oldbufptr);
1051 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1052 SAVEPPTR(PL_last_lop);
1053 SAVEPPTR(PL_last_uni);
3280af22
NIS
1054 SAVEPPTR(PL_linestart);
1055 SAVESPTR(PL_linestr);
1056 SAVEPPTR(PL_lex_brackstack);
1057 SAVEPPTR(PL_lex_casestack);
1058
1059 PL_linestr = PL_lex_stuff;
1060 PL_lex_stuff = Nullsv;
1061
9cbb5ea2
GS
1062 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1063 = SvPVX(PL_linestr);
3280af22 1064 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1065 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1066 SAVEFREESV(PL_linestr);
1067
1068 PL_lex_dojoin = FALSE;
1069 PL_lex_brackets = 0;
3280af22
NIS
1070 New(899, PL_lex_brackstack, 120, char);
1071 New(899, PL_lex_casestack, 12, char);
1072 SAVEFREEPV(PL_lex_brackstack);
1073 SAVEFREEPV(PL_lex_casestack);
1074 PL_lex_casemods = 0;
1075 *PL_lex_casestack = '\0';
1076 PL_lex_starts = 0;
1077 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1078 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1079
1080 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1081 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1082 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1083 else
3280af22 1084 PL_lex_inpat = Nullop;
79072805 1085
55497cff 1086 return '(';
79072805
LW
1087}
1088
ffb4593c
NT
1089/*
1090 * S_sublex_done
1091 * Restores lexer state after a S_sublex_push.
1092 */
1093
76e3520e 1094STATIC I32
cea2e8a9 1095S_sublex_done(pTHX)
79072805 1096{
3280af22 1097 if (!PL_lex_starts++) {
9aa983d2
JH
1098 SV *sv = newSVpvn("",0);
1099 if (SvUTF8(PL_linestr))
1100 SvUTF8_on(sv);
3280af22 1101 PL_expect = XOPERATOR;
9aa983d2 1102 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1103 return THING;
1104 }
1105
3280af22
NIS
1106 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1107 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1108 return yylex();
79072805
LW
1109 }
1110
ffb4593c 1111 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1112 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1113 PL_linestr = PL_lex_repl;
1114 PL_lex_inpat = 0;
1115 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1116 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1117 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1118 SAVEFREESV(PL_linestr);
1119 PL_lex_dojoin = FALSE;
1120 PL_lex_brackets = 0;
3280af22
NIS
1121 PL_lex_casemods = 0;
1122 *PL_lex_casestack = '\0';
1123 PL_lex_starts = 0;
25da4f38 1124 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1125 PL_lex_state = LEX_INTERPNORMAL;
1126 PL_lex_starts++;
e9fa98b2
HS
1127 /* we don't clear PL_lex_repl here, so that we can check later
1128 whether this is an evalled subst; that means we rely on the
1129 logic to ensure sublex_done() is called again only via the
1130 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1131 }
e9fa98b2 1132 else {
3280af22 1133 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1134 PL_lex_repl = Nullsv;
1135 }
79072805 1136 return ',';
ffed7fef
LW
1137 }
1138 else {
f46d017c 1139 LEAVE;
3280af22
NIS
1140 PL_bufend = SvPVX(PL_linestr);
1141 PL_bufend += SvCUR(PL_linestr);
1142 PL_expect = XOPERATOR;
09bef843 1143 PL_sublex_info.sub_inwhat = 0;
79072805 1144 return ')';
ffed7fef
LW
1145 }
1146}
1147
02aa26ce
NT
1148/*
1149 scan_const
1150
1151 Extracts a pattern, double-quoted string, or transliteration. This
1152 is terrifying code.
1153
3280af22
NIS
1154 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1155 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1156 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1157
9b599b2a
GS
1158 Returns a pointer to the character scanned up to. Iff this is
1159 advanced from the start pointer supplied (ie if anything was
1160 successfully parsed), will leave an OP for the substring scanned
1161 in yylval. Caller must intuit reason for not parsing further
1162 by looking at the next characters herself.
1163
02aa26ce
NT
1164 In patterns:
1165 backslashes:
1166 double-quoted style: \r and \n
1167 regexp special ones: \D \s
1168 constants: \x3
1169 backrefs: \1 (deprecated in substitution replacements)
1170 case and quoting: \U \Q \E
1171 stops on @ and $, but not for $ as tail anchor
1172
1173 In transliterations:
1174 characters are VERY literal, except for - not at the start or end
1175 of the string, which indicates a range. scan_const expands the
1176 range to the full set of intermediate characters.
1177
1178 In double-quoted strings:
1179 backslashes:
1180 double-quoted style: \r and \n
1181 constants: \x3
1182 backrefs: \1 (deprecated)
1183 case and quoting: \U \Q \E
1184 stops on @ and $
1185
1186 scan_const does *not* construct ops to handle interpolated strings.
1187 It stops processing as soon as it finds an embedded $ or @ variable
1188 and leaves it to the caller to work out what's going on.
1189
1190 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1191
1192 $ in pattern could be $foo or could be tail anchor. Assumption:
1193 it's a tail anchor if $ is the last thing in the string, or if it's
1194 followed by one of ")| \n\t"
1195
1196 \1 (backreferences) are turned into $1
1197
1198 The structure of the code is
1199 while (there's a character to process) {
1200 handle transliteration ranges
1201 skip regexp comments
1202 skip # initiated comments in //x patterns
1203 check for embedded @foo
1204 check for embedded scalars
1205 if (backslash) {
1206 leave intact backslashes from leave (below)
1207 deprecate \1 in strings and sub replacements
1208 handle string-changing backslashes \l \U \Q \E, etc.
1209 switch (what was escaped) {
1210 handle - in a transliteration (becomes a literal -)
1211 handle \132 octal characters
1212 handle 0x15 hex characters
1213 handle \cV (control V)
1214 handle printf backslashes (\f, \r, \n, etc)
1215 } (end switch)
1216 } (end if backslash)
1217 } (end while character to read)
4e553d73 1218
02aa26ce
NT
1219*/
1220
76e3520e 1221STATIC char *
cea2e8a9 1222S_scan_const(pTHX_ char *start)
79072805 1223{
3280af22 1224 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1225 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1226 register char *s = start; /* start of the constant */
1227 register char *d = SvPVX(sv); /* destination for copies */
1228 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1229 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1230 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1231 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1232 UV uv;
1233
dff6d3cd 1234 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1235 PL_lex_inpat
4a2d328f 1236 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1237 : "";
79072805 1238
2b9d42f0
NIS
1239 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1240 /* If we are doing a trans and we know we want UTF8 set expectation */
1241 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1242 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1243 }
1244
1245
79072805 1246 while (s < send || dorange) {
02aa26ce 1247 /* get transliterations out of the way (they're most literal) */
3280af22 1248 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1249 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1250 if (dorange) {
1ba5c669
JH
1251 I32 i; /* current expanded character */
1252 I32 min; /* first character in range */
1253 I32 max; /* last character in range */
02aa26ce 1254
2b9d42f0 1255 if (has_utf8) {
8973db79
JH
1256 char *c = (char*)utf8_hop((U8*)d, -1);
1257 char *e = d++;
1258 while (e-- > c)
1259 *(e + 1) = *e;
25716404 1260 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1261 /* mark the range as done, and continue */
1262 dorange = FALSE;
1263 didrange = TRUE;
1264 continue;
1265 }
2b9d42f0 1266
02aa26ce 1267 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1268 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1269 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1270 d -= 2; /* eat the first char and the - */
1271
8ada0baa
JH
1272 min = (U8)*d; /* first char in range */
1273 max = (U8)d[1]; /* last char in range */
1274
c2e66d9e 1275 if (min > max) {
01ec43d0 1276 Perl_croak(aTHX_
1ba5c669
JH
1277 "Invalid [] range \"%c-%c\" in transliteration operator",
1278 (char)min, (char)max);
c2e66d9e
GS
1279 }
1280
c7f1f016 1281#ifdef EBCDIC
8ada0baa
JH
1282 if ((isLOWER(min) && isLOWER(max)) ||
1283 (isUPPER(min) && isUPPER(max))) {
1284 if (isLOWER(min)) {
1285 for (i = min; i <= max; i++)
1286 if (isLOWER(i))
db42d148 1287 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1288 } else {
1289 for (i = min; i <= max; i++)
1290 if (isUPPER(i))
db42d148 1291 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1292 }
1293 }
1294 else
1295#endif
1296 for (i = min; i <= max; i++)
1297 *d++ = i;
02aa26ce
NT
1298
1299 /* mark the range as done, and continue */
79072805 1300 dorange = FALSE;
01ec43d0 1301 didrange = TRUE;
79072805 1302 continue;
4e553d73 1303 }
02aa26ce
NT
1304
1305 /* range begins (ignore - as first or last char) */
79072805 1306 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1307 if (didrange) {
1fafa243 1308 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1309 }
2b9d42f0 1310 if (has_utf8) {
25716404 1311 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1312 s++;
1313 continue;
1314 }
79072805
LW
1315 dorange = TRUE;
1316 s++;
01ec43d0
GS
1317 }
1318 else {
1319 didrange = FALSE;
1320 }
79072805 1321 }
02aa26ce
NT
1322
1323 /* if we get here, we're not doing a transliteration */
1324
0f5d15d6
IZ
1325 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1326 except for the last char, which will be done separately. */
3280af22 1327 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1328 if (s[2] == '#') {
1329 while (s < send && *s != ')')
db42d148 1330 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1331 }
1332 else if (s[2] == '{' /* This should match regcomp.c */
1333 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1334 {
cc6b7395 1335 I32 count = 1;
0f5d15d6 1336 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1337 char c;
1338
d9f97599
GS
1339 while (count && (c = *regparse)) {
1340 if (c == '\\' && regparse[1])
1341 regparse++;
4e553d73 1342 else if (c == '{')
cc6b7395 1343 count++;
4e553d73 1344 else if (c == '}')
cc6b7395 1345 count--;
d9f97599 1346 regparse++;
cc6b7395 1347 }
5bdf89e7
IZ
1348 if (*regparse != ')') {
1349 regparse--; /* Leave one char for continuation. */
cc6b7395 1350 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1351 }
0f5d15d6 1352 while (s < regparse)
db42d148 1353 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1354 }
748a9306 1355 }
02aa26ce
NT
1356
1357 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1358 else if (*s == '#' && PL_lex_inpat &&
1359 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1360 while (s+1 < send && *s != '\n')
db42d148 1361 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1362 }
02aa26ce 1363
5d1d4326
JH
1364 /* check for embedded arrays
1365 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1366 */
7e2040f0 1367 else if (*s == '@' && s[1]
5d1d4326 1368 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1369 break;
02aa26ce
NT
1370
1371 /* check for embedded scalars. only stop if we're sure it's a
1372 variable.
1373 */
79072805 1374 else if (*s == '$') {
3280af22 1375 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1376 break;
6002328a 1377 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1378 break; /* in regexp, $ might be tail anchor */
1379 }
02aa26ce 1380
2b9d42f0
NIS
1381 /* End of else if chain - OP_TRANS rejoin rest */
1382
02aa26ce 1383 /* backslashes */
79072805
LW
1384 if (*s == '\\' && s+1 < send) {
1385 s++;
02aa26ce
NT
1386
1387 /* some backslashes we leave behind */
c9f97d15 1388 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1389 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1390 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1391 continue;
1392 }
02aa26ce
NT
1393
1394 /* deprecate \1 in strings and substitution replacements */
3280af22 1395 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1396 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1397 {
599cee73 1398 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1399 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1400 *--s = '$';
1401 break;
1402 }
02aa26ce
NT
1403
1404 /* string-change backslash escapes */
3280af22 1405 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1406 --s;
1407 break;
1408 }
02aa26ce
NT
1409
1410 /* if we get here, it's either a quoted -, or a digit */
79072805 1411 switch (*s) {
02aa26ce
NT
1412
1413 /* quoted - in transliterations */
79072805 1414 case '-':
3280af22 1415 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1416 *d++ = *s++;
1417 continue;
1418 }
1419 /* FALL THROUGH */
1420 default:
11b8faa4 1421 {
7e84c16c 1422 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1423 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1424 "Unrecognized escape \\%c passed through",
1425 *s);
1426 /* default action is to copy the quoted character */
f9a63242 1427 goto default_action;
11b8faa4 1428 }
02aa26ce
NT
1429
1430 /* \132 indicates an octal constant */
79072805
LW
1431 case '0': case '1': case '2': case '3':
1432 case '4': case '5': case '6': case '7':
ba210ebe
JH
1433 {
1434 STRLEN len = 0; /* disallow underscores */
1435 uv = (UV)scan_oct(s, 3, &len);
1436 s += len;
1437 }
012bcf8d 1438 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1439
1440 /* \x24 indicates a hex constant */
79072805 1441 case 'x':
a0ed51b3
LW
1442 ++s;
1443 if (*s == '{') {
1444 char* e = strchr(s, '}');
355860ce
HS
1445 STRLEN len = 1; /* allow underscores */
1446
adaeee49 1447 if (!e) {
a0ed51b3 1448 yyerror("Missing right brace on \\x{}");
355860ce
HS
1449 ++s;
1450 continue;
ba210ebe 1451 }
355860ce 1452 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
ba210ebe 1453 s = e + 1;
a0ed51b3
LW
1454 }
1455 else {
ba210ebe
JH
1456 {
1457 STRLEN len = 0; /* disallow underscores */
1458 uv = (UV)scan_hex(s, 2, &len);
1459 s += len;
1460 }
012bcf8d
GS
1461 }
1462
1463 NUM_ESCAPE_INSERT:
1464 /* Insert oct or hex escaped character.
301d3d20 1465 * There will always enough room in sv since such
db42d148 1466 * escapes will be longer than any UTF-8 sequence
301d3d20 1467 * they can end up as. */
ba7cea30 1468
c7f1f016
NIS
1469 /* We need to map to chars to ASCII before doing the tests
1470 to cover EBCDIC
1471 */
c4d5f83a 1472 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1473 if (!has_utf8 && uv > 255) {
301d3d20
JH
1474 /* Might need to recode whatever we have
1475 * accumulated so far if it contains any
1476 * hibit chars.
1477 *
1478 * (Can't we keep track of that and avoid
1479 * this rescan? --jhi)
012bcf8d 1480 */
c7f1f016 1481 int hicount = 0;
63cd0674
NIS
1482 U8 *c;
1483 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1484 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1485 hicount++;
db42d148 1486 }
012bcf8d 1487 }
63cd0674 1488 if (hicount) {
db42d148
NIS
1489 STRLEN offset = d - SvPVX(sv);
1490 U8 *src, *dst;
1491 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1492 src = (U8 *)d - 1;
1493 dst = src+hicount;
1494 d += hicount;
1495 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1496 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1497 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1498 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1499 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1500 }
1501 else {
63cd0674 1502 *dst-- = *src;
012bcf8d 1503 }
c7f1f016 1504 src--;
012bcf8d
GS
1505 }
1506 }
1507 }
1508
9aa983d2 1509 if (has_utf8 || uv > 255) {
9041c2e3 1510 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1511 has_utf8 = TRUE;
f9a63242
JH
1512 if (PL_lex_inwhat == OP_TRANS &&
1513 PL_sublex_info.sub_op) {
1514 PL_sublex_info.sub_op->op_private |=
1515 (PL_lex_repl ? OPpTRANS_FROM_UTF
1516 : OPpTRANS_TO_UTF);
f9a63242 1517 }
012bcf8d 1518 }
a0ed51b3 1519 else {
012bcf8d 1520 *d++ = (char)uv;
a0ed51b3 1521 }
012bcf8d
GS
1522 }
1523 else {
c4d5f83a 1524 *d++ = (char) uv;
a0ed51b3 1525 }
79072805 1526 continue;
02aa26ce 1527
4a2d328f
IZ
1528 /* \N{latin small letter a} is a named character */
1529 case 'N':
55eda711 1530 ++s;
423cee85
JH
1531 if (*s == '{') {
1532 char* e = strchr(s, '}');
155aba94 1533 SV *res;
423cee85
JH
1534 STRLEN len;
1535 char *str;
4e553d73 1536
423cee85 1537 if (!e) {
5777a3f7 1538 yyerror("Missing right brace on \\N{}");
423cee85
JH
1539 e = s - 1;
1540 goto cont_scan;
1541 }
55eda711
JH
1542 res = newSVpvn(s + 1, e - s - 1);
1543 res = new_constant( Nullch, 0, "charnames",
1544 res, Nullsv, "\\N{...}" );
f9a63242
JH
1545 if (has_utf8)
1546 sv_utf8_upgrade(res);
423cee85 1547 str = SvPV(res,len);
89491803 1548 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1549 char *ostart = SvPVX(sv);
1550 SvCUR_set(sv, d - ostart);
1551 SvPOK_on(sv);
e4f3eed8 1552 *d = '\0';
f08d6ad9 1553 sv_utf8_upgrade(sv);
d2f449dd
SB
1554 /* this just broke our allocation above... */
1555 SvGROW(sv, send - start);
f08d6ad9 1556 d = SvPVX(sv) + SvCUR(sv);
89491803 1557 has_utf8 = TRUE;
f08d6ad9 1558 }
423cee85
JH
1559 if (len > e - s + 4) {
1560 char *odest = SvPVX(sv);
1561
8973db79 1562 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1563 d = SvPVX(sv) + (d - odest);
1564 }
1565 Copy(str, d, len, char);
1566 d += len;
1567 SvREFCNT_dec(res);
1568 cont_scan:
1569 s = e + 1;
1570 }
1571 else
5777a3f7 1572 yyerror("Missing braces on \\N{}");
423cee85
JH
1573 continue;
1574
02aa26ce 1575 /* \c is a control character */
79072805
LW
1576 case 'c':
1577 s++;
ba210ebe
JH
1578 {
1579 U8 c = *s++;
c7f1f016
NIS
1580#ifdef EBCDIC
1581 if (isLOWER(c))
1582 c = toUPPER(c);
1583#endif
db42d148 1584 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1585 }
79072805 1586 continue;
02aa26ce
NT
1587
1588 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1589 case 'b':
db42d148 1590 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1591 break;
1592 case 'n':
db42d148 1593 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1594 break;
1595 case 'r':
db42d148 1596 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1597 break;
1598 case 'f':
db42d148 1599 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1600 break;
1601 case 't':
db42d148 1602 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1603 break;
34a3fe2a 1604 case 'e':
db42d148 1605 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1606 break;
1607 case 'a':
db42d148 1608 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1609 break;
02aa26ce
NT
1610 } /* end switch */
1611
79072805
LW
1612 s++;
1613 continue;
02aa26ce
NT
1614 } /* end if (backslash) */
1615
f9a63242 1616 default_action:
2b9d42f0
NIS
1617 /* If we started with encoded form, or already know we want it
1618 and then encode the next character */
1619 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1620 STRLEN len = 1;
1621 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1622 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1623 s += len;
1624 if (need > len) {
1625 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1626 STRLEN off = d - SvPVX(sv);
1627 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1628 }
1629 d = (char*)uvchr_to_utf8((U8*)d, uv);
1630 has_utf8 = TRUE;
1631 }
1632 else {
1633 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1634 }
02aa26ce
NT
1635 } /* while loop to process each character */
1636
1637 /* terminate the string and set up the sv */
79072805 1638 *d = '\0';
463ee0b2 1639 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1640 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1641 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1642
79072805 1643 SvPOK_on(sv);
2b9d42f0 1644 if (has_utf8) {
7e2040f0 1645 SvUTF8_on(sv);
2b9d42f0
NIS
1646 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1647 PL_sublex_info.sub_op->op_private |=
1648 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1649 }
1650 }
79072805 1651
02aa26ce 1652 /* shrink the sv if we allocated more than we used */
79072805
LW
1653 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1654 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1655 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1656 }
02aa26ce 1657
9b599b2a 1658 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1659 if (s > PL_bufptr) {
1660 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1661 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1662 sv, Nullsv,
4e553d73 1663 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1664 ? "tr"
3280af22 1665 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1666 ? "s"
1667 : "qq")));
79072805 1668 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1669 } else
8990e307 1670 SvREFCNT_dec(sv);
79072805
LW
1671 return s;
1672}
1673
ffb4593c
NT
1674/* S_intuit_more
1675 * Returns TRUE if there's more to the expression (e.g., a subscript),
1676 * FALSE otherwise.
ffb4593c
NT
1677 *
1678 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1679 *
1680 * ->[ and ->{ return TRUE
1681 * { and [ outside a pattern are always subscripts, so return TRUE
1682 * if we're outside a pattern and it's not { or [, then return FALSE
1683 * if we're in a pattern and the first char is a {
1684 * {4,5} (any digits around the comma) returns FALSE
1685 * if we're in a pattern and the first char is a [
1686 * [] returns FALSE
1687 * [SOMETHING] has a funky algorithm to decide whether it's a
1688 * character class or not. It has to deal with things like
1689 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1690 * anything else returns TRUE
1691 */
1692
9cbb5ea2
GS
1693/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1694
76e3520e 1695STATIC int
cea2e8a9 1696S_intuit_more(pTHX_ register char *s)
79072805 1697{
3280af22 1698 if (PL_lex_brackets)
79072805
LW
1699 return TRUE;
1700 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1701 return TRUE;
1702 if (*s != '{' && *s != '[')
1703 return FALSE;
3280af22 1704 if (!PL_lex_inpat)
79072805
LW
1705 return TRUE;
1706
1707 /* In a pattern, so maybe we have {n,m}. */
1708 if (*s == '{') {
1709 s++;
1710 if (!isDIGIT(*s))
1711 return TRUE;
1712 while (isDIGIT(*s))
1713 s++;
1714 if (*s == ',')
1715 s++;
1716 while (isDIGIT(*s))
1717 s++;
1718 if (*s == '}')
1719 return FALSE;
1720 return TRUE;
1721
1722 }
1723
1724 /* On the other hand, maybe we have a character class */
1725
1726 s++;
1727 if (*s == ']' || *s == '^')
1728 return FALSE;
1729 else {
ffb4593c 1730 /* this is terrifying, and it works */
79072805
LW
1731 int weight = 2; /* let's weigh the evidence */
1732 char seen[256];
f27ffc4a 1733 unsigned char un_char = 255, last_un_char;
93a17b20 1734 char *send = strchr(s,']');
3280af22 1735 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1736
1737 if (!send) /* has to be an expression */
1738 return TRUE;
1739
1740 Zero(seen,256,char);
1741 if (*s == '$')
1742 weight -= 3;
1743 else if (isDIGIT(*s)) {
1744 if (s[1] != ']') {
1745 if (isDIGIT(s[1]) && s[2] == ']')
1746 weight -= 10;
1747 }
1748 else
1749 weight -= 100;
1750 }
1751 for (; s < send; s++) {
1752 last_un_char = un_char;
1753 un_char = (unsigned char)*s;
1754 switch (*s) {
1755 case '@':
1756 case '&':
1757 case '$':
1758 weight -= seen[un_char] * 10;
7e2040f0 1759 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1760 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1761 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1762 weight -= 100;
1763 else
1764 weight -= 10;
1765 }
1766 else if (*s == '$' && s[1] &&
93a17b20
LW
1767 strchr("[#!%*<>()-=",s[1])) {
1768 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1769 weight -= 10;
1770 else
1771 weight -= 1;
1772 }
1773 break;
1774 case '\\':
1775 un_char = 254;
1776 if (s[1]) {
93a17b20 1777 if (strchr("wds]",s[1]))
79072805
LW
1778 weight += 100;
1779 else if (seen['\''] || seen['"'])
1780 weight += 1;
93a17b20 1781 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1782 weight += 40;
1783 else if (isDIGIT(s[1])) {
1784 weight += 40;
1785 while (s[1] && isDIGIT(s[1]))
1786 s++;
1787 }
1788 }
1789 else
1790 weight += 100;
1791 break;
1792 case '-':
1793 if (s[1] == '\\')
1794 weight += 50;
93a17b20 1795 if (strchr("aA01! ",last_un_char))
79072805 1796 weight += 30;
93a17b20 1797 if (strchr("zZ79~",s[1]))
79072805 1798 weight += 30;
f27ffc4a
GS
1799 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1800 weight -= 5; /* cope with negative subscript */
79072805
LW
1801 break;
1802 default:
93a17b20 1803 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1804 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1805 char *d = tmpbuf;
1806 while (isALPHA(*s))
1807 *d++ = *s++;
1808 *d = '\0';
1809 if (keyword(tmpbuf, d - tmpbuf))
1810 weight -= 150;
1811 }
1812 if (un_char == last_un_char + 1)
1813 weight += 5;
1814 weight -= seen[un_char];
1815 break;
1816 }
1817 seen[un_char]++;
1818 }
1819 if (weight >= 0) /* probably a character class */
1820 return FALSE;
1821 }
1822
1823 return TRUE;
1824}
ffed7fef 1825
ffb4593c
NT
1826/*
1827 * S_intuit_method
1828 *
1829 * Does all the checking to disambiguate
1830 * foo bar
1831 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1832 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1833 *
1834 * First argument is the stuff after the first token, e.g. "bar".
1835 *
1836 * Not a method if bar is a filehandle.
1837 * Not a method if foo is a subroutine prototyped to take a filehandle.
1838 * Not a method if it's really "Foo $bar"
1839 * Method if it's "foo $bar"
1840 * Not a method if it's really "print foo $bar"
1841 * Method if it's really "foo package::" (interpreted as package->foo)
1842 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1843 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1844 * =>
1845 */
1846
76e3520e 1847STATIC int
cea2e8a9 1848S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1849{
1850 char *s = start + (*start == '$');
3280af22 1851 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1852 STRLEN len;
1853 GV* indirgv;
1854
1855 if (gv) {
b6c543e3 1856 CV *cv;
a0d0e21e
LW
1857 if (GvIO(gv))
1858 return 0;
b6c543e3
IZ
1859 if ((cv = GvCVu(gv))) {
1860 char *proto = SvPVX(cv);
1861 if (proto) {
1862 if (*proto == ';')
1863 proto++;
1864 if (*proto == '*')
1865 return 0;
1866 }
1867 } else
a0d0e21e
LW
1868 gv = 0;
1869 }
8903cb82 1870 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1871 /* start is the beginning of the possible filehandle/object,
1872 * and s is the end of it
1873 * tmpbuf is a copy of it
1874 */
1875
a0d0e21e 1876 if (*start == '$') {
3280af22 1877 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1878 return 0;
1879 s = skipspace(s);
3280af22
NIS
1880 PL_bufptr = start;
1881 PL_expect = XREF;
a0d0e21e
LW
1882 return *s == '(' ? FUNCMETH : METHOD;
1883 }
1884 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1885 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1886 len -= 2;
1887 tmpbuf[len] = '\0';
1888 goto bare_package;
1889 }
1890 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1891 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1892 return 0;
1893 /* filehandle or package name makes it a method */
89bfa8cd 1894 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1895 s = skipspace(s);
3280af22 1896 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1897 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1898 bare_package:
3280af22 1899 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1900 newSVpvn(tmpbuf,len));
3280af22
NIS
1901 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1902 PL_expect = XTERM;
a0d0e21e 1903 force_next(WORD);
3280af22 1904 PL_bufptr = s;
a0d0e21e
LW
1905 return *s == '(' ? FUNCMETH : METHOD;
1906 }
1907 }
1908 return 0;
1909}
1910
ffb4593c
NT
1911/*
1912 * S_incl_perldb
1913 * Return a string of Perl code to load the debugger. If PERL5DB
1914 * is set, it will return the contents of that, otherwise a
1915 * compile-time require of perl5db.pl.
1916 */
1917
76e3520e 1918STATIC char*
cea2e8a9 1919S_incl_perldb(pTHX)
a0d0e21e 1920{
3280af22 1921 if (PL_perldb) {
76e3520e 1922 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1923
1924 if (pdb)
1925 return pdb;
61bb5906 1926 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1927 return "BEGIN { require 'perl5db.pl' }";
1928 }
1929 return "";
1930}
1931
1932
16d20bd9 1933/* Encoded script support. filter_add() effectively inserts a
4e553d73 1934 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1935 * Note that the filter function only applies to the current source file
1936 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1937 *
1938 * The datasv parameter (which may be NULL) can be used to pass
1939 * private data to this instance of the filter. The filter function
1940 * can recover the SV using the FILTER_DATA macro and use it to
1941 * store private buffers and state information.
1942 *
1943 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1944 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1945 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1946 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1947 * private use must be set using malloc'd pointers.
1948 */
16d20bd9
AD
1949
1950SV *
864dbfa3 1951Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1952{
f4c556ac
GS
1953 if (!funcp)
1954 return Nullsv;
1955
3280af22
NIS
1956 if (!PL_rsfp_filters)
1957 PL_rsfp_filters = newAV();
16d20bd9 1958 if (!datasv)
8c52afec 1959 datasv = NEWSV(255,0);
16d20bd9 1960 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1961 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1962 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1963 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1964 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1965 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1966 av_unshift(PL_rsfp_filters, 1);
1967 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1968 return(datasv);
1969}
4e553d73 1970
16d20bd9
AD
1971
1972/* Delete most recently added instance of this filter function. */
a0d0e21e 1973void
864dbfa3 1974Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1975{
e0c19803 1976 SV *datasv;
f4c556ac 1977 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1978 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1979 return;
1980 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1981 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1982 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1983 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1984 IoANY(datasv) = (void *)NULL;
3280af22 1985 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1986
16d20bd9
AD
1987 return;
1988 }
1989 /* we need to search for the correct entry and clear it */
cea2e8a9 1990 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1991}
1992
1993
1994/* Invoke the n'th filter function for the current rsfp. */
1995I32
864dbfa3 1996Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1997
1998
8ac85365 1999 /* 0 = read one text line */
a0d0e21e 2000{
16d20bd9
AD
2001 filter_t funcp;
2002 SV *datasv = NULL;
e50aee73 2003
3280af22 2004 if (!PL_rsfp_filters)
16d20bd9 2005 return -1;
3280af22 2006 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2007 /* Provide a default input filter to make life easy. */
2008 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2009 DEBUG_P(PerlIO_printf(Perl_debug_log,
2010 "filter_read %d: from rsfp\n", idx));
4e553d73 2011 if (maxlen) {
16d20bd9
AD
2012 /* Want a block */
2013 int len ;
2014 int old_len = SvCUR(buf_sv) ;
2015
2016 /* ensure buf_sv is large enough */
2017 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2018 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2019 if (PerlIO_error(PL_rsfp))
37120919
AD
2020 return -1; /* error */
2021 else
2022 return 0 ; /* end of file */
2023 }
16d20bd9
AD
2024 SvCUR_set(buf_sv, old_len + len) ;
2025 } else {
2026 /* Want a line */
3280af22
NIS
2027 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2028 if (PerlIO_error(PL_rsfp))
37120919
AD
2029 return -1; /* error */
2030 else
2031 return 0 ; /* end of file */
2032 }
16d20bd9
AD
2033 }
2034 return SvCUR(buf_sv);
2035 }
2036 /* Skip this filter slot if filter has been deleted */
3280af22 2037 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2038 DEBUG_P(PerlIO_printf(Perl_debug_log,
2039 "filter_read %d: skipped (filter deleted)\n",
2040 idx));
16d20bd9
AD
2041 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2042 }
2043 /* Get function pointer hidden within datasv */
4755096e 2044 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2045 DEBUG_P(PerlIO_printf(Perl_debug_log,
2046 "filter_read %d: via function %p (%s)\n",
2047 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2048 /* Call function. The function is expected to */
2049 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2050 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2051 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2052}
2053
76e3520e 2054STATIC char *
cea2e8a9 2055S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2056{
c39cd008 2057#ifdef PERL_CR_FILTER
3280af22 2058 if (!PL_rsfp_filters) {
c39cd008 2059 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2060 }
2061#endif
3280af22 2062 if (PL_rsfp_filters) {
16d20bd9 2063
55497cff
PP
2064 if (!append)
2065 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2066 if (FILTER_READ(0, sv, 0) > 0)
2067 return ( SvPVX(sv) ) ;
2068 else
2069 return Nullch ;
2070 }
9d116dd7 2071 else
fd049845 2072 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2073}
2074
01ec43d0
GS
2075STATIC HV *
2076S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2077{
2078 GV *gv;
2079
01ec43d0 2080 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2081 return PL_curstash;
2082
2083 if (len > 2 &&
2084 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2085 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2086 {
2087 return GvHV(gv); /* Foo:: */
def3634b
GS
2088 }
2089
2090 /* use constant CLASS => 'MyClass' */
2091 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2092 SV *sv;
2093 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2094 pkgname = SvPV_nolen(sv);
2095 }
2096 }
2097
2098 return gv_stashpv(pkgname, FALSE);
2099}
a0d0e21e 2100
748a9306
LW
2101#ifdef DEBUGGING
2102 static char* exp_name[] =
09bef843
SB
2103 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2104 "ATTRTERM", "TERMBLOCK"
2105 };
748a9306 2106#endif
463ee0b2 2107
02aa26ce
NT
2108/*
2109 yylex
2110
2111 Works out what to call the token just pulled out of the input
2112 stream. The yacc parser takes care of taking the ops we return and
2113 stitching them into a tree.
2114
2115 Returns:
2116 PRIVATEREF
2117
2118 Structure:
2119 if read an identifier
2120 if we're in a my declaration
2121 croak if they tried to say my($foo::bar)
2122 build the ops for a my() declaration
2123 if it's an access to a my() variable
2124 are we in a sort block?
2125 croak if my($a); $a <=> $b
2126 build ops for access to a my() variable
2127 if in a dq string, and they've said @foo and we can't find @foo
2128 croak
2129 build ops for a bareword
2130 if we already built the token before, use it.
2131*/
2132
dba4d153 2133#ifdef USE_PURE_BISON
864dbfa3 2134int
dba4d153 2135Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2136{
20141f0e
RI
2137 int r;
2138
6f202aea 2139 yyactlevel++;
20141f0e
RI
2140 yylval_pointer[yyactlevel] = lvalp;
2141 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2142 if (yyactlevel >= YYMAXLEVEL)
2143 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2144
dba4d153 2145 r = Perl_yylex(aTHX);
20141f0e 2146
d8ae6756
RI
2147 if (yyactlevel > 0)
2148 yyactlevel--;
20141f0e
RI
2149
2150 return r;
2151}
dba4d153 2152#endif
20141f0e 2153
dba4d153
JH
2154#ifdef __SC__
2155#pragma segment Perl_yylex
2156#endif
dba4d153 2157int
dba4d153 2158Perl_yylex(pTHX)
20141f0e 2159{
79072805 2160 register char *s;
378cc40b 2161 register char *d;
79072805 2162 register I32 tmp;
463ee0b2 2163 STRLEN len;
161b471a
NIS
2164 GV *gv = Nullgv;
2165 GV **gvp = 0;
aa7440fb 2166 bool bof = FALSE;
a687059c 2167
02aa26ce 2168 /* check if there's an identifier for us to look at */
3280af22 2169 if (PL_pending_ident) {
02aa26ce 2170 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2171 char pit = PL_pending_ident;
2172 PL_pending_ident = 0;
bbce6d69 2173
607df283 2174 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2175 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
607df283 2176
02aa26ce
NT
2177 /* if we're in a my(), we can't allow dynamics here.
2178 $foo'bar has already been turned into $foo::bar, so
2179 just check for colons.
2180
2181 if it's a legal name, the OP is a PADANY.
2182 */
3280af22 2183 if (PL_in_my) {
77ca0c92 2184 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2185 if (strchr(PL_tokenbuf,':'))
2186 yyerror(Perl_form(aTHX_ "No package name allowed for "
2187 "variable %s in \"our\"",
2188 PL_tokenbuf));
77ca0c92
LW
2189 tmp = pad_allocmy(PL_tokenbuf);
2190 }
2191 else {
2192 if (strchr(PL_tokenbuf,':'))
2193 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2194
77ca0c92
LW
2195 yylval.opval = newOP(OP_PADANY, 0);
2196 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2197 return PRIVATEREF;
2198 }
bbce6d69
PP
2199 }
2200
4e553d73 2201 /*
02aa26ce
NT
2202 build the ops for accesses to a my() variable.
2203
2204 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2205 then used in a comparison. This catches most, but not
2206 all cases. For instance, it catches
2207 sort { my($a); $a <=> $b }
2208 but not
2209 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2210 (although why you'd do that is anyone's guess).
2211 */
2212
3280af22 2213 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2214#ifdef USE_THREADS
54b9620d 2215 /* Check for single character per-thread SVs */
3280af22
NIS
2216 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2217 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2218 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2219 {
2faa37cc 2220 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2221 yylval.opval->op_targ = tmp;
2222 return PRIVATEREF;
2223 }
2224#endif /* USE_THREADS */
3280af22 2225 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2226 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2227 /* might be an "our" variable" */
f472eb5c 2228 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2229 /* build ops for a bareword */
f472eb5c
GS
2230 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2231 sv_catpvn(sym, "::", 2);
2232 sv_catpv(sym, PL_tokenbuf+1);
2233 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2234 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2235 gv_fetchpv(SvPVX(sym),
77ca0c92 2236 (PL_in_eval
f472eb5c
GS
2237 ? (GV_ADDMULTI | GV_ADDINEVAL)
2238 : TRUE
77ca0c92
LW
2239 ),
2240 ((PL_tokenbuf[0] == '$') ? SVt_PV
2241 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2242 : SVt_PVHV));
2243 return WORD;
2244 }
2245
02aa26ce 2246 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2247 if (PL_last_lop_op == OP_SORT &&
2248 PL_tokenbuf[0] == '$' &&
2249 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2250 && !PL_tokenbuf[2])
bbce6d69 2251 {
3280af22
NIS
2252 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2253 d < PL_bufend && *d != '\n';
a863c7d1
MB
2254 d++)
2255 {
2256 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2257 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2258 PL_tokenbuf);
a863c7d1 2259 }
bbce6d69
PP
2260 }
2261 }
bbce6d69 2262
a863c7d1
MB
2263 yylval.opval = newOP(OP_PADANY, 0);
2264 yylval.opval->op_targ = tmp;
2265 return PRIVATEREF;
2266 }
bbce6d69
PP
2267 }
2268
02aa26ce
NT
2269 /*
2270 Whine if they've said @foo in a doublequoted string,
2271 and @foo isn't a variable we can find in the symbol
2272 table.
2273 */
3280af22
NIS
2274 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2275 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2276 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2277 && ckWARN(WARN_AMBIGUOUS))
2278 {
2279 /* Downgraded from fatal to warning 20000522 mjd */
2280 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2281 "Possible unintended interpolation of %s in string",
2282 PL_tokenbuf);
2283 }
bbce6d69
PP
2284 }
2285
02aa26ce 2286 /* build ops for a bareword */
3280af22 2287 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2288 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2289 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2290 ((PL_tokenbuf[0] == '$') ? SVt_PV
2291 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2292 : SVt_PVHV));
2293 return WORD;
2294 }
2295
02aa26ce
NT
2296 /* no identifier pending identification */
2297
3280af22 2298 switch (PL_lex_state) {
79072805
LW
2299#ifdef COMMENTARY
2300 case LEX_NORMAL: /* Some compilers will produce faster */
2301 case LEX_INTERPNORMAL: /* code if we comment these out. */
2302 break;
2303#endif
2304
09bef843 2305 /* when we've already built the next token, just pull it out of the queue */
79072805 2306 case LEX_KNOWNEXT:
3280af22
NIS
2307 PL_nexttoke--;
2308 yylval = PL_nextval[PL_nexttoke];
2309 if (!PL_nexttoke) {
2310 PL_lex_state = PL_lex_defer;
2311 PL_expect = PL_lex_expect;
2312 PL_lex_defer = LEX_NORMAL;
463ee0b2 2313 }
607df283 2314 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2315 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2316 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2317
3280af22 2318 return(PL_nexttype[PL_nexttoke]);
79072805 2319
02aa26ce 2320 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2321 when we get here, PL_bufptr is at the \
02aa26ce 2322 */
79072805
LW
2323 case LEX_INTERPCASEMOD:
2324#ifdef DEBUGGING
3280af22 2325 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2326 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2327#endif
02aa26ce 2328 /* handle \E or end of string */
3280af22 2329 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2330 char oldmod;
02aa26ce
NT
2331
2332 /* if at a \E */
3280af22
NIS
2333 if (PL_lex_casemods) {
2334 oldmod = PL_lex_casestack[--PL_lex_casemods];
2335 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2336
3280af22
NIS
2337 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2338 PL_bufptr += 2;
2339 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2340 }
79072805
LW
2341 return ')';
2342 }
3280af22
NIS
2343 if (PL_bufptr != PL_bufend)
2344 PL_bufptr += 2;
2345 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2346 return yylex();
79072805
LW
2347 }
2348 else {
607df283 2349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2350 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2351 s = PL_bufptr + 1;
79072805
LW
2352 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2353 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2354 if (strchr("LU", *s) &&
3280af22 2355 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2356 {
3280af22 2357 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2358 return ')';
2359 }
3280af22
NIS
2360 if (PL_lex_casemods > 10) {
2361 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2362 if (newlb != PL_lex_casestack) {
a0d0e21e 2363 SAVEFREEPV(newlb);
3280af22 2364 PL_lex_casestack = newlb;
a0d0e21e
LW
2365 }
2366 }
3280af22
NIS
2367 PL_lex_casestack[PL_lex_casemods++] = *s;
2368 PL_lex_casestack[PL_lex_casemods] = '\0';
2369 PL_lex_state = LEX_INTERPCONCAT;
2370 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2371 force_next('(');
2372 if (*s == 'l')
3280af22 2373 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2374 else if (*s == 'u')
3280af22 2375 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2376 else if (*s == 'L')
3280af22 2377 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2378 else if (*s == 'U')
3280af22 2379 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2380 else if (*s == 'Q')
3280af22 2381 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2382 else
cea2e8a9 2383 Perl_croak(aTHX_ "panic: yylex");
3280af22 2384 PL_bufptr = s + 1;
79072805 2385 force_next(FUNC);
3280af22
NIS
2386 if (PL_lex_starts) {
2387 s = PL_bufptr;
2388 PL_lex_starts = 0;
79072805
LW
2389 Aop(OP_CONCAT);
2390 }
2391 else
cea2e8a9 2392 return yylex();
79072805
LW
2393 }
2394
55497cff
PP
2395 case LEX_INTERPPUSH:
2396 return sublex_push();
2397
79072805 2398 case LEX_INTERPSTART:
3280af22 2399 if (PL_bufptr == PL_bufend)
79072805 2400 return sublex_done();
607df283 2401 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2402 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2403 PL_expect = XTERM;
2404 PL_lex_dojoin = (*PL_bufptr == '@');
2405 PL_lex_state = LEX_INTERPNORMAL;
2406 if (PL_lex_dojoin) {
2407 PL_nextval[PL_nexttoke].ival = 0;
79072805 2408 force_next(',');
554b3eca 2409#ifdef USE_THREADS
533c011a
NIS
2410 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2411 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2412 force_next(PRIVATEREF);
2413#else
a0d0e21e 2414 force_ident("\"", '$');
554b3eca 2415#endif /* USE_THREADS */
3280af22 2416 PL_nextval[PL_nexttoke].ival = 0;
79072805 2417 force_next('$');
3280af22 2418 PL_nextval[PL_nexttoke].ival = 0;
79072805 2419 force_next('(');
3280af22 2420 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2421 force_next(FUNC);
2422 }
3280af22
NIS
2423 if (PL_lex_starts++) {
2424 s = PL_bufptr;
79072805
LW
2425 Aop(OP_CONCAT);
2426 }
cea2e8a9 2427 return yylex();
79072805
LW
2428
2429 case LEX_INTERPENDMAYBE:
3280af22
NIS
2430 if (intuit_more(PL_bufptr)) {
2431 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2432 break;
2433 }
2434 /* FALL THROUGH */
2435
2436 case LEX_INTERPEND:
3280af22
NIS
2437 if (PL_lex_dojoin) {
2438 PL_lex_dojoin = FALSE;
2439 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2440 return ')';
2441 }
43a16006 2442 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2443 && SvEVALED(PL_lex_repl))
43a16006 2444 {
e9fa98b2 2445 if (PL_bufptr != PL_bufend)
cea2e8a9 2446 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2447 PL_lex_repl = Nullsv;
2448 }
79072805
LW
2449 /* FALLTHROUGH */
2450 case LEX_INTERPCONCAT:
2451#ifdef DEBUGGING
3280af22 2452 if (PL_lex_brackets)
cea2e8a9 2453 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2454#endif
3280af22 2455 if (PL_bufptr == PL_bufend)
79072805
LW
2456 return sublex_done();
2457
3280af22
NIS
2458 if (SvIVX(PL_linestr) == '\'') {
2459 SV *sv = newSVsv(PL_linestr);
2460 if (!PL_lex_inpat)
76e3520e 2461 sv = tokeq(sv);
3280af22 2462 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2463 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2464 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2465 s = PL_bufend;
79072805
LW
2466 }
2467 else {
3280af22 2468 s = scan_const(PL_bufptr);
79072805 2469 if (*s == '\\')
3280af22 2470 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2471 else
3280af22 2472 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2473 }
2474
3280af22
NIS
2475 if (s != PL_bufptr) {
2476 PL_nextval[PL_nexttoke] = yylval;
2477 PL_expect = XTERM;
79072805 2478 force_next(THING);
3280af22 2479 if (PL_lex_starts++)
79072805
LW
2480 Aop(OP_CONCAT);
2481 else {
3280af22 2482 PL_bufptr = s;
cea2e8a9 2483 return yylex();
79072805
LW
2484 }
2485 }
2486
cea2e8a9 2487 return yylex();
a0d0e21e 2488 case LEX_FORMLINE:
3280af22
NIS
2489 PL_lex_state = LEX_NORMAL;
2490 s = scan_formline(PL_bufptr);
2491 if (!PL_lex_formbrack)
a0d0e21e
LW
2492 goto rightbracket;
2493 OPERATOR(';');
79072805
LW
2494 }
2495
3280af22
NIS
2496 s = PL_bufptr;
2497 PL_oldoldbufptr = PL_oldbufptr;
2498 PL_oldbufptr = s;
607df283 2499 DEBUG_T( {
bf49b057
GS
2500 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2501 exp_name[PL_expect], s);
5f80b19c 2502 } );
463ee0b2
LW
2503
2504 retry:
378cc40b
LW
2505 switch (*s) {
2506 default:
7e2040f0 2507 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2508 goto keylookup;
cea2e8a9 2509 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2510 case 4:
2511 case 26:
2512 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2513 case 0:
3280af22
NIS
2514 if (!PL_rsfp) {
2515 PL_last_uni = 0;
2516 PL_last_lop = 0;
2517 if (PL_lex_brackets)
d98d5fff 2518 yyerror("Missing right curly or square bracket");
4e553d73 2519 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2520 "### Tokener got EOF\n");
5f80b19c 2521 } );
79072805 2522 TOKEN(0);
463ee0b2 2523 }
3280af22 2524 if (s++ < PL_bufend)
a687059c 2525 goto retry; /* ignore stray nulls */
3280af22
NIS
2526 PL_last_uni = 0;
2527 PL_last_lop = 0;
2528 if (!PL_in_eval && !PL_preambled) {
2529 PL_preambled = TRUE;
2530 sv_setpv(PL_linestr,incl_perldb());
2531 if (SvCUR(PL_linestr))
2532 sv_catpv(PL_linestr,";");
2533 if (PL_preambleav){
2534 while(AvFILLp(PL_preambleav) >= 0) {
2535 SV *tmpsv = av_shift(PL_preambleav);
2536 sv_catsv(PL_linestr, tmpsv);
2537 sv_catpv(PL_linestr, ";");
91b7def8
PP
2538 sv_free(tmpsv);
2539 }
3280af22
NIS
2540 sv_free((SV*)PL_preambleav);
2541 PL_preambleav = NULL;
91b7def8 2542 }
3280af22
NIS
2543 if (PL_minus_n || PL_minus_p) {
2544 sv_catpv(PL_linestr, "LINE: while (<>) {");
2545 if (PL_minus_l)
2546 sv_catpv(PL_linestr,"chomp;");
2547 if (PL_minus_a) {
3280af22
NIS
2548 if (PL_minus_F) {
2549 if (strchr("/'\"", *PL_splitstr)
2550 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2551 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2552 else {
2553 char delim;
2554 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2555 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2556 delim = *s;
75c72d73 2557 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2558 "q" + (delim == '\''), delim);
3280af22 2559 for (s = PL_splitstr; *s; s++) {
54310121 2560 if (*s == '\\')
3280af22
NIS
2561 sv_catpvn(PL_linestr, "\\", 1);
2562 sv_catpvn(PL_linestr, s, 1);
54310121 2563 }
cea2e8a9 2564 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2565 }
2304df62
AD
2566 }
2567 else
75c72d73 2568 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2569 }
79072805 2570 }
3280af22
NIS
2571 sv_catpv(PL_linestr, "\n");
2572 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2573 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2574 PL_last_lop = PL_last_uni = Nullch;
3280af22 2575 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2576 SV *sv = NEWSV(85,0);
2577
2578 sv_upgrade(sv, SVt_PVMG);
3280af22 2579 sv_setsv(sv,PL_linestr);
57843af0 2580 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2581 }
79072805 2582 goto retry;
a687059c 2583 }
e929a76b 2584 do {
aa7440fb 2585 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2586 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2587 fake_eof:
2588 if (PL_rsfp) {
2589 if (PL_preprocess && !PL_in_eval)
2590 (void)PerlProc_pclose(PL_rsfp);
2591 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2592 PerlIO_clearerr(PL_rsfp);
2593 else
2594 (void)PerlIO_close(PL_rsfp);
2595 PL_rsfp = Nullfp;
2596 PL_doextract = FALSE;
2597 }
2598 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2599 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2600 sv_catpv(PL_linestr,";}");
2601 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2602 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2603 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2604 PL_minus_n = PL_minus_p = 0;
2605 goto retry;
2606 }
2607 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2608 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2609 sv_setpv(PL_linestr,"");
2610 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2611 }
2612 /* if it looks like the start of a BOM, check if it in fact is */
2613 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2614#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2615# ifdef __GNU_LIBRARY__
2616# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2617# define FTELL_FOR_PIPE_IS_BROKEN
2618# endif
e3f494f1
JH
2619# else
2620# ifdef __GLIBC__
2621# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2622# define FTELL_FOR_PIPE_IS_BROKEN
2623# endif
2624# endif
226017aa
DD
2625# endif
2626#endif
2627#ifdef FTELL_FOR_PIPE_IS_BROKEN
2628 /* This loses the possibility to detect the bof
2629 * situation on perl -P when the libc5 is being used.
2630 * Workaround? Maybe attach some extra state to PL_rsfp?
2631 */
2632 if (!PL_preprocess)
7e28d3af 2633 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2634#else
7e28d3af 2635 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2636#endif
7e28d3af 2637 if (bof) {
3280af22 2638 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2639 s = swallow_bom((U8*)s);
e929a76b 2640 }
378cc40b 2641 }
3280af22 2642 if (PL_doextract) {
a0d0e21e 2643 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2644 PL_doextract = FALSE;
a0d0e21e
LW
2645
2646 /* Incest with pod. */
2647 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2648 sv_setpv(PL_linestr, "");
2649 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2650 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2651 PL_last_lop = PL_last_uni = Nullch;
3280af22 2652 PL_doextract = FALSE;
a0d0e21e 2653 }
4e553d73 2654 }
463ee0b2 2655 incline(s);
3280af22
NIS
2656 } while (PL_doextract);
2657 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2658 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2659 SV *sv = NEWSV(85,0);
a687059c 2660
93a17b20 2661 sv_upgrade(sv, SVt_PVMG);
3280af22 2662 sv_setsv(sv,PL_linestr);
57843af0 2663 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2664 }
3280af22 2665 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2666 PL_last_lop = PL_last_uni = Nullch;
57843af0 2667 if (CopLINE(PL_curcop) == 1) {
3280af22 2668 while (s < PL_bufend && isSPACE(*s))
79072805 2669 s++;
a0d0e21e 2670 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2671 s++;
44a8e56a 2672 d = Nullch;
3280af22 2673 if (!PL_in_eval) {
44a8e56a
PP
2674 if (*s == '#' && *(s+1) == '!')
2675 d = s + 2;
2676#ifdef ALTERNATE_SHEBANG
2677 else {
2678 static char as[] = ALTERNATE_SHEBANG;
2679 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2680 d = s + (sizeof(as) - 1);
2681 }
2682#endif /* ALTERNATE_SHEBANG */
2683 }
2684 if (d) {
b8378b72 2685 char *ipath;
774d564b 2686 char *ipathend;
b8378b72 2687
774d564b 2688 while (isSPACE(*d))
b8378b72
CS
2689 d++;
2690 ipath = d;
774d564b
PP
2691 while (*d && !isSPACE(*d))
2692 d++;
2693 ipathend = d;
2694
2695#ifdef ARG_ZERO_IS_SCRIPT
2696 if (ipathend > ipath) {
2697 /*
2698 * HP-UX (at least) sets argv[0] to the script name,
2699 * which makes $^X incorrect. And Digital UNIX and Linux,
2700 * at least, set argv[0] to the basename of the Perl
2701 * interpreter. So, having found "#!", we'll set it right.
2702 */
2703 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2704 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2705 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2706 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2707 SvSETMAGIC(x);
2708 }
774d564b 2709 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2710 }
774d564b 2711#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2712
2713 /*
2714 * Look for options.
2715 */
748a9306 2716 d = instr(s,"perl -");
84e30d1a 2717 if (!d) {
748a9306 2718 d = instr(s,"perl");
84e30d1a
GS
2719#if defined(DOSISH)
2720 /* avoid getting into infinite loops when shebang
2721 * line contains "Perl" rather than "perl" */
2722 if (!d) {
2723 for (d = ipathend-4; d >= ipath; --d) {
2724 if ((*d == 'p' || *d == 'P')
2725 && !ibcmp(d, "perl", 4))
2726 {
2727 break;
2728 }
2729 }
2730 if (d < ipath)
2731 d = Nullch;
2732 }
2733#endif
2734 }
44a8e56a
PP
2735#ifdef ALTERNATE_SHEBANG
2736 /*
2737 * If the ALTERNATE_SHEBANG on this system starts with a
2738 * character that can be part of a Perl expression, then if
2739 * we see it but not "perl", we're probably looking at the
2740 * start of Perl code, not a request to hand off to some
2741 * other interpreter. Similarly, if "perl" is there, but
2742 * not in the first 'word' of the line, we assume the line
2743 * contains the start of the Perl program.
44a8e56a
PP
2744 */
2745 if (d && *s != '#') {
774d564b 2746 char *c = ipath;
44a8e56a
PP
2747 while (*c && !strchr("; \t\r\n\f\v#", *c))
2748 c++;
2749 if (c < d)
2750 d = Nullch; /* "perl" not in first word; ignore */
2751 else
2752 *s = '#'; /* Don't try to parse shebang line */
2753 }
774d564b 2754#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2755#ifndef MACOS_TRADITIONAL
748a9306 2756 if (!d &&
44a8e56a 2757 *s == '#' &&
774d564b 2758 ipathend > ipath &&
3280af22 2759 !PL_minus_c &&
748a9306 2760 !instr(s,"indir") &&
3280af22 2761 instr(PL_origargv[0],"perl"))
748a9306 2762 {
9f68db38 2763 char **newargv;
9f68db38 2764
774d564b
PP
2765 *ipathend = '\0';
2766 s = ipathend + 1;
3280af22 2767 while (s < PL_bufend && isSPACE(*s))
9f68db38 2768 s++;
3280af22
NIS
2769 if (s < PL_bufend) {
2770 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2771 newargv[1] = s;
3280af22 2772 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2773 s++;
2774 *s = '\0';
3280af22 2775 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2776 }
2777 else
3280af22 2778 newargv = PL_origargv;
774d564b 2779 newargv[0] = ipath;
b4748376 2780 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2781 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2782 }
bf4acbe4 2783#endif
748a9306 2784 if (d) {
3280af22
NIS
2785 U32 oldpdb = PL_perldb;
2786 bool oldn = PL_minus_n;
2787 bool oldp = PL_minus_p;
748a9306
LW
2788
2789 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2790 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2791
2792 if (*d++ == '-') {
8cc95fdb
PP
2793 do {
2794 if (*d == 'M' || *d == 'm') {
2795 char *m = d;
2796 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2797 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2798 (int)(d - m), m);
2799 }
2800 d = moreswitches(d);
2801 } while (d);
155aba94
GS
2802 if ((PERLDB_LINE && !oldpdb) ||
2803 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2804 /* if we have already added "LINE: while (<>) {",
2805 we must not do it again */
748a9306 2806 {
3280af22
NIS
2807 sv_setpv(PL_linestr, "");
2808 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2809 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2810 PL_last_lop = PL_last_uni = Nullch;
3280af22 2811 PL_preambled = FALSE;
84902520 2812 if (PERLDB_LINE)
3280af22 2813 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2814 goto retry;
2815 }
a0d0e21e 2816 }
79072805 2817 }
9f68db38 2818 }
79072805 2819 }
3280af22
NIS
2820 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2821 PL_bufptr = s;
2822 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2823 return yylex();
ae986130 2824 }
378cc40b 2825 goto retry;
4fdae800 2826 case '\r':
6a27c188 2827#ifdef PERL_STRICT_CR
cea2e8a9 2828 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2829 Perl_croak(aTHX_
cc507455 2830 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2831#endif
4fdae800 2832 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2833#ifdef MACOS_TRADITIONAL
2834 case '\312':
2835#endif
378cc40b
LW
2836 s++;
2837 goto retry;
378cc40b 2838 case '#':
e929a76b 2839 case '\n':
3280af22 2840 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2841 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2842 /* handle eval qq[#line 1 "foo"\n ...] */
2843 CopLINE_dec(PL_curcop);
2844 incline(s);
2845 }
3280af22 2846 d = PL_bufend;
a687059c 2847 while (s < d && *s != '\n')
378cc40b 2848 s++;
0f85fab0 2849 if (s < d)
378cc40b 2850 s++;
78c267c1 2851 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2852 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2853 incline(s);
3280af22
NIS
2854 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2855 PL_bufptr = s;
2856 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2857 return yylex();
a687059c 2858 }
378cc40b 2859 }
a687059c 2860 else {
378cc40b 2861 *s = '\0';
3280af22 2862 PL_bufend = s;
a687059c 2863 }
378cc40b
LW
2864 goto retry;
2865 case '-':
79072805 2866 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2867 I32 ftst = 0;
2868
378cc40b 2869 s++;
3280af22 2870 PL_bufptr = s;
748a9306
LW
2871 tmp = *s++;
2872
bf4acbe4 2873 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2874 s++;
2875
2876 if (strnEQ(s,"=>",2)) {
3280af22 2877 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2878 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2879 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2880 } );
748a9306
LW
2881 OPERATOR('-'); /* unary minus */
2882 }
3280af22 2883 PL_last_uni = PL_oldbufptr;
748a9306 2884 switch (tmp) {
e5edeb50
JH
2885 case 'r': ftst = OP_FTEREAD; break;
2886 case 'w': ftst = OP_FTEWRITE; break;
2887 case 'x': ftst = OP_FTEEXEC; break;
2888 case 'o': ftst = OP_FTEOWNED; break;
2889 case 'R': ftst = OP_FTRREAD; break;
2890 case 'W': ftst = OP_FTRWRITE; break;
2891 case 'X': ftst = OP_FTREXEC; break;
2892 case 'O': ftst = OP_FTROWNED; break;
2893 case 'e': ftst = OP_FTIS; break;
2894 case 'z': ftst = OP_FTZERO; break;
2895 case 's': ftst = OP_FTSIZE; break;
2896 case 'f': ftst = OP_FTFILE; break;
2897 case 'd': ftst = OP_FTDIR; break;
2898 case 'l': ftst = OP_FTLINK; break;
2899 case 'p': ftst = OP_FTPIPE; break;
2900 case 'S': ftst = OP_FTSOCK; break;
2901 case 'u': ftst = OP_FTSUID; break;
2902 case 'g': ftst = OP_FTSGID; break;
2903 case 'k': ftst = OP_FTSVTX; break;
2904 case 'b': ftst = OP_FTBLK; break;
2905 case 'c': ftst = OP_FTCHR; break;
2906 case 't': ftst = OP_FTTTY; break;
2907 case 'T': ftst = OP_FTTEXT; break;
2908 case 'B': ftst = OP_FTBINARY; break;
2909 case 'M': case 'A': case 'C':
2910 gv_fetchpv("\024",TRUE, SVt_PV);
2911 switch (tmp) {
2912 case 'M': ftst = OP_FTMTIME; break;
2913 case 'A': ftst = OP_FTATIME; break;
2914 case 'C': ftst = OP_FTCTIME; break;
2915 default: break;
2916 }
2917 break;
378cc40b 2918 default:
378cc40b
LW
2919 break;
2920 }
e5edeb50
JH
2921 if (ftst) {
2922 PL_last_lop_op = ftst;
4e553d73 2923 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2924 "### Saw file test %c\n", (int)ftst);
5f80b19c 2925 } );
e5edeb50
JH
2926 FTST(ftst);
2927 }
2928 else {
2929 /* Assume it was a minus followed by a one-letter named
2930 * subroutine call (or a -bareword), then. */
95c31fe3 2931 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2932 "### %c looked like a file test but was not\n",
2933 (int)ftst);
5f80b19c 2934 } );
e5edeb50
JH
2935 s -= 2;
2936 }
378cc40b 2937 }
a687059c
LW
2938 tmp = *s++;
2939 if (*s == tmp) {
2940 s++;
3280af22 2941 if (PL_expect == XOPERATOR)
79072805
LW
2942 TERM(POSTDEC);
2943 else
2944 OPERATOR(PREDEC);
2945 }
2946 else if (*s == '>') {
2947 s++;
2948 s = skipspace(s);
7e2040f0 2949 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2950 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2951 TOKEN(ARROW);
79072805 2952 }
748a9306
LW
2953 else if (*s == '$')
2954 OPERATOR(ARROW);
463ee0b2 2955 else
748a9306 2956 TERM(ARROW);
a687059c 2957 }
3280af22 2958 if (PL_expect == XOPERATOR)
79072805
LW
2959 Aop(OP_SUBTRACT);
2960 else {
3280af22 2961 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2962 check_uni();
79072805 2963 OPERATOR('-'); /* unary minus */
2f3197b3 2964 }
79072805 2965
378cc40b 2966 case '+':
a687059c
LW
2967 tmp = *s++;
2968 if (*s == tmp) {
378cc40b 2969 s++;
3280af22 2970 if (PL_expect == XOPERATOR)
79072805
LW
2971 TERM(POSTINC);
2972 else
2973 OPERATOR(PREINC);
378cc40b 2974 }
3280af22 2975 if (PL_expect == XOPERATOR)
79072805
LW
2976 Aop(OP_ADD);
2977 else {
3280af22 2978 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2979 check_uni();
a687059c 2980 OPERATOR('+');
2f3197b3 2981 }
a687059c 2982
378cc40b 2983 case '*':
3280af22
NIS
2984 if (PL_expect != XOPERATOR) {
2985 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2986 PL_expect = XOPERATOR;
2987 force_ident(PL_tokenbuf, '*');
2988 if (!*PL_tokenbuf)
a0d0e21e 2989 PREREF('*');
79072805 2990 TERM('*');
a687059c 2991 }
79072805
LW
2992 s++;
2993 if (*s == '*') {
a687059c 2994 s++;
79072805 2995 PWop(OP_POW);
a687059c 2996 }
79072805
LW
2997 Mop(OP_MULTIPLY);
2998
378cc40b 2999 case '%':
3280af22 3000 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3001 ++s;
3002 Mop(OP_MODULO);
a687059c 3003 }
3280af22
NIS
3004 PL_tokenbuf[0] = '%';
3005 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3006 if (!PL_tokenbuf[1]) {
3007 if (s == PL_bufend)
bbce6d69
PP
3008 yyerror("Final % should be \\% or %name");
3009 PREREF('%');
a687059c 3010 }
3280af22 3011 PL_pending_ident = '%';
bbce6d69 3012 TERM('%');
a687059c 3013
378cc40b 3014 case '^':
79072805 3015 s++;
a0d0e21e 3016 BOop(OP_BIT_XOR);
79072805 3017 case '[':
3280af22 3018 PL_lex_brackets++;
79072805 3019 /* FALL THROUGH */
378cc40b 3020 case '~':
378cc40b 3021 case ',':
378cc40b
LW
3022 tmp = *s++;
3023 OPERATOR(tmp);
a0d0e21e
LW
3024 case ':':
3025 if (s[1] == ':') {
3026 len = 0;
3027 goto just_a_word;
3028 }
3029 s++;
09bef843
SB
3030 switch (PL_expect) {
3031 OP *attrs;
3032 case XOPERATOR:
3033 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3034 break;
3035 PL_bufptr = s; /* update in case we back off */
3036 goto grabattrs;
3037 case XATTRBLOCK:
3038 PL_expect = XBLOCK;
3039 goto grabattrs;
3040 case XATTRTERM:
3041 PL_expect = XTERMBLOCK;
3042 grabattrs:
3043 s = skipspace(s);
3044 attrs = Nullop;
7e2040f0 3045 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3046 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3047 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3048 if (tmp < 0) tmp = -tmp;
3049 switch (tmp) {
3050 case KEY_or:
3051 case KEY_and:
3052 case KEY_for:
3053 case KEY_unless:
3054 case KEY_if:
3055 case KEY_while:
3056 case KEY_until:
3057 goto got_attrs;
3058 default:
3059 break;
3060 }
3061 }
09bef843
SB
3062 if (*d == '(') {
3063 d = scan_str(d,TRUE,TRUE);
3064 if (!d) {
09bef843
SB
3065 /* MUST advance bufptr here to avoid bogus
3066 "at end of line" context messages from yyerror().
3067 */
3068 PL_bufptr = s + len;
3069 yyerror("Unterminated attribute parameter in attribute list");
3070 if (attrs)
3071 op_free(attrs);
3072 return 0; /* EOF indicator */
3073 }
3074 }
3075 if (PL_lex_stuff) {
3076 SV *sv = newSVpvn(s, len);
3077 sv_catsv(sv, PL_lex_stuff);
3078 attrs = append_elem(OP_LIST, attrs,
3079 newSVOP(OP_CONST, 0, sv));
3080 SvREFCNT_dec(PL_lex_stuff);
3081 PL_lex_stuff = Nullsv;
3082 }
3083 else {
78f9721b
SM
3084 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3085 CvLVALUE_on(PL_compcv);
3086 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3087 CvLOCKED_on(PL_compcv);
3088 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3089 CvMETHOD_on(PL_compcv);
87ecf892 3090#ifdef USE_ITHREADS
c8a3bf85 3091 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len))
7fb37951 3092 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3093#endif
78f9721b
SM
3094 /* After we've set the flags, it could be argued that
3095 we don't need to do the attributes.pm-based setting
3096 process, and shouldn't bother appending recognized
3097 flags. To experiment with that, uncomment the
3098 following "else": */
0256094b 3099 else
78f9721b
SM
3100 attrs = append_elem(OP_LIST, attrs,
3101 newSVOP(OP_CONST, 0,
3102 newSVpvn(s, len)));
09bef843
SB
3103 }
3104 s = skipspace(d);
0120eecf 3105 if (*s == ':' && s[1] != ':')
09bef843 3106 s = skipspace(s+1);
0120eecf
GS
3107 else if (s == d)
3108 break; /* require real whitespace or :'s */
09bef843 3109 }
f9829d6b
GS
3110 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3111 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3112 char q = ((*s == '\'') ? '"' : '\'');
3113 /* If here for an expression, and parsed no attrs, back off. */
3114 if (tmp == '=' && !attrs) {
3115 s = PL_bufptr;
3116 break;
3117 }
3118 /* MUST advance bufptr here to avoid bogus "at end of line"
3119 context messages from yyerror().
3120 */
3121 PL_bufptr = s;
3122 if (!*s)
3123 yyerror("Unterminated attribute list");
3124 else
3125 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3126 q, *s, q));
3127 if (attrs)
3128 op_free(attrs);
3129 OPERATOR(':');
3130 }
f9829d6b 3131 got_attrs:
09bef843
SB
3132 if (attrs) {
3133 PL_nextval[PL_nexttoke].opval = attrs;
3134 force_next(THING);
3135 }
3136 TOKEN(COLONATTR);
3137 }
a0d0e21e 3138 OPERATOR(':');
8990e307
LW
3139 case '(':
3140 s++;
3280af22
NIS
3141 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3142 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3143 else
3280af22 3144 PL_expect = XTERM;
a0d0e21e 3145 TOKEN('(');
378cc40b 3146 case ';':
f4dd75d9 3147 CLINE;
378cc40b
LW
3148 tmp = *s++;
3149 OPERATOR(tmp);
3150 case ')':
378cc40b 3151 tmp = *s++;
16d20bd9
AD
3152 s = skipspace(s);
3153 if (*s == '{')
3154 PREBLOCK(tmp);
378cc40b 3155 TERM(tmp);
79072805
LW
3156 case ']':
3157 s++;
3280af22 3158 if (PL_lex_brackets <= 0)
d98d5fff 3159 yyerror("Unmatched right square bracket");
463ee0b2 3160 else
3280af22
NIS
3161 --PL_lex_brackets;
3162 if (PL_lex_state == LEX_INTERPNORMAL) {
3163 if (PL_lex_brackets == 0) {
a0d0e21e 3164 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3165 PL_lex_state = LEX_INTERPEND;
79072805
LW
3166 }
3167 }
4633a7c4 3168 TERM(']');
79072805
LW
3169 case '{':
3170 leftbracket:
79072805 3171 s++;
3280af22
NIS
3172 if (PL_lex_brackets > 100) {
3173 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3174 if (newlb != PL_lex_brackstack) {
8990e307 3175 SAVEFREEPV(newlb);
3280af22 3176 PL_lex_brackstack = newlb;
8990e307
LW
3177 }
3178 }
3280af22 3179 switch (PL_expect) {
a0d0e21e 3180 case XTERM:
3280af22 3181 if (PL_lex_formbrack) {
a0d0e21e
LW
3182 s--;
3183 PRETERMBLOCK(DO);
3184 }
3280af22
NIS
3185 if (PL_oldoldbufptr == PL_last_lop)
3186 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3187 else
3280af22 3188 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3189 OPERATOR(HASHBRACK);
a0d0e21e 3190 case XOPERATOR:
bf4acbe4 3191 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3192 s++;
44a8e56a 3193 d = s;
3280af22
NIS
3194 PL_tokenbuf[0] = '\0';
3195 if (d < PL_bufend && *d == '-') {
3196 PL_tokenbuf[0] = '-';
44a8e56a 3197 d++;
bf4acbe4 3198 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3199 d++;
3200 }
7e2040f0 3201 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3202 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3203 FALSE, &len);
bf4acbe4 3204 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3205 d++;
3206 if (*d == '}') {
3280af22 3207 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3208 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3209 if (minus)
3210 force_next('-');
748a9306
LW
3211 }
3212 }
3213 /* FALL THROUGH */
09bef843 3214 case XATTRBLOCK:
748a9306 3215 case XBLOCK:
3280af22
NIS
3216 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3217 PL_expect = XSTATE;
a0d0e21e 3218 break;
09bef843 3219 case XATTRTERM:
a0d0e21e 3220 case XTERMBLOCK:
3280af22
NIS
3221 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3222 PL_expect = XSTATE;
a0d0e21e
LW
3223 break;
3224 default: {
3225 char *t;
3280af22
NIS
3226 if (PL_oldoldbufptr == PL_last_lop)
3227 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3228 else
3280af22 3229 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3230 s = skipspace(s);
8452ff4b
SB
3231 if (*s == '}') {
3232 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3233 PL_expect = XTERM;
3234 /* This hack is to get the ${} in the message. */
3235 PL_bufptr = s+1;
3236 yyerror("syntax error");
3237 break;
3238 }
a0d0e21e 3239 OPERATOR(HASHBRACK);
8452ff4b 3240 }
b8a4b1be
GS
3241 /* This hack serves to disambiguate a pair of curlies
3242 * as being a block or an anon hash. Normally, expectation
3243 * determines that, but in cases where we're not in a
3244 * position to expect anything in particular (like inside
3245 * eval"") we have to resolve the ambiguity. This code
3246 * covers the case where the first term in the curlies is a
3247 * quoted string. Most other cases need to be explicitly
3248 * disambiguated by prepending a `+' before the opening
3249 * curly in order to force resolution as an anon hash.
3250 *
3251 * XXX should probably propagate the outer expectation
3252 * into eval"" to rely less on this hack, but that could
3253 * potentially break current behavior of eval"".
3254 * GSAR 97-07-21
3255 */
3256 t = s;
3257 if (*s == '\'' || *s == '"' || *s == '`') {
3258 /* common case: get past first string, handling escapes */
3280af22 3259 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3260 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3261 t++;
3262 t++;
a0d0e21e 3263 }
b8a4b1be 3264 else if (*s == 'q') {
3280af22 3265 if (++t < PL_bufend
b8a4b1be 3266 && (!isALNUM(*t)
3280af22 3267 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3268 && !isALNUM(*t))))
3269 {
b8a4b1be
GS
3270 char *tmps;
3271 char open, close, term;
3272 I32 brackets = 1;
3273
3280af22 3274 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3275 t++;
3276 term = *t;
3277 open = term;
3278 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3279 term = tmps[5];
3280 close = term;
3281 if (open == close)
3280af22
NIS
3282 for (t++; t < PL_bufend; t++) {
3283 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3284 t++;
6d07e5e9 3285 else if (*t == open)
b8a4b1be
GS
3286 break;
3287 }
3288 else
3280af22
NIS
3289 for (t++; t < PL_bufend; t++) {
3290 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3291 t++;
6d07e5e9 3292 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3293 break;
3294 else if (*t == open)
3295 brackets++;
3296 }
3297 }
3298 t++;
a0d0e21e 3299 }
7e2040f0 3300 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3301 t += UTF8SKIP(t);
7e2040f0 3302 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3303 t += UTF8SKIP(t);
a0d0e21e 3304 }
3280af22 3305 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3306 t++;
b8a4b1be
GS
3307 /* if comma follows first term, call it an anon hash */
3308 /* XXX it could be a comma expression with loop modifiers */
3280af22 3309 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3310 || (*t == '=' && t[1] == '>')))
a0d0e21e 3311 OPERATOR(HASHBRACK);
3280af22 3312 if (PL_expect == XREF)
4e4e412b 3313 PL_expect = XTERM;
a0d0e21e 3314 else {
3280af22
NIS
3315 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3316 PL_expect = XSTATE;
a0d0e21e 3317 }
8990e307 3318 }
a0d0e21e 3319 break;
463ee0b2 3320 }
57843af0 3321 yylval.ival = CopLINE(PL_curcop);
79072805 3322 if (isSPACE(*s) || *s == '#')
3280af22 3323 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3324 TOKEN('{');
378cc40b 3325 case '}':
79072805
LW
3326 rightbracket:
3327 s++;
3280af22 3328 if (PL_lex_brackets <= 0)
d98d5fff 3329 yyerror("Unmatched right curly bracket");
463ee0b2 3330 else
3280af22 3331 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3332 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3333 PL_lex_formbrack = 0;
3334 if (PL_lex_state == LEX_INTERPNORMAL) {
3335 if (PL_lex_brackets == 0) {
9059aa12
LW
3336 if (PL_expect & XFAKEBRACK) {
3337 PL_expect &= XENUMMASK;
3280af22
NIS
3338 PL_lex_state = LEX_INTERPEND;
3339 PL_bufptr = s;
cea2e8a9 3340 return yylex(); /* ignore fake brackets */
79072805 3341 }
fa83b5b6 3342 if (*s == '-' && s[1] == '>')
3280af22 3343 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3344 else if (*s != '[' && *s != '{')
3280af22 3345 PL_lex_state = LEX_INTERPEND;
79072805
LW
3346 }
3347 }
9059aa12
LW
3348 if (PL_expect & XFAKEBRACK) {
3349 PL_expect &= XENUMMASK;
3280af22 3350 PL_bufptr = s;
cea2e8a9 3351 return yylex(); /* ignore fake brackets */
748a9306 3352 }
79072805
LW
3353 force_next('}');
3354 TOKEN(';');
378cc40b
LW
3355 case '&':
3356 s++;
3357 tmp = *s++;
3358 if (tmp == '&')
a0d0e21e 3359 AOPERATOR(ANDAND);
378cc40b 3360 s--;
3280af22 3361 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3362 if (ckWARN(WARN_SEMICOLON)
3363 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3364 {
57843af0 3365 CopLINE_dec(PL_curcop);
cea2e8a9 3366 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3367 CopLINE_inc(PL_curcop);
463ee0b2 3368 }
79072805 3369 BAop(OP_BIT_AND);
463ee0b2 3370 }
79072805 3371
3280af22
NIS
3372 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3373 if (*PL_tokenbuf) {
3374 PL_expect = XOPERATOR;
3375 force_ident(PL_tokenbuf, '&');
463ee0b2 3376 }
79072805
LW
3377 else
3378 PREREF('&');
c07a80fd 3379 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3380 TERM('&');
3381
378cc40b
LW
3382 case '|':
3383 s++;
3384 tmp = *s++;
3385 if (tmp == '|')
a0d0e21e 3386 AOPERATOR(OROR);
378cc40b 3387 s--;
79072805 3388 BOop(OP_BIT_OR);
378cc40b
LW
3389 case '=':
3390 s++;
3391 tmp = *s++;
3392 if (tmp == '=')
79072805
LW
3393 Eop(OP_EQ);
3394 if (tmp == '>')
3395 OPERATOR(',');
378cc40b 3396 if (tmp == '~')
79072805 3397 PMop(OP_MATCH);
599cee73 3398 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3399 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3400 s--;
3280af22
NIS
3401 if (PL_expect == XSTATE && isALPHA(tmp) &&
3402 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3403 {
3280af22
NIS
3404 if (PL_in_eval && !PL_rsfp) {
3405 d = PL_bufend;
a5f75d66
AD
3406 while (s < d) {
3407 if (*s++ == '\n') {
3408 incline(s);
3409 if (strnEQ(s,"=cut",4)) {
3410 s = strchr(s,'\n');
3411 if (s)
3412 s++;
3413 else
3414 s = d;
3415 incline(s);
3416 goto retry;
3417 }
3418 }
3419 }
3420 goto retry;
3421 }
3280af22
NIS
3422 s = PL_bufend;
3423 PL_doextract = TRUE;
a0d0e21e
LW
3424 goto retry;
3425 }
3280af22 3426 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3427 char *t;
51882d45 3428#ifdef PERL_STRICT_CR
bf4acbe4 3429 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3430#else
bf4acbe4 3431 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3432#endif
a0d0e21e
LW
3433 if (*t == '\n' || *t == '#') {
3434 s--;
3280af22 3435 PL_expect = XBLOCK;
a0d0e21e
LW
3436 goto leftbracket;
3437 }
79072805 3438 }
a0d0e21e
LW
3439 yylval.ival = 0;
3440 OPERATOR(ASSIGNOP);
378cc40b
LW
3441 case '!':
3442 s++;
3443 tmp = *s++;
3444 if (tmp == '=')
79072805 3445 Eop(OP_NE);
378cc40b 3446 if (tmp == '~')
79072805 3447 PMop(OP_NOT);
378cc40b
LW
3448 s--;
3449 OPERATOR('!');
3450 case '<':
3280af22 3451 if (PL_expect != XOPERATOR) {
93a17b20 3452 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3453 check_uni();
79072805
LW
3454 if (s[1] == '<')
3455 s = scan_heredoc(s);
3456 else
3457 s = scan_inputsymbol(s);
3458 TERM(sublex_start());
378cc40b
LW
3459 }
3460 s++;
3461 tmp = *s++;
3462 if (tmp == '<')
79072805 3463 SHop(OP_LEFT_SHIFT);
395c3793
LW
3464 if (tmp == '=') {
3465 tmp = *s++;
3466 if (tmp == '>')
79072805 3467 Eop(OP_NCMP);
395c3793 3468 s--;
79072805 3469 Rop(OP_LE);
395c3793 3470 }
378cc40b 3471 s--;
79072805 3472 Rop(OP_LT);
378cc40b
LW
3473 case '>':
3474 s++;
3475 tmp = *s++;
3476 if (tmp == '>')
79072805 3477 SHop(OP_RIGHT_SHIFT);
378cc40b 3478 if (tmp == '=')
79072805 3479 Rop(OP_GE);
378cc40b 3480 s--;
79072805 3481 Rop(OP_GT);
378cc40b
LW
3482
3483 case '$':
bbce6d69
PP
3484 CLINE;
3485
3280af22
NIS
3486 if (PL_expect == XOPERATOR) {
3487 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3488 PL_expect = XTERM;
a0d0e21e 3489 depcom();
bbce6d69 3490 return ','; /* grandfather non-comma-format format */
a0d0e21e 3491 }
8990e307 3492 }
a0d0e21e 3493
7e2040f0 3494 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3495 PL_tokenbuf[0] = '@';
376b8730
SM
3496 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3497 sizeof PL_tokenbuf - 1, FALSE);
3498 if (PL_expect == XOPERATOR)
3499 no_op("Array length", s);
3280af22 3500 if (!PL_tokenbuf[1])
a0d0e21e 3501 PREREF(DOLSHARP);
3280af22
NIS
3502 PL_expect = XOPERATOR;
3503 PL_pending_ident = '#';
463ee0b2 3504 TOKEN(DOLSHARP);
79072805 3505 }
bbce6d69 3506
3280af22 3507 PL_tokenbuf[0] = '$';
376b8730
SM
3508 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3509 sizeof PL_tokenbuf - 1, FALSE);
3510 if (PL_expect == XOPERATOR)
3511 no_op("Scalar", s);
3280af22
NIS
3512 if (!PL_tokenbuf[1]) {
3513 if (s == PL_bufend)
bbce6d69
PP
3514 yyerror("Final $ should be \\$ or $name");
3515 PREREF('$');
8990e307 3516 }
a0d0e21e 3517
bbce6d69 3518 /* This kludge not intended to be bulletproof. */
3280af22 3519 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3520 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3521 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3522 yylval.opval->op_private = OPpCONST_ARYBASE;
3523 TERM(THING);
3524 }
3525
ff68c719 3526 d = s;
69d2bceb 3527 tmp = (I32)*s;
3280af22 3528 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3529 s = skipspace(s);
3530
3280af22 3531 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {