This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix for sv_2iv() on "shared" value from keys %hash.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
a790bc05
PP
72/* XXX If this causes problems, set i_unistd=undef in the hint file. */
73#ifdef I_UNISTD
74# include <unistd.h> /* Needed for execv() */
75#endif
76
77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e 82#ifdef USE_PURE_BISON
20141f0e
RI
83#ifndef YYMAXLEVEL
84#define YYMAXLEVEL 100
85#endif
86YYSTYPE* yylval_pointer[YYMAXLEVEL];
87int* yychar_pointer[YYMAXLEVEL];
88int yyactlevel = 0;
22c35a8c
GS
89# undef yylval
90# undef yychar
20141f0e
RI
91# define yylval (*yylval_pointer[yyactlevel])
92# define yychar (*yychar_pointer[yyactlevel])
93# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
94# undef yylex
95# define yylex() Perl_yylex(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
3280af22
NIS
135#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
136#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
137#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
138#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
139#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
140#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
141#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
142#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
143#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
144#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
145#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
146#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
147#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
148#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
149#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
150#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
151#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
152#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
153#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
154#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 155
a687059c
LW
156/* This bit of chicanery makes a unary function followed by
157 * a parenthesis into a function with one argument, highest precedence.
158 */
2f3197b3 159#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
160 PL_expect = XTERM, \
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
163 PL_last_lop_op = f, \
a687059c
LW
164 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165
79072805 166#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
167 PL_bufptr = s, \
168 PL_last_uni = PL_oldbufptr, \
79072805
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
9f68db38 171/* grandfather return to old style */
3280af22 172#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 173
ffb4593c
NT
174/*
175 * S_ao
176 *
177 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
178 * into an OP_ANDASSIGN or OP_ORASSIGN
179 */
180
76e3520e 181STATIC int
cea2e8a9 182S_ao(pTHX_ int toketype)
a0d0e21e 183{
3280af22
NIS
184 if (*PL_bufptr == '=') {
185 PL_bufptr++;
a0d0e21e
LW
186 if (toketype == ANDAND)
187 yylval.ival = OP_ANDASSIGN;
188 else if (toketype == OROR)
189 yylval.ival = OP_ORASSIGN;
190 toketype = ASSIGNOP;
191 }
192 return toketype;
193}
194
ffb4593c
NT
195/*
196 * S_no_op
197 * When Perl expects an operator and finds something else, no_op
198 * prints the warning. It always prints "<something> found where
199 * operator expected. It prints "Missing semicolon on previous line?"
200 * if the surprise occurs at the start of the line. "do you need to
201 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
202 * where the compiler doesn't know if foo is a method call or a function.
203 * It prints "Missing operator before end of line" if there's nothing
204 * after the missing operator, or "... before <...>" if there is something
205 * after the missing operator.
206 */
207
76e3520e 208STATIC void
cea2e8a9 209S_no_op(pTHX_ char *what, char *s)
463ee0b2 210{
3280af22
NIS
211 char *oldbp = PL_bufptr;
212 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 213
1189a94a
GS
214 if (!s)
215 s = oldbp;
07c798fb 216 else
1189a94a 217 PL_bufptr = s;
cea2e8a9 218 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 219 if (is_first)
cea2e8a9 220 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 221 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 222 char *t;
7e2040f0 223 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 224 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 225 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 226 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 227 }
07c798fb
HS
228 else {
229 assert(s >= oldbp);
cea2e8a9 230 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 231 }
3280af22 232 PL_bufptr = oldbp;
8990e307
LW
233}
234
ffb4593c
NT
235/*
236 * S_missingterm
237 * Complain about missing quote/regexp/heredoc terminator.
238 * If it's called with (char *)NULL then it cauterizes the line buffer.
239 * If we're in a delimited string and the delimiter is a control
240 * character, it's reformatted into a two-char sequence like ^C.
241 * This is fatal.
242 */
243
76e3520e 244STATIC void
cea2e8a9 245S_missingterm(pTHX_ char *s)
8990e307
LW
246{
247 char tmpbuf[3];
248 char q;
249 if (s) {
250 char *nl = strrchr(s,'\n');
d2719217 251 if (nl)
8990e307
LW
252 *nl = '\0';
253 }
9d116dd7
JH
254 else if (
255#ifdef EBCDIC
256 iscntrl(PL_multi_close)
257#else
258 PL_multi_close < 32 || PL_multi_close == 127
259#endif
260 ) {
8990e307 261 *tmpbuf = '^';
3280af22 262 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
263 s = "\\n";
264 tmpbuf[2] = '\0';
265 s = tmpbuf;
266 }
267 else {
3280af22 268 *tmpbuf = PL_multi_close;
8990e307
LW
269 tmpbuf[1] = '\0';
270 s = tmpbuf;
271 }
272 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 273 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 274}
79072805 275
ffb4593c
NT
276/*
277 * Perl_deprecate
ffb4593c
NT
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
9cbb5ea2 290 * Deprecate a comma-less variable list.
ffb4593c
NT
291 */
292
76e3520e 293STATIC void
cea2e8a9 294S_depcom(pTHX)
a0d0e21e
LW
295{
296 deprecate("comma-less variable list");
297}
298
ffb4593c 299/*
9cbb5ea2
GS
300 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
301 * utf16-to-utf8-reversed.
ffb4593c
NT
302 */
303
c39cd008
GS
304#ifdef PERL_CR_FILTER
305static void
306strip_return(SV *sv)
307{
308 register char *s = SvPVX(sv);
309 register char *e = s + SvCUR(sv);
310 /* outer loop optimized to do nothing if there are no CR-LFs */
311 while (s < e) {
312 if (*s++ == '\r' && *s == '\n') {
313 /* hit a CR-LF, need to copy the rest */
314 register char *d = s - 1;
315 *d++ = *s++;
316 while (s < e) {
317 if (*s == '\r' && s[1] == '\n')
318 s++;
319 *d++ = *s++;
320 }
321 SvCUR(sv) -= s - d;
322 return;
323 }
324 }
325}
a868473f 326
76e3520e 327STATIC I32
c39cd008 328S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 329{
c39cd008
GS
330 I32 count = FILTER_READ(idx+1, sv, maxlen);
331 if (count > 0 && !maxlen)
332 strip_return(sv);
333 return count;
a868473f
NIS
334}
335#endif
336
ffb4593c
NT
337/*
338 * Perl_lex_start
9cbb5ea2
GS
339 * Initialize variables. Uses the Perl save_stack to save its state (for
340 * recursive calls to the parser).
ffb4593c
NT
341 */
342
a0d0e21e 343void
864dbfa3 344Perl_lex_start(pTHX_ SV *line)
79072805 345{
0f15f207 346 dTHR;
8990e307
LW
347 char *s;
348 STRLEN len;
349
3280af22
NIS
350 SAVEI32(PL_lex_dojoin);
351 SAVEI32(PL_lex_brackets);
3280af22
NIS
352 SAVEI32(PL_lex_casemods);
353 SAVEI32(PL_lex_starts);
354 SAVEI32(PL_lex_state);
7766f137 355 SAVEVPTR(PL_lex_inpat);
3280af22 356 SAVEI32(PL_lex_inwhat);
18b09519
GS
357 if (PL_lex_state == LEX_KNOWNEXT) {
358 I32 toke = PL_nexttoke;
359 while (--toke >= 0) {
360 SAVEI32(PL_nexttype[toke]);
361 SAVEVPTR(PL_nextval[toke]);
362 }
363 SAVEI32(PL_nexttoke);
18b09519 364 }
57843af0 365 SAVECOPLINE(PL_curcop);
3280af22
NIS
366 SAVEPPTR(PL_bufptr);
367 SAVEPPTR(PL_bufend);
368 SAVEPPTR(PL_oldbufptr);
369 SAVEPPTR(PL_oldoldbufptr);
370 SAVEPPTR(PL_linestart);
371 SAVESPTR(PL_linestr);
372 SAVEPPTR(PL_lex_brackstack);
373 SAVEPPTR(PL_lex_casestack);
c76ac1ee 374 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
375 SAVESPTR(PL_lex_stuff);
376 SAVEI32(PL_lex_defer);
09bef843 377 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 378 SAVESPTR(PL_lex_repl);
bebdddfc
GS
379 SAVEINT(PL_expect);
380 SAVEINT(PL_lex_expect);
3280af22
NIS
381
382 PL_lex_state = LEX_NORMAL;
383 PL_lex_defer = 0;
384 PL_expect = XSTATE;
385 PL_lex_brackets = 0;
3280af22
NIS
386 New(899, PL_lex_brackstack, 120, char);
387 New(899, PL_lex_casestack, 12, char);
388 SAVEFREEPV(PL_lex_brackstack);
389 SAVEFREEPV(PL_lex_casestack);
390 PL_lex_casemods = 0;
391 *PL_lex_casestack = '\0';
392 PL_lex_dojoin = 0;
393 PL_lex_starts = 0;
394 PL_lex_stuff = Nullsv;
395 PL_lex_repl = Nullsv;
396 PL_lex_inpat = 0;
76be56bc 397 PL_nexttoke = 0;
3280af22 398 PL_lex_inwhat = 0;
09bef843 399 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
400 PL_linestr = line;
401 if (SvREADONLY(PL_linestr))
402 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
403 s = SvPV(PL_linestr, len);
8990e307 404 if (len && s[len-1] != ';') {
3280af22
NIS
405 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
406 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
407 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 408 }
3280af22
NIS
409 SvTEMP_off(PL_linestr);
410 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
411 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
412 SvREFCNT_dec(PL_rs);
79cb57f6 413 PL_rs = newSVpvn("\n", 1);
3280af22 414 PL_rsfp = 0;
79072805 415}
a687059c 416
ffb4593c
NT
417/*
418 * Perl_lex_end
9cbb5ea2
GS
419 * Finalizer for lexing operations. Must be called when the parser is
420 * done with the lexer.
ffb4593c
NT
421 */
422
463ee0b2 423void
864dbfa3 424Perl_lex_end(pTHX)
463ee0b2 425{
3280af22 426 PL_doextract = FALSE;
463ee0b2
LW
427}
428
ffb4593c
NT
429/*
430 * S_incline
431 * This subroutine has nothing to do with tilting, whether at windmills
432 * or pinball tables. Its name is short for "increment line". It
57843af0 433 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 434 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
435 * # line 500 "foo.pm"
436 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
437 */
438
76e3520e 439STATIC void
cea2e8a9 440S_incline(pTHX_ char *s)
463ee0b2 441{
0f15f207 442 dTHR;
463ee0b2
LW
443 char *t;
444 char *n;
73659bf1 445 char *e;
463ee0b2 446 char ch;
463ee0b2 447
57843af0 448 CopLINE_inc(PL_curcop);
463ee0b2
LW
449 if (*s++ != '#')
450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
452 if (strnEQ(s, "line", 4))
453 s += 4;
454 else
455 return;
456 if (*s == ' ' || *s == '\t')
457 s++;
458 else
459 return;
bf4acbe4 460 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
461 if (!isDIGIT(*s))
462 return;
463 n = s;
464 while (isDIGIT(*s))
465 s++;
bf4acbe4 466 while (SPACE_OR_TAB(*s))
463ee0b2 467 s++;
73659bf1 468 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 469 s++;
73659bf1
GS
470 e = t + 1;
471 }
463ee0b2 472 else {
463ee0b2 473 for (t = s; !isSPACE(*t); t++) ;
73659bf1 474 e = t;
463ee0b2 475 }
bf4acbe4 476 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
477 e++;
478 if (*e != '\n' && *e != '\0')
479 return; /* false alarm */
480
463ee0b2
LW
481 ch = *t;
482 *t = '\0';
f4dd75d9
GS
483 if (t - s > 0) {
484#ifdef USE_ITHREADS
485 Safefree(CopFILE(PL_curcop));
486#else
487 SvREFCNT_dec(CopFILEGV(PL_curcop));
488#endif
57843af0 489 CopFILE_set(PL_curcop, s);
f4dd75d9 490 }
463ee0b2 491 *t = ch;
57843af0 492 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
493}
494
ffb4593c
NT
495/*
496 * S_skipspace
497 * Called to gobble the appropriate amount and type of whitespace.
498 * Skips comments as well.
499 */
500
76e3520e 501STATIC char *
cea2e8a9 502S_skipspace(pTHX_ register char *s)
a687059c 503{
11343788 504 dTHR;
3280af22 505 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 506 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
507 s++;
508 return s;
509 }
510 for (;;) {
fd049845 511 STRLEN prevlen;
09bef843
SB
512 SSize_t oldprevlen, oldoldprevlen;
513 SSize_t oldloplen, oldunilen;
60e6418e
GS
514 while (s < PL_bufend && isSPACE(*s)) {
515 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
516 incline(s);
517 }
ffb4593c
NT
518
519 /* comment */
3280af22
NIS
520 if (s < PL_bufend && *s == '#') {
521 while (s < PL_bufend && *s != '\n')
463ee0b2 522 s++;
60e6418e 523 if (s < PL_bufend) {
463ee0b2 524 s++;
60e6418e
GS
525 if (PL_in_eval && !PL_rsfp) {
526 incline(s);
527 continue;
528 }
529 }
463ee0b2 530 }
ffb4593c
NT
531
532 /* only continue to recharge the buffer if we're at the end
533 * of the buffer, we're not reading from a source filter, and
534 * we're in normal lexing mode
535 */
09bef843
SB
536 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
537 PL_lex_state == LEX_FORMLINE)
463ee0b2 538 return s;
ffb4593c
NT
539
540 /* try to recharge the buffer */
9cbb5ea2
GS
541 if ((s = filter_gets(PL_linestr, PL_rsfp,
542 (prevlen = SvCUR(PL_linestr)))) == Nullch)
543 {
544 /* end of file. Add on the -p or -n magic */
3280af22
NIS
545 if (PL_minus_n || PL_minus_p) {
546 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
547 ";}continue{print or die qq(-p destination: $!\\n)" :
548 "");
3280af22
NIS
549 sv_catpv(PL_linestr,";}");
550 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
551 }
552 else
3280af22 553 sv_setpv(PL_linestr,";");
ffb4593c
NT
554
555 /* reset variables for next time we lex */
9cbb5ea2
GS
556 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
557 = SvPVX(PL_linestr);
3280af22 558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
559
560 /* Close the filehandle. Could be from -P preprocessor,
561 * STDIN, or a regular file. If we were reading code from
562 * STDIN (because the commandline held no -e or filename)
563 * then we don't close it, we reset it so the code can
564 * read from STDIN too.
565 */
566
3280af22
NIS
567 if (PL_preprocess && !PL_in_eval)
568 (void)PerlProc_pclose(PL_rsfp);
569 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
570 PerlIO_clearerr(PL_rsfp);
8990e307 571 else
3280af22
NIS
572 (void)PerlIO_close(PL_rsfp);
573 PL_rsfp = Nullfp;
463ee0b2
LW
574 return s;
575 }
ffb4593c
NT
576
577 /* not at end of file, so we only read another line */
09bef843
SB
578 /* make corresponding updates to old pointers, for yyerror() */
579 oldprevlen = PL_oldbufptr - PL_bufend;
580 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
581 if (PL_last_uni)
582 oldunilen = PL_last_uni - PL_bufend;
583 if (PL_last_lop)
584 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
585 PL_linestart = PL_bufptr = s + prevlen;
586 PL_bufend = s + SvCUR(PL_linestr);
587 s = PL_bufptr;
09bef843
SB
588 PL_oldbufptr = s + oldprevlen;
589 PL_oldoldbufptr = s + oldoldprevlen;
590 if (PL_last_uni)
591 PL_last_uni = s + oldunilen;
592 if (PL_last_lop)
593 PL_last_lop = s + oldloplen;
a0d0e21e 594 incline(s);
ffb4593c
NT
595
596 /* debugger active and we're not compiling the debugger code,
597 * so store the line into the debugger's array of lines
598 */
3280af22 599 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
600 SV *sv = NEWSV(85,0);
601
602 sv_upgrade(sv, SVt_PVMG);
3280af22 603 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 604 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 605 }
463ee0b2 606 }
a687059c 607}
378cc40b 608
ffb4593c
NT
609/*
610 * S_check_uni
611 * Check the unary operators to ensure there's no ambiguity in how they're
612 * used. An ambiguous piece of code would be:
613 * rand + 5
614 * This doesn't mean rand() + 5. Because rand() is a unary operator,
615 * the +5 is its argument.
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_check_uni(pTHX)
ba106d47 620{
2f3197b3 621 char *s;
a0d0e21e 622 char *t;
0453d815 623 dTHR;
2f3197b3 624
3280af22 625 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 626 return;
3280af22
NIS
627 while (isSPACE(*PL_last_uni))
628 PL_last_uni++;
7e2040f0 629 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 630 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 631 return;
0453d815 632 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 633 char ch = *s;
0453d815
PM
634 *s = '\0';
635 Perl_warner(aTHX_ WARN_AMBIGUOUS,
636 "Warning: Use of \"%s\" without parens is ambiguous",
637 PL_last_uni);
638 *s = ch;
639 }
2f3197b3
LW
640}
641
ffb4593c
NT
642/* workaround to replace the UNI() macro with a function. Only the
643 * hints/uts.sh file mentions this. Other comments elsewhere in the
644 * source indicate Microport Unix might need it too.
645 */
646
ffed7fef
LW
647#ifdef CRIPPLED_CC
648
649#undef UNI
ffed7fef 650#define UNI(f) return uni(f,s)
ffed7fef 651
76e3520e 652STATIC int
cea2e8a9 653S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
654{
655 yylval.ival = f;
3280af22
NIS
656 PL_expect = XTERM;
657 PL_bufptr = s;
8f872242
NIS
658 PL_last_uni = PL_oldbufptr;
659 PL_last_lop_op = f;
ffed7fef
LW
660 if (*s == '(')
661 return FUNC1;
662 s = skipspace(s);
663 if (*s == '(')
664 return FUNC1;
665 else
666 return UNIOP;
667}
668
a0d0e21e
LW
669#endif /* CRIPPLED_CC */
670
ffb4593c
NT
671/*
672 * LOP : macro to build a list operator. Its behaviour has been replaced
673 * with a subroutine, S_lop() for which LOP is just another name.
674 */
675
a0d0e21e
LW
676#define LOP(f,x) return lop(f,x,s)
677
ffb4593c
NT
678/*
679 * S_lop
680 * Build a list operator (or something that might be one). The rules:
681 * - if we have a next token, then it's a list operator [why?]
682 * - if the next thing is an opening paren, then it's a function
683 * - else it's a list operator
684 */
685
76e3520e 686STATIC I32
a0be28da 687S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 688{
0f15f207 689 dTHR;
79072805 690 yylval.ival = f;
35c8bce7 691 CLINE;
3280af22
NIS
692 PL_expect = x;
693 PL_bufptr = s;
694 PL_last_lop = PL_oldbufptr;
695 PL_last_lop_op = f;
696 if (PL_nexttoke)
a0d0e21e 697 return LSTOP;
79072805
LW
698 if (*s == '(')
699 return FUNC;
700 s = skipspace(s);
701 if (*s == '(')
702 return FUNC;
703 else
704 return LSTOP;
705}
706
ffb4593c
NT
707/*
708 * S_force_next
9cbb5ea2 709 * When the lexer realizes it knows the next token (for instance,
ffb4593c 710 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
711 * to know what token to return the next time the lexer is called. Caller
712 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
713 * handles the token correctly.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_force_next(pTHX_ I32 type)
79072805 718{
3280af22
NIS
719 PL_nexttype[PL_nexttoke] = type;
720 PL_nexttoke++;
721 if (PL_lex_state != LEX_KNOWNEXT) {
722 PL_lex_defer = PL_lex_state;
723 PL_lex_expect = PL_expect;
724 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
725 }
726}
727
ffb4593c
NT
728/*
729 * S_force_word
730 * When the lexer knows the next thing is a word (for instance, it has
731 * just seen -> and it knows that the next char is a word char, then
732 * it calls S_force_word to stick the next word into the PL_next lookahead.
733 *
734 * Arguments:
b1b65b59 735 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
736 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
737 * int check_keyword : if true, Perl checks to make sure the word isn't
738 * a keyword (do this if the word is a label, e.g. goto FOO)
739 * int allow_pack : if true, : characters will also be allowed (require,
740 * use, etc. do this)
9cbb5ea2 741 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
742 */
743
76e3520e 744STATIC char *
cea2e8a9 745S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 746{
463ee0b2
LW
747 register char *s;
748 STRLEN len;
749
750 start = skipspace(start);
751 s = start;
7e2040f0 752 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 753 (allow_pack && *s == ':') ||
15f0808c 754 (allow_initial_tick && *s == '\'') )
a0d0e21e 755 {
3280af22
NIS
756 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
757 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
758 return start;
759 if (token == METHOD) {
760 s = skipspace(s);
761 if (*s == '(')
3280af22 762 PL_expect = XTERM;
463ee0b2 763 else {
3280af22 764 PL_expect = XOPERATOR;
463ee0b2 765 }
79072805 766 }
3280af22
NIS
767 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
768 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
769 force_next(token);
770 }
771 return s;
772}
773
ffb4593c
NT
774/*
775 * S_force_ident
9cbb5ea2 776 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
777 * text only contains the "foo" portion. The first argument is a pointer
778 * to the "foo", and the second argument is the type symbol to prefix.
779 * Forces the next token to be a "WORD".
9cbb5ea2 780 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
781 */
782
76e3520e 783STATIC void
cea2e8a9 784S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
785{
786 if (s && *s) {
11343788 787 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 788 PL_nextval[PL_nexttoke].opval = o;
79072805 789 force_next(WORD);
748a9306 790 if (kind) {
e858de61 791 dTHR; /* just for in_eval */
11343788 792 o->op_private = OPpCONST_ENTERED;
55497cff
PP
793 /* XXX see note in pp_entereval() for why we forgo typo
794 warnings if the symbol must be introduced in an eval.
795 GSAR 96-10-12 */
3280af22 796 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
797 kind == '$' ? SVt_PV :
798 kind == '@' ? SVt_PVAV :
799 kind == '%' ? SVt_PVHV :
800 SVt_PVGV
801 );
748a9306 802 }
79072805
LW
803 }
804}
805
1571675a
GS
806NV
807Perl_str_to_version(pTHX_ SV *sv)
808{
809 NV retval = 0.0;
810 NV nshift = 1.0;
811 STRLEN len;
812 char *start = SvPVx(sv,len);
3aa33fe5 813 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
814 char *end = start + len;
815 while (start < end) {
ba210ebe 816 STRLEN skip;
1571675a
GS
817 UV n;
818 if (utf)
dcad2880 819 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
820 else {
821 n = *(U8*)start;
822 skip = 1;
823 }
824 retval += ((NV)n)/nshift;
825 start += skip;
826 nshift *= 1000;
827 }
828 return retval;
829}
830
ffb4593c
NT
831/*
832 * S_force_version
833 * Forces the next token to be a version number.
834 */
835
76e3520e 836STATIC char *
cea2e8a9 837S_force_version(pTHX_ char *s)
89bfa8cd
PP
838{
839 OP *version = Nullop;
44dcb63b 840 char *d;
89bfa8cd
PP
841
842 s = skipspace(s);
843
44dcb63b 844 d = s;
dd629d5b 845 if (*d == 'v')
44dcb63b 846 d++;
44dcb63b 847 if (isDIGIT(*d)) {
a7cb1f99 848 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 849 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 850 SV *ver;
b73d6f50 851 s = scan_num(s, &yylval);
89bfa8cd 852 version = yylval.opval;
dd629d5b
GS
853 ver = cSVOPx(version)->op_sv;
854 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 855 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
856 SvNVX(ver) = str_to_version(ver);
857 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 858 }
89bfa8cd
PP
859 }
860 }
861
862 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 863 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
864 force_next(WORD);
865
866 return (s);
867}
868
ffb4593c
NT
869/*
870 * S_tokeq
871 * Tokenize a quoted string passed in as an SV. It finds the next
872 * chunk, up to end of string or a backslash. It may make a new
873 * SV containing that chunk (if HINT_NEW_STRING is on). It also
874 * turns \\ into \.
875 */
876
76e3520e 877STATIC SV *
cea2e8a9 878S_tokeq(pTHX_ SV *sv)
79072805
LW
879{
880 register char *s;
881 register char *send;
882 register char *d;
b3ac6de7
IZ
883 STRLEN len = 0;
884 SV *pv = sv;
79072805
LW
885
886 if (!SvLEN(sv))
b3ac6de7 887 goto finish;
79072805 888
a0d0e21e 889 s = SvPV_force(sv, len);
21a311ee 890 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 891 goto finish;
463ee0b2 892 send = s + len;
79072805
LW
893 while (s < send && *s != '\\')
894 s++;
895 if (s == send)
b3ac6de7 896 goto finish;
79072805 897 d = s;
3280af22 898 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 899 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
900 while (s < send) {
901 if (*s == '\\') {
a0d0e21e 902 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
903 s++; /* all that, just for this */
904 }
905 *d++ = *s++;
906 }
907 *d = '\0';
463ee0b2 908 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 909 finish:
3280af22 910 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 911 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
912 return sv;
913}
914
ffb4593c
NT
915/*
916 * Now come three functions related to double-quote context,
917 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
918 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
919 * interact with PL_lex_state, and create fake ( ... ) argument lists
920 * to handle functions and concatenation.
921 * They assume that whoever calls them will be setting up a fake
922 * join call, because each subthing puts a ',' after it. This lets
923 * "lower \luPpEr"
924 * become
925 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
926 *
927 * (I'm not sure whether the spurious commas at the end of lcfirst's
928 * arguments and join's arguments are created or not).
929 */
930
931/*
932 * S_sublex_start
933 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
934 *
935 * Pattern matching will set PL_lex_op to the pattern-matching op to
936 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
937 *
938 * OP_CONST and OP_READLINE are easy--just make the new op and return.
939 *
940 * Everything else becomes a FUNC.
941 *
942 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
943 * had an OP_CONST or OP_READLINE). This just sets us up for a
944 * call to S_sublex_push().
945 */
946
76e3520e 947STATIC I32
cea2e8a9 948S_sublex_start(pTHX)
79072805
LW
949{
950 register I32 op_type = yylval.ival;
79072805
LW
951
952 if (op_type == OP_NULL) {
3280af22
NIS
953 yylval.opval = PL_lex_op;
954 PL_lex_op = Nullop;
79072805
LW
955 return THING;
956 }
957 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 958 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
959
960 if (SvTYPE(sv) == SVt_PVIV) {
961 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
962 STRLEN len;
963 char *p;
964 SV *nsv;
965
966 p = SvPV(sv, len);
79cb57f6 967 nsv = newSVpvn(p, len);
01ec43d0
GS
968 if (SvUTF8(sv))
969 SvUTF8_on(nsv);
b3ac6de7
IZ
970 SvREFCNT_dec(sv);
971 sv = nsv;
972 }
973 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 974 PL_lex_stuff = Nullsv;
79072805
LW
975 return THING;
976 }
977
3280af22
NIS
978 PL_sublex_info.super_state = PL_lex_state;
979 PL_sublex_info.sub_inwhat = op_type;
980 PL_sublex_info.sub_op = PL_lex_op;
981 PL_lex_state = LEX_INTERPPUSH;
55497cff 982
3280af22
NIS
983 PL_expect = XTERM;
984 if (PL_lex_op) {
985 yylval.opval = PL_lex_op;
986 PL_lex_op = Nullop;
55497cff
PP
987 return PMFUNC;
988 }
989 else
990 return FUNC;
991}
992
ffb4593c
NT
993/*
994 * S_sublex_push
995 * Create a new scope to save the lexing state. The scope will be
996 * ended in S_sublex_done. Returns a '(', starting the function arguments
997 * to the uc, lc, etc. found before.
998 * Sets PL_lex_state to LEX_INTERPCONCAT.
999 */
1000
76e3520e 1001STATIC I32
cea2e8a9 1002S_sublex_push(pTHX)
55497cff 1003{
0f15f207 1004 dTHR;
f46d017c 1005 ENTER;
55497cff 1006
3280af22
NIS
1007 PL_lex_state = PL_sublex_info.super_state;
1008 SAVEI32(PL_lex_dojoin);
1009 SAVEI32(PL_lex_brackets);
3280af22
NIS
1010 SAVEI32(PL_lex_casemods);
1011 SAVEI32(PL_lex_starts);
1012 SAVEI32(PL_lex_state);
7766f137 1013 SAVEVPTR(PL_lex_inpat);
3280af22 1014 SAVEI32(PL_lex_inwhat);
57843af0 1015 SAVECOPLINE(PL_curcop);
3280af22
NIS
1016 SAVEPPTR(PL_bufptr);
1017 SAVEPPTR(PL_oldbufptr);
1018 SAVEPPTR(PL_oldoldbufptr);
1019 SAVEPPTR(PL_linestart);
1020 SAVESPTR(PL_linestr);
1021 SAVEPPTR(PL_lex_brackstack);
1022 SAVEPPTR(PL_lex_casestack);
1023
1024 PL_linestr = PL_lex_stuff;
1025 PL_lex_stuff = Nullsv;
1026
9cbb5ea2
GS
1027 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1028 = SvPVX(PL_linestr);
3280af22
NIS
1029 PL_bufend += SvCUR(PL_linestr);
1030 SAVEFREESV(PL_linestr);
1031
1032 PL_lex_dojoin = FALSE;
1033 PL_lex_brackets = 0;
3280af22
NIS
1034 New(899, PL_lex_brackstack, 120, char);
1035 New(899, PL_lex_casestack, 12, char);
1036 SAVEFREEPV(PL_lex_brackstack);
1037 SAVEFREEPV(PL_lex_casestack);
1038 PL_lex_casemods = 0;
1039 *PL_lex_casestack = '\0';
1040 PL_lex_starts = 0;
1041 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1042 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1043
1044 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1045 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1046 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1047 else
3280af22 1048 PL_lex_inpat = Nullop;
79072805 1049
55497cff 1050 return '(';
79072805
LW
1051}
1052
ffb4593c
NT
1053/*
1054 * S_sublex_done
1055 * Restores lexer state after a S_sublex_push.
1056 */
1057
76e3520e 1058STATIC I32
cea2e8a9 1059S_sublex_done(pTHX)
79072805 1060{
3280af22
NIS
1061 if (!PL_lex_starts++) {
1062 PL_expect = XOPERATOR;
79cb57f6 1063 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1064 return THING;
1065 }
1066
3280af22
NIS
1067 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1068 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1069 return yylex();
79072805
LW
1070 }
1071
ffb4593c 1072 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1073 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1074 PL_linestr = PL_lex_repl;
1075 PL_lex_inpat = 0;
1076 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1077 PL_bufend += SvCUR(PL_linestr);
1078 SAVEFREESV(PL_linestr);
1079 PL_lex_dojoin = FALSE;
1080 PL_lex_brackets = 0;
3280af22
NIS
1081 PL_lex_casemods = 0;
1082 *PL_lex_casestack = '\0';
1083 PL_lex_starts = 0;
25da4f38 1084 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1085 PL_lex_state = LEX_INTERPNORMAL;
1086 PL_lex_starts++;
e9fa98b2
HS
1087 /* we don't clear PL_lex_repl here, so that we can check later
1088 whether this is an evalled subst; that means we rely on the
1089 logic to ensure sublex_done() is called again only via the
1090 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1091 }
e9fa98b2 1092 else {
3280af22 1093 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1094 PL_lex_repl = Nullsv;
1095 }
79072805 1096 return ',';
ffed7fef
LW
1097 }
1098 else {
f46d017c 1099 LEAVE;
3280af22
NIS
1100 PL_bufend = SvPVX(PL_linestr);
1101 PL_bufend += SvCUR(PL_linestr);
1102 PL_expect = XOPERATOR;
09bef843 1103 PL_sublex_info.sub_inwhat = 0;
79072805 1104 return ')';
ffed7fef
LW
1105 }
1106}
1107
02aa26ce
NT
1108/*
1109 scan_const
1110
1111 Extracts a pattern, double-quoted string, or transliteration. This
1112 is terrifying code.
1113
3280af22
NIS
1114 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1115 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1116 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1117
9b599b2a
GS
1118 Returns a pointer to the character scanned up to. Iff this is
1119 advanced from the start pointer supplied (ie if anything was
1120 successfully parsed), will leave an OP for the substring scanned
1121 in yylval. Caller must intuit reason for not parsing further
1122 by looking at the next characters herself.
1123
02aa26ce
NT
1124 In patterns:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 regexp special ones: \D \s
1128 constants: \x3
1129 backrefs: \1 (deprecated in substitution replacements)
1130 case and quoting: \U \Q \E
1131 stops on @ and $, but not for $ as tail anchor
1132
1133 In transliterations:
1134 characters are VERY literal, except for - not at the start or end
1135 of the string, which indicates a range. scan_const expands the
1136 range to the full set of intermediate characters.
1137
1138 In double-quoted strings:
1139 backslashes:
1140 double-quoted style: \r and \n
1141 constants: \x3
1142 backrefs: \1 (deprecated)
1143 case and quoting: \U \Q \E
1144 stops on @ and $
1145
1146 scan_const does *not* construct ops to handle interpolated strings.
1147 It stops processing as soon as it finds an embedded $ or @ variable
1148 and leaves it to the caller to work out what's going on.
1149
1150 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1151
1152 $ in pattern could be $foo or could be tail anchor. Assumption:
1153 it's a tail anchor if $ is the last thing in the string, or if it's
1154 followed by one of ")| \n\t"
1155
1156 \1 (backreferences) are turned into $1
1157
1158 The structure of the code is
1159 while (there's a character to process) {
1160 handle transliteration ranges
1161 skip regexp comments
1162 skip # initiated comments in //x patterns
1163 check for embedded @foo
1164 check for embedded scalars
1165 if (backslash) {
1166 leave intact backslashes from leave (below)
1167 deprecate \1 in strings and sub replacements
1168 handle string-changing backslashes \l \U \Q \E, etc.
1169 switch (what was escaped) {
1170 handle - in a transliteration (becomes a literal -)
1171 handle \132 octal characters
1172 handle 0x15 hex characters
1173 handle \cV (control V)
1174 handle printf backslashes (\f, \r, \n, etc)
1175 } (end switch)
1176 } (end if backslash)
1177 } (end while character to read)
1178
1179*/
1180
76e3520e 1181STATIC char *
cea2e8a9 1182S_scan_const(pTHX_ char *start)
79072805 1183{
3280af22 1184 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1185 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1186 register char *s = start; /* start of the constant */
1187 register char *d = SvPVX(sv); /* destination for copies */
1188 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1189 bool didrange = FALSE; /* did we just finish a range? */
7e2040f0 1190 bool has_utf = FALSE; /* embedded \x{} */
012bcf8d
GS
1191 UV uv;
1192
ac2262e3 1193 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1194 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1195 : UTF;
ac2262e3 1196 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1197 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1198 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1199 : UTF;
dff6d3cd 1200 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1201 PL_lex_inpat
4a2d328f 1202 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1203 : "";
79072805
LW
1204
1205 while (s < send || dorange) {
02aa26ce 1206 /* get transliterations out of the way (they're most literal) */
3280af22 1207 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1208 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1209 if (dorange) {
02aa26ce 1210 I32 i; /* current expanded character */
8ada0baa 1211 I32 min; /* first character in range */
02aa26ce
NT
1212 I32 max; /* last character in range */
1213
1214 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1215 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1216 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1217 d -= 2; /* eat the first char and the - */
1218
8ada0baa
JH
1219 min = (U8)*d; /* first char in range */
1220 max = (U8)d[1]; /* last char in range */
1221
c2e66d9e 1222 if (min > max) {
01ec43d0
GS
1223 Perl_croak(aTHX_
1224 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1225 (char)min, (char)max);
c2e66d9e
GS
1226 }
1227
8ada0baa
JH
1228#ifndef ASCIIish
1229 if ((isLOWER(min) && isLOWER(max)) ||
1230 (isUPPER(min) && isUPPER(max))) {
1231 if (isLOWER(min)) {
1232 for (i = min; i <= max; i++)
1233 if (isLOWER(i))
1234 *d++ = i;
1235 } else {
1236 for (i = min; i <= max; i++)
1237 if (isUPPER(i))
1238 *d++ = i;
1239 }
1240 }
1241 else
1242#endif
1243 for (i = min; i <= max; i++)
1244 *d++ = i;
02aa26ce
NT
1245
1246 /* mark the range as done, and continue */
79072805 1247 dorange = FALSE;
01ec43d0 1248 didrange = TRUE;
79072805 1249 continue;
c2e66d9e 1250 }
02aa26ce
NT
1251
1252 /* range begins (ignore - as first or last char) */
79072805 1253 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1254 if (didrange) {
1fafa243 1255 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1256 }
a0ed51b3 1257 if (utf) {
a176fa2a 1258 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1259 s++;
1260 continue;
1261 }
79072805
LW
1262 dorange = TRUE;
1263 s++;
01ec43d0
GS
1264 }
1265 else {
1266 didrange = FALSE;
1267 }
79072805 1268 }
02aa26ce
NT
1269
1270 /* if we get here, we're not doing a transliteration */
1271
0f5d15d6
IZ
1272 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1273 except for the last char, which will be done separately. */
3280af22 1274 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1275 if (s[2] == '#') {
1276 while (s < send && *s != ')')
1277 *d++ = *s++;
155aba94
GS
1278 }
1279 else if (s[2] == '{' /* This should match regcomp.c */
1280 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1281 {
cc6b7395 1282 I32 count = 1;
0f5d15d6 1283 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1284 char c;
1285
d9f97599
GS
1286 while (count && (c = *regparse)) {
1287 if (c == '\\' && regparse[1])
1288 regparse++;
cc6b7395
IZ
1289 else if (c == '{')
1290 count++;
1291 else if (c == '}')
1292 count--;
d9f97599 1293 regparse++;
cc6b7395 1294 }
5bdf89e7
IZ
1295 if (*regparse != ')') {
1296 regparse--; /* Leave one char for continuation. */
cc6b7395 1297 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1298 }
0f5d15d6 1299 while (s < regparse)
cc6b7395
IZ
1300 *d++ = *s++;
1301 }
748a9306 1302 }
02aa26ce
NT
1303
1304 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1305 else if (*s == '#' && PL_lex_inpat &&
1306 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1307 while (s+1 < send && *s != '\n')
1308 *d++ = *s++;
1309 }
02aa26ce 1310
5d1d4326
JH
1311 /* check for embedded arrays
1312 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1313 */
7e2040f0 1314 else if (*s == '@' && s[1]
5d1d4326 1315 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1316 break;
02aa26ce
NT
1317
1318 /* check for embedded scalars. only stop if we're sure it's a
1319 variable.
1320 */
79072805 1321 else if (*s == '$') {
3280af22 1322 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1323 break;
c277df42 1324 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1325 break; /* in regexp, $ might be tail anchor */
1326 }
02aa26ce 1327
a0ed51b3
LW
1328 /* (now in tr/// code again) */
1329
d008e5eb 1330 if (*s & 0x80 && thisutf) {
ba210ebe
JH
1331 STRLEN len;
1332 UV uv;
1333
dcad2880 1334 uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
ba210ebe 1335 if (len == 1) {
fcc8fcf6 1336 /* Illegal UTF8 (a high-bit byte), make it valid. */
ba210ebe
JH
1337 char *old_pvx = SvPVX(sv);
1338 /* need space for one extra char (NOTE: SvCUR() not set here) */
1339 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1340 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1341 }
1342 else {
1343 while (len--)
1344 *d++ = *s++;
1345 }
1346 has_utf = TRUE;
1347 continue;
a0ed51b3
LW
1348 }
1349
02aa26ce 1350 /* backslashes */
79072805
LW
1351 if (*s == '\\' && s+1 < send) {
1352 s++;
02aa26ce
NT
1353
1354 /* some backslashes we leave behind */
c9f97d15 1355 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1356 *d++ = '\\';
1357 *d++ = *s++;
1358 continue;
1359 }
02aa26ce
NT
1360
1361 /* deprecate \1 in strings and substitution replacements */
3280af22 1362 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1363 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1364 {
d008e5eb 1365 dTHR; /* only for ckWARN */
599cee73 1366 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1367 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1368 *--s = '$';
1369 break;
1370 }
02aa26ce
NT
1371
1372 /* string-change backslash escapes */
3280af22 1373 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1374 --s;
1375 break;
1376 }
02aa26ce
NT
1377
1378 /* if we get here, it's either a quoted -, or a digit */
79072805 1379 switch (*s) {
02aa26ce
NT
1380
1381 /* quoted - in transliterations */
79072805 1382 case '-':
3280af22 1383 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1384 *d++ = *s++;
1385 continue;
1386 }
1387 /* FALL THROUGH */
1388 default:
11b8faa4
JH
1389 {
1390 dTHR;
7e84c16c 1391 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1392 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1393 "Unrecognized escape \\%c passed through",
1394 *s);
1395 /* default action is to copy the quoted character */
1396 *d++ = *s++;
1397 continue;
1398 }
02aa26ce
NT
1399
1400 /* \132 indicates an octal constant */
79072805
LW
1401 case '0': case '1': case '2': case '3':
1402 case '4': case '5': case '6': case '7':
ba210ebe
JH
1403 {
1404 STRLEN len = 0; /* disallow underscores */
1405 uv = (UV)scan_oct(s, 3, &len);
1406 s += len;
1407 }
012bcf8d 1408 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1409
1410 /* \x24 indicates a hex constant */
79072805 1411 case 'x':
a0ed51b3
LW
1412 ++s;
1413 if (*s == '{') {
1414 char* e = strchr(s, '}');
adaeee49 1415 if (!e) {
a0ed51b3 1416 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1417 e = s;
1418 }
ba210ebe
JH
1419 {
1420 STRLEN len = 1; /* allow underscores */
1421 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1422 }
1423 s = e + 1;
a0ed51b3
LW
1424 }
1425 else {
ba210ebe
JH
1426 {
1427 STRLEN len = 0; /* disallow underscores */
1428 uv = (UV)scan_hex(s, 2, &len);
1429 s += len;
1430 }
012bcf8d
GS
1431 }
1432
1433 NUM_ESCAPE_INSERT:
1434 /* Insert oct or hex escaped character.
1435 * There will always enough room in sv since such escapes will
1436 * be longer than any utf8 sequence they can end up as
1437 */
1438 if (uv > 127) {
1439 if (!thisutf && !has_utf && uv > 255) {
1440 /* might need to recode whatever we have accumulated so far
1441 * if it contains any hibit chars
1442 */
1443 int hicount = 0;
1444 char *c;
1445 for (c = SvPVX(sv); c < d; c++) {
1446 if (*c & 0x80)
1447 hicount++;
1448 }
1449 if (hicount) {
1450 char *old_pvx = SvPVX(sv);
1451 char *src, *dst;
1452 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1453
1454 src = d - 1;
1455 d += hicount;
1456 dst = d - 1;
1457
1458 while (src < dst) {
1459 if (*src & 0x80) {
1460 dst--;
1461 uv_to_utf8((U8*)dst, (U8)*src--);
1462 dst--;
1463 }
1464 else {
1465 *dst-- = *src--;
1466 }
1467 }
1468 }
1469 }
1470
1471 if (thisutf || uv > 255) {
1472 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1473 has_utf = TRUE;
012bcf8d 1474 }
a0ed51b3 1475 else {
012bcf8d 1476 *d++ = (char)uv;
a0ed51b3 1477 }
012bcf8d
GS
1478 }
1479 else {
1480 *d++ = (char)uv;
a0ed51b3 1481 }
79072805 1482 continue;
02aa26ce 1483
4a2d328f
IZ
1484 /* \N{latin small letter a} is a named character */
1485 case 'N':
423cee85
JH
1486 ++s;
1487 if (*s == '{') {
1488 char* e = strchr(s, '}');
155aba94 1489 SV *res;
423cee85
JH
1490 STRLEN len;
1491 char *str;
423cee85
JH
1492
1493 if (!e) {
5777a3f7 1494 yyerror("Missing right brace on \\N{}");
423cee85
JH
1495 e = s - 1;
1496 goto cont_scan;
1497 }
1498 res = newSVpvn(s + 1, e - s - 1);
1499 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1500 res, Nullsv, "\\N{...}" );
423cee85 1501 str = SvPV(res,len);
f08d6ad9
GS
1502 if (!has_utf && SvUTF8(res)) {
1503 char *ostart = SvPVX(sv);
1504 SvCUR_set(sv, d - ostart);
1505 SvPOK_on(sv);
e4f3eed8 1506 *d = '\0';
f08d6ad9 1507 sv_utf8_upgrade(sv);
d2f449dd
SB
1508 /* this just broke our allocation above... */
1509 SvGROW(sv, send - start);
f08d6ad9 1510 d = SvPVX(sv) + SvCUR(sv);
e1992b6d 1511 has_utf = TRUE;
f08d6ad9 1512 }
423cee85
JH
1513 if (len > e - s + 4) {
1514 char *odest = SvPVX(sv);
1515
1516 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1517 d = SvPVX(sv) + (d - odest);
1518 }
1519 Copy(str, d, len, char);
1520 d += len;
1521 SvREFCNT_dec(res);
1522 cont_scan:
1523 s = e + 1;
1524 }
1525 else
5777a3f7 1526 yyerror("Missing braces on \\N{}");
423cee85
JH
1527 continue;
1528
02aa26ce 1529 /* \c is a control character */
79072805
LW
1530 case 'c':
1531 s++;
9d116dd7
JH
1532#ifdef EBCDIC
1533 *d = *s++;
1534 if (isLOWER(*d))
1535 *d = toUPPER(*d);
774a9426
GS
1536 *d = toCTRL(*d);
1537 d++;
9d116dd7 1538#else
ba210ebe
JH
1539 {
1540 U8 c = *s++;
1541 *d++ = toCTRL(c);
1542 }
9d116dd7 1543#endif
79072805 1544 continue;
02aa26ce
NT
1545
1546 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1547 case 'b':
1548 *d++ = '\b';
1549 break;
1550 case 'n':
1551 *d++ = '\n';
1552 break;
1553 case 'r':
1554 *d++ = '\r';
1555 break;
1556 case 'f':
1557 *d++ = '\f';
1558 break;
1559 case 't':
1560 *d++ = '\t';
1561 break;
34a3fe2a
PP
1562#ifdef EBCDIC
1563 case 'e':
1564 *d++ = '\047'; /* CP 1047 */
1565 break;
1566 case 'a':
1567 *d++ = '\057'; /* CP 1047 */
1568 break;
1569#else
79072805
LW
1570 case 'e':
1571 *d++ = '\033';
1572 break;
1573 case 'a':
1574 *d++ = '\007';
1575 break;
34a3fe2a 1576#endif
02aa26ce
NT
1577 } /* end switch */
1578
79072805
LW
1579 s++;
1580 continue;
02aa26ce
NT
1581 } /* end if (backslash) */
1582
79072805 1583 *d++ = *s++;
02aa26ce
NT
1584 } /* while loop to process each character */
1585
1586 /* terminate the string and set up the sv */
79072805 1587 *d = '\0';
463ee0b2 1588 SvCUR_set(sv, d - SvPVX(sv));
79072805 1589 SvPOK_on(sv);
7e2040f0
GS
1590 if (has_utf)
1591 SvUTF8_on(sv);
79072805 1592
02aa26ce 1593 /* shrink the sv if we allocated more than we used */
79072805
LW
1594 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1595 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1596 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1597 }
02aa26ce 1598
9b599b2a 1599 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1600 if (s > PL_bufptr) {
1601 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1602 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1603 sv, Nullsv,
3280af22 1604 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1605 ? "tr"
3280af22 1606 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1607 ? "s"
1608 : "qq")));
79072805 1609 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1610 } else
8990e307 1611 SvREFCNT_dec(sv);
79072805
LW
1612 return s;
1613}
1614
ffb4593c
NT
1615/* S_intuit_more
1616 * Returns TRUE if there's more to the expression (e.g., a subscript),
1617 * FALSE otherwise.
ffb4593c
NT
1618 *
1619 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1620 *
1621 * ->[ and ->{ return TRUE
1622 * { and [ outside a pattern are always subscripts, so return TRUE
1623 * if we're outside a pattern and it's not { or [, then return FALSE
1624 * if we're in a pattern and the first char is a {
1625 * {4,5} (any digits around the comma) returns FALSE
1626 * if we're in a pattern and the first char is a [
1627 * [] returns FALSE
1628 * [SOMETHING] has a funky algorithm to decide whether it's a
1629 * character class or not. It has to deal with things like
1630 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1631 * anything else returns TRUE
1632 */
1633
9cbb5ea2
GS
1634/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1635
76e3520e 1636STATIC int
cea2e8a9 1637S_intuit_more(pTHX_ register char *s)
79072805 1638{
3280af22 1639 if (PL_lex_brackets)
79072805
LW
1640 return TRUE;
1641 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1642 return TRUE;
1643 if (*s != '{' && *s != '[')
1644 return FALSE;
3280af22 1645 if (!PL_lex_inpat)
79072805
LW
1646 return TRUE;
1647
1648 /* In a pattern, so maybe we have {n,m}. */
1649 if (*s == '{') {
1650 s++;
1651 if (!isDIGIT(*s))
1652 return TRUE;
1653 while (isDIGIT(*s))
1654 s++;
1655 if (*s == ',')
1656 s++;
1657 while (isDIGIT(*s))
1658 s++;
1659 if (*s == '}')
1660 return FALSE;
1661 return TRUE;
1662
1663 }
1664
1665 /* On the other hand, maybe we have a character class */
1666
1667 s++;
1668 if (*s == ']' || *s == '^')
1669 return FALSE;
1670 else {
ffb4593c 1671 /* this is terrifying, and it works */
79072805
LW
1672 int weight = 2; /* let's weigh the evidence */
1673 char seen[256];
f27ffc4a 1674 unsigned char un_char = 255, last_un_char;
93a17b20 1675 char *send = strchr(s,']');
3280af22 1676 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1677
1678 if (!send) /* has to be an expression */
1679 return TRUE;
1680
1681 Zero(seen,256,char);
1682 if (*s == '$')
1683 weight -= 3;
1684 else if (isDIGIT(*s)) {
1685 if (s[1] != ']') {
1686 if (isDIGIT(s[1]) && s[2] == ']')
1687 weight -= 10;
1688 }
1689 else
1690 weight -= 100;
1691 }
1692 for (; s < send; s++) {
1693 last_un_char = un_char;
1694 un_char = (unsigned char)*s;
1695 switch (*s) {
1696 case '@':
1697 case '&':
1698 case '$':
1699 weight -= seen[un_char] * 10;
7e2040f0 1700 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1701 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1702 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1703 weight -= 100;
1704 else
1705 weight -= 10;
1706 }
1707 else if (*s == '$' && s[1] &&
93a17b20
LW
1708 strchr("[#!%*<>()-=",s[1])) {
1709 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1710 weight -= 10;
1711 else
1712 weight -= 1;
1713 }
1714 break;
1715 case '\\':
1716 un_char = 254;
1717 if (s[1]) {
93a17b20 1718 if (strchr("wds]",s[1]))
79072805
LW
1719 weight += 100;
1720 else if (seen['\''] || seen['"'])
1721 weight += 1;
93a17b20 1722 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1723 weight += 40;
1724 else if (isDIGIT(s[1])) {
1725 weight += 40;
1726 while (s[1] && isDIGIT(s[1]))
1727 s++;
1728 }
1729 }
1730 else
1731 weight += 100;
1732 break;
1733 case '-':
1734 if (s[1] == '\\')
1735 weight += 50;
93a17b20 1736 if (strchr("aA01! ",last_un_char))
79072805 1737 weight += 30;
93a17b20 1738 if (strchr("zZ79~",s[1]))
79072805 1739 weight += 30;
f27ffc4a
GS
1740 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1741 weight -= 5; /* cope with negative subscript */
79072805
LW
1742 break;
1743 default:
93a17b20 1744 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1745 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1746 char *d = tmpbuf;
1747 while (isALPHA(*s))
1748 *d++ = *s++;
1749 *d = '\0';
1750 if (keyword(tmpbuf, d - tmpbuf))
1751 weight -= 150;
1752 }
1753 if (un_char == last_un_char + 1)
1754 weight += 5;
1755 weight -= seen[un_char];
1756 break;
1757 }
1758 seen[un_char]++;
1759 }
1760 if (weight >= 0) /* probably a character class */
1761 return FALSE;
1762 }
1763
1764 return TRUE;
1765}
ffed7fef 1766
ffb4593c
NT
1767/*
1768 * S_intuit_method
1769 *
1770 * Does all the checking to disambiguate
1771 * foo bar
1772 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1773 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1774 *
1775 * First argument is the stuff after the first token, e.g. "bar".
1776 *
1777 * Not a method if bar is a filehandle.
1778 * Not a method if foo is a subroutine prototyped to take a filehandle.
1779 * Not a method if it's really "Foo $bar"
1780 * Method if it's "foo $bar"
1781 * Not a method if it's really "print foo $bar"
1782 * Method if it's really "foo package::" (interpreted as package->foo)
1783 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1784 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1785 * =>
1786 */
1787
76e3520e 1788STATIC int
cea2e8a9 1789S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1790{
1791 char *s = start + (*start == '$');
3280af22 1792 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1793 STRLEN len;
1794 GV* indirgv;
1795
1796 if (gv) {
b6c543e3 1797 CV *cv;
a0d0e21e
LW
1798 if (GvIO(gv))
1799 return 0;
b6c543e3
IZ
1800 if ((cv = GvCVu(gv))) {
1801 char *proto = SvPVX(cv);
1802 if (proto) {
1803 if (*proto == ';')
1804 proto++;
1805 if (*proto == '*')
1806 return 0;
1807 }
1808 } else
a0d0e21e
LW
1809 gv = 0;
1810 }
8903cb82 1811 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1812 /* start is the beginning of the possible filehandle/object,
1813 * and s is the end of it
1814 * tmpbuf is a copy of it
1815 */
1816
a0d0e21e 1817 if (*start == '$') {
3280af22 1818 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1819 return 0;
1820 s = skipspace(s);
3280af22
NIS
1821 PL_bufptr = start;
1822 PL_expect = XREF;
a0d0e21e
LW
1823 return *s == '(' ? FUNCMETH : METHOD;
1824 }
1825 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1826 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1827 len -= 2;
1828 tmpbuf[len] = '\0';
1829 goto bare_package;
1830 }
1831 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1832 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1833 return 0;
1834 /* filehandle or package name makes it a method */
89bfa8cd 1835 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1836 s = skipspace(s);
3280af22 1837 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1838 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1839 bare_package:
3280af22 1840 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1841 newSVpvn(tmpbuf,len));
3280af22
NIS
1842 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1843 PL_expect = XTERM;
a0d0e21e 1844 force_next(WORD);
3280af22 1845 PL_bufptr = s;
a0d0e21e
LW
1846 return *s == '(' ? FUNCMETH : METHOD;
1847 }
1848 }
1849 return 0;
1850}
1851
ffb4593c
NT
1852/*
1853 * S_incl_perldb
1854 * Return a string of Perl code to load the debugger. If PERL5DB
1855 * is set, it will return the contents of that, otherwise a
1856 * compile-time require of perl5db.pl.
1857 */
1858
76e3520e 1859STATIC char*
cea2e8a9 1860S_incl_perldb(pTHX)
a0d0e21e 1861{
3280af22 1862 if (PL_perldb) {
76e3520e 1863 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1864
1865 if (pdb)
1866 return pdb;
61bb5906 1867 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1868 return "BEGIN { require 'perl5db.pl' }";
1869 }
1870 return "";
1871}
1872
1873
16d20bd9
AD
1874/* Encoded script support. filter_add() effectively inserts a
1875 * 'pre-processing' function into the current source input stream.
1876 * Note that the filter function only applies to the current source file
1877 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1878 *
1879 * The datasv parameter (which may be NULL) can be used to pass
1880 * private data to this instance of the filter. The filter function
1881 * can recover the SV using the FILTER_DATA macro and use it to
1882 * store private buffers and state information.
1883 *
1884 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1885 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1886 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1887 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1888 * private use must be set using malloc'd pointers.
1889 */
16d20bd9
AD
1890
1891SV *
864dbfa3 1892Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1893{
f4c556ac
GS
1894 if (!funcp)
1895 return Nullsv;
1896
3280af22
NIS
1897 if (!PL_rsfp_filters)
1898 PL_rsfp_filters = newAV();
16d20bd9 1899 if (!datasv)
8c52afec 1900 datasv = NEWSV(255,0);
16d20bd9 1901 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1902 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1903 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1904 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1905 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1906 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1907 av_unshift(PL_rsfp_filters, 1);
1908 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1909 return(datasv);
1910}
1911
1912
1913/* Delete most recently added instance of this filter function. */
a0d0e21e 1914void
864dbfa3 1915Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1916{
e0c19803 1917 SV *datasv;
f4c556ac 1918 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1919 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1920 return;
1921 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1922 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1923 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1924 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1925 IoANY(datasv) = (void *)NULL;
3280af22 1926 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1927
16d20bd9
AD
1928 return;
1929 }
1930 /* we need to search for the correct entry and clear it */
cea2e8a9 1931 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1932}
1933
1934
1935/* Invoke the n'th filter function for the current rsfp. */
1936I32
864dbfa3 1937Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1938
1939
1940 /* 0 = read one text line */
a0d0e21e 1941{
16d20bd9
AD
1942 filter_t funcp;
1943 SV *datasv = NULL;
e50aee73 1944
3280af22 1945 if (!PL_rsfp_filters)
16d20bd9 1946 return -1;
3280af22 1947 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1948 /* Provide a default input filter to make life easy. */
1949 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1950 DEBUG_P(PerlIO_printf(Perl_debug_log,
1951 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1952 if (maxlen) {
1953 /* Want a block */
1954 int len ;
1955 int old_len = SvCUR(buf_sv) ;
1956
1957 /* ensure buf_sv is large enough */
1958 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1959 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1960 if (PerlIO_error(PL_rsfp))
37120919
AD
1961 return -1; /* error */
1962 else
1963 return 0 ; /* end of file */
1964 }
16d20bd9
AD
1965 SvCUR_set(buf_sv, old_len + len) ;
1966 } else {
1967 /* Want a line */
3280af22
NIS
1968 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1969 if (PerlIO_error(PL_rsfp))
37120919
AD
1970 return -1; /* error */
1971 else
1972 return 0 ; /* end of file */
1973 }
16d20bd9
AD
1974 }
1975 return SvCUR(buf_sv);
1976 }
1977 /* Skip this filter slot if filter has been deleted */
3280af22 1978 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1979 DEBUG_P(PerlIO_printf(Perl_debug_log,
1980 "filter_read %d: skipped (filter deleted)\n",
1981 idx));
16d20bd9
AD
1982 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1983 }
1984 /* Get function pointer hidden within datasv */
4755096e 1985 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1986 DEBUG_P(PerlIO_printf(Perl_debug_log,
1987 "filter_read %d: via function %p (%s)\n",
1988 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1989 /* Call function. The function is expected to */
1990 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1991 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1992 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1993}
1994
76e3520e 1995STATIC char *
cea2e8a9 1996S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1997{
c39cd008 1998#ifdef PERL_CR_FILTER
3280af22 1999 if (!PL_rsfp_filters) {
c39cd008 2000 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2001 }
2002#endif
3280af22 2003 if (PL_rsfp_filters) {
16d20bd9 2004
55497cff
PP
2005 if (!append)
2006 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2007 if (FILTER_READ(0, sv, 0) > 0)
2008 return ( SvPVX(sv) ) ;
2009 else
2010 return Nullch ;
2011 }
9d116dd7 2012 else
fd049845 2013 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2014}
2015
01ec43d0
GS
2016STATIC HV *
2017S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2018{
2019 GV *gv;
2020
01ec43d0 2021 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2022 return PL_curstash;
2023
2024 if (len > 2 &&
2025 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2026 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2027 {
2028 return GvHV(gv); /* Foo:: */
def3634b
GS
2029 }
2030
2031 /* use constant CLASS => 'MyClass' */
2032 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2033 SV *sv;
2034 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2035 pkgname = SvPV_nolen(sv);
2036 }
2037 }
2038
2039 return gv_stashpv(pkgname, FALSE);
2040}
a0d0e21e 2041
748a9306
LW
2042#ifdef DEBUGGING
2043 static char* exp_name[] =
09bef843
SB
2044 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2045 "ATTRTERM", "TERMBLOCK"
2046 };
748a9306 2047#endif
463ee0b2 2048
02aa26ce
NT
2049/*
2050 yylex
2051
2052 Works out what to call the token just pulled out of the input
2053 stream. The yacc parser takes care of taking the ops we return and
2054 stitching them into a tree.
2055
2056 Returns:
2057 PRIVATEREF
2058
2059 Structure:
2060 if read an identifier
2061 if we're in a my declaration
2062 croak if they tried to say my($foo::bar)
2063 build the ops for a my() declaration
2064 if it's an access to a my() variable
2065 are we in a sort block?
2066 croak if my($a); $a <=> $b
2067 build ops for access to a my() variable
2068 if in a dq string, and they've said @foo and we can't find @foo
2069 croak
2070 build ops for a bareword
2071 if we already built the token before, use it.
2072*/
2073
bf4acbe4
GS
2074#ifdef __SC__
2075#pragma segment Perl_yylex
2076#endif
864dbfa3
GS
2077int
2078#ifdef USE_PURE_BISON
cea2e8a9 2079Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 2080#else
cea2e8a9 2081Perl_yylex(pTHX)
864dbfa3 2082#endif
378cc40b 2083{
b73d6f50 2084 dTHR;
20141f0e
RI
2085 int r;
2086
2087#ifdef USE_PURE_BISON
20141f0e
RI
2088 yylval_pointer[yyactlevel] = lvalp;
2089 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2090 yyactlevel++;
2091 if (yyactlevel >= YYMAXLEVEL)
2092 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e
RI
2093#endif
2094
2095 r = S_syylex(aTHX);
2096
2097#ifdef USE_PURE_BISON
2098 yyactlevel--;
2099#endif
2100
2101 return r;
2102}
2103
2104STATIC int
2105S_syylex(pTHX) /* need to be separate from yylex for reentrancy */
2106{
11343788 2107 dTHR;
79072805 2108 register char *s;
378cc40b 2109 register char *d;
79072805 2110 register I32 tmp;
463ee0b2 2111 STRLEN len;
161b471a
NIS
2112 GV *gv = Nullgv;
2113 GV **gvp = 0;
a687059c 2114
02aa26ce 2115 /* check if there's an identifier for us to look at */
3280af22 2116 if (PL_pending_ident) {
02aa26ce 2117 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2118 char pit = PL_pending_ident;
2119 PL_pending_ident = 0;
bbce6d69 2120
02aa26ce
NT
2121 /* if we're in a my(), we can't allow dynamics here.
2122 $foo'bar has already been turned into $foo::bar, so
2123 just check for colons.
2124
2125 if it's a legal name, the OP is a PADANY.
2126 */
3280af22 2127 if (PL_in_my) {
77ca0c92 2128 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2129 if (strchr(PL_tokenbuf,':'))
2130 yyerror(Perl_form(aTHX_ "No package name allowed for "
2131 "variable %s in \"our\"",
2132 PL_tokenbuf));
77ca0c92
LW
2133 tmp = pad_allocmy(PL_tokenbuf);
2134 }
2135 else {
2136 if (strchr(PL_tokenbuf,':'))
2137 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2138
77ca0c92
LW
2139 yylval.opval = newOP(OP_PADANY, 0);
2140 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2141 return PRIVATEREF;
2142 }
bbce6d69
PP
2143 }
2144
02aa26ce
NT
2145 /*
2146 build the ops for accesses to a my() variable.
2147
2148 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2149 then used in a comparison. This catches most, but not
2150 all cases. For instance, it catches
2151 sort { my($a); $a <=> $b }
2152 but not
2153 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2154 (although why you'd do that is anyone's guess).
2155 */
2156
3280af22 2157 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2158#ifdef USE_THREADS
54b9620d 2159 /* Check for single character per-thread SVs */
3280af22
NIS
2160 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2161 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2162 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2163 {
2faa37cc 2164 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2165 yylval.opval->op_targ = tmp;
2166 return PRIVATEREF;
2167 }
2168#endif /* USE_THREADS */
3280af22 2169 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2170 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2171 /* might be an "our" variable" */
f472eb5c 2172 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2173 /* build ops for a bareword */
f472eb5c
GS
2174 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2175 sv_catpvn(sym, "::", 2);
2176 sv_catpv(sym, PL_tokenbuf+1);
2177 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2178 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2179 gv_fetchpv(SvPVX(sym),
77ca0c92 2180 (PL_in_eval
f472eb5c
GS
2181 ? (GV_ADDMULTI | GV_ADDINEVAL)
2182 : TRUE
77ca0c92
LW
2183 ),
2184 ((PL_tokenbuf[0] == '$') ? SVt_PV
2185 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2186 : SVt_PVHV));
2187 return WORD;
2188 }
2189
02aa26ce 2190 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2191 if (PL_last_lop_op == OP_SORT &&
2192 PL_tokenbuf[0] == '$' &&
2193 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2194 && !PL_tokenbuf[2])
bbce6d69 2195 {
3280af22
NIS
2196 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2197 d < PL_bufend && *d != '\n';
a863c7d1
MB
2198 d++)
2199 {
2200 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2201 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2202 PL_tokenbuf);
a863c7d1 2203 }
bbce6d69
PP
2204 }
2205 }
bbce6d69 2206
a863c7d1
MB
2207 yylval.opval = newOP(OP_PADANY, 0);
2208 yylval.opval->op_targ = tmp;
2209 return PRIVATEREF;
2210 }
bbce6d69
PP
2211 }
2212
02aa26ce
NT
2213 /*
2214 Whine if they've said @foo in a doublequoted string,
2215 and @foo isn't a variable we can find in the symbol
2216 table.
2217 */
3280af22
NIS
2218 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2219 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2220 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2221 && ckWARN(WARN_AMBIGUOUS))
2222 {
2223 /* Downgraded from fatal to warning 20000522 mjd */
2224 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2225 "Possible unintended interpolation of %s in string",
2226 PL_tokenbuf);
2227 }
bbce6d69
PP
2228 }
2229
02aa26ce 2230 /* build ops for a bareword */
3280af22 2231 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2232 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2233 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2234 ((PL_tokenbuf[0] == '$') ? SVt_PV
2235 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2236 : SVt_PVHV));
2237 return WORD;
2238 }
2239
02aa26ce
NT
2240 /* no identifier pending identification */
2241
3280af22 2242 switch (PL_lex_state) {
79072805
LW
2243#ifdef COMMENTARY
2244 case LEX_NORMAL: /* Some compilers will produce faster */
2245 case LEX_INTERPNORMAL: /* code if we comment these out. */
2246 break;
2247#endif
2248
09bef843 2249 /* when we've already built the next token, just pull it out of the queue */
79072805 2250 case LEX_KNOWNEXT:
3280af22
NIS
2251 PL_nexttoke--;
2252 yylval = PL_nextval[PL_nexttoke];
2253 if (!PL_nexttoke) {
2254 PL_lex_state = PL_lex_defer;
2255 PL_expect = PL_lex_expect;
2256 PL_lex_defer = LEX_NORMAL;
463ee0b2 2257 }
3280af22 2258 return(PL_nexttype[PL_nexttoke]);
79072805 2259
02aa26ce 2260 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2261 when we get here, PL_bufptr is at the \
02aa26ce 2262 */
79072805
LW
2263 case LEX_INTERPCASEMOD:
2264#ifdef DEBUGGING
3280af22 2265 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2266 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2267#endif
02aa26ce 2268 /* handle \E or end of string */
3280af22 2269 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2270 char oldmod;
02aa26ce
NT
2271
2272 /* if at a \E */
3280af22
NIS
2273 if (PL_lex_casemods) {
2274 oldmod = PL_lex_casestack[--PL_lex_casemods];
2275 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2276
3280af22
NIS
2277 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2278 PL_bufptr += 2;
2279 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2280 }
79072805
LW
2281 return ')';
2282 }
3280af22
NIS
2283 if (PL_bufptr != PL_bufend)
2284 PL_bufptr += 2;
2285 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2286 return yylex();
79072805
LW
2287 }
2288 else {
3280af22 2289 s = PL_bufptr + 1;
79072805
LW
2290 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2291 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2292 if (strchr("LU", *s) &&
3280af22 2293 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2294 {
3280af22 2295 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2296 return ')';
2297 }
3280af22
NIS
2298 if (PL_lex_casemods > 10) {
2299 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2300 if (newlb != PL_lex_casestack) {
a0d0e21e 2301 SAVEFREEPV(newlb);
3280af22 2302 PL_lex_casestack = newlb;
a0d0e21e
LW
2303 }
2304 }
3280af22
NIS
2305 PL_lex_casestack[PL_lex_casemods++] = *s;
2306 PL_lex_casestack[PL_lex_casemods] = '\0';
2307 PL_lex_state = LEX_INTERPCONCAT;
2308 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2309 force_next('(');
2310 if (*s == 'l')
3280af22 2311 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2312 else if (*s == 'u')
3280af22 2313 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2314 else if (*s == 'L')
3280af22 2315 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2316 else if (*s == 'U')
3280af22 2317 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2318 else if (*s == 'Q')
3280af22 2319 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2320 else
cea2e8a9 2321 Perl_croak(aTHX_ "panic: yylex");
3280af22 2322 PL_bufptr = s + 1;
79072805 2323 force_next(FUNC);
3280af22
NIS
2324 if (PL_lex_starts) {
2325 s = PL_bufptr;
2326 PL_lex_starts = 0;
79072805
LW
2327 Aop(OP_CONCAT);
2328 }
2329 else
cea2e8a9 2330 return yylex();
79072805
LW
2331 }
2332
55497cff
PP
2333 case LEX_INTERPPUSH:
2334 return sublex_push();
2335
79072805 2336 case LEX_INTERPSTART:
3280af22 2337 if (PL_bufptr == PL_bufend)
79072805 2338 return sublex_done();
3280af22
NIS
2339 PL_expect = XTERM;
2340 PL_lex_dojoin = (*PL_bufptr == '@');
2341 PL_lex_state = LEX_INTERPNORMAL;
2342 if (PL_lex_dojoin) {
2343 PL_nextval[PL_nexttoke].ival = 0;
79072805 2344 force_next(',');
554b3eca 2345#ifdef USE_THREADS
533c011a
NIS
2346 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2347 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2348 force_next(PRIVATEREF);
2349#else
a0d0e21e 2350 force_ident("\"", '$');
554b3eca 2351#endif /* USE_THREADS */
3280af22 2352 PL_nextval[PL_nexttoke].ival = 0;
79072805 2353 force_next('$');
3280af22 2354 PL_nextval[PL_nexttoke].ival = 0;
79072805 2355 force_next('(');
3280af22 2356 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2357 force_next(FUNC);
2358 }
3280af22
NIS
2359 if (PL_lex_starts++) {
2360 s = PL_bufptr;
79072805
LW
2361 Aop(OP_CONCAT);
2362 }
cea2e8a9 2363 return yylex();
79072805
LW
2364
2365 case LEX_INTERPENDMAYBE:
3280af22
NIS
2366 if (intuit_more(PL_bufptr)) {
2367 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2368 break;
2369 }
2370 /* FALL THROUGH */
2371
2372 case LEX_INTERPEND:
3280af22
NIS
2373 if (PL_lex_dojoin) {
2374 PL_lex_dojoin = FALSE;
2375 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2376 return ')';
2377 }
43a16006 2378 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2379 && SvEVALED(PL_lex_repl))
43a16006 2380 {
e9fa98b2 2381 if (PL_bufptr != PL_bufend)
cea2e8a9 2382 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2383 PL_lex_repl = Nullsv;
2384 }
79072805
LW
2385 /* FALLTHROUGH */
2386 case LEX_INTERPCONCAT:
2387#ifdef DEBUGGING
3280af22 2388 if (PL_lex_brackets)
cea2e8a9 2389 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2390#endif
3280af22 2391 if (PL_bufptr == PL_bufend)
79072805
LW
2392 return sublex_done();
2393
3280af22
NIS
2394 if (SvIVX(PL_linestr) == '\'') {
2395 SV *sv = newSVsv(PL_linestr);
2396 if (!PL_lex_inpat)
76e3520e 2397 sv = tokeq(sv);
3280af22 2398 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2399 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2400 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2401 s = PL_bufend;
79072805
LW
2402 }
2403 else {
3280af22 2404 s = scan_const(PL_bufptr);
79072805 2405 if (*s == '\\')
3280af22 2406 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2407 else
3280af22 2408 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2409 }
2410
3280af22
NIS
2411 if (s != PL_bufptr) {
2412 PL_nextval[PL_nexttoke] = yylval;
2413 PL_expect = XTERM;
79072805 2414 force_next(THING);
3280af22 2415 if (PL_lex_starts++)
79072805
LW
2416 Aop(OP_CONCAT);
2417 else {
3280af22 2418 PL_bufptr = s;
cea2e8a9 2419 return yylex();
79072805
LW
2420 }
2421 }
2422
cea2e8a9 2423 return yylex();
a0d0e21e 2424 case LEX_FORMLINE:
3280af22
NIS
2425 PL_lex_state = LEX_NORMAL;
2426 s = scan_formline(PL_bufptr);
2427 if (!PL_lex_formbrack)
a0d0e21e
LW
2428 goto rightbracket;
2429 OPERATOR(';');
79072805
LW
2430 }
2431
3280af22
NIS
2432 s = PL_bufptr;
2433 PL_oldoldbufptr = PL_oldbufptr;
2434 PL_oldbufptr = s;
79072805 2435 DEBUG_p( {
bf49b057
GS
2436 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2437 exp_name[PL_expect], s);
79072805 2438 } )
463ee0b2
LW
2439
2440 retry:
378cc40b
LW
2441 switch (*s) {
2442 default:
7e2040f0 2443 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2444 goto keylookup;
cea2e8a9 2445 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2446 case 4:
2447 case 26:
2448 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2449 case 0:
3280af22
NIS
2450 if (!PL_rsfp) {
2451 PL_last_uni = 0;
2452 PL_last_lop = 0;
2453 if (PL_lex_brackets)
d98d5fff 2454 yyerror("Missing right curly or square bracket");
79072805 2455 TOKEN(0);
463ee0b2 2456 }
3280af22 2457 if (s++ < PL_bufend)
a687059c 2458 goto retry; /* ignore stray nulls */
3280af22
NIS
2459 PL_last_uni = 0;
2460 PL_last_lop = 0;
2461 if (!PL_in_eval && !PL_preambled) {
2462 PL_preambled = TRUE;
2463 sv_setpv(PL_linestr,incl_perldb());
2464 if (SvCUR(PL_linestr))
2465 sv_catpv(PL_linestr,";");
2466 if (PL_preambleav){
2467 while(AvFILLp(PL_preambleav) >= 0) {
2468 SV *tmpsv = av_shift(PL_preambleav);
2469 sv_catsv(PL_linestr, tmpsv);
2470 sv_catpv(PL_linestr, ";");
91b7def8
PP
2471 sv_free(tmpsv);
2472 }
3280af22
NIS
2473 sv_free((SV*)PL_preambleav);
2474 PL_preambleav = NULL;
91b7def8 2475 }
3280af22
NIS
2476 if (PL_minus_n || PL_minus_p) {
2477 sv_catpv(PL_linestr, "LINE: while (<>) {");
2478 if (PL_minus_l)
2479 sv_catpv(PL_linestr,"chomp;");
2480 if (PL_minus_a) {
8fd239a7
CS
2481 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2482 if (gv)
2483 GvIMPORTED_AV_on(gv);
3280af22
NIS
2484 if (PL_minus_F) {
2485 if (strchr("/'\"", *PL_splitstr)
2486 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2487 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2488 else {
2489 char delim;
2490 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2491 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2492 delim = *s;
cea2e8a9 2493 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2494 "q" + (delim == '\''), delim);
3280af22 2495 for (s = PL_splitstr; *s; s++) {
54310121 2496 if (*s == '\\')
3280af22
NIS
2497 sv_catpvn(PL_linestr, "\\", 1);
2498 sv_catpvn(PL_linestr, s, 1);
54310121 2499 }
cea2e8a9 2500 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2501 }
2304df62
AD
2502 }
2503 else
3280af22 2504 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2505 }
79072805 2506 }
3280af22
NIS
2507 sv_catpv(PL_linestr, "\n");
2508 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2509 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2510 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2511 SV *sv = NEWSV(85,0);
2512
2513 sv_upgrade(sv, SVt_PVMG);
3280af22 2514 sv_setsv(sv,PL_linestr);
57843af0 2515 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2516 }
79072805 2517 goto retry;
a687059c 2518 }
e929a76b 2519 do {
01ec43d0
GS
2520 bool bof;
2521 bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
dea0fc0b
JH
2522 s = filter_gets(PL_linestr, PL_rsfp, 0);
2523 if (s == Nullch) {
e929a76b 2524 fake_eof:
3280af22
NIS
2525 if (PL_rsfp) {
2526 if (PL_preprocess && !PL_in_eval)
2527 (void)PerlProc_pclose(PL_rsfp);
2528 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2529 PerlIO_clearerr(PL_rsfp);
395c3793 2530 else
3280af22
NIS
2531 (void)PerlIO_close(PL_rsfp);
2532 PL_rsfp = Nullfp;
4a9ae47a 2533 PL_doextract = FALSE;
395c3793 2534 }
3280af22
NIS
2535 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2536 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2537 sv_catpv(PL_linestr,";}");
2538 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2539 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2540 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2541 goto retry;
2542 }
3280af22
NIS
2543 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2544 sv_setpv(PL_linestr,"");
79072805 2545 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2546 } else if (bof) {
2547 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2548 s = swallow_bom((U8*)s);
378cc40b 2549 }
3280af22 2550 if (PL_doextract) {
a0d0e21e 2551 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2552 PL_doextract = FALSE;
a0d0e21e
LW
2553
2554 /* Incest with pod. */
2555 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2556 sv_setpv(PL_linestr, "");
2557 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2559 PL_doextract = FALSE;
a0d0e21e 2560 }
b250498f 2561 }
463ee0b2 2562 incline(s);
3280af22
NIS
2563 } while (PL_doextract);
2564 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2565 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2566 SV *sv = NEWSV(85,0);
a687059c 2567
93a17b20 2568 sv_upgrade(sv, SVt_PVMG);
3280af22 2569 sv_setsv(sv,PL_linestr);
57843af0 2570 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2571 }
3280af22 2572 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2573 if (CopLINE(PL_curcop) == 1) {
3280af22 2574 while (s < PL_bufend && isSPACE(*s))
79072805 2575 s++;
a0d0e21e 2576 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2577 s++;
44a8e56a 2578 d = Nullch;
3280af22 2579 if (!PL_in_eval) {
44a8e56a
PP
2580 if (*s == '#' && *(s+1) == '!')
2581 d = s + 2;
2582#ifdef ALTERNATE_SHEBANG
2583 else {
2584 static char as[] = ALTERNATE_SHEBANG;
2585 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2586 d = s + (sizeof(as) - 1);
2587 }
2588#endif /* ALTERNATE_SHEBANG */
2589 }
2590 if (d) {
b8378b72 2591 char *ipath;
774d564b 2592 char *ipathend;
b8378b72 2593
774d564b 2594 while (isSPACE(*d))
b8378b72
CS
2595 d++;
2596 ipath = d;
774d564b
PP
2597 while (*d && !isSPACE(*d))
2598 d++;
2599 ipathend = d;
2600
2601#ifdef ARG_ZERO_IS_SCRIPT
2602 if (ipathend > ipath) {
2603 /*
2604 * HP-UX (at least) sets argv[0] to the script name,
2605 * which makes $^X incorrect. And Digital UNIX and Linux,
2606 * at least, set argv[0] to the basename of the Perl
2607 * interpreter. So, having found "#!", we'll set it right.
2608 */
2609 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2610 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2611 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2612 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2613 SvSETMAGIC(x);
2614 }
774d564b 2615 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2616 }
774d564b 2617#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2618
2619 /*
2620 * Look for options.
2621 */
748a9306 2622 d = instr(s,"perl -");
84e30d1a 2623 if (!d) {
748a9306 2624 d = instr(s,"perl");
84e30d1a
GS
2625#if defined(DOSISH)
2626 /* avoid getting into infinite loops when shebang
2627 * line contains "Perl" rather than "perl" */
2628 if (!d) {
2629 for (d = ipathend-4; d >= ipath; --d) {
2630 if ((*d == 'p' || *d == 'P')
2631 && !ibcmp(d, "perl", 4))
2632 {
2633 break;
2634 }
2635 }
2636 if (d < ipath)
2637 d = Nullch;
2638 }
2639#endif
2640 }
44a8e56a
PP
2641#ifdef ALTERNATE_SHEBANG
2642 /*
2643 * If the ALTERNATE_SHEBANG on this system starts with a
2644 * character that can be part of a Perl expression, then if
2645 * we see it but not "perl", we're probably looking at the
2646 * start of Perl code, not a request to hand off to some
2647 * other interpreter. Similarly, if "perl" is there, but
2648 * not in the first 'word' of the line, we assume the line
2649 * contains the start of the Perl program.
44a8e56a
PP
2650 */
2651 if (d && *s != '#') {
774d564b 2652 char *c = ipath;
44a8e56a
PP
2653 while (*c && !strchr("; \t\r\n\f\v#", *c))
2654 c++;
2655 if (c < d)
2656 d = Nullch; /* "perl" not in first word; ignore */
2657 else
2658 *s = '#'; /* Don't try to parse shebang line */
2659 }
774d564b 2660#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2661#ifndef MACOS_TRADITIONAL
748a9306 2662 if (!d &&
44a8e56a 2663 *s == '#' &&
774d564b 2664 ipathend > ipath &&
3280af22 2665 !PL_minus_c &&
748a9306 2666 !instr(s,"indir") &&
3280af22 2667 instr(PL_origargv[0],"perl"))
748a9306 2668 {
9f68db38 2669 char **newargv;
9f68db38 2670
774d564b
PP
2671 *ipathend = '\0';
2672 s = ipathend + 1;
3280af22 2673 while (s < PL_bufend && isSPACE(*s))
9f68db38 2674 s++;
3280af22
NIS
2675 if (s < PL_bufend) {
2676 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2677 newargv[1] = s;
3280af22 2678 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2679 s++;
2680 *s = '\0';
3280af22 2681 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2682 }
2683 else
3280af22 2684 newargv = PL_origargv;
774d564b 2685 newargv[0] = ipath;
80252599 2686 PerlProc_execv(ipath, newargv);
cea2e8a9 2687 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2688 }
bf4acbe4 2689#endif
748a9306 2690 if (d) {
3280af22
NIS
2691 U32 oldpdb = PL_perldb;
2692 bool oldn = PL_minus_n;
2693 bool oldp = PL_minus_p;
748a9306
LW
2694
2695 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2696 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2697
2698 if (*d++ == '-') {
8cc95fdb
PP
2699 do {
2700 if (*d == 'M' || *d == 'm') {
2701 char *m = d;
2702 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2703 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2704 (int)(d - m), m);
2705 }
2706 d = moreswitches(d);
2707 } while (d);
155aba94
GS
2708 if ((PERLDB_LINE && !oldpdb) ||
2709 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2710 /* if we have already added "LINE: while (<>) {",
2711 we must not do it again */
748a9306 2712 {
3280af22
NIS
2713 sv_setpv(PL_linestr, "");
2714 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2715 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2716 PL_preambled = FALSE;
84902520 2717 if (PERLDB_LINE)
3280af22 2718 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2719 goto retry;
2720 }
a0d0e21e 2721 }
79072805 2722 }
9f68db38 2723 }
79072805 2724 }
3280af22
NIS
2725 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2726 PL_bufptr = s;
2727 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2728 return yylex();
ae986130 2729 }
378cc40b 2730 goto retry;
4fdae800 2731 case '\r':
6a27c188 2732#ifdef PERL_STRICT_CR
cea2e8a9
GS
2733 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2734 Perl_croak(aTHX_
cc507455 2735 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2736#endif
4fdae800 2737 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2738#ifdef MACOS_TRADITIONAL
2739 case '\312':
2740#endif
378cc40b
LW
2741 s++;
2742 goto retry;
378cc40b 2743 case '#':
e929a76b 2744 case '\n':
3280af22 2745 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2746 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2747 /* handle eval qq[#line 1 "foo"\n ...] */
2748 CopLINE_dec(PL_curcop);
2749 incline(s);
2750 }
3280af22 2751 d = PL_bufend;
a687059c 2752 while (s < d && *s != '\n')
378cc40b 2753 s++;
0f85fab0 2754 if (s < d)
378cc40b 2755 s++;
463ee0b2 2756 incline(s);
3280af22
NIS
2757 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2758 PL_bufptr = s;
2759 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2760 return yylex();
a687059c 2761 }
378cc40b 2762 }
a687059c 2763 else {
378cc40b 2764 *s = '\0';
3280af22 2765 PL_bufend = s;
a687059c 2766 }
378cc40b
LW
2767 goto retry;
2768 case '-':
79072805 2769 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2770 s++;
3280af22 2771 PL_bufptr = s;
748a9306
LW
2772 tmp = *s++;
2773
bf4acbe4 2774 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2775 s++;
2776
2777 if (strnEQ(s,"=>",2)) {
3280af22 2778 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2779 OPERATOR('-'); /* unary minus */
2780 }
3280af22
NIS
2781 PL_last_uni = PL_oldbufptr;
2782 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2783 switch (tmp) {
79072805
LW
2784 case 'r': FTST(OP_FTEREAD);
2785 case 'w': FTST(OP_FTEWRITE);
2786 case 'x': FTST(OP_FTEEXEC);
2787 case 'o': FTST(OP_FTEOWNED);
2788 case 'R': FTST(OP_FTRREAD);
2789 case 'W': FTST(OP_FTRWRITE);
2790 case 'X': FTST(OP_FTREXEC);
2791 case 'O': FTST(OP_FTROWNED);
2792 case 'e': FTST(OP_FTIS);
2793 case 'z': FTST(OP_FTZERO);
2794 case 's': FTST(OP_FTSIZE);
2795 case 'f': FTST(OP_FTFILE);
2796 case 'd': FTST(OP_FTDIR);
2797 case 'l': FTST(OP_FTLINK);
2798 case 'p': FTST(OP_FTPIPE);
2799 case 'S': FTST(OP_FTSOCK);
2800 case 'u': FTST(OP_FTSUID);
2801 case 'g': FTST(OP_FTSGID);
2802 case 'k': FTST(OP_FTSVTX);
2803 case 'b': FTST(OP_FTBLK);
2804 case 'c': FTST(OP_FTCHR);
2805 case 't': FTST(OP_FTTTY);
2806 case 'T': FTST(OP_FTTEXT);
2807 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2808 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2809 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2810 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2811 default:
cea2e8a9 2812 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2813 break;
2814 }
2815 }
a687059c
LW
2816 tmp = *s++;
2817 if (*s == tmp) {
2818 s++;
3280af22 2819 if (PL_expect == XOPERATOR)
79072805
LW
2820 TERM(POSTDEC);
2821 else
2822 OPERATOR(PREDEC);
2823 }
2824 else if (*s == '>') {
2825 s++;
2826 s = skipspace(s);
7e2040f0 2827 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2828 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2829 TOKEN(ARROW);
79072805 2830 }
748a9306
LW
2831 else if (*s == '$')
2832 OPERATOR(ARROW);
463ee0b2 2833 else
748a9306 2834 TERM(ARROW);
a687059c 2835 }
3280af22 2836 if (PL_expect == XOPERATOR)
79072805
LW
2837 Aop(OP_SUBTRACT);
2838 else {
3280af22 2839 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2840 check_uni();
79072805 2841 OPERATOR('-'); /* unary minus */
2f3197b3 2842 }
79072805 2843
378cc40b 2844 case '+':
a687059c
LW
2845 tmp = *s++;
2846 if (*s == tmp) {
378cc40b 2847 s++;
3280af22 2848 if (PL_expect == XOPERATOR)
79072805
LW
2849 TERM(POSTINC);
2850 else
2851 OPERATOR(PREINC);
378cc40b 2852 }
3280af22 2853 if (PL_expect == XOPERATOR)
79072805
LW
2854 Aop(OP_ADD);
2855 else {
3280af22 2856 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2857 check_uni();
a687059c 2858 OPERATOR('+');
2f3197b3 2859 }
a687059c 2860
378cc40b 2861 case '*':
3280af22
NIS
2862 if (PL_expect != XOPERATOR) {
2863 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2864 PL_expect = XOPERATOR;
2865 force_ident(PL_tokenbuf, '*');
2866 if (!*PL_tokenbuf)
a0d0e21e 2867 PREREF('*');
79072805 2868 TERM('*');
a687059c 2869 }
79072805
LW
2870 s++;
2871 if (*s == '*') {
a687059c 2872 s++;
79072805 2873 PWop(OP_POW);
a687059c 2874 }
79072805
LW
2875 Mop(OP_MULTIPLY);
2876
378cc40b 2877 case '%':
3280af22 2878 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2879 ++s;
2880 Mop(OP_MODULO);
a687059c 2881 }
3280af22
NIS
2882 PL_tokenbuf[0] = '%';
2883 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2884 if (!PL_tokenbuf[1]) {
2885 if (s == PL_bufend)
bbce6d69
PP
2886 yyerror("Final % should be \\% or %name");
2887 PREREF('%');
a687059c 2888 }
3280af22 2889 PL_pending_ident = '%';
bbce6d69 2890 TERM('%');
a687059c 2891
378cc40b 2892 case '^':
79072805 2893 s++;
a0d0e21e 2894 BOop(OP_BIT_XOR);
79072805 2895 case '[':
3280af22 2896 PL_lex_brackets++;
79072805 2897 /* FALL THROUGH */
378cc40b 2898 case '~':
378cc40b 2899 case ',':
378cc40b
LW
2900 tmp = *s++;
2901 OPERATOR(tmp);
a0d0e21e
LW
2902 case ':':
2903 if (s[1] == ':') {
2904 len = 0;
2905 goto just_a_word;
2906 }
2907 s++;
09bef843
SB
2908 switch (PL_expect) {
2909 OP *attrs;
2910 case XOPERATOR:
2911 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2912 break;
2913 PL_bufptr = s; /* update in case we back off */
2914 goto grabattrs;
2915 case XATTRBLOCK:
2916 PL_expect = XBLOCK;
2917 goto grabattrs;
2918 case XATTRTERM:
2919 PL_expect = XTERMBLOCK;
2920 grabattrs:
2921 s = skipspace(s);
2922 attrs = Nullop;
7e2040f0 2923 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2924 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2925 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2926 if (tmp < 0) tmp = -tmp;
2927 switch (tmp) {
2928 case KEY_or:
2929 case KEY_and:
2930 case KEY_for:
2931 case KEY_unless:
2932 case KEY_if:
2933 case KEY_while:
2934 case KEY_until:
2935 goto got_attrs;
2936 default:
2937 break;
2938 }
2939 }
09bef843
SB
2940 if (*d == '(') {
2941 d = scan_str(d,TRUE,TRUE);
2942 if (!d) {
2943 if (PL_lex_stuff) {
2944 SvREFCNT_dec(PL_lex_stuff);
2945 PL_lex_stuff = Nullsv;
2946 }
2947 /* MUST advance bufptr here to avoid bogus
2948 "at end of line" context messages from yyerror().
2949 */
2950 PL_bufptr = s + len;
2951 yyerror("Unterminated attribute parameter in attribute list");
2952 if (attrs)
2953 op_free(attrs);
2954 return 0; /* EOF indicator */
2955 }
2956 }
2957 if (PL_lex_stuff) {
2958 SV *sv = newSVpvn(s, len);
2959 sv_catsv(sv, PL_lex_stuff);
2960 attrs = append_elem(OP_LIST, attrs,
2961 newSVOP(OP_CONST, 0, sv));
2962 SvREFCNT_dec(PL_lex_stuff);
2963 PL_lex_stuff = Nullsv;
2964 }
2965 else {
2966 attrs = append_elem(OP_LIST, attrs,
2967 newSVOP(OP_CONST, 0,
2968 newSVpvn(s, len)));
2969 }
2970 s = skipspace(d);
0120eecf 2971 if (*s == ':' && s[1] != ':')
09bef843 2972 s = skipspace(s+1);
0120eecf
GS
2973 else if (s == d)
2974 break; /* require real whitespace or :'s */
09bef843 2975 }
f9829d6b
GS
2976 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2977 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2978 char q = ((*s == '\'') ? '"' : '\'');
2979 /* If here for an expression, and parsed no attrs, back off. */
2980 if (tmp == '=' && !attrs) {
2981 s = PL_bufptr;
2982 break;
2983 }
2984 /* MUST advance bufptr here to avoid bogus "at end of line"
2985 context messages from yyerror().
2986 */
2987 PL_bufptr = s;
2988 if (!*s)
2989 yyerror("Unterminated attribute list");
2990 else
2991 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2992 q, *s, q));
2993 if (attrs)
2994 op_free(attrs);
2995 OPERATOR(':');
2996 }
f9829d6b 2997 got_attrs:
09bef843
SB
2998 if (attrs) {
2999 PL_nextval[PL_nexttoke].opval = attrs;
3000 force_next(THING);
3001 }
3002 TOKEN(COLONATTR);
3003 }
a0d0e21e 3004 OPERATOR(':');
8990e307
LW
3005 case '(':
3006 s++;
3280af22
NIS
3007 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3008 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3009 else
3280af22 3010 PL_expect = XTERM;
a0d0e21e 3011 TOKEN('(');
378cc40b 3012 case ';':
f4dd75d9 3013 CLINE;
378cc40b
LW
3014 tmp = *s++;
3015 OPERATOR(tmp);
3016 case ')':
378cc40b 3017 tmp = *s++;
16d20bd9
AD
3018 s = skipspace(s);
3019 if (*s == '{')
3020 PREBLOCK(tmp);
378cc40b 3021 TERM(tmp);
79072805
LW
3022 case ']':
3023 s++;
3280af22 3024 if (PL_lex_brackets <= 0)
d98d5fff 3025 yyerror("Unmatched right square bracket");
463ee0b2 3026 else
3280af22
NIS
3027 --PL_lex_brackets;
3028 if (PL_lex_state == LEX_INTERPNORMAL) {
3029 if (PL_lex_brackets == 0) {
a0d0e21e 3030 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3031 PL_lex_state = LEX_INTERPEND;
79072805
LW
3032 }
3033 }
4633a7c4 3034 TERM(']');
79072805
LW
3035 case '{':
3036 leftbracket:
79072805 3037 s++;
3280af22
NIS
3038 if (PL_lex_brackets > 100) {
3039 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3040 if (newlb != PL_lex_brackstack) {
8990e307 3041 SAVEFREEPV(newlb);
3280af22 3042 PL_lex_brackstack = newlb;
8990e307
LW
3043 }
3044 }
3280af22 3045 switch (PL_expect) {
a0d0e21e 3046 case XTERM:
3280af22 3047 if (PL_lex_formbrack) {
a0d0e21e
LW
3048 s--;
3049 PRETERMBLOCK(DO);
3050 }
3280af22
NIS
3051 if (PL_oldoldbufptr == PL_last_lop)
3052 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3053 else
3280af22 3054 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3055 OPERATOR(HASHBRACK);
a0d0e21e 3056 case XOPERATOR:
bf4acbe4 3057 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3058 s++;
44a8e56a 3059 d = s;
3280af22
NIS
3060 PL_tokenbuf[0] = '\0';
3061 if (d < PL_bufend && *d == '-') {
3062 PL_tokenbuf[0] = '-';
44a8e56a 3063 d++;
bf4acbe4 3064 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3065 d++;
3066 }
7e2040f0 3067 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3068 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3069 FALSE, &len);
bf4acbe4 3070 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3071 d++;
3072 if (*d == '}') {
3280af22 3073 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3074 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3075 if (minus)
3076 force_next('-');
748a9306
LW
3077 }
3078 }
3079 /* FALL THROUGH */
09bef843 3080 case XATTRBLOCK:
748a9306 3081 case XBLOCK:
3280af22
NIS
3082 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3083 PL_expect = XSTATE;
a0d0e21e 3084 break;
09bef843 3085 case XATTRTERM:
a0d0e21e 3086 case XTERMBLOCK:
3280af22
NIS
3087 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3088 PL_expect = XSTATE;
a0d0e21e
LW
3089 break;
3090 default: {
3091 char *t;
3280af22
NIS
3092 if (PL_oldoldbufptr == PL_last_lop)
3093 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3094 else
3280af22 3095 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3096 s = skipspace(s);
09ecc4b6 3097 if (*s == '}')
a0d0e21e 3098 OPERATOR(HASHBRACK);
b8a4b1be
GS
3099 /* This hack serves to disambiguate a pair of curlies
3100 * as being a block or an anon hash. Normally, expectation
3101 * determines that, but in cases where we're not in a
3102 * position to expect anything in particular (like inside
3103 * eval"") we have to resolve the ambiguity. This code
3104 * covers the case where the first term in the curlies is a
3105 * quoted string. Most other cases need to be explicitly
3106 * disambiguated by prepending a `+' before the opening
3107 * curly in order to force resolution as an anon hash.
3108 *
3109 * XXX should probably propagate the outer expectation
3110 * into eval"" to rely less on this hack, but that could
3111 * potentially break current behavior of eval"".
3112 * GSAR 97-07-21
3113 */
3114 t = s;
3115 if (*s == '\'' || *s == '"' || *s == '`') {
3116 /* common case: get past first string, handling escapes */
3280af22 3117 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3118 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3119 t++;
3120 t++;
a0d0e21e 3121 }
b8a4b1be 3122 else if (*s == 'q') {
3280af22 3123 if (++t < PL_bufend
b8a4b1be 3124 && (!isALNUM(*t)
3280af22 3125 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3126 && !isALNUM(*t))))
3127 {
b8a4b1be
GS
3128 char *tmps;
3129 char open, close, term;
3130 I32 brackets = 1;
3131
3280af22 3132 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3133 t++;
3134 term = *t;
3135 open = term;
3136 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3137 term = tmps[5];
3138 close = term;
3139 if (open == close)
3280af22
NIS
3140 for (t++; t < PL_bufend; t++) {
3141 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3142 t++;
6d07e5e9 3143 else if (*t == open)
b8a4b1be
GS
3144 break;
3145 }
3146 else
3280af22
NIS
3147 for (t++; t < PL_bufend; t++) {
3148 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3149 t++;
6d07e5e9 3150 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3151 break;
3152 else if (*t == open)
3153 brackets++;
3154 }
3155 }
3156 t++;
a0d0e21e 3157 }
7e2040f0 3158 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3159 t += UTF8SKIP(t);
7e2040f0 3160 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3161 t += UTF8SKIP(t);
a0d0e21e 3162 }
3280af22 3163 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3164 t++;
b8a4b1be
GS
3165 /* if comma follows first term, call it an anon hash */
3166 /* XXX it could be a comma expression with loop modifiers */
3280af22 3167 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3168 || (*t == '=' && t[1] == '>')))
a0d0e21e 3169 OPERATOR(HASHBRACK);
3280af22 3170 if (PL_expect == XREF)
4e4e412b 3171 PL_expect = XTERM;
a0d0e21e 3172 else {
3280af22
NIS
3173 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3174 PL_expect = XSTATE;
a0d0e21e 3175 }
8990e307 3176 }
a0d0e21e 3177 break;
463ee0b2 3178 }
57843af0 3179 yylval.ival = CopLINE(PL_curcop);
79072805 3180 if (isSPACE(*s) || *s == '#')
3280af22 3181 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3182 TOKEN('{');
378cc40b 3183 case '}':
79072805
LW
3184 rightbracket:
3185 s++;
3280af22 3186 if (PL_lex_brackets <= 0)
d98d5fff 3187 yyerror("Unmatched right curly bracket");
463ee0b2 3188 else
3280af22 3189 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3190 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3191 PL_lex_formbrack = 0;
3192 if (PL_lex_state == LEX_INTERPNORMAL) {
3193 if (PL_lex_brackets == 0) {
9059aa12
LW
3194 if (PL_expect & XFAKEBRACK) {
3195 PL_expect &= XENUMMASK;
3280af22
NIS
3196 PL_lex_state = LEX_INTERPEND;
3197 PL_bufptr = s;
cea2e8a9 3198 return yylex(); /* ignore fake brackets */
79072805 3199 }
fa83b5b6 3200 if (*s == '-' && s[1] == '>')
3280af22 3201 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3202 else if (*s != '[' && *s != '{')
3280af22 3203 PL_lex_state = LEX_INTERPEND;
79072805
LW
3204 }
3205 }
9059aa12
LW
3206 if (PL_expect & XFAKEBRACK) {
3207 PL_expect &= XENUMMASK;
3280af22 3208 PL_bufptr = s;
cea2e8a9 3209 return yylex(); /* ignore fake brackets */
748a9306 3210 }
79072805
LW
3211 force_next('}');
3212 TOKEN(';');
378cc40b
LW
3213 case '&':
3214 s++;
3215 tmp = *s++;
3216 if (tmp == '&')
a0d0e21e 3217 AOPERATOR(ANDAND);
378cc40b 3218 s--;
3280af22 3219 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3220 if (ckWARN(WARN_SEMICOLON)
3221 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3222 {
57843af0 3223 CopLINE_dec(PL_curcop);
cea2e8a9 3224 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3225 CopLINE_inc(PL_curcop);
463ee0b2 3226 }
79072805 3227 BAop(OP_BIT_AND);
463ee0b2 3228 }
79072805 3229
3280af22
NIS
3230 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3231 if (*PL_tokenbuf) {
3232 PL_expect = XOPERATOR;
3233 force_ident(PL_tokenbuf, '&');
463ee0b2 3234 }
79072805
LW
3235 else
3236 PREREF('&');
c07a80fd 3237 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3238 TERM('&');
3239
378cc40b
LW
3240 case '|':
3241 s++;
3242 tmp = *s++;
3243 if (tmp == '|')
a0d0e21e 3244 AOPERATOR(OROR);
378cc40b 3245 s--;
79072805 3246 BOop(OP_BIT_OR);
378cc40b
LW
3247 case '=':
3248 s++;
3249 tmp = *s++;
3250 if (tmp == '=')
79072805
LW
3251 Eop(OP_EQ);
3252 if (tmp == '>')
3253 OPERATOR(',');
378cc40b 3254 if (tmp == '~')
79072805 3255 PMop(OP_MATCH);
599cee73 3256 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3257 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3258 s--;
3280af22
NIS
3259 if (PL_expect == XSTATE && isALPHA(tmp) &&
3260 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3261 {
3280af22
NIS
3262 if (PL_in_eval && !PL_rsfp) {
3263 d = PL_bufend;
a5f75d66
AD
3264 while (s < d) {
3265 if (*s++ == '\n') {
3266 incline(s);
3267 if (strnEQ(s,"=cut",4)) {
3268 s = strchr(s,'\n');
3269 if (s)
3270 s++;
3271 else
3272 s = d;
3273 incline(s);
3274 goto retry;
3275 }
3276 }
3277 }
3278 goto retry;
3279 }
3280af22
NIS
3280 s = PL_bufend;
3281 PL_doextract = TRUE;
a0d0e21e
LW
3282 goto retry;
3283 }
3280af22 3284 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3285 char *t;
51882d45 3286#ifdef PERL_STRICT_CR
bf4acbe4 3287 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3288#else
bf4acbe4 3289 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3290#endif
a0d0e21e
LW
3291 if (*t == '\n' || *t == '#') {
3292 s--;
3280af22 3293 PL_expect = XBLOCK;
a0d0e21e
LW
3294 goto leftbracket;
3295 }
79072805 3296 }
a0d0e21e
LW
3297 yylval.ival = 0;
3298 OPERATOR(ASSIGNOP);
378cc40b
LW
3299 case '!':
3300 s++;
3301 tmp = *s++;
3302 if (tmp == '=')
79072805 3303 Eop(OP_NE);
378cc40b 3304 if (tmp == '~')
79072805 3305 PMop(OP_NOT);
378cc40b
LW
3306 s--;
3307 OPERATOR('!');
3308 case '<':
3280af22 3309 if (PL_expect != XOPERATOR) {
93a17b20 3310 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3311 check_uni();
79072805
LW
3312 if (s[1] == '<')
3313 s = scan_heredoc(s);
3314 else
3315 s = scan_inputsymbol(s);
3316 TERM(sublex_start());
378cc40b
LW
3317 }
3318 s++;
3319 tmp = *s++;
3320 if (tmp == '<')
79072805 3321 SHop(OP_LEFT_SHIFT);
395c3793
LW
3322 if (tmp == '=') {
3323 tmp = *s++;
3324 if (tmp == '>')
79072805 3325 Eop(OP_NCMP);
395c3793 3326 s--;
79072805 3327 Rop(OP_LE);
395c3793 3328 }
378cc40b 3329 s--;
79072805 3330 Rop(OP_LT);
378cc40b
LW
3331 case '>':
3332 s++;
3333 tmp = *s++;
3334 if (tmp == '>')
79072805 3335 SHop(OP_RIGHT_SHIFT);
378cc40b 3336 if (tmp == '=')
79072805 3337 Rop(OP_GE);
378cc40b 3338 s--;
79072805 3339 Rop(OP_GT);
378cc40b
LW
3340
3341 case '$':
bbce6d69
PP
3342 CLINE;
3343
3280af22
NIS
3344 if (PL_expect == XOPERATOR) {
3345 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3346 PL_expect = XTERM;
a0d0e21e 3347 depcom();
bbce6d69 3348 return ','; /* grandfather non-comma-format format */
a0d0e21e 3349 }
8990e307 3350 }
a0d0e21e 3351
7e2040f0 3352 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3353 PL_tokenbuf[0] = '@';
376b8730
SM
3354 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3355 sizeof PL_tokenbuf - 1, FALSE);
3356 if (PL_expect == XOPERATOR)
3357 no_op("Array length", s);
3280af22 3358 if (!PL_tokenbuf[1])
a0d0e21e 3359 PREREF(DOLSHARP);
3280af22
NIS
3360 PL_expect = XOPERATOR;
3361 PL_pending_ident = '#';
463ee0b2 3362 TOKEN(DOLSHARP);
79072805 3363 }
bbce6d69 3364
3280af22 3365 PL_tokenbuf[0] = '$';
376b8730
SM
3366 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3367 sizeof PL_tokenbuf - 1, FALSE);
3368 if (PL_expect == XOPERATOR)
3369 no_op("Scalar", s);
3280af22
NIS
3370 if (!PL_tokenbuf[1]) {
3371 if (s == PL_bufend)
bbce6d69
PP
3372 yyerror("Final $ should be \\$ or $name");
3373 PREREF('$');
8990e307 3374 }
a0d0e21e 3375
bbce6d69 3376 /* This kludge not intended to be bulletproof. */
3280af22 3377 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3378 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3379 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3380 yylval.opval->op_private = OPpCONST_ARYBASE;
3381 TERM(THING);
3382 }
3383
ff68c719 3384 d = s;
69d2bceb 3385 tmp = (I32)*s;
3280af22 3386 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3387 s = skipspace(s);
3388
3280af22 3389 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3390 char *t;
3391 if (*s == '[') {
3280af22 3392 PL_tokenbuf[0] = '@';
599cee73 3393 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3394 for(t = s + 1;
7e2040f0 3395 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3396 t++) ;
a0d0e21e 3397 if (*t++ == ',') {
3280af22
NIS
3398 PL_bufptr = skipspace(PL_bufptr);
3399 while (t < PL_bufend && *t != ']')
bbce6d69 3400 t++;
cea2e8a9 3401 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3402 "Multidimensional syntax %.*s not supported",
3403 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3404 }
3405 }
bbce6d69
PP
3406 }
3407 else if (*s == '{') {
3280af22 3408 PL_tokenbuf[0] = '%';
599cee73 3409 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3410 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3411 {
3280af22 3412 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3413 STRLEN len;
3414 for (t++; isSPACE(*t); t++) ;
7e2040f0 3415 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3416 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3417 for (; isSPACE(*t); t++) ;
864dbfa3 3418 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3419 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3420 "You need to quote \"%s\"", tmpbuf);
748a9306 3421 }
93a17b20
LW
3422 }
3423 }
2f3197b3 3424 }
bbce6d69 3425
3280af22 3426 PL_expect = XOPERATOR;
69d2bceb 3427 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3428 bool islop = (PL_last_lop == PL_oldoldbufptr);
3429 if (!islop || PL_last_lop_op == OP_GREPSTART)
3430 PL_expect = XOPERATOR;
bbce6d69 3431 else if (strchr("$@\"'`q", *s))
3280af22 3432 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3433 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3434 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3435 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3436 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3437 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3438 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3439 /* binary operators exclude handle interpretations */
3440 switch (tmp) {
3441 case -KEY_x:
3442 case -KEY_eq:
3443 case -KEY_ne:
3444 case -KEY_gt:
3445 case -KEY_lt:
3446 case -KEY_ge:
3447 case -KEY_le:
3448 case -KEY_cmp:
3449 break;
3450 default:
3280af22 3451 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3452 break;
3453 }
3454 }
68dc0745
PP
3455 else {
3456 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3457 if (gv && GvCVu(gv))
3280af22 3458 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3459 }
93a17b20 3460 }
bbce6d69 3461 else if (isDIGIT(*s))
3280af22 3462 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3463 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3464 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3465 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3466 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3467 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3468 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3469 }
3280af22 3470 PL_pending_ident = '$';
79072805 3471 TOKEN('$');
378cc40b
LW
3472
3473 case '@':
3280af22 3474 if (PL_expect == XOPERATOR)
bbce6d69 3475 no_op("Array", s);
3280af22
NIS
3476 PL_tokenbuf[0] = '@';
3477 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3478 if (!PL_tokenbuf[1]) {
3479 if (s == PL_bufend)
bbce6d69
PP
3480 yyerror("Final @ should be \\@ or @name");
3481 PREREF('@');
3482 }
3280af22 3483 if (PL_lex_state == LEX_NORMAL)
ff68c719 3484 s = skipspace(s);
3280af22 3485 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3486 if (*s == '{')
3280af22 3487 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3488
3489 /* Warn about @ where they meant $. */
599cee73 3490 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3491 if (*s == '[' || *s == '{') {
3492 char *t = s + 1;
7e2040f0 3493 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3494 t++;
3495 if (*t == '}' || *t == ']') {
3496 t++;
3280af22 3497 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3498 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3499 "Scalar value %.*s better written as $%.*s",
3280af22 3500 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3501 }
93a17b20
LW
3502 }
3503 }
463ee0b2 3504 }
3280af22 3505 PL_pending_ident = '@';
79072805 3506 TERM('@');
378cc40b
LW
3507
3508 case '/': /* may either be division or pattern */
3509 case '?': /* may either be conditional or pattern */
3280af22 3510 if (PL_expect != XOPERATOR) {
c277df42 3511 /* Disable warning on "study /blah/" */
3280af22
NIS
3512 if (PL_oldoldbufptr == PL_last_uni
3513 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3514 || memNE(PL_last_uni, "study", 5)
3515 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3516 check_uni();
8782bef2 3517 s = scan_pat(s,OP_MATCH);
79072805 3518 TERM(sublex_start());
378cc40b
LW
3519 }
3520 tmp = *s++;
a687059c 3521 if (tmp == '/')
79072805 3522 Mop(OP_DIVIDE);
378cc40b
LW
3523 OPERATOR(tmp);
3524
3525 case '.':
51882d45
GS
3526 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3527#ifdef PERL_STRICT_CR
3528 && s[1] == '\n'
3529#else
3530 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3531#endif
3532 && (s == PL_linestart || s[-1] == '\n') )
3533 {
3280af22
NIS
3534 PL_lex_formbrack = 0;
3535 PL_expect = XSTATE;
79072805
LW
3536 goto rightbracket;
3537 }
3280af22 3538 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3539 tmp = *s++;
a687059c
LW
3540 if (*s == tmp) {
3541 s++;
2f3197b3
LW
3542 if (*s == tmp) {
3543 s++;
79072805 3544 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3545 }
3546 else
79072805 3547 yylval.ival = 0;
378cc40b 3548 OPERATOR(DOTDOT);
a687059c 3549 }
3280af22 3550 if (PL_expect != XOPERATOR)
2f3197b3 3551 check_uni();
79072805 3552 Aop(OP_CONCAT);
378cc40b
LW
3553 }
3554 /* FALL THROUGH */
3555 case '0': case '1': case '2': case '3': case '4':
3556 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3557 s = scan_num(s, &yylval);
3280af22 3558 if (PL_expect == XOPERATOR)
8990e307 3559 no_op("Number",s);
79072805
LW
3560 TERM(THING);
3561
3562 case '\'':
09bef843 3563 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3564 if (PL_expect == XOPERATOR) {
3565 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3566 PL_expect = XTERM;
a0d0e21e
LW
3567 depcom();
3568 return ','; /* grandfather non-comma-format format */
3569 }
463ee0b2 3570 else
8990e307 3571 no_op("String",s);
463ee0b2 3572 }
79072805 3573 if (!s)
85e6fe83 3574 missingterm((char*)0);
79072805
LW
3575 yylval.ival = OP_CONST;
3576 TERM(sublex_start());
3577
3578 case '"':
09bef843 3579 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3580 if (PL_expect == XOPERATOR) {
3581 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3582 PL_expect = XTERM;
a0d0e21e
LW
3583 depcom();
3584 return ','; /* grandfather non-comma-format format */
3585 }
463ee0b2 3586