This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make the stdio test program of 7427 less noisy while being
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
a790bc05
PP
72/* XXX If this causes problems, set i_unistd=undef in the hint file. */
73#ifdef I_UNISTD
74# include <unistd.h> /* Needed for execv() */
75#endif
76
77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e 82#ifdef USE_PURE_BISON
dba4d153
JH
83# ifndef YYMAXLEVEL
84# define YYMAXLEVEL 100
85# endif
20141f0e
RI
86YYSTYPE* yylval_pointer[YYMAXLEVEL];
87int* yychar_pointer[YYMAXLEVEL];
88int yyactlevel = 0;
22c35a8c
GS
89# undef yylval
90# undef yychar
20141f0e
RI
91# define yylval (*yylval_pointer[yyactlevel])
92# define yychar (*yychar_pointer[yyactlevel])
93# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
94# undef yylex
dba4d153 95# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
3280af22
NIS
135#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
136#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
137#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
138#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
139#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
140#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
141#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
142#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
143#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
144#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
145#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
146#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
147#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
148#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
149#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
150#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
151#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
152#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
153#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
154#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 155
a687059c
LW
156/* This bit of chicanery makes a unary function followed by
157 * a parenthesis into a function with one argument, highest precedence.
158 */
2f3197b3 159#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
160 PL_expect = XTERM, \
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
163 PL_last_lop_op = f, \
a687059c
LW
164 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165
79072805 166#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
167 PL_bufptr = s, \
168 PL_last_uni = PL_oldbufptr, \
79072805
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
9f68db38 171/* grandfather return to old style */
3280af22 172#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 173
ffb4593c
NT
174/*
175 * S_ao
176 *
177 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
178 * into an OP_ANDASSIGN or OP_ORASSIGN
179 */
180
76e3520e 181STATIC int
cea2e8a9 182S_ao(pTHX_ int toketype)
a0d0e21e 183{
3280af22
NIS
184 if (*PL_bufptr == '=') {
185 PL_bufptr++;
a0d0e21e
LW
186 if (toketype == ANDAND)
187 yylval.ival = OP_ANDASSIGN;
188 else if (toketype == OROR)
189 yylval.ival = OP_ORASSIGN;
190 toketype = ASSIGNOP;
191 }
192 return toketype;
193}
194
ffb4593c
NT
195/*
196 * S_no_op
197 * When Perl expects an operator and finds something else, no_op
198 * prints the warning. It always prints "<something> found where
199 * operator expected. It prints "Missing semicolon on previous line?"
200 * if the surprise occurs at the start of the line. "do you need to
201 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
202 * where the compiler doesn't know if foo is a method call or a function.
203 * It prints "Missing operator before end of line" if there's nothing
204 * after the missing operator, or "... before <...>" if there is something
205 * after the missing operator.
206 */
207
76e3520e 208STATIC void
cea2e8a9 209S_no_op(pTHX_ char *what, char *s)
463ee0b2 210{
3280af22
NIS
211 char *oldbp = PL_bufptr;
212 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 213
1189a94a
GS
214 if (!s)
215 s = oldbp;
07c798fb 216 else
1189a94a 217 PL_bufptr = s;
cea2e8a9 218 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 219 if (is_first)
cea2e8a9 220 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 221 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 222 char *t;
7e2040f0 223 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 224 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 225 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 226 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 227 }
07c798fb
HS
228 else {
229 assert(s >= oldbp);
cea2e8a9 230 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 231 }
3280af22 232 PL_bufptr = oldbp;
8990e307
LW
233}
234
ffb4593c
NT
235/*
236 * S_missingterm
237 * Complain about missing quote/regexp/heredoc terminator.
238 * If it's called with (char *)NULL then it cauterizes the line buffer.
239 * If we're in a delimited string and the delimiter is a control
240 * character, it's reformatted into a two-char sequence like ^C.
241 * This is fatal.
242 */
243
76e3520e 244STATIC void
cea2e8a9 245S_missingterm(pTHX_ char *s)
8990e307
LW
246{
247 char tmpbuf[3];
248 char q;
249 if (s) {
250 char *nl = strrchr(s,'\n');
d2719217 251 if (nl)
8990e307
LW
252 *nl = '\0';
253 }
9d116dd7
JH
254 else if (
255#ifdef EBCDIC
256 iscntrl(PL_multi_close)
257#else
258 PL_multi_close < 32 || PL_multi_close == 127
259#endif
260 ) {
8990e307 261 *tmpbuf = '^';
3280af22 262 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
263 s = "\\n";
264 tmpbuf[2] = '\0';
265 s = tmpbuf;
266 }
267 else {
3280af22 268 *tmpbuf = PL_multi_close;
8990e307
LW
269 tmpbuf[1] = '\0';
270 s = tmpbuf;
271 }
272 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 273 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 274}
79072805 275
ffb4593c
NT
276/*
277 * Perl_deprecate
ffb4593c
NT
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
9cbb5ea2 290 * Deprecate a comma-less variable list.
ffb4593c
NT
291 */
292
76e3520e 293STATIC void
cea2e8a9 294S_depcom(pTHX)
a0d0e21e
LW
295{
296 deprecate("comma-less variable list");
297}
298
ffb4593c 299/*
9cbb5ea2
GS
300 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
301 * utf16-to-utf8-reversed.
ffb4593c
NT
302 */
303
c39cd008
GS
304#ifdef PERL_CR_FILTER
305static void
306strip_return(SV *sv)
307{
308 register char *s = SvPVX(sv);
309 register char *e = s + SvCUR(sv);
310 /* outer loop optimized to do nothing if there are no CR-LFs */
311 while (s < e) {
312 if (*s++ == '\r' && *s == '\n') {
313 /* hit a CR-LF, need to copy the rest */
314 register char *d = s - 1;
315 *d++ = *s++;
316 while (s < e) {
317 if (*s == '\r' && s[1] == '\n')
318 s++;
319 *d++ = *s++;
320 }
321 SvCUR(sv) -= s - d;
322 return;
323 }
324 }
325}
a868473f 326
76e3520e 327STATIC I32
c39cd008 328S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 329{
c39cd008
GS
330 I32 count = FILTER_READ(idx+1, sv, maxlen);
331 if (count > 0 && !maxlen)
332 strip_return(sv);
333 return count;
a868473f
NIS
334}
335#endif
336
ffb4593c
NT
337/*
338 * Perl_lex_start
9cbb5ea2
GS
339 * Initialize variables. Uses the Perl save_stack to save its state (for
340 * recursive calls to the parser).
ffb4593c
NT
341 */
342
a0d0e21e 343void
864dbfa3 344Perl_lex_start(pTHX_ SV *line)
79072805 345{
0f15f207 346 dTHR;
8990e307
LW
347 char *s;
348 STRLEN len;
349
3280af22
NIS
350 SAVEI32(PL_lex_dojoin);
351 SAVEI32(PL_lex_brackets);
3280af22
NIS
352 SAVEI32(PL_lex_casemods);
353 SAVEI32(PL_lex_starts);
354 SAVEI32(PL_lex_state);
7766f137 355 SAVEVPTR(PL_lex_inpat);
3280af22 356 SAVEI32(PL_lex_inwhat);
18b09519
GS
357 if (PL_lex_state == LEX_KNOWNEXT) {
358 I32 toke = PL_nexttoke;
359 while (--toke >= 0) {
360 SAVEI32(PL_nexttype[toke]);
361 SAVEVPTR(PL_nextval[toke]);
362 }
363 SAVEI32(PL_nexttoke);
18b09519 364 }
57843af0 365 SAVECOPLINE(PL_curcop);
3280af22
NIS
366 SAVEPPTR(PL_bufptr);
367 SAVEPPTR(PL_bufend);
368 SAVEPPTR(PL_oldbufptr);
369 SAVEPPTR(PL_oldoldbufptr);
370 SAVEPPTR(PL_linestart);
371 SAVESPTR(PL_linestr);
372 SAVEPPTR(PL_lex_brackstack);
373 SAVEPPTR(PL_lex_casestack);
c76ac1ee 374 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
375 SAVESPTR(PL_lex_stuff);
376 SAVEI32(PL_lex_defer);
09bef843 377 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 378 SAVESPTR(PL_lex_repl);
bebdddfc
GS
379 SAVEINT(PL_expect);
380 SAVEINT(PL_lex_expect);
3280af22
NIS
381
382 PL_lex_state = LEX_NORMAL;
383 PL_lex_defer = 0;
384 PL_expect = XSTATE;
385 PL_lex_brackets = 0;
3280af22
NIS
386 New(899, PL_lex_brackstack, 120, char);
387 New(899, PL_lex_casestack, 12, char);
388 SAVEFREEPV(PL_lex_brackstack);
389 SAVEFREEPV(PL_lex_casestack);
390 PL_lex_casemods = 0;
391 *PL_lex_casestack = '\0';
392 PL_lex_dojoin = 0;
393 PL_lex_starts = 0;
394 PL_lex_stuff = Nullsv;
395 PL_lex_repl = Nullsv;
396 PL_lex_inpat = 0;
76be56bc 397 PL_nexttoke = 0;
3280af22 398 PL_lex_inwhat = 0;
09bef843 399 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
400 PL_linestr = line;
401 if (SvREADONLY(PL_linestr))
402 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
403 s = SvPV(PL_linestr, len);
8990e307 404 if (len && s[len-1] != ';') {
3280af22
NIS
405 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
406 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
407 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 408 }
3280af22
NIS
409 SvTEMP_off(PL_linestr);
410 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
411 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
412 SvREFCNT_dec(PL_rs);
79cb57f6 413 PL_rs = newSVpvn("\n", 1);
3280af22 414 PL_rsfp = 0;
79072805 415}
a687059c 416
ffb4593c
NT
417/*
418 * Perl_lex_end
9cbb5ea2
GS
419 * Finalizer for lexing operations. Must be called when the parser is
420 * done with the lexer.
ffb4593c
NT
421 */
422
463ee0b2 423void
864dbfa3 424Perl_lex_end(pTHX)
463ee0b2 425{
3280af22 426 PL_doextract = FALSE;
463ee0b2
LW
427}
428
ffb4593c
NT
429/*
430 * S_incline
431 * This subroutine has nothing to do with tilting, whether at windmills
432 * or pinball tables. Its name is short for "increment line". It
57843af0 433 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 434 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
435 * # line 500 "foo.pm"
436 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
437 */
438
76e3520e 439STATIC void
cea2e8a9 440S_incline(pTHX_ char *s)
463ee0b2 441{
0f15f207 442 dTHR;
463ee0b2
LW
443 char *t;
444 char *n;
73659bf1 445 char *e;
463ee0b2 446 char ch;
463ee0b2 447
57843af0 448 CopLINE_inc(PL_curcop);
463ee0b2
LW
449 if (*s++ != '#')
450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
452 if (strnEQ(s, "line", 4))
453 s += 4;
454 else
455 return;
456 if (*s == ' ' || *s == '\t')
457 s++;
458 else
459 return;
bf4acbe4 460 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
461 if (!isDIGIT(*s))
462 return;
463 n = s;
464 while (isDIGIT(*s))
465 s++;
bf4acbe4 466 while (SPACE_OR_TAB(*s))
463ee0b2 467 s++;
73659bf1 468 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 469 s++;
73659bf1
GS
470 e = t + 1;
471 }
463ee0b2 472 else {
463ee0b2 473 for (t = s; !isSPACE(*t); t++) ;
73659bf1 474 e = t;
463ee0b2 475 }
bf4acbe4 476 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
477 e++;
478 if (*e != '\n' && *e != '\0')
479 return; /* false alarm */
480
463ee0b2
LW
481 ch = *t;
482 *t = '\0';
f4dd75d9
GS
483 if (t - s > 0) {
484#ifdef USE_ITHREADS
485 Safefree(CopFILE(PL_curcop));
486#else
487 SvREFCNT_dec(CopFILEGV(PL_curcop));
488#endif
57843af0 489 CopFILE_set(PL_curcop, s);
f4dd75d9 490 }
463ee0b2 491 *t = ch;
57843af0 492 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
493}
494
ffb4593c
NT
495/*
496 * S_skipspace
497 * Called to gobble the appropriate amount and type of whitespace.
498 * Skips comments as well.
499 */
500
76e3520e 501STATIC char *
cea2e8a9 502S_skipspace(pTHX_ register char *s)
a687059c 503{
11343788 504 dTHR;
3280af22 505 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 506 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
507 s++;
508 return s;
509 }
510 for (;;) {
fd049845 511 STRLEN prevlen;
09bef843
SB
512 SSize_t oldprevlen, oldoldprevlen;
513 SSize_t oldloplen, oldunilen;
60e6418e
GS
514 while (s < PL_bufend && isSPACE(*s)) {
515 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
516 incline(s);
517 }
ffb4593c
NT
518
519 /* comment */
3280af22
NIS
520 if (s < PL_bufend && *s == '#') {
521 while (s < PL_bufend && *s != '\n')
463ee0b2 522 s++;
60e6418e 523 if (s < PL_bufend) {
463ee0b2 524 s++;
60e6418e
GS
525 if (PL_in_eval && !PL_rsfp) {
526 incline(s);
527 continue;
528 }
529 }
463ee0b2 530 }
ffb4593c
NT
531
532 /* only continue to recharge the buffer if we're at the end
533 * of the buffer, we're not reading from a source filter, and
534 * we're in normal lexing mode
535 */
09bef843
SB
536 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
537 PL_lex_state == LEX_FORMLINE)
463ee0b2 538 return s;
ffb4593c
NT
539
540 /* try to recharge the buffer */
9cbb5ea2
GS
541 if ((s = filter_gets(PL_linestr, PL_rsfp,
542 (prevlen = SvCUR(PL_linestr)))) == Nullch)
543 {
544 /* end of file. Add on the -p or -n magic */
3280af22
NIS
545 if (PL_minus_n || PL_minus_p) {
546 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
547 ";}continue{print or die qq(-p destination: $!\\n)" :
548 "");
3280af22
NIS
549 sv_catpv(PL_linestr,";}");
550 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
551 }
552 else
3280af22 553 sv_setpv(PL_linestr,";");
ffb4593c
NT
554
555 /* reset variables for next time we lex */
9cbb5ea2
GS
556 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
557 = SvPVX(PL_linestr);
3280af22 558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
559
560 /* Close the filehandle. Could be from -P preprocessor,
561 * STDIN, or a regular file. If we were reading code from
562 * STDIN (because the commandline held no -e or filename)
563 * then we don't close it, we reset it so the code can
564 * read from STDIN too.
565 */
566
3280af22
NIS
567 if (PL_preprocess && !PL_in_eval)
568 (void)PerlProc_pclose(PL_rsfp);
569 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
570 PerlIO_clearerr(PL_rsfp);
8990e307 571 else
3280af22
NIS
572 (void)PerlIO_close(PL_rsfp);
573 PL_rsfp = Nullfp;
463ee0b2
LW
574 return s;
575 }
ffb4593c
NT
576
577 /* not at end of file, so we only read another line */
09bef843
SB
578 /* make corresponding updates to old pointers, for yyerror() */
579 oldprevlen = PL_oldbufptr - PL_bufend;
580 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
581 if (PL_last_uni)
582 oldunilen = PL_last_uni - PL_bufend;
583 if (PL_last_lop)
584 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
585 PL_linestart = PL_bufptr = s + prevlen;
586 PL_bufend = s + SvCUR(PL_linestr);
587 s = PL_bufptr;
09bef843
SB
588 PL_oldbufptr = s + oldprevlen;
589 PL_oldoldbufptr = s + oldoldprevlen;
590 if (PL_last_uni)
591 PL_last_uni = s + oldunilen;
592 if (PL_last_lop)
593 PL_last_lop = s + oldloplen;
a0d0e21e 594 incline(s);
ffb4593c
NT
595
596 /* debugger active and we're not compiling the debugger code,
597 * so store the line into the debugger's array of lines
598 */
3280af22 599 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
600 SV *sv = NEWSV(85,0);
601
602 sv_upgrade(sv, SVt_PVMG);
3280af22 603 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 604 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 605 }
463ee0b2 606 }
a687059c 607}
378cc40b 608
ffb4593c
NT
609/*
610 * S_check_uni
611 * Check the unary operators to ensure there's no ambiguity in how they're
612 * used. An ambiguous piece of code would be:
613 * rand + 5
614 * This doesn't mean rand() + 5. Because rand() is a unary operator,
615 * the +5 is its argument.
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_check_uni(pTHX)
ba106d47 620{
2f3197b3 621 char *s;
a0d0e21e 622 char *t;
0453d815 623 dTHR;
2f3197b3 624
3280af22 625 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 626 return;
3280af22
NIS
627 while (isSPACE(*PL_last_uni))
628 PL_last_uni++;
7e2040f0 629 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 630 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 631 return;
0453d815 632 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 633 char ch = *s;
0453d815
PM
634 *s = '\0';
635 Perl_warner(aTHX_ WARN_AMBIGUOUS,
636 "Warning: Use of \"%s\" without parens is ambiguous",
637 PL_last_uni);
638 *s = ch;
639 }
2f3197b3
LW
640}
641
ffb4593c
NT
642/* workaround to replace the UNI() macro with a function. Only the
643 * hints/uts.sh file mentions this. Other comments elsewhere in the
644 * source indicate Microport Unix might need it too.
645 */
646
ffed7fef
LW
647#ifdef CRIPPLED_CC
648
649#undef UNI
ffed7fef 650#define UNI(f) return uni(f,s)
ffed7fef 651
76e3520e 652STATIC int
cea2e8a9 653S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
654{
655 yylval.ival = f;
3280af22
NIS
656 PL_expect = XTERM;
657 PL_bufptr = s;
8f872242
NIS
658 PL_last_uni = PL_oldbufptr;
659 PL_last_lop_op = f;
ffed7fef
LW
660 if (*s == '(')
661 return FUNC1;
662 s = skipspace(s);
663 if (*s == '(')
664 return FUNC1;
665 else
666 return UNIOP;
667}
668
a0d0e21e
LW
669#endif /* CRIPPLED_CC */
670
ffb4593c
NT
671/*
672 * LOP : macro to build a list operator. Its behaviour has been replaced
673 * with a subroutine, S_lop() for which LOP is just another name.
674 */
675
a0d0e21e
LW
676#define LOP(f,x) return lop(f,x,s)
677
ffb4593c
NT
678/*
679 * S_lop
680 * Build a list operator (or something that might be one). The rules:
681 * - if we have a next token, then it's a list operator [why?]
682 * - if the next thing is an opening paren, then it's a function
683 * - else it's a list operator
684 */
685
76e3520e 686STATIC I32
a0be28da 687S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 688{
0f15f207 689 dTHR;
79072805 690 yylval.ival = f;
35c8bce7 691 CLINE;
3280af22
NIS
692 PL_expect = x;
693 PL_bufptr = s;
694 PL_last_lop = PL_oldbufptr;
695 PL_last_lop_op = f;
696 if (PL_nexttoke)
a0d0e21e 697 return LSTOP;
79072805
LW
698 if (*s == '(')
699 return FUNC;
700 s = skipspace(s);
701 if (*s == '(')
702 return FUNC;
703 else
704 return LSTOP;
705}
706
ffb4593c
NT
707/*
708 * S_force_next
9cbb5ea2 709 * When the lexer realizes it knows the next token (for instance,
ffb4593c 710 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
711 * to know what token to return the next time the lexer is called. Caller
712 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
713 * handles the token correctly.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_force_next(pTHX_ I32 type)
79072805 718{
3280af22
NIS
719 PL_nexttype[PL_nexttoke] = type;
720 PL_nexttoke++;
721 if (PL_lex_state != LEX_KNOWNEXT) {
722 PL_lex_defer = PL_lex_state;
723 PL_lex_expect = PL_expect;
724 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
725 }
726}
727
ffb4593c
NT
728/*
729 * S_force_word
730 * When the lexer knows the next thing is a word (for instance, it has
731 * just seen -> and it knows that the next char is a word char, then
732 * it calls S_force_word to stick the next word into the PL_next lookahead.
733 *
734 * Arguments:
b1b65b59 735 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
736 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
737 * int check_keyword : if true, Perl checks to make sure the word isn't
738 * a keyword (do this if the word is a label, e.g. goto FOO)
739 * int allow_pack : if true, : characters will also be allowed (require,
740 * use, etc. do this)
9cbb5ea2 741 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
742 */
743
76e3520e 744STATIC char *
cea2e8a9 745S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 746{
463ee0b2
LW
747 register char *s;
748 STRLEN len;
749
750 start = skipspace(start);
751 s = start;
7e2040f0 752 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 753 (allow_pack && *s == ':') ||
15f0808c 754 (allow_initial_tick && *s == '\'') )
a0d0e21e 755 {
3280af22
NIS
756 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
757 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
758 return start;
759 if (token == METHOD) {
760 s = skipspace(s);
761 if (*s == '(')
3280af22 762 PL_expect = XTERM;
463ee0b2 763 else {
3280af22 764 PL_expect = XOPERATOR;
463ee0b2 765 }
79072805 766 }
3280af22
NIS
767 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
768 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
769 force_next(token);
770 }
771 return s;
772}
773
ffb4593c
NT
774/*
775 * S_force_ident
9cbb5ea2 776 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
777 * text only contains the "foo" portion. The first argument is a pointer
778 * to the "foo", and the second argument is the type symbol to prefix.
779 * Forces the next token to be a "WORD".
9cbb5ea2 780 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
781 */
782
76e3520e 783STATIC void
cea2e8a9 784S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
785{
786 if (s && *s) {
11343788 787 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 788 PL_nextval[PL_nexttoke].opval = o;
79072805 789 force_next(WORD);
748a9306 790 if (kind) {
e858de61 791 dTHR; /* just for in_eval */
11343788 792 o->op_private = OPpCONST_ENTERED;
55497cff
PP
793 /* XXX see note in pp_entereval() for why we forgo typo
794 warnings if the symbol must be introduced in an eval.
795 GSAR 96-10-12 */
3280af22 796 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
797 kind == '$' ? SVt_PV :
798 kind == '@' ? SVt_PVAV :
799 kind == '%' ? SVt_PVHV :
800 SVt_PVGV
801 );
748a9306 802 }
79072805
LW
803 }
804}
805
1571675a
GS
806NV
807Perl_str_to_version(pTHX_ SV *sv)
808{
809 NV retval = 0.0;
810 NV nshift = 1.0;
811 STRLEN len;
812 char *start = SvPVx(sv,len);
3aa33fe5 813 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
814 char *end = start + len;
815 while (start < end) {
ba210ebe 816 STRLEN skip;
1571675a
GS
817 UV n;
818 if (utf)
dcad2880 819 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
820 else {
821 n = *(U8*)start;
822 skip = 1;
823 }
824 retval += ((NV)n)/nshift;
825 start += skip;
826 nshift *= 1000;
827 }
828 return retval;
829}
830
ffb4593c
NT
831/*
832 * S_force_version
833 * Forces the next token to be a version number.
834 */
835
76e3520e 836STATIC char *
cea2e8a9 837S_force_version(pTHX_ char *s)
89bfa8cd
PP
838{
839 OP *version = Nullop;
44dcb63b 840 char *d;
89bfa8cd
PP
841
842 s = skipspace(s);
843
44dcb63b 844 d = s;
dd629d5b 845 if (*d == 'v')
44dcb63b 846 d++;
44dcb63b 847 if (isDIGIT(*d)) {
a7cb1f99 848 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 849 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 850 SV *ver;
b73d6f50 851 s = scan_num(s, &yylval);
89bfa8cd 852 version = yylval.opval;
dd629d5b
GS
853 ver = cSVOPx(version)->op_sv;
854 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 855 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
856 SvNVX(ver) = str_to_version(ver);
857 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 858 }
89bfa8cd
PP
859 }
860 }
861
862 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 863 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
864 force_next(WORD);
865
866 return (s);
867}
868
ffb4593c
NT
869/*
870 * S_tokeq
871 * Tokenize a quoted string passed in as an SV. It finds the next
872 * chunk, up to end of string or a backslash. It may make a new
873 * SV containing that chunk (if HINT_NEW_STRING is on). It also
874 * turns \\ into \.
875 */
876
76e3520e 877STATIC SV *
cea2e8a9 878S_tokeq(pTHX_ SV *sv)
79072805
LW
879{
880 register char *s;
881 register char *send;
882 register char *d;
b3ac6de7
IZ
883 STRLEN len = 0;
884 SV *pv = sv;
79072805
LW
885
886 if (!SvLEN(sv))
b3ac6de7 887 goto finish;
79072805 888
a0d0e21e 889 s = SvPV_force(sv, len);
21a311ee 890 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 891 goto finish;
463ee0b2 892 send = s + len;
79072805
LW
893 while (s < send && *s != '\\')
894 s++;
895 if (s == send)
b3ac6de7 896 goto finish;
79072805 897 d = s;
3280af22 898 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 899 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
900 while (s < send) {
901 if (*s == '\\') {
a0d0e21e 902 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
903 s++; /* all that, just for this */
904 }
905 *d++ = *s++;
906 }
907 *d = '\0';
463ee0b2 908 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 909 finish:
3280af22 910 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 911 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
912 return sv;
913}
914
ffb4593c
NT
915/*
916 * Now come three functions related to double-quote context,
917 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
918 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
919 * interact with PL_lex_state, and create fake ( ... ) argument lists
920 * to handle functions and concatenation.
921 * They assume that whoever calls them will be setting up a fake
922 * join call, because each subthing puts a ',' after it. This lets
923 * "lower \luPpEr"
924 * become
925 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
926 *
927 * (I'm not sure whether the spurious commas at the end of lcfirst's
928 * arguments and join's arguments are created or not).
929 */
930
931/*
932 * S_sublex_start
933 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
934 *
935 * Pattern matching will set PL_lex_op to the pattern-matching op to
936 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
937 *
938 * OP_CONST and OP_READLINE are easy--just make the new op and return.
939 *
940 * Everything else becomes a FUNC.
941 *
942 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
943 * had an OP_CONST or OP_READLINE). This just sets us up for a
944 * call to S_sublex_push().
945 */
946
76e3520e 947STATIC I32
cea2e8a9 948S_sublex_start(pTHX)
79072805
LW
949{
950 register I32 op_type = yylval.ival;
79072805
LW
951
952 if (op_type == OP_NULL) {
3280af22
NIS
953 yylval.opval = PL_lex_op;
954 PL_lex_op = Nullop;
79072805
LW
955 return THING;
956 }
957 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 958 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
959
960 if (SvTYPE(sv) == SVt_PVIV) {
961 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
962 STRLEN len;
963 char *p;
964 SV *nsv;
965
966 p = SvPV(sv, len);
79cb57f6 967 nsv = newSVpvn(p, len);
01ec43d0
GS
968 if (SvUTF8(sv))
969 SvUTF8_on(nsv);
b3ac6de7
IZ
970 SvREFCNT_dec(sv);
971 sv = nsv;
972 }
973 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 974 PL_lex_stuff = Nullsv;
79072805
LW
975 return THING;
976 }
977
3280af22
NIS
978 PL_sublex_info.super_state = PL_lex_state;
979 PL_sublex_info.sub_inwhat = op_type;
980 PL_sublex_info.sub_op = PL_lex_op;
981 PL_lex_state = LEX_INTERPPUSH;
55497cff 982
3280af22
NIS
983 PL_expect = XTERM;
984 if (PL_lex_op) {
985 yylval.opval = PL_lex_op;
986 PL_lex_op = Nullop;
55497cff
PP
987 return PMFUNC;
988 }
989 else
990 return FUNC;
991}
992
ffb4593c
NT
993/*
994 * S_sublex_push
995 * Create a new scope to save the lexing state. The scope will be
996 * ended in S_sublex_done. Returns a '(', starting the function arguments
997 * to the uc, lc, etc. found before.
998 * Sets PL_lex_state to LEX_INTERPCONCAT.
999 */
1000
76e3520e 1001STATIC I32
cea2e8a9 1002S_sublex_push(pTHX)
55497cff 1003{
0f15f207 1004 dTHR;
f46d017c 1005 ENTER;
55497cff 1006
3280af22
NIS
1007 PL_lex_state = PL_sublex_info.super_state;
1008 SAVEI32(PL_lex_dojoin);
1009 SAVEI32(PL_lex_brackets);
3280af22
NIS
1010 SAVEI32(PL_lex_casemods);
1011 SAVEI32(PL_lex_starts);
1012 SAVEI32(PL_lex_state);
7766f137 1013 SAVEVPTR(PL_lex_inpat);
3280af22 1014 SAVEI32(PL_lex_inwhat);
57843af0 1015 SAVECOPLINE(PL_curcop);
3280af22
NIS
1016 SAVEPPTR(PL_bufptr);
1017 SAVEPPTR(PL_oldbufptr);
1018 SAVEPPTR(PL_oldoldbufptr);
1019 SAVEPPTR(PL_linestart);
1020 SAVESPTR(PL_linestr);
1021 SAVEPPTR(PL_lex_brackstack);
1022 SAVEPPTR(PL_lex_casestack);
1023
1024 PL_linestr = PL_lex_stuff;
1025 PL_lex_stuff = Nullsv;
1026
9cbb5ea2
GS
1027 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1028 = SvPVX(PL_linestr);
3280af22
NIS
1029 PL_bufend += SvCUR(PL_linestr);
1030 SAVEFREESV(PL_linestr);
1031
1032 PL_lex_dojoin = FALSE;
1033 PL_lex_brackets = 0;
3280af22
NIS
1034 New(899, PL_lex_brackstack, 120, char);
1035 New(899, PL_lex_casestack, 12, char);
1036 SAVEFREEPV(PL_lex_brackstack);
1037 SAVEFREEPV(PL_lex_casestack);
1038 PL_lex_casemods = 0;
1039 *PL_lex_casestack = '\0';
1040 PL_lex_starts = 0;
1041 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1042 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1043
1044 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1045 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1046 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1047 else
3280af22 1048 PL_lex_inpat = Nullop;
79072805 1049
55497cff 1050 return '(';
79072805
LW
1051}
1052
ffb4593c
NT
1053/*
1054 * S_sublex_done
1055 * Restores lexer state after a S_sublex_push.
1056 */
1057
76e3520e 1058STATIC I32
cea2e8a9 1059S_sublex_done(pTHX)
79072805 1060{
3280af22
NIS
1061 if (!PL_lex_starts++) {
1062 PL_expect = XOPERATOR;
79cb57f6 1063 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1064 return THING;
1065 }
1066
3280af22
NIS
1067 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1068 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1069 return yylex();
79072805
LW
1070 }
1071
ffb4593c 1072 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1073 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1074 PL_linestr = PL_lex_repl;
1075 PL_lex_inpat = 0;
1076 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1077 PL_bufend += SvCUR(PL_linestr);
1078 SAVEFREESV(PL_linestr);
1079 PL_lex_dojoin = FALSE;
1080 PL_lex_brackets = 0;
3280af22
NIS
1081 PL_lex_casemods = 0;
1082 *PL_lex_casestack = '\0';
1083 PL_lex_starts = 0;
25da4f38 1084 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1085 PL_lex_state = LEX_INTERPNORMAL;
1086 PL_lex_starts++;
e9fa98b2
HS
1087 /* we don't clear PL_lex_repl here, so that we can check later
1088 whether this is an evalled subst; that means we rely on the
1089 logic to ensure sublex_done() is called again only via the
1090 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1091 }
e9fa98b2 1092 else {
3280af22 1093 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1094 PL_lex_repl = Nullsv;
1095 }
79072805 1096 return ',';
ffed7fef
LW
1097 }
1098 else {
f46d017c 1099 LEAVE;
3280af22
NIS
1100 PL_bufend = SvPVX(PL_linestr);
1101 PL_bufend += SvCUR(PL_linestr);
1102 PL_expect = XOPERATOR;
09bef843 1103 PL_sublex_info.sub_inwhat = 0;
79072805 1104 return ')';
ffed7fef
LW
1105 }
1106}
1107
02aa26ce
NT
1108/*
1109 scan_const
1110
1111 Extracts a pattern, double-quoted string, or transliteration. This
1112 is terrifying code.
1113
3280af22
NIS
1114 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1115 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1116 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1117
9b599b2a
GS
1118 Returns a pointer to the character scanned up to. Iff this is
1119 advanced from the start pointer supplied (ie if anything was
1120 successfully parsed), will leave an OP for the substring scanned
1121 in yylval. Caller must intuit reason for not parsing further
1122 by looking at the next characters herself.
1123
02aa26ce
NT
1124 In patterns:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 regexp special ones: \D \s
1128 constants: \x3
1129 backrefs: \1 (deprecated in substitution replacements)
1130 case and quoting: \U \Q \E
1131 stops on @ and $, but not for $ as tail anchor
1132
1133 In transliterations:
1134 characters are VERY literal, except for - not at the start or end
1135 of the string, which indicates a range. scan_const expands the
1136 range to the full set of intermediate characters.
1137
1138 In double-quoted strings:
1139 backslashes:
1140 double-quoted style: \r and \n
1141 constants: \x3
1142 backrefs: \1 (deprecated)
1143 case and quoting: \U \Q \E
1144 stops on @ and $
1145
1146 scan_const does *not* construct ops to handle interpolated strings.
1147 It stops processing as soon as it finds an embedded $ or @ variable
1148 and leaves it to the caller to work out what's going on.
1149
1150 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1151
1152 $ in pattern could be $foo or could be tail anchor. Assumption:
1153 it's a tail anchor if $ is the last thing in the string, or if it's
1154 followed by one of ")| \n\t"
1155
1156 \1 (backreferences) are turned into $1
1157
1158 The structure of the code is
1159 while (there's a character to process) {
1160 handle transliteration ranges
1161 skip regexp comments
1162 skip # initiated comments in //x patterns
1163 check for embedded @foo
1164 check for embedded scalars
1165 if (backslash) {
1166 leave intact backslashes from leave (below)
1167 deprecate \1 in strings and sub replacements
1168 handle string-changing backslashes \l \U \Q \E, etc.
1169 switch (what was escaped) {
1170 handle - in a transliteration (becomes a literal -)
1171 handle \132 octal characters
1172 handle 0x15 hex characters
1173 handle \cV (control V)
1174 handle printf backslashes (\f, \r, \n, etc)
1175 } (end switch)
1176 } (end if backslash)
1177 } (end while character to read)
1178
1179*/
1180
76e3520e 1181STATIC char *
cea2e8a9 1182S_scan_const(pTHX_ char *start)
79072805 1183{
3280af22 1184 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1185 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1186 register char *s = start; /* start of the constant */
1187 register char *d = SvPVX(sv); /* destination for copies */
1188 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1189 bool didrange = FALSE; /* did we just finish a range? */
89491803 1190 bool has_utf8 = FALSE; /* embedded \x{} */
012bcf8d
GS
1191 UV uv;
1192
ac2262e3 1193 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1194 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1195 : UTF;
89491803 1196 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1197 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1198 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1199 : UTF;
dff6d3cd 1200 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1201 PL_lex_inpat
4a2d328f 1202 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1203 : "";
79072805
LW
1204
1205 while (s < send || dorange) {
02aa26ce 1206 /* get transliterations out of the way (they're most literal) */
3280af22 1207 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1208 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1209 if (dorange) {
02aa26ce 1210 I32 i; /* current expanded character */
8ada0baa 1211 I32 min; /* first character in range */
02aa26ce
NT
1212 I32 max; /* last character in range */
1213
1214 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1215 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1216 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1217 d -= 2; /* eat the first char and the - */
1218
8ada0baa
JH
1219 min = (U8)*d; /* first char in range */
1220 max = (U8)d[1]; /* last char in range */
1221
c2e66d9e 1222 if (min > max) {
01ec43d0
GS
1223 Perl_croak(aTHX_
1224 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1225 (char)min, (char)max);
c2e66d9e
GS
1226 }
1227
8ada0baa
JH
1228#ifndef ASCIIish
1229 if ((isLOWER(min) && isLOWER(max)) ||
1230 (isUPPER(min) && isUPPER(max))) {
1231 if (isLOWER(min)) {
1232 for (i = min; i <= max; i++)
1233 if (isLOWER(i))
1234 *d++ = i;
1235 } else {
1236 for (i = min; i <= max; i++)
1237 if (isUPPER(i))
1238 *d++ = i;
1239 }
1240 }
1241 else
1242#endif
1243 for (i = min; i <= max; i++)
1244 *d++ = i;
02aa26ce
NT
1245
1246 /* mark the range as done, and continue */
79072805 1247 dorange = FALSE;
01ec43d0 1248 didrange = TRUE;
79072805 1249 continue;
c2e66d9e 1250 }
02aa26ce
NT
1251
1252 /* range begins (ignore - as first or last char) */
79072805 1253 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1254 if (didrange) {
1fafa243 1255 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1256 }
a0ed51b3 1257 if (utf) {
a176fa2a 1258 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1259 s++;
1260 continue;
1261 }
79072805
LW
1262 dorange = TRUE;
1263 s++;
01ec43d0
GS
1264 }
1265 else {
1266 didrange = FALSE;
1267 }
79072805 1268 }
02aa26ce
NT
1269
1270 /* if we get here, we're not doing a transliteration */
1271
0f5d15d6
IZ
1272 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1273 except for the last char, which will be done separately. */
3280af22 1274 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1275 if (s[2] == '#') {
1276 while (s < send && *s != ')')
1277 *d++ = *s++;
155aba94
GS
1278 }
1279 else if (s[2] == '{' /* This should match regcomp.c */
1280 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1281 {
cc6b7395 1282 I32 count = 1;
0f5d15d6 1283 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1284 char c;
1285
d9f97599
GS
1286 while (count && (c = *regparse)) {
1287 if (c == '\\' && regparse[1])
1288 regparse++;
cc6b7395
IZ
1289 else if (c == '{')
1290 count++;
1291 else if (c == '}')
1292 count--;
d9f97599 1293 regparse++;
cc6b7395 1294 }
5bdf89e7
IZ
1295 if (*regparse != ')') {
1296 regparse--; /* Leave one char for continuation. */
cc6b7395 1297 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1298 }
0f5d15d6 1299 while (s < regparse)
cc6b7395
IZ
1300 *d++ = *s++;
1301 }
748a9306 1302 }
02aa26ce
NT
1303
1304 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1305 else if (*s == '#' && PL_lex_inpat &&
1306 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1307 while (s+1 < send && *s != '\n')
1308 *d++ = *s++;
1309 }
02aa26ce 1310
5d1d4326
JH
1311 /* check for embedded arrays
1312 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1313 */
7e2040f0 1314 else if (*s == '@' && s[1]
5d1d4326 1315 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1316 break;
02aa26ce
NT
1317
1318 /* check for embedded scalars. only stop if we're sure it's a
1319 variable.
1320 */
79072805 1321 else if (*s == '$') {
3280af22 1322 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1323 break;
c277df42 1324 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1325 break; /* in regexp, $ might be tail anchor */
1326 }
02aa26ce 1327
a0ed51b3
LW
1328 /* (now in tr/// code again) */
1329
89491803 1330 if (*s & 0x80 && this_utf8) {
ba210ebe
JH
1331 STRLEN len;
1332 UV uv;
1333
dcad2880 1334 uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
ba210ebe 1335 if (len == 1) {
fcc8fcf6 1336 /* Illegal UTF8 (a high-bit byte), make it valid. */
ba210ebe
JH
1337 char *old_pvx = SvPVX(sv);
1338 /* need space for one extra char (NOTE: SvCUR() not set here) */
1339 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1340 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1341 }
1342 else {
1343 while (len--)
1344 *d++ = *s++;
1345 }
89491803 1346 has_utf8 = TRUE;
ba210ebe 1347 continue;
a0ed51b3
LW
1348 }
1349
02aa26ce 1350 /* backslashes */
79072805
LW
1351 if (*s == '\\' && s+1 < send) {
1352 s++;
02aa26ce
NT
1353
1354 /* some backslashes we leave behind */
c9f97d15 1355 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1356 *d++ = '\\';
1357 *d++ = *s++;
1358 continue;
1359 }
02aa26ce
NT
1360
1361 /* deprecate \1 in strings and substitution replacements */
3280af22 1362 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1363 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1364 {
d008e5eb 1365 dTHR; /* only for ckWARN */
599cee73 1366 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1367 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1368 *--s = '$';
1369 break;
1370 }
02aa26ce
NT
1371
1372 /* string-change backslash escapes */
3280af22 1373 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1374 --s;
1375 break;
1376 }
02aa26ce
NT
1377
1378 /* if we get here, it's either a quoted -, or a digit */
79072805 1379 switch (*s) {
02aa26ce
NT
1380
1381 /* quoted - in transliterations */
79072805 1382 case '-':
3280af22 1383 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1384 *d++ = *s++;
1385 continue;
1386 }
1387 /* FALL THROUGH */
1388 default:
11b8faa4
JH
1389 {
1390 dTHR;
7e84c16c 1391 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1392 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1393 "Unrecognized escape \\%c passed through",
1394 *s);
1395 /* default action is to copy the quoted character */
1396 *d++ = *s++;
1397 continue;
1398 }
02aa26ce
NT
1399
1400 /* \132 indicates an octal constant */
79072805
LW
1401 case '0': case '1': case '2': case '3':
1402 case '4': case '5': case '6': case '7':
ba210ebe
JH
1403 {
1404 STRLEN len = 0; /* disallow underscores */
1405 uv = (UV)scan_oct(s, 3, &len);
1406 s += len;
1407 }
012bcf8d 1408 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1409
1410 /* \x24 indicates a hex constant */
79072805 1411 case 'x':
a0ed51b3
LW
1412 ++s;
1413 if (*s == '{') {
1414 char* e = strchr(s, '}');
adaeee49 1415 if (!e) {
a0ed51b3 1416 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1417 e = s;
1418 }
89491803 1419 else {
ba210ebe
JH
1420 STRLEN len = 1; /* allow underscores */
1421 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
89491803 1422 has_utf8 = TRUE;
ba210ebe
JH
1423 }
1424 s = e + 1;
a0ed51b3
LW
1425 }
1426 else {
ba210ebe
JH
1427 {
1428 STRLEN len = 0; /* disallow underscores */
1429 uv = (UV)scan_hex(s, 2, &len);
1430 s += len;
1431 }
012bcf8d
GS
1432 }
1433
1434 NUM_ESCAPE_INSERT:
1435 /* Insert oct or hex escaped character.
1436 * There will always enough room in sv since such escapes will
1437 * be longer than any utf8 sequence they can end up as
1438 */
89491803
SC
1439 if (uv > 127 || has_utf8) {
1440 if (!this_utf8 && !has_utf8 && uv > 255) {
012bcf8d
GS
1441 /* might need to recode whatever we have accumulated so far
1442 * if it contains any hibit chars
1443 */
1444 int hicount = 0;
1445 char *c;
1446 for (c = SvPVX(sv); c < d; c++) {
1447 if (*c & 0x80)
1448 hicount++;
1449 }
1450 if (hicount) {
1451 char *old_pvx = SvPVX(sv);
1452 char *src, *dst;
1453 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1454
1455 src = d - 1;
1456 d += hicount;
1457 dst = d - 1;
1458
1459 while (src < dst) {
1460 if (*src & 0x80) {
1461 dst--;
1462 uv_to_utf8((U8*)dst, (U8)*src--);
1463 dst--;
1464 }
1465 else {
1466 *dst-- = *src--;
1467 }
1468 }
1469 }
1470 }
1471
89491803 1472 if (has_utf8 || uv > 255) {
012bcf8d 1473 d = (char*)uv_to_utf8((U8*)d, uv);
89491803 1474 this_utf8 = TRUE;
012bcf8d 1475 }
a0ed51b3 1476 else {
012bcf8d 1477 *d++ = (char)uv;
a0ed51b3 1478 }
012bcf8d
GS
1479 }
1480 else {
1481 *d++ = (char)uv;
a0ed51b3 1482 }
79072805 1483 continue;
02aa26ce 1484
4a2d328f
IZ
1485 /* \N{latin small letter a} is a named character */
1486 case 'N':
423cee85
JH
1487 ++s;
1488 if (*s == '{') {
1489 char* e = strchr(s, '}');
155aba94 1490 SV *res;
423cee85
JH
1491 STRLEN len;
1492 char *str;
423cee85
JH
1493
1494 if (!e) {
5777a3f7 1495 yyerror("Missing right brace on \\N{}");
423cee85
JH
1496 e = s - 1;
1497 goto cont_scan;
1498 }
1499 res = newSVpvn(s + 1, e - s - 1);
1500 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1501 res, Nullsv, "\\N{...}" );
423cee85 1502 str = SvPV(res,len);
89491803 1503 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1504 char *ostart = SvPVX(sv);
1505 SvCUR_set(sv, d - ostart);
1506 SvPOK_on(sv);
e4f3eed8 1507 *d = '\0';
f08d6ad9 1508 sv_utf8_upgrade(sv);
d2f449dd
SB
1509 /* this just broke our allocation above... */
1510 SvGROW(sv, send - start);
f08d6ad9 1511 d = SvPVX(sv) + SvCUR(sv);
89491803 1512 has_utf8 = TRUE;
f08d6ad9 1513 }
423cee85
JH
1514 if (len > e - s + 4) {
1515 char *odest = SvPVX(sv);
1516
1517 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1518 d = SvPVX(sv) + (d - odest);
1519 }
1520 Copy(str, d, len, char);
1521 d += len;
1522 SvREFCNT_dec(res);
1523 cont_scan:
1524 s = e + 1;
1525 }
1526 else
5777a3f7 1527 yyerror("Missing braces on \\N{}");
423cee85
JH
1528 continue;
1529
02aa26ce 1530 /* \c is a control character */
79072805
LW
1531 case 'c':
1532 s++;
9d116dd7
JH
1533#ifdef EBCDIC
1534 *d = *s++;
1535 if (isLOWER(*d))
1536 *d = toUPPER(*d);
774a9426
GS
1537 *d = toCTRL(*d);
1538 d++;
9d116dd7 1539#else
ba210ebe
JH
1540 {
1541 U8 c = *s++;
1542 *d++ = toCTRL(c);
1543 }
9d116dd7 1544#endif
79072805 1545 continue;
02aa26ce
NT
1546
1547 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1548 case 'b':
1549 *d++ = '\b';
1550 break;
1551 case 'n':
1552 *d++ = '\n';
1553 break;
1554 case 'r':
1555 *d++ = '\r';
1556 break;
1557 case 'f':
1558 *d++ = '\f';
1559 break;
1560 case 't':
1561 *d++ = '\t';
1562 break;
34a3fe2a
PP
1563#ifdef EBCDIC
1564 case 'e':
1565 *d++ = '\047'; /* CP 1047 */
1566 break;
1567 case 'a':
1568 *d++ = '\057'; /* CP 1047 */
1569 break;
1570#else
79072805
LW
1571 case 'e':
1572 *d++ = '\033';
1573 break;
1574 case 'a':
1575 *d++ = '\007';
1576 break;
34a3fe2a 1577#endif
02aa26ce
NT
1578 } /* end switch */
1579
79072805
LW
1580 s++;
1581 continue;
02aa26ce
NT
1582 } /* end if (backslash) */
1583
79072805 1584 *d++ = *s++;
02aa26ce
NT
1585 } /* while loop to process each character */
1586
1587 /* terminate the string and set up the sv */
79072805 1588 *d = '\0';
463ee0b2 1589 SvCUR_set(sv, d - SvPVX(sv));
79072805 1590 SvPOK_on(sv);
89491803 1591 if (has_utf8)
7e2040f0 1592 SvUTF8_on(sv);
79072805 1593
02aa26ce 1594 /* shrink the sv if we allocated more than we used */
79072805
LW
1595 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1596 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1597 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1598 }
02aa26ce 1599
9b599b2a 1600 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1601 if (s > PL_bufptr) {
1602 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1603 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1604 sv, Nullsv,
3280af22 1605 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1606 ? "tr"
3280af22 1607 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1608 ? "s"
1609 : "qq")));
79072805 1610 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1611 } else
8990e307 1612 SvREFCNT_dec(sv);
79072805
LW
1613 return s;
1614}
1615
ffb4593c
NT
1616/* S_intuit_more
1617 * Returns TRUE if there's more to the expression (e.g., a subscript),
1618 * FALSE otherwise.
ffb4593c
NT
1619 *
1620 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1621 *
1622 * ->[ and ->{ return TRUE
1623 * { and [ outside a pattern are always subscripts, so return TRUE
1624 * if we're outside a pattern and it's not { or [, then return FALSE
1625 * if we're in a pattern and the first char is a {
1626 * {4,5} (any digits around the comma) returns FALSE
1627 * if we're in a pattern and the first char is a [
1628 * [] returns FALSE
1629 * [SOMETHING] has a funky algorithm to decide whether it's a
1630 * character class or not. It has to deal with things like
1631 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1632 * anything else returns TRUE
1633 */
1634
9cbb5ea2
GS
1635/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1636
76e3520e 1637STATIC int
cea2e8a9 1638S_intuit_more(pTHX_ register char *s)
79072805 1639{
3280af22 1640 if (PL_lex_brackets)
79072805
LW
1641 return TRUE;
1642 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1643 return TRUE;
1644 if (*s != '{' && *s != '[')
1645 return FALSE;
3280af22 1646 if (!PL_lex_inpat)
79072805
LW
1647 return TRUE;
1648
1649 /* In a pattern, so maybe we have {n,m}. */
1650 if (*s == '{') {
1651 s++;
1652 if (!isDIGIT(*s))
1653 return TRUE;
1654 while (isDIGIT(*s))
1655 s++;
1656 if (*s == ',')
1657 s++;
1658 while (isDIGIT(*s))
1659 s++;
1660 if (*s == '}')
1661 return FALSE;
1662 return TRUE;
1663
1664 }
1665
1666 /* On the other hand, maybe we have a character class */
1667
1668 s++;
1669 if (*s == ']' || *s == '^')
1670 return FALSE;
1671 else {
ffb4593c 1672 /* this is terrifying, and it works */
79072805
LW
1673 int weight = 2; /* let's weigh the evidence */
1674 char seen[256];
f27ffc4a 1675 unsigned char un_char = 255, last_un_char;
93a17b20 1676 char *send = strchr(s,']');
3280af22 1677 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1678
1679 if (!send) /* has to be an expression */
1680 return TRUE;
1681
1682 Zero(seen,256,char);
1683 if (*s == '$')
1684 weight -= 3;
1685 else if (isDIGIT(*s)) {
1686 if (s[1] != ']') {
1687 if (isDIGIT(s[1]) && s[2] == ']')
1688 weight -= 10;
1689 }
1690 else
1691 weight -= 100;
1692 }
1693 for (; s < send; s++) {
1694 last_un_char = un_char;
1695 un_char = (unsigned char)*s;
1696 switch (*s) {
1697 case '@':
1698 case '&':
1699 case '$':
1700 weight -= seen[un_char] * 10;
7e2040f0 1701 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1702 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1703 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1704 weight -= 100;
1705 else
1706 weight -= 10;
1707 }
1708 else if (*s == '$' && s[1] &&
93a17b20
LW
1709 strchr("[#!%*<>()-=",s[1])) {
1710 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1711 weight -= 10;
1712 else
1713 weight -= 1;
1714 }
1715 break;
1716 case '\\':
1717 un_char = 254;
1718 if (s[1]) {
93a17b20 1719 if (strchr("wds]",s[1]))
79072805
LW
1720 weight += 100;
1721 else if (seen['\''] || seen['"'])
1722 weight += 1;
93a17b20 1723 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1724 weight += 40;
1725 else if (isDIGIT(s[1])) {
1726 weight += 40;
1727 while (s[1] && isDIGIT(s[1]))
1728 s++;
1729 }
1730 }
1731 else
1732 weight += 100;
1733 break;
1734 case '-':
1735 if (s[1] == '\\')
1736 weight += 50;
93a17b20 1737 if (strchr("aA01! ",last_un_char))
79072805 1738 weight += 30;
93a17b20 1739 if (strchr("zZ79~",s[1]))
79072805 1740 weight += 30;
f27ffc4a
GS
1741 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1742 weight -= 5; /* cope with negative subscript */
79072805
LW
1743 break;
1744 default:
93a17b20 1745 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1746 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1747 char *d = tmpbuf;
1748 while (isALPHA(*s))
1749 *d++ = *s++;
1750 *d = '\0';
1751 if (keyword(tmpbuf, d - tmpbuf))
1752 weight -= 150;
1753 }
1754 if (un_char == last_un_char + 1)
1755 weight += 5;
1756 weight -= seen[un_char];
1757 break;
1758 }
1759 seen[un_char]++;
1760 }
1761 if (weight >= 0) /* probably a character class */
1762 return FALSE;
1763 }
1764
1765 return TRUE;
1766}
ffed7fef 1767
ffb4593c
NT
1768/*
1769 * S_intuit_method
1770 *
1771 * Does all the checking to disambiguate
1772 * foo bar
1773 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1774 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1775 *
1776 * First argument is the stuff after the first token, e.g. "bar".
1777 *
1778 * Not a method if bar is a filehandle.
1779 * Not a method if foo is a subroutine prototyped to take a filehandle.
1780 * Not a method if it's really "Foo $bar"
1781 * Method if it's "foo $bar"
1782 * Not a method if it's really "print foo $bar"
1783 * Method if it's really "foo package::" (interpreted as package->foo)
1784 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1785 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1786 * =>
1787 */
1788
76e3520e 1789STATIC int
cea2e8a9 1790S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1791{
1792 char *s = start + (*start == '$');
3280af22 1793 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1794 STRLEN len;
1795 GV* indirgv;
1796
1797 if (gv) {
b6c543e3 1798 CV *cv;
a0d0e21e
LW
1799 if (GvIO(gv))
1800 return 0;
b6c543e3
IZ
1801 if ((cv = GvCVu(gv))) {
1802 char *proto = SvPVX(cv);
1803 if (proto) {
1804 if (*proto == ';')
1805 proto++;
1806 if (*proto == '*')
1807 return 0;
1808 }
1809 } else
a0d0e21e
LW
1810 gv = 0;
1811 }
8903cb82 1812 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1813 /* start is the beginning of the possible filehandle/object,
1814 * and s is the end of it
1815 * tmpbuf is a copy of it
1816 */
1817
a0d0e21e 1818 if (*start == '$') {
3280af22 1819 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1820 return 0;
1821 s = skipspace(s);
3280af22
NIS
1822 PL_bufptr = start;
1823 PL_expect = XREF;
a0d0e21e
LW
1824 return *s == '(' ? FUNCMETH : METHOD;
1825 }
1826 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1827 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1828 len -= 2;
1829 tmpbuf[len] = '\0';
1830 goto bare_package;
1831 }
1832 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1833 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1834 return 0;
1835 /* filehandle or package name makes it a method */
89bfa8cd 1836 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1837 s = skipspace(s);
3280af22 1838 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1839 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1840 bare_package:
3280af22 1841 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1842 newSVpvn(tmpbuf,len));
3280af22
NIS
1843 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1844 PL_expect = XTERM;
a0d0e21e 1845 force_next(WORD);
3280af22 1846 PL_bufptr = s;
a0d0e21e
LW
1847 return *s == '(' ? FUNCMETH : METHOD;
1848 }
1849 }
1850 return 0;
1851}
1852
ffb4593c
NT
1853/*
1854 * S_incl_perldb
1855 * Return a string of Perl code to load the debugger. If PERL5DB
1856 * is set, it will return the contents of that, otherwise a
1857 * compile-time require of perl5db.pl.
1858 */
1859
76e3520e 1860STATIC char*
cea2e8a9 1861S_incl_perldb(pTHX)
a0d0e21e 1862{
3280af22 1863 if (PL_perldb) {
76e3520e 1864 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1865
1866 if (pdb)
1867 return pdb;
61bb5906 1868 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1869 return "BEGIN { require 'perl5db.pl' }";
1870 }
1871 return "";
1872}
1873
1874
16d20bd9
AD
1875/* Encoded script support. filter_add() effectively inserts a
1876 * 'pre-processing' function into the current source input stream.
1877 * Note that the filter function only applies to the current source file
1878 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1879 *
1880 * The datasv parameter (which may be NULL) can be used to pass
1881 * private data to this instance of the filter. The filter function
1882 * can recover the SV using the FILTER_DATA macro and use it to
1883 * store private buffers and state information.
1884 *
1885 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1886 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1887 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1888 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1889 * private use must be set using malloc'd pointers.
1890 */
16d20bd9
AD
1891
1892SV *
864dbfa3 1893Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1894{
f4c556ac
GS
1895 if (!funcp)
1896 return Nullsv;
1897
3280af22
NIS
1898 if (!PL_rsfp_filters)
1899 PL_rsfp_filters = newAV();
16d20bd9 1900 if (!datasv)
8c52afec 1901 datasv = NEWSV(255,0);
16d20bd9 1902 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1903 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1904 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1905 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1906 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1907 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1908 av_unshift(PL_rsfp_filters, 1);
1909 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1910 return(datasv);
1911}
1912
1913
1914/* Delete most recently added instance of this filter function. */
a0d0e21e 1915void
864dbfa3 1916Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1917{
e0c19803 1918 SV *datasv;
f4c556ac 1919 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1920 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1921 return;
1922 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1923 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1924 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1925 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1926 IoANY(datasv) = (void *)NULL;
3280af22 1927 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1928
16d20bd9
AD
1929 return;
1930 }
1931 /* we need to search for the correct entry and clear it */
cea2e8a9 1932 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1933}
1934
1935
1936/* Invoke the n'th filter function for the current rsfp. */
1937I32
864dbfa3 1938Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1939
1940
1941 /* 0 = read one text line */
a0d0e21e 1942{
16d20bd9
AD
1943 filter_t funcp;
1944 SV *datasv = NULL;
e50aee73 1945
3280af22 1946 if (!PL_rsfp_filters)
16d20bd9 1947 return -1;
3280af22 1948 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1949 /* Provide a default input filter to make life easy. */
1950 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1951 DEBUG_P(PerlIO_printf(Perl_debug_log,
1952 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1953 if (maxlen) {
1954 /* Want a block */
1955 int len ;
1956 int old_len = SvCUR(buf_sv) ;
1957
1958 /* ensure buf_sv is large enough */
1959 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1960 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1961 if (PerlIO_error(PL_rsfp))
37120919
AD
1962 return -1; /* error */
1963 else
1964 return 0 ; /* end of file */
1965 }
16d20bd9
AD
1966 SvCUR_set(buf_sv, old_len + len) ;
1967 } else {
1968 /* Want a line */
3280af22
NIS
1969 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1970 if (PerlIO_error(PL_rsfp))
37120919
AD
1971 return -1; /* error */
1972 else
1973 return 0 ; /* end of file */
1974 }
16d20bd9
AD
1975 }
1976 return SvCUR(buf_sv);
1977 }
1978 /* Skip this filter slot if filter has been deleted */
3280af22 1979 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1980 DEBUG_P(PerlIO_printf(Perl_debug_log,
1981 "filter_read %d: skipped (filter deleted)\n",
1982 idx));
16d20bd9
AD
1983 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1984 }
1985 /* Get function pointer hidden within datasv */
4755096e 1986 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1987 DEBUG_P(PerlIO_printf(Perl_debug_log,
1988 "filter_read %d: via function %p (%s)\n",
1989 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1990 /* Call function. The function is expected to */
1991 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1992 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1993 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1994}
1995
76e3520e 1996STATIC char *
cea2e8a9 1997S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1998{
c39cd008 1999#ifdef PERL_CR_FILTER
3280af22 2000 if (!PL_rsfp_filters) {
c39cd008 2001 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2002 }
2003#endif
3280af22 2004 if (PL_rsfp_filters) {
16d20bd9 2005
55497cff
PP
2006 if (!append)
2007 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2008 if (FILTER_READ(0, sv, 0) > 0)
2009 return ( SvPVX(sv) ) ;
2010 else
2011 return Nullch ;
2012 }
9d116dd7 2013 else
fd049845 2014 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2015}
2016
01ec43d0
GS
2017STATIC HV *
2018S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2019{
2020 GV *gv;
2021
01ec43d0 2022 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2023 return PL_curstash;
2024
2025 if (len > 2 &&
2026 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2027 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2028 {
2029 return GvHV(gv); /* Foo:: */
def3634b
GS
2030 }
2031
2032 /* use constant CLASS => 'MyClass' */
2033 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2034 SV *sv;
2035 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2036 pkgname = SvPV_nolen(sv);
2037 }
2038 }
2039
2040 return gv_stashpv(pkgname, FALSE);
2041}
a0d0e21e 2042
748a9306
LW
2043#ifdef DEBUGGING
2044 static char* exp_name[] =
09bef843
SB
2045 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2046 "ATTRTERM", "TERMBLOCK"
2047 };
748a9306 2048#endif
463ee0b2 2049
02aa26ce
NT
2050/*
2051 yylex
2052
2053 Works out what to call the token just pulled out of the input
2054 stream. The yacc parser takes care of taking the ops we return and
2055 stitching them into a tree.
2056
2057 Returns:
2058 PRIVATEREF
2059
2060 Structure:
2061 if read an identifier
2062 if we're in a my declaration
2063 croak if they tried to say my($foo::bar)
2064 build the ops for a my() declaration
2065 if it's an access to a my() variable
2066 are we in a sort block?
2067 croak if my($a); $a <=> $b
2068 build ops for access to a my() variable
2069 if in a dq string, and they've said @foo and we can't find @foo
2070 croak
2071 build ops for a bareword
2072 if we already built the token before, use it.
2073*/
2074
dba4d153 2075#ifdef USE_PURE_BISON
bf4acbe4 2076#ifdef __SC__
dba4d153 2077#pragma segment Perl_yylex_r
bf4acbe4 2078#endif
864dbfa3 2079int
dba4d153 2080Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2081{
b73d6f50 2082 dTHR;
20141f0e
RI
2083 int r;
2084
20141f0e
RI
2085 yylval_pointer[yyactlevel] = lvalp;
2086 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2087 yyactlevel++;
2088 if (yyactlevel >= YYMAXLEVEL)
2089 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2090
dba4d153 2091 r = Perl_yylex(aTHX);
20141f0e 2092
20141f0e 2093 yyactlevel--;
20141f0e
RI
2094
2095 return r;
2096}
dba4d153 2097#endif
20141f0e 2098
dba4d153
JH
2099#ifdef __SC__
2100#pragma segment Perl_yylex
2101#endif
2102
2103int
2104#ifdef USE_PURE_BISON
2105Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
2106#else
2107Perl_yylex(pTHX)
2108#endif
20141f0e 2109{
11343788 2110 dTHR;
79072805 2111 register char *s;
378cc40b 2112 register char *d;
79072805 2113 register I32 tmp;
463ee0b2 2114 STRLEN len;
161b471a
NIS
2115 GV *gv = Nullgv;
2116 GV **gvp = 0;
a687059c 2117
02aa26ce 2118 /* check if there's an identifier for us to look at */
3280af22 2119 if (PL_pending_ident) {
02aa26ce 2120 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2121 char pit = PL_pending_ident;
2122 PL_pending_ident = 0;
bbce6d69 2123
02aa26ce
NT
2124 /* if we're in a my(), we can't allow dynamics here.
2125 $foo'bar has already been turned into $foo::bar, so
2126 just check for colons.
2127
2128 if it's a legal name, the OP is a PADANY.
2129 */
3280af22 2130 if (PL_in_my) {
77ca0c92 2131 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2132 if (strchr(PL_tokenbuf,':'))
2133 yyerror(Perl_form(aTHX_ "No package name allowed for "
2134 "variable %s in \"our\"",
2135 PL_tokenbuf));
77ca0c92
LW
2136 tmp = pad_allocmy(PL_tokenbuf);
2137 }
2138 else {
2139 if (strchr(PL_tokenbuf,':'))
2140 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2141
77ca0c92
LW
2142 yylval.opval = newOP(OP_PADANY, 0);
2143 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2144 return PRIVATEREF;
2145 }
bbce6d69
PP
2146 }
2147
02aa26ce
NT
2148 /*
2149 build the ops for accesses to a my() variable.
2150
2151 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2152 then used in a comparison. This catches most, but not
2153 all cases. For instance, it catches
2154 sort { my($a); $a <=> $b }
2155 but not
2156 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2157 (although why you'd do that is anyone's guess).
2158 */
2159
3280af22 2160 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2161#ifdef USE_THREADS
54b9620d 2162 /* Check for single character per-thread SVs */
3280af22
NIS
2163 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2164 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2165 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2166 {
2faa37cc 2167 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2168 yylval.opval->op_targ = tmp;
2169 return PRIVATEREF;
2170 }
2171#endif /* USE_THREADS */
3280af22 2172 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2173 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2174 /* might be an "our" variable" */
f472eb5c 2175 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2176 /* build ops for a bareword */
f472eb5c
GS
2177 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2178 sv_catpvn(sym, "::", 2);
2179 sv_catpv(sym, PL_tokenbuf+1);
2180 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2181 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2182 gv_fetchpv(SvPVX(sym),
77ca0c92 2183 (PL_in_eval
f472eb5c
GS
2184 ? (GV_ADDMULTI | GV_ADDINEVAL)
2185 : TRUE
77ca0c92
LW
2186 ),
2187 ((PL_tokenbuf[0] == '$') ? SVt_PV
2188 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2189 : SVt_PVHV));
2190 return WORD;
2191 }
2192
02aa26ce 2193 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2194 if (PL_last_lop_op == OP_SORT &&
2195 PL_tokenbuf[0] == '$' &&
2196 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2197 && !PL_tokenbuf[2])
bbce6d69 2198 {
3280af22
NIS
2199 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2200 d < PL_bufend && *d != '\n';
a863c7d1
MB
2201 d++)
2202 {
2203 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2204 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2205 PL_tokenbuf);
a863c7d1 2206 }
bbce6d69
PP
2207 }
2208 }
bbce6d69 2209
a863c7d1
MB
2210 yylval.opval = newOP(OP_PADANY, 0);
2211 yylval.opval->op_targ = tmp;
2212 return PRIVATEREF;
2213 }
bbce6d69
PP
2214 }
2215
02aa26ce
NT
2216 /*
2217 Whine if they've said @foo in a doublequoted string,
2218 and @foo isn't a variable we can find in the symbol
2219 table.
2220 */
3280af22
NIS
2221 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2222 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2223 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2224 && ckWARN(WARN_AMBIGUOUS))
2225 {
2226 /* Downgraded from fatal to warning 20000522 mjd */
2227 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2228 "Possible unintended interpolation of %s in string",
2229 PL_tokenbuf);
2230 }
bbce6d69
PP
2231 }
2232
02aa26ce 2233 /* build ops for a bareword */
3280af22 2234 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2235 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2236 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2237 ((PL_tokenbuf[0] == '$') ? SVt_PV
2238 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2239 : SVt_PVHV));
2240 return WORD;
2241 }
2242
02aa26ce
NT
2243 /* no identifier pending identification */
2244
3280af22 2245 switch (PL_lex_state) {
79072805
LW
2246#ifdef COMMENTARY
2247 case LEX_NORMAL: /* Some compilers will produce faster */
2248 case LEX_INTERPNORMAL: /* code if we comment these out. */
2249 break;
2250#endif
2251
09bef843 2252 /* when we've already built the next token, just pull it out of the queue */
79072805 2253 case LEX_KNOWNEXT:
3280af22
NIS
2254 PL_nexttoke--;
2255 yylval = PL_nextval[PL_nexttoke];
2256 if (!PL_nexttoke) {
2257 PL_lex_state = PL_lex_defer;
2258 PL_expect = PL_lex_expect;
2259 PL_lex_defer = LEX_NORMAL;
463ee0b2 2260 }
3280af22 2261 return(PL_nexttype[PL_nexttoke]);
79072805 2262
02aa26ce 2263 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2264 when we get here, PL_bufptr is at the \
02aa26ce 2265 */
79072805
LW
2266 case LEX_INTERPCASEMOD:
2267#ifdef DEBUGGING
3280af22 2268 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2269 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2270#endif
02aa26ce 2271 /* handle \E or end of string */
3280af22 2272 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2273 char oldmod;
02aa26ce
NT
2274
2275 /* if at a \E */
3280af22
NIS
2276 if (PL_lex_casemods) {
2277 oldmod = PL_lex_casestack[--PL_lex_casemods];
2278 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2279
3280af22
NIS
2280 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2281 PL_bufptr += 2;
2282 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2283 }
79072805
LW
2284 return ')';
2285 }
3280af22
NIS
2286 if (PL_bufptr != PL_bufend)
2287 PL_bufptr += 2;
2288 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2289 return yylex();
79072805
LW
2290 }
2291 else {
3280af22 2292 s = PL_bufptr + 1;
79072805
LW
2293 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2294 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2295 if (strchr("LU", *s) &&
3280af22 2296 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2297 {
3280af22 2298 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2299 return ')';
2300 }
3280af22
NIS
2301 if (PL_lex_casemods > 10) {
2302 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2303 if (newlb != PL_lex_casestack) {
a0d0e21e 2304 SAVEFREEPV(newlb);
3280af22 2305 PL_lex_casestack = newlb;
a0d0e21e
LW
2306 }
2307 }
3280af22
NIS
2308 PL_lex_casestack[PL_lex_casemods++] = *s;
2309 PL_lex_casestack[PL_lex_casemods] = '\0';
2310 PL_lex_state = LEX_INTERPCONCAT;
2311 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2312 force_next('(');
2313 if (*s == 'l')
3280af22 2314 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2315 else if (*s == 'u')
3280af22 2316 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2317 else if (*s == 'L')
3280af22 2318 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2319 else if (*s == 'U')
3280af22 2320 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2321 else if (*s == 'Q')
3280af22 2322 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2323 else
cea2e8a9 2324 Perl_croak(aTHX_ "panic: yylex");
3280af22 2325 PL_bufptr = s + 1;
79072805 2326 force_next(FUNC);
3280af22
NIS
2327 if (PL_lex_starts) {
2328 s = PL_bufptr;
2329 PL_lex_starts = 0;
79072805
LW
2330 Aop(OP_CONCAT);
2331 }
2332 else
cea2e8a9 2333 return yylex();
79072805
LW
2334 }
2335
55497cff
PP
2336 case LEX_INTERPPUSH:
2337 return sublex_push();
2338
79072805 2339 case LEX_INTERPSTART:
3280af22 2340 if (PL_bufptr == PL_bufend)
79072805 2341 return sublex_done();
3280af22
NIS
2342 PL_expect = XTERM;
2343 PL_lex_dojoin = (*PL_bufptr == '@');
2344 PL_lex_state = LEX_INTERPNORMAL;
2345 if (PL_lex_dojoin) {
2346 PL_nextval[PL_nexttoke].ival = 0;
79072805 2347 force_next(',');
554b3eca 2348#ifdef USE_THREADS
533c011a
NIS
2349 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2350 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2351 force_next(PRIVATEREF);
2352#else
a0d0e21e 2353 force_ident("\"", '$');
554b3eca 2354#endif /* USE_THREADS */
3280af22 2355 PL_nextval[PL_nexttoke].ival = 0;
79072805 2356 force_next('$');
3280af22 2357 PL_nextval[PL_nexttoke].ival = 0;
79072805 2358 force_next('(');
3280af22 2359 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2360 force_next(FUNC);
2361 }
3280af22
NIS
2362 if (PL_lex_starts++) {
2363 s = PL_bufptr;
79072805
LW
2364 Aop(OP_CONCAT);
2365 }
cea2e8a9 2366 return yylex();
79072805
LW
2367
2368 case LEX_INTERPENDMAYBE:
3280af22
NIS
2369 if (intuit_more(PL_bufptr)) {
2370 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2371 break;
2372 }
2373 /* FALL THROUGH */
2374
2375 case LEX_INTERPEND:
3280af22
NIS
2376 if (PL_lex_dojoin) {
2377 PL_lex_dojoin = FALSE;
2378 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2379 return ')';
2380 }
43a16006 2381 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2382 && SvEVALED(PL_lex_repl))
43a16006 2383 {
e9fa98b2 2384 if (PL_bufptr != PL_bufend)
cea2e8a9 2385 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2386 PL_lex_repl = Nullsv;
2387 }
79072805
LW
2388 /* FALLTHROUGH */
2389 case LEX_INTERPCONCAT:
2390#ifdef DEBUGGING
3280af22 2391 if (PL_lex_brackets)
cea2e8a9 2392 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2393#endif
3280af22 2394 if (PL_bufptr == PL_bufend)
79072805
LW
2395 return sublex_done();
2396
3280af22
NIS
2397 if (SvIVX(PL_linestr) == '\'') {
2398 SV *sv = newSVsv(PL_linestr);
2399 if (!PL_lex_inpat)
76e3520e 2400 sv = tokeq(sv);
3280af22 2401 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2402 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2403 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2404 s = PL_bufend;
79072805
LW
2405 }
2406 else {
3280af22 2407 s = scan_const(PL_bufptr);
79072805 2408 if (*s == '\\')
3280af22 2409 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2410 else
3280af22 2411 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2412 }
2413
3280af22
NIS
2414 if (s != PL_bufptr) {
2415 PL_nextval[PL_nexttoke] = yylval;
2416 PL_expect = XTERM;
79072805 2417 force_next(THING);
3280af22 2418 if (PL_lex_starts++)
79072805
LW
2419 Aop(OP_CONCAT);
2420 else {
3280af22 2421 PL_bufptr = s;
cea2e8a9 2422 return yylex();
79072805
LW
2423 }
2424 }
2425
cea2e8a9 2426 return yylex();
a0d0e21e 2427 case LEX_FORMLINE:
3280af22
NIS
2428 PL_lex_state = LEX_NORMAL;
2429 s = scan_formline(PL_bufptr);
2430 if (!PL_lex_formbrack)
a0d0e21e
LW
2431 goto rightbracket;
2432 OPERATOR(';');
79072805
LW
2433 }
2434
3280af22
NIS
2435 s = PL_bufptr;
2436 PL_oldoldbufptr = PL_oldbufptr;
2437 PL_oldbufptr = s;
79072805 2438 DEBUG_p( {
bf49b057
GS
2439 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2440 exp_name[PL_expect], s);
79072805 2441 } )
463ee0b2
LW
2442
2443 retry:
378cc40b
LW
2444 switch (*s) {
2445 default:
7e2040f0 2446 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2447 goto keylookup;
cea2e8a9 2448 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2449 case 4:
2450 case 26:
2451 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2452 case 0:
3280af22
NIS
2453 if (!PL_rsfp) {
2454 PL_last_uni = 0;
2455 PL_last_lop = 0;
2456 if (PL_lex_brackets)
d98d5fff 2457 yyerror("Missing right curly or square bracket");
79072805 2458 TOKEN(0);
463ee0b2 2459 }
3280af22 2460 if (s++ < PL_bufend)
a687059c 2461 goto retry; /* ignore stray nulls */
3280af22
NIS
2462 PL_last_uni = 0;
2463 PL_last_lop = 0;
2464 if (!PL_in_eval && !PL_preambled) {
2465 PL_preambled = TRUE;
2466 sv_setpv(PL_linestr,incl_perldb());
2467 if (SvCUR(PL_linestr))
2468 sv_catpv(PL_linestr,";");
2469 if (PL_preambleav){
2470 while(AvFILLp(PL_preambleav) >= 0) {
2471 SV *tmpsv = av_shift(PL_preambleav);
2472 sv_catsv(PL_linestr, tmpsv);
2473 sv_catpv(PL_linestr, ";");
91b7def8
PP
2474 sv_free(tmpsv);
2475 }
3280af22
NIS
2476 sv_free((SV*)PL_preambleav);
2477 PL_preambleav = NULL;
91b7def8 2478 }
3280af22
NIS
2479 if (PL_minus_n || PL_minus_p) {
2480 sv_catpv(PL_linestr, "LINE: while (<>) {");
2481 if (PL_minus_l)
2482 sv_catpv(PL_linestr,"chomp;");
2483 if (PL_minus_a) {
8fd239a7
CS
2484 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2485 if (gv)
2486 GvIMPORTED_AV_on(gv);
3280af22
NIS
2487 if (PL_minus_F) {
2488 if (strchr("/'\"", *PL_splitstr)
2489 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2490 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2491 else {
2492 char delim;
2493 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2494 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2495 delim = *s;
cea2e8a9 2496 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2497 "q" + (delim == '\''), delim);
3280af22 2498 for (s = PL_splitstr; *s; s++) {
54310121 2499 if (*s == '\\')
3280af22
NIS
2500 sv_catpvn(PL_linestr, "\\", 1);
2501 sv_catpvn(PL_linestr, s, 1);
54310121 2502 }
cea2e8a9 2503 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2504 }
2304df62
AD
2505 }
2506 else
3280af22 2507 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2508 }
79072805 2509 }
3280af22
NIS
2510 sv_catpv(PL_linestr, "\n");
2511 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2512 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2513 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2514 SV *sv = NEWSV(85,0);
2515
2516 sv_upgrade(sv, SVt_PVMG);
3280af22 2517 sv_setsv(sv,PL_linestr);
57843af0 2518 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2519 }
79072805 2520 goto retry;
a687059c 2521 }
e929a76b 2522 do {
226017aa
DD
2523 bool bof = PL_rsfp ? TRUE : FALSE;
2524 if (bof) {
2525#ifdef PERLIO_IS_STDIO
2526# ifdef __GLIBC__
2527# if __GLIBC__ == 1 /* Linux glibc5 */
2528# define FTELL_FOR_PIPE_IS_BROKEN
2529# endif
2530# endif
2531#endif
2532#ifdef FTELL_FOR_PIPE_IS_BROKEN
2533 /* This loses the possibility to detect the bof
2534 * situation on perl -P when the libc5 is being used.
2535 * Workaround? Maybe attach some extra state to PL_rsfp?
2536 */
2537 if (!PL_preprocess)
2538 bof = PerlIO_tell(PL_rsfp) == 0;
2539#else
2540 bof = PerlIO_tell(PL_rsfp) == 0;
2541#endif
2542 }
dea0fc0b
JH
2543 s = filter_gets(PL_linestr, PL_rsfp, 0);
2544 if (s == Nullch) {
e929a76b 2545 fake_eof:
3280af22
NIS
2546 if (PL_rsfp) {
2547 if (PL_preprocess && !PL_in_eval)
2548 (void)PerlProc_pclose(PL_rsfp);
2549 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2550 PerlIO_clearerr(PL_rsfp);
395c3793 2551 else
3280af22
NIS
2552 (void)PerlIO_close(PL_rsfp);
2553 PL_rsfp = Nullfp;
4a9ae47a 2554 PL_doextract = FALSE;
395c3793 2555 }
3280af22
NIS
2556 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2557 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2558 sv_catpv(PL_linestr,";}");
2559 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2560 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2561 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2562 goto retry;
2563 }
3280af22
NIS
2564 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2565 sv_setpv(PL_linestr,"");
79072805 2566 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2567 } else if (bof) {
2568 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2569 s = swallow_bom((U8*)s);
378cc40b 2570 }
3280af22 2571 if (PL_doextract) {
a0d0e21e 2572 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2573 PL_doextract = FALSE;
a0d0e21e
LW
2574
2575 /* Incest with pod. */
2576 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2577 sv_setpv(PL_linestr, "");
2578 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2579 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2580 PL_doextract = FALSE;
a0d0e21e 2581 }
b250498f 2582 }
463ee0b2 2583 incline(s);
3280af22
NIS
2584 } while (PL_doextract);
2585 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2586 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2587 SV *sv = NEWSV(85,0);
a687059c 2588
93a17b20 2589 sv_upgrade(sv, SVt_PVMG);
3280af22 2590 sv_setsv(sv,PL_linestr);
57843af0 2591 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2592 }
3280af22 2593 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2594 if (CopLINE(PL_curcop) == 1) {
3280af22 2595 while (s < PL_bufend && isSPACE(*s))
79072805 2596 s++;
a0d0e21e 2597 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2598 s++;
44a8e56a 2599 d = Nullch;
3280af22 2600 if (!PL_in_eval) {
44a8e56a
PP
2601 if (*s == '#' && *(s+1) == '!')
2602 d = s + 2;
2603#ifdef ALTERNATE_SHEBANG
2604 else {
2605 static char as[] = ALTERNATE_SHEBANG;
2606 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2607 d = s + (sizeof(as) - 1);
2608 }
2609#endif /* ALTERNATE_SHEBANG */
2610 }
2611 if (d) {
b8378b72 2612 char *ipath;
774d564b 2613 char *ipathend;
b8378b72 2614
774d564b 2615 while (isSPACE(*d))
b8378b72
CS
2616 d++;
2617 ipath = d;
774d564b
PP
2618 while (*d && !isSPACE(*d))
2619 d++;
2620 ipathend = d;
2621
2622#ifdef ARG_ZERO_IS_SCRIPT
2623 if (ipathend > ipath) {
2624 /*
2625 * HP-UX (at least) sets argv[0] to the script name,
2626 * which makes $^X incorrect. And Digital UNIX and Linux,
2627 * at least, set argv[0] to the basename of the Perl
2628 * interpreter. So, having found "#!", we'll set it right.
2629 */
2630 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2631 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2632 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2633 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2634 SvSETMAGIC(x);
2635 }
774d564b 2636 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2637 }
774d564b 2638#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2639
2640 /*
2641 * Look for options.
2642 */
748a9306 2643 d = instr(s,"perl -");
84e30d1a 2644 if (!d) {
748a9306 2645 d = instr(s,"perl");
84e30d1a
GS
2646#if defined(DOSISH)
2647 /* avoid getting into infinite loops when shebang
2648 * line contains "Perl" rather than "perl" */
2649 if (!d) {
2650 for (d = ipathend-4; d >= ipath; --d) {
2651 if ((*d == 'p' || *d == 'P')
2652 && !ibcmp(d, "perl", 4))
2653 {
2654 break;
2655 }
2656 }
2657 if (d < ipath)
2658 d = Nullch;
2659 }
2660#endif
2661 }
44a8e56a
PP
2662#ifdef ALTERNATE_SHEBANG
2663 /*
2664 * If the ALTERNATE_SHEBANG on this system starts with a
2665 * character that can be part of a Perl expression, then if
2666 * we see it but not "perl", we're probably looking at the
2667 * start of Perl code, not a request to hand off to some
2668 * other interpreter. Similarly, if "perl" is there, but
2669 * not in the first 'word' of the line, we assume the line
2670 * contains the start of the Perl program.
44a8e56a
PP
2671 */
2672 if (d && *s != '#') {
774d564b 2673 char *c = ipath;
44a8e56a
PP
2674 while (*c && !strchr("; \t\r\n\f\v#", *c))
2675 c++;
2676 if (c < d)
2677 d = Nullch; /* "perl" not in first word; ignore */
2678 else
2679 *s = '#'; /* Don't try to parse shebang line */
2680 }
774d564b 2681#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2682#ifndef MACOS_TRADITIONAL
748a9306 2683 if (!d &&
44a8e56a 2684 *s == '#' &&
774d564b 2685 ipathend > ipath &&
3280af22 2686 !PL_minus_c &&
748a9306 2687 !instr(s,"indir") &&
3280af22 2688 instr(PL_origargv[0],"perl"))
748a9306 2689 {
9f68db38 2690 char **newargv;
9f68db38 2691
774d564b
PP
2692 *ipathend = '\0';
2693 s = ipathend + 1;
3280af22 2694 while (s < PL_bufend && isSPACE(*s))
9f68db38 2695 s++;
3280af22
NIS
2696 if (s < PL_bufend) {
2697 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2698 newargv[1] = s;
3280af22 2699 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2700 s++;
2701 *s = '\0';
3280af22 2702 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2703 }
2704 else
3280af22 2705 newargv = PL_origargv;
774d564b 2706 newargv[0] = ipath;
80252599 2707 PerlProc_execv(ipath, newargv);
cea2e8a9 2708 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2709 }
bf4acbe4 2710#endif
748a9306 2711 if (d) {
3280af22
NIS
2712 U32 oldpdb = PL_perldb;
2713 bool oldn = PL_minus_n;
2714 bool oldp = PL_minus_p;
748a9306
LW
2715
2716 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2717 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2718
2719 if (*d++ == '-') {
8cc95fdb
PP
2720 do {
2721 if (*d == 'M' || *d == 'm') {
2722 char *m = d;
2723 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2724 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2725 (int)(d - m), m);
2726 }
2727 d = moreswitches(d);
2728 } while (d);
155aba94
GS
2729 if ((PERLDB_LINE && !oldpdb) ||
2730 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2731 /* if we have already added "LINE: while (<>) {",
2732 we must not do it again */
748a9306 2733 {
3280af22
NIS
2734 sv_setpv(PL_linestr, "");
2735 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2736 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2737 PL_preambled = FALSE;
84902520 2738 if (PERLDB_LINE)
3280af22 2739 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2740 goto retry;
2741 }
a0d0e21e 2742 }
79072805 2743 }
9f68db38 2744 }
79072805 2745 }
3280af22
NIS
2746 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2747 PL_bufptr = s;
2748 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2749 return yylex();
ae986130 2750 }
378cc40b 2751 goto retry;
4fdae800 2752 case '\r':
6a27c188 2753#ifdef PERL_STRICT_CR
cea2e8a9
GS
2754 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2755 Perl_croak(aTHX_
cc507455 2756 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2757#endif
4fdae800 2758 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2759#ifdef MACOS_TRADITIONAL
2760 case '\312':
2761#endif
378cc40b
LW
2762 s++;
2763 goto retry;
378cc40b 2764 case '#':
e929a76b 2765 case '\n':
3280af22 2766 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2767 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2768 /* handle eval qq[#line 1 "foo"\n ...] */
2769 CopLINE_dec(PL_curcop);
2770 incline(s);
2771 }
3280af22 2772 d = PL_bufend;
a687059c 2773 while (s < d && *s != '\n')
378cc40b 2774 s++;
0f85fab0 2775 if (s < d)
378cc40b 2776 s++;
463ee0b2 2777 incline(s);
3280af22
NIS
2778 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2779 PL_bufptr = s;
2780 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2781 return yylex();
a687059c 2782 }
378cc40b 2783 }
a687059c 2784 else {
378cc40b 2785 *s = '\0';
3280af22 2786 PL_bufend = s;
a687059c 2787 }
378cc40b
LW
2788 goto retry;
2789 case '-':
79072805 2790 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2791 s++;
3280af22 2792 PL_bufptr = s;
748a9306
LW
2793 tmp = *s++;
2794
bf4acbe4 2795 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2796 s++;
2797
2798 if (strnEQ(s,"=>",2)) {
3280af22 2799 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2800 OPERATOR('-'); /* unary minus */
2801 }
3280af22
NIS
2802 PL_last_uni = PL_oldbufptr;
2803 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2804 switch (tmp) {
79072805
LW
2805 case 'r': FTST(OP_FTEREAD);
2806 case 'w': FTST(OP_FTEWRITE);
2807 case 'x': FTST(OP_FTEEXEC);
2808 case 'o': FTST(OP_FTEOWNED);
2809 case 'R': FTST(OP_FTRREAD);
2810 case 'W': FTST(OP_FTRWRITE);
2811 case 'X': FTST(OP_FTREXEC);
2812 case 'O': FTST(OP_FTROWNED);
2813 case 'e': FTST(OP_FTIS);
2814 case 'z': FTST(OP_FTZERO);
2815 case 's': FTST(OP_FTSIZE);
2816 case 'f': FTST(OP_FTFILE);
2817 case 'd': FTST(OP_FTDIR);
2818 case 'l': FTST(OP_FTLINK);
2819 case 'p': FTST(OP_FTPIPE);
2820 case 'S': FTST(OP_FTSOCK);
2821 case 'u': FTST(OP_FTSUID);
2822 case 'g': FTST(OP_FTSGID);
2823 case 'k': FTST(OP_FTSVTX);
2824 case 'b': FTST(OP_FTBLK);
2825 case 'c': FTST(OP_FTCHR);
2826 case 't': FTST(OP_FTTTY);
2827 case 'T': FTST(OP_FTTEXT);
2828 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2829 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2830 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2831 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2832 default:
cea2e8a9 2833 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2834 break;
2835 }
2836 }
a687059c
LW
2837 tmp = *s++;
2838 if (*s == tmp) {
2839 s++;
3280af22 2840 if (PL_expect == XOPERATOR)
79072805
LW
2841 TERM(POSTDEC);
2842 else
2843 OPERATOR(PREDEC);
2844 }
2845 else if (*s == '>') {
2846 s++;
2847 s = skipspace(s);
7e2040f0 2848 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2849 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2850 TOKEN(ARROW);
79072805 2851 }
748a9306
LW
2852 else if (*s == '$')
2853 OPERATOR(ARROW);
463ee0b2 2854 else
748a9306 2855 TERM(ARROW);
a687059c 2856 }
3280af22 2857 if (PL_expect == XOPERATOR)
79072805
LW
2858 Aop(OP_SUBTRACT);
2859 else {
3280af22 2860 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2861 check_uni();
79072805 2862 OPERATOR('-'); /* unary minus */
2f3197b3 2863 }
79072805 2864
378cc40b 2865 case '+':
a687059c
LW
2866 tmp = *s++;
2867 if (*s == tmp) {
378cc40b 2868 s++;
3280af22 2869 if (PL_expect == XOPERATOR)
79072805
LW
2870 TERM(POSTINC);
2871 else
2872 OPERATOR(PREINC);
378cc40b 2873 }
3280af22 2874 if (PL_expect == XOPERATOR)
79072805
LW
2875 Aop(OP_ADD);
2876 else {
3280af22 2877 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2878 check_uni();
a687059c 2879 OPERATOR('+');
2f3197b3 2880 }
a687059c 2881
378cc40b 2882 case '*':
3280af22
NIS
2883 if (PL_expect != XOPERATOR) {
2884 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2885 PL_expect = XOPERATOR;
2886 force_ident(PL_tokenbuf, '*');
2887 if (!*PL_tokenbuf)
a0d0e21e 2888 PREREF('*');
79072805 2889 TERM('*');
a687059c 2890 }
79072805
LW
2891 s++;
2892 if (*s == '*') {
a687059c 2893 s++;
79072805 2894 PWop(OP_POW);
a687059c 2895 }
79072805
LW
2896 Mop(OP_MULTIPLY);
2897
378cc40b 2898 case '%':
3280af22 2899 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2900 ++s;
2901 Mop(OP_MODULO);
a687059c 2902 }
3280af22
NIS
2903 PL_tokenbuf[0] = '%';
2904 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2905 if (!PL_tokenbuf[1]) {
2906 if (s == PL_bufend)
bbce6d69
PP
2907 yyerror("Final % should be \\% or %name");
2908 PREREF('%');
a687059c 2909 }
3280af22 2910 PL_pending_ident = '%';
bbce6d69 2911 TERM('%');
a687059c 2912
378cc40b 2913 case '^':
79072805 2914 s++;
a0d0e21e 2915 BOop(OP_BIT_XOR);
79072805 2916 case '[':
3280af22 2917 PL_lex_brackets++;
79072805 2918 /* FALL THROUGH */
378cc40b 2919 case '~':
378cc40b 2920 case ',':
378cc40b
LW
2921 tmp = *s++;
2922 OPERATOR(tmp);
a0d0e21e
LW
2923 case ':':
2924 if (s[1] == ':') {
2925 len = 0;
2926 goto just_a_word;
2927 }
2928 s++;
09bef843
SB
2929 switch (PL_expect) {
2930 OP *attrs;
2931 case XOPERATOR:
2932 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2933 break;
2934 PL_bufptr = s; /* update in case we back off */
2935 goto grabattrs;
2936 case XATTRBLOCK:
2937 PL_expect = XBLOCK;
2938 goto grabattrs;
2939 case XATTRTERM:
2940 PL_expect = XTERMBLOCK;
2941 grabattrs:
2942 s = skipspace(s);
2943 attrs = Nullop;
7e2040f0 2944 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2945 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2946 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2947 if (tmp < 0) tmp = -tmp;
2948 switch (tmp) {
2949 case KEY_or:
2950 case KEY_and:
2951 case KEY_for:
2952 case KEY_unless:
2953 case KEY_if:
2954 case KEY_while:
2955 case KEY_until:
2956 goto got_attrs;
2957 default:
2958 break;
2959 }
2960 }
09bef843
SB
2961 if (*d == '(') {
2962 d = scan_str(d,TRUE,TRUE);
2963 if (!d) {
2964 if (PL_lex_stuff) {
2965 SvREFCNT_dec(PL_lex_stuff);
2966 PL_lex_stuff = Nullsv;
2967 }
2968 /* MUST advance bufptr here to avoid bogus
2969 "at end of line" context messages from yyerror().
2970 */
2971 PL_bufptr = s + len;
2972 yyerror("Unterminated attribute parameter in attribute list");
2973 if (attrs)
2974 op_free(attrs);
2975 return 0; /* EOF indicator */
2976 }
2977 }
2978 if (PL_lex_stuff) {
2979 SV *sv = newSVpvn(s, len);
2980 sv_catsv(sv, PL_lex_stuff);
2981 attrs = append_elem(OP_LIST, attrs,
2982 newSVOP(OP_CONST, 0, sv));
2983 SvREFCNT_dec(PL_lex_stuff);
2984 PL_lex_stuff = Nullsv;
2985 }
2986 else {
2987 attrs = append_elem(OP_LIST, attrs,
2988 newSVOP(OP_CONST, 0,
2989 newSVpvn(s, len)));
2990 }
2991 s = skipspace(d);
0120eecf 2992 if (*s == ':' && s[1] != ':')
09bef843 2993 s = skipspace(s+1);
0120eecf
GS
2994 else if (s == d)
2995 break; /* require real whitespace or :'s */
09bef843 2996 }
f9829d6b
GS
2997 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2998 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2999 char q = ((*s == '\'') ? '"' : '\'');
3000 /* If here for an expression, and parsed no attrs, back off. */
3001 if (tmp == '=' && !attrs) {
3002 s = PL_bufptr;
3003 break;
3004 }
3005 /* MUST advance bufptr here to avoid bogus "at end of line"
3006 context messages from yyerror().
3007 */
3008 PL_bufptr = s;
3009 if (!*s)
3010 yyerror("Unterminated attribute list");
3011 else
3012 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3013 q, *s, q));
3014 if (attrs)
3015 op_free(attrs);
3016 OPERATOR(':');
3017 }
f9829d6b 3018 got_attrs:
09bef843
SB
3019 if (attrs) {
3020 PL_nextval[PL_nexttoke].opval = attrs;
3021 force_next(THING);
3022 }
3023 TOKEN(COLONATTR);
3024 }
a0d0e21e 3025 OPERATOR(':');
8990e307
LW
3026 case '(':
3027 s++;
3280af22
NIS
3028 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3029 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3030 else
3280af22 3031 PL_expect = XTERM;
a0d0e21e 3032 TOKEN('(');
378cc40b 3033 case ';':
f4dd75d9 3034 CLINE;
378cc40b
LW
3035 tmp = *s++;
3036 OPERATOR(tmp);
3037 case ')':
378cc40b 3038 tmp = *s++;
16d20bd9
AD
3039 s = skipspace(s);
3040 if (*s == '{')
3041 PREBLOCK(tmp);
378cc40b 3042 TERM(tmp);
79072805
LW
3043 case ']':
3044 s++;
3280af22 3045 if (PL_lex_brackets <= 0)
d98d5fff 3046 yyerror("Unmatched right square bracket");
463ee0b2 3047 else
3280af22
NIS
3048 --PL_lex_brackets;
3049 if (PL_lex_state == LEX_INTERPNORMAL) {
3050 if (PL_lex_brackets == 0) {
a0d0e21e 3051 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3052 PL_lex_state = LEX_INTERPEND;
79072805
LW
3053 }
3054 }
4633a7c4 3055 TERM(']');
79072805
LW
3056 case '{':
3057 leftbracket:
79072805 3058 s++;
3280af22
NIS
3059 if (PL_lex_brackets > 100) {
3060 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3061 if (newlb != PL_lex_brackstack) {
8990e307 3062 SAVEFREEPV(newlb);
3280af22 3063 PL_lex_brackstack = newlb;
8990e307
LW
3064 }
3065 }
3280af22 3066 switch (PL_expect) {
a0d0e21e 3067 case XTERM:
3280af22 3068 if (PL_lex_formbrack) {
a0d0e21e
LW
3069 s--;
3070 PRETERMBLOCK(DO);
3071 }
3280af22
NIS
3072 if (PL_oldoldbufptr == PL_last_lop)
3073 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3074 else
3280af22 3075 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3076 OPERATOR(HASHBRACK);
a0d0e21e 3077 case XOPERATOR:
bf4acbe4 3078 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3079 s++;
44a8e56a 3080 d = s;
3280af22
NIS
3081 PL_tokenbuf[0] = '\0';
3082 if (d < PL_bufend && *d == '-') {
3083 PL_tokenbuf[0] = '-';
44a8e56a 3084 d++;
bf4acbe4 3085 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3086 d++;
3087 }
7e2040f0 3088 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3089 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3090 FALSE, &len);
bf4acbe4 3091 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3092 d++;
3093 if (*d == '}') {
3280af22 3094 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3095 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3096 if (minus)
3097 force_next('-');
748a9306
LW
3098 }
3099 }
3100 /* FALL THROUGH */
09bef843 3101 case XATTRBLOCK:
748a9306 3102 case XBLOCK:
3280af22
NIS
3103 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3104 PL_expect = XSTATE;
a0d0e21e 3105 break;
09bef843 3106 case XATTRTERM:
a0d0e21e 3107 case XTERMBLOCK:
3280af22
NIS
3108 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3109 PL_expect = XSTATE;
a0d0e21e
LW
3110 break;
3111 default: {
3112 char *t;
3280af22
NIS
3113 if (PL_oldoldbufptr == PL_last_lop)
3114 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3115 else
3280af22 3116 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3117 s = skipspace(s);
09ecc4b6 3118 if (*s == '}')
a0d0e21e 3119 OPERATOR(HASHBRACK);
b8a4b1be
GS
3120 /* This hack serves to disambiguate a pair of curlies
3121 * as being a block or an anon hash. Normally, expectation
3122 * determines that, but in cases where we're not in a
3123 * position to expect anything in particular (like inside
3124 * eval"") we have to resolve the ambiguity. This code
3125 * covers the case where the first term in the curlies is a
3126 * quoted string. Most other cases need to be explicitly
3127 * disambiguated by prepending a `+' before the opening
3128 * curly in order to force resolution as an anon hash.
3129 *
3130 * XXX should probably propagate the outer expectation
3131 * into eval"" to rely less on this hack, but that could
3132 * potentially break current behavior of eval"".
3133 * GSAR 97-07-21
3134 */
3135 t = s;
3136 if (*s == '\'' || *s == '"' || *s == '`') {
3137 /* common case: get past first string, handling escapes */
3280af22 3138 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3139 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3140 t++;
3141 t++;
a0d0e21e 3142 }
b8a4b1be 3143 else if (*s == 'q') {
3280af22 3144 if (++t < PL_bufend
b8a4b1be 3145 && (!isALNUM(*t)
3280af22 3146 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3147 && !isALNUM(*t))))
3148 {
b8a4b1be
GS
3149 char *tmps;
3150 char open, close, term;
3151 I32 brackets = 1;
3152
3280af22 3153 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3154 t++;
3155 term = *t;
3156 open = term;
3157 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3158 term = tmps[5];
3159 close = term;
3160 if (open == close)
3280af22
NIS
3161 for (t++; t < PL_bufend; t++) {
3162 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3163 t++;
6d07e5e9 3164 else if (*t == open)
b8a4b1be
GS
3165 break;
3166 }
3167 else
3280af22
NIS
3168 for (t++; t < PL_bufend; t++) {
3169 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3170 t++;
6d07e5e9 3171 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3172 break;
3173 else if (*t == open)
3174 brackets++;
3175 }
3176 }
3177 t++;
a0d0e21e 3178 }
7e2040f0 3179 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3180 t += UTF8SKIP(t);
7e2040f0 3181 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3182 t += UTF8SKIP(t);
a0d0e21e 3183 }
3280af22 3184 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3185 t++;
b8a4b1be
GS
3186 /* if comma follows first term, call it an anon hash */
3187 /* XXX it could be a comma expression with loop modifiers */
3280af22 3188 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3189 || (*t == '=' && t[1] == '>')))
a0d0e21e 3190 OPERATOR(HASHBRACK);
3280af22 3191 if (PL_expect == XREF)
4e4e412b 3192 PL_expect = XTERM;
a0d0e21e 3193 else {
3280af22
NIS
3194 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3195 PL_expect = XSTATE;
a0d0e21e 3196 }
8990e307 3197 }
a0d0e21e 3198 break;
463ee0b2 3199 }
57843af0 3200 yylval.ival = CopLINE(PL_curcop);
79072805 3201 if (isSPACE(*s) || *s == '#')
3280af22 3202 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3203 TOKEN('{');
378cc40b 3204 case '}':
79072805
LW
3205 rightbracket:
3206 s++;
3280af22 3207 if (PL_lex_brackets <= 0)
d98d5fff 3208 yyerror("Unmatched right curly bracket");
463ee0b2 3209 else
3280af22 3210 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3211 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3212 PL_lex_formbrack = 0;
3213 if (PL_lex_state == LEX_INTERPNORMAL) {
3214 if (PL_lex_brackets == 0) {
9059aa12
LW
3215 if (PL_expect & XFAKEBRACK) {
3216 PL_expect &= XENUMMASK;
3280af22
NIS
3217 PL_lex_state = LEX_INTERPEND;
3218 PL_bufptr = s;
cea2e8a9 3219 return yylex(); /* ignore fake brackets */
79072805 3220 }
fa83b5b6 3221 if (*s == '-' && s[1] == '>')
3280af22 3222 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3223 else if (*s != '[' && *s != '{')
3280af22 3224 PL_lex_state = LEX_INTERPEND;
79072805
LW
3225 }
3226 }
9059aa12
LW
3227 if (PL_expect & XFAKEBRACK) {
3228 PL_expect &= XENUMMASK;
3280af22 3229 PL_bufptr = s;
cea2e8a9 3230 return yylex(); /* ignore fake brackets */
748a9306 3231 }
79072805
LW
3232 force_next('}');
3233 TOKEN(';');
378cc40b
LW
3234 case '&':
3235 s++;
3236 tmp = *s++;
3237 if (tmp == '&')
a0d0e21e 3238 AOPERATOR(ANDAND);
378cc40b 3239 s--;
3280af22 3240 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3241 if (ckWARN(WARN_SEMICOLON)
3242 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3243 {
57843af0 3244 CopLINE_dec(PL_curcop);
cea2e8a9 3245 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3246 CopLINE_inc(PL_curcop);
463ee0b2 3247 }
79072805 3248 BAop(OP_BIT_AND);
463ee0b2 3249 }
79072805 3250
3280af22
NIS
3251 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3252 if (*PL_tokenbuf) {
3253 PL_expect = XOPERATOR;
3254 force_ident(PL_tokenbuf, '&');
463ee0b2 3255 }
79072805
LW
3256 else
3257 PREREF('&');
c07a80fd 3258 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3259 TERM('&');
3260
378cc40b
LW
3261 case '|':
3262 s++;
3263 tmp = *s++;
3264 if (tmp == '|')
a0d0e21e 3265 AOPERATOR(OROR);
378cc40b 3266 s--;
79072805 3267 BOop(OP_BIT_OR);
378cc40b
LW
3268 case '=':
3269 s++;
3270 tmp = *s++;
3271 if (tmp == '=')
79072805
LW
3272 Eop(OP_EQ);
3273 if (tmp == '>')
3274 OPERATOR(',');
378cc40b 3275 if (tmp == '~')
79072805 3276 PMop(OP_MATCH);
599cee73 3277 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3278 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3279 s--;
3280af22
NIS
3280 if (PL_expect == XSTATE && isALPHA(tmp) &&
3281 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3282 {
3280af22
NIS
3283 if (PL_in_eval && !PL_rsfp) {
3284 d = PL_bufend;
a5f75d66
AD
3285 while (s < d) {
3286 if (*s++ == '\n') {
3287 incline(s);
3288 if (strnEQ(s,"=cut",4)) {
3289 s = strchr(s,'\n');
3290 if (s)
3291 s++;
3292 else
3293 s = d;
3294 incline(s);
3295 goto retry;
3296 }
3297 }
3298 }
3299 goto retry;
3300 }
3280af22
NIS
3301 s = PL_bufend;
3302 PL_doextract = TRUE;
a0d0e21e
LW
3303 goto retry;
3304 }
3280af22 3305 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3306 char *t;
51882d45 3307#ifdef PERL_STRICT_CR
bf4acbe4 3308 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3309#else
bf4acbe4 3310 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3311#endif
a0d0e21e
LW
3312 if (*t == '\n' || *t == '#') {
3313 s--;
3280af22 3314 PL_expect = XBLOCK;
a0d0e21e
LW
3315 goto leftbracket;
3316 }
79072805 3317 }
a0d0e21e
LW
3318 yylval.ival = 0;
3319 OPERATOR(ASSIGNOP);
378cc40b
LW
3320 case '!':
3321 s++;
3322 tmp = *s++;
3323 if (tmp == '=')
79072805 3324 Eop(OP_NE);
378cc40b 3325 if (tmp == '~')
79072805 3326 PMop(OP_NOT);
378cc40b
LW
3327 s--;
3328 OPERATOR('!');
3329 case '<':
3280af22 3330 if (PL_expect != XOPERATOR) {
93a17b20 3331 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3332 check_uni();
79072805
LW
3333 if (s[1] == '<')
3334 s = scan_heredoc(s);
3335 else
3336 s = scan_inputsymbol(s);
3337 TERM(sublex_start());
378cc40b
LW
3338 }
3339 s++;
3340 tmp = *s++;
3341 if (tmp == '<')
79072805 3342 SHop(OP_LEFT_SHIFT);
395c3793
LW
3343 if (tmp == '=') {
3344 tmp = *s++;
3345 if (tmp == '>')
79072805 3346 Eop(OP_NCMP);
395c3793 3347 s--;
79072805 3348 Rop(OP_LE);
395c3793 3349 }
378cc40b 3350 s--;
79072805 3351 Rop(OP_LT);
378cc40b
LW
3352 case '>':
3353 s++;
3354 tmp = *s++;
3355 if (tmp == '>')
79072805 3356 SHop(OP_RIGHT_SHIFT);
378cc40b 3357 if (tmp == '=')
79072805 3358 Rop(OP_GE);
378cc40b 3359 s--;
79072805 3360 Rop(OP_GT);
378cc40b
LW
3361
3362 case '$':
bbce6d69
PP
3363 CLINE;
3364
3280af22
NIS
3365 if (PL_expect == XOPERATOR) {
3366 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3367 PL_expect = XTERM;
a0d0e21e 3368 depcom();
bbce6d69 3369 return ','; /* grandfather non-comma-format format */
a0d0e21e 3370 }
8990e307 3371 }
a0d0e21e 3372
7e2040f0 3373 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3374 PL_tokenbuf[0] = '@';
376b8730
SM
3375 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3376 sizeof PL_tokenbuf - 1, FALSE);
3377 if (PL_expect == XOPERATOR)
3378 no_op("Array length", s);
3280af22 3379 if (!PL_tokenbuf[1])
a0d0e21e 3380 PREREF(DOLSHARP);
3280af22
NIS
3381 PL_expect = XOPERATOR;
3382 PL_pending_ident = '#';
463ee0b2 3383 TOKEN(DOLSHARP);
79072805 3384 }
bbce6d69 3385
3280af22 3386 PL_tokenbuf[0] = '$';
376b8730
SM
3387 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3388 sizeof PL_tokenbuf - 1, FALSE);
3389 if (PL_expect == XOPERATOR)
3390 no_op("Scalar", s);
3280af22
NIS
3391 if (!PL_tokenbuf[1]) {
3392 if (s == PL_bufend)
bbce6d69
PP
3393 yyerror("Final $ should be \\$ or $name");
3394 PREREF('$');
8990e307 3395 }
a0d0e21e 3396
bbce6d69 3397 /* This kludge not intended to be bulletproof. */
3280af22 3398 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3399 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3400 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3401 yylval.opval->op_private = OPpCONST_ARYBASE;
3402 TERM(THING);
3403 }
3404
ff68c719 3405 d = s;
69d2bceb 3406 tmp = (I32)*s;
3280af22 3407 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3408 s = skipspace(s);
3409
3280af22 3410 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3411 char *t;
3412 if (*s == '[') {
3280af22 3413 PL_tokenbuf[0] = '@';
599cee73 3414 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3415 for(t = s + 1;
7e2040f0 3416 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3417 t++) ;
a0d0e21e 3418 if (*t++ == ',') {
3280af22
NIS
3419 PL_bufptr = skipspace(PL_bufptr);
3420 while (t < PL_bufend && *t != ']')
bbce6d69 3421 t++;
cea2e8a9 3422 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3423 "Multidimensional syntax %.*s not supported",
3424 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3425 }
3426 }
bbce6d69
PP
3427 }
3428 else if (*s == '{') {
3280af22 3429 PL_tokenbuf[0] = '%';
599cee73 3430 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3431 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3432 {
3280af22 3433 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3434 STRLEN len;
3435 for (t++; isSPACE(*t); t++) ;
7e2040f0 3436 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3437 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3438 for (; isSPACE(*t); t++) ;
864dbfa3 3439 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3440 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3441 "You need to quote \"%s\"", tmpbuf);
748a9306 3442 }
93a17b20
LW
3443 }
3444 }
2f3197b3 3445 }
bbce6d69 3446
3280af22 3447 PL_expect = XOPERATOR;
69d2bceb 3448 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3449 bool islop = (PL_last_lop == PL_oldoldbufptr);
3450 if (!islop || PL_last_lop_op == OP_GREPSTART)
3451 PL_expect = XOPERATOR;
bbce6d69 3452 else if (strchr("$@\"'`q", *s))
3280af22 3453 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3454 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3455 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3456 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3457 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3458 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3459 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3460 /* binary operators exclude handle interpretations */
3461 switch (tmp) {
3462 case -KEY_x:
3463 case -KEY_eq:
3464 case -KEY_ne:
3465 case -KEY_gt:
3466 case -KEY_lt:
3467 case -KEY_ge:
3468 case -KEY_le:
3469 case -KEY_cmp:
3470 break;
3471 default:
3280af22 3472 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3473 break;
3474 }
3475 }
68dc0745
PP
3476 else {
3477 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3478 if (gv && GvCVu(gv))
3280af22 3479 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3480 }
93a17b20 3481 }
bbce6d69 3482 else if (isDIGIT(*s))
3280af22 3483 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3484 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3485 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3486 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3487 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3488 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3489 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3490 }
3280af22 3491 PL_pending_ident = '$';
79072805 3492 TOKEN('$');
378cc40b
LW
3493
3494 case '@':
3280af22 3495 if (PL_expect == XOPERATOR)
bbce6d69 3496 no_op("Array", s);
3280af22
NIS
3497 PL_tokenbuf[0] = '@';
3498 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3499 if (!PL_tokenbuf[1]) {
3500 if (s == PL_bufend)
bbce6d69
PP
3501 yyerror("Final @ should be \\@ or @name");
3502 PREREF('@');
3503 }
3280af22 3504 if (PL_lex_state == LEX_NORMAL)
ff68c719 3505 s = skipspace(s);
3280af22 3506 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3507 if (*s == '{')
3280af22 3508 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3509
3510 /* Warn about @ where they meant $. */
599cee73 3511 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3512 if (*s == '[' || *s == '{') {
3513 char *t = s + 1;
7e2040f0 3514 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3515 t++;
3516 if (*t == '}' || *t == ']') {
3517 t++;
3280af22 3518 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3519 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3520 "Scalar value %.*s better written as $%.*s",
3280af22 3521 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3522 }
93a17b20
LW
3523 }
3524 }
463ee0b2 3525 }
3280af22 3526 PL_pending_ident = '@';
79072805 3527 TERM('@');
378cc40b
LW
3528
3529 case '/': /* may either be division or pattern */
3530 case '?': /* may either be conditional or pattern */
3280af22 3531 if (PL_expect != XOPERATOR) {
c277df42 3532 /* Disable warning on "study /blah/" */
3280af22
NIS
3533 if (PL_oldoldbufptr == PL_last_uni
3534 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3535 || memNE(PL_last_uni, "study", 5)
3536 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3537 check_uni();
8782bef2 3538 s = scan_pat(s,OP_MATCH);
79072805 3539 TERM(sublex_start());
378cc40b
LW
3540 }
3541 tmp = *s++;
a687059c 3542 if (tmp == '/')
79072805 3543 Mop(OP_DIVIDE);
378cc40b
LW
3544 OPERATOR(tmp);
3545
3546 case '.':
51882d45
GS
3547 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3548#ifdef PERL_STRICT_CR
3549 && s[1] == '\n'
3550#else
3551 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3552#endif
3553 && (s == PL_linestart || s[-1] == '\n') )
3554 {
3280af22
NIS
3555 PL_lex_formbrack = 0;
3556 PL_expect = XSTATE;
79072805
LW
3557 goto rightbracket;
3558 }
3280af22 3559 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3560 tmp = *s++;
a687059c
LW
3561 if (*s == tmp) {
3562 s++;
2f3197b3
LW
3563 if (*s == tmp) {
3564 s++;
79072805 3565 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3566 }
3567 else
79072805 3568 yylval.ival = 0;
378cc40b 3569 OPERATOR(DOTDOT);
a687059c 3570 }
3280af22 3571 if (PL_expect != XOPERATOR)
2f3197b3 3572 check_uni();
79072805 3573 Aop(OP_CONCAT);
378cc40b
LW
3574 }
3575 /* FALL THROUGH */
3576 case '0': case '1': case '2': case '3': case '4':
3577 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3578 s = scan_num(s, &yylval);
3280af22 3579 if (PL_expect == XOPERATOR)
8990e307 3580 no_op("Number",s);
79072