This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re├źntrancy fix.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
a790bc05
PP
72/* XXX If this causes problems, set i_unistd=undef in the hint file. */
73#ifdef I_UNISTD
74# include <unistd.h> /* Needed for execv() */
75#endif
76
77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e 82#ifdef USE_PURE_BISON
20141f0e
RI
83#ifndef YYMAXLEVEL
84#define YYMAXLEVEL 100
85#endif
86YYSTYPE* yylval_pointer[YYMAXLEVEL];
87int* yychar_pointer[YYMAXLEVEL];
88int yyactlevel = 0;
22c35a8c
GS
89# undef yylval
90# undef yychar
20141f0e
RI
91# define yylval (*yylval_pointer[yyactlevel])
92# define yychar (*yychar_pointer[yyactlevel])
93# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
94# undef yylex
95# define yylex() Perl_yylex(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
3280af22
NIS
135#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
136#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
137#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
138#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
139#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
140#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
141#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
142#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
143#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
144#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
145#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
146#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
147#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
148#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
149#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
150#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
151#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
152#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
153#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
154#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 155
a687059c
LW
156/* This bit of chicanery makes a unary function followed by
157 * a parenthesis into a function with one argument, highest precedence.
158 */
2f3197b3 159#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
160 PL_expect = XTERM, \
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
163 PL_last_lop_op = f, \
a687059c
LW
164 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165
79072805 166#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
167 PL_bufptr = s, \
168 PL_last_uni = PL_oldbufptr, \
79072805
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
9f68db38 171/* grandfather return to old style */
3280af22 172#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 173
ffb4593c
NT
174/*
175 * S_ao
176 *
177 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
178 * into an OP_ANDASSIGN or OP_ORASSIGN
179 */
180
76e3520e 181STATIC int
cea2e8a9 182S_ao(pTHX_ int toketype)
a0d0e21e 183{
3280af22
NIS
184 if (*PL_bufptr == '=') {
185 PL_bufptr++;
a0d0e21e
LW
186 if (toketype == ANDAND)
187 yylval.ival = OP_ANDASSIGN;
188 else if (toketype == OROR)
189 yylval.ival = OP_ORASSIGN;
190 toketype = ASSIGNOP;
191 }
192 return toketype;
193}
194
ffb4593c
NT
195/*
196 * S_no_op
197 * When Perl expects an operator and finds something else, no_op
198 * prints the warning. It always prints "<something> found where
199 * operator expected. It prints "Missing semicolon on previous line?"
200 * if the surprise occurs at the start of the line. "do you need to
201 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
202 * where the compiler doesn't know if foo is a method call or a function.
203 * It prints "Missing operator before end of line" if there's nothing
204 * after the missing operator, or "... before <...>" if there is something
205 * after the missing operator.
206 */
207
76e3520e 208STATIC void
cea2e8a9 209S_no_op(pTHX_ char *what, char *s)
463ee0b2 210{
3280af22
NIS
211 char *oldbp = PL_bufptr;
212 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 213
1189a94a
GS
214 if (!s)
215 s = oldbp;
07c798fb 216 else
1189a94a 217 PL_bufptr = s;
cea2e8a9 218 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 219 if (is_first)
cea2e8a9 220 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 221 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 222 char *t;
7e2040f0 223 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 224 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 225 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 226 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 227 }
07c798fb
HS
228 else {
229 assert(s >= oldbp);
cea2e8a9 230 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 231 }
3280af22 232 PL_bufptr = oldbp;
8990e307
LW
233}
234
ffb4593c
NT
235/*
236 * S_missingterm
237 * Complain about missing quote/regexp/heredoc terminator.
238 * If it's called with (char *)NULL then it cauterizes the line buffer.
239 * If we're in a delimited string and the delimiter is a control
240 * character, it's reformatted into a two-char sequence like ^C.
241 * This is fatal.
242 */
243
76e3520e 244STATIC void
cea2e8a9 245S_missingterm(pTHX_ char *s)
8990e307
LW
246{
247 char tmpbuf[3];
248 char q;
249 if (s) {
250 char *nl = strrchr(s,'\n');
d2719217 251 if (nl)
8990e307
LW
252 *nl = '\0';
253 }
9d116dd7
JH
254 else if (
255#ifdef EBCDIC
256 iscntrl(PL_multi_close)
257#else
258 PL_multi_close < 32 || PL_multi_close == 127
259#endif
260 ) {
8990e307 261 *tmpbuf = '^';
3280af22 262 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
263 s = "\\n";
264 tmpbuf[2] = '\0';
265 s = tmpbuf;
266 }
267 else {
3280af22 268 *tmpbuf = PL_multi_close;
8990e307
LW
269 tmpbuf[1] = '\0';
270 s = tmpbuf;
271 }
272 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 273 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 274}
79072805 275
ffb4593c
NT
276/*
277 * Perl_deprecate
ffb4593c
NT
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
9cbb5ea2 290 * Deprecate a comma-less variable list.
ffb4593c
NT
291 */
292
76e3520e 293STATIC void
cea2e8a9 294S_depcom(pTHX)
a0d0e21e
LW
295{
296 deprecate("comma-less variable list");
297}
298
ffb4593c 299/*
9cbb5ea2
GS
300 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
301 * utf16-to-utf8-reversed.
ffb4593c
NT
302 */
303
c39cd008
GS
304#ifdef PERL_CR_FILTER
305static void
306strip_return(SV *sv)
307{
308 register char *s = SvPVX(sv);
309 register char *e = s + SvCUR(sv);
310 /* outer loop optimized to do nothing if there are no CR-LFs */
311 while (s < e) {
312 if (*s++ == '\r' && *s == '\n') {
313 /* hit a CR-LF, need to copy the rest */
314 register char *d = s - 1;
315 *d++ = *s++;
316 while (s < e) {
317 if (*s == '\r' && s[1] == '\n')
318 s++;
319 *d++ = *s++;
320 }
321 SvCUR(sv) -= s - d;
322 return;
323 }
324 }
325}
a868473f 326
76e3520e 327STATIC I32
c39cd008 328S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 329{
c39cd008
GS
330 I32 count = FILTER_READ(idx+1, sv, maxlen);
331 if (count > 0 && !maxlen)
332 strip_return(sv);
333 return count;
a868473f
NIS
334}
335#endif
336
ffb4593c
NT
337/*
338 * Perl_lex_start
9cbb5ea2
GS
339 * Initialize variables. Uses the Perl save_stack to save its state (for
340 * recursive calls to the parser).
ffb4593c
NT
341 */
342
a0d0e21e 343void
864dbfa3 344Perl_lex_start(pTHX_ SV *line)
79072805 345{
0f15f207 346 dTHR;
8990e307
LW
347 char *s;
348 STRLEN len;
349
3280af22
NIS
350 SAVEI32(PL_lex_dojoin);
351 SAVEI32(PL_lex_brackets);
3280af22
NIS
352 SAVEI32(PL_lex_casemods);
353 SAVEI32(PL_lex_starts);
354 SAVEI32(PL_lex_state);
7766f137 355 SAVEVPTR(PL_lex_inpat);
3280af22 356 SAVEI32(PL_lex_inwhat);
18b09519
GS
357 if (PL_lex_state == LEX_KNOWNEXT) {
358 I32 toke = PL_nexttoke;
359 while (--toke >= 0) {
360 SAVEI32(PL_nexttype[toke]);
361 SAVEVPTR(PL_nextval[toke]);
362 }
363 SAVEI32(PL_nexttoke);
18b09519 364 }
57843af0 365 SAVECOPLINE(PL_curcop);
3280af22
NIS
366 SAVEPPTR(PL_bufptr);
367 SAVEPPTR(PL_bufend);
368 SAVEPPTR(PL_oldbufptr);
369 SAVEPPTR(PL_oldoldbufptr);
370 SAVEPPTR(PL_linestart);
371 SAVESPTR(PL_linestr);
372 SAVEPPTR(PL_lex_brackstack);
373 SAVEPPTR(PL_lex_casestack);
c76ac1ee 374 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
375 SAVESPTR(PL_lex_stuff);
376 SAVEI32(PL_lex_defer);
09bef843 377 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 378 SAVESPTR(PL_lex_repl);
bebdddfc
GS
379 SAVEINT(PL_expect);
380 SAVEINT(PL_lex_expect);
3280af22
NIS
381
382 PL_lex_state = LEX_NORMAL;
383 PL_lex_defer = 0;
384 PL_expect = XSTATE;
385 PL_lex_brackets = 0;
3280af22
NIS
386 New(899, PL_lex_brackstack, 120, char);
387 New(899, PL_lex_casestack, 12, char);
388 SAVEFREEPV(PL_lex_brackstack);
389 SAVEFREEPV(PL_lex_casestack);
390 PL_lex_casemods = 0;
391 *PL_lex_casestack = '\0';
392 PL_lex_dojoin = 0;
393 PL_lex_starts = 0;
394 PL_lex_stuff = Nullsv;
395 PL_lex_repl = Nullsv;
396 PL_lex_inpat = 0;
76be56bc 397 PL_nexttoke = 0;
3280af22 398 PL_lex_inwhat = 0;
09bef843 399 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
400 PL_linestr = line;
401 if (SvREADONLY(PL_linestr))
402 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
403 s = SvPV(PL_linestr, len);
8990e307 404 if (len && s[len-1] != ';') {
3280af22
NIS
405 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
406 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
407 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 408 }
3280af22
NIS
409 SvTEMP_off(PL_linestr);
410 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
411 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
412 SvREFCNT_dec(PL_rs);
79cb57f6 413 PL_rs = newSVpvn("\n", 1);
3280af22 414 PL_rsfp = 0;
79072805 415}
a687059c 416
ffb4593c
NT
417/*
418 * Perl_lex_end
9cbb5ea2
GS
419 * Finalizer for lexing operations. Must be called when the parser is
420 * done with the lexer.
ffb4593c
NT
421 */
422
463ee0b2 423void
864dbfa3 424Perl_lex_end(pTHX)
463ee0b2 425{
3280af22 426 PL_doextract = FALSE;
463ee0b2
LW
427}
428
ffb4593c
NT
429/*
430 * S_incline
431 * This subroutine has nothing to do with tilting, whether at windmills
432 * or pinball tables. Its name is short for "increment line". It
57843af0 433 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 434 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
435 * # line 500 "foo.pm"
436 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
437 */
438
76e3520e 439STATIC void
cea2e8a9 440S_incline(pTHX_ char *s)
463ee0b2 441{
0f15f207 442 dTHR;
463ee0b2
LW
443 char *t;
444 char *n;
73659bf1 445 char *e;
463ee0b2 446 char ch;
463ee0b2 447
57843af0 448 CopLINE_inc(PL_curcop);
463ee0b2
LW
449 if (*s++ != '#')
450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
452 if (strnEQ(s, "line", 4))
453 s += 4;
454 else
455 return;
456 if (*s == ' ' || *s == '\t')
457 s++;
458 else
459 return;
bf4acbe4 460 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
461 if (!isDIGIT(*s))
462 return;
463 n = s;
464 while (isDIGIT(*s))
465 s++;
bf4acbe4 466 while (SPACE_OR_TAB(*s))
463ee0b2 467 s++;
73659bf1 468 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 469 s++;
73659bf1
GS
470 e = t + 1;
471 }
463ee0b2 472 else {
463ee0b2 473 for (t = s; !isSPACE(*t); t++) ;
73659bf1 474 e = t;
463ee0b2 475 }
bf4acbe4 476 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
477 e++;
478 if (*e != '\n' && *e != '\0')
479 return; /* false alarm */
480
463ee0b2
LW
481 ch = *t;
482 *t = '\0';
f4dd75d9
GS
483 if (t - s > 0) {
484#ifdef USE_ITHREADS
485 Safefree(CopFILE(PL_curcop));
486#else
487 SvREFCNT_dec(CopFILEGV(PL_curcop));
488#endif
57843af0 489 CopFILE_set(PL_curcop, s);
f4dd75d9 490 }
463ee0b2 491 *t = ch;
57843af0 492 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
493}
494
ffb4593c
NT
495/*
496 * S_skipspace
497 * Called to gobble the appropriate amount and type of whitespace.
498 * Skips comments as well.
499 */
500
76e3520e 501STATIC char *
cea2e8a9 502S_skipspace(pTHX_ register char *s)
a687059c 503{
11343788 504 dTHR;
3280af22 505 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 506 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
507 s++;
508 return s;
509 }
510 for (;;) {
fd049845 511 STRLEN prevlen;
09bef843
SB
512 SSize_t oldprevlen, oldoldprevlen;
513 SSize_t oldloplen, oldunilen;
60e6418e
GS
514 while (s < PL_bufend && isSPACE(*s)) {
515 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
516 incline(s);
517 }
ffb4593c
NT
518
519 /* comment */
3280af22
NIS
520 if (s < PL_bufend && *s == '#') {
521 while (s < PL_bufend && *s != '\n')
463ee0b2 522 s++;
60e6418e 523 if (s < PL_bufend) {
463ee0b2 524 s++;
60e6418e
GS
525 if (PL_in_eval && !PL_rsfp) {
526 incline(s);
527 continue;
528 }
529 }
463ee0b2 530 }
ffb4593c
NT
531
532 /* only continue to recharge the buffer if we're at the end
533 * of the buffer, we're not reading from a source filter, and
534 * we're in normal lexing mode
535 */
09bef843
SB
536 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
537 PL_lex_state == LEX_FORMLINE)
463ee0b2 538 return s;
ffb4593c
NT
539
540 /* try to recharge the buffer */
9cbb5ea2
GS
541 if ((s = filter_gets(PL_linestr, PL_rsfp,
542 (prevlen = SvCUR(PL_linestr)))) == Nullch)
543 {
544 /* end of file. Add on the -p or -n magic */
3280af22
NIS
545 if (PL_minus_n || PL_minus_p) {
546 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
547 ";}continue{print or die qq(-p destination: $!\\n)" :
548 "");
3280af22
NIS
549 sv_catpv(PL_linestr,";}");
550 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
551 }
552 else
3280af22 553 sv_setpv(PL_linestr,";");
ffb4593c
NT
554
555 /* reset variables for next time we lex */
9cbb5ea2
GS
556 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
557 = SvPVX(PL_linestr);
3280af22 558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
559
560 /* Close the filehandle. Could be from -P preprocessor,
561 * STDIN, or a regular file. If we were reading code from
562 * STDIN (because the commandline held no -e or filename)
563 * then we don't close it, we reset it so the code can
564 * read from STDIN too.
565 */
566
3280af22
NIS
567 if (PL_preprocess && !PL_in_eval)
568 (void)PerlProc_pclose(PL_rsfp);
569 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
570 PerlIO_clearerr(PL_rsfp);
8990e307 571 else
3280af22
NIS
572 (void)PerlIO_close(PL_rsfp);
573 PL_rsfp = Nullfp;
463ee0b2
LW
574 return s;
575 }
ffb4593c
NT
576
577 /* not at end of file, so we only read another line */
09bef843
SB
578 /* make corresponding updates to old pointers, for yyerror() */
579 oldprevlen = PL_oldbufptr - PL_bufend;
580 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
581 if (PL_last_uni)
582 oldunilen = PL_last_uni - PL_bufend;
583 if (PL_last_lop)
584 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
585 PL_linestart = PL_bufptr = s + prevlen;
586 PL_bufend = s + SvCUR(PL_linestr);
587 s = PL_bufptr;
09bef843
SB
588 PL_oldbufptr = s + oldprevlen;
589 PL_oldoldbufptr = s + oldoldprevlen;
590 if (PL_last_uni)
591 PL_last_uni = s + oldunilen;
592 if (PL_last_lop)
593 PL_last_lop = s + oldloplen;
a0d0e21e 594 incline(s);
ffb4593c
NT
595
596 /* debugger active and we're not compiling the debugger code,
597 * so store the line into the debugger's array of lines
598 */
3280af22 599 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
600 SV *sv = NEWSV(85,0);
601
602 sv_upgrade(sv, SVt_PVMG);
3280af22 603 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 604 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 605 }
463ee0b2 606 }
a687059c 607}
378cc40b 608
ffb4593c
NT
609/*
610 * S_check_uni
611 * Check the unary operators to ensure there's no ambiguity in how they're
612 * used. An ambiguous piece of code would be:
613 * rand + 5
614 * This doesn't mean rand() + 5. Because rand() is a unary operator,
615 * the +5 is its argument.
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_check_uni(pTHX)
ba106d47 620{
2f3197b3 621 char *s;
a0d0e21e 622 char *t;
0453d815 623 dTHR;
2f3197b3 624
3280af22 625 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 626 return;
3280af22
NIS
627 while (isSPACE(*PL_last_uni))
628 PL_last_uni++;
7e2040f0 629 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 630 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 631 return;
0453d815 632 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 633 char ch = *s;
0453d815
PM
634 *s = '\0';
635 Perl_warner(aTHX_ WARN_AMBIGUOUS,
636 "Warning: Use of \"%s\" without parens is ambiguous",
637 PL_last_uni);
638 *s = ch;
639 }
2f3197b3
LW
640}
641
ffb4593c
NT
642/* workaround to replace the UNI() macro with a function. Only the
643 * hints/uts.sh file mentions this. Other comments elsewhere in the
644 * source indicate Microport Unix might need it too.
645 */
646
ffed7fef
LW
647#ifdef CRIPPLED_CC
648
649#undef UNI
ffed7fef 650#define UNI(f) return uni(f,s)
ffed7fef 651
76e3520e 652STATIC int
cea2e8a9 653S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
654{
655 yylval.ival = f;
3280af22
NIS
656 PL_expect = XTERM;
657 PL_bufptr = s;
8f872242
NIS
658 PL_last_uni = PL_oldbufptr;
659 PL_last_lop_op = f;
ffed7fef
LW
660 if (*s == '(')
661 return FUNC1;
662 s = skipspace(s);
663 if (*s == '(')
664 return FUNC1;
665 else
666 return UNIOP;
667}
668
a0d0e21e
LW
669#endif /* CRIPPLED_CC */
670
ffb4593c
NT
671/*
672 * LOP : macro to build a list operator. Its behaviour has been replaced
673 * with a subroutine, S_lop() for which LOP is just another name.
674 */
675
a0d0e21e
LW
676#define LOP(f,x) return lop(f,x,s)
677
ffb4593c
NT
678/*
679 * S_lop
680 * Build a list operator (or something that might be one). The rules:
681 * - if we have a next token, then it's a list operator [why?]
682 * - if the next thing is an opening paren, then it's a function
683 * - else it's a list operator
684 */
685
76e3520e 686STATIC I32
a0be28da 687S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 688{
0f15f207 689 dTHR;
79072805 690 yylval.ival = f;
35c8bce7 691 CLINE;
3280af22
NIS
692 PL_expect = x;
693 PL_bufptr = s;
694 PL_last_lop = PL_oldbufptr;
695 PL_last_lop_op = f;
696 if (PL_nexttoke)
a0d0e21e 697 return LSTOP;
79072805
LW
698 if (*s == '(')
699 return FUNC;
700 s = skipspace(s);
701 if (*s == '(')
702 return FUNC;
703 else
704 return LSTOP;
705}
706
ffb4593c
NT
707/*
708 * S_force_next
9cbb5ea2 709 * When the lexer realizes it knows the next token (for instance,
ffb4593c 710 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
711 * to know what token to return the next time the lexer is called. Caller
712 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
713 * handles the token correctly.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_force_next(pTHX_ I32 type)
79072805 718{
3280af22
NIS
719 PL_nexttype[PL_nexttoke] = type;
720 PL_nexttoke++;
721 if (PL_lex_state != LEX_KNOWNEXT) {
722 PL_lex_defer = PL_lex_state;
723 PL_lex_expect = PL_expect;
724 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
725 }
726}
727
ffb4593c
NT
728/*
729 * S_force_word
730 * When the lexer knows the next thing is a word (for instance, it has
731 * just seen -> and it knows that the next char is a word char, then
732 * it calls S_force_word to stick the next word into the PL_next lookahead.
733 *
734 * Arguments:
b1b65b59 735 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
736 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
737 * int check_keyword : if true, Perl checks to make sure the word isn't
738 * a keyword (do this if the word is a label, e.g. goto FOO)
739 * int allow_pack : if true, : characters will also be allowed (require,
740 * use, etc. do this)
9cbb5ea2 741 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
742 */
743
76e3520e 744STATIC char *
cea2e8a9 745S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 746{
463ee0b2
LW
747 register char *s;
748 STRLEN len;
749
750 start = skipspace(start);
751 s = start;
7e2040f0 752 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 753 (allow_pack && *s == ':') ||
15f0808c 754 (allow_initial_tick && *s == '\'') )
a0d0e21e 755 {
3280af22
NIS
756 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
757 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
758 return start;
759 if (token == METHOD) {
760 s = skipspace(s);
761 if (*s == '(')
3280af22 762 PL_expect = XTERM;
463ee0b2 763 else {
3280af22 764 PL_expect = XOPERATOR;
463ee0b2 765 }
79072805 766 }
3280af22
NIS
767 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
768 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
769 force_next(token);
770 }
771 return s;
772}
773
ffb4593c
NT
774/*
775 * S_force_ident
9cbb5ea2 776 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
777 * text only contains the "foo" portion. The first argument is a pointer
778 * to the "foo", and the second argument is the type symbol to prefix.
779 * Forces the next token to be a "WORD".
9cbb5ea2 780 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
781 */
782
76e3520e 783STATIC void
cea2e8a9 784S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
785{
786 if (s && *s) {
11343788 787 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 788 PL_nextval[PL_nexttoke].opval = o;
79072805 789 force_next(WORD);
748a9306 790 if (kind) {
e858de61 791 dTHR; /* just for in_eval */
11343788 792 o->op_private = OPpCONST_ENTERED;
55497cff
PP
793 /* XXX see note in pp_entereval() for why we forgo typo
794 warnings if the symbol must be introduced in an eval.
795 GSAR 96-10-12 */
3280af22 796 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
797 kind == '$' ? SVt_PV :
798 kind == '@' ? SVt_PVAV :
799 kind == '%' ? SVt_PVHV :
800 SVt_PVGV
801 );
748a9306 802 }
79072805
LW
803 }
804}
805
1571675a
GS
806NV
807Perl_str_to_version(pTHX_ SV *sv)
808{
809 NV retval = 0.0;
810 NV nshift = 1.0;
811 STRLEN len;
812 char *start = SvPVx(sv,len);
3aa33fe5 813 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
814 char *end = start + len;
815 while (start < end) {
816 I32 skip;
817 UV n;
818 if (utf)
8e84507e 819 n = utf8_to_uv_chk((U8*)start, &skip, 0);
1571675a
GS
820 else {
821 n = *(U8*)start;
822 skip = 1;
823 }
824 retval += ((NV)n)/nshift;
825 start += skip;
826 nshift *= 1000;
827 }
828 return retval;
829}
830
ffb4593c
NT
831/*
832 * S_force_version
833 * Forces the next token to be a version number.
834 */
835
76e3520e 836STATIC char *
cea2e8a9 837S_force_version(pTHX_ char *s)
89bfa8cd
PP
838{
839 OP *version = Nullop;
44dcb63b 840 char *d;
89bfa8cd
PP
841
842 s = skipspace(s);
843
44dcb63b 844 d = s;
dd629d5b 845 if (*d == 'v')
44dcb63b 846 d++;
44dcb63b 847 if (isDIGIT(*d)) {
a7cb1f99 848 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 849 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 850 SV *ver;
89bfa8cd 851 s = scan_num(s);
89bfa8cd 852 version = yylval.opval;
dd629d5b
GS
853 ver = cSVOPx(version)->op_sv;
854 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 855 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
856 SvNVX(ver) = str_to_version(ver);
857 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 858 }
89bfa8cd
PP
859 }
860 }
861
862 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 863 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
864 force_next(WORD);
865
866 return (s);
867}
868
ffb4593c
NT
869/*
870 * S_tokeq
871 * Tokenize a quoted string passed in as an SV. It finds the next
872 * chunk, up to end of string or a backslash. It may make a new
873 * SV containing that chunk (if HINT_NEW_STRING is on). It also
874 * turns \\ into \.
875 */
876
76e3520e 877STATIC SV *
cea2e8a9 878S_tokeq(pTHX_ SV *sv)
79072805
LW
879{
880 register char *s;
881 register char *send;
882 register char *d;
b3ac6de7
IZ
883 STRLEN len = 0;
884 SV *pv = sv;
79072805
LW
885
886 if (!SvLEN(sv))
b3ac6de7 887 goto finish;
79072805 888
a0d0e21e 889 s = SvPV_force(sv, len);
21a311ee 890 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 891 goto finish;
463ee0b2 892 send = s + len;
79072805
LW
893 while (s < send && *s != '\\')
894 s++;
895 if (s == send)
b3ac6de7 896 goto finish;
79072805 897 d = s;
3280af22 898 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 899 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
900 while (s < send) {
901 if (*s == '\\') {
a0d0e21e 902 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
903 s++; /* all that, just for this */
904 }
905 *d++ = *s++;
906 }
907 *d = '\0';
463ee0b2 908 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 909 finish:
3280af22 910 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 911 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
912 return sv;
913}
914
ffb4593c
NT
915/*
916 * Now come three functions related to double-quote context,
917 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
918 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
919 * interact with PL_lex_state, and create fake ( ... ) argument lists
920 * to handle functions and concatenation.
921 * They assume that whoever calls them will be setting up a fake
922 * join call, because each subthing puts a ',' after it. This lets
923 * "lower \luPpEr"
924 * become
925 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
926 *
927 * (I'm not sure whether the spurious commas at the end of lcfirst's
928 * arguments and join's arguments are created or not).
929 */
930
931/*
932 * S_sublex_start
933 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
934 *
935 * Pattern matching will set PL_lex_op to the pattern-matching op to
936 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
937 *
938 * OP_CONST and OP_READLINE are easy--just make the new op and return.
939 *
940 * Everything else becomes a FUNC.
941 *
942 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
943 * had an OP_CONST or OP_READLINE). This just sets us up for a
944 * call to S_sublex_push().
945 */
946
76e3520e 947STATIC I32
cea2e8a9 948S_sublex_start(pTHX)
79072805
LW
949{
950 register I32 op_type = yylval.ival;
79072805
LW
951
952 if (op_type == OP_NULL) {
3280af22
NIS
953 yylval.opval = PL_lex_op;
954 PL_lex_op = Nullop;
79072805
LW
955 return THING;
956 }
957 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 958 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
959
960 if (SvTYPE(sv) == SVt_PVIV) {
961 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
962 STRLEN len;
963 char *p;
964 SV *nsv;
965
966 p = SvPV(sv, len);
79cb57f6 967 nsv = newSVpvn(p, len);
01ec43d0
GS
968 if (SvUTF8(sv))
969 SvUTF8_on(nsv);
b3ac6de7
IZ
970 SvREFCNT_dec(sv);
971 sv = nsv;
972 }
973 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 974 PL_lex_stuff = Nullsv;
79072805
LW
975 return THING;
976 }
977
3280af22
NIS
978 PL_sublex_info.super_state = PL_lex_state;
979 PL_sublex_info.sub_inwhat = op_type;
980 PL_sublex_info.sub_op = PL_lex_op;
981 PL_lex_state = LEX_INTERPPUSH;
55497cff 982
3280af22
NIS
983 PL_expect = XTERM;
984 if (PL_lex_op) {
985 yylval.opval = PL_lex_op;
986 PL_lex_op = Nullop;
55497cff
PP
987 return PMFUNC;
988 }
989 else
990 return FUNC;
991}
992
ffb4593c
NT
993/*
994 * S_sublex_push
995 * Create a new scope to save the lexing state. The scope will be
996 * ended in S_sublex_done. Returns a '(', starting the function arguments
997 * to the uc, lc, etc. found before.
998 * Sets PL_lex_state to LEX_INTERPCONCAT.
999 */
1000
76e3520e 1001STATIC I32
cea2e8a9 1002S_sublex_push(pTHX)
55497cff 1003{
0f15f207 1004 dTHR;
f46d017c 1005 ENTER;
55497cff 1006
3280af22
NIS
1007 PL_lex_state = PL_sublex_info.super_state;
1008 SAVEI32(PL_lex_dojoin);
1009 SAVEI32(PL_lex_brackets);
3280af22
NIS
1010 SAVEI32(PL_lex_casemods);
1011 SAVEI32(PL_lex_starts);
1012 SAVEI32(PL_lex_state);
7766f137 1013 SAVEVPTR(PL_lex_inpat);
3280af22 1014 SAVEI32(PL_lex_inwhat);
57843af0 1015 SAVECOPLINE(PL_curcop);
3280af22
NIS
1016 SAVEPPTR(PL_bufptr);
1017 SAVEPPTR(PL_oldbufptr);
1018 SAVEPPTR(PL_oldoldbufptr);
1019 SAVEPPTR(PL_linestart);
1020 SAVESPTR(PL_linestr);
1021 SAVEPPTR(PL_lex_brackstack);
1022 SAVEPPTR(PL_lex_casestack);
1023
1024 PL_linestr = PL_lex_stuff;
1025 PL_lex_stuff = Nullsv;
1026
9cbb5ea2
GS
1027 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1028 = SvPVX(PL_linestr);
3280af22
NIS
1029 PL_bufend += SvCUR(PL_linestr);
1030 SAVEFREESV(PL_linestr);
1031
1032 PL_lex_dojoin = FALSE;
1033 PL_lex_brackets = 0;
3280af22
NIS
1034 New(899, PL_lex_brackstack, 120, char);
1035 New(899, PL_lex_casestack, 12, char);
1036 SAVEFREEPV(PL_lex_brackstack);
1037 SAVEFREEPV(PL_lex_casestack);
1038 PL_lex_casemods = 0;
1039 *PL_lex_casestack = '\0';
1040 PL_lex_starts = 0;
1041 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1042 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1043
1044 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1045 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1046 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1047 else
3280af22 1048 PL_lex_inpat = Nullop;
79072805 1049
55497cff 1050 return '(';
79072805
LW
1051}
1052
ffb4593c
NT
1053/*
1054 * S_sublex_done
1055 * Restores lexer state after a S_sublex_push.
1056 */
1057
76e3520e 1058STATIC I32
cea2e8a9 1059S_sublex_done(pTHX)
79072805 1060{
3280af22
NIS
1061 if (!PL_lex_starts++) {
1062 PL_expect = XOPERATOR;
79cb57f6 1063 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1064 return THING;
1065 }
1066
3280af22
NIS
1067 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1068 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1069 return yylex();
79072805
LW
1070 }
1071
ffb4593c 1072 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1073 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1074 PL_linestr = PL_lex_repl;
1075 PL_lex_inpat = 0;
1076 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1077 PL_bufend += SvCUR(PL_linestr);
1078 SAVEFREESV(PL_linestr);
1079 PL_lex_dojoin = FALSE;
1080 PL_lex_brackets = 0;
3280af22
NIS
1081 PL_lex_casemods = 0;
1082 *PL_lex_casestack = '\0';
1083 PL_lex_starts = 0;
25da4f38 1084 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1085 PL_lex_state = LEX_INTERPNORMAL;
1086 PL_lex_starts++;
e9fa98b2
HS
1087 /* we don't clear PL_lex_repl here, so that we can check later
1088 whether this is an evalled subst; that means we rely on the
1089 logic to ensure sublex_done() is called again only via the
1090 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1091 }
e9fa98b2 1092 else {
3280af22 1093 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1094 PL_lex_repl = Nullsv;
1095 }
79072805 1096 return ',';
ffed7fef
LW
1097 }
1098 else {
f46d017c 1099 LEAVE;
3280af22
NIS
1100 PL_bufend = SvPVX(PL_linestr);
1101 PL_bufend += SvCUR(PL_linestr);
1102 PL_expect = XOPERATOR;
09bef843 1103 PL_sublex_info.sub_inwhat = 0;
79072805 1104 return ')';
ffed7fef
LW
1105 }
1106}
1107
02aa26ce
NT
1108/*
1109 scan_const
1110
1111 Extracts a pattern, double-quoted string, or transliteration. This
1112 is terrifying code.
1113
3280af22
NIS
1114 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1115 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1116 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1117
9b599b2a
GS
1118 Returns a pointer to the character scanned up to. Iff this is
1119 advanced from the start pointer supplied (ie if anything was
1120 successfully parsed), will leave an OP for the substring scanned
1121 in yylval. Caller must intuit reason for not parsing further
1122 by looking at the next characters herself.
1123
02aa26ce
NT
1124 In patterns:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 regexp special ones: \D \s
1128 constants: \x3
1129 backrefs: \1 (deprecated in substitution replacements)
1130 case and quoting: \U \Q \E
1131 stops on @ and $, but not for $ as tail anchor
1132
1133 In transliterations:
1134 characters are VERY literal, except for - not at the start or end
1135 of the string, which indicates a range. scan_const expands the
1136 range to the full set of intermediate characters.
1137
1138 In double-quoted strings:
1139 backslashes:
1140 double-quoted style: \r and \n
1141 constants: \x3
1142 backrefs: \1 (deprecated)
1143 case and quoting: \U \Q \E
1144 stops on @ and $
1145
1146 scan_const does *not* construct ops to handle interpolated strings.
1147 It stops processing as soon as it finds an embedded $ or @ variable
1148 and leaves it to the caller to work out what's going on.
1149
1150 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1151
1152 $ in pattern could be $foo or could be tail anchor. Assumption:
1153 it's a tail anchor if $ is the last thing in the string, or if it's
1154 followed by one of ")| \n\t"
1155
1156 \1 (backreferences) are turned into $1
1157
1158 The structure of the code is
1159 while (there's a character to process) {
1160 handle transliteration ranges
1161 skip regexp comments
1162 skip # initiated comments in //x patterns
1163 check for embedded @foo
1164 check for embedded scalars
1165 if (backslash) {
1166 leave intact backslashes from leave (below)
1167 deprecate \1 in strings and sub replacements
1168 handle string-changing backslashes \l \U \Q \E, etc.
1169 switch (what was escaped) {
1170 handle - in a transliteration (becomes a literal -)
1171 handle \132 octal characters
1172 handle 0x15 hex characters
1173 handle \cV (control V)
1174 handle printf backslashes (\f, \r, \n, etc)
1175 } (end switch)
1176 } (end if backslash)
1177 } (end while character to read)
1178
1179*/
1180
76e3520e 1181STATIC char *
cea2e8a9 1182S_scan_const(pTHX_ char *start)
79072805 1183{
3280af22 1184 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1185 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1186 register char *s = start; /* start of the constant */
1187 register char *d = SvPVX(sv); /* destination for copies */
1188 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1189 bool didrange = FALSE; /* did we just finish a range? */
7e2040f0 1190 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1191 I32 len; /* ? */
012bcf8d
GS
1192 UV uv;
1193
ac2262e3 1194 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1195 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1196 : UTF;
ac2262e3 1197 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1198 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1199 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1200 : UTF;
dff6d3cd 1201 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1202 PL_lex_inpat
4a2d328f 1203 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1204 : "";
79072805
LW
1205
1206 while (s < send || dorange) {
02aa26ce 1207 /* get transliterations out of the way (they're most literal) */
3280af22 1208 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1209 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1210 if (dorange) {
02aa26ce 1211 I32 i; /* current expanded character */
8ada0baa 1212 I32 min; /* first character in range */
02aa26ce
NT
1213 I32 max; /* last character in range */
1214
1215 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1216 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1217 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1218 d -= 2; /* eat the first char and the - */
1219
8ada0baa
JH
1220 min = (U8)*d; /* first char in range */
1221 max = (U8)d[1]; /* last char in range */
1222
c2e66d9e 1223 if (min > max) {
01ec43d0
GS
1224 Perl_croak(aTHX_
1225 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1226 (char)min, (char)max);
c2e66d9e
GS
1227 }
1228
8ada0baa
JH
1229#ifndef ASCIIish
1230 if ((isLOWER(min) && isLOWER(max)) ||
1231 (isUPPER(min) && isUPPER(max))) {
1232 if (isLOWER(min)) {
1233 for (i = min; i <= max; i++)
1234 if (isLOWER(i))
1235 *d++ = i;
1236 } else {
1237 for (i = min; i <= max; i++)
1238 if (isUPPER(i))
1239 *d++ = i;
1240 }
1241 }
1242 else
1243#endif
1244 for (i = min; i <= max; i++)
1245 *d++ = i;
02aa26ce
NT
1246
1247 /* mark the range as done, and continue */
79072805 1248 dorange = FALSE;
01ec43d0 1249 didrange = TRUE;
79072805 1250 continue;
c2e66d9e 1251 }
02aa26ce
NT
1252
1253 /* range begins (ignore - as first or last char) */
79072805 1254 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1255 if (didrange) {
1fafa243 1256 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1257 }
a0ed51b3 1258 if (utf) {
a176fa2a 1259 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1260 s++;
1261 continue;
1262 }
79072805
LW
1263 dorange = TRUE;
1264 s++;
01ec43d0
GS
1265 }
1266 else {
1267 didrange = FALSE;
1268 }
79072805 1269 }
02aa26ce
NT
1270
1271 /* if we get here, we're not doing a transliteration */
1272
0f5d15d6
IZ
1273 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1274 except for the last char, which will be done separately. */
3280af22 1275 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1276 if (s[2] == '#') {
1277 while (s < send && *s != ')')
1278 *d++ = *s++;
155aba94
GS
1279 }
1280 else if (s[2] == '{' /* This should match regcomp.c */
1281 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1282 {
cc6b7395 1283 I32 count = 1;
0f5d15d6 1284 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1285 char c;
1286
d9f97599
GS
1287 while (count && (c = *regparse)) {
1288 if (c == '\\' && regparse[1])
1289 regparse++;
cc6b7395
IZ
1290 else if (c == '{')
1291 count++;
1292 else if (c == '}')
1293 count--;
d9f97599 1294 regparse++;
cc6b7395 1295 }
5bdf89e7
IZ
1296 if (*regparse != ')') {
1297 regparse--; /* Leave one char for continuation. */
cc6b7395 1298 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1299 }
0f5d15d6 1300 while (s < regparse)
cc6b7395
IZ
1301 *d++ = *s++;
1302 }
748a9306 1303 }
02aa26ce
NT
1304
1305 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1306 else if (*s == '#' && PL_lex_inpat &&
1307 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1308 while (s+1 < send && *s != '\n')
1309 *d++ = *s++;
1310 }
02aa26ce 1311
5d1d4326
JH
1312 /* check for embedded arrays
1313 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1314 */
7e2040f0 1315 else if (*s == '@' && s[1]
5d1d4326 1316 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1317 break;
02aa26ce
NT
1318
1319 /* check for embedded scalars. only stop if we're sure it's a
1320 variable.
1321 */
79072805 1322 else if (*s == '$') {
3280af22 1323 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1324 break;
c277df42 1325 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1326 break; /* in regexp, $ might be tail anchor */
1327 }
02aa26ce 1328
a0ed51b3
LW
1329 /* (now in tr/// code again) */
1330
d008e5eb 1331 if (*s & 0x80 && thisutf) {
8e84507e 1332 (void)utf8_to_uv_chk((U8*)s, &len, 0);
012bcf8d
GS
1333 if (len == 1) {
1334 /* illegal UTF8, make it valid */
012bcf8d 1335 char *old_pvx = SvPVX(sv);
bee7c419
GS
1336 /* need space for one extra char (NOTE: SvCUR() not set here) */
1337 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1338 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1339 }
1340 else {
1341 while (len--)
1342 *d++ = *s++;
1343 }
1344 has_utf = TRUE;
1345 continue;
a0ed51b3
LW
1346 }
1347
02aa26ce 1348 /* backslashes */
79072805
LW
1349 if (*s == '\\' && s+1 < send) {
1350 s++;
02aa26ce
NT
1351
1352 /* some backslashes we leave behind */
c9f97d15 1353 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1354 *d++ = '\\';
1355 *d++ = *s++;
1356 continue;
1357 }
02aa26ce
NT
1358
1359 /* deprecate \1 in strings and substitution replacements */
3280af22 1360 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1361 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1362 {
d008e5eb 1363 dTHR; /* only for ckWARN */
599cee73 1364 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1365 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1366 *--s = '$';
1367 break;
1368 }
02aa26ce
NT
1369
1370 /* string-change backslash escapes */
3280af22 1371 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1372 --s;
1373 break;
1374 }
02aa26ce
NT
1375
1376 /* if we get here, it's either a quoted -, or a digit */
79072805 1377 switch (*s) {
02aa26ce
NT
1378
1379 /* quoted - in transliterations */
79072805 1380 case '-':
3280af22 1381 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1382 *d++ = *s++;
1383 continue;
1384 }
1385 /* FALL THROUGH */
1386 default:
11b8faa4
JH
1387 {
1388 dTHR;
7e84c16c 1389 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1390 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1391 "Unrecognized escape \\%c passed through",
1392 *s);
1393 /* default action is to copy the quoted character */
1394 *d++ = *s++;
1395 continue;
1396 }
02aa26ce
NT
1397
1398 /* \132 indicates an octal constant */
79072805
LW
1399 case '0': case '1': case '2': case '3':
1400 case '4': case '5': case '6': case '7':
b21ed0a9 1401 len = 0; /* disallow underscores */
012bcf8d 1402 uv = (UV)scan_oct(s, 3, &len);
79072805 1403 s += len;
012bcf8d 1404 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1405
1406 /* \x24 indicates a hex constant */
79072805 1407 case 'x':
a0ed51b3
LW
1408 ++s;
1409 if (*s == '{') {
1410 char* e = strchr(s, '}');
adaeee49 1411 if (!e) {
a0ed51b3 1412 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1413 e = s;
1414 }
b21ed0a9 1415 len = 1; /* allow underscores */
012bcf8d
GS
1416 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1417 s = e + 1;
a0ed51b3
LW
1418 }
1419 else {
b21ed0a9 1420 len = 0; /* disallow underscores */
012bcf8d
GS
1421 uv = (UV)scan_hex(s, 2, &len);
1422 s += len;
1423 }
1424
1425 NUM_ESCAPE_INSERT:
1426 /* Insert oct or hex escaped character.
1427 * There will always enough room in sv since such escapes will
1428 * be longer than any utf8 sequence they can end up as
1429 */
1430 if (uv > 127) {
1431 if (!thisutf && !has_utf && uv > 255) {
1432 /* might need to recode whatever we have accumulated so far
1433 * if it contains any hibit chars
1434 */
1435 int hicount = 0;
1436 char *c;
1437 for (c = SvPVX(sv); c < d; c++) {
1438 if (*c & 0x80)
1439 hicount++;
1440 }
1441 if (hicount) {
1442 char *old_pvx = SvPVX(sv);
1443 char *src, *dst;
1444 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1445
1446 src = d - 1;
1447 d += hicount;
1448 dst = d - 1;
1449
1450 while (src < dst) {
1451 if (*src & 0x80) {
1452 dst--;
1453 uv_to_utf8((U8*)dst, (U8)*src--);
1454 dst--;
1455 }
1456 else {
1457 *dst-- = *src--;
1458 }
1459 }
1460 }
1461 }
1462
1463 if (thisutf || uv > 255) {
1464 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1465 has_utf = TRUE;
012bcf8d 1466 }
a0ed51b3 1467 else {
012bcf8d 1468 *d++ = (char)uv;
a0ed51b3 1469 }
012bcf8d
GS
1470 }
1471 else {
1472 *d++ = (char)uv;
a0ed51b3 1473 }
79072805 1474 continue;
02aa26ce 1475
4a2d328f
IZ
1476 /* \N{latin small letter a} is a named character */
1477 case 'N':
423cee85
JH
1478 ++s;
1479 if (*s == '{') {
1480 char* e = strchr(s, '}');
155aba94 1481 SV *res;
423cee85
JH
1482 STRLEN len;
1483 char *str;
423cee85
JH
1484
1485 if (!e) {
5777a3f7 1486 yyerror("Missing right brace on \\N{}");
423cee85
JH
1487 e = s - 1;
1488 goto cont_scan;
1489 }
1490 res = newSVpvn(s + 1, e - s - 1);
1491 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1492 res, Nullsv, "\\N{...}" );
423cee85 1493 str = SvPV(res,len);
f08d6ad9
GS
1494 if (!has_utf && SvUTF8(res)) {
1495 char *ostart = SvPVX(sv);
1496 SvCUR_set(sv, d - ostart);
1497 SvPOK_on(sv);
e4f3eed8 1498 *d = '\0';
f08d6ad9 1499 sv_utf8_upgrade(sv);
d2f449dd
SB
1500 /* this just broke our allocation above... */
1501 SvGROW(sv, send - start);
f08d6ad9 1502 d = SvPVX(sv) + SvCUR(sv);
e1992b6d 1503 has_utf = TRUE;
f08d6ad9 1504 }
423cee85
JH
1505 if (len > e - s + 4) {
1506 char *odest = SvPVX(sv);
1507
1508 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1509 d = SvPVX(sv) + (d - odest);
1510 }
1511 Copy(str, d, len, char);
1512 d += len;
1513 SvREFCNT_dec(res);
1514 cont_scan:
1515 s = e + 1;
1516 }
1517 else
5777a3f7 1518 yyerror("Missing braces on \\N{}");
423cee85
JH
1519 continue;
1520
02aa26ce 1521 /* \c is a control character */
79072805
LW
1522 case 'c':
1523 s++;
9d116dd7
JH
1524#ifdef EBCDIC
1525 *d = *s++;
1526 if (isLOWER(*d))
1527 *d = toUPPER(*d);
774a9426
GS
1528 *d = toCTRL(*d);
1529 d++;
9d116dd7 1530#else
bbce6d69
PP
1531 len = *s++;
1532 *d++ = toCTRL(len);
9d116dd7 1533#endif
79072805 1534 continue;
02aa26ce
NT
1535
1536 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1537 case 'b':
1538 *d++ = '\b';
1539 break;
1540 case 'n':
1541 *d++ = '\n';
1542 break;
1543 case 'r':
1544 *d++ = '\r';
1545 break;
1546 case 'f':
1547 *d++ = '\f';
1548 break;
1549 case 't':
1550 *d++ = '\t';
1551 break;
34a3fe2a
PP
1552#ifdef EBCDIC
1553 case 'e':
1554 *d++ = '\047'; /* CP 1047 */
1555 break;
1556 case 'a':
1557 *d++ = '\057'; /* CP 1047 */
1558 break;
1559#else
79072805
LW
1560 case 'e':
1561 *d++ = '\033';
1562 break;
1563 case 'a':
1564 *d++ = '\007';
1565 break;
34a3fe2a 1566#endif
02aa26ce
NT
1567 } /* end switch */
1568
79072805
LW
1569 s++;
1570 continue;
02aa26ce
NT
1571 } /* end if (backslash) */
1572
79072805 1573 *d++ = *s++;
02aa26ce
NT
1574 } /* while loop to process each character */
1575
1576 /* terminate the string and set up the sv */
79072805 1577 *d = '\0';
463ee0b2 1578 SvCUR_set(sv, d - SvPVX(sv));
79072805 1579 SvPOK_on(sv);
7e2040f0
GS
1580 if (has_utf)
1581 SvUTF8_on(sv);
79072805 1582
02aa26ce 1583 /* shrink the sv if we allocated more than we used */
79072805
LW
1584 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1585 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1586 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1587 }
02aa26ce 1588
9b599b2a 1589 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1590 if (s > PL_bufptr) {
1591 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1592 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1593 sv, Nullsv,
3280af22 1594 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1595 ? "tr"
3280af22 1596 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1597 ? "s"
1598 : "qq")));
79072805 1599 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1600 } else
8990e307 1601 SvREFCNT_dec(sv);
79072805
LW
1602 return s;
1603}
1604
ffb4593c
NT
1605/* S_intuit_more
1606 * Returns TRUE if there's more to the expression (e.g., a subscript),
1607 * FALSE otherwise.
ffb4593c
NT
1608 *
1609 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1610 *
1611 * ->[ and ->{ return TRUE
1612 * { and [ outside a pattern are always subscripts, so return TRUE
1613 * if we're outside a pattern and it's not { or [, then return FALSE
1614 * if we're in a pattern and the first char is a {
1615 * {4,5} (any digits around the comma) returns FALSE
1616 * if we're in a pattern and the first char is a [
1617 * [] returns FALSE
1618 * [SOMETHING] has a funky algorithm to decide whether it's a
1619 * character class or not. It has to deal with things like
1620 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1621 * anything else returns TRUE
1622 */
1623
9cbb5ea2
GS
1624/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1625
76e3520e 1626STATIC int
cea2e8a9 1627S_intuit_more(pTHX_ register char *s)
79072805 1628{
3280af22 1629 if (PL_lex_brackets)
79072805
LW
1630 return TRUE;
1631 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1632 return TRUE;
1633 if (*s != '{' && *s != '[')
1634 return FALSE;
3280af22 1635 if (!PL_lex_inpat)
79072805
LW
1636 return TRUE;
1637
1638 /* In a pattern, so maybe we have {n,m}. */
1639 if (*s == '{') {
1640 s++;
1641 if (!isDIGIT(*s))
1642 return TRUE;
1643 while (isDIGIT(*s))
1644 s++;
1645 if (*s == ',')
1646 s++;
1647 while (isDIGIT(*s))
1648 s++;
1649 if (*s == '}')
1650 return FALSE;
1651 return TRUE;
1652
1653 }
1654
1655 /* On the other hand, maybe we have a character class */
1656
1657 s++;
1658 if (*s == ']' || *s == '^')
1659 return FALSE;
1660 else {
ffb4593c 1661 /* this is terrifying, and it works */
79072805
LW
1662 int weight = 2; /* let's weigh the evidence */
1663 char seen[256];
f27ffc4a 1664 unsigned char un_char = 255, last_un_char;
93a17b20 1665 char *send = strchr(s,']');
3280af22 1666 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1667
1668 if (!send) /* has to be an expression */
1669 return TRUE;
1670
1671 Zero(seen,256,char);
1672 if (*s == '$')
1673 weight -= 3;
1674 else if (isDIGIT(*s)) {
1675 if (s[1] != ']') {
1676 if (isDIGIT(s[1]) && s[2] == ']')
1677 weight -= 10;
1678 }
1679 else
1680 weight -= 100;
1681 }
1682 for (; s < send; s++) {
1683 last_un_char = un_char;
1684 un_char = (unsigned char)*s;
1685 switch (*s) {
1686 case '@':
1687 case '&':
1688 case '$':
1689 weight -= seen[un_char] * 10;
7e2040f0 1690 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1691 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1692 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1693 weight -= 100;
1694 else
1695 weight -= 10;
1696 }
1697 else if (*s == '$' && s[1] &&
93a17b20
LW
1698 strchr("[#!%*<>()-=",s[1])) {
1699 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1700 weight -= 10;
1701 else
1702 weight -= 1;
1703 }
1704 break;
1705 case '\\':
1706 un_char = 254;
1707 if (s[1]) {
93a17b20 1708 if (strchr("wds]",s[1]))
79072805
LW
1709 weight += 100;
1710 else if (seen['\''] || seen['"'])
1711 weight += 1;
93a17b20 1712 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1713 weight += 40;
1714 else if (isDIGIT(s[1])) {
1715 weight += 40;
1716 while (s[1] && isDIGIT(s[1]))
1717 s++;
1718 }
1719 }
1720 else
1721 weight += 100;
1722 break;
1723 case '-':
1724 if (s[1] == '\\')
1725 weight += 50;
93a17b20 1726 if (strchr("aA01! ",last_un_char))
79072805 1727 weight += 30;
93a17b20 1728 if (strchr("zZ79~",s[1]))
79072805 1729 weight += 30;
f27ffc4a
GS
1730 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1731 weight -= 5; /* cope with negative subscript */
79072805
LW
1732 break;
1733 default:
93a17b20 1734 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1735 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1736 char *d = tmpbuf;
1737 while (isALPHA(*s))
1738 *d++ = *s++;
1739 *d = '\0';
1740 if (keyword(tmpbuf, d - tmpbuf))
1741 weight -= 150;
1742 }
1743 if (un_char == last_un_char + 1)
1744 weight += 5;
1745 weight -= seen[un_char];
1746 break;
1747 }
1748 seen[un_char]++;
1749 }
1750 if (weight >= 0) /* probably a character class */
1751 return FALSE;
1752 }
1753
1754 return TRUE;
1755}
ffed7fef 1756
ffb4593c
NT
1757/*
1758 * S_intuit_method
1759 *
1760 * Does all the checking to disambiguate
1761 * foo bar
1762 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1763 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1764 *
1765 * First argument is the stuff after the first token, e.g. "bar".
1766 *
1767 * Not a method if bar is a filehandle.
1768 * Not a method if foo is a subroutine prototyped to take a filehandle.
1769 * Not a method if it's really "Foo $bar"
1770 * Method if it's "foo $bar"
1771 * Not a method if it's really "print foo $bar"
1772 * Method if it's really "foo package::" (interpreted as package->foo)
1773 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1774 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1775 * =>
1776 */
1777
76e3520e 1778STATIC int
cea2e8a9 1779S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1780{
1781 char *s = start + (*start == '$');
3280af22 1782 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1783 STRLEN len;
1784 GV* indirgv;
1785
1786 if (gv) {
b6c543e3 1787 CV *cv;
a0d0e21e
LW
1788 if (GvIO(gv))
1789 return 0;
b6c543e3
IZ
1790 if ((cv = GvCVu(gv))) {
1791 char *proto = SvPVX(cv);
1792 if (proto) {
1793 if (*proto == ';')
1794 proto++;
1795 if (*proto == '*')
1796 return 0;
1797 }
1798 } else
a0d0e21e
LW
1799 gv = 0;
1800 }
8903cb82 1801 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1802 /* start is the beginning of the possible filehandle/object,
1803 * and s is the end of it
1804 * tmpbuf is a copy of it
1805 */
1806
a0d0e21e 1807 if (*start == '$') {
3280af22 1808 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1809 return 0;
1810 s = skipspace(s);
3280af22
NIS
1811 PL_bufptr = start;
1812 PL_expect = XREF;
a0d0e21e
LW
1813 return *s == '(' ? FUNCMETH : METHOD;
1814 }
1815 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1816 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1817 len -= 2;
1818 tmpbuf[len] = '\0';
1819 goto bare_package;
1820 }
1821 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1822 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1823 return 0;
1824 /* filehandle or package name makes it a method */
89bfa8cd 1825 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1826 s = skipspace(s);
3280af22 1827 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1828 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1829 bare_package:
3280af22 1830 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1831 newSVpvn(tmpbuf,len));
3280af22
NIS
1832 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1833 PL_expect = XTERM;
a0d0e21e 1834 force_next(WORD);
3280af22 1835 PL_bufptr = s;
a0d0e21e
LW
1836 return *s == '(' ? FUNCMETH : METHOD;
1837 }
1838 }
1839 return 0;
1840}
1841
ffb4593c
NT
1842/*
1843 * S_incl_perldb
1844 * Return a string of Perl code to load the debugger. If PERL5DB
1845 * is set, it will return the contents of that, otherwise a
1846 * compile-time require of perl5db.pl.
1847 */
1848
76e3520e 1849STATIC char*
cea2e8a9 1850S_incl_perldb(pTHX)
a0d0e21e 1851{
3280af22 1852 if (PL_perldb) {
76e3520e 1853 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1854
1855 if (pdb)
1856 return pdb;
61bb5906 1857 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1858 return "BEGIN { require 'perl5db.pl' }";
1859 }
1860 return "";
1861}
1862
1863
16d20bd9
AD
1864/* Encoded script support. filter_add() effectively inserts a
1865 * 'pre-processing' function into the current source input stream.
1866 * Note that the filter function only applies to the current source file
1867 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1868 *
1869 * The datasv parameter (which may be NULL) can be used to pass
1870 * private data to this instance of the filter. The filter function
1871 * can recover the SV using the FILTER_DATA macro and use it to
1872 * store private buffers and state information.
1873 *
1874 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1875 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1876 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1877 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1878 * private use must be set using malloc'd pointers.
1879 */
16d20bd9
AD
1880
1881SV *
864dbfa3 1882Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1883{
f4c556ac
GS
1884 if (!funcp)
1885 return Nullsv;
1886
3280af22
NIS
1887 if (!PL_rsfp_filters)
1888 PL_rsfp_filters = newAV();
16d20bd9 1889 if (!datasv)
8c52afec 1890 datasv = NEWSV(255,0);
16d20bd9 1891 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1892 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1893 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1894 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1895 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1896 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1897 av_unshift(PL_rsfp_filters, 1);
1898 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1899 return(datasv);
1900}
1901
1902
1903/* Delete most recently added instance of this filter function. */
a0d0e21e 1904void
864dbfa3 1905Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1906{
e0c19803 1907 SV *datasv;
f4c556ac 1908 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1909 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1910 return;
1911 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1912 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1913 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1914 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1915 IoANY(datasv) = (void *)NULL;
3280af22 1916 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1917
16d20bd9
AD
1918 return;
1919 }
1920 /* we need to search for the correct entry and clear it */
cea2e8a9 1921 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1922}
1923
1924
1925/* Invoke the n'th filter function for the current rsfp. */
1926I32
864dbfa3 1927Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1928
1929
1930 /* 0 = read one text line */
a0d0e21e 1931{
16d20bd9
AD
1932 filter_t funcp;
1933 SV *datasv = NULL;
e50aee73 1934
3280af22 1935 if (!PL_rsfp_filters)
16d20bd9 1936 return -1;
3280af22 1937 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1938 /* Provide a default input filter to make life easy. */
1939 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1940 DEBUG_P(PerlIO_printf(Perl_debug_log,
1941 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1942 if (maxlen) {
1943 /* Want a block */
1944 int len ;
1945 int old_len = SvCUR(buf_sv) ;
1946
1947 /* ensure buf_sv is large enough */
1948 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1949 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1950 if (PerlIO_error(PL_rsfp))
37120919
AD
1951 return -1; /* error */
1952 else
1953 return 0 ; /* end of file */
1954 }
16d20bd9
AD
1955 SvCUR_set(buf_sv, old_len + len) ;
1956 } else {
1957 /* Want a line */
3280af22
NIS
1958 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1959 if (PerlIO_error(PL_rsfp))
37120919
AD
1960 return -1; /* error */
1961 else
1962 return 0 ; /* end of file */
1963 }
16d20bd9
AD
1964 }
1965 return SvCUR(buf_sv);
1966 }
1967 /* Skip this filter slot if filter has been deleted */
3280af22 1968 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1969 DEBUG_P(PerlIO_printf(Perl_debug_log,
1970 "filter_read %d: skipped (filter deleted)\n",
1971 idx));
16d20bd9
AD
1972 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1973 }
1974 /* Get function pointer hidden within datasv */
4755096e 1975 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1976 DEBUG_P(PerlIO_printf(Perl_debug_log,
1977 "filter_read %d: via function %p (%s)\n",
1978 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1979 /* Call function. The function is expected to */
1980 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1981 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1982 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1983}
1984
76e3520e 1985STATIC char *
cea2e8a9 1986S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1987{
c39cd008 1988#ifdef PERL_CR_FILTER
3280af22 1989 if (!PL_rsfp_filters) {
c39cd008 1990 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1991 }
1992#endif
3280af22 1993 if (PL_rsfp_filters) {
16d20bd9 1994
55497cff
PP
1995 if (!append)
1996 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1997 if (FILTER_READ(0, sv, 0) > 0)
1998 return ( SvPVX(sv) ) ;
1999 else
2000 return Nullch ;
2001 }
9d116dd7 2002 else
fd049845 2003 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2004}
2005
01ec43d0
GS
2006STATIC HV *
2007S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2008{
2009 GV *gv;
2010
01ec43d0 2011 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2012 return PL_curstash;
2013
2014 if (len > 2 &&
2015 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2016 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2017 {
2018 return GvHV(gv); /* Foo:: */
def3634b
GS
2019 }
2020
2021 /* use constant CLASS => 'MyClass' */
2022 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2023 SV *sv;
2024 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2025 pkgname = SvPV_nolen(sv);
2026 }
2027 }
2028
2029 return gv_stashpv(pkgname, FALSE);
2030}
a0d0e21e 2031
748a9306
LW
2032#ifdef DEBUGGING
2033 static char* exp_name[] =
09bef843
SB
2034 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2035 "ATTRTERM", "TERMBLOCK"
2036 };
748a9306 2037#endif
463ee0b2 2038
02aa26ce
NT
2039/*
2040 yylex
2041
2042 Works out what to call the token just pulled out of the input
2043 stream. The yacc parser takes care of taking the ops we return and
2044 stitching them into a tree.
2045
2046 Returns:
2047 PRIVATEREF
2048
2049 Structure:
2050 if read an identifier
2051 if we're in a my declaration
2052 croak if they tried to say my($foo::bar)
2053 build the ops for a my() declaration
2054 if it's an access to a my() variable
2055 are we in a sort block?
2056 croak if my($a); $a <=> $b
2057 build ops for access to a my() variable
2058 if in a dq string, and they've said @foo and we can't find @foo
2059 croak
2060 build ops for a bareword
2061 if we already built the token before, use it.
2062*/
2063
bf4acbe4
GS
2064#ifdef __SC__
2065#pragma segment Perl_yylex
2066#endif
864dbfa3
GS
2067int
2068#ifdef USE_PURE_BISON
cea2e8a9 2069Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 2070#else
cea2e8a9 2071Perl_yylex(pTHX)
864dbfa3 2072#endif
378cc40b 2073{
20141f0e
RI
2074
2075 int r;
2076
2077#ifdef USE_PURE_BISON
2078/* increment level and store the argument pointers */
2079 yyactlevel++;
2080 if (yyactlevel >= YYMAXLEVEL) {
2081/* What to do ??? */
2082 }
2083 yylval_pointer[yyactlevel] = lvalp;
2084 yychar_pointer[yyactlevel] = lcharp;
2085 /* Save last pointer at the bottom */
2086 yylval_pointer[0] = lvalp;
2087 yychar_pointer[0] = lcharp;
2088#endif
2089
2090 r = S_syylex(aTHX);
2091
2092#ifdef USE_PURE_BISON
2093 yyactlevel--;
2094#endif
2095
2096 return r;
2097}
2098
2099STATIC int
2100S_syylex(pTHX) /* need to be separate from yylex for reentrancy */
2101{
11343788 2102 dTHR;
79072805 2103 register char *s;
378cc40b 2104 register char *d;
79072805 2105 register I32 tmp;
463ee0b2 2106 STRLEN len;
161b471a
NIS
2107 GV *gv = Nullgv;
2108 GV **gvp = 0;
a687059c 2109
02aa26ce 2110 /* check if there's an identifier for us to look at */
3280af22 2111 if (PL_pending_ident) {
02aa26ce 2112 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2113 char pit = PL_pending_ident;
2114 PL_pending_ident = 0;
bbce6d69 2115
02aa26ce
NT
2116 /* if we're in a my(), we can't allow dynamics here.
2117 $foo'bar has already been turned into $foo::bar, so
2118 just check for colons.
2119
2120 if it's a legal name, the OP is a PADANY.
2121 */
3280af22 2122 if (PL_in_my) {
77ca0c92 2123 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2124 if (strchr(PL_tokenbuf,':'))
2125 yyerror(Perl_form(aTHX_ "No package name allowed for "
2126 "variable %s in \"our\"",
2127 PL_tokenbuf));
77ca0c92
LW
2128 tmp = pad_allocmy(PL_tokenbuf);
2129 }
2130 else {
2131 if (strchr(PL_tokenbuf,':'))
2132 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2133
77ca0c92
LW
2134 yylval.opval = newOP(OP_PADANY, 0);
2135 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2136 return PRIVATEREF;
2137 }
bbce6d69
PP
2138 }
2139
02aa26ce
NT
2140 /*
2141 build the ops for accesses to a my() variable.
2142
2143 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2144 then used in a comparison. This catches most, but not
2145 all cases. For instance, it catches
2146 sort { my($a); $a <=> $b }
2147 but not
2148 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2149 (although why you'd do that is anyone's guess).
2150 */
2151
3280af22 2152 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2153#ifdef USE_THREADS
54b9620d 2154 /* Check for single character per-thread SVs */
3280af22
NIS
2155 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2156 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2157 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2158 {
2faa37cc 2159 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2160 yylval.opval->op_targ = tmp;
2161 return PRIVATEREF;
2162 }
2163#endif /* USE_THREADS */
3280af22 2164 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2165 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2166 /* might be an "our" variable" */
f472eb5c 2167 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2168 /* build ops for a bareword */
f472eb5c
GS
2169 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2170 sv_catpvn(sym, "::", 2);
2171 sv_catpv(sym, PL_tokenbuf+1);
2172 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2173 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2174 gv_fetchpv(SvPVX(sym),
77ca0c92 2175 (PL_in_eval
f472eb5c
GS
2176 ? (GV_ADDMULTI | GV_ADDINEVAL)
2177 : TRUE
77ca0c92
LW
2178 ),
2179 ((PL_tokenbuf[0] == '$') ? SVt_PV
2180 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2181 : SVt_PVHV));
2182 return WORD;
2183 }
2184
02aa26ce 2185 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2186 if (PL_last_lop_op == OP_SORT &&
2187 PL_tokenbuf[0] == '$' &&
2188 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2189 && !PL_tokenbuf[2])
bbce6d69 2190 {
3280af22
NIS
2191 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2192 d < PL_bufend && *d != '\n';
a863c7d1
MB
2193 d++)
2194 {
2195 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2196 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2197 PL_tokenbuf);
a863c7d1 2198 }
bbce6d69
PP
2199 }
2200 }
bbce6d69 2201
a863c7d1
MB
2202 yylval.opval = newOP(OP_PADANY, 0);
2203 yylval.opval->op_targ = tmp;
2204 return PRIVATEREF;
2205 }
bbce6d69
PP
2206 }
2207
02aa26ce
NT
2208 /*
2209 Whine if they've said @foo in a doublequoted string,
2210 and @foo isn't a variable we can find in the symbol
2211 table.
2212 */
3280af22
NIS
2213 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2214 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2215 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2216 && ckWARN(WARN_AMBIGUOUS))
2217 {
2218 /* Downgraded from fatal to warning 20000522 mjd */
2219 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2220 "Possible unintended interpolation of %s in string",
2221 PL_tokenbuf);
2222 }
bbce6d69
PP
2223 }
2224
02aa26ce 2225 /* build ops for a bareword */
3280af22 2226 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2227 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2228 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2229 ((PL_tokenbuf[0] == '$') ? SVt_PV
2230 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2231 : SVt_PVHV));
2232 return WORD;
2233 }
2234
02aa26ce
NT
2235 /* no identifier pending identification */
2236
3280af22 2237 switch (PL_lex_state) {
79072805
LW
2238#ifdef COMMENTARY
2239 case LEX_NORMAL: /* Some compilers will produce faster */
2240 case LEX_INTERPNORMAL: /* code if we comment these out. */
2241 break;
2242#endif
2243
09bef843 2244 /* when we've already built the next token, just pull it out of the queue */
79072805 2245 case LEX_KNOWNEXT:
3280af22
NIS
2246 PL_nexttoke--;
2247 yylval = PL_nextval[PL_nexttoke];
2248 if (!PL_nexttoke) {
2249 PL_lex_state = PL_lex_defer;
2250 PL_expect = PL_lex_expect;
2251 PL_lex_defer = LEX_NORMAL;
463ee0b2 2252 }
3280af22 2253 return(PL_nexttype[PL_nexttoke]);
79072805 2254
02aa26ce 2255 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2256 when we get here, PL_bufptr is at the \
02aa26ce 2257 */
79072805
LW
2258 case LEX_INTERPCASEMOD:
2259#ifdef DEBUGGING
3280af22 2260 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2261 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2262#endif
02aa26ce 2263 /* handle \E or end of string */
3280af22 2264 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2265 char oldmod;
02aa26ce
NT
2266
2267 /* if at a \E */
3280af22
NIS
2268 if (PL_lex_casemods) {
2269 oldmod = PL_lex_casestack[--PL_lex_casemods];
2270 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2271
3280af22
NIS
2272 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2273 PL_bufptr += 2;
2274 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2275 }
79072805
LW
2276 return ')';
2277 }
3280af22
NIS
2278 if (PL_bufptr != PL_bufend)
2279 PL_bufptr += 2;
2280 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2281 return yylex();
79072805
LW
2282 }
2283 else {
3280af22 2284 s = PL_bufptr + 1;
79072805
LW
2285 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2286 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2287 if (strchr("LU", *s) &&
3280af22 2288 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2289 {
3280af22 2290 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2291 return ')';
2292 }
3280af22
NIS
2293 if (PL_lex_casemods > 10) {
2294 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2295 if (newlb != PL_lex_casestack) {
a0d0e21e 2296 SAVEFREEPV(newlb);
3280af22 2297 PL_lex_casestack = newlb;
a0d0e21e
LW
2298 }
2299 }
3280af22
NIS
2300 PL_lex_casestack[PL_lex_casemods++] = *s;
2301 PL_lex_casestack[PL_lex_casemods] = '\0';
2302 PL_lex_state = LEX_INTERPCONCAT;
2303 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2304 force_next('(');
2305 if (*s == 'l')
3280af22 2306 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2307 else if (*s == 'u')
3280af22 2308 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2309 else if (*s == 'L')
3280af22 2310 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2311 else if (*s == 'U')
3280af22 2312 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2313 else if (*s == 'Q')
3280af22 2314 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2315 else
cea2e8a9 2316 Perl_croak(aTHX_ "panic: yylex");
3280af22 2317 PL_bufptr = s + 1;
79072805 2318 force_next(FUNC);
3280af22
NIS
2319 if (PL_lex_starts) {
2320 s = PL_bufptr;
2321 PL_lex_starts = 0;
79072805
LW
2322 Aop(OP_CONCAT);
2323 }
2324 else
cea2e8a9 2325 return yylex();
79072805
LW
2326 }
2327
55497cff
PP
2328 case LEX_INTERPPUSH:
2329 return sublex_push();
2330
79072805 2331 case LEX_INTERPSTART:
3280af22 2332 if (PL_bufptr == PL_bufend)
79072805 2333 return sublex_done();
3280af22
NIS
2334 PL_expect = XTERM;
2335 PL_lex_dojoin = (*PL_bufptr == '@');
2336 PL_lex_state = LEX_INTERPNORMAL;
2337 if (PL_lex_dojoin) {
2338 PL_nextval[PL_nexttoke].ival = 0;
79072805 2339 force_next(',');
554b3eca 2340#ifdef USE_THREADS
533c011a
NIS
2341 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2342 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2343 force_next(PRIVATEREF);
2344#else
a0d0e21e 2345 force_ident("\"", '$');
554b3eca 2346#endif /* USE_THREADS */
3280af22 2347 PL_nextval[PL_nexttoke].ival = 0;
79072805 2348 force_next('$');
3280af22 2349 PL_nextval[PL_nexttoke].ival = 0;
79072805 2350 force_next('(');
3280af22 2351 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2352 force_next(FUNC);
2353 }
3280af22
NIS
2354 if (PL_lex_starts++) {
2355 s = PL_bufptr;
79072805
LW
2356 Aop(OP_CONCAT);
2357 }
cea2e8a9 2358 return yylex();
79072805
LW
2359
2360 case LEX_INTERPENDMAYBE:
3280af22
NIS
2361 if (intuit_more(PL_bufptr)) {
2362 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2363 break;
2364 }
2365 /* FALL THROUGH */
2366
2367 case LEX_INTERPEND:
3280af22
NIS
2368 if (PL_lex_dojoin) {
2369 PL_lex_dojoin = FALSE;
2370 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2371 return ')';
2372 }
43a16006 2373 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2374 && SvEVALED(PL_lex_repl))
43a16006 2375 {
e9fa98b2 2376 if (PL_bufptr != PL_bufend)
cea2e8a9 2377 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2378 PL_lex_repl = Nullsv;
2379 }
79072805
LW
2380 /* FALLTHROUGH */
2381 case LEX_INTERPCONCAT:
2382#ifdef DEBUGGING
3280af22 2383 if (PL_lex_brackets)
cea2e8a9 2384 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2385#endif
3280af22 2386 if (PL_bufptr == PL_bufend)
79072805
LW
2387 return sublex_done();
2388
3280af22
NIS
2389 if (SvIVX(PL_linestr) == '\'') {
2390 SV *sv = newSVsv(PL_linestr);
2391 if (!PL_lex_inpat)
76e3520e 2392 sv = tokeq(sv);
3280af22 2393 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2394 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2395 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2396 s = PL_bufend;
79072805
LW
2397 }
2398 else {
3280af22 2399 s = scan_const(PL_bufptr);
79072805 2400 if (*s == '\\')
3280af22 2401 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2402 else
3280af22 2403 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2404 }
2405
3280af22
NIS
2406 if (s != PL_bufptr) {
2407 PL_nextval[PL_nexttoke] = yylval;
2408 PL_expect = XTERM;
79072805 2409 force_next(THING);
3280af22 2410 if (PL_lex_starts++)
79072805
LW
2411 Aop(OP_CONCAT);
2412 else {
3280af22 2413 PL_bufptr = s;
cea2e8a9 2414 return yylex();
79072805
LW
2415 }
2416 }
2417
cea2e8a9 2418 return yylex();
a0d0e21e 2419 case LEX_FORMLINE:
3280af22
NIS
2420 PL_lex_state = LEX_NORMAL;
2421 s = scan_formline(PL_bufptr);
2422 if (!PL_lex_formbrack)
a0d0e21e
LW
2423 goto rightbracket;
2424 OPERATOR(';');
79072805
LW
2425 }
2426
3280af22
NIS
2427 s = PL_bufptr;
2428 PL_oldoldbufptr = PL_oldbufptr;
2429 PL_oldbufptr = s;
79072805 2430 DEBUG_p( {
bf49b057
GS
2431 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2432 exp_name[PL_expect], s);
79072805 2433 } )
463ee0b2
LW
2434
2435 retry:
378cc40b
LW
2436 switch (*s) {
2437 default:
7e2040f0 2438 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2439 goto keylookup;
cea2e8a9 2440 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2441 case 4:
2442 case 26:
2443 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2444 case 0:
3280af22
NIS
2445 if (!PL_rsfp) {
2446 PL_last_uni = 0;
2447 PL_last_lop = 0;
2448 if (PL_lex_brackets)
d98d5fff 2449 yyerror("Missing right curly or square bracket");
79072805 2450 TOKEN(0);
463ee0b2 2451 }
3280af22 2452 if (s++ < PL_bufend)
a687059c 2453 goto retry; /* ignore stray nulls */
3280af22
NIS
2454 PL_last_uni = 0;
2455 PL_last_lop = 0;
2456 if (!PL_in_eval && !PL_preambled) {
2457 PL_preambled = TRUE;
2458 sv_setpv(PL_linestr,incl_perldb());
2459 if (SvCUR(PL_linestr))
2460 sv_catpv(PL_linestr,";");
2461 if (PL_preambleav){
2462 while(AvFILLp(PL_preambleav) >= 0) {
2463 SV *tmpsv = av_shift(PL_preambleav);
2464 sv_catsv(PL_linestr, tmpsv);
2465 sv_catpv(PL_linestr, ";");
91b7def8
PP
2466 sv_free(tmpsv);
2467 }
3280af22
NIS
2468 sv_free((SV*)PL_preambleav);
2469 PL_preambleav = NULL;
91b7def8 2470 }
3280af22
NIS
2471 if (PL_minus_n || PL_minus_p) {
2472 sv_catpv(PL_linestr, "LINE: while (<>) {");
2473 if (PL_minus_l)
2474 sv_catpv(PL_linestr,"chomp;");
2475 if (PL_minus_a) {
8fd239a7
CS
2476 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2477 if (gv)
2478 GvIMPORTED_AV_on(gv);
3280af22
NIS
2479 if (PL_minus_F) {
2480 if (strchr("/'\"", *PL_splitstr)
2481 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2482 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2483 else {
2484 char delim;
2485 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2486 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2487 delim = *s;
cea2e8a9 2488 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2489 "q" + (delim == '\''), delim);
3280af22 2490 for (s = PL_splitstr; *s; s++) {
54310121 2491 if (*s == '\\')
3280af22
NIS
2492 sv_catpvn(PL_linestr, "\\", 1);
2493 sv_catpvn(PL_linestr, s, 1);
54310121 2494 }
cea2e8a9 2495 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2496 }
2304df62
AD
2497 }
2498 else
3280af22 2499 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2500 }
79072805 2501 }
3280af22
NIS
2502 sv_catpv(PL_linestr, "\n");
2503 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2504 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2505 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2506 SV *sv = NEWSV(85,0);
2507
2508 sv_upgrade(sv, SVt_PVMG);
3280af22 2509 sv_setsv(sv,PL_linestr);
57843af0 2510 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2511 }
79072805 2512 goto retry;
a687059c 2513 }
e929a76b 2514 do {
01ec43d0
GS
2515 bool bof;
2516 bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
dea0fc0b
JH
2517 s = filter_gets(PL_linestr, PL_rsfp, 0);
2518 if (s == Nullch) {
e929a76b 2519 fake_eof:
3280af22
NIS
2520 if (PL_rsfp) {
2521 if (PL_preprocess && !PL_in_eval)
2522 (void)PerlProc_pclose(PL_rsfp);
2523 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2524 PerlIO_clearerr(PL_rsfp);
395c3793 2525 else
3280af22
NIS
2526 (void)PerlIO_close(PL_rsfp);
2527 PL_rsfp = Nullfp;
4a9ae47a 2528 PL_doextract = FALSE;
395c3793 2529 }
3280af22
NIS
2530 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2531 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2532 sv_catpv(PL_linestr,";}");
2533 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2534 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2535 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2536 goto retry;
2537 }
3280af22
NIS
2538 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2539 sv_setpv(PL_linestr,"");
79072805 2540 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2541 } else if (bof) {
2542 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2543 s = swallow_bom((U8*)s);
378cc40b 2544 }
3280af22 2545 if (PL_doextract) {
a0d0e21e 2546 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2547 PL_doextract = FALSE;
a0d0e21e
LW
2548
2549 /* Incest with pod. */
2550 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2551 sv_setpv(PL_linestr, "");
2552 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2553 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2554 PL_doextract = FALSE;
a0d0e21e 2555 }
b250498f 2556 }
463ee0b2 2557 incline(s);
3280af22
NIS
2558 } while (PL_doextract);
2559 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2560 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2561 SV *sv = NEWSV(85,0);
a687059c 2562
93a17b20 2563 sv_upgrade(sv, SVt_PVMG);
3280af22 2564 sv_setsv(sv,PL_linestr);
57843af0 2565 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2566 }
3280af22 2567 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2568 if (CopLINE(PL_curcop) == 1) {
3280af22 2569 while (s < PL_bufend && isSPACE(*s))
79072805 2570 s++;
a0d0e21e 2571 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2572 s++;
44a8e56a 2573 d = Nullch;
3280af22 2574 if (!PL_in_eval) {
44a8e56a
PP
2575 if (*s == '#' && *(s+1) == '!')
2576 d = s + 2;
2577#ifdef ALTERNATE_SHEBANG
2578 else {
2579 static char as[] = ALTERNATE_SHEBANG;
2580 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2581 d = s + (sizeof(as) - 1);
2582 }
2583#endif /* ALTERNATE_SHEBANG */
2584 }
2585 if (d) {
b8378b72 2586 char *ipath;
774d564b 2587 char *ipathend;
b8378b72 2588
774d564b 2589 while (isSPACE(*d))
b8378b72
CS
2590 d++;
2591 ipath = d;
774d564b
PP
2592 while (*d && !isSPACE(*d))
2593 d++;
2594 ipathend = d;
2595
2596#ifdef ARG_ZERO_IS_SCRIPT
2597 if (ipathend > ipath) {
2598 /*
2599 * HP-UX (at least) sets argv[0] to the script name,
2600 * which makes $^X incorrect. And Digital UNIX and Linux,
2601 * at least, set argv[0] to the basename of the Perl
2602 * interpreter. So, having found "#!", we'll set it right.
2603 */
2604 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2605 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2606 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2607 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2608 SvSETMAGIC(x);
2609 }
774d564b 2610 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2611 }
774d564b 2612#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2613
2614 /*
2615 * Look for options.
2616 */
748a9306 2617 d = instr(s,"perl -");
84e30d1a 2618 if (!d) {
748a9306 2619 d = instr(s,"perl");
84e30d1a
GS
2620#if defined(DOSISH)
2621 /* avoid getting into infinite loops when shebang
2622 * line contains "Perl" rather than "perl" */
2623 if (!d) {
2624 for (d = ipathend-4; d >= ipath; --d) {
2625 if ((*d == 'p' || *d == 'P')
2626 && !ibcmp(d, "perl", 4))
2627 {
2628 break;
2629 }
2630 }
2631 if (d < ipath)
2632 d = Nullch;
2633 }
2634#endif
2635 }
44a8e56a
PP
2636#ifdef ALTERNATE_SHEBANG
2637 /*
2638 * If the ALTERNATE_SHEBANG on this system starts with a
2639 * character that can be part of a Perl expression, then if
2640 * we see it but not "perl", we're probably looking at the
2641 * start of Perl code, not a request to hand off to some
2642 * other interpreter. Similarly, if "perl" is there, but
2643 * not in the first 'word' of the line, we assume the line
2644 * contains the start of the Perl program.
44a8e56a
PP
2645 */
2646 if (d && *s != '#') {
774d564b 2647 char *c = ipath;
44a8e56a
PP
2648 while (*c && !strchr("; \t\r\n\f\v#", *c))
2649 c++;
2650 if (c < d)
2651 d = Nullch; /* "perl" not in first word; ignore */
2652 else
2653 *s = '#'; /* Don't try to parse shebang line */
2654 }
774d564b 2655#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2656#ifndef MACOS_TRADITIONAL
748a9306 2657 if (!d &&
44a8e56a 2658 *s == '#' &&
774d564b 2659 ipathend > ipath &&
3280af22 2660 !PL_minus_c &&
748a9306 2661 !instr(s,"indir") &&
3280af22 2662 instr(PL_origargv[0],"perl"))
748a9306 2663 {
9f68db38 2664 char **newargv;
9f68db38 2665
774d564b
PP
2666 *ipathend = '\0';
2667 s = ipathend + 1;
3280af22 2668 while (s < PL_bufend && isSPACE(*s))
9f68db38 2669 s++;
3280af22
NIS
2670 if (s < PL_bufend) {
2671 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2672 newargv[1] = s;
3280af22 2673 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2674 s++;
2675 *s = '\0';
3280af22 2676 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2677 }
2678 else
3280af22 2679 newargv = PL_origargv;
774d564b 2680 newargv[0] = ipath;
80252599 2681 PerlProc_execv(ipath, newargv);
cea2e8a9 2682 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2683 }
bf4acbe4 2684#endif
748a9306 2685 if (d) {
3280af22
NIS
2686 U32 oldpdb = PL_perldb;
2687 bool oldn = PL_minus_n;
2688 bool oldp = PL_minus_p;
748a9306
LW
2689
2690 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2691 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2692
2693 if (*d++ == '-') {
8cc95fdb
PP
2694 do {
2695 if (*d == 'M' || *d == 'm') {
2696 char *m = d;
2697 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2698 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2699 (int)(d - m), m);
2700 }
2701 d = moreswitches(d);
2702 } while (d);
155aba94
GS
2703 if ((PERLDB_LINE && !oldpdb) ||
2704 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2705 /* if we have already added "LINE: while (<>) {",
2706 we must not do it again */
748a9306 2707 {
3280af22
NIS
2708 sv_setpv(PL_linestr, "");
2709 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2710 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2711 PL_preambled = FALSE;
84902520 2712 if (PERLDB_LINE)
3280af22 2713 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2714 goto retry;
2715 }
a0d0e21e 2716 }
79072805 2717 }
9f68db38 2718 }
79072805 2719 }
3280af22
NIS
2720 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2721 PL_bufptr = s;
2722 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2723 return yylex();
ae986130 2724 }
378cc40b 2725 goto retry;
4fdae800 2726 case '\r':
6a27c188 2727#ifdef PERL_STRICT_CR
cea2e8a9
GS
2728 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2729 Perl_croak(aTHX_
cc507455 2730 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2731#endif
4fdae800 2732 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2733#ifdef MACOS_TRADITIONAL
2734 case '\312':
2735#endif
378cc40b
LW
2736 s++;
2737 goto retry;
378cc40b 2738 case '#':
e929a76b 2739 case '\n':
3280af22 2740 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2741 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2742 /* handle eval qq[#line 1 "foo"\n ...] */
2743 CopLINE_dec(PL_curcop);
2744 incline(s);
2745 }
3280af22 2746 d = PL_bufend;
a687059c 2747 while (s < d && *s != '\n')
378cc40b 2748 s++;
0f85fab0 2749 if (s < d)
378cc40b 2750 s++;
463ee0b2 2751 incline(s);
3280af22
NIS
2752 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2753 PL_bufptr = s;
2754 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2755 return yylex();
a687059c 2756 }
378cc40b 2757 }
a687059c 2758 else {
378cc40b 2759 *s = '\0';
3280af22 2760 PL_bufend = s;
a687059c 2761 }
378cc40b
LW
2762 goto retry;
2763 case '-':
79072805 2764 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2765 s++;
3280af22 2766 PL_bufptr = s;
748a9306
LW
2767 tmp = *s++;
2768
bf4acbe4 2769 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2770 s++;
2771
2772 if (strnEQ(s,"=>",2)) {
3280af22 2773 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2774 OPERATOR('-'); /* unary minus */
2775 }
3280af22
NIS
2776 PL_last_uni = PL_oldbufptr;
2777 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2778 switch (tmp) {
79072805
LW
2779 case 'r': FTST(OP_FTEREAD);
2780 case 'w': FTST(OP_FTEWRITE);
2781 case 'x': FTST(OP_FTEEXEC);
2782 case 'o': FTST(OP_FTEOWNED);
2783 case 'R': FTST(OP_FTRREAD);
2784 case 'W': FTST(OP_FTRWRITE);
2785 case 'X': FTST(OP_FTREXEC);
2786 case 'O': FTST(OP_FTROWNED);
2787 case 'e': FTST(OP_FTIS);
2788 case 'z': FTST(OP_FTZERO);
2789 case 's': FTST(OP_FTSIZE);
2790 case 'f': FTST(OP_FTFILE);
2791 case 'd': FTST(OP_FTDIR);
2792 case 'l': FTST(OP_FTLINK);
2793 case 'p': FTST(OP_FTPIPE);
2794 case 'S': FTST(OP_FTSOCK);
2795 case 'u': FTST(OP_FTSUID);
2796 case 'g': FTST(OP_FTSGID);
2797 case 'k': FTST(OP_FTSVTX);
2798 case 'b': FTST(OP_FTBLK);
2799 case 'c': FTST(OP_FTCHR);
2800 case 't': FTST(OP_FTTTY);
2801 case 'T': FTST(OP_FTTEXT);
2802 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2803 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2804 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2805 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2806 default:
cea2e8a9 2807 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2808 break;
2809 }
2810 }
a687059c
LW
2811 tmp = *s++;
2812 if (*s == tmp) {
2813 s++;
3280af22 2814 if (PL_expect == XOPERATOR)
79072805
LW
2815 TERM(POSTDEC);
2816 else
2817 OPERATOR(PREDEC);
2818 }
2819 else if (*s == '>') {
2820 s++;
2821 s = skipspace(s);
7e2040f0 2822 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2823 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2824 TOKEN(ARROW);
79072805 2825 }
748a9306
LW
2826 else if (*s == '$')
2827 OPERATOR(ARROW);
463ee0b2 2828 else
748a9306 2829 TERM(ARROW);
a687059c 2830 }
3280af22 2831 if (PL_expect == XOPERATOR)
79072805
LW
2832 Aop(OP_SUBTRACT);
2833 else {
3280af22 2834 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2835 check_uni();
79072805 2836 OPERATOR('-'); /* unary minus */
2f3197b3 2837 }
79072805 2838
378cc40b 2839 case '+':
a687059c
LW
2840 tmp = *s++;
2841 if (*s == tmp) {
378cc40b 2842 s++;
3280af22 2843 if (PL_expect == XOPERATOR)
79072805
LW
2844 TERM(POSTINC);
2845 else
2846 OPERATOR(PREINC);
378cc40b 2847 }
3280af22 2848 if (PL_expect == XOPERATOR)
79072805
LW
2849 Aop(OP_ADD);
2850 else {
3280af22 2851 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2852 check_uni();
a687059c 2853 OPERATOR('+');
2f3197b3 2854 }
a687059c 2855
378cc40b 2856 case '*':
3280af22
NIS
2857 if (PL_expect != XOPERATOR) {
2858 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2859 PL_expect = XOPERATOR;
2860 force_ident(PL_tokenbuf, '*');
2861 if (!*PL_tokenbuf)
a0d0e21e 2862 PREREF('*');
79072805 2863 TERM('*');
a687059c 2864 }
79072805
LW
2865 s++;
2866 if (*s == '*') {
a687059c 2867 s++;
79072805 2868 PWop(OP_POW);
a687059c 2869 }
79072805
LW
2870 Mop(OP_MULTIPLY);
2871
378cc40b 2872 case '%':
3280af22 2873 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2874 ++s;
2875 Mop(OP_MODULO);
a687059c 2876 }
3280af22
NIS
2877 PL_tokenbuf[0] = '%';
2878 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2879 if (!PL_tokenbuf[1]) {
2880 if (s == PL_bufend)
bbce6d69
PP
2881 yyerror("Final % should be \\% or %name");
2882 PREREF('%');
a687059c 2883 }
3280af22 2884 PL_pending_ident = '%';
bbce6d69 2885 TERM('%');
a687059c 2886
378cc40b 2887 case '^':
79072805 2888 s++;
a0d0e21e 2889 BOop(OP_BIT_XOR);
79072805 2890 case '[':
3280af22 2891 PL_lex_brackets++;
79072805 2892 /* FALL THROUGH */
378cc40b 2893 case '~':
378cc40b 2894 case ',':
378cc40b
LW
2895 tmp = *s++;
2896 OPERATOR(tmp);
a0d0e21e
LW
2897 case ':':
2898 if (s[1] == ':') {
2899 len = 0;
2900 goto just_a_word;
2901 }
2902 s++;
09bef843
SB
2903 switch (PL_expect) {
2904 OP *attrs;
2905 case XOPERATOR:
2906 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2907 break;
2908 PL_bufptr = s; /* update in case we back off */
2909 goto grabattrs;
2910 case XATTRBLOCK:
2911 PL_expect = XBLOCK;
2912 goto grabattrs;
2913 case XATTRTERM:
2914 PL_expect = XTERMBLOCK;
2915 grabattrs:
2916 s = skipspace(s);
2917 attrs = Nullop;
7e2040f0 2918 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2919 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2920 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2921 if (tmp < 0) tmp = -tmp;
2922 switch (tmp) {
2923 case KEY_or:
2924 case KEY_and:
2925 case KEY_for:
2926 case KEY_unless:
2927 case KEY_if:
2928 case KEY_while:
2929 case KEY_until:
2930 goto got_attrs;
2931 default:
2932 break;
2933 }
2934 }
09bef843
SB
2935 if (*d == '(') {
2936 d = scan_str(d,TRUE,TRUE);
2937 if (!d) {
2938 if (PL_lex_stuff) {
2939 SvREFCNT_dec(PL_lex_stuff);
2940 PL_lex_stuff = Nullsv;
2941 }
2942 /* MUST advance bufptr here to avoid bogus
2943 "at end of line" context messages from yyerror().
2944 */
2945 PL_bufptr = s + len;
2946 yyerror("Unterminated attribute parameter in attribute list");
2947 if (attrs)
2948 op_free(attrs);
2949 return 0; /* EOF indicator */
2950 }
2951 }
2952 if (PL_lex_stuff) {
2953 SV *sv = newSVpvn(s, len);
2954 sv_catsv(sv, PL_lex_stuff);
2955 attrs = append_elem(OP_LIST, attrs,
2956 newSVOP(OP_CONST, 0, sv));
2957 SvREFCNT_dec(PL_lex_stuff);
2958 PL_lex_stuff = Nullsv;
2959 }
2960 else {
2961 attrs = append_elem(OP_LIST, attrs,
2962 newSVOP(OP_CONST, 0,
2963 newSVpvn(s, len)));
2964 }
2965 s = skipspace(d);
0120eecf 2966 if (*s == ':' && s[1] != ':')
09bef843 2967 s = skipspace(s+1);
0120eecf
GS
2968 else if (s == d)
2969 break; /* require real whitespace or :'s */
09bef843 2970 }
f9829d6b
GS
2971 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2972 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2973 char q = ((*s == '\'') ? '"' : '\'');
2974 /* If here for an expression, and parsed no attrs, back off. */
2975 if (tmp == '=' && !attrs) {
2976 s = PL_bufptr;
2977 break;
2978 }
2979 /* MUST advance bufptr here to avoid bogus "at end of line"
2980 context messages from yyerror().
2981 */
2982 PL_bufptr = s;
2983 if (!*s)
2984 yyerror("Unterminated attribute list");
2985 else
2986 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2987 q, *s, q));
2988 if (attrs)
2989 op_free(attrs);
2990 OPERATOR(':');
2991 }
f9829d6b 2992 got_attrs:
09bef843
SB
2993 if (attrs) {
2994 PL_nextval[PL_nexttoke].opval = attrs;
2995 force_next(THING);
2996 }
2997 TOKEN(COLONATTR);
2998 }
a0d0e21e 2999 OPERATOR(':');
8990e307
LW
3000 case '(':
3001 s++;
3280af22
NIS
3002 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3003 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3004 else
3280af22 3005 PL_expect = XTERM;
a0d0e21e 3006 TOKEN('(');
378cc40b 3007 case ';':
f4dd75d9 3008 CLINE;
378cc40b
LW
3009 tmp = *s++;
3010 OPERATOR(tmp);
3011 case ')':
378cc40b 3012 tmp = *s++;
16d20bd9
AD
3013 s = skipspace(s);
3014 if (*s == '{')
3015 PREBLOCK(tmp);
378cc40b 3016 TERM(tmp);
79072805
LW
3017 case ']':
3018 s++;
3280af22 3019 if (PL_lex_brackets <= 0)
d98d5fff 3020 yyerror("Unmatched right square bracket");
463ee0b2 3021 else
3280af22
NIS
3022 --PL_lex_brackets;
3023 if (PL_lex_state == LEX_INTERPNORMAL) {
3024 if (PL_lex_brackets == 0) {
a0d0e21e 3025 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3026 PL_lex_state = LEX_INTERPEND;
79072805
LW
3027 }
3028 }
4633a7c4 3029 TERM(']');
79072805
LW
3030 case '{':
3031 leftbracket:
79072805 3032 s++;
3280af22
NIS
3033 if (PL_lex_brackets > 100) {
3034 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3035 if (newlb != PL_lex_brackstack) {
8990e307 3036 SAVEFREEPV(newlb);
3280af22 3037 PL_lex_brackstack = newlb;
8990e307
LW
3038 }
3039 }
3280af22 3040 switch (PL_expect) {
a0d0e21e 3041 case XTERM:
3280af22 3042 if (PL_lex_formbrack) {
a0d0e21e
LW
3043 s--;
3044 PRETERMBLOCK(DO);
3045 }
3280af22
NIS
3046 if (PL_oldoldbufptr == PL_last_lop)
3047 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3048 else
3280af22 3049 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3050 OPERATOR(HASHBRACK);
a0d0e21e 3051 case XOPERATOR:
bf4acbe4 3052 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3053 s++;
44a8e56a 3054 d = s;
3280af22
NIS
3055 PL_tokenbuf[0] = '\0';
3056 if (d < PL_bufend && *d == '-') {
3057 PL_tokenbuf[0] = '-';
44a8e56a 3058 d++;
bf4acbe4 3059 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3060 d++;
3061 }
7e2040f0 3062 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3063 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3064 FALSE, &len);
bf4acbe4 3065 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3066 d++;
3067 if (*d == '}') {
3280af22 3068 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3069 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3070 if (minus)
3071 force_next('-');
748a9306
LW
3072 }
3073 }
3074 /* FALL THROUGH */
09bef843 3075 case XATTRBLOCK:
748a9306 3076 case XBLOCK:
3280af22
NIS
3077 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3078 PL_expect = XSTATE;
a0d0e21e 3079 break;
09bef843 3080 case XATTRTERM:
a0d0e21e 3081 case XTERMBLOCK:
3280af22
NIS
3082 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3083 PL_expect = XSTATE;
a0d0e21e
LW
3084 break;
3085 default: {
3086 char *t;
3280af22
NIS
3087 if (PL_oldoldbufptr == PL_last_lop)
3088 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3089 else
3280af22 3090 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3091 s = skipspace(s);
09ecc4b6 3092 if (*s == '}')
a0d0e21e 3093 OPERATOR(HASHBRACK);
b8a4b1be
GS
3094 /* This hack serves to disambiguate a pair of curlies
3095 * as being a block or an anon hash. Normally, expectation
3096 * determines that, but in cases where we're not in a
3097 * position to expect anything in particular (like inside
3098 * eval"") we have to resolve the ambiguity. This code
3099 * covers the case where the first term in the curlies is a
3100 * quoted string. Most other cases need to be explicitly
3101 * disambiguated by prepending a `+' before the opening
3102 * curly in order to force resolution as an anon hash.
3103 *
3104 * XXX should probably propagate the outer expectation
3105 * into eval"" to rely less on this hack, but that could
3106 * potentially break current behavior of eval"".
3107 * GSAR 97-07-21
3108 */
3109 t = s;
3110 if (*s == '\'' || *s == '"' || *s == '`') {
3111 /* common case: get past first string, handling escapes */
3280af22 3112 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3113 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3114 t++;
3115 t++;
a0d0e21e 3116 }
b8a4b1be 3117 else if (*s == 'q') {
3280af22 3118 if (++t < PL_bufend
b8a4b1be 3119 && (!isALNUM(*t)
3280af22 3120 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3121 && !isALNUM(*t))))
3122 {
b8a4b1be
GS
3123 char *tmps;
3124 char open, close, term;
3125 I32 brackets = 1;
3126
3280af22 3127 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3128 t++;
3129 term = *t;
3130 open = term;
3131 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3132 term = tmps[5];
3133 close = term;
3134 if (open == close)
3280af22
NIS
3135 for (t++; t < PL_bufend; t++) {
3136 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3137 t++;
6d07e5e9 3138 else if (*t == open)
b8a4b1be
GS
3139 break;
3140 }
3141 else
3280af22
NIS
3142 for (t++; t < PL_bufend; t++) {
3143 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3144 t++;
6d07e5e9 3145 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3146 break;
3147 else if (*t == open)
3148 brackets++;
3149 }
3150 }
3151 t++;
a0d0e21e 3152 }
7e2040f0 3153 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3154 t += UTF8SKIP(t);
7e2040f0 3155 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3156 t += UTF8SKIP(t);
a0d0e21e 3157 }
3280af22 3158 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3159 t++;
b8a4b1be
GS
3160 /* if comma follows first term, call it an anon hash */
3161 /* XXX it could be a comma expression with loop modifiers */
3280af22 3162 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3163 || (*t == '=' && t[1] == '>')))
a0d0e21e 3164 OPERATOR(HASHBRACK);
3280af22 3165 if (PL_expect == XREF)
4e4e412b 3166 PL_expect = XTERM;
a0d0e21e 3167 else {
3280af22
NIS
3168 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3169 PL_expect = XSTATE;
a0d0e21e 3170 }
8990e307 3171 }
a0d0e21e 3172 break;
463ee0b2 3173 }
57843af0 3174 yylval.ival = CopLINE(PL_curcop);
79072805 3175 if (isSPACE(*s) || *s == '#')
3280af22 3176 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3177 TOKEN('{');
378cc40b 3178 case '}':
79072805
LW
3179 rightbracket:
3180 s++;
3280af22 3181 if (PL_lex_brackets <= 0)
d98d5fff 3182 yyerror("Unmatched right curly bracket");
463ee0b2 3183 else
3280af22 3184 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3185 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3186 PL_lex_formbrack = 0;
3187 if (PL_lex_state == LEX_INTERPNORMAL) {
3188 if (PL_lex_brackets == 0) {
9059aa12
LW
3189 if (PL_expect & XFAKEBRACK) {
3190 PL_expect &= XENUMMASK;
3280af22
NIS
3191 PL_lex_state = LEX_INTERPEND;
3192 PL_bufptr = s;
cea2e8a9 3193 return yylex(); /* ignore fake brackets */
79072805 3194 }
fa83b5b6 3195 if (*s == '-' && s[1] == '>')
3280af22 3196 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3197 else if (*s != '[' && *s != '{')
3280af22 3198 PL_lex_state = LEX_INTERPEND;
79072805
LW
3199 }
3200 }
9059aa12
LW
3201 if (PL_expect & XFAKEBRACK) {
3202 PL_expect &= XENUMMASK;
3280af22 3203 PL_bufptr = s;
cea2e8a9 3204 return yylex(); /* ignore fake brackets */
748a9306 3205 }
79072805
LW
3206 force_next('}');
3207 TOKEN(';');
378cc40b
LW
3208 case '&':
3209 s++;
3210 tmp = *s++;
3211 if (tmp == '&')
a0d0e21e 3212 AOPERATOR(ANDAND);
378cc40b 3213 s--;
3280af22 3214 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3215 if (ckWARN(WARN_SEMICOLON)
3216 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3217 {
57843af0 3218 CopLINE_dec(PL_curcop);
cea2e8a9 3219 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3220 CopLINE_inc(PL_curcop);
463ee0b2 3221 }
79072805 3222 BAop(OP_BIT_AND);
463ee0b2 3223 }
79072805 3224
3280af22
NIS
3225 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3226 if (*PL_tokenbuf) {
3227 PL_expect = XOPERATOR;
3228 force_ident(PL_tokenbuf, '&');
463ee0b2 3229 }
79072805
LW
3230 else
3231 PREREF('&');
c07a80fd 3232 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3233 TERM('&');
3234
378cc40b
LW
3235 case '|':
3236 s++;
3237 tmp = *s++;
3238 if (tmp == '|')
a0d0e21e 3239 AOPERATOR(OROR);
378cc40b 3240 s--;
79072805 3241 BOop(OP_BIT_OR);
378cc40b
LW
3242 case '=':
3243 s++;
3244 tmp = *s++;
3245 if (tmp == '=')
79072805
LW
3246 Eop(OP_EQ);
3247 if (tmp == '>')
3248 OPERATOR(',');
378cc40b 3249 if (tmp == '~')
79072805 3250 PMop(OP_MATCH);
599cee73 3251 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3252 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3253 s--;
3280af22
NIS
3254 if (PL_expect == XSTATE && isALPHA(tmp) &&
3255 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3256 {
3280af22
NIS
3257 if (PL_in_eval && !PL_rsfp) {
3258 d = PL_bufend;
a5f75d66
AD
3259 while (s < d) {
3260 if (*s++ == '\n') {
3261 incline(s);
3262 if (strnEQ(s,"=cut",4)) {
3263 s = strchr(s,'\n');
3264 if (s)
3265 s++;
3266 else
3267 s = d;
3268 incline(s);
3269 goto retry;
3270 }
3271 }
3272 }
3273 goto retry;
3274 }
3280af22
NIS
3275 s = PL_bufend;
3276 PL_doextract = TRUE;
a0d0e21e
LW
3277 goto retry;
3278 }
3280af22 3279 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3280 char *t;
51882d45 3281#ifdef PERL_STRICT_CR
bf4acbe4 3282 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3283#else
bf4acbe4 3284 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3285#endif
a0d0e21e
LW
3286 if (*t == '\n' || *t == '#') {
3287 s--;
3280af22 3288 PL_expect = XBLOCK;
a0d0e21e
LW
3289 goto leftbracket;
3290 }
79072805 3291 }
a0d0e21e
LW
3292 yylval.ival = 0;
3293 OPERATOR(ASSIGNOP);
378cc40b
LW
3294 case '!':
3295 s++;
3296 tmp = *s++;
3297 if (tmp == '=')
79072805 3298 Eop(OP_NE);
378cc40b 3299 if (tmp == '~')
79072805 3300 PMop(OP_NOT);
378cc40b
LW
3301 s--;
3302 OPERATOR('!');
3303 case '<':
3280af22 3304 if (PL_expect != XOPERATOR) {
93a17b20 3305 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3306 check_uni();
79072805
LW
3307 if (s[1] == '<')
3308 s = scan_heredoc(s);
3309 else
3310 s = scan_inputsymbol(s);
3311 TERM(sublex_start());
378cc40b
LW
3312 }
3313 s++;
3314 tmp = *s++;
3315 if (tmp == '<')
79072805 3316 SHop(OP_LEFT_SHIFT);
395c3793
LW
3317 if (tmp == '=') {
3318 tmp = *s++;
3319 if (tmp == '>')
79072805 3320 Eop(OP_NCMP);
395c3793 3321 s--;
79072805 3322 Rop(OP_LE);
395c3793 3323 }
378cc40b 3324 s--;
79072805 3325 Rop(OP_LT);
378cc40b
LW
3326 case '>':
3327 s++;
3328 tmp = *s++;
3329 if (tmp == '>')
79072805 3330 SHop(OP_RIGHT_SHIFT);
378cc40b 3331 if (tmp == '=')
79072805 3332 Rop(OP_GE);
378cc40b 3333 s--;
79072805 3334 Rop(OP_GT);
378cc40b
LW
3335
3336 case '$':
bbce6d69
PP
3337 CLINE;
3338
3280af22
NIS
3339 if (PL_expect == XOPERATOR) {
3340 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3341 PL_expect = XTERM;
a0d0e21e 3342 depcom();
bbce6d69 3343 return ','; /* grandfather non-comma-format format */
a0d0e21e 3344 }
8990e307 3345 }
a0d0e21e 3346
7e2040f0 3347 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3348 PL_tokenbuf[0] = '@';
376b8730
SM
3349 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3350 sizeof PL_tokenbuf - 1, FALSE);
3351 if (PL_expect == XOPERATOR)
3352 no_op("Array length", s);
3280af22 3353 if (!PL_tokenbuf[1])
a0d0e21e 3354 PREREF(DOLSHARP);
3280af22
NIS
3355 PL_expect = XOPERATOR;
3356 PL_pending_ident = '#';
463ee0b2 3357 TOKEN(DOLSHARP);
79072805 3358 }
bbce6d69 3359
3280af22 3360 PL_tokenbuf[0] = '$';
376b8730
SM
3361 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3362 sizeof PL_tokenbuf - 1, FALSE);
3363 if (PL_expect == XOPERATOR)
3364 no_op("Scalar", s);
3280af22
NIS
3365 if (!PL_tokenbuf[1]) {
3366 if (s == PL_bufend)
bbce6d69
PP
3367 yyerror("Final $ should be \\$ or $name");
3368 PREREF('$');
8990e307 3369 }
a0d0e21e 3370
bbce6d69 3371 /* This kludge not intended to be bulletproof. */
3280af22 3372 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3373 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3374 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3375 yylval.opval->op_private = OPpCONST_ARYBASE;
3376 TERM(THING);
3377 }
3378
ff68c719 3379 d = s;
69d2bceb 3380 tmp = (I32)*s;
3280af22 3381 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3382 s = skipspace(s);
3383
3280af22 3384 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3385 char *t;
3386 if (*s == '[') {
3280af22 3387 PL_tokenbuf[0] = '@';
599cee73 3388 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3389 for(t = s + 1;
7e2040f0 3390 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3391 t++) ;
a0d0e21e 3392 if (*t++ == ',') {
3280af22
NIS
3393 PL_bufptr = skipspace(PL_bufptr);
3394 while (t < PL_bufend && *t != ']')
bbce6d69 3395 t++;
cea2e8a9 3396 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3397 "Multidimensional syntax %.*s not supported",
3398 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3399 }
3400 }
bbce6d69
PP
3401 }
3402 else if (*s == '{') {
3280af22 3403 PL_tokenbuf[0] = '%';
599cee73 3404 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3405 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3406 {
3280af22 3407 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3408 STRLEN len;
3409 for (t++; isSPACE(*t); t++) ;
7e2040f0 3410 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3411 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3412 for (; isSPACE(*t); t++) ;
864dbfa3 3413 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3414 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3415 "You need to quote \"%s\"", tmpbuf);
748a9306 3416 }
93a17b20
LW
3417 }
3418 }
2f3197b3 3419 }
bbce6d69 3420
3280af22 3421 PL_expect = XOPERATOR;
69d2bceb 3422 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3423 bool islop = (PL_last_lop == PL_oldoldbufptr);
3424 if (!islop || PL_last_lop_op == OP_GREPSTART)
3425 PL_expect = XOPERATOR;
bbce6d69 3426 else if (strchr("$@\"'`q", *s))
3280af22 3427 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3428 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3429 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3430 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3431 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3432 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3433 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3434 /* binary operators exclude handle interpretations */
3435 switch (tmp) {
3436 case -KEY_x:
3437 case -KEY_eq:
3438 case -KEY_ne:
3439 case -KEY_gt:
3440 case -KEY_lt:
3441 case -KEY_ge:
3442 case -KEY_le:
3443 case -KEY_cmp:
3444 break;
3445 default:
3280af22 3446 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3447 break;
3448 }
3449 }
68dc0745
PP
3450 else {
3451 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3452 if (gv && GvCVu(gv))
3280af22 3453 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3454 }
93a17b20 3455 }
bbce6d69 3456 else if (isDIGIT(*s))
3280af22 3457 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3458 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3459 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3460 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3461 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3462 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3463 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3464 }
3280af22 3465 PL_pending_ident = '$';
79072805 3466 TOKEN('$');
378cc40b
LW
3467
3468 case '@':
3280af22 3469 if (PL_expect == XOPERATOR)
bbce6d69 3470 no_op("Array", s);
3280af22
NIS
3471 PL_tokenbuf[0] = '@';
3472 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3473 if (!PL_tokenbuf[1]) {
3474 if (s == PL_bufend)
bbce6d69
PP
3475 yyerror("Final @ should be \\@ or @name");
3476 PREREF('@');
3477 }
3280af22 3478 if (PL_lex_state == LEX_NORMAL)
ff68c719 3479 s = skipspace(s);
3280af22 3480 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3481 if (*s == '{')
3280af22 3482 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3483
3484 /* Warn about @ where they meant $. */
599cee73 3485 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3486 if (*s == '[' || *s == '{') {
3487 char *t = s + 1;
7e2040f0 3488 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3489 t++;
3490 if (*t == '}' || *t == ']') {
3491 t++;
3280af22 3492 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3493 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3494 "Scalar value %.*s better written as $%.*s",
3280af22 3495 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3496 }
93a17b20
LW
3497 }
3498 }
463ee0b2 3499 }
3280af22 3500 PL_pending_ident = '@';
79072805 3501 TERM('@');
378cc40b
LW
3502
3503 case '/': /* may either be division or pattern */
3504 case '?': /* may either be conditional or pattern */
3280af22 3505 if (PL_expect != XOPERATOR) {
c277df42 3506 /* Disable warning on "study /blah/" */
3280af22
NIS
3507 if (PL_oldoldbufptr == PL_last_uni
3508 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3509 || memNE(PL_last_uni, "study", 5)
3510 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3511 check_uni();
8782bef2 3512 s = scan_pat(s,OP_MATCH);
79072805 3513 TERM(sublex_start());
378cc40b
LW
3514 }
3515 tmp = *s++;
a687059c 3516 if (tmp == '/')
79072805 3517 Mop(OP_DIVIDE);
378cc40b
LW
3518 OPERATOR(tmp);
3519
3520 case '.':
51882d45
GS
3521 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3522#ifdef PERL_STRICT_CR
3523 && s[1] == '\n'
3524#else
3525 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3526#endif
3527 && (s == PL_linestart || s[-1] == '\n') )
3528 {
3280af22
NIS
3529 PL_lex_formbrack = 0;
3530 PL_expect = XSTATE;
79072805
LW
3531 goto rightbracket;
3532 }
3280af22 3533 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3534 tmp = *s++;
a687059c
LW
3535 if (*s == tmp) {
3536 s++;
2f3197b3
LW
3537 if (*s == tmp) {
3538 s++;
79072805 3539 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3540 }
3541 else
79072805 3542 yylval.ival = 0;
378cc40b 3543 OPERATOR(DOTDOT);
a687059c 3544 }
3280af22 3545 if (PL_expect != XOPERATOR)
2f3197b3 3546 check_uni();
79072805 3547 Aop(OP_CONCAT);
378cc40b
LW
3548 }
3549 /* FALL THROUGH */
3550 case '0': case '1': case '2': case '3': case '4':
3551 case '5': case '6': case '7': case '8': case '9':
79072805 3552 s = scan_num(s);
3280af22 3553 if (PL_expect == XOPERATOR)
8990e307 3554 no_op("Number",s);
79072805
LW
3555 TERM(THING);
3556
3557 case '\'':
09bef843 3558 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3559 if (PL_expect == XOPERATOR) {
3560 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3561 PL_expect = XTERM;
a0d0e21e
LW
3562 depcom();
3563 return ','; /* grandfather non-comma-format format */
3564 }
463ee0b2 3565 else
8990e307 3566 no_op("String",s);
463ee0b2 3567 }
79072805 3568 if (!s)
85e6fe83 3569 missingterm((char*)0);
79072805
LW
3570 yylval.ival = OP_CONST;
3571 TERM(sublex_start());
3572
3573 case '"':
09bef843 3574 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3575 if (PL_expect == XOPERATOR) {
3576 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3577 PL_expect = XTERM;
a0d0e21e
LW
3578 depcom();
3579 return ','; /* grandfather non-comma-format format */
3580 }
463ee0b2 3581 else
8990e307 3582 no_op("String",s);
463ee0b2 3583 }
79072805 3584 if (!s)
85e6fe83