This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Changes.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
a790bc05
PP
72/* XXX If this causes problems, set i_unistd=undef in the hint file. */
73#ifdef I_UNISTD
74# include <unistd.h> /* Needed for execv() */
75#endif
76
77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e 82#ifdef USE_PURE_BISON
dba4d153
JH
83# ifndef YYMAXLEVEL
84# define YYMAXLEVEL 100
85# endif
20141f0e
RI
86YYSTYPE* yylval_pointer[YYMAXLEVEL];
87int* yychar_pointer[YYMAXLEVEL];
88int yyactlevel = 0;
22c35a8c
GS
89# undef yylval
90# undef yychar
20141f0e
RI
91# define yylval (*yylval_pointer[yyactlevel])
92# define yychar (*yychar_pointer[yyactlevel])
93# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
94# undef yylex
dba4d153 95# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
3280af22
NIS
135#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
136#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
137#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
138#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
139#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
140#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
141#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
142#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
143#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
144#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
145#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
146#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
147#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
148#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
149#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
150#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
151#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
152#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
153#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
154#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 155
a687059c
LW
156/* This bit of chicanery makes a unary function followed by
157 * a parenthesis into a function with one argument, highest precedence.
158 */
2f3197b3 159#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
160 PL_expect = XTERM, \
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
163 PL_last_lop_op = f, \
a687059c
LW
164 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165
79072805 166#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
167 PL_bufptr = s, \
168 PL_last_uni = PL_oldbufptr, \
79072805
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
9f68db38 171/* grandfather return to old style */
3280af22 172#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 173
ffb4593c
NT
174/*
175 * S_ao
176 *
177 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
178 * into an OP_ANDASSIGN or OP_ORASSIGN
179 */
180
76e3520e 181STATIC int
cea2e8a9 182S_ao(pTHX_ int toketype)
a0d0e21e 183{
3280af22
NIS
184 if (*PL_bufptr == '=') {
185 PL_bufptr++;
a0d0e21e
LW
186 if (toketype == ANDAND)
187 yylval.ival = OP_ANDASSIGN;
188 else if (toketype == OROR)
189 yylval.ival = OP_ORASSIGN;
190 toketype = ASSIGNOP;
191 }
192 return toketype;
193}
194
ffb4593c
NT
195/*
196 * S_no_op
197 * When Perl expects an operator and finds something else, no_op
198 * prints the warning. It always prints "<something> found where
199 * operator expected. It prints "Missing semicolon on previous line?"
200 * if the surprise occurs at the start of the line. "do you need to
201 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
202 * where the compiler doesn't know if foo is a method call or a function.
203 * It prints "Missing operator before end of line" if there's nothing
204 * after the missing operator, or "... before <...>" if there is something
205 * after the missing operator.
206 */
207
76e3520e 208STATIC void
cea2e8a9 209S_no_op(pTHX_ char *what, char *s)
463ee0b2 210{
3280af22
NIS
211 char *oldbp = PL_bufptr;
212 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 213
1189a94a
GS
214 if (!s)
215 s = oldbp;
07c798fb 216 else
1189a94a 217 PL_bufptr = s;
cea2e8a9 218 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 219 if (is_first)
cea2e8a9 220 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 221 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 222 char *t;
7e2040f0 223 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 224 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 225 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 226 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 227 }
07c798fb
HS
228 else {
229 assert(s >= oldbp);
cea2e8a9 230 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 231 }
3280af22 232 PL_bufptr = oldbp;
8990e307
LW
233}
234
ffb4593c
NT
235/*
236 * S_missingterm
237 * Complain about missing quote/regexp/heredoc terminator.
238 * If it's called with (char *)NULL then it cauterizes the line buffer.
239 * If we're in a delimited string and the delimiter is a control
240 * character, it's reformatted into a two-char sequence like ^C.
241 * This is fatal.
242 */
243
76e3520e 244STATIC void
cea2e8a9 245S_missingterm(pTHX_ char *s)
8990e307
LW
246{
247 char tmpbuf[3];
248 char q;
249 if (s) {
250 char *nl = strrchr(s,'\n');
d2719217 251 if (nl)
8990e307
LW
252 *nl = '\0';
253 }
9d116dd7
JH
254 else if (
255#ifdef EBCDIC
256 iscntrl(PL_multi_close)
257#else
258 PL_multi_close < 32 || PL_multi_close == 127
259#endif
260 ) {
8990e307 261 *tmpbuf = '^';
3280af22 262 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
263 s = "\\n";
264 tmpbuf[2] = '\0';
265 s = tmpbuf;
266 }
267 else {
3280af22 268 *tmpbuf = PL_multi_close;
8990e307
LW
269 tmpbuf[1] = '\0';
270 s = tmpbuf;
271 }
272 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 273 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 274}
79072805 275
ffb4593c
NT
276/*
277 * Perl_deprecate
ffb4593c
NT
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
9cbb5ea2 290 * Deprecate a comma-less variable list.
ffb4593c
NT
291 */
292
76e3520e 293STATIC void
cea2e8a9 294S_depcom(pTHX)
a0d0e21e
LW
295{
296 deprecate("comma-less variable list");
297}
298
ffb4593c 299/*
9cbb5ea2
GS
300 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
301 * utf16-to-utf8-reversed.
ffb4593c
NT
302 */
303
c39cd008
GS
304#ifdef PERL_CR_FILTER
305static void
306strip_return(SV *sv)
307{
308 register char *s = SvPVX(sv);
309 register char *e = s + SvCUR(sv);
310 /* outer loop optimized to do nothing if there are no CR-LFs */
311 while (s < e) {
312 if (*s++ == '\r' && *s == '\n') {
313 /* hit a CR-LF, need to copy the rest */
314 register char *d = s - 1;
315 *d++ = *s++;
316 while (s < e) {
317 if (*s == '\r' && s[1] == '\n')
318 s++;
319 *d++ = *s++;
320 }
321 SvCUR(sv) -= s - d;
322 return;
323 }
324 }
325}
a868473f 326
76e3520e 327STATIC I32
c39cd008 328S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 329{
c39cd008
GS
330 I32 count = FILTER_READ(idx+1, sv, maxlen);
331 if (count > 0 && !maxlen)
332 strip_return(sv);
333 return count;
a868473f
NIS
334}
335#endif
336
ffb4593c
NT
337/*
338 * Perl_lex_start
9cbb5ea2
GS
339 * Initialize variables. Uses the Perl save_stack to save its state (for
340 * recursive calls to the parser).
ffb4593c
NT
341 */
342
a0d0e21e 343void
864dbfa3 344Perl_lex_start(pTHX_ SV *line)
79072805 345{
0f15f207 346 dTHR;
8990e307
LW
347 char *s;
348 STRLEN len;
349
3280af22
NIS
350 SAVEI32(PL_lex_dojoin);
351 SAVEI32(PL_lex_brackets);
3280af22
NIS
352 SAVEI32(PL_lex_casemods);
353 SAVEI32(PL_lex_starts);
354 SAVEI32(PL_lex_state);
7766f137 355 SAVEVPTR(PL_lex_inpat);
3280af22 356 SAVEI32(PL_lex_inwhat);
18b09519
GS
357 if (PL_lex_state == LEX_KNOWNEXT) {
358 I32 toke = PL_nexttoke;
359 while (--toke >= 0) {
360 SAVEI32(PL_nexttype[toke]);
361 SAVEVPTR(PL_nextval[toke]);
362 }
363 SAVEI32(PL_nexttoke);
18b09519 364 }
57843af0 365 SAVECOPLINE(PL_curcop);
3280af22
NIS
366 SAVEPPTR(PL_bufptr);
367 SAVEPPTR(PL_bufend);
368 SAVEPPTR(PL_oldbufptr);
369 SAVEPPTR(PL_oldoldbufptr);
370 SAVEPPTR(PL_linestart);
371 SAVESPTR(PL_linestr);
372 SAVEPPTR(PL_lex_brackstack);
373 SAVEPPTR(PL_lex_casestack);
c76ac1ee 374 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
375 SAVESPTR(PL_lex_stuff);
376 SAVEI32(PL_lex_defer);
09bef843 377 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 378 SAVESPTR(PL_lex_repl);
bebdddfc
GS
379 SAVEINT(PL_expect);
380 SAVEINT(PL_lex_expect);
3280af22
NIS
381
382 PL_lex_state = LEX_NORMAL;
383 PL_lex_defer = 0;
384 PL_expect = XSTATE;
385 PL_lex_brackets = 0;
3280af22
NIS
386 New(899, PL_lex_brackstack, 120, char);
387 New(899, PL_lex_casestack, 12, char);
388 SAVEFREEPV(PL_lex_brackstack);
389 SAVEFREEPV(PL_lex_casestack);
390 PL_lex_casemods = 0;
391 *PL_lex_casestack = '\0';
392 PL_lex_dojoin = 0;
393 PL_lex_starts = 0;
394 PL_lex_stuff = Nullsv;
395 PL_lex_repl = Nullsv;
396 PL_lex_inpat = 0;
76be56bc 397 PL_nexttoke = 0;
3280af22 398 PL_lex_inwhat = 0;
09bef843 399 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
400 PL_linestr = line;
401 if (SvREADONLY(PL_linestr))
402 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
403 s = SvPV(PL_linestr, len);
8990e307 404 if (len && s[len-1] != ';') {
3280af22
NIS
405 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
406 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
407 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 408 }
3280af22
NIS
409 SvTEMP_off(PL_linestr);
410 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
411 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
412 SvREFCNT_dec(PL_rs);
79cb57f6 413 PL_rs = newSVpvn("\n", 1);
3280af22 414 PL_rsfp = 0;
79072805 415}
a687059c 416
ffb4593c
NT
417/*
418 * Perl_lex_end
9cbb5ea2
GS
419 * Finalizer for lexing operations. Must be called when the parser is
420 * done with the lexer.
ffb4593c
NT
421 */
422
463ee0b2 423void
864dbfa3 424Perl_lex_end(pTHX)
463ee0b2 425{
3280af22 426 PL_doextract = FALSE;
463ee0b2
LW
427}
428
ffb4593c
NT
429/*
430 * S_incline
431 * This subroutine has nothing to do with tilting, whether at windmills
432 * or pinball tables. Its name is short for "increment line". It
57843af0 433 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 434 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
435 * # line 500 "foo.pm"
436 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
437 */
438
76e3520e 439STATIC void
cea2e8a9 440S_incline(pTHX_ char *s)
463ee0b2 441{
0f15f207 442 dTHR;
463ee0b2
LW
443 char *t;
444 char *n;
73659bf1 445 char *e;
463ee0b2 446 char ch;
463ee0b2 447
57843af0 448 CopLINE_inc(PL_curcop);
463ee0b2
LW
449 if (*s++ != '#')
450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
452 if (strnEQ(s, "line", 4))
453 s += 4;
454 else
455 return;
456 if (*s == ' ' || *s == '\t')
457 s++;
458 else
459 return;
bf4acbe4 460 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
461 if (!isDIGIT(*s))
462 return;
463 n = s;
464 while (isDIGIT(*s))
465 s++;
bf4acbe4 466 while (SPACE_OR_TAB(*s))
463ee0b2 467 s++;
73659bf1 468 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 469 s++;
73659bf1
GS
470 e = t + 1;
471 }
463ee0b2 472 else {
463ee0b2 473 for (t = s; !isSPACE(*t); t++) ;
73659bf1 474 e = t;
463ee0b2 475 }
bf4acbe4 476 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
477 e++;
478 if (*e != '\n' && *e != '\0')
479 return; /* false alarm */
480
463ee0b2
LW
481 ch = *t;
482 *t = '\0';
f4dd75d9
GS
483 if (t - s > 0) {
484#ifdef USE_ITHREADS
485 Safefree(CopFILE(PL_curcop));
486#else
487 SvREFCNT_dec(CopFILEGV(PL_curcop));
488#endif
57843af0 489 CopFILE_set(PL_curcop, s);
f4dd75d9 490 }
463ee0b2 491 *t = ch;
57843af0 492 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
493}
494
ffb4593c
NT
495/*
496 * S_skipspace
497 * Called to gobble the appropriate amount and type of whitespace.
498 * Skips comments as well.
499 */
500
76e3520e 501STATIC char *
cea2e8a9 502S_skipspace(pTHX_ register char *s)
a687059c 503{
11343788 504 dTHR;
3280af22 505 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 506 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
507 s++;
508 return s;
509 }
510 for (;;) {
fd049845 511 STRLEN prevlen;
09bef843
SB
512 SSize_t oldprevlen, oldoldprevlen;
513 SSize_t oldloplen, oldunilen;
60e6418e
GS
514 while (s < PL_bufend && isSPACE(*s)) {
515 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
516 incline(s);
517 }
ffb4593c
NT
518
519 /* comment */
3280af22
NIS
520 if (s < PL_bufend && *s == '#') {
521 while (s < PL_bufend && *s != '\n')
463ee0b2 522 s++;
60e6418e 523 if (s < PL_bufend) {
463ee0b2 524 s++;
60e6418e
GS
525 if (PL_in_eval && !PL_rsfp) {
526 incline(s);
527 continue;
528 }
529 }
463ee0b2 530 }
ffb4593c
NT
531
532 /* only continue to recharge the buffer if we're at the end
533 * of the buffer, we're not reading from a source filter, and
534 * we're in normal lexing mode
535 */
09bef843
SB
536 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
537 PL_lex_state == LEX_FORMLINE)
463ee0b2 538 return s;
ffb4593c
NT
539
540 /* try to recharge the buffer */
9cbb5ea2
GS
541 if ((s = filter_gets(PL_linestr, PL_rsfp,
542 (prevlen = SvCUR(PL_linestr)))) == Nullch)
543 {
544 /* end of file. Add on the -p or -n magic */
3280af22
NIS
545 if (PL_minus_n || PL_minus_p) {
546 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
547 ";}continue{print or die qq(-p destination: $!\\n)" :
548 "");
3280af22
NIS
549 sv_catpv(PL_linestr,";}");
550 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
551 }
552 else
3280af22 553 sv_setpv(PL_linestr,";");
ffb4593c
NT
554
555 /* reset variables for next time we lex */
9cbb5ea2
GS
556 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
557 = SvPVX(PL_linestr);
3280af22 558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
559
560 /* Close the filehandle. Could be from -P preprocessor,
561 * STDIN, or a regular file. If we were reading code from
562 * STDIN (because the commandline held no -e or filename)
563 * then we don't close it, we reset it so the code can
564 * read from STDIN too.
565 */
566
3280af22
NIS
567 if (PL_preprocess && !PL_in_eval)
568 (void)PerlProc_pclose(PL_rsfp);
569 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
570 PerlIO_clearerr(PL_rsfp);
8990e307 571 else
3280af22
NIS
572 (void)PerlIO_close(PL_rsfp);
573 PL_rsfp = Nullfp;
463ee0b2
LW
574 return s;
575 }
ffb4593c
NT
576
577 /* not at end of file, so we only read another line */
09bef843
SB
578 /* make corresponding updates to old pointers, for yyerror() */
579 oldprevlen = PL_oldbufptr - PL_bufend;
580 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
581 if (PL_last_uni)
582 oldunilen = PL_last_uni - PL_bufend;
583 if (PL_last_lop)
584 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
585 PL_linestart = PL_bufptr = s + prevlen;
586 PL_bufend = s + SvCUR(PL_linestr);
587 s = PL_bufptr;
09bef843
SB
588 PL_oldbufptr = s + oldprevlen;
589 PL_oldoldbufptr = s + oldoldprevlen;
590 if (PL_last_uni)
591 PL_last_uni = s + oldunilen;
592 if (PL_last_lop)
593 PL_last_lop = s + oldloplen;
a0d0e21e 594 incline(s);
ffb4593c
NT
595
596 /* debugger active and we're not compiling the debugger code,
597 * so store the line into the debugger's array of lines
598 */
3280af22 599 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
600 SV *sv = NEWSV(85,0);
601
602 sv_upgrade(sv, SVt_PVMG);
3280af22 603 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 604 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 605 }
463ee0b2 606 }
a687059c 607}
378cc40b 608
ffb4593c
NT
609/*
610 * S_check_uni
611 * Check the unary operators to ensure there's no ambiguity in how they're
612 * used. An ambiguous piece of code would be:
613 * rand + 5
614 * This doesn't mean rand() + 5. Because rand() is a unary operator,
615 * the +5 is its argument.
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_check_uni(pTHX)
ba106d47 620{
2f3197b3 621 char *s;
a0d0e21e 622 char *t;
0453d815 623 dTHR;
2f3197b3 624
3280af22 625 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 626 return;
3280af22
NIS
627 while (isSPACE(*PL_last_uni))
628 PL_last_uni++;
7e2040f0 629 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 630 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 631 return;
0453d815 632 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 633 char ch = *s;
0453d815
PM
634 *s = '\0';
635 Perl_warner(aTHX_ WARN_AMBIGUOUS,
636 "Warning: Use of \"%s\" without parens is ambiguous",
637 PL_last_uni);
638 *s = ch;
639 }
2f3197b3
LW
640}
641
ffb4593c
NT
642/* workaround to replace the UNI() macro with a function. Only the
643 * hints/uts.sh file mentions this. Other comments elsewhere in the
644 * source indicate Microport Unix might need it too.
645 */
646
ffed7fef
LW
647#ifdef CRIPPLED_CC
648
649#undef UNI
ffed7fef 650#define UNI(f) return uni(f,s)
ffed7fef 651
76e3520e 652STATIC int
cea2e8a9 653S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
654{
655 yylval.ival = f;
3280af22
NIS
656 PL_expect = XTERM;
657 PL_bufptr = s;
8f872242
NIS
658 PL_last_uni = PL_oldbufptr;
659 PL_last_lop_op = f;
ffed7fef
LW
660 if (*s == '(')
661 return FUNC1;
662 s = skipspace(s);
663 if (*s == '(')
664 return FUNC1;
665 else
666 return UNIOP;
667}
668
a0d0e21e
LW
669#endif /* CRIPPLED_CC */
670
ffb4593c
NT
671/*
672 * LOP : macro to build a list operator. Its behaviour has been replaced
673 * with a subroutine, S_lop() for which LOP is just another name.
674 */
675
a0d0e21e
LW
676#define LOP(f,x) return lop(f,x,s)
677
ffb4593c
NT
678/*
679 * S_lop
680 * Build a list operator (or something that might be one). The rules:
681 * - if we have a next token, then it's a list operator [why?]
682 * - if the next thing is an opening paren, then it's a function
683 * - else it's a list operator
684 */
685
76e3520e 686STATIC I32
a0be28da 687S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 688{
0f15f207 689 dTHR;
79072805 690 yylval.ival = f;
35c8bce7 691 CLINE;
3280af22
NIS
692 PL_expect = x;
693 PL_bufptr = s;
694 PL_last_lop = PL_oldbufptr;
695 PL_last_lop_op = f;
696 if (PL_nexttoke)
a0d0e21e 697 return LSTOP;
79072805
LW
698 if (*s == '(')
699 return FUNC;
700 s = skipspace(s);
701 if (*s == '(')
702 return FUNC;
703 else
704 return LSTOP;
705}
706
ffb4593c
NT
707/*
708 * S_force_next
9cbb5ea2 709 * When the lexer realizes it knows the next token (for instance,
ffb4593c 710 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
711 * to know what token to return the next time the lexer is called. Caller
712 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
713 * handles the token correctly.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_force_next(pTHX_ I32 type)
79072805 718{
3280af22
NIS
719 PL_nexttype[PL_nexttoke] = type;
720 PL_nexttoke++;
721 if (PL_lex_state != LEX_KNOWNEXT) {
722 PL_lex_defer = PL_lex_state;
723 PL_lex_expect = PL_expect;
724 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
725 }
726}
727
ffb4593c
NT
728/*
729 * S_force_word
730 * When the lexer knows the next thing is a word (for instance, it has
731 * just seen -> and it knows that the next char is a word char, then
732 * it calls S_force_word to stick the next word into the PL_next lookahead.
733 *
734 * Arguments:
b1b65b59 735 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
736 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
737 * int check_keyword : if true, Perl checks to make sure the word isn't
738 * a keyword (do this if the word is a label, e.g. goto FOO)
739 * int allow_pack : if true, : characters will also be allowed (require,
740 * use, etc. do this)
9cbb5ea2 741 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
742 */
743
76e3520e 744STATIC char *
cea2e8a9 745S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 746{
463ee0b2
LW
747 register char *s;
748 STRLEN len;
749
750 start = skipspace(start);
751 s = start;
7e2040f0 752 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 753 (allow_pack && *s == ':') ||
15f0808c 754 (allow_initial_tick && *s == '\'') )
a0d0e21e 755 {
3280af22
NIS
756 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
757 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
758 return start;
759 if (token == METHOD) {
760 s = skipspace(s);
761 if (*s == '(')
3280af22 762 PL_expect = XTERM;
463ee0b2 763 else {
3280af22 764 PL_expect = XOPERATOR;
463ee0b2 765 }
79072805 766 }
3280af22
NIS
767 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
768 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
769 force_next(token);
770 }
771 return s;
772}
773
ffb4593c
NT
774/*
775 * S_force_ident
9cbb5ea2 776 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
777 * text only contains the "foo" portion. The first argument is a pointer
778 * to the "foo", and the second argument is the type symbol to prefix.
779 * Forces the next token to be a "WORD".
9cbb5ea2 780 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
781 */
782
76e3520e 783STATIC void
cea2e8a9 784S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
785{
786 if (s && *s) {
11343788 787 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 788 PL_nextval[PL_nexttoke].opval = o;
79072805 789 force_next(WORD);
748a9306 790 if (kind) {
e858de61 791 dTHR; /* just for in_eval */
11343788 792 o->op_private = OPpCONST_ENTERED;
55497cff
PP
793 /* XXX see note in pp_entereval() for why we forgo typo
794 warnings if the symbol must be introduced in an eval.
795 GSAR 96-10-12 */
3280af22 796 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
797 kind == '$' ? SVt_PV :
798 kind == '@' ? SVt_PVAV :
799 kind == '%' ? SVt_PVHV :
800 SVt_PVGV
801 );
748a9306 802 }
79072805
LW
803 }
804}
805
1571675a
GS
806NV
807Perl_str_to_version(pTHX_ SV *sv)
808{
809 NV retval = 0.0;
810 NV nshift = 1.0;
811 STRLEN len;
812 char *start = SvPVx(sv,len);
3aa33fe5 813 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
814 char *end = start + len;
815 while (start < end) {
ba210ebe 816 STRLEN skip;
1571675a
GS
817 UV n;
818 if (utf)
dcad2880 819 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
820 else {
821 n = *(U8*)start;
822 skip = 1;
823 }
824 retval += ((NV)n)/nshift;
825 start += skip;
826 nshift *= 1000;
827 }
828 return retval;
829}
830
ffb4593c
NT
831/*
832 * S_force_version
833 * Forces the next token to be a version number.
834 */
835
76e3520e 836STATIC char *
cea2e8a9 837S_force_version(pTHX_ char *s)
89bfa8cd
PP
838{
839 OP *version = Nullop;
44dcb63b 840 char *d;
89bfa8cd
PP
841
842 s = skipspace(s);
843
44dcb63b 844 d = s;
dd629d5b 845 if (*d == 'v')
44dcb63b 846 d++;
44dcb63b 847 if (isDIGIT(*d)) {
a7cb1f99 848 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 849 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 850 SV *ver;
b73d6f50 851 s = scan_num(s, &yylval);
89bfa8cd 852 version = yylval.opval;
dd629d5b
GS
853 ver = cSVOPx(version)->op_sv;
854 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 855 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
856 SvNVX(ver) = str_to_version(ver);
857 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 858 }
89bfa8cd
PP
859 }
860 }
861
862 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 863 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
864 force_next(WORD);
865
866 return (s);
867}
868
ffb4593c
NT
869/*
870 * S_tokeq
871 * Tokenize a quoted string passed in as an SV. It finds the next
872 * chunk, up to end of string or a backslash. It may make a new
873 * SV containing that chunk (if HINT_NEW_STRING is on). It also
874 * turns \\ into \.
875 */
876
76e3520e 877STATIC SV *
cea2e8a9 878S_tokeq(pTHX_ SV *sv)
79072805
LW
879{
880 register char *s;
881 register char *send;
882 register char *d;
b3ac6de7
IZ
883 STRLEN len = 0;
884 SV *pv = sv;
79072805
LW
885
886 if (!SvLEN(sv))
b3ac6de7 887 goto finish;
79072805 888
a0d0e21e 889 s = SvPV_force(sv, len);
21a311ee 890 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 891 goto finish;
463ee0b2 892 send = s + len;
79072805
LW
893 while (s < send && *s != '\\')
894 s++;
895 if (s == send)
b3ac6de7 896 goto finish;
79072805 897 d = s;
3280af22 898 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 899 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
900 while (s < send) {
901 if (*s == '\\') {
a0d0e21e 902 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
903 s++; /* all that, just for this */
904 }
905 *d++ = *s++;
906 }
907 *d = '\0';
463ee0b2 908 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 909 finish:
3280af22 910 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 911 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
912 return sv;
913}
914
ffb4593c
NT
915/*
916 * Now come three functions related to double-quote context,
917 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
918 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
919 * interact with PL_lex_state, and create fake ( ... ) argument lists
920 * to handle functions and concatenation.
921 * They assume that whoever calls them will be setting up a fake
922 * join call, because each subthing puts a ',' after it. This lets
923 * "lower \luPpEr"
924 * become
925 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
926 *
927 * (I'm not sure whether the spurious commas at the end of lcfirst's
928 * arguments and join's arguments are created or not).
929 */
930
931/*
932 * S_sublex_start
933 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
934 *
935 * Pattern matching will set PL_lex_op to the pattern-matching op to
936 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
937 *
938 * OP_CONST and OP_READLINE are easy--just make the new op and return.
939 *
940 * Everything else becomes a FUNC.
941 *
942 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
943 * had an OP_CONST or OP_READLINE). This just sets us up for a
944 * call to S_sublex_push().
945 */
946
76e3520e 947STATIC I32
cea2e8a9 948S_sublex_start(pTHX)
79072805
LW
949{
950 register I32 op_type = yylval.ival;
79072805
LW
951
952 if (op_type == OP_NULL) {
3280af22
NIS
953 yylval.opval = PL_lex_op;
954 PL_lex_op = Nullop;
79072805
LW
955 return THING;
956 }
957 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 958 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
959
960 if (SvTYPE(sv) == SVt_PVIV) {
961 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
962 STRLEN len;
963 char *p;
964 SV *nsv;
965
966 p = SvPV(sv, len);
79cb57f6 967 nsv = newSVpvn(p, len);
01ec43d0
GS
968 if (SvUTF8(sv))
969 SvUTF8_on(nsv);
b3ac6de7
IZ
970 SvREFCNT_dec(sv);
971 sv = nsv;
972 }
973 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 974 PL_lex_stuff = Nullsv;
79072805
LW
975 return THING;
976 }
977
3280af22
NIS
978 PL_sublex_info.super_state = PL_lex_state;
979 PL_sublex_info.sub_inwhat = op_type;
980 PL_sublex_info.sub_op = PL_lex_op;
981 PL_lex_state = LEX_INTERPPUSH;
55497cff 982
3280af22
NIS
983 PL_expect = XTERM;
984 if (PL_lex_op) {
985 yylval.opval = PL_lex_op;
986 PL_lex_op = Nullop;
55497cff
PP
987 return PMFUNC;
988 }
989 else
990 return FUNC;
991}
992
ffb4593c
NT
993/*
994 * S_sublex_push
995 * Create a new scope to save the lexing state. The scope will be
996 * ended in S_sublex_done. Returns a '(', starting the function arguments
997 * to the uc, lc, etc. found before.
998 * Sets PL_lex_state to LEX_INTERPCONCAT.
999 */
1000
76e3520e 1001STATIC I32
cea2e8a9 1002S_sublex_push(pTHX)
55497cff 1003{
0f15f207 1004 dTHR;
f46d017c 1005 ENTER;
55497cff 1006
3280af22
NIS
1007 PL_lex_state = PL_sublex_info.super_state;
1008 SAVEI32(PL_lex_dojoin);
1009 SAVEI32(PL_lex_brackets);
3280af22
NIS
1010 SAVEI32(PL_lex_casemods);
1011 SAVEI32(PL_lex_starts);
1012 SAVEI32(PL_lex_state);
7766f137 1013 SAVEVPTR(PL_lex_inpat);
3280af22 1014 SAVEI32(PL_lex_inwhat);
57843af0 1015 SAVECOPLINE(PL_curcop);
3280af22
NIS
1016 SAVEPPTR(PL_bufptr);
1017 SAVEPPTR(PL_oldbufptr);
1018 SAVEPPTR(PL_oldoldbufptr);
1019 SAVEPPTR(PL_linestart);
1020 SAVESPTR(PL_linestr);
1021 SAVEPPTR(PL_lex_brackstack);
1022 SAVEPPTR(PL_lex_casestack);
1023
1024 PL_linestr = PL_lex_stuff;
1025 PL_lex_stuff = Nullsv;
1026
9cbb5ea2
GS
1027 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1028 = SvPVX(PL_linestr);
3280af22
NIS
1029 PL_bufend += SvCUR(PL_linestr);
1030 SAVEFREESV(PL_linestr);
1031
1032 PL_lex_dojoin = FALSE;
1033 PL_lex_brackets = 0;
3280af22
NIS
1034 New(899, PL_lex_brackstack, 120, char);
1035 New(899, PL_lex_casestack, 12, char);
1036 SAVEFREEPV(PL_lex_brackstack);
1037 SAVEFREEPV(PL_lex_casestack);
1038 PL_lex_casemods = 0;
1039 *PL_lex_casestack = '\0';
1040 PL_lex_starts = 0;
1041 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1042 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1043
1044 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1045 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1046 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1047 else
3280af22 1048 PL_lex_inpat = Nullop;
79072805 1049
55497cff 1050 return '(';
79072805
LW
1051}
1052
ffb4593c
NT
1053/*
1054 * S_sublex_done
1055 * Restores lexer state after a S_sublex_push.
1056 */
1057
76e3520e 1058STATIC I32
cea2e8a9 1059S_sublex_done(pTHX)
79072805 1060{
3280af22
NIS
1061 if (!PL_lex_starts++) {
1062 PL_expect = XOPERATOR;
79cb57f6 1063 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1064 return THING;
1065 }
1066
3280af22
NIS
1067 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1068 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1069 return yylex();
79072805
LW
1070 }
1071
ffb4593c 1072 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1073 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1074 PL_linestr = PL_lex_repl;
1075 PL_lex_inpat = 0;
1076 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1077 PL_bufend += SvCUR(PL_linestr);
1078 SAVEFREESV(PL_linestr);
1079 PL_lex_dojoin = FALSE;
1080 PL_lex_brackets = 0;
3280af22
NIS
1081 PL_lex_casemods = 0;
1082 *PL_lex_casestack = '\0';
1083 PL_lex_starts = 0;
25da4f38 1084 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1085 PL_lex_state = LEX_INTERPNORMAL;
1086 PL_lex_starts++;
e9fa98b2
HS
1087 /* we don't clear PL_lex_repl here, so that we can check later
1088 whether this is an evalled subst; that means we rely on the
1089 logic to ensure sublex_done() is called again only via the
1090 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1091 }
e9fa98b2 1092 else {
3280af22 1093 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1094 PL_lex_repl = Nullsv;
1095 }
79072805 1096 return ',';
ffed7fef
LW
1097 }
1098 else {
f46d017c 1099 LEAVE;
3280af22
NIS
1100 PL_bufend = SvPVX(PL_linestr);
1101 PL_bufend += SvCUR(PL_linestr);
1102 PL_expect = XOPERATOR;
09bef843 1103 PL_sublex_info.sub_inwhat = 0;
79072805 1104 return ')';
ffed7fef
LW
1105 }
1106}
1107
02aa26ce
NT
1108/*
1109 scan_const
1110
1111 Extracts a pattern, double-quoted string, or transliteration. This
1112 is terrifying code.
1113
3280af22
NIS
1114 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1115 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1116 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1117
9b599b2a
GS
1118 Returns a pointer to the character scanned up to. Iff this is
1119 advanced from the start pointer supplied (ie if anything was
1120 successfully parsed), will leave an OP for the substring scanned
1121 in yylval. Caller must intuit reason for not parsing further
1122 by looking at the next characters herself.
1123
02aa26ce
NT
1124 In patterns:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 regexp special ones: \D \s
1128 constants: \x3
1129 backrefs: \1 (deprecated in substitution replacements)
1130 case and quoting: \U \Q \E
1131 stops on @ and $, but not for $ as tail anchor
1132
1133 In transliterations:
1134 characters are VERY literal, except for - not at the start or end
1135 of the string, which indicates a range. scan_const expands the
1136 range to the full set of intermediate characters.
1137
1138 In double-quoted strings:
1139 backslashes:
1140 double-quoted style: \r and \n
1141 constants: \x3
1142 backrefs: \1 (deprecated)
1143 case and quoting: \U \Q \E
1144 stops on @ and $
1145
1146 scan_const does *not* construct ops to handle interpolated strings.
1147 It stops processing as soon as it finds an embedded $ or @ variable
1148 and leaves it to the caller to work out what's going on.
1149
1150 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1151
1152 $ in pattern could be $foo or could be tail anchor. Assumption:
1153 it's a tail anchor if $ is the last thing in the string, or if it's
1154 followed by one of ")| \n\t"
1155
1156 \1 (backreferences) are turned into $1
1157
1158 The structure of the code is
1159 while (there's a character to process) {
1160 handle transliteration ranges
1161 skip regexp comments
1162 skip # initiated comments in //x patterns
1163 check for embedded @foo
1164 check for embedded scalars
1165 if (backslash) {
1166 leave intact backslashes from leave (below)
1167 deprecate \1 in strings and sub replacements
1168 handle string-changing backslashes \l \U \Q \E, etc.
1169 switch (what was escaped) {
1170 handle - in a transliteration (becomes a literal -)
1171 handle \132 octal characters
1172 handle 0x15 hex characters
1173 handle \cV (control V)
1174 handle printf backslashes (\f, \r, \n, etc)
1175 } (end switch)
1176 } (end if backslash)
1177 } (end while character to read)
1178
1179*/
1180
76e3520e 1181STATIC char *
cea2e8a9 1182S_scan_const(pTHX_ char *start)
79072805 1183{
3280af22 1184 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1185 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1186 register char *s = start; /* start of the constant */
1187 register char *d = SvPVX(sv); /* destination for copies */
1188 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1189 bool didrange = FALSE; /* did we just finish a range? */
7e2040f0 1190 bool has_utf = FALSE; /* embedded \x{} */
012bcf8d
GS
1191 UV uv;
1192
ac2262e3 1193 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1194 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1195 : UTF;
ac2262e3 1196 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1197 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1198 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1199 : UTF;
dff6d3cd 1200 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1201 PL_lex_inpat
4a2d328f 1202 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1203 : "";
79072805
LW
1204
1205 while (s < send || dorange) {
02aa26ce 1206 /* get transliterations out of the way (they're most literal) */
3280af22 1207 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1208 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1209 if (dorange) {
02aa26ce 1210 I32 i; /* current expanded character */
8ada0baa 1211 I32 min; /* first character in range */
02aa26ce
NT
1212 I32 max; /* last character in range */
1213
1214 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1215 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1216 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1217 d -= 2; /* eat the first char and the - */
1218
8ada0baa
JH
1219 min = (U8)*d; /* first char in range */
1220 max = (U8)d[1]; /* last char in range */
1221
c2e66d9e 1222 if (min > max) {
01ec43d0
GS
1223 Perl_croak(aTHX_
1224 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1225 (char)min, (char)max);
c2e66d9e
GS
1226 }
1227
8ada0baa
JH
1228#ifndef ASCIIish
1229 if ((isLOWER(min) && isLOWER(max)) ||
1230 (isUPPER(min) && isUPPER(max))) {
1231 if (isLOWER(min)) {
1232 for (i = min; i <= max; i++)
1233 if (isLOWER(i))
1234 *d++ = i;
1235 } else {
1236 for (i = min; i <= max; i++)
1237 if (isUPPER(i))
1238 *d++ = i;
1239 }
1240 }
1241 else
1242#endif
1243 for (i = min; i <= max; i++)
1244 *d++ = i;
02aa26ce
NT
1245
1246 /* mark the range as done, and continue */
79072805 1247 dorange = FALSE;
01ec43d0 1248 didrange = TRUE;
79072805 1249 continue;
c2e66d9e 1250 }
02aa26ce
NT
1251
1252 /* range begins (ignore - as first or last char) */
79072805 1253 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1254 if (didrange) {
1fafa243 1255 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1256 }
a0ed51b3 1257 if (utf) {
a176fa2a 1258 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1259 s++;
1260 continue;
1261 }
79072805
LW
1262 dorange = TRUE;
1263 s++;
01ec43d0
GS
1264 }
1265 else {
1266 didrange = FALSE;
1267 }
79072805 1268 }
02aa26ce
NT
1269
1270 /* if we get here, we're not doing a transliteration */
1271
0f5d15d6
IZ
1272 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1273 except for the last char, which will be done separately. */
3280af22 1274 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1275 if (s[2] == '#') {
1276 while (s < send && *s != ')')
1277 *d++ = *s++;
155aba94
GS
1278 }
1279 else if (s[2] == '{' /* This should match regcomp.c */
1280 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1281 {
cc6b7395 1282 I32 count = 1;
0f5d15d6 1283 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1284 char c;
1285
d9f97599
GS
1286 while (count && (c = *regparse)) {
1287 if (c == '\\' && regparse[1])
1288 regparse++;
cc6b7395
IZ
1289 else if (c == '{')
1290 count++;
1291 else if (c == '}')
1292 count--;
d9f97599 1293 regparse++;
cc6b7395 1294 }
5bdf89e7
IZ
1295 if (*regparse != ')') {
1296 regparse--; /* Leave one char for continuation. */
cc6b7395 1297 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1298 }
0f5d15d6 1299 while (s < regparse)
cc6b7395
IZ
1300 *d++ = *s++;
1301 }
748a9306 1302 }
02aa26ce
NT
1303
1304 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1305 else if (*s == '#' && PL_lex_inpat &&
1306 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1307 while (s+1 < send && *s != '\n')
1308 *d++ = *s++;
1309 }
02aa26ce 1310
5d1d4326
JH
1311 /* check for embedded arrays
1312 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1313 */
7e2040f0 1314 else if (*s == '@' && s[1]
5d1d4326 1315 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1316 break;
02aa26ce
NT
1317
1318 /* check for embedded scalars. only stop if we're sure it's a
1319 variable.
1320 */
79072805 1321 else if (*s == '$') {
3280af22 1322 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1323 break;
c277df42 1324 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1325 break; /* in regexp, $ might be tail anchor */
1326 }
02aa26ce 1327
a0ed51b3
LW
1328 /* (now in tr/// code again) */
1329
d008e5eb 1330 if (*s & 0x80 && thisutf) {
ba210ebe
JH
1331 STRLEN len;
1332 UV uv;
1333
dcad2880 1334 uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
ba210ebe 1335 if (len == 1) {
fcc8fcf6 1336 /* Illegal UTF8 (a high-bit byte), make it valid. */
ba210ebe
JH
1337 char *old_pvx = SvPVX(sv);
1338 /* need space for one extra char (NOTE: SvCUR() not set here) */
1339 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1340 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1341 }
1342 else {
1343 while (len--)
1344 *d++ = *s++;
1345 }
1346 has_utf = TRUE;
1347 continue;
a0ed51b3
LW
1348 }
1349
02aa26ce 1350 /* backslashes */
79072805
LW
1351 if (*s == '\\' && s+1 < send) {
1352 s++;
02aa26ce
NT
1353
1354 /* some backslashes we leave behind */
c9f97d15 1355 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1356 *d++ = '\\';
1357 *d++ = *s++;
1358 continue;
1359 }
02aa26ce
NT
1360
1361 /* deprecate \1 in strings and substitution replacements */
3280af22 1362 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1363 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1364 {
d008e5eb 1365 dTHR; /* only for ckWARN */
599cee73 1366 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1367 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1368 *--s = '$';
1369 break;
1370 }
02aa26ce
NT
1371
1372 /* string-change backslash escapes */
3280af22 1373 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1374 --s;
1375 break;
1376 }
02aa26ce
NT
1377
1378 /* if we get here, it's either a quoted -, or a digit */
79072805 1379 switch (*s) {
02aa26ce
NT
1380
1381 /* quoted - in transliterations */
79072805 1382 case '-':
3280af22 1383 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1384 *d++ = *s++;
1385 continue;
1386 }
1387 /* FALL THROUGH */
1388 default:
11b8faa4
JH
1389 {
1390 dTHR;
7e84c16c 1391 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1392 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1393 "Unrecognized escape \\%c passed through",
1394 *s);
1395 /* default action is to copy the quoted character */
1396 *d++ = *s++;
1397 continue;
1398 }
02aa26ce
NT
1399
1400 /* \132 indicates an octal constant */
79072805
LW
1401 case '0': case '1': case '2': case '3':
1402 case '4': case '5': case '6': case '7':
ba210ebe
JH
1403 {
1404 STRLEN len = 0; /* disallow underscores */
1405 uv = (UV)scan_oct(s, 3, &len);
1406 s += len;
1407 }
012bcf8d 1408 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1409
1410 /* \x24 indicates a hex constant */
79072805 1411 case 'x':
a0ed51b3
LW
1412 ++s;
1413 if (*s == '{') {
1414 char* e = strchr(s, '}');
adaeee49 1415 if (!e) {
a0ed51b3 1416 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1417 e = s;
1418 }
ba210ebe
JH
1419 {
1420 STRLEN len = 1; /* allow underscores */
1421 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1422 }
1423 s = e + 1;
a0ed51b3
LW
1424 }
1425 else {
ba210ebe
JH
1426 {
1427 STRLEN len = 0; /* disallow underscores */
1428 uv = (UV)scan_hex(s, 2, &len);
1429 s += len;
1430 }
012bcf8d
GS
1431 }
1432
1433 NUM_ESCAPE_INSERT:
1434 /* Insert oct or hex escaped character.
1435 * There will always enough room in sv since such escapes will
1436 * be longer than any utf8 sequence they can end up as
1437 */
1438 if (uv > 127) {
1439 if (!thisutf && !has_utf && uv > 255) {
1440 /* might need to recode whatever we have accumulated so far
1441 * if it contains any hibit chars
1442 */
1443 int hicount = 0;
1444 char *c;
1445 for (c = SvPVX(sv); c < d; c++) {
1446 if (*c & 0x80)
1447 hicount++;
1448 }
1449 if (hicount) {
1450 char *old_pvx = SvPVX(sv);
1451 char *src, *dst;
1452 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1453
1454 src = d - 1;
1455 d += hicount;
1456 dst = d - 1;
1457
1458 while (src < dst) {
1459 if (*src & 0x80) {
1460 dst--;
1461 uv_to_utf8((U8*)dst, (U8)*src--);
1462 dst--;
1463 }
1464 else {
1465 *dst-- = *src--;
1466 }
1467 }
1468 }
1469 }
1470
1471 if (thisutf || uv > 255) {
1472 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1473 has_utf = TRUE;
012bcf8d 1474 }
a0ed51b3 1475 else {
012bcf8d 1476 *d++ = (char)uv;
a0ed51b3 1477 }
012bcf8d
GS
1478 }
1479 else {
1480 *d++ = (char)uv;
a0ed51b3 1481 }
79072805 1482 continue;
02aa26ce 1483
4a2d328f
IZ
1484 /* \N{latin small letter a} is a named character */
1485 case 'N':
423cee85
JH
1486 ++s;
1487 if (*s == '{') {
1488 char* e = strchr(s, '}');
155aba94 1489 SV *res;
423cee85
JH
1490 STRLEN len;
1491 char *str;
423cee85
JH
1492
1493 if (!e) {
5777a3f7 1494 yyerror("Missing right brace on \\N{}");
423cee85
JH
1495 e = s - 1;
1496 goto cont_scan;
1497 }
1498 res = newSVpvn(s + 1, e - s - 1);
1499 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1500 res, Nullsv, "\\N{...}" );
423cee85 1501 str = SvPV(res,len);
f08d6ad9
GS
1502 if (!has_utf && SvUTF8(res)) {
1503 char *ostart = SvPVX(sv);
1504 SvCUR_set(sv, d - ostart);
1505 SvPOK_on(sv);
e4f3eed8 1506 *d = '\0';
f08d6ad9 1507 sv_utf8_upgrade(sv);
d2f449dd
SB
1508 /* this just broke our allocation above... */
1509 SvGROW(sv, send - start);
f08d6ad9 1510 d = SvPVX(sv) + SvCUR(sv);
e1992b6d 1511 has_utf = TRUE;
f08d6ad9 1512 }
423cee85
JH
1513 if (len > e - s + 4) {
1514 char *odest = SvPVX(sv);
1515
1516 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1517 d = SvPVX(sv) + (d - odest);
1518 }
1519 Copy(str, d, len, char);
1520 d += len;
1521 SvREFCNT_dec(res);
1522 cont_scan:
1523 s = e + 1;
1524 }
1525 else
5777a3f7 1526 yyerror("Missing braces on \\N{}");
423cee85
JH
1527 continue;
1528
02aa26ce 1529 /* \c is a control character */
79072805
LW
1530 case 'c':
1531 s++;
9d116dd7
JH
1532#ifdef EBCDIC
1533 *d = *s++;
1534 if (isLOWER(*d))
1535 *d = toUPPER(*d);
774a9426
GS
1536 *d = toCTRL(*d);
1537 d++;
9d116dd7 1538#else
ba210ebe
JH
1539 {
1540 U8 c = *s++;
1541 *d++ = toCTRL(c);
1542 }
9d116dd7 1543#endif
79072805 1544 continue;
02aa26ce
NT
1545
1546 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1547 case 'b':
1548 *d++ = '\b';
1549 break;
1550 case 'n':
1551 *d++ = '\n';
1552 break;
1553 case 'r':
1554 *d++ = '\r';
1555 break;
1556 case 'f':
1557 *d++ = '\f';
1558 break;
1559 case 't':
1560 *d++ = '\t';
1561 break;
34a3fe2a
PP
1562#ifdef EBCDIC
1563 case 'e':
1564 *d++ = '\047'; /* CP 1047 */
1565 break;
1566 case 'a':
1567 *d++ = '\057'; /* CP 1047 */
1568 break;
1569#else
79072805
LW
1570 case 'e':
1571 *d++ = '\033';
1572 break;
1573 case 'a':
1574 *d++ = '\007';
1575 break;
34a3fe2a 1576#endif
02aa26ce
NT
1577 } /* end switch */
1578
79072805
LW
1579 s++;
1580 continue;
02aa26ce
NT
1581 } /* end if (backslash) */
1582
79072805 1583 *d++ = *s++;
02aa26ce
NT
1584 } /* while loop to process each character */
1585
1586 /* terminate the string and set up the sv */
79072805 1587 *d = '\0';
463ee0b2 1588 SvCUR_set(sv, d - SvPVX(sv));
79072805 1589 SvPOK_on(sv);
7e2040f0
GS
1590 if (has_utf)
1591 SvUTF8_on(sv);
79072805 1592
02aa26ce 1593 /* shrink the sv if we allocated more than we used */
79072805
LW
1594 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1595 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1596 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1597 }
02aa26ce 1598
9b599b2a 1599 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1600 if (s > PL_bufptr) {
1601 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1602 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1603 sv, Nullsv,
3280af22 1604 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1605 ? "tr"
3280af22 1606 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1607 ? "s"
1608 : "qq")));
79072805 1609 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1610 } else
8990e307 1611 SvREFCNT_dec(sv);
79072805
LW
1612 return s;
1613}
1614
ffb4593c
NT
1615/* S_intuit_more
1616 * Returns TRUE if there's more to the expression (e.g., a subscript),
1617 * FALSE otherwise.
ffb4593c
NT
1618 *
1619 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1620 *
1621 * ->[ and ->{ return TRUE
1622 * { and [ outside a pattern are always subscripts, so return TRUE
1623 * if we're outside a pattern and it's not { or [, then return FALSE
1624 * if we're in a pattern and the first char is a {
1625 * {4,5} (any digits around the comma) returns FALSE
1626 * if we're in a pattern and the first char is a [
1627 * [] returns FALSE
1628 * [SOMETHING] has a funky algorithm to decide whether it's a
1629 * character class or not. It has to deal with things like
1630 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1631 * anything else returns TRUE
1632 */
1633
9cbb5ea2
GS
1634/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1635
76e3520e 1636STATIC int
cea2e8a9 1637S_intuit_more(pTHX_ register char *s)
79072805 1638{
3280af22 1639 if (PL_lex_brackets)
79072805
LW
1640 return TRUE;
1641 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1642 return TRUE;
1643 if (*s != '{' && *s != '[')
1644 return FALSE;
3280af22 1645 if (!PL_lex_inpat)
79072805
LW
1646 return TRUE;
1647
1648 /* In a pattern, so maybe we have {n,m}. */
1649 if (*s == '{') {
1650 s++;
1651 if (!isDIGIT(*s))
1652 return TRUE;
1653 while (isDIGIT(*s))
1654 s++;
1655 if (*s == ',')
1656 s++;
1657 while (isDIGIT(*s))
1658 s++;
1659 if (*s == '}')
1660 return FALSE;
1661 return TRUE;
1662
1663 }
1664
1665 /* On the other hand, maybe we have a character class */
1666
1667 s++;
1668 if (*s == ']' || *s == '^')
1669 return FALSE;
1670 else {
ffb4593c 1671 /* this is terrifying, and it works */
79072805
LW
1672 int weight = 2; /* let's weigh the evidence */
1673 char seen[256];
f27ffc4a 1674 unsigned char un_char = 255, last_un_char;
93a17b20 1675 char *send = strchr(s,']');
3280af22 1676 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1677
1678 if (!send) /* has to be an expression */
1679 return TRUE;
1680
1681 Zero(seen,256,char);
1682 if (*s == '$')
1683 weight -= 3;
1684 else if (isDIGIT(*s)) {
1685 if (s[1] != ']') {
1686 if (isDIGIT(s[1]) && s[2] == ']')
1687 weight -= 10;
1688 }
1689 else
1690 weight -= 100;
1691 }
1692 for (; s < send; s++) {
1693 last_un_char = un_char;
1694 un_char = (unsigned char)*s;
1695 switch (*s) {
1696 case '@':
1697 case '&':
1698 case '$':
1699 weight -= seen[un_char] * 10;
7e2040f0 1700 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1701 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1702 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1703 weight -= 100;
1704 else
1705 weight -= 10;
1706 }
1707 else if (*s == '$' && s[1] &&
93a17b20
LW
1708 strchr("[#!%*<>()-=",s[1])) {
1709 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1710 weight -= 10;
1711 else
1712 weight -= 1;
1713 }
1714 break;
1715 case '\\':
1716 un_char = 254;
1717 if (s[1]) {
93a17b20 1718 if (strchr("wds]",s[1]))
79072805
LW
1719 weight += 100;
1720 else if (seen['\''] || seen['"'])
1721 weight += 1;
93a17b20 1722 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1723 weight += 40;
1724 else if (isDIGIT(s[1])) {
1725 weight += 40;
1726 while (s[1] && isDIGIT(s[1]))
1727 s++;
1728 }
1729 }
1730 else
1731 weight += 100;
1732 break;
1733 case '-':
1734 if (s[1] == '\\')
1735 weight += 50;
93a17b20 1736 if (strchr("aA01! ",last_un_char))
79072805 1737 weight += 30;
93a17b20 1738 if (strchr("zZ79~",s[1]))
79072805 1739 weight += 30;
f27ffc4a
GS
1740 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1741 weight -= 5; /* cope with negative subscript */
79072805
LW
1742 break;
1743 default:
93a17b20 1744 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1745 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1746 char *d = tmpbuf;
1747 while (isALPHA(*s))
1748 *d++ = *s++;
1749 *d = '\0';
1750 if (keyword(tmpbuf, d - tmpbuf))
1751 weight -= 150;
1752 }
1753 if (un_char == last_un_char + 1)
1754 weight += 5;
1755 weight -= seen[un_char];
1756 break;
1757 }
1758 seen[un_char]++;
1759 }
1760 if (weight >= 0) /* probably a character class */
1761 return FALSE;
1762 }
1763
1764 return TRUE;
1765}
ffed7fef 1766
ffb4593c
NT
1767/*
1768 * S_intuit_method
1769 *
1770 * Does all the checking to disambiguate
1771 * foo bar
1772 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1773 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1774 *
1775 * First argument is the stuff after the first token, e.g. "bar".
1776 *
1777 * Not a method if bar is a filehandle.
1778 * Not a method if foo is a subroutine prototyped to take a filehandle.
1779 * Not a method if it's really "Foo $bar"
1780 * Method if it's "foo $bar"
1781 * Not a method if it's really "print foo $bar"
1782 * Method if it's really "foo package::" (interpreted as package->foo)
1783 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1784 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1785 * =>
1786 */
1787
76e3520e 1788STATIC int
cea2e8a9 1789S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1790{
1791 char *s = start + (*start == '$');
3280af22 1792 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1793 STRLEN len;
1794 GV* indirgv;
1795
1796 if (gv) {
b6c543e3 1797 CV *cv;
a0d0e21e
LW
1798 if (GvIO(gv))
1799 return 0;
b6c543e3
IZ
1800 if ((cv = GvCVu(gv))) {
1801 char *proto = SvPVX(cv);
1802 if (proto) {
1803 if (*proto == ';')
1804 proto++;
1805 if (*proto == '*')
1806 return 0;
1807 }
1808 } else
a0d0e21e
LW
1809 gv = 0;
1810 }
8903cb82 1811 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1812 /* start is the beginning of the possible filehandle/object,
1813 * and s is the end of it
1814 * tmpbuf is a copy of it
1815 */
1816
a0d0e21e 1817 if (*start == '$') {
3280af22 1818 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1819 return 0;
1820 s = skipspace(s);
3280af22
NIS
1821 PL_bufptr = start;
1822 PL_expect = XREF;
a0d0e21e
LW
1823 return *s == '(' ? FUNCMETH : METHOD;
1824 }
1825 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1826 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1827 len -= 2;
1828 tmpbuf[len] = '\0';
1829 goto bare_package;
1830 }
1831 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1832 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1833 return 0;
1834 /* filehandle or package name makes it a method */
89bfa8cd 1835 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1836 s = skipspace(s);
3280af22 1837 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1838 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1839 bare_package:
3280af22 1840 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1841 newSVpvn(tmpbuf,len));
3280af22
NIS
1842 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1843 PL_expect = XTERM;
a0d0e21e 1844 force_next(WORD);
3280af22 1845 PL_bufptr = s;
a0d0e21e
LW
1846 return *s == '(' ? FUNCMETH : METHOD;
1847 }
1848 }
1849 return 0;
1850}
1851
ffb4593c
NT
1852/*
1853 * S_incl_perldb
1854 * Return a string of Perl code to load the debugger. If PERL5DB
1855 * is set, it will return the contents of that, otherwise a
1856 * compile-time require of perl5db.pl.
1857 */
1858
76e3520e 1859STATIC char*
cea2e8a9 1860S_incl_perldb(pTHX)
a0d0e21e 1861{
3280af22 1862 if (PL_perldb) {
76e3520e 1863 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1864
1865 if (pdb)
1866 return pdb;
61bb5906 1867 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1868 return "BEGIN { require 'perl5db.pl' }";
1869 }
1870 return "";
1871}
1872
1873
16d20bd9
AD
1874/* Encoded script support. filter_add() effectively inserts a
1875 * 'pre-processing' function into the current source input stream.
1876 * Note that the filter function only applies to the current source file
1877 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1878 *
1879 * The datasv parameter (which may be NULL) can be used to pass
1880 * private data to this instance of the filter. The filter function
1881 * can recover the SV using the FILTER_DATA macro and use it to
1882 * store private buffers and state information.
1883 *
1884 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1885 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1886 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1887 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1888 * private use must be set using malloc'd pointers.
1889 */
16d20bd9
AD
1890
1891SV *
864dbfa3 1892Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1893{
f4c556ac
GS
1894 if (!funcp)
1895 return Nullsv;
1896
3280af22
NIS
1897 if (!PL_rsfp_filters)
1898 PL_rsfp_filters = newAV();
16d20bd9 1899 if (!datasv)
8c52afec 1900 datasv = NEWSV(255,0);
16d20bd9 1901 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1902 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1903 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1904 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1905 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1906 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1907 av_unshift(PL_rsfp_filters, 1);
1908 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1909 return(datasv);
1910}
1911
1912
1913/* Delete most recently added instance of this filter function. */
a0d0e21e 1914void
864dbfa3 1915Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1916{
e0c19803 1917 SV *datasv;
f4c556ac 1918 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1919 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1920 return;
1921 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1922 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1923 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1924 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1925 IoANY(datasv) = (void *)NULL;
3280af22 1926 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1927
16d20bd9
AD
1928 return;
1929 }
1930 /* we need to search for the correct entry and clear it */
cea2e8a9 1931 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1932}
1933
1934
1935/* Invoke the n'th filter function for the current rsfp. */
1936I32
864dbfa3 1937Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1938
1939
1940 /* 0 = read one text line */
a0d0e21e 1941{
16d20bd9
AD
1942 filter_t funcp;
1943 SV *datasv = NULL;
e50aee73 1944
3280af22 1945 if (!PL_rsfp_filters)
16d20bd9 1946 return -1;
3280af22 1947 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1948 /* Provide a default input filter to make life easy. */
1949 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1950 DEBUG_P(PerlIO_printf(Perl_debug_log,
1951 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1952 if (maxlen) {
1953 /* Want a block */
1954 int len ;
1955 int old_len = SvCUR(buf_sv) ;
1956
1957 /* ensure buf_sv is large enough */
1958 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1959 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1960 if (PerlIO_error(PL_rsfp))
37120919
AD
1961 return -1; /* error */
1962 else
1963 return 0 ; /* end of file */
1964 }
16d20bd9
AD
1965 SvCUR_set(buf_sv, old_len + len) ;
1966 } else {
1967 /* Want a line */
3280af22
NIS
1968 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1969 if (PerlIO_error(PL_rsfp))
37120919
AD
1970 return -1; /* error */
1971 else
1972 return 0 ; /* end of file */
1973 }
16d20bd9
AD
1974 }
1975 return SvCUR(buf_sv);
1976 }
1977 /* Skip this filter slot if filter has been deleted */
3280af22 1978 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1979 DEBUG_P(PerlIO_printf(Perl_debug_log,
1980 "filter_read %d: skipped (filter deleted)\n",
1981 idx));
16d20bd9
AD
1982 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1983 }
1984 /* Get function pointer hidden within datasv */
4755096e 1985 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1986 DEBUG_P(PerlIO_printf(Perl_debug_log,
1987 "filter_read %d: via function %p (%s)\n",
1988 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1989 /* Call function. The function is expected to */
1990 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1991 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1992 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1993}
1994
76e3520e 1995STATIC char *
cea2e8a9 1996S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1997{
c39cd008 1998#ifdef PERL_CR_FILTER
3280af22 1999 if (!PL_rsfp_filters) {
c39cd008 2000 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2001 }
2002#endif
3280af22 2003 if (PL_rsfp_filters) {
16d20bd9 2004
55497cff
PP
2005 if (!append)
2006 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2007 if (FILTER_READ(0, sv, 0) > 0)
2008 return ( SvPVX(sv) ) ;
2009 else
2010 return Nullch ;
2011 }
9d116dd7 2012 else
fd049845 2013 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2014}
2015
01ec43d0
GS
2016STATIC HV *
2017S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2018{
2019 GV *gv;
2020
01ec43d0 2021 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2022 return PL_curstash;
2023
2024 if (len > 2 &&
2025 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2026 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2027 {
2028 return GvHV(gv); /* Foo:: */
def3634b
GS
2029 }
2030
2031 /* use constant CLASS => 'MyClass' */
2032 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2033 SV *sv;
2034 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2035 pkgname = SvPV_nolen(sv);
2036 }
2037 }
2038
2039 return gv_stashpv(pkgname, FALSE);
2040}
a0d0e21e 2041
748a9306
LW
2042#ifdef DEBUGGING
2043 static char* exp_name[] =
09bef843
SB
2044 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2045 "ATTRTERM", "TERMBLOCK"
2046 };
748a9306 2047#endif
463ee0b2 2048
02aa26ce
NT
2049/*
2050 yylex
2051
2052 Works out what to call the token just pulled out of the input
2053 stream. The yacc parser takes care of taking the ops we return and
2054 stitching them into a tree.
2055
2056 Returns:
2057 PRIVATEREF
2058
2059 Structure:
2060 if read an identifier
2061 if we're in a my declaration
2062 croak if they tried to say my($foo::bar)
2063 build the ops for a my() declaration
2064 if it's an access to a my() variable
2065 are we in a sort block?
2066 croak if my($a); $a <=> $b
2067 build ops for access to a my() variable
2068 if in a dq string, and they've said @foo and we can't find @foo
2069 croak
2070 build ops for a bareword
2071 if we already built the token before, use it.
2072*/
2073
dba4d153 2074#ifdef USE_PURE_BISON
bf4acbe4 2075#ifdef __SC__
dba4d153 2076#pragma segment Perl_yylex_r
bf4acbe4 2077#endif
864dbfa3 2078int
dba4d153 2079Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2080{
b73d6f50 2081 dTHR;
20141f0e
RI
2082 int r;
2083
20141f0e
RI
2084 yylval_pointer[yyactlevel] = lvalp;
2085 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2086 yyactlevel++;
2087 if (yyactlevel >= YYMAXLEVEL)
2088 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2089
dba4d153 2090 r = Perl_yylex(aTHX);
20141f0e 2091
20141f0e 2092 yyactlevel--;
20141f0e
RI
2093
2094 return r;
2095}
dba4d153 2096#endif
20141f0e 2097
dba4d153
JH
2098#ifdef __SC__
2099#pragma segment Perl_yylex
2100#endif
2101
2102int
2103#ifdef USE_PURE_BISON
2104Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
2105#else
2106Perl_yylex(pTHX)
2107#endif
20141f0e 2108{
11343788 2109 dTHR;
79072805 2110 register char *s;
378cc40b 2111 register char *d;
79072805 2112 register I32 tmp;
463ee0b2 2113 STRLEN len;
161b471a
NIS
2114 GV *gv = Nullgv;
2115 GV **gvp = 0;
a687059c 2116
02aa26ce 2117 /* check if there's an identifier for us to look at */
3280af22 2118 if (PL_pending_ident) {
02aa26ce 2119 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2120 char pit = PL_pending_ident;
2121 PL_pending_ident = 0;
bbce6d69 2122
02aa26ce
NT
2123 /* if we're in a my(), we can't allow dynamics here.
2124 $foo'bar has already been turned into $foo::bar, so
2125 just check for colons.
2126
2127 if it's a legal name, the OP is a PADANY.
2128 */
3280af22 2129 if (PL_in_my) {
77ca0c92 2130 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2131 if (strchr(PL_tokenbuf,':'))
2132 yyerror(Perl_form(aTHX_ "No package name allowed for "
2133 "variable %s in \"our\"",
2134 PL_tokenbuf));
77ca0c92
LW
2135 tmp = pad_allocmy(PL_tokenbuf);
2136 }
2137 else {
2138 if (strchr(PL_tokenbuf,':'))
2139 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2140
77ca0c92
LW
2141 yylval.opval = newOP(OP_PADANY, 0);
2142 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2143 return PRIVATEREF;
2144 }
bbce6d69
PP
2145 }
2146
02aa26ce
NT
2147 /*
2148 build the ops for accesses to a my() variable.
2149
2150 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2151 then used in a comparison. This catches most, but not
2152 all cases. For instance, it catches
2153 sort { my($a); $a <=> $b }
2154 but not
2155 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2156 (although why you'd do that is anyone's guess).
2157 */
2158
3280af22 2159 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2160#ifdef USE_THREADS
54b9620d 2161 /* Check for single character per-thread SVs */
3280af22
NIS
2162 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2163 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2164 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2165 {
2faa37cc 2166 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2167 yylval.opval->op_targ = tmp;
2168 return PRIVATEREF;
2169 }
2170#endif /* USE_THREADS */
3280af22 2171 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2172 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2173 /* might be an "our" variable" */
f472eb5c 2174 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2175 /* build ops for a bareword */
f472eb5c
GS
2176 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2177 sv_catpvn(sym, "::", 2);
2178 sv_catpv(sym, PL_tokenbuf+1);
2179 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2180 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2181 gv_fetchpv(SvPVX(sym),
77ca0c92 2182 (PL_in_eval
f472eb5c
GS
2183 ? (GV_ADDMULTI | GV_ADDINEVAL)
2184 : TRUE
77ca0c92
LW
2185 ),
2186 ((PL_tokenbuf[0] == '$') ? SVt_PV
2187 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2188 : SVt_PVHV));
2189 return WORD;
2190 }
2191
02aa26ce 2192 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2193 if (PL_last_lop_op == OP_SORT &&
2194 PL_tokenbuf[0] == '$' &&
2195 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2196 && !PL_tokenbuf[2])
bbce6d69 2197 {
3280af22
NIS
2198 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2199 d < PL_bufend && *d != '\n';
a863c7d1
MB
2200 d++)
2201 {
2202 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2203 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2204 PL_tokenbuf);
a863c7d1 2205 }
bbce6d69
PP
2206 }
2207 }
bbce6d69 2208
a863c7d1
MB
2209 yylval.opval = newOP(OP_PADANY, 0);
2210 yylval.opval->op_targ = tmp;
2211 return PRIVATEREF;
2212 }
bbce6d69
PP
2213 }
2214
02aa26ce
NT
2215 /*
2216 Whine if they've said @foo in a doublequoted string,
2217 and @foo isn't a variable we can find in the symbol
2218 table.
2219 */
3280af22
NIS
2220 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2221 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2222 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2223 && ckWARN(WARN_AMBIGUOUS))
2224 {
2225 /* Downgraded from fatal to warning 20000522 mjd */
2226 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2227 "Possible unintended interpolation of %s in string",
2228 PL_tokenbuf);
2229 }
bbce6d69
PP
2230 }
2231
02aa26ce 2232 /* build ops for a bareword */
3280af22 2233 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2234 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2235 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2236 ((PL_tokenbuf[0] == '$') ? SVt_PV
2237 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2238 : SVt_PVHV));
2239 return WORD;
2240 }
2241
02aa26ce
NT
2242 /* no identifier pending identification */
2243
3280af22 2244 switch (PL_lex_state) {
79072805
LW
2245#ifdef COMMENTARY
2246 case LEX_NORMAL: /* Some compilers will produce faster */
2247 case LEX_INTERPNORMAL: /* code if we comment these out. */
2248 break;
2249#endif
2250
09bef843 2251 /* when we've already built the next token, just pull it out of the queue */
79072805 2252 case LEX_KNOWNEXT:
3280af22
NIS
2253 PL_nexttoke--;
2254 yylval = PL_nextval[PL_nexttoke];
2255 if (!PL_nexttoke) {
2256 PL_lex_state = PL_lex_defer;
2257 PL_expect = PL_lex_expect;
2258 PL_lex_defer = LEX_NORMAL;
463ee0b2 2259 }
3280af22 2260 return(PL_nexttype[PL_nexttoke]);
79072805 2261
02aa26ce 2262 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2263 when we get here, PL_bufptr is at the \
02aa26ce 2264 */
79072805
LW
2265 case LEX_INTERPCASEMOD:
2266#ifdef DEBUGGING
3280af22 2267 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2268 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2269#endif
02aa26ce 2270 /* handle \E or end of string */
3280af22 2271 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2272 char oldmod;
02aa26ce
NT
2273
2274 /* if at a \E */
3280af22
NIS
2275 if (PL_lex_casemods) {
2276 oldmod = PL_lex_casestack[--PL_lex_casemods];
2277 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2278
3280af22
NIS
2279 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2280 PL_bufptr += 2;
2281 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2282 }
79072805
LW
2283 return ')';
2284 }
3280af22
NIS
2285 if (PL_bufptr != PL_bufend)
2286 PL_bufptr += 2;
2287 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2288 return yylex();
79072805
LW
2289 }
2290 else {
3280af22 2291 s = PL_bufptr + 1;
79072805
LW
2292 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2293 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2294 if (strchr("LU", *s) &&
3280af22 2295 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2296 {
3280af22 2297 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2298 return ')';
2299 }
3280af22
NIS
2300 if (PL_lex_casemods > 10) {
2301 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2302 if (newlb != PL_lex_casestack) {
a0d0e21e 2303 SAVEFREEPV(newlb);
3280af22 2304 PL_lex_casestack = newlb;
a0d0e21e
LW
2305 }
2306 }
3280af22
NIS
2307 PL_lex_casestack[PL_lex_casemods++] = *s;
2308 PL_lex_casestack[PL_lex_casemods] = '\0';
2309 PL_lex_state = LEX_INTERPCONCAT;
2310 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2311 force_next('(');
2312 if (*s == 'l')
3280af22 2313 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2314 else if (*s == 'u')
3280af22 2315 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2316 else if (*s == 'L')
3280af22 2317 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2318 else if (*s == 'U')
3280af22 2319 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2320 else if (*s == 'Q')
3280af22 2321 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2322 else
cea2e8a9 2323 Perl_croak(aTHX_ "panic: yylex");
3280af22 2324 PL_bufptr = s + 1;
79072805 2325 force_next(FUNC);
3280af22
NIS
2326 if (PL_lex_starts) {
2327 s = PL_bufptr;
2328 PL_lex_starts = 0;
79072805
LW
2329 Aop(OP_CONCAT);
2330 }
2331 else
cea2e8a9 2332 return yylex();
79072805
LW
2333 }
2334
55497cff
PP
2335 case LEX_INTERPPUSH:
2336 return sublex_push();
2337
79072805 2338 case LEX_INTERPSTART:
3280af22 2339 if (PL_bufptr == PL_bufend)
79072805 2340 return sublex_done();
3280af22
NIS
2341 PL_expect = XTERM;
2342 PL_lex_dojoin = (*PL_bufptr == '@');
2343 PL_lex_state = LEX_INTERPNORMAL;
2344 if (PL_lex_dojoin) {
2345 PL_nextval[PL_nexttoke].ival = 0;
79072805 2346 force_next(',');
554b3eca 2347#ifdef USE_THREADS
533c011a
NIS
2348 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2349 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2350 force_next(PRIVATEREF);
2351#else
a0d0e21e 2352 force_ident("\"", '$');
554b3eca 2353#endif /* USE_THREADS */
3280af22 2354 PL_nextval[PL_nexttoke].ival = 0;
79072805 2355 force_next('$');
3280af22 2356 PL_nextval[PL_nexttoke].ival = 0;
79072805 2357 force_next('(');
3280af22 2358 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2359 force_next(FUNC);
2360 }
3280af22
NIS
2361 if (PL_lex_starts++) {
2362 s = PL_bufptr;
79072805
LW
2363 Aop(OP_CONCAT);
2364 }
cea2e8a9 2365 return yylex();
79072805
LW
2366
2367 case LEX_INTERPENDMAYBE:
3280af22
NIS
2368 if (intuit_more(PL_bufptr)) {
2369 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2370 break;
2371 }
2372 /* FALL THROUGH */
2373
2374 case LEX_INTERPEND:
3280af22
NIS
2375 if (PL_lex_dojoin) {
2376 PL_lex_dojoin = FALSE;
2377 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2378 return ')';
2379 }
43a16006 2380 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2381 && SvEVALED(PL_lex_repl))
43a16006 2382 {
e9fa98b2 2383 if (PL_bufptr != PL_bufend)
cea2e8a9 2384 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2385 PL_lex_repl = Nullsv;
2386 }
79072805
LW
2387 /* FALLTHROUGH */
2388 case LEX_INTERPCONCAT:
2389#ifdef DEBUGGING
3280af22 2390 if (PL_lex_brackets)
cea2e8a9 2391 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2392#endif
3280af22 2393 if (PL_bufptr == PL_bufend)
79072805
LW
2394 return sublex_done();
2395
3280af22
NIS
2396 if (SvIVX(PL_linestr) == '\'') {
2397 SV *sv = newSVsv(PL_linestr);
2398 if (!PL_lex_inpat)
76e3520e 2399 sv = tokeq(sv);
3280af22 2400 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2401 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2402 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2403 s = PL_bufend;
79072805
LW
2404 }
2405 else {
3280af22 2406 s = scan_const(PL_bufptr);
79072805 2407 if (*s == '\\')
3280af22 2408 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2409 else
3280af22 2410 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2411 }
2412
3280af22
NIS
2413 if (s != PL_bufptr) {
2414 PL_nextval[PL_nexttoke] = yylval;
2415 PL_expect = XTERM;
79072805 2416 force_next(THING);
3280af22 2417 if (PL_lex_starts++)
79072805
LW
2418 Aop(OP_CONCAT);
2419 else {
3280af22 2420 PL_bufptr = s;
cea2e8a9 2421 return yylex();
79072805
LW
2422 }
2423 }
2424
cea2e8a9 2425 return yylex();
a0d0e21e 2426 case LEX_FORMLINE:
3280af22
NIS
2427 PL_lex_state = LEX_NORMAL;
2428 s = scan_formline(PL_bufptr);
2429 if (!PL_lex_formbrack)
a0d0e21e
LW
2430 goto rightbracket;
2431 OPERATOR(';');
79072805
LW
2432 }
2433
3280af22
NIS
2434 s = PL_bufptr;
2435 PL_oldoldbufptr = PL_oldbufptr;
2436 PL_oldbufptr = s;
79072805 2437 DEBUG_p( {
bf49b057
GS
2438 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2439 exp_name[PL_expect], s);
79072805 2440 } )
463ee0b2
LW
2441
2442 retry:
378cc40b
LW
2443 switch (*s) {
2444 default:
7e2040f0 2445 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2446 goto keylookup;
cea2e8a9 2447 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2448 case 4:
2449 case 26:
2450 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2451 case 0:
3280af22
NIS
2452 if (!PL_rsfp) {
2453 PL_last_uni = 0;
2454 PL_last_lop = 0;
2455 if (PL_lex_brackets)
d98d5fff 2456 yyerror("Missing right curly or square bracket");
79072805 2457 TOKEN(0);
463ee0b2 2458 }
3280af22 2459 if (s++ < PL_bufend)
a687059c 2460 goto retry; /* ignore stray nulls */
3280af22
NIS
2461 PL_last_uni = 0;
2462 PL_last_lop = 0;
2463 if (!PL_in_eval && !PL_preambled) {
2464 PL_preambled = TRUE;
2465 sv_setpv(PL_linestr,incl_perldb());
2466 if (SvCUR(PL_linestr))
2467 sv_catpv(PL_linestr,";");
2468 if (PL_preambleav){
2469 while(AvFILLp(PL_preambleav) >= 0) {
2470 SV *tmpsv = av_shift(PL_preambleav);
2471 sv_catsv(PL_linestr, tmpsv);
2472 sv_catpv(PL_linestr, ";");
91b7def8
PP
2473 sv_free(tmpsv);
2474 }
3280af22
NIS
2475 sv_free((SV*)PL_preambleav);
2476 PL_preambleav = NULL;
91b7def8 2477 }
3280af22
NIS
2478 if (PL_minus_n || PL_minus_p) {
2479 sv_catpv(PL_linestr, "LINE: while (<>) {");
2480 if (PL_minus_l)
2481 sv_catpv(PL_linestr,"chomp;");
2482 if (PL_minus_a) {
8fd239a7
CS
2483 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2484 if (gv)
2485 GvIMPORTED_AV_on(gv);
3280af22
NIS
2486 if (PL_minus_F) {
2487 if (strchr("/'\"", *PL_splitstr)
2488 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2489 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2490 else {
2491 char delim;
2492 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2493 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2494 delim = *s;
cea2e8a9 2495 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2496 "q" + (delim == '\''), delim);
3280af22 2497 for (s = PL_splitstr; *s; s++) {
54310121 2498 if (*s == '\\')
3280af22
NIS
2499 sv_catpvn(PL_linestr, "\\", 1);
2500 sv_catpvn(PL_linestr, s, 1);
54310121 2501 }
cea2e8a9 2502 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2503 }
2304df62
AD
2504 }
2505 else
3280af22 2506 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2507 }
79072805 2508 }
3280af22
NIS
2509 sv_catpv(PL_linestr, "\n");
2510 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2511 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2512 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2513 SV *sv = NEWSV(85,0);
2514
2515 sv_upgrade(sv, SVt_PVMG);
3280af22 2516 sv_setsv(sv,PL_linestr);
57843af0 2517 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2518 }
79072805 2519 goto retry;
a687059c 2520 }
e929a76b 2521 do {
01ec43d0
GS
2522 bool bof;
2523 bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
dea0fc0b
JH
2524 s = filter_gets(PL_linestr, PL_rsfp, 0);
2525 if (s == Nullch) {
e929a76b 2526 fake_eof:
3280af22
NIS
2527 if (PL_rsfp) {
2528 if (PL_preprocess && !PL_in_eval)
2529 (void)PerlProc_pclose(PL_rsfp);
2530 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2531 PerlIO_clearerr(PL_rsfp);
395c3793 2532 else
3280af22
NIS
2533 (void)PerlIO_close(PL_rsfp);
2534 PL_rsfp = Nullfp;
4a9ae47a 2535 PL_doextract = FALSE;
395c3793 2536 }
3280af22
NIS
2537 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2538 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2539 sv_catpv(PL_linestr,";}");
2540 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2541 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2542 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2543 goto retry;
2544 }
3280af22
NIS
2545 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2546 sv_setpv(PL_linestr,"");
79072805 2547 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2548 } else if (bof) {
2549 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2550 s = swallow_bom((U8*)s);
378cc40b 2551 }
3280af22 2552 if (PL_doextract) {
a0d0e21e 2553 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2554 PL_doextract = FALSE;
a0d0e21e
LW
2555
2556 /* Incest with pod. */
2557 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2558 sv_setpv(PL_linestr, "");
2559 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2560 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2561 PL_doextract = FALSE;
a0d0e21e 2562 }
b250498f 2563 }
463ee0b2 2564 incline(s);
3280af22
NIS
2565 } while (PL_doextract);
2566 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2567 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2568 SV *sv = NEWSV(85,0);
a687059c 2569
93a17b20 2570 sv_upgrade(sv, SVt_PVMG);
3280af22 2571 sv_setsv(sv,PL_linestr);
57843af0 2572 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2573 }
3280af22 2574 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2575 if (CopLINE(PL_curcop) == 1) {
3280af22 2576 while (s < PL_bufend && isSPACE(*s))
79072805 2577 s++;
a0d0e21e 2578 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2579 s++;
44a8e56a 2580 d = Nullch;
3280af22 2581 if (!PL_in_eval) {
44a8e56a
PP
2582 if (*s == '#' && *(s+1) == '!')
2583 d = s + 2;
2584#ifdef ALTERNATE_SHEBANG
2585 else {
2586 static char as[] = ALTERNATE_SHEBANG;
2587 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2588 d = s + (sizeof(as) - 1);
2589 }
2590#endif /* ALTERNATE_SHEBANG */
2591 }
2592 if (d) {
b8378b72 2593 char *ipath;
774d564b 2594 char *ipathend;
b8378b72 2595
774d564b 2596 while (isSPACE(*d))
b8378b72
CS
2597 d++;
2598 ipath = d;
774d564b
PP
2599 while (*d && !isSPACE(*d))
2600 d++;
2601 ipathend = d;
2602
2603#ifdef ARG_ZERO_IS_SCRIPT
2604 if (ipathend > ipath) {
2605 /*
2606 * HP-UX (at least) sets argv[0] to the script name,
2607 * which makes $^X incorrect. And Digital UNIX and Linux,
2608 * at least, set argv[0] to the basename of the Perl
2609 * interpreter. So, having found "#!", we'll set it right.
2610 */
2611 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2612 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2613 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2614 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2615 SvSETMAGIC(x);
2616 }
774d564b 2617 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2618 }
774d564b 2619#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2620
2621 /*
2622 * Look for options.
2623 */
748a9306 2624 d = instr(s,"perl -");
84e30d1a 2625 if (!d) {
748a9306 2626 d = instr(s,"perl");
84e30d1a
GS
2627#if defined(DOSISH)
2628 /* avoid getting into infinite loops when shebang
2629 * line contains "Perl" rather than "perl" */
2630 if (!d) {
2631 for (d = ipathend-4; d >= ipath; --d) {
2632 if ((*d == 'p' || *d == 'P')
2633 && !ibcmp(d, "perl", 4))
2634 {
2635 break;
2636 }
2637 }
2638 if (d < ipath)
2639 d = Nullch;
2640 }
2641#endif
2642 }
44a8e56a
PP
2643#ifdef ALTERNATE_SHEBANG
2644 /*
2645 * If the ALTERNATE_SHEBANG on this system starts with a
2646 * character that can be part of a Perl expression, then if
2647 * we see it but not "perl", we're probably looking at the
2648 * start of Perl code, not a request to hand off to some
2649 * other interpreter. Similarly, if "perl" is there, but
2650 * not in the first 'word' of the line, we assume the line
2651 * contains the start of the Perl program.
44a8e56a
PP
2652 */
2653 if (d && *s != '#') {
774d564b 2654 char *c = ipath;
44a8e56a
PP
2655 while (*c && !strchr("; \t\r\n\f\v#", *c))
2656 c++;
2657 if (c < d)
2658 d = Nullch; /* "perl" not in first word; ignore */
2659 else
2660 *s = '#'; /* Don't try to parse shebang line */
2661 }
774d564b 2662#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2663#ifndef MACOS_TRADITIONAL
748a9306 2664 if (!d &&
44a8e56a 2665 *s == '#' &&
774d564b 2666 ipathend > ipath &&
3280af22 2667 !PL_minus_c &&
748a9306 2668 !instr(s,"indir") &&
3280af22 2669 instr(PL_origargv[0],"perl"))
748a9306 2670 {
9f68db38 2671 char **newargv;
9f68db38 2672
774d564b
PP
2673 *ipathend = '\0';
2674 s = ipathend + 1;
3280af22 2675 while (s < PL_bufend && isSPACE(*s))
9f68db38 2676 s++;
3280af22
NIS
2677 if (s < PL_bufend) {
2678 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2679 newargv[1] = s;
3280af22 2680 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2681 s++;
2682 *s = '\0';
3280af22 2683 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2684 }
2685 else
3280af22 2686 newargv = PL_origargv;
774d564b 2687 newargv[0] = ipath;
80252599 2688 PerlProc_execv(ipath, newargv);
cea2e8a9 2689 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2690 }
bf4acbe4 2691#endif
748a9306 2692 if (d) {
3280af22
NIS
2693 U32 oldpdb = PL_perldb;
2694 bool oldn = PL_minus_n;
2695 bool oldp = PL_minus_p;
748a9306
LW
2696
2697 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2698 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2699
2700 if (*d++ == '-') {
8cc95fdb
PP
2701 do {
2702 if (*d == 'M' || *d == 'm') {
2703 char *m = d;
2704 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2705 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2706 (int)(d - m), m);
2707 }
2708 d = moreswitches(d);
2709 } while (d);
155aba94
GS
2710 if ((PERLDB_LINE && !oldpdb) ||
2711 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2712 /* if we have already added "LINE: while (<>) {",
2713 we must not do it again */
748a9306 2714 {
3280af22
NIS
2715 sv_setpv(PL_linestr, "");
2716 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2717 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2718 PL_preambled = FALSE;
84902520 2719 if (PERLDB_LINE)
3280af22 2720 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2721 goto retry;
2722 }
a0d0e21e 2723 }
79072805 2724 }
9f68db38 2725 }
79072805 2726 }
3280af22
NIS
2727 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2728 PL_bufptr = s;
2729 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2730 return yylex();
ae986130 2731 }
378cc40b 2732 goto retry;
4fdae800 2733 case '\r':
6a27c188 2734#ifdef PERL_STRICT_CR
cea2e8a9
GS
2735 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2736 Perl_croak(aTHX_
cc507455 2737 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2738#endif
4fdae800 2739 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2740#ifdef MACOS_TRADITIONAL
2741 case '\312':
2742#endif
378cc40b
LW
2743 s++;
2744 goto retry;
378cc40b 2745 case '#':
e929a76b 2746 case '\n':
3280af22 2747 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2748 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2749 /* handle eval qq[#line 1 "foo"\n ...] */
2750 CopLINE_dec(PL_curcop);
2751 incline(s);
2752 }
3280af22 2753 d = PL_bufend;
a687059c 2754 while (s < d && *s != '\n')
378cc40b 2755 s++;
0f85fab0 2756 if (s < d)
378cc40b 2757 s++;
463ee0b2 2758 incline(s);
3280af22
NIS
2759 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2760 PL_bufptr = s;
2761 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2762 return yylex();
a687059c 2763 }
378cc40b 2764 }
a687059c 2765 else {
378cc40b 2766 *s = '\0';
3280af22 2767 PL_bufend = s;
a687059c 2768 }
378cc40b
LW
2769 goto retry;
2770 case '-':
79072805 2771 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2772 s++;
3280af22 2773 PL_bufptr = s;
748a9306
LW
2774 tmp = *s++;
2775
bf4acbe4 2776 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2777 s++;
2778
2779 if (strnEQ(s,"=>",2)) {
3280af22 2780 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2781 OPERATOR('-'); /* unary minus */
2782 }
3280af22
NIS
2783 PL_last_uni = PL_oldbufptr;
2784 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2785 switch (tmp) {
79072805
LW
2786 case 'r': FTST(OP_FTEREAD);
2787 case 'w': FTST(OP_FTEWRITE);
2788 case 'x': FTST(OP_FTEEXEC);
2789 case 'o': FTST(OP_FTEOWNED);
2790 case 'R': FTST(OP_FTRREAD);
2791 case 'W': FTST(OP_FTRWRITE);
2792 case 'X': FTST(OP_FTREXEC);
2793 case 'O': FTST(OP_FTROWNED);
2794 case 'e': FTST(OP_FTIS);
2795 case 'z': FTST(OP_FTZERO);
2796 case 's': FTST(OP_FTSIZE);
2797 case 'f': FTST(OP_FTFILE);
2798 case 'd': FTST(OP_FTDIR);
2799 case 'l': FTST(OP_FTLINK);
2800 case 'p': FTST(OP_FTPIPE);
2801 case 'S': FTST(OP_FTSOCK);
2802 case 'u': FTST(OP_FTSUID);
2803 case 'g': FTST(OP_FTSGID);
2804 case 'k': FTST(OP_FTSVTX);
2805 case 'b': FTST(OP_FTBLK);
2806 case 'c': FTST(OP_FTCHR);
2807 case 't': FTST(OP_FTTTY);
2808 case 'T': FTST(OP_FTTEXT);
2809 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2810 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2811 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2812 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2813 default:
cea2e8a9 2814 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2815 break;
2816 }
2817 }
a687059c
LW
2818 tmp = *s++;
2819 if (*s == tmp) {
2820 s++;
3280af22 2821 if (PL_expect == XOPERATOR)
79072805
LW
2822 TERM(POSTDEC);
2823 else
2824 OPERATOR(PREDEC);
2825 }
2826 else if (*s == '>') {
2827 s++;
2828 s = skipspace(s);
7e2040f0 2829 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2830 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2831 TOKEN(ARROW);
79072805 2832 }
748a9306
LW
2833 else if (*s == '$')
2834 OPERATOR(ARROW);
463ee0b2 2835 else
748a9306 2836 TERM(ARROW);
a687059c 2837 }
3280af22 2838 if (PL_expect == XOPERATOR)
79072805
LW
2839 Aop(OP_SUBTRACT);
2840 else {
3280af22 2841 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2842 check_uni();
79072805 2843 OPERATOR('-'); /* unary minus */
2f3197b3 2844 }
79072805 2845
378cc40b 2846 case '+':
a687059c
LW
2847 tmp = *s++;
2848 if (*s == tmp) {
378cc40b 2849 s++;
3280af22 2850 if (PL_expect == XOPERATOR)
79072805
LW
2851 TERM(POSTINC);
2852 else
2853 OPERATOR(PREINC);
378cc40b 2854 }
3280af22 2855 if (PL_expect == XOPERATOR)
79072805
LW
2856 Aop(OP_ADD);
2857 else {
3280af22 2858 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2859 check_uni();
a687059c 2860 OPERATOR('+');
2f3197b3 2861 }
a687059c 2862
378cc40b 2863 case '*':
3280af22
NIS
2864 if (PL_expect != XOPERATOR) {
2865 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2866 PL_expect = XOPERATOR;
2867 force_ident(PL_tokenbuf, '*');
2868 if (!*PL_tokenbuf)
a0d0e21e 2869 PREREF('*');
79072805 2870 TERM('*');
a687059c 2871 }
79072805
LW
2872 s++;
2873 if (*s == '*') {
a687059c 2874 s++;
79072805 2875 PWop(OP_POW);
a687059c 2876 }
79072805
LW
2877 Mop(OP_MULTIPLY);
2878
378cc40b 2879 case '%':
3280af22 2880 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2881 ++s;
2882 Mop(OP_MODULO);
a687059c 2883 }
3280af22
NIS
2884 PL_tokenbuf[0] = '%';
2885 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2886 if (!PL_tokenbuf[1]) {
2887 if (s == PL_bufend)
bbce6d69
PP
2888 yyerror("Final % should be \\% or %name");
2889 PREREF('%');
a687059c 2890 }
3280af22 2891 PL_pending_ident = '%';
bbce6d69 2892 TERM('%');
a687059c 2893
378cc40b 2894 case '^':
79072805 2895 s++;
a0d0e21e 2896 BOop(OP_BIT_XOR);
79072805 2897 case '[':
3280af22 2898 PL_lex_brackets++;
79072805 2899 /* FALL THROUGH */
378cc40b 2900 case '~':
378cc40b 2901 case ',':
378cc40b
LW
2902 tmp = *s++;
2903 OPERATOR(tmp);
a0d0e21e
LW
2904 case ':':
2905 if (s[1] == ':') {
2906 len = 0;
2907 goto just_a_word;
2908 }
2909 s++;
09bef843
SB
2910 switch (PL_expect) {
2911 OP *attrs;
2912 case XOPERATOR:
2913 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2914 break;
2915 PL_bufptr = s; /* update in case we back off */
2916 goto grabattrs;
2917 case XATTRBLOCK:
2918 PL_expect = XBLOCK;
2919 goto grabattrs;
2920 case XATTRTERM:
2921 PL_expect = XTERMBLOCK;
2922 grabattrs:
2923 s = skipspace(s);
2924 attrs = Nullop;
7e2040f0 2925 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2926 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2927 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2928 if (tmp < 0) tmp = -tmp;
2929 switch (tmp) {
2930 case KEY_or:
2931 case KEY_and:
2932 case KEY_for:
2933 case KEY_unless:
2934 case KEY_if:
2935 case KEY_while:
2936 case KEY_until:
2937 goto got_attrs;
2938 default:
2939 break;
2940 }
2941 }
09bef843
SB
2942 if (*d == '(') {
2943 d = scan_str(d,TRUE,TRUE);
2944 if (!d) {
2945 if (PL_lex_stuff) {
2946 SvREFCNT_dec(PL_lex_stuff);
2947 PL_lex_stuff = Nullsv;
2948 }
2949 /* MUST advance bufptr here to avoid bogus
2950 "at end of line" context messages from yyerror().
2951 */
2952 PL_bufptr = s + len;
2953 yyerror("Unterminated attribute parameter in attribute list");
2954 if (attrs)
2955 op_free(attrs);
2956 return 0; /* EOF indicator */
2957 }
2958 }
2959 if (PL_lex_stuff) {
2960 SV *sv = newSVpvn(s, len);
2961 sv_catsv(sv, PL_lex_stuff);
2962 attrs = append_elem(OP_LIST, attrs,
2963 newSVOP(OP_CONST, 0, sv));
2964 SvREFCNT_dec(PL_lex_stuff);
2965 PL_lex_stuff = Nullsv;
2966 }
2967 else {
2968 attrs = append_elem(OP_LIST, attrs,
2969 newSVOP(OP_CONST, 0,
2970 newSVpvn(s, len)));
2971 }
2972 s = skipspace(d);
0120eecf 2973 if (*s == ':' && s[1] != ':')
09bef843 2974 s = skipspace(s+1);
0120eecf
GS
2975 else if (s == d)
2976 break; /* require real whitespace or :'s */
09bef843 2977 }
f9829d6b
GS
2978 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2979 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2980 char q = ((*s == '\'') ? '"' : '\'');
2981 /* If here for an expression, and parsed no attrs, back off. */
2982 if (tmp == '=' && !attrs) {
2983 s = PL_bufptr;
2984 break;
2985 }
2986 /* MUST advance bufptr here to avoid bogus "at end of line"
2987 context messages from yyerror().
2988 */
2989 PL_bufptr = s;
2990 if (!*s)
2991 yyerror("Unterminated attribute list");
2992 else
2993 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2994 q, *s, q));
2995 if (attrs)
2996 op_free(attrs);
2997 OPERATOR(':');
2998 }
f9829d6b 2999 got_attrs:
09bef843
SB
3000 if (attrs) {
3001 PL_nextval[PL_nexttoke].opval = attrs;
3002 force_next(THING);
3003 }
3004 TOKEN(COLONATTR);
3005 }
a0d0e21e 3006 OPERATOR(':');
8990e307
LW
3007 case '(':
3008 s++;
3280af22
NIS
3009 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3010 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3011 else
3280af22 3012 PL_expect = XTERM;
a0d0e21e 3013 TOKEN('(');
378cc40b 3014 case ';':
f4dd75d9 3015 CLINE;
378cc40b
LW
3016 tmp = *s++;
3017 OPERATOR(tmp);
3018 case ')':
378cc40b 3019 tmp = *s++;
16d20bd9
AD
3020 s = skipspace(s);
3021 if (*s == '{')
3022 PREBLOCK(tmp);
378cc40b 3023 TERM(tmp);
79072805
LW
3024 case ']':
3025 s++;
3280af22 3026 if (PL_lex_brackets <= 0)
d98d5fff 3027 yyerror("Unmatched right square bracket");
463ee0b2 3028 else
3280af22
NIS
3029 --PL_lex_brackets;
3030 if (PL_lex_state == LEX_INTERPNORMAL) {
3031 if (PL_lex_brackets == 0) {
a0d0e21e 3032 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3033 PL_lex_state = LEX_INTERPEND;
79072805
LW
3034 }
3035 }
4633a7c4 3036 TERM(']');
79072805
LW
3037 case '{':
3038 leftbracket:
79072805 3039 s++;
3280af22
NIS
3040 if (PL_lex_brackets > 100) {
3041 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3042 if (newlb != PL_lex_brackstack) {
8990e307 3043 SAVEFREEPV(newlb);
3280af22 3044 PL_lex_brackstack = newlb;
8990e307
LW
3045 }
3046 }
3280af22 3047 switch (PL_expect) {
a0d0e21e 3048 case XTERM:
3280af22 3049 if (PL_lex_formbrack) {
a0d0e21e
LW
3050 s--;
3051 PRETERMBLOCK(DO);
3052 }
3280af22
NIS
3053 if (PL_oldoldbufptr == PL_last_lop)
3054 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3055 else
3280af22 3056 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3057 OPERATOR(HASHBRACK);
a0d0e21e 3058 case XOPERATOR:
bf4acbe4 3059 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3060 s++;
44a8e56a 3061 d = s;
3280af22
NIS
3062 PL_tokenbuf[0] = '\0';
3063 if (d < PL_bufend && *d == '-') {
3064 PL_tokenbuf[0] = '-';
44a8e56a 3065 d++;
bf4acbe4 3066 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3067 d++;
3068 }
7e2040f0 3069 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3070 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3071 FALSE, &len);
bf4acbe4 3072 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3073 d++;
3074 if (*d == '}') {
3280af22 3075 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3076 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3077 if (minus)
3078 force_next('-');
748a9306
LW
3079 }
3080 }
3081 /* FALL THROUGH */
09bef843 3082 case XATTRBLOCK:
748a9306 3083 case XBLOCK:
3280af22
NIS
3084 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3085 PL_expect = XSTATE;
a0d0e21e 3086 break;
09bef843 3087 case XATTRTERM:
a0d0e21e 3088 case XTERMBLOCK:
3280af22
NIS
3089 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3090 PL_expect = XSTATE;
a0d0e21e
LW
3091 break;
3092 default: {
3093 char *t;
3280af22
NIS
3094 if (PL_oldoldbufptr == PL_last_lop)
3095 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3096 else
3280af22 3097 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3098 s = skipspace(s);
09ecc4b6 3099 if (*s == '}')
a0d0e21e 3100 OPERATOR(HASHBRACK);
b8a4b1be
GS
3101 /* This hack serves to disambiguate a pair of curlies
3102 * as being a block or an anon hash. Normally, expectation
3103 * determines that, but in cases where we're not in a
3104 * position to expect anything in particular (like inside
3105 * eval"") we have to resolve the ambiguity. This code
3106 * covers the case where the first term in the curlies is a
3107 * quoted string. Most other cases need to be explicitly
3108 * disambiguated by prepending a `+' before the opening
3109 * curly in order to force resolution as an anon hash.
3110 *
3111 * XXX should probably propagate the outer expectation
3112 * into eval"" to rely less on this hack, but that could
3113 * potentially break current behavior of eval"".
3114 * GSAR 97-07-21
3115 */
3116 t = s;
3117 if (*s == '\'' || *s == '"' || *s == '`') {
3118 /* common case: get past first string, handling escapes */
3280af22 3119 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3120 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3121 t++;
3122 t++;
a0d0e21e 3123 }
b8a4b1be 3124 else if (*s == 'q') {
3280af22 3125 if (++t < PL_bufend
b8a4b1be 3126 && (!isALNUM(*t)
3280af22 3127 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3128 && !isALNUM(*t))))
3129 {
b8a4b1be
GS
3130 char *tmps;
3131 char open, close, term;
3132 I32 brackets = 1;
3133
3280af22 3134 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3135 t++;
3136 term = *t;
3137 open = term;
3138 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3139 term = tmps[5];
3140 close = term;
3141 if (open == close)
3280af22
NIS
3142 for (t++; t < PL_bufend; t++) {
3143 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3144 t++;
6d07e5e9 3145 else if (*t == open)
b8a4b1be
GS
3146 break;
3147 }
3148 else
3280af22
NIS
3149 for (t++; t < PL_bufend; t++) {
3150 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3151 t++;
6d07e5e9 3152 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3153 break;
3154 else if (*t == open)
3155 brackets++;
3156 }
3157 }
3158 t++;
a0d0e21e 3159 }
7e2040f0 3160 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3161 t += UTF8SKIP(t);
7e2040f0 3162 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3163 t += UTF8SKIP(t);
a0d0e21e 3164 }
3280af22 3165 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3166 t++;
b8a4b1be
GS
3167 /* if comma follows first term, call it an anon hash */
3168 /* XXX it could be a comma expression with loop modifiers */
3280af22 3169 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3170 || (*t == '=' && t[1] == '>')))
a0d0e21e 3171 OPERATOR(HASHBRACK);
3280af22 3172 if (PL_expect == XREF)
4e4e412b 3173 PL_expect = XTERM;
a0d0e21e 3174 else {
3280af22
NIS
3175 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3176 PL_expect = XSTATE;
a0d0e21e 3177 }
8990e307 3178 }
a0d0e21e 3179 break;
463ee0b2 3180 }
57843af0 3181 yylval.ival = CopLINE(PL_curcop);
79072805 3182 if (isSPACE(*s) || *s == '#')
3280af22 3183 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3184 TOKEN('{');
378cc40b 3185 case '}':
79072805
LW
3186 rightbracket:
3187 s++;
3280af22 3188 if (PL_lex_brackets <= 0)
d98d5fff 3189 yyerror("Unmatched right curly bracket");
463ee0b2 3190 else
3280af22 3191 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3192 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3193 PL_lex_formbrack = 0;
3194 if (PL_lex_state == LEX_INTERPNORMAL) {
3195 if (PL_lex_brackets == 0) {
9059aa12
LW
3196 if (PL_expect & XFAKEBRACK) {
3197 PL_expect &= XENUMMASK;
3280af22
NIS
3198 PL_lex_state = LEX_INTERPEND;
3199 PL_bufptr = s;
cea2e8a9 3200 return yylex(); /* ignore fake brackets */
79072805 3201 }
fa83b5b6 3202 if (*s == '-' && s[1] == '>')
3280af22 3203 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3204 else if (*s != '[' && *s != '{')
3280af22 3205 PL_lex_state = LEX_INTERPEND;
79072805
LW
3206 }
3207 }
9059aa12
LW
3208 if (PL_expect & XFAKEBRACK) {
3209 PL_expect &= XENUMMASK;
3280af22 3210 PL_bufptr = s;
cea2e8a9 3211 return yylex(); /* ignore fake brackets */
748a9306 3212 }
79072805
LW
3213 force_next('}');
3214 TOKEN(';');
378cc40b
LW
3215 case '&':
3216 s++;
3217 tmp = *s++;
3218 if (tmp == '&')
a0d0e21e 3219 AOPERATOR(ANDAND);
378cc40b 3220 s--;
3280af22 3221 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3222 if (ckWARN(WARN_SEMICOLON)
3223 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3224 {
57843af0 3225 CopLINE_dec(PL_curcop);
cea2e8a9 3226 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3227 CopLINE_inc(PL_curcop);
463ee0b2 3228 }
79072805 3229 BAop(OP_BIT_AND);
463ee0b2 3230 }
79072805 3231
3280af22
NIS
3232 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3233 if (*PL_tokenbuf) {
3234 PL_expect = XOPERATOR;
3235 force_ident(PL_tokenbuf, '&');
463ee0b2 3236 }
79072805
LW
3237 else
3238 PREREF('&');
c07a80fd 3239 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3240 TERM('&');
3241
378cc40b
LW
3242 case '|':
3243 s++;
3244 tmp = *s++;
3245 if (tmp == '|')
a0d0e21e 3246 AOPERATOR(OROR);
378cc40b 3247 s--;
79072805 3248 BOop(OP_BIT_OR);
378cc40b
LW
3249 case '=':
3250 s++;
3251 tmp = *s++;
3252 if (tmp == '=')
79072805
LW
3253 Eop(OP_EQ);
3254 if (tmp == '>')
3255 OPERATOR(',');
378cc40b 3256 if (tmp == '~')
79072805 3257 PMop(OP_MATCH);
599cee73 3258 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3259 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3260 s--;
3280af22
NIS
3261 if (PL_expect == XSTATE && isALPHA(tmp) &&
3262 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3263 {
3280af22
NIS
3264 if (PL_in_eval && !PL_rsfp) {
3265 d = PL_bufend;
a5f75d66
AD
3266 while (s < d) {
3267 if (*s++ == '\n') {
3268 incline(s);
3269 if (strnEQ(s,"=cut",4)) {
3270 s = strchr(s,'\n');
3271 if (s)
3272 s++;
3273 else
3274 s = d;
3275 incline(s);
3276 goto retry;
3277 }
3278 }
3279 }
3280 goto retry;
3281 }
3280af22
NIS
3282 s = PL_bufend;
3283 PL_doextract = TRUE;
a0d0e21e
LW
3284 goto retry;
3285 }
3280af22 3286 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3287 char *t;
51882d45 3288#ifdef PERL_STRICT_CR
bf4acbe4 3289 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3290#else
bf4acbe4 3291 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3292#endif
a0d0e21e
LW
3293 if (*t == '\n' || *t == '#') {
3294 s--;
3280af22 3295 PL_expect = XBLOCK;
a0d0e21e
LW
3296 goto leftbracket;
3297 }
79072805 3298 }
a0d0e21e
LW
3299 yylval.ival = 0;
3300 OPERATOR(ASSIGNOP);
378cc40b
LW
3301 case '!':
3302 s++;
3303 tmp = *s++;
3304 if (tmp == '=')
79072805 3305 Eop(OP_NE);
378cc40b 3306 if (tmp == '~')
79072805 3307 PMop(OP_NOT);
378cc40b
LW
3308 s--;
3309 OPERATOR('!');
3310 case '<':
3280af22 3311 if (PL_expect != XOPERATOR) {
93a17b20 3312 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3313 check_uni();
79072805
LW
3314 if (s[1] == '<')
3315 s = scan_heredoc(s);
3316 else
3317 s = scan_inputsymbol(s);
3318 TERM(sublex_start());
378cc40b
LW
3319 }
3320 s++;
3321 tmp = *s++;
3322 if (tmp == '<')
79072805 3323 SHop(OP_LEFT_SHIFT);
395c3793
LW
3324 if (tmp == '=') {
3325 tmp = *s++;
3326 if (tmp == '>')
79072805 3327 Eop(OP_NCMP);
395c3793 3328 s--;
79072805 3329 Rop(OP_LE);
395c3793 3330 }
378cc40b 3331 s--;
79072805 3332 Rop(OP_LT);
378cc40b
LW
3333 case '>':
3334 s++;
3335 tmp = *s++;
3336 if (tmp == '>')
79072805 3337 SHop(OP_RIGHT_SHIFT);
378cc40b 3338 if (tmp == '=')
79072805 3339 Rop(OP_GE);
378cc40b 3340 s--;
79072805 3341 Rop(OP_GT);
378cc40b
LW
3342
3343 case '$':
bbce6d69
PP
3344 CLINE;
3345
3280af22
NIS
3346 if (PL_expect == XOPERATOR) {
3347 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3348 PL_expect = XTERM;
a0d0e21e 3349 depcom();
bbce6d69 3350 return ','; /* grandfather non-comma-format format */
a0d0e21e 3351 }
8990e307 3352 }
a0d0e21e 3353
7e2040f0 3354 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3355 PL_tokenbuf[0] = '@';
376b8730
SM
3356 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3357 sizeof PL_tokenbuf - 1, FALSE);
3358 if (PL_expect == XOPERATOR)
3359 no_op("Array length", s);
3280af22 3360 if (!PL_tokenbuf[1])
a0d0e21e 3361 PREREF(DOLSHARP);
3280af22
NIS
3362 PL_expect = XOPERATOR;
3363 PL_pending_ident = '#';
463ee0b2 3364 TOKEN(DOLSHARP);
79072805 3365 }
bbce6d69 3366
3280af22 3367 PL_tokenbuf[0] = '$';
376b8730
SM
3368 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3369 sizeof PL_tokenbuf - 1, FALSE);
3370 if (PL_expect == XOPERATOR)
3371 no_op("Scalar", s);
3280af22
NIS
3372 if (!PL_tokenbuf[1]) {
3373 if (s == PL_bufend)
bbce6d69
PP
3374 yyerror("Final $ should be \\$ or $name");
3375 PREREF('$');
8990e307 3376 }
a0d0e21e 3377
bbce6d69 3378 /* This kludge not intended to be bulletproof. */
3280af22 3379 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3380 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3381 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3382 yylval.opval->op_private = OPpCONST_ARYBASE;
3383 TERM(THING);
3384 }
3385
ff68c719 3386 d = s;
69d2bceb 3387 tmp = (I32)*s;
3280af22 3388 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3389 s = skipspace(s);
3390
3280af22 3391 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3392 char *t;
3393 if (*s == '[') {
3280af22 3394 PL_tokenbuf[0] = '@';
599cee73 3395 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3396 for(t = s + 1;
7e2040f0 3397 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3398 t++) ;
a0d0e21e 3399 if (*t++ == ',') {
3280af22
NIS
3400 PL_bufptr = skipspace(PL_bufptr);
3401 while (t < PL_bufend && *t != ']')
bbce6d69 3402 t++;
cea2e8a9 3403 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3404 "Multidimensional syntax %.*s not supported",
3405 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3406 }
3407 }
bbce6d69
PP
3408 }
3409 else if (*s == '{') {
3280af22 3410 PL_tokenbuf[0] = '%';
599cee73 3411 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3412 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3413 {
3280af22 3414 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3415 STRLEN len;
3416 for (t++; isSPACE(*t); t++) ;
7e2040f0 3417 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3418 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3419 for (; isSPACE(*t); t++) ;
864dbfa3 3420 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3421 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3422 "You need to quote \"%s\"", tmpbuf);
748a9306 3423 }
93a17b20
LW
3424 }
3425 }
2f3197b3 3426 }
bbce6d69 3427
3280af22 3428 PL_expect = XOPERATOR;
69d2bceb 3429 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3430 bool islop = (PL_last_lop == PL_oldoldbufptr);
3431 if (!islop || PL_last_lop_op == OP_GREPSTART)
3432 PL_expect = XOPERATOR;
bbce6d69 3433 else if (strchr("$@\"'`q", *s))
3280af22 3434 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3435 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3436 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3437 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3438 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3439 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3440 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3441 /* binary operators exclude handle interpretations */
3442 switch (tmp) {
3443 case -KEY_x:
3444 case -KEY_eq:
3445 case -KEY_ne:
3446 case -KEY_gt:
3447 case -KEY_lt:
3448 case -KEY_ge:
3449 case -KEY_le:
3450 case -KEY_cmp:
3451 break;
3452 default:
3280af22 3453 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3454 break;
3455 }
3456 }
68dc0745
PP
3457 else {
3458 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3459 if (gv && GvCVu(gv))
3280af22 3460 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3461 }
93a17b20 3462 }
bbce6d69 3463 else if (isDIGIT(*s))
3280af22 3464 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3465 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3466 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3467 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3468 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3469 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3470 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3471 }
3280af22 3472 PL_pending_ident = '$';
79072805 3473 TOKEN('$');
378cc40b
LW
3474
3475 case '@':
3280af22 3476 if (PL_expect == XOPERATOR)
bbce6d69 3477 no_op("Array", s);
3280af22
NIS
3478 PL_tokenbuf[0] = '@';
3479 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3480 if (!PL_tokenbuf[1]) {
3481 if (s == PL_bufend)
bbce6d69
PP
3482 yyerror("Final @ should be \\@ or @name");
3483 PREREF('@');
3484 }
3280af22 3485 if (PL_lex_state == LEX_NORMAL)
ff68c719 3486 s = skipspace(s);
3280af22 3487 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3488 if (*s == '{')
3280af22 3489 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3490
3491 /* Warn about @ where they meant $. */
599cee73 3492 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3493 if (*s == '[' || *s == '{') {
3494 char *t = s + 1;
7e2040f0 3495 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3496 t++;
3497 if (*t == '}' || *t == ']') {
3498 t++;
3280af22 3499 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3500 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3501 "Scalar value %.*s better written as $%.*s",
3280af22 3502 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3503 }
93a17b20
LW
3504 }
3505 }
463ee0b2 3506 }
3280af22 3507 PL_pending_ident = '@';
79072805 3508 TERM('@');
378cc40b
LW
3509
3510 case '/': /* may either be division or pattern */
3511 case '?': /* may either be conditional or pattern */
3280af22 3512 if (PL_expect != XOPERATOR) {
c277df42 3513 /* Disable warning on "study /blah/" */
3280af22
NIS
3514 if (PL_oldoldbufptr == PL_last_uni
3515 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3516 || memNE(PL_last_uni, "study", 5)
3517 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3518 check_uni();
8782bef2 3519 s = scan_pat(s,OP_MATCH);
79072805 3520 TERM(sublex_start());
378cc40b
LW
3521 }
3522 tmp = *s++;
a687059c 3523 if (tmp == '/')
79072805 3524 Mop(OP_DIVIDE);
378cc40b
LW
3525 OPERATOR(tmp);
3526
3527 case '.':
51882d45
GS
3528 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3529#ifdef PERL_STRICT_CR
3530 && s[1] == '\n'
3531#else
3532 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3533#endif
3534 && (s == PL_linestart || s[-1] == '\n') )
3535 {
3280af22
NIS
3536 PL_lex_formbrack = 0;
3537 PL_expect = XSTATE;
79072805
LW
3538 goto rightbracket;
3539 }
3280af22 3540 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3541 tmp = *s++;
a687059c
LW
3542 if (*s == tmp) {
3543 s++;
2f3197b3
LW
3544 if (*s == tmp) {
3545 s++;
79072805 3546 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3547 }
3548 else
79072805 3549 yylval.ival = 0;
378cc40b 3550 OPERATOR(DOTDOT);
a687059c 3551 }
3280af22 3552 if (PL_expect != XOPERATOR)
2f3197b3 3553 check_uni();
79072805 3554 Aop(OP_CONCAT);
378cc40b
LW
3555 }
3556 /* FALL THROUGH */
3557 case '0': case '1': case '2': case '3': case '4':
3558 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3559 s = scan_num(s, &yylval);
3280af22 3560 if (PL_expect == XOPERATOR)
8990e307 3561 no_op("Number",s);
79072805
LW
3562 TERM(THING);
3563
3564 case '\'':
09bef843 3565 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3566 if (PL_expect == XOPERATOR) {
3567 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3568 PL_expect = XTERM;
a0d0e21e
LW
3569 depcom();
3570 return ','; /* grandfather non-comma-format format */
3571 }
463ee0b2 3572 else
8990e307 3573 no_op("String",s);
463ee0b2 3574 }
79072805 3575 if (!s)
85e6fe83 3576 missingterm((char*)0);
79072805
LW
3577 yylval.ival = OP_CONST;
3578 TERM(sublex_start());
3579
3580 case '"':
09bef843 3581 s = scan_str(s,FALSE,FALSE);