This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Changes.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
a790bc05 72/* XXX If this causes problems, set i_unistd=undef in the hint file. */
73#ifdef I_UNISTD
74# include <unistd.h> /* Needed for execv() */
75#endif
76
77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e 82#ifdef USE_PURE_BISON
20141f0e
IRC
83#ifndef YYMAXLEVEL
84#define YYMAXLEVEL 100
85#endif
86YYSTYPE* yylval_pointer[YYMAXLEVEL];
87int* yychar_pointer[YYMAXLEVEL];
88int yyactlevel = 0;
22c35a8c
GS
89# undef yylval
90# undef yychar
20141f0e
IRC
91# define yylval (*yylval_pointer[yyactlevel])
92# define yychar (*yychar_pointer[yyactlevel])
93# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
94# undef yylex
95# define yylex() Perl_yylex(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
130 * Rop : relational operator <= != gt
131 *
132 * Also see LOP and lop() below.
133 */
134
3280af22
NIS
135#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
136#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
137#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
138#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
139#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
140#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
141#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
142#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
143#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
144#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
145#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
146#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
147#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
148#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
149#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
150#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
151#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
152#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
153#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
154#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 155
a687059c
LW
156/* This bit of chicanery makes a unary function followed by
157 * a parenthesis into a function with one argument, highest precedence.
158 */
2f3197b3 159#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
160 PL_expect = XTERM, \
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
163 PL_last_lop_op = f, \
a687059c
LW
164 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
165
79072805 166#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
167 PL_bufptr = s, \
168 PL_last_uni = PL_oldbufptr, \
79072805
LW
169 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
170
9f68db38 171/* grandfather return to old style */
3280af22 172#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 173
ffb4593c
NT
174/*
175 * S_ao
176 *
177 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
178 * into an OP_ANDASSIGN or OP_ORASSIGN
179 */
180
76e3520e 181STATIC int
cea2e8a9 182S_ao(pTHX_ int toketype)
a0d0e21e 183{
3280af22
NIS
184 if (*PL_bufptr == '=') {
185 PL_bufptr++;
a0d0e21e
LW
186 if (toketype == ANDAND)
187 yylval.ival = OP_ANDASSIGN;
188 else if (toketype == OROR)
189 yylval.ival = OP_ORASSIGN;
190 toketype = ASSIGNOP;
191 }
192 return toketype;
193}
194
ffb4593c
NT
195/*
196 * S_no_op
197 * When Perl expects an operator and finds something else, no_op
198 * prints the warning. It always prints "<something> found where
199 * operator expected. It prints "Missing semicolon on previous line?"
200 * if the surprise occurs at the start of the line. "do you need to
201 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
202 * where the compiler doesn't know if foo is a method call or a function.
203 * It prints "Missing operator before end of line" if there's nothing
204 * after the missing operator, or "... before <...>" if there is something
205 * after the missing operator.
206 */
207
76e3520e 208STATIC void
cea2e8a9 209S_no_op(pTHX_ char *what, char *s)
463ee0b2 210{
3280af22
NIS
211 char *oldbp = PL_bufptr;
212 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 213
1189a94a
GS
214 if (!s)
215 s = oldbp;
07c798fb 216 else
1189a94a 217 PL_bufptr = s;
cea2e8a9 218 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 219 if (is_first)
cea2e8a9 220 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 221 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 222 char *t;
7e2040f0 223 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 224 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 225 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 226 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 227 }
07c798fb
HS
228 else {
229 assert(s >= oldbp);
cea2e8a9 230 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 231 }
3280af22 232 PL_bufptr = oldbp;
8990e307
LW
233}
234
ffb4593c
NT
235/*
236 * S_missingterm
237 * Complain about missing quote/regexp/heredoc terminator.
238 * If it's called with (char *)NULL then it cauterizes the line buffer.
239 * If we're in a delimited string and the delimiter is a control
240 * character, it's reformatted into a two-char sequence like ^C.
241 * This is fatal.
242 */
243
76e3520e 244STATIC void
cea2e8a9 245S_missingterm(pTHX_ char *s)
8990e307
LW
246{
247 char tmpbuf[3];
248 char q;
249 if (s) {
250 char *nl = strrchr(s,'\n');
d2719217 251 if (nl)
8990e307
LW
252 *nl = '\0';
253 }
9d116dd7
JH
254 else if (
255#ifdef EBCDIC
256 iscntrl(PL_multi_close)
257#else
258 PL_multi_close < 32 || PL_multi_close == 127
259#endif
260 ) {
8990e307 261 *tmpbuf = '^';
3280af22 262 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
263 s = "\\n";
264 tmpbuf[2] = '\0';
265 s = tmpbuf;
266 }
267 else {
3280af22 268 *tmpbuf = PL_multi_close;
8990e307
LW
269 tmpbuf[1] = '\0';
270 s = tmpbuf;
271 }
272 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 273 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 274}
79072805 275
ffb4593c
NT
276/*
277 * Perl_deprecate
ffb4593c
NT
278 */
279
79072805 280void
864dbfa3 281Perl_deprecate(pTHX_ char *s)
a0d0e21e 282{
d008e5eb 283 dTHR;
599cee73 284 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 285 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
286}
287
ffb4593c
NT
288/*
289 * depcom
9cbb5ea2 290 * Deprecate a comma-less variable list.
ffb4593c
NT
291 */
292
76e3520e 293STATIC void
cea2e8a9 294S_depcom(pTHX)
a0d0e21e
LW
295{
296 deprecate("comma-less variable list");
297}
298
ffb4593c 299/*
9cbb5ea2
GS
300 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
301 * utf16-to-utf8-reversed.
ffb4593c
NT
302 */
303
c39cd008
GS
304#ifdef PERL_CR_FILTER
305static void
306strip_return(SV *sv)
307{
308 register char *s = SvPVX(sv);
309 register char *e = s + SvCUR(sv);
310 /* outer loop optimized to do nothing if there are no CR-LFs */
311 while (s < e) {
312 if (*s++ == '\r' && *s == '\n') {
313 /* hit a CR-LF, need to copy the rest */
314 register char *d = s - 1;
315 *d++ = *s++;
316 while (s < e) {
317 if (*s == '\r' && s[1] == '\n')
318 s++;
319 *d++ = *s++;
320 }
321 SvCUR(sv) -= s - d;
322 return;
323 }
324 }
325}
a868473f 326
76e3520e 327STATIC I32
c39cd008 328S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 329{
c39cd008
GS
330 I32 count = FILTER_READ(idx+1, sv, maxlen);
331 if (count > 0 && !maxlen)
332 strip_return(sv);
333 return count;
a868473f
NIS
334}
335#endif
336
ffb4593c
NT
337/*
338 * Perl_lex_start
9cbb5ea2
GS
339 * Initialize variables. Uses the Perl save_stack to save its state (for
340 * recursive calls to the parser).
ffb4593c
NT
341 */
342
a0d0e21e 343void
864dbfa3 344Perl_lex_start(pTHX_ SV *line)
79072805 345{
0f15f207 346 dTHR;
8990e307
LW
347 char *s;
348 STRLEN len;
349
3280af22
NIS
350 SAVEI32(PL_lex_dojoin);
351 SAVEI32(PL_lex_brackets);
3280af22
NIS
352 SAVEI32(PL_lex_casemods);
353 SAVEI32(PL_lex_starts);
354 SAVEI32(PL_lex_state);
7766f137 355 SAVEVPTR(PL_lex_inpat);
3280af22 356 SAVEI32(PL_lex_inwhat);
18b09519
GS
357 if (PL_lex_state == LEX_KNOWNEXT) {
358 I32 toke = PL_nexttoke;
359 while (--toke >= 0) {
360 SAVEI32(PL_nexttype[toke]);
361 SAVEVPTR(PL_nextval[toke]);
362 }
363 SAVEI32(PL_nexttoke);
18b09519 364 }
57843af0 365 SAVECOPLINE(PL_curcop);
3280af22
NIS
366 SAVEPPTR(PL_bufptr);
367 SAVEPPTR(PL_bufend);
368 SAVEPPTR(PL_oldbufptr);
369 SAVEPPTR(PL_oldoldbufptr);
370 SAVEPPTR(PL_linestart);
371 SAVESPTR(PL_linestr);
372 SAVEPPTR(PL_lex_brackstack);
373 SAVEPPTR(PL_lex_casestack);
c76ac1ee 374 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
375 SAVESPTR(PL_lex_stuff);
376 SAVEI32(PL_lex_defer);
09bef843 377 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 378 SAVESPTR(PL_lex_repl);
bebdddfc
GS
379 SAVEINT(PL_expect);
380 SAVEINT(PL_lex_expect);
3280af22
NIS
381
382 PL_lex_state = LEX_NORMAL;
383 PL_lex_defer = 0;
384 PL_expect = XSTATE;
385 PL_lex_brackets = 0;
3280af22
NIS
386 New(899, PL_lex_brackstack, 120, char);
387 New(899, PL_lex_casestack, 12, char);
388 SAVEFREEPV(PL_lex_brackstack);
389 SAVEFREEPV(PL_lex_casestack);
390 PL_lex_casemods = 0;
391 *PL_lex_casestack = '\0';
392 PL_lex_dojoin = 0;
393 PL_lex_starts = 0;
394 PL_lex_stuff = Nullsv;
395 PL_lex_repl = Nullsv;
396 PL_lex_inpat = 0;
76be56bc 397 PL_nexttoke = 0;
3280af22 398 PL_lex_inwhat = 0;
09bef843 399 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
400 PL_linestr = line;
401 if (SvREADONLY(PL_linestr))
402 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
403 s = SvPV(PL_linestr, len);
8990e307 404 if (len && s[len-1] != ';') {
3280af22
NIS
405 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
406 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
407 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 408 }
3280af22
NIS
409 SvTEMP_off(PL_linestr);
410 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
411 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
412 SvREFCNT_dec(PL_rs);
79cb57f6 413 PL_rs = newSVpvn("\n", 1);
3280af22 414 PL_rsfp = 0;
79072805 415}
a687059c 416
ffb4593c
NT
417/*
418 * Perl_lex_end
9cbb5ea2
GS
419 * Finalizer for lexing operations. Must be called when the parser is
420 * done with the lexer.
ffb4593c
NT
421 */
422
463ee0b2 423void
864dbfa3 424Perl_lex_end(pTHX)
463ee0b2 425{
3280af22 426 PL_doextract = FALSE;
463ee0b2
LW
427}
428
ffb4593c
NT
429/*
430 * S_incline
431 * This subroutine has nothing to do with tilting, whether at windmills
432 * or pinball tables. Its name is short for "increment line". It
57843af0 433 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 434 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
435 * # line 500 "foo.pm"
436 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
437 */
438
76e3520e 439STATIC void
cea2e8a9 440S_incline(pTHX_ char *s)
463ee0b2 441{
0f15f207 442 dTHR;
463ee0b2
LW
443 char *t;
444 char *n;
73659bf1 445 char *e;
463ee0b2 446 char ch;
463ee0b2 447
57843af0 448 CopLINE_inc(PL_curcop);
463ee0b2
LW
449 if (*s++ != '#')
450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
452 if (strnEQ(s, "line", 4))
453 s += 4;
454 else
455 return;
456 if (*s == ' ' || *s == '\t')
457 s++;
458 else
459 return;
bf4acbe4 460 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
461 if (!isDIGIT(*s))
462 return;
463 n = s;
464 while (isDIGIT(*s))
465 s++;
bf4acbe4 466 while (SPACE_OR_TAB(*s))
463ee0b2 467 s++;
73659bf1 468 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 469 s++;
73659bf1
GS
470 e = t + 1;
471 }
463ee0b2 472 else {
463ee0b2 473 for (t = s; !isSPACE(*t); t++) ;
73659bf1 474 e = t;
463ee0b2 475 }
bf4acbe4 476 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
477 e++;
478 if (*e != '\n' && *e != '\0')
479 return; /* false alarm */
480
463ee0b2
LW
481 ch = *t;
482 *t = '\0';
f4dd75d9
GS
483 if (t - s > 0) {
484#ifdef USE_ITHREADS
485 Safefree(CopFILE(PL_curcop));
486#else
487 SvREFCNT_dec(CopFILEGV(PL_curcop));
488#endif
57843af0 489 CopFILE_set(PL_curcop, s);
f4dd75d9 490 }
463ee0b2 491 *t = ch;
57843af0 492 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
493}
494
ffb4593c
NT
495/*
496 * S_skipspace
497 * Called to gobble the appropriate amount and type of whitespace.
498 * Skips comments as well.
499 */
500
76e3520e 501STATIC char *
cea2e8a9 502S_skipspace(pTHX_ register char *s)
a687059c 503{
11343788 504 dTHR;
3280af22 505 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 506 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
507 s++;
508 return s;
509 }
510 for (;;) {
fd049845 511 STRLEN prevlen;
09bef843
SB
512 SSize_t oldprevlen, oldoldprevlen;
513 SSize_t oldloplen, oldunilen;
60e6418e
GS
514 while (s < PL_bufend && isSPACE(*s)) {
515 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
516 incline(s);
517 }
ffb4593c
NT
518
519 /* comment */
3280af22
NIS
520 if (s < PL_bufend && *s == '#') {
521 while (s < PL_bufend && *s != '\n')
463ee0b2 522 s++;
60e6418e 523 if (s < PL_bufend) {
463ee0b2 524 s++;
60e6418e
GS
525 if (PL_in_eval && !PL_rsfp) {
526 incline(s);
527 continue;
528 }
529 }
463ee0b2 530 }
ffb4593c
NT
531
532 /* only continue to recharge the buffer if we're at the end
533 * of the buffer, we're not reading from a source filter, and
534 * we're in normal lexing mode
535 */
09bef843
SB
536 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
537 PL_lex_state == LEX_FORMLINE)
463ee0b2 538 return s;
ffb4593c
NT
539
540 /* try to recharge the buffer */
9cbb5ea2
GS
541 if ((s = filter_gets(PL_linestr, PL_rsfp,
542 (prevlen = SvCUR(PL_linestr)))) == Nullch)
543 {
544 /* end of file. Add on the -p or -n magic */
3280af22
NIS
545 if (PL_minus_n || PL_minus_p) {
546 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
547 ";}continue{print or die qq(-p destination: $!\\n)" :
548 "");
3280af22
NIS
549 sv_catpv(PL_linestr,";}");
550 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
551 }
552 else
3280af22 553 sv_setpv(PL_linestr,";");
ffb4593c
NT
554
555 /* reset variables for next time we lex */
9cbb5ea2
GS
556 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
557 = SvPVX(PL_linestr);
3280af22 558 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
559
560 /* Close the filehandle. Could be from -P preprocessor,
561 * STDIN, or a regular file. If we were reading code from
562 * STDIN (because the commandline held no -e or filename)
563 * then we don't close it, we reset it so the code can
564 * read from STDIN too.
565 */
566
3280af22
NIS
567 if (PL_preprocess && !PL_in_eval)
568 (void)PerlProc_pclose(PL_rsfp);
569 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
570 PerlIO_clearerr(PL_rsfp);
8990e307 571 else
3280af22
NIS
572 (void)PerlIO_close(PL_rsfp);
573 PL_rsfp = Nullfp;
463ee0b2
LW
574 return s;
575 }
ffb4593c
NT
576
577 /* not at end of file, so we only read another line */
09bef843
SB
578 /* make corresponding updates to old pointers, for yyerror() */
579 oldprevlen = PL_oldbufptr - PL_bufend;
580 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
581 if (PL_last_uni)
582 oldunilen = PL_last_uni - PL_bufend;
583 if (PL_last_lop)
584 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
585 PL_linestart = PL_bufptr = s + prevlen;
586 PL_bufend = s + SvCUR(PL_linestr);
587 s = PL_bufptr;
09bef843
SB
588 PL_oldbufptr = s + oldprevlen;
589 PL_oldoldbufptr = s + oldoldprevlen;
590 if (PL_last_uni)
591 PL_last_uni = s + oldunilen;
592 if (PL_last_lop)
593 PL_last_lop = s + oldloplen;
a0d0e21e 594 incline(s);
ffb4593c
NT
595
596 /* debugger active and we're not compiling the debugger code,
597 * so store the line into the debugger's array of lines
598 */
3280af22 599 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
600 SV *sv = NEWSV(85,0);
601
602 sv_upgrade(sv, SVt_PVMG);
3280af22 603 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 604 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 605 }
463ee0b2 606 }
a687059c 607}
378cc40b 608
ffb4593c
NT
609/*
610 * S_check_uni
611 * Check the unary operators to ensure there's no ambiguity in how they're
612 * used. An ambiguous piece of code would be:
613 * rand + 5
614 * This doesn't mean rand() + 5. Because rand() is a unary operator,
615 * the +5 is its argument.
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_check_uni(pTHX)
ba106d47 620{
2f3197b3 621 char *s;
a0d0e21e 622 char *t;
0453d815 623 dTHR;
2f3197b3 624
3280af22 625 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 626 return;
3280af22
NIS
627 while (isSPACE(*PL_last_uni))
628 PL_last_uni++;
7e2040f0 629 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 630 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 631 return;
0453d815 632 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 633 char ch = *s;
0453d815
PM
634 *s = '\0';
635 Perl_warner(aTHX_ WARN_AMBIGUOUS,
636 "Warning: Use of \"%s\" without parens is ambiguous",
637 PL_last_uni);
638 *s = ch;
639 }
2f3197b3
LW
640}
641
ffb4593c
NT
642/* workaround to replace the UNI() macro with a function. Only the
643 * hints/uts.sh file mentions this. Other comments elsewhere in the
644 * source indicate Microport Unix might need it too.
645 */
646
ffed7fef
LW
647#ifdef CRIPPLED_CC
648
649#undef UNI
ffed7fef 650#define UNI(f) return uni(f,s)
ffed7fef 651
76e3520e 652STATIC int
cea2e8a9 653S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
654{
655 yylval.ival = f;
3280af22
NIS
656 PL_expect = XTERM;
657 PL_bufptr = s;
8f872242
NIS
658 PL_last_uni = PL_oldbufptr;
659 PL_last_lop_op = f;
ffed7fef
LW
660 if (*s == '(')
661 return FUNC1;
662 s = skipspace(s);
663 if (*s == '(')
664 return FUNC1;
665 else
666 return UNIOP;
667}
668
a0d0e21e
LW
669#endif /* CRIPPLED_CC */
670
ffb4593c
NT
671/*
672 * LOP : macro to build a list operator. Its behaviour has been replaced
673 * with a subroutine, S_lop() for which LOP is just another name.
674 */
675
a0d0e21e
LW
676#define LOP(f,x) return lop(f,x,s)
677
ffb4593c
NT
678/*
679 * S_lop
680 * Build a list operator (or something that might be one). The rules:
681 * - if we have a next token, then it's a list operator [why?]
682 * - if the next thing is an opening paren, then it's a function
683 * - else it's a list operator
684 */
685
76e3520e 686STATIC I32
a0be28da 687S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 688{
0f15f207 689 dTHR;
79072805 690 yylval.ival = f;
35c8bce7 691 CLINE;
3280af22
NIS
692 PL_expect = x;
693 PL_bufptr = s;
694 PL_last_lop = PL_oldbufptr;
695 PL_last_lop_op = f;
696 if (PL_nexttoke)
a0d0e21e 697 return LSTOP;
79072805
LW
698 if (*s == '(')
699 return FUNC;
700 s = skipspace(s);
701 if (*s == '(')
702 return FUNC;
703 else
704 return LSTOP;
705}
706
ffb4593c
NT
707/*
708 * S_force_next
9cbb5ea2 709 * When the lexer realizes it knows the next token (for instance,
ffb4593c 710 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
711 * to know what token to return the next time the lexer is called. Caller
712 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
713 * handles the token correctly.
ffb4593c
NT
714 */
715
76e3520e 716STATIC void
cea2e8a9 717S_force_next(pTHX_ I32 type)
79072805 718{
3280af22
NIS
719 PL_nexttype[PL_nexttoke] = type;
720 PL_nexttoke++;
721 if (PL_lex_state != LEX_KNOWNEXT) {
722 PL_lex_defer = PL_lex_state;
723 PL_lex_expect = PL_expect;
724 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
725 }
726}
727
ffb4593c
NT
728/*
729 * S_force_word
730 * When the lexer knows the next thing is a word (for instance, it has
731 * just seen -> and it knows that the next char is a word char, then
732 * it calls S_force_word to stick the next word into the PL_next lookahead.
733 *
734 * Arguments:
b1b65b59 735 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
736 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
737 * int check_keyword : if true, Perl checks to make sure the word isn't
738 * a keyword (do this if the word is a label, e.g. goto FOO)
739 * int allow_pack : if true, : characters will also be allowed (require,
740 * use, etc. do this)
9cbb5ea2 741 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
742 */
743
76e3520e 744STATIC char *
cea2e8a9 745S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 746{
463ee0b2
LW
747 register char *s;
748 STRLEN len;
749
750 start = skipspace(start);
751 s = start;
7e2040f0 752 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 753 (allow_pack && *s == ':') ||
15f0808c 754 (allow_initial_tick && *s == '\'') )
a0d0e21e 755 {
3280af22
NIS
756 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
757 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
758 return start;
759 if (token == METHOD) {
760 s = skipspace(s);
761 if (*s == '(')
3280af22 762 PL_expect = XTERM;
463ee0b2 763 else {
3280af22 764 PL_expect = XOPERATOR;
463ee0b2 765 }
79072805 766 }
3280af22
NIS
767 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
768 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
769 force_next(token);
770 }
771 return s;
772}
773
ffb4593c
NT
774/*
775 * S_force_ident
9cbb5ea2 776 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
777 * text only contains the "foo" portion. The first argument is a pointer
778 * to the "foo", and the second argument is the type symbol to prefix.
779 * Forces the next token to be a "WORD".
9cbb5ea2 780 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
781 */
782
76e3520e 783STATIC void
cea2e8a9 784S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
785{
786 if (s && *s) {
11343788 787 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 788 PL_nextval[PL_nexttoke].opval = o;
79072805 789 force_next(WORD);
748a9306 790 if (kind) {
e858de61 791 dTHR; /* just for in_eval */
11343788 792 o->op_private = OPpCONST_ENTERED;
55497cff 793 /* XXX see note in pp_entereval() for why we forgo typo
794 warnings if the symbol must be introduced in an eval.
795 GSAR 96-10-12 */
3280af22 796 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
797 kind == '$' ? SVt_PV :
798 kind == '@' ? SVt_PVAV :
799 kind == '%' ? SVt_PVHV :
800 SVt_PVGV
801 );
748a9306 802 }
79072805
LW
803 }
804}
805
1571675a
GS
806NV
807Perl_str_to_version(pTHX_ SV *sv)
808{
809 NV retval = 0.0;
810 NV nshift = 1.0;
811 STRLEN len;
812 char *start = SvPVx(sv,len);
3aa33fe5 813 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
814 char *end = start + len;
815 while (start < end) {
816 I32 skip;
817 UV n;
818 if (utf)
8e84507e 819 n = utf8_to_uv_chk((U8*)start, &skip, 0);
1571675a
GS
820 else {
821 n = *(U8*)start;
822 skip = 1;
823 }
824 retval += ((NV)n)/nshift;
825 start += skip;
826 nshift *= 1000;
827 }
828 return retval;
829}
830
ffb4593c
NT
831/*
832 * S_force_version
833 * Forces the next token to be a version number.
834 */
835
76e3520e 836STATIC char *
cea2e8a9 837S_force_version(pTHX_ char *s)
89bfa8cd 838{
839 OP *version = Nullop;
44dcb63b 840 char *d;
89bfa8cd 841
842 s = skipspace(s);
843
44dcb63b 844 d = s;
dd629d5b 845 if (*d == 'v')
44dcb63b 846 d++;
44dcb63b 847 if (isDIGIT(*d)) {
a7cb1f99 848 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 849 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 850 SV *ver;
b73d6f50 851 s = scan_num(s, &yylval);
89bfa8cd 852 version = yylval.opval;
dd629d5b
GS
853 ver = cSVOPx(version)->op_sv;
854 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 855 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
856 SvNVX(ver) = str_to_version(ver);
857 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 858 }
89bfa8cd 859 }
860 }
861
862 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 863 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 864 force_next(WORD);
865
866 return (s);
867}
868
ffb4593c
NT
869/*
870 * S_tokeq
871 * Tokenize a quoted string passed in as an SV. It finds the next
872 * chunk, up to end of string or a backslash. It may make a new
873 * SV containing that chunk (if HINT_NEW_STRING is on). It also
874 * turns \\ into \.
875 */
876
76e3520e 877STATIC SV *
cea2e8a9 878S_tokeq(pTHX_ SV *sv)
79072805
LW
879{
880 register char *s;
881 register char *send;
882 register char *d;
b3ac6de7
IZ
883 STRLEN len = 0;
884 SV *pv = sv;
79072805
LW
885
886 if (!SvLEN(sv))
b3ac6de7 887 goto finish;
79072805 888
a0d0e21e 889 s = SvPV_force(sv, len);
21a311ee 890 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 891 goto finish;
463ee0b2 892 send = s + len;
79072805
LW
893 while (s < send && *s != '\\')
894 s++;
895 if (s == send)
b3ac6de7 896 goto finish;
79072805 897 d = s;
3280af22 898 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 899 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
900 while (s < send) {
901 if (*s == '\\') {
a0d0e21e 902 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
903 s++; /* all that, just for this */
904 }
905 *d++ = *s++;
906 }
907 *d = '\0';
463ee0b2 908 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 909 finish:
3280af22 910 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 911 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
912 return sv;
913}
914
ffb4593c
NT
915/*
916 * Now come three functions related to double-quote context,
917 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
918 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
919 * interact with PL_lex_state, and create fake ( ... ) argument lists
920 * to handle functions and concatenation.
921 * They assume that whoever calls them will be setting up a fake
922 * join call, because each subthing puts a ',' after it. This lets
923 * "lower \luPpEr"
924 * become
925 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
926 *
927 * (I'm not sure whether the spurious commas at the end of lcfirst's
928 * arguments and join's arguments are created or not).
929 */
930
931/*
932 * S_sublex_start
933 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
934 *
935 * Pattern matching will set PL_lex_op to the pattern-matching op to
936 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
937 *
938 * OP_CONST and OP_READLINE are easy--just make the new op and return.
939 *
940 * Everything else becomes a FUNC.
941 *
942 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
943 * had an OP_CONST or OP_READLINE). This just sets us up for a
944 * call to S_sublex_push().
945 */
946
76e3520e 947STATIC I32
cea2e8a9 948S_sublex_start(pTHX)
79072805
LW
949{
950 register I32 op_type = yylval.ival;
79072805
LW
951
952 if (op_type == OP_NULL) {
3280af22
NIS
953 yylval.opval = PL_lex_op;
954 PL_lex_op = Nullop;
79072805
LW
955 return THING;
956 }
957 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 958 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
959
960 if (SvTYPE(sv) == SVt_PVIV) {
961 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
962 STRLEN len;
963 char *p;
964 SV *nsv;
965
966 p = SvPV(sv, len);
79cb57f6 967 nsv = newSVpvn(p, len);
01ec43d0
GS
968 if (SvUTF8(sv))
969 SvUTF8_on(nsv);
b3ac6de7
IZ
970 SvREFCNT_dec(sv);
971 sv = nsv;
972 }
973 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 974 PL_lex_stuff = Nullsv;
79072805
LW
975 return THING;
976 }
977
3280af22
NIS
978 PL_sublex_info.super_state = PL_lex_state;
979 PL_sublex_info.sub_inwhat = op_type;
980 PL_sublex_info.sub_op = PL_lex_op;
981 PL_lex_state = LEX_INTERPPUSH;
55497cff 982
3280af22
NIS
983 PL_expect = XTERM;
984 if (PL_lex_op) {
985 yylval.opval = PL_lex_op;
986 PL_lex_op = Nullop;
55497cff 987 return PMFUNC;
988 }
989 else
990 return FUNC;
991}
992
ffb4593c
NT
993/*
994 * S_sublex_push
995 * Create a new scope to save the lexing state. The scope will be
996 * ended in S_sublex_done. Returns a '(', starting the function arguments
997 * to the uc, lc, etc. found before.
998 * Sets PL_lex_state to LEX_INTERPCONCAT.
999 */
1000
76e3520e 1001STATIC I32
cea2e8a9 1002S_sublex_push(pTHX)
55497cff 1003{
0f15f207 1004 dTHR;
f46d017c 1005 ENTER;
55497cff 1006
3280af22
NIS
1007 PL_lex_state = PL_sublex_info.super_state;
1008 SAVEI32(PL_lex_dojoin);
1009 SAVEI32(PL_lex_brackets);
3280af22
NIS
1010 SAVEI32(PL_lex_casemods);
1011 SAVEI32(PL_lex_starts);
1012 SAVEI32(PL_lex_state);
7766f137 1013 SAVEVPTR(PL_lex_inpat);
3280af22 1014 SAVEI32(PL_lex_inwhat);
57843af0 1015 SAVECOPLINE(PL_curcop);
3280af22
NIS
1016 SAVEPPTR(PL_bufptr);
1017 SAVEPPTR(PL_oldbufptr);
1018 SAVEPPTR(PL_oldoldbufptr);
1019 SAVEPPTR(PL_linestart);
1020 SAVESPTR(PL_linestr);
1021 SAVEPPTR(PL_lex_brackstack);
1022 SAVEPPTR(PL_lex_casestack);
1023
1024 PL_linestr = PL_lex_stuff;
1025 PL_lex_stuff = Nullsv;
1026
9cbb5ea2
GS
1027 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1028 = SvPVX(PL_linestr);
3280af22
NIS
1029 PL_bufend += SvCUR(PL_linestr);
1030 SAVEFREESV(PL_linestr);
1031
1032 PL_lex_dojoin = FALSE;
1033 PL_lex_brackets = 0;
3280af22
NIS
1034 New(899, PL_lex_brackstack, 120, char);
1035 New(899, PL_lex_casestack, 12, char);
1036 SAVEFREEPV(PL_lex_brackstack);
1037 SAVEFREEPV(PL_lex_casestack);
1038 PL_lex_casemods = 0;
1039 *PL_lex_casestack = '\0';
1040 PL_lex_starts = 0;
1041 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1042 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1043
1044 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1045 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1046 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1047 else
3280af22 1048 PL_lex_inpat = Nullop;
79072805 1049
55497cff 1050 return '(';
79072805
LW
1051}
1052
ffb4593c
NT
1053/*
1054 * S_sublex_done
1055 * Restores lexer state after a S_sublex_push.
1056 */
1057
76e3520e 1058STATIC I32
cea2e8a9 1059S_sublex_done(pTHX)
79072805 1060{
3280af22
NIS
1061 if (!PL_lex_starts++) {
1062 PL_expect = XOPERATOR;
79cb57f6 1063 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1064 return THING;
1065 }
1066
3280af22
NIS
1067 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1068 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1069 return yylex();
79072805
LW
1070 }
1071
ffb4593c 1072 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1073 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1074 PL_linestr = PL_lex_repl;
1075 PL_lex_inpat = 0;
1076 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1077 PL_bufend += SvCUR(PL_linestr);
1078 SAVEFREESV(PL_linestr);
1079 PL_lex_dojoin = FALSE;
1080 PL_lex_brackets = 0;
3280af22
NIS
1081 PL_lex_casemods = 0;
1082 *PL_lex_casestack = '\0';
1083 PL_lex_starts = 0;
25da4f38 1084 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1085 PL_lex_state = LEX_INTERPNORMAL;
1086 PL_lex_starts++;
e9fa98b2
HS
1087 /* we don't clear PL_lex_repl here, so that we can check later
1088 whether this is an evalled subst; that means we rely on the
1089 logic to ensure sublex_done() is called again only via the
1090 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1091 }
e9fa98b2 1092 else {
3280af22 1093 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1094 PL_lex_repl = Nullsv;
1095 }
79072805 1096 return ',';
ffed7fef
LW
1097 }
1098 else {
f46d017c 1099 LEAVE;
3280af22
NIS
1100 PL_bufend = SvPVX(PL_linestr);
1101 PL_bufend += SvCUR(PL_linestr);
1102 PL_expect = XOPERATOR;
09bef843 1103 PL_sublex_info.sub_inwhat = 0;
79072805 1104 return ')';
ffed7fef
LW
1105 }
1106}
1107
02aa26ce
NT
1108/*
1109 scan_const
1110
1111 Extracts a pattern, double-quoted string, or transliteration. This
1112 is terrifying code.
1113
3280af22
NIS
1114 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1115 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1116 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1117
9b599b2a
GS
1118 Returns a pointer to the character scanned up to. Iff this is
1119 advanced from the start pointer supplied (ie if anything was
1120 successfully parsed), will leave an OP for the substring scanned
1121 in yylval. Caller must intuit reason for not parsing further
1122 by looking at the next characters herself.
1123
02aa26ce
NT
1124 In patterns:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 regexp special ones: \D \s
1128 constants: \x3
1129 backrefs: \1 (deprecated in substitution replacements)
1130 case and quoting: \U \Q \E
1131 stops on @ and $, but not for $ as tail anchor
1132
1133 In transliterations:
1134 characters are VERY literal, except for - not at the start or end
1135 of the string, which indicates a range. scan_const expands the
1136 range to the full set of intermediate characters.
1137
1138 In double-quoted strings:
1139 backslashes:
1140 double-quoted style: \r and \n
1141 constants: \x3
1142 backrefs: \1 (deprecated)
1143 case and quoting: \U \Q \E
1144 stops on @ and $
1145
1146 scan_const does *not* construct ops to handle interpolated strings.
1147 It stops processing as soon as it finds an embedded $ or @ variable
1148 and leaves it to the caller to work out what's going on.
1149
1150 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1151
1152 $ in pattern could be $foo or could be tail anchor. Assumption:
1153 it's a tail anchor if $ is the last thing in the string, or if it's
1154 followed by one of ")| \n\t"
1155
1156 \1 (backreferences) are turned into $1
1157
1158 The structure of the code is
1159 while (there's a character to process) {
1160 handle transliteration ranges
1161 skip regexp comments
1162 skip # initiated comments in //x patterns
1163 check for embedded @foo
1164 check for embedded scalars
1165 if (backslash) {
1166 leave intact backslashes from leave (below)
1167 deprecate \1 in strings and sub replacements
1168 handle string-changing backslashes \l \U \Q \E, etc.
1169 switch (what was escaped) {
1170 handle - in a transliteration (becomes a literal -)
1171 handle \132 octal characters
1172 handle 0x15 hex characters
1173 handle \cV (control V)
1174 handle printf backslashes (\f, \r, \n, etc)
1175 } (end switch)
1176 } (end if backslash)
1177 } (end while character to read)
1178
1179*/
1180
76e3520e 1181STATIC char *
cea2e8a9 1182S_scan_const(pTHX_ char *start)
79072805 1183{
3280af22 1184 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1185 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1186 register char *s = start; /* start of the constant */
1187 register char *d = SvPVX(sv); /* destination for copies */
1188 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1189 bool didrange = FALSE; /* did we just finish a range? */
7e2040f0 1190 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1191 I32 len; /* ? */
012bcf8d
GS
1192 UV uv;
1193
ac2262e3 1194 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1195 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1196 : UTF;
ac2262e3 1197 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1198 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1199 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1200 : UTF;
dff6d3cd 1201 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1202 PL_lex_inpat
4a2d328f 1203 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1204 : "";
79072805
LW
1205
1206 while (s < send || dorange) {
02aa26ce 1207 /* get transliterations out of the way (they're most literal) */
3280af22 1208 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1209 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1210 if (dorange) {
02aa26ce 1211 I32 i; /* current expanded character */
8ada0baa 1212 I32 min; /* first character in range */
02aa26ce
NT
1213 I32 max; /* last character in range */
1214
1215 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1216 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1217 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1218 d -= 2; /* eat the first char and the - */
1219
8ada0baa
JH
1220 min = (U8)*d; /* first char in range */
1221 max = (U8)d[1]; /* last char in range */
1222
c2e66d9e 1223 if (min > max) {
01ec43d0
GS
1224 Perl_croak(aTHX_
1225 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1226 (char)min, (char)max);
c2e66d9e
GS
1227 }
1228
8ada0baa
JH
1229#ifndef ASCIIish
1230 if ((isLOWER(min) && isLOWER(max)) ||
1231 (isUPPER(min) && isUPPER(max))) {
1232 if (isLOWER(min)) {
1233 for (i = min; i <= max; i++)
1234 if (isLOWER(i))
1235 *d++ = i;
1236 } else {
1237 for (i = min; i <= max; i++)
1238 if (isUPPER(i))
1239 *d++ = i;
1240 }
1241 }
1242 else
1243#endif
1244 for (i = min; i <= max; i++)
1245 *d++ = i;
02aa26ce
NT
1246
1247 /* mark the range as done, and continue */
79072805 1248 dorange = FALSE;
01ec43d0 1249 didrange = TRUE;
79072805 1250 continue;
c2e66d9e 1251 }
02aa26ce
NT
1252
1253 /* range begins (ignore - as first or last char) */
79072805 1254 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1255 if (didrange) {
1fafa243 1256 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1257 }
a0ed51b3 1258 if (utf) {
a176fa2a 1259 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1260 s++;
1261 continue;
1262 }
79072805
LW
1263 dorange = TRUE;
1264 s++;
01ec43d0
GS
1265 }
1266 else {
1267 didrange = FALSE;
1268 }
79072805 1269 }
02aa26ce
NT
1270
1271 /* if we get here, we're not doing a transliteration */
1272
0f5d15d6
IZ
1273 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1274 except for the last char, which will be done separately. */
3280af22 1275 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1276 if (s[2] == '#') {
1277 while (s < send && *s != ')')
1278 *d++ = *s++;
155aba94
GS
1279 }
1280 else if (s[2] == '{' /* This should match regcomp.c */
1281 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1282 {
cc6b7395 1283 I32 count = 1;
0f5d15d6 1284 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1285 char c;
1286
d9f97599
GS
1287 while (count && (c = *regparse)) {
1288 if (c == '\\' && regparse[1])
1289 regparse++;
cc6b7395
IZ
1290 else if (c == '{')
1291 count++;
1292 else if (c == '}')
1293 count--;
d9f97599 1294 regparse++;
cc6b7395 1295 }
5bdf89e7
IZ
1296 if (*regparse != ')') {
1297 regparse--; /* Leave one char for continuation. */
cc6b7395 1298 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1299 }
0f5d15d6 1300 while (s < regparse)
cc6b7395
IZ
1301 *d++ = *s++;
1302 }
748a9306 1303 }
02aa26ce
NT
1304
1305 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1306 else if (*s == '#' && PL_lex_inpat &&
1307 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1308 while (s+1 < send && *s != '\n')
1309 *d++ = *s++;
1310 }
02aa26ce 1311
5d1d4326
JH
1312 /* check for embedded arrays
1313 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1314 */
7e2040f0 1315 else if (*s == '@' && s[1]
5d1d4326 1316 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1317 break;
02aa26ce
NT
1318
1319 /* check for embedded scalars. only stop if we're sure it's a
1320 variable.
1321 */
79072805 1322 else if (*s == '$') {
3280af22 1323 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1324 break;
c277df42 1325 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1326 break; /* in regexp, $ might be tail anchor */
1327 }
02aa26ce 1328
a0ed51b3
LW
1329 /* (now in tr/// code again) */
1330
d008e5eb 1331 if (*s & 0x80 && thisutf) {
8e84507e 1332 (void)utf8_to_uv_chk((U8*)s, &len, 0);
012bcf8d
GS
1333 if (len == 1) {
1334 /* illegal UTF8, make it valid */
012bcf8d 1335 char *old_pvx = SvPVX(sv);
bee7c419
GS
1336 /* need space for one extra char (NOTE: SvCUR() not set here) */
1337 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1338 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1339 }
1340 else {
1341 while (len--)
1342 *d++ = *s++;
1343 }
1344 has_utf = TRUE;
1345 continue;
a0ed51b3
LW
1346 }
1347
02aa26ce 1348 /* backslashes */
79072805
LW
1349 if (*s == '\\' && s+1 < send) {
1350 s++;
02aa26ce
NT
1351
1352 /* some backslashes we leave behind */
c9f97d15 1353 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1354 *d++ = '\\';
1355 *d++ = *s++;
1356 continue;
1357 }
02aa26ce
NT
1358
1359 /* deprecate \1 in strings and substitution replacements */
3280af22 1360 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1361 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1362 {
d008e5eb 1363 dTHR; /* only for ckWARN */
599cee73 1364 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1365 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1366 *--s = '$';
1367 break;
1368 }
02aa26ce
NT
1369
1370 /* string-change backslash escapes */
3280af22 1371 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1372 --s;
1373 break;
1374 }
02aa26ce
NT
1375
1376 /* if we get here, it's either a quoted -, or a digit */
79072805 1377 switch (*s) {
02aa26ce
NT
1378
1379 /* quoted - in transliterations */
79072805 1380 case '-':
3280af22 1381 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1382 *d++ = *s++;
1383 continue;
1384 }
1385 /* FALL THROUGH */
1386 default:
11b8faa4
JH
1387 {
1388 dTHR;
7e84c16c 1389 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1390 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1391 "Unrecognized escape \\%c passed through",
1392 *s);
1393 /* default action is to copy the quoted character */
1394 *d++ = *s++;
1395 continue;
1396 }
02aa26ce
NT
1397
1398 /* \132 indicates an octal constant */
79072805
LW
1399 case '0': case '1': case '2': case '3':
1400 case '4': case '5': case '6': case '7':
b21ed0a9 1401 len = 0; /* disallow underscores */
012bcf8d 1402 uv = (UV)scan_oct(s, 3, &len);
79072805 1403 s += len;
012bcf8d 1404 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1405
1406 /* \x24 indicates a hex constant */
79072805 1407 case 'x':
a0ed51b3
LW
1408 ++s;
1409 if (*s == '{') {
1410 char* e = strchr(s, '}');
adaeee49 1411 if (!e) {
a0ed51b3 1412 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1413 e = s;
1414 }
b21ed0a9 1415 len = 1; /* allow underscores */
012bcf8d
GS
1416 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1417 s = e + 1;
a0ed51b3
LW
1418 }
1419 else {
b21ed0a9 1420 len = 0; /* disallow underscores */
012bcf8d
GS
1421 uv = (UV)scan_hex(s, 2, &len);
1422 s += len;
1423 }
1424
1425 NUM_ESCAPE_INSERT:
1426 /* Insert oct or hex escaped character.
1427 * There will always enough room in sv since such escapes will
1428 * be longer than any utf8 sequence they can end up as
1429 */
1430 if (uv > 127) {
1431 if (!thisutf && !has_utf && uv > 255) {
1432 /* might need to recode whatever we have accumulated so far
1433 * if it contains any hibit chars
1434 */
1435 int hicount = 0;
1436 char *c;
1437 for (c = SvPVX(sv); c < d; c++) {
1438 if (*c & 0x80)
1439 hicount++;
1440 }
1441 if (hicount) {
1442 char *old_pvx = SvPVX(sv);
1443 char *src, *dst;
1444 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1445
1446 src = d - 1;
1447 d += hicount;
1448 dst = d - 1;
1449
1450 while (src < dst) {
1451 if (*src & 0x80) {
1452 dst--;
1453 uv_to_utf8((U8*)dst, (U8)*src--);
1454 dst--;
1455 }
1456 else {
1457 *dst-- = *src--;
1458 }
1459 }
1460 }
1461 }
1462
1463 if (thisutf || uv > 255) {
1464 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1465 has_utf = TRUE;
012bcf8d 1466 }
a0ed51b3 1467 else {
012bcf8d 1468 *d++ = (char)uv;
a0ed51b3 1469 }
012bcf8d
GS
1470 }
1471 else {
1472 *d++ = (char)uv;
a0ed51b3 1473 }
79072805 1474 continue;
02aa26ce 1475
4a2d328f
IZ
1476 /* \N{latin small letter a} is a named character */
1477 case 'N':
423cee85
JH
1478 ++s;
1479 if (*s == '{') {
1480 char* e = strchr(s, '}');
155aba94 1481 SV *res;
423cee85
JH
1482 STRLEN len;
1483 char *str;
423cee85
JH
1484
1485 if (!e) {
5777a3f7 1486 yyerror("Missing right brace on \\N{}");
423cee85
JH
1487 e = s - 1;
1488 goto cont_scan;
1489 }
1490 res = newSVpvn(s + 1, e - s - 1);
1491 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1492 res, Nullsv, "\\N{...}" );
423cee85 1493 str = SvPV(res,len);
f08d6ad9
GS
1494 if (!has_utf && SvUTF8(res)) {
1495 char *ostart = SvPVX(sv);
1496 SvCUR_set(sv, d - ostart);
1497 SvPOK_on(sv);
e4f3eed8 1498 *d = '\0';
f08d6ad9 1499 sv_utf8_upgrade(sv);
d2f449dd
SB
1500 /* this just broke our allocation above... */
1501 SvGROW(sv, send - start);
f08d6ad9 1502 d = SvPVX(sv) + SvCUR(sv);
e1992b6d 1503 has_utf = TRUE;
f08d6ad9 1504 }
423cee85
JH
1505 if (len > e - s + 4) {
1506 char *odest = SvPVX(sv);
1507
1508 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1509 d = SvPVX(sv) + (d - odest);
1510 }
1511 Copy(str, d, len, char);
1512 d += len;
1513 SvREFCNT_dec(res);
1514 cont_scan:
1515 s = e + 1;
1516 }
1517 else
5777a3f7 1518 yyerror("Missing braces on \\N{}");
423cee85
JH
1519 continue;
1520
02aa26ce 1521 /* \c is a control character */
79072805
LW
1522 case 'c':
1523 s++;
9d116dd7
JH
1524#ifdef EBCDIC
1525 *d = *s++;
1526 if (isLOWER(*d))
1527 *d = toUPPER(*d);
774a9426
GS
1528 *d = toCTRL(*d);
1529 d++;
9d116dd7 1530#else
bbce6d69 1531 len = *s++;
1532 *d++ = toCTRL(len);
9d116dd7 1533#endif
79072805 1534 continue;
02aa26ce
NT
1535
1536 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1537 case 'b':
1538 *d++ = '\b';
1539 break;
1540 case 'n':
1541 *d++ = '\n';
1542 break;
1543 case 'r':
1544 *d++ = '\r';
1545 break;
1546 case 'f':
1547 *d++ = '\f';
1548 break;
1549 case 't':
1550 *d++ = '\t';
1551 break;
34a3fe2a
PP
1552#ifdef EBCDIC
1553 case 'e':
1554 *d++ = '\047'; /* CP 1047 */
1555 break;
1556 case 'a':
1557 *d++ = '\057'; /* CP 1047 */
1558 break;
1559#else
79072805
LW
1560 case 'e':
1561 *d++ = '\033';
1562 break;
1563 case 'a':
1564 *d++ = '\007';
1565 break;
34a3fe2a 1566#endif
02aa26ce
NT
1567 } /* end switch */
1568
79072805
LW
1569 s++;
1570 continue;
02aa26ce
NT
1571 } /* end if (backslash) */
1572
79072805 1573 *d++ = *s++;
02aa26ce
NT
1574 } /* while loop to process each character */
1575
1576 /* terminate the string and set up the sv */
79072805 1577 *d = '\0';
463ee0b2 1578 SvCUR_set(sv, d - SvPVX(sv));
79072805 1579 SvPOK_on(sv);
7e2040f0
GS
1580 if (has_utf)
1581 SvUTF8_on(sv);
79072805 1582
02aa26ce 1583 /* shrink the sv if we allocated more than we used */
79072805
LW
1584 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1585 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1586 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1587 }
02aa26ce 1588
9b599b2a 1589 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1590 if (s > PL_bufptr) {
1591 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1592 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1593 sv, Nullsv,
3280af22 1594 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1595 ? "tr"
3280af22 1596 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1597 ? "s"
1598 : "qq")));
79072805 1599 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1600 } else
8990e307 1601 SvREFCNT_dec(sv);
79072805
LW
1602 return s;
1603}
1604
ffb4593c
NT
1605/* S_intuit_more
1606 * Returns TRUE if there's more to the expression (e.g., a subscript),
1607 * FALSE otherwise.
ffb4593c
NT
1608 *
1609 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1610 *
1611 * ->[ and ->{ return TRUE
1612 * { and [ outside a pattern are always subscripts, so return TRUE
1613 * if we're outside a pattern and it's not { or [, then return FALSE
1614 * if we're in a pattern and the first char is a {
1615 * {4,5} (any digits around the comma) returns FALSE
1616 * if we're in a pattern and the first char is a [
1617 * [] returns FALSE
1618 * [SOMETHING] has a funky algorithm to decide whether it's a
1619 * character class or not. It has to deal with things like
1620 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1621 * anything else returns TRUE
1622 */
1623
9cbb5ea2
GS
1624/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1625
76e3520e 1626STATIC int
cea2e8a9 1627S_intuit_more(pTHX_ register char *s)
79072805 1628{
3280af22 1629 if (PL_lex_brackets)
79072805
LW
1630 return TRUE;
1631 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1632 return TRUE;
1633 if (*s != '{' && *s != '[')
1634 return FALSE;
3280af22 1635 if (!PL_lex_inpat)
79072805
LW
1636 return TRUE;
1637
1638 /* In a pattern, so maybe we have {n,m}. */
1639 if (*s == '{') {
1640 s++;
1641 if (!isDIGIT(*s))
1642 return TRUE;
1643 while (isDIGIT(*s))
1644 s++;
1645 if (*s == ',')
1646 s++;
1647 while (isDIGIT(*s))
1648 s++;
1649 if (*s == '}')
1650 return FALSE;
1651 return TRUE;
1652
1653 }
1654
1655 /* On the other hand, maybe we have a character class */
1656
1657 s++;
1658 if (*s == ']' || *s == '^')
1659 return FALSE;
1660 else {
ffb4593c 1661 /* this is terrifying, and it works */
79072805
LW
1662 int weight = 2; /* let's weigh the evidence */
1663 char seen[256];
f27ffc4a 1664 unsigned char un_char = 255, last_un_char;
93a17b20 1665 char *send = strchr(s,']');
3280af22 1666 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1667
1668 if (!send) /* has to be an expression */
1669 return TRUE;
1670
1671 Zero(seen,256,char);
1672 if (*s == '$')
1673 weight -= 3;
1674 else if (isDIGIT(*s)) {
1675 if (s[1] != ']') {
1676 if (isDIGIT(s[1]) && s[2] == ']')
1677 weight -= 10;
1678 }
1679 else
1680 weight -= 100;
1681 }
1682 for (; s < send; s++) {
1683 last_un_char = un_char;
1684 un_char = (unsigned char)*s;
1685 switch (*s) {
1686 case '@':
1687 case '&':
1688 case '$':
1689 weight -= seen[un_char] * 10;
7e2040f0 1690 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1691 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1692 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1693 weight -= 100;
1694 else
1695 weight -= 10;
1696 }
1697 else if (*s == '$' && s[1] &&
93a17b20
LW
1698 strchr("[#!%*<>()-=",s[1])) {
1699 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1700 weight -= 10;
1701 else
1702 weight -= 1;
1703 }
1704 break;
1705 case '\\':
1706 un_char = 254;
1707 if (s[1]) {
93a17b20 1708 if (strchr("wds]",s[1]))
79072805
LW
1709 weight += 100;
1710 else if (seen['\''] || seen['"'])
1711 weight += 1;
93a17b20 1712 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1713 weight += 40;
1714 else if (isDIGIT(s[1])) {
1715 weight += 40;
1716 while (s[1] && isDIGIT(s[1]))
1717 s++;
1718 }
1719 }
1720 else
1721 weight += 100;
1722 break;
1723 case '-':
1724 if (s[1] == '\\')
1725 weight += 50;
93a17b20 1726 if (strchr("aA01! ",last_un_char))
79072805 1727 weight += 30;
93a17b20 1728 if (strchr("zZ79~",s[1]))
79072805 1729 weight += 30;
f27ffc4a
GS
1730 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1731 weight -= 5; /* cope with negative subscript */
79072805
LW
1732 break;
1733 default:
93a17b20 1734 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1735 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1736 char *d = tmpbuf;
1737 while (isALPHA(*s))
1738 *d++ = *s++;
1739 *d = '\0';
1740 if (keyword(tmpbuf, d - tmpbuf))
1741 weight -= 150;
1742 }
1743 if (un_char == last_un_char + 1)
1744 weight += 5;
1745 weight -= seen[un_char];
1746 break;
1747 }
1748 seen[un_char]++;
1749 }
1750 if (weight >= 0) /* probably a character class */
1751 return FALSE;
1752 }
1753
1754 return TRUE;
1755}
ffed7fef 1756
ffb4593c
NT
1757/*
1758 * S_intuit_method
1759 *
1760 * Does all the checking to disambiguate
1761 * foo bar
1762 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1763 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1764 *
1765 * First argument is the stuff after the first token, e.g. "bar".
1766 *
1767 * Not a method if bar is a filehandle.
1768 * Not a method if foo is a subroutine prototyped to take a filehandle.
1769 * Not a method if it's really "Foo $bar"
1770 * Method if it's "foo $bar"
1771 * Not a method if it's really "print foo $bar"
1772 * Method if it's really "foo package::" (interpreted as package->foo)
1773 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1774 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1775 * =>
1776 */
1777
76e3520e 1778STATIC int
cea2e8a9 1779S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1780{
1781 char *s = start + (*start == '$');
3280af22 1782 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1783 STRLEN len;
1784 GV* indirgv;
1785
1786 if (gv) {
b6c543e3 1787 CV *cv;
a0d0e21e
LW
1788 if (GvIO(gv))
1789 return 0;
b6c543e3
IZ
1790 if ((cv = GvCVu(gv))) {
1791 char *proto = SvPVX(cv);
1792 if (proto) {
1793 if (*proto == ';')
1794 proto++;
1795 if (*proto == '*')
1796 return 0;
1797 }
1798 } else
a0d0e21e
LW
1799 gv = 0;
1800 }
8903cb82 1801 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1802 /* start is the beginning of the possible filehandle/object,
1803 * and s is the end of it
1804 * tmpbuf is a copy of it
1805 */
1806
a0d0e21e 1807 if (*start == '$') {
3280af22 1808 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1809 return 0;
1810 s = skipspace(s);
3280af22
NIS
1811 PL_bufptr = start;
1812 PL_expect = XREF;
a0d0e21e
LW
1813 return *s == '(' ? FUNCMETH : METHOD;
1814 }
1815 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1816 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1817 len -= 2;
1818 tmpbuf[len] = '\0';
1819 goto bare_package;
1820 }
1821 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1822 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1823 return 0;
1824 /* filehandle or package name makes it a method */
89bfa8cd 1825 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1826 s = skipspace(s);
3280af22 1827 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1828 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1829 bare_package:
3280af22 1830 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1831 newSVpvn(tmpbuf,len));
3280af22
NIS
1832 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1833 PL_expect = XTERM;
a0d0e21e 1834 force_next(WORD);
3280af22 1835 PL_bufptr = s;
a0d0e21e
LW
1836 return *s == '(' ? FUNCMETH : METHOD;
1837 }
1838 }
1839 return 0;
1840}
1841
ffb4593c
NT
1842/*
1843 * S_incl_perldb
1844 * Return a string of Perl code to load the debugger. If PERL5DB
1845 * is set, it will return the contents of that, otherwise a
1846 * compile-time require of perl5db.pl.
1847 */
1848
76e3520e 1849STATIC char*
cea2e8a9 1850S_incl_perldb(pTHX)
a0d0e21e 1851{
3280af22 1852 if (PL_perldb) {
76e3520e 1853 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1854
1855 if (pdb)
1856 return pdb;
61bb5906 1857 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1858 return "BEGIN { require 'perl5db.pl' }";
1859 }
1860 return "";
1861}
1862
1863
16d20bd9
AD
1864/* Encoded script support. filter_add() effectively inserts a
1865 * 'pre-processing' function into the current source input stream.
1866 * Note that the filter function only applies to the current source file
1867 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1868 *
1869 * The datasv parameter (which may be NULL) can be used to pass
1870 * private data to this instance of the filter. The filter function
1871 * can recover the SV using the FILTER_DATA macro and use it to
1872 * store private buffers and state information.
1873 *
1874 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1875 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1876 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1877 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1878 * private use must be set using malloc'd pointers.
1879 */
16d20bd9
AD
1880
1881SV *
864dbfa3 1882Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1883{
f4c556ac
GS
1884 if (!funcp)
1885 return Nullsv;
1886
3280af22
NIS
1887 if (!PL_rsfp_filters)
1888 PL_rsfp_filters = newAV();
16d20bd9 1889 if (!datasv)
8c52afec 1890 datasv = NEWSV(255,0);
16d20bd9 1891 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1892 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1893 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1894 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1895 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1896 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1897 av_unshift(PL_rsfp_filters, 1);
1898 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1899 return(datasv);
1900}
1901
1902
1903/* Delete most recently added instance of this filter function. */
a0d0e21e 1904void
864dbfa3 1905Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1906{
e0c19803 1907 SV *datasv;
f4c556ac 1908 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1909 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1910 return;
1911 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1912 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1913 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1914 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1915 IoANY(datasv) = (void *)NULL;
3280af22 1916 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1917
16d20bd9
AD
1918 return;
1919 }
1920 /* we need to search for the correct entry and clear it */
cea2e8a9 1921 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1922}
1923
1924
1925/* Invoke the n'th filter function for the current rsfp. */
1926I32
864dbfa3 1927Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1928
1929
1930 /* 0 = read one text line */
a0d0e21e 1931{
16d20bd9
AD
1932 filter_t funcp;
1933 SV *datasv = NULL;
e50aee73 1934
3280af22 1935 if (!PL_rsfp_filters)
16d20bd9 1936 return -1;
3280af22 1937 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1938 /* Provide a default input filter to make life easy. */
1939 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1940 DEBUG_P(PerlIO_printf(Perl_debug_log,
1941 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1942 if (maxlen) {
1943 /* Want a block */
1944 int len ;
1945 int old_len = SvCUR(buf_sv) ;
1946
1947 /* ensure buf_sv is large enough */
1948 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1949 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1950 if (PerlIO_error(PL_rsfp))
37120919
AD
1951 return -1; /* error */
1952 else
1953 return 0 ; /* end of file */
1954 }
16d20bd9
AD
1955 SvCUR_set(buf_sv, old_len + len) ;
1956 } else {
1957 /* Want a line */
3280af22
NIS
1958 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1959 if (PerlIO_error(PL_rsfp))
37120919
AD
1960 return -1; /* error */
1961 else
1962 return 0 ; /* end of file */
1963 }
16d20bd9
AD
1964 }
1965 return SvCUR(buf_sv);
1966 }
1967 /* Skip this filter slot if filter has been deleted */
3280af22 1968 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1969 DEBUG_P(PerlIO_printf(Perl_debug_log,
1970 "filter_read %d: skipped (filter deleted)\n",
1971 idx));
16d20bd9
AD
1972 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1973 }
1974 /* Get function pointer hidden within datasv */
4755096e 1975 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1976 DEBUG_P(PerlIO_printf(Perl_debug_log,
1977 "filter_read %d: via function %p (%s)\n",
1978 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1979 /* Call function. The function is expected to */
1980 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1981 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1982 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1983}
1984
76e3520e 1985STATIC char *
cea2e8a9 1986S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1987{
c39cd008 1988#ifdef PERL_CR_FILTER
3280af22 1989 if (!PL_rsfp_filters) {
c39cd008 1990 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1991 }
1992#endif
3280af22 1993 if (PL_rsfp_filters) {
16d20bd9 1994
55497cff 1995 if (!append)
1996 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1997 if (FILTER_READ(0, sv, 0) > 0)
1998 return ( SvPVX(sv) ) ;
1999 else
2000 return Nullch ;
2001 }
9d116dd7 2002 else
fd049845 2003 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2004}
2005
01ec43d0
GS
2006STATIC HV *
2007S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2008{
2009 GV *gv;
2010
01ec43d0 2011 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2012 return PL_curstash;
2013
2014 if (len > 2 &&
2015 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2016 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2017 {
2018 return GvHV(gv); /* Foo:: */
def3634b
GS
2019 }
2020
2021 /* use constant CLASS => 'MyClass' */
2022 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2023 SV *sv;
2024 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2025 pkgname = SvPV_nolen(sv);
2026 }
2027 }
2028
2029 return gv_stashpv(pkgname, FALSE);
2030}
a0d0e21e 2031
748a9306
LW
2032#ifdef DEBUGGING
2033 static char* exp_name[] =
09bef843
SB
2034 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2035 "ATTRTERM", "TERMBLOCK"
2036 };
748a9306 2037#endif
463ee0b2 2038
02aa26ce
NT
2039/*
2040 yylex
2041
2042 Works out what to call the token just pulled out of the input
2043 stream. The yacc parser takes care of taking the ops we return and
2044 stitching them into a tree.
2045
2046 Returns:
2047 PRIVATEREF
2048
2049 Structure:
2050 if read an identifier
2051 if we're in a my declaration
2052 croak if they tried to say my($foo::bar)
2053 build the ops for a my() declaration
2054 if it's an access to a my() variable
2055 are we in a sort block?
2056 croak if my($a); $a <=> $b
2057 build ops for access to a my() variable
2058 if in a dq string, and they've said @foo and we can't find @foo
2059 croak
2060 build ops for a bareword
2061 if we already built the token before, use it.
2062*/
2063
bf4acbe4
GS
2064#ifdef __SC__
2065#pragma segment Perl_yylex
2066#endif
864dbfa3
GS
2067int
2068#ifdef USE_PURE_BISON
cea2e8a9 2069Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 2070#else
cea2e8a9 2071Perl_yylex(pTHX)
864dbfa3 2072#endif
378cc40b 2073{
b73d6f50 2074 dTHR;
20141f0e
IRC
2075 int r;
2076
2077#ifdef USE_PURE_BISON
20141f0e
IRC
2078 yylval_pointer[yyactlevel] = lvalp;
2079 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2080 yyactlevel++;
2081 if (yyactlevel >= YYMAXLEVEL)
2082 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e
IRC
2083#endif
2084
2085 r = S_syylex(aTHX);
2086
2087#ifdef USE_PURE_BISON
2088 yyactlevel--;
2089#endif
2090
2091 return r;
2092}
2093
2094STATIC int
2095S_syylex(pTHX) /* need to be separate from yylex for reentrancy */
2096{
11343788 2097 dTHR;
79072805 2098 register char *s;
378cc40b 2099 register char *d;
79072805 2100 register I32 tmp;
463ee0b2 2101 STRLEN len;
161b471a
NIS
2102 GV *gv = Nullgv;
2103 GV **gvp = 0;
a687059c 2104
02aa26ce 2105 /* check if there's an identifier for us to look at */
3280af22 2106 if (PL_pending_ident) {
02aa26ce 2107 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2108 char pit = PL_pending_ident;
2109 PL_pending_ident = 0;
bbce6d69 2110
02aa26ce
NT
2111 /* if we're in a my(), we can't allow dynamics here.
2112 $foo'bar has already been turned into $foo::bar, so
2113 just check for colons.
2114
2115 if it's a legal name, the OP is a PADANY.
2116 */
3280af22 2117 if (PL_in_my) {
77ca0c92 2118 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2119 if (strchr(PL_tokenbuf,':'))
2120 yyerror(Perl_form(aTHX_ "No package name allowed for "
2121 "variable %s in \"our\"",
2122 PL_tokenbuf));
77ca0c92
LW
2123 tmp = pad_allocmy(PL_tokenbuf);
2124 }
2125 else {
2126 if (strchr(PL_tokenbuf,':'))
2127 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2128
77ca0c92
LW
2129 yylval.opval = newOP(OP_PADANY, 0);
2130 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2131 return PRIVATEREF;
2132 }
bbce6d69 2133 }
2134
02aa26ce
NT
2135 /*
2136 build the ops for accesses to a my() variable.
2137
2138 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2139 then used in a comparison. This catches most, but not
2140 all cases. For instance, it catches
2141 sort { my($a); $a <=> $b }
2142 but not
2143 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2144 (although why you'd do that is anyone's guess).
2145 */
2146
3280af22 2147 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2148#ifdef USE_THREADS
54b9620d 2149 /* Check for single character per-thread SVs */
3280af22
NIS
2150 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2151 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2152 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2153 {
2faa37cc 2154 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2155 yylval.opval->op_targ = tmp;
2156 return PRIVATEREF;
2157 }
2158#endif /* USE_THREADS */
3280af22 2159 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2160 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2161 /* might be an "our" variable" */
f472eb5c 2162 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2163 /* build ops for a bareword */
f472eb5c
GS
2164 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2165 sv_catpvn(sym, "::", 2);
2166 sv_catpv(sym, PL_tokenbuf+1);
2167 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2168 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2169 gv_fetchpv(SvPVX(sym),
77ca0c92 2170 (PL_in_eval
f472eb5c
GS
2171 ? (GV_ADDMULTI | GV_ADDINEVAL)
2172 : TRUE
77ca0c92
LW
2173 ),
2174 ((PL_tokenbuf[0] == '$') ? SVt_PV
2175 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2176 : SVt_PVHV));
2177 return WORD;
2178 }
2179
02aa26ce 2180 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2181 if (PL_last_lop_op == OP_SORT &&
2182 PL_tokenbuf[0] == '$' &&
2183 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2184 && !PL_tokenbuf[2])
bbce6d69 2185 {
3280af22
NIS
2186 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2187 d < PL_bufend && *d != '\n';
a863c7d1
MB
2188 d++)
2189 {
2190 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2191 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2192 PL_tokenbuf);
a863c7d1 2193 }
bbce6d69 2194 }
2195 }
bbce6d69 2196
a863c7d1
MB
2197 yylval.opval = newOP(OP_PADANY, 0);
2198 yylval.opval->op_targ = tmp;
2199 return PRIVATEREF;
2200 }
bbce6d69 2201 }
2202
02aa26ce
NT
2203 /*
2204 Whine if they've said @foo in a doublequoted string,
2205 and @foo isn't a variable we can find in the symbol
2206 table.
2207 */
3280af22
NIS
2208 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2209 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2210 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2211 && ckWARN(WARN_AMBIGUOUS))
2212 {
2213 /* Downgraded from fatal to warning 20000522 mjd */
2214 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2215 "Possible unintended interpolation of %s in string",
2216 PL_tokenbuf);
2217 }
bbce6d69 2218 }
2219
02aa26ce 2220 /* build ops for a bareword */
3280af22 2221 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2222 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2223 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2224 ((PL_tokenbuf[0] == '$') ? SVt_PV
2225 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2226 : SVt_PVHV));
2227 return WORD;
2228 }
2229
02aa26ce
NT
2230 /* no identifier pending identification */
2231
3280af22 2232 switch (PL_lex_state) {
79072805
LW
2233#ifdef COMMENTARY
2234 case LEX_NORMAL: /* Some compilers will produce faster */
2235 case LEX_INTERPNORMAL: /* code if we comment these out. */
2236 break;
2237#endif
2238
09bef843 2239 /* when we've already built the next token, just pull it out of the queue */
79072805 2240 case LEX_KNOWNEXT:
3280af22
NIS
2241 PL_nexttoke--;
2242 yylval = PL_nextval[PL_nexttoke];
2243 if (!PL_nexttoke) {
2244 PL_lex_state = PL_lex_defer;
2245 PL_expect = PL_lex_expect;
2246 PL_lex_defer = LEX_NORMAL;
463ee0b2 2247 }
3280af22 2248 return(PL_nexttype[PL_nexttoke]);
79072805 2249
02aa26ce 2250 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2251 when we get here, PL_bufptr is at the \
02aa26ce 2252 */
79072805
LW
2253 case LEX_INTERPCASEMOD:
2254#ifdef DEBUGGING
3280af22 2255 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2256 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2257#endif
02aa26ce 2258 /* handle \E or end of string */
3280af22 2259 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2260 char oldmod;
02aa26ce
NT
2261
2262 /* if at a \E */
3280af22
NIS
2263 if (PL_lex_casemods) {
2264 oldmod = PL_lex_casestack[--PL_lex_casemods];
2265 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2266
3280af22
NIS
2267 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2268 PL_bufptr += 2;
2269 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2270 }
79072805
LW
2271 return ')';
2272 }
3280af22
NIS
2273 if (PL_bufptr != PL_bufend)
2274 PL_bufptr += 2;
2275 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2276 return yylex();
79072805
LW
2277 }
2278 else {
3280af22 2279 s = PL_bufptr + 1;
79072805
LW
2280 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2281 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2282 if (strchr("LU", *s) &&
3280af22 2283 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2284 {
3280af22 2285 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2286 return ')';
2287 }
3280af22
NIS
2288 if (PL_lex_casemods > 10) {
2289 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2290 if (newlb != PL_lex_casestack) {
a0d0e21e 2291 SAVEFREEPV(newlb);
3280af22 2292 PL_lex_casestack = newlb;
a0d0e21e
LW
2293 }
2294 }
3280af22
NIS
2295 PL_lex_casestack[PL_lex_casemods++] = *s;
2296 PL_lex_casestack[PL_lex_casemods] = '\0';
2297 PL_lex_state = LEX_INTERPCONCAT;
2298 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2299 force_next('(');
2300 if (*s == 'l')
3280af22 2301 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2302 else if (*s == 'u')
3280af22 2303 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2304 else if (*s == 'L')
3280af22 2305 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2306 else if (*s == 'U')
3280af22 2307 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2308 else if (*s == 'Q')
3280af22 2309 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2310 else
cea2e8a9 2311 Perl_croak(aTHX_ "panic: yylex");
3280af22 2312 PL_bufptr = s + 1;
79072805 2313 force_next(FUNC);
3280af22
NIS
2314 if (PL_lex_starts) {
2315 s = PL_bufptr;
2316 PL_lex_starts = 0;
79072805
LW
2317 Aop(OP_CONCAT);
2318 }
2319 else
cea2e8a9 2320 return yylex();
79072805
LW
2321 }
2322
55497cff 2323 case LEX_INTERPPUSH:
2324 return sublex_push();
2325
79072805 2326 case LEX_INTERPSTART:
3280af22 2327 if (PL_bufptr == PL_bufend)
79072805 2328 return sublex_done();
3280af22
NIS
2329 PL_expect = XTERM;
2330 PL_lex_dojoin = (*PL_bufptr == '@');
2331 PL_lex_state = LEX_INTERPNORMAL;
2332 if (PL_lex_dojoin) {
2333 PL_nextval[PL_nexttoke].ival = 0;
79072805 2334 force_next(',');
554b3eca 2335#ifdef USE_THREADS
533c011a
NIS
2336 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2337 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2338 force_next(PRIVATEREF);
2339#else
a0d0e21e 2340 force_ident("\"", '$');
554b3eca 2341#endif /* USE_THREADS */
3280af22 2342 PL_nextval[PL_nexttoke].ival = 0;
79072805 2343 force_next('$');
3280af22 2344 PL_nextval[PL_nexttoke].ival = 0;
79072805 2345 force_next('(');
3280af22 2346 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2347 force_next(FUNC);
2348 }
3280af22
NIS
2349 if (PL_lex_starts++) {
2350 s = PL_bufptr;
79072805
LW
2351 Aop(OP_CONCAT);
2352 }
cea2e8a9 2353 return yylex();
79072805
LW
2354
2355 case LEX_INTERPENDMAYBE:
3280af22
NIS
2356 if (intuit_more(PL_bufptr)) {
2357 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2358 break;
2359 }
2360 /* FALL THROUGH */
2361
2362 case LEX_INTERPEND:
3280af22
NIS
2363 if (PL_lex_dojoin) {
2364 PL_lex_dojoin = FALSE;
2365 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2366 return ')';
2367 }
43a16006 2368 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2369 && SvEVALED(PL_lex_repl))
43a16006 2370 {
e9fa98b2 2371 if (PL_bufptr != PL_bufend)
cea2e8a9 2372 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2373 PL_lex_repl = Nullsv;
2374 }
79072805
LW
2375 /* FALLTHROUGH */
2376 case LEX_INTERPCONCAT:
2377#ifdef DEBUGGING
3280af22 2378 if (PL_lex_brackets)
cea2e8a9 2379 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2380#endif
3280af22 2381 if (PL_bufptr == PL_bufend)
79072805
LW
2382 return sublex_done();
2383
3280af22
NIS
2384 if (SvIVX(PL_linestr) == '\'') {
2385 SV *sv = newSVsv(PL_linestr);
2386 if (!PL_lex_inpat)
76e3520e 2387 sv = tokeq(sv);
3280af22 2388 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2389 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2390 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2391 s = PL_bufend;
79072805
LW
2392 }
2393 else {
3280af22 2394 s = scan_const(PL_bufptr);
79072805 2395 if (*s == '\\')
3280af22 2396 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2397 else
3280af22 2398 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2399 }
2400
3280af22
NIS
2401 if (s != PL_bufptr) {
2402 PL_nextval[PL_nexttoke] = yylval;
2403 PL_expect = XTERM;
79072805 2404 force_next(THING);
3280af22 2405 if (PL_lex_starts++)
79072805
LW
2406 Aop(OP_CONCAT);
2407 else {
3280af22 2408 PL_bufptr = s;
cea2e8a9 2409 return yylex();
79072805
LW
2410 }
2411 }
2412
cea2e8a9 2413 return yylex();
a0d0e21e 2414 case LEX_FORMLINE:
3280af22
NIS
2415 PL_lex_state = LEX_NORMAL;
2416 s = scan_formline(PL_bufptr);
2417 if (!PL_lex_formbrack)
a0d0e21e
LW
2418 goto rightbracket;
2419 OPERATOR(';');
79072805
LW
2420 }
2421
3280af22
NIS
2422 s = PL_bufptr;
2423 PL_oldoldbufptr = PL_oldbufptr;
2424 PL_oldbufptr = s;
79072805 2425 DEBUG_p( {
bf49b057
GS
2426 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2427 exp_name[PL_expect], s);
79072805 2428 } )
463ee0b2
LW
2429
2430 retry:
378cc40b
LW
2431 switch (*s) {
2432 default:
7e2040f0 2433 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2434 goto keylookup;
cea2e8a9 2435 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2436 case 4:
2437 case 26:
2438 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2439 case 0:
3280af22
NIS
2440 if (!PL_rsfp) {
2441 PL_last_uni = 0;
2442 PL_last_lop = 0;
2443 if (PL_lex_brackets)
d98d5fff 2444 yyerror("Missing right curly or square bracket");
79072805 2445 TOKEN(0);
463ee0b2 2446 }
3280af22 2447 if (s++ < PL_bufend)
a687059c 2448 goto retry; /* ignore stray nulls */
3280af22
NIS
2449 PL_last_uni = 0;
2450 PL_last_lop = 0;
2451 if (!PL_in_eval && !PL_preambled) {
2452 PL_preambled = TRUE;
2453 sv_setpv(PL_linestr,incl_perldb());
2454 if (SvCUR(PL_linestr))
2455 sv_catpv(PL_linestr,";");
2456 if (PL_preambleav){
2457 while(AvFILLp(PL_preambleav) >= 0) {
2458 SV *tmpsv = av_shift(PL_preambleav);
2459 sv_catsv(PL_linestr, tmpsv);
2460 sv_catpv(PL_linestr, ";");
91b7def8 2461 sv_free(tmpsv);
2462 }
3280af22
NIS
2463 sv_free((SV*)PL_preambleav);
2464 PL_preambleav = NULL;
91b7def8 2465 }
3280af22
NIS
2466 if (PL_minus_n || PL_minus_p) {
2467 sv_catpv(PL_linestr, "LINE: while (<>) {");
2468 if (PL_minus_l)
2469 sv_catpv(PL_linestr,"chomp;");
2470 if (PL_minus_a) {
8fd239a7
CS
2471 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2472 if (gv)
2473 GvIMPORTED_AV_on(gv);
3280af22
NIS
2474 if (PL_minus_F) {
2475 if (strchr("/'\"", *PL_splitstr)
2476 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2477 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2478 else {
2479 char delim;
2480 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2481 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2482 delim = *s;
cea2e8a9 2483 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2484 "q" + (delim == '\''), delim);
3280af22 2485 for (s = PL_splitstr; *s; s++) {
54310121 2486 if (*s == '\\')
3280af22
NIS
2487 sv_catpvn(PL_linestr, "\\", 1);
2488 sv_catpvn(PL_linestr, s, 1);
54310121 2489 }
cea2e8a9 2490 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2491 }
2304df62
AD
2492 }
2493 else
3280af22 2494 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2495 }
79072805 2496 }
3280af22
NIS
2497 sv_catpv(PL_linestr, "\n");
2498 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2499 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2500 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2501 SV *sv = NEWSV(85,0);
2502
2503 sv_upgrade(sv, SVt_PVMG);
3280af22 2504 sv_setsv(sv,PL_linestr);
57843af0 2505 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2506 }
79072805 2507 goto retry;
a687059c 2508 }
e929a76b 2509 do {
01ec43d0
GS
2510 bool bof;
2511 bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
dea0fc0b
JH
2512 s = filter_gets(PL_linestr, PL_rsfp, 0);
2513 if (s == Nullch) {
e929a76b 2514 fake_eof:
3280af22
NIS
2515 if (PL_rsfp) {
2516 if (PL_preprocess && !PL_in_eval)
2517 (void)PerlProc_pclose(PL_rsfp);
2518 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2519 PerlIO_clearerr(PL_rsfp);
395c3793 2520 else
3280af22
NIS
2521 (void)PerlIO_close(PL_rsfp);
2522 PL_rsfp = Nullfp;
4a9ae47a 2523 PL_doextract = FALSE;
395c3793 2524 }
3280af22
NIS
2525 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2526 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2527 sv_catpv(PL_linestr,";}");
2528 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2529 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2530 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2531 goto retry;
2532 }
3280af22
NIS
2533 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2534 sv_setpv(PL_linestr,"");
79072805 2535 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2536 } else if (bof) {
2537 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2538 s = swallow_bom((U8*)s);
378cc40b 2539 }
3280af22 2540 if (PL_doextract) {
a0d0e21e 2541 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2542 PL_doextract = FALSE;
a0d0e21e
LW
2543
2544 /* Incest with pod. */
2545 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2546 sv_setpv(PL_linestr, "");
2547 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2549 PL_doextract = FALSE;
a0d0e21e 2550 }
b250498f 2551 }
463ee0b2 2552 incline(s);
3280af22
NIS
2553 } while (PL_doextract);
2554 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2555 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2556 SV *sv = NEWSV(85,0);
a687059c 2557
93a17b20 2558 sv_upgrade(sv, SVt_PVMG);
3280af22 2559 sv_setsv(sv,PL_linestr);
57843af0 2560 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2561 }
3280af22 2562 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2563 if (CopLINE(PL_curcop) == 1) {
3280af22 2564 while (s < PL_bufend && isSPACE(*s))
79072805 2565 s++;
a0d0e21e 2566 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2567 s++;
44a8e56a 2568 d = Nullch;
3280af22 2569 if (!PL_in_eval) {
44a8e56a 2570 if (*s == '#' && *(s+1) == '!')
2571 d = s + 2;
2572#ifdef ALTERNATE_SHEBANG
2573 else {
2574 static char as[] = ALTERNATE_SHEBANG;
2575 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2576 d = s + (sizeof(as) - 1);
2577 }
2578#endif /* ALTERNATE_SHEBANG */
2579 }
2580 if (d) {
b8378b72 2581 char *ipath;
774d564b 2582 char *ipathend;
b8378b72 2583
774d564b 2584 while (isSPACE(*d))
b8378b72
CS
2585 d++;
2586 ipath = d;
774d564b 2587 while (*d && !isSPACE(*d))
2588 d++;
2589 ipathend = d;
2590
2591#ifdef ARG_ZERO_IS_SCRIPT
2592 if (ipathend > ipath) {
2593 /*
2594 * HP-UX (at least) sets argv[0] to the script name,
2595 * which makes $^X incorrect. And Digital UNIX and Linux,
2596 * at least, set argv[0] to the basename of the Perl
2597 * interpreter. So, having found "#!", we'll set it right.
2598 */
2599 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2600 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2601 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2602 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2603 SvSETMAGIC(x);
2604 }
774d564b 2605 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2606 }
774d564b 2607#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2608
2609 /*
2610 * Look for options.
2611 */
748a9306 2612 d = instr(s,"perl -");
84e30d1a 2613 if (!d) {
748a9306 2614 d = instr(s,"perl");
84e30d1a
GS
2615#if defined(DOSISH)
2616 /* avoid getting into infinite loops when shebang
2617 * line contains "Perl" rather than "perl" */
2618 if (!d) {
2619 for (d = ipathend-4; d >= ipath; --d) {
2620 if ((*d == 'p' || *d == 'P')
2621 && !ibcmp(d, "perl", 4))
2622 {
2623 break;
2624 }
2625 }
2626 if (d < ipath)
2627 d = Nullch;
2628 }
2629#endif
2630 }
44a8e56a 2631#ifdef ALTERNATE_SHEBANG
2632 /*
2633 * If the ALTERNATE_SHEBANG on this system starts with a
2634 * character that can be part of a Perl expression, then if
2635 * we see it but not "perl", we're probably looking at the
2636 * start of Perl code, not a request to hand off to some
2637 * other interpreter. Similarly, if "perl" is there, but
2638 * not in the first 'word' of the line, we assume the line
2639 * contains the start of the Perl program.
44a8e56a 2640 */
2641 if (d && *s != '#') {
774d564b 2642 char *c = ipath;
44a8e56a 2643 while (*c && !strchr("; \t\r\n\f\v#", *c))
2644 c++;
2645 if (c < d)
2646 d = Nullch; /* "perl" not in first word; ignore */
2647 else
2648 *s = '#'; /* Don't try to parse shebang line */
2649 }
774d564b 2650#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2651#ifndef MACOS_TRADITIONAL
748a9306 2652 if (!d &&
44a8e56a 2653 *s == '#' &&
774d564b 2654 ipathend > ipath &&
3280af22 2655 !PL_minus_c &&
748a9306 2656 !instr(s,"indir") &&
3280af22 2657 instr(PL_origargv[0],"perl"))
748a9306 2658 {
9f68db38 2659 char **newargv;
9f68db38 2660
774d564b 2661 *ipathend = '\0';
2662 s = ipathend + 1;
3280af22 2663 while (s < PL_bufend && isSPACE(*s))
9f68db38 2664 s++;
3280af22
NIS
2665 if (s < PL_bufend) {
2666 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2667 newargv[1] = s;
3280af22 2668 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2669 s++;
2670 *s = '\0';
3280af22 2671 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2672 }
2673 else
3280af22 2674 newargv = PL_origargv;
774d564b 2675 newargv[0] = ipath;
80252599 2676 PerlProc_execv(ipath, newargv);
cea2e8a9 2677 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2678 }
bf4acbe4 2679#endif
748a9306 2680 if (d) {
3280af22
NIS
2681 U32 oldpdb = PL_perldb;
2682 bool oldn = PL_minus_n;
2683 bool oldp = PL_minus_p;
748a9306
LW
2684
2685 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2686 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2687
2688 if (*d++ == '-') {
8cc95fdb 2689 do {
2690 if (*d == 'M' || *d == 'm') {
2691 char *m = d;
2692 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2693 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2694 (int)(d - m), m);
2695 }
2696 d = moreswitches(d);
2697 } while (d);
155aba94
GS
2698 if ((PERLDB_LINE && !oldpdb) ||
2699 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2700 /* if we have already added "LINE: while (<>) {",
2701 we must not do it again */
748a9306 2702 {
3280af22
NIS
2703 sv_setpv(PL_linestr, "");
2704 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2705 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2706 PL_preambled = FALSE;
84902520 2707 if (PERLDB_LINE)
3280af22 2708 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2709 goto retry;
2710 }
a0d0e21e 2711 }
79072805 2712 }
9f68db38 2713 }
79072805 2714 }
3280af22
NIS
2715 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2716 PL_bufptr = s;
2717 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2718 return yylex();
ae986130 2719 }
378cc40b 2720 goto retry;
4fdae800 2721 case '\r':
6a27c188 2722#ifdef PERL_STRICT_CR
cea2e8a9
GS
2723 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2724 Perl_croak(aTHX_
cc507455 2725 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2726#endif
4fdae800 2727 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2728#ifdef MACOS_TRADITIONAL
2729 case '\312':
2730#endif
378cc40b
LW
2731 s++;
2732 goto retry;
378cc40b 2733 case '#':
e929a76b 2734 case '\n':
3280af22 2735 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2736 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2737 /* handle eval qq[#line 1 "foo"\n ...] */
2738 CopLINE_dec(PL_curcop);
2739 incline(s);
2740 }
3280af22 2741 d = PL_bufend;
a687059c 2742 while (s < d && *s != '\n')
378cc40b 2743 s++;
0f85fab0 2744 if (s < d)
378cc40b 2745 s++;
463ee0b2 2746 incline(s);
3280af22
NIS
2747 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2748 PL_bufptr = s;
2749 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2750 return yylex();
a687059c 2751 }
378cc40b 2752 }
a687059c 2753 else {
378cc40b 2754 *s = '\0';
3280af22 2755 PL_bufend = s;
a687059c 2756 }
378cc40b
LW
2757 goto retry;
2758 case '-':
79072805 2759 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2760 s++;
3280af22 2761 PL_bufptr = s;
748a9306
LW
2762 tmp = *s++;
2763
bf4acbe4 2764 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2765 s++;
2766
2767 if (strnEQ(s,"=>",2)) {
3280af22 2768 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2769 OPERATOR('-'); /* unary minus */
2770 }
3280af22
NIS
2771 PL_last_uni = PL_oldbufptr;
2772 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2773 switch (tmp) {
79072805
LW
2774 case 'r': FTST(OP_FTEREAD);
2775 case 'w': FTST(OP_FTEWRITE);
2776 case 'x': FTST(OP_FTEEXEC);
2777 case 'o': FTST(OP_FTEOWNED);
2778 case 'R': FTST(OP_FTRREAD);
2779 case 'W': FTST(OP_FTRWRITE);
2780 case 'X': FTST(OP_FTREXEC);
2781 case 'O': FTST(OP_FTROWNED);
2782 case 'e': FTST(OP_FTIS);
2783 case 'z': FTST(OP_FTZERO);
2784 case 's': FTST(OP_FTSIZE);
2785 case 'f': FTST(OP_FTFILE);
2786 case 'd': FTST(OP_FTDIR);
2787 case 'l': FTST(OP_FTLINK);
2788 case 'p': FTST(OP_FTPIPE);
2789 case 'S': FTST(OP_FTSOCK);
2790 case 'u': FTST(OP_FTSUID);
2791 case 'g': FTST(OP_FTSGID);
2792 case 'k': FTST(OP_FTSVTX);
2793 case 'b': FTST(OP_FTBLK);
2794 case 'c': FTST(OP_FTCHR);
2795 case 't': FTST(OP_FTTTY);
2796 case 'T': FTST(OP_FTTEXT);
2797 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2798 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2799 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2800 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2801 default:
cea2e8a9 2802 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2803 break;
2804 }
2805 }
a687059c
LW
2806 tmp = *s++;
2807 if (*s == tmp) {
2808 s++;
3280af22 2809 if (PL_expect == XOPERATOR)
79072805
LW
2810 TERM(POSTDEC);
2811 else
2812 OPERATOR(PREDEC);
2813 }
2814 else if (*s == '>') {
2815 s++;
2816 s = skipspace(s);
7e2040f0 2817 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2818 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2819 TOKEN(ARROW);
79072805 2820 }
748a9306
LW
2821 else if (*s == '$')
2822 OPERATOR(ARROW);
463ee0b2 2823 else
748a9306 2824 TERM(ARROW);
a687059c 2825 }
3280af22 2826 if (PL_expect == XOPERATOR)
79072805
LW
2827 Aop(OP_SUBTRACT);
2828 else {
3280af22 2829 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2830 check_uni();
79072805 2831 OPERATOR('-'); /* unary minus */
2f3197b3 2832 }
79072805 2833
378cc40b 2834 case '+':
a687059c
LW
2835 tmp = *s++;
2836 if (*s == tmp) {
378cc40b 2837 s++;
3280af22 2838 if (PL_expect == XOPERATOR)
79072805
LW
2839 TERM(POSTINC);
2840 else
2841 OPERATOR(PREINC);
378cc40b 2842 }
3280af22 2843 if (PL_expect == XOPERATOR)
79072805
LW
2844 Aop(OP_ADD);
2845 else {
3280af22 2846 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2847 check_uni();
a687059c 2848 OPERATOR('+');
2f3197b3 2849 }
a687059c 2850
378cc40b 2851 case '*':
3280af22
NIS
2852 if (PL_expect != XOPERATOR) {
2853 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2854 PL_expect = XOPERATOR;
2855 force_ident(PL_tokenbuf, '*');
2856 if (!*PL_tokenbuf)
a0d0e21e 2857 PREREF('*');
79072805 2858 TERM('*');
a687059c 2859 }
79072805
LW
2860 s++;
2861 if (*s == '*') {
a687059c 2862 s++;
79072805 2863 PWop(OP_POW);
a687059c 2864 }
79072805
LW
2865 Mop(OP_MULTIPLY);
2866
378cc40b 2867 case '%':
3280af22 2868 if (PL_expect == XOPERATOR) {
bbce6d69 2869 ++s;
2870 Mop(OP_MODULO);
a687059c 2871 }
3280af22
NIS
2872 PL_tokenbuf[0] = '%';
2873 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2874 if (!PL_tokenbuf[1]) {
2875 if (s == PL_bufend)
bbce6d69 2876 yyerror("Final % should be \\% or %name");
2877 PREREF('%');
a687059c 2878 }
3280af22 2879 PL_pending_ident = '%';
bbce6d69 2880 TERM('%');
a687059c 2881
378cc40b 2882 case '^':
79072805 2883 s++;
a0d0e21e 2884 BOop(OP_BIT_XOR);
79072805 2885 case '[':
3280af22 2886 PL_lex_brackets++;
79072805 2887 /* FALL THROUGH */
378cc40b 2888 case '~':
378cc40b 2889 case ',':
378cc40b
LW
2890 tmp = *s++;
2891 OPERATOR(tmp);
a0d0e21e
LW
2892 case ':':
2893 if (s[1] == ':') {
2894 len = 0;
2895 goto just_a_word;
2896 }
2897 s++;
09bef843
SB
2898 switch (PL_expect) {
2899 OP *attrs;
2900 case XOPERATOR:
2901 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2902 break;
2903 PL_bufptr = s; /* update in case we back off */
2904 goto grabattrs;
2905 case XATTRBLOCK:
2906 PL_expect = XBLOCK;
2907 goto grabattrs;
2908 case XATTRTERM:
2909 PL_expect = XTERMBLOCK;
2910 grabattrs:
2911 s = skipspace(s);
2912 attrs = Nullop;
7e2040f0 2913 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2914 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2915 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2916 if (tmp < 0) tmp = -tmp;
2917 switch (tmp) {
2918 case KEY_or:
2919 case KEY_and:
2920 case KEY_for:
2921 case KEY_unless:
2922 case KEY_if:
2923 case KEY_while:
2924 case KEY_until:
2925 goto got_attrs;
2926 default:
2927 break;
2928 }
2929 }
09bef843
SB
2930 if (*d == '(') {
2931 d = scan_str(d,TRUE,TRUE);
2932 if (!d) {
2933 if (PL_lex_stuff) {
2934 SvREFCNT_dec(PL_lex_stuff);
2935 PL_lex_stuff = Nullsv;
2936 }
2937 /* MUST advance bufptr here to avoid bogus
2938 "at end of line" context messages from yyerror().
2939 */
2940 PL_bufptr = s + len;
2941 yyerror("Unterminated attribute parameter in attribute list");
2942 if (attrs)
2943 op_free(attrs);
2944 return 0; /* EOF indicator */
2945 }
2946 }
2947 if (PL_lex_stuff) {
2948 SV *sv = newSVpvn(s, len);
2949 sv_catsv(sv, PL_lex_stuff);
2950 attrs = append_elem(OP_LIST, attrs,
2951 newSVOP(OP_CONST, 0, sv));
2952 SvREFCNT_dec(PL_lex_stuff);
2953 PL_lex_stuff = Nullsv;
2954 }
2955 else {
2956 attrs = append_elem(OP_LIST, attrs,
2957 newSVOP(OP_CONST, 0,
2958 newSVpvn(s, len)));
2959 }
2960 s = skipspace(d);
0120eecf 2961 if (*s == ':' && s[1] != ':')
09bef843 2962 s = skipspace(s+1);
0120eecf
GS
2963 else if (s == d)
2964 break; /* require real whitespace or :'s */
09bef843 2965 }
f9829d6b
GS
2966 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2967 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2968 char q = ((*s == '\'') ? '"' : '\'');
2969 /* If here for an expression, and parsed no attrs, back off. */
2970 if (tmp == '=' && !attrs) {
2971 s = PL_bufptr;
2972 break;
2973 }
2974 /* MUST advance bufptr here to avoid bogus "at end of line"
2975 context messages from yyerror().
2976 */
2977 PL_bufptr = s;
2978 if (!*s)
2979 yyerror("Unterminated attribute list");
2980 else
2981 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2982 q, *s, q));
2983 if (attrs)
2984 op_free(attrs);
2985 OPERATOR(':');
2986 }
f9829d6b 2987 got_attrs:
09bef843
SB
2988 if (attrs) {
2989 PL_nextval[PL_nexttoke].opval = attrs;
2990 force_next(THING);
2991 }
2992 TOKEN(COLONATTR);
2993 }
a0d0e21e 2994 OPERATOR(':');
8990e307
LW
2995 case '(':
2996 s++;
3280af22
NIS
2997 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2998 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2999 else
3280af22 3000 PL_expect = XTERM;
a0d0e21e 3001 TOKEN('(');
378cc40b 3002 case ';':
f4dd75d9 3003 CLINE;
378cc40b
LW
3004 tmp = *s++;
3005 OPERATOR(tmp);
3006 case ')':
378cc40b 3007 tmp = *s++;
16d20bd9
AD
3008 s = skipspace(s);
3009 if (*s == '{')
3010 PREBLOCK(tmp);
378cc40b 3011 TERM(tmp);
79072805
LW
3012 case ']':
3013 s++;
3280af22 3014 if (PL_lex_brackets <= 0)
d98d5fff 3015 yyerror("Unmatched right square bracket");
463ee0b2 3016 else
3280af22
NIS
3017 --PL_lex_brackets;
3018 if (PL_lex_state == LEX_INTERPNORMAL) {
3019 if (PL_lex_brackets == 0) {
a0d0e21e 3020 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3021 PL_lex_state = LEX_INTERPEND;
79072805
LW
3022 }
3023 }
4633a7c4 3024 TERM(']');
79072805
LW
3025 case '{':
3026 leftbracket:
79072805 3027 s++;
3280af22
NIS
3028 if (PL_lex_brackets > 100) {
3029 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3030 if (newlb != PL_lex_brackstack) {
8990e307 3031 SAVEFREEPV(newlb);
3280af22 3032 PL_lex_brackstack = newlb;
8990e307
LW
3033 }
3034 }
3280af22 3035 switch (PL_expect) {
a0d0e21e 3036 case XTERM:
3280af22 3037 if (PL_lex_formbrack) {
a0d0e21e
LW
3038 s--;
3039 PRETERMBLOCK(DO);
3040 }
3280af22
NIS
3041 if (PL_oldoldbufptr == PL_last_lop)
3042 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3043 else
3280af22 3044 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3045 OPERATOR(HASHBRACK);
a0d0e21e 3046 case XOPERATOR:
bf4acbe4 3047 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3048 s++;
44a8e56a 3049 d = s;
3280af22
NIS
3050 PL_tokenbuf[0] = '\0';
3051 if (d < PL_bufend && *d == '-') {
3052 PL_tokenbuf[0] = '-';
44a8e56a 3053 d++;
bf4acbe4 3054 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3055 d++;
3056 }
7e2040f0 3057 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3058 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3059 FALSE, &len);
bf4acbe4 3060 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3061 d++;
3062 if (*d == '}') {
3280af22 3063 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3064 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3065 if (minus)
3066 force_next('-');
748a9306
LW
3067 }
3068 }
3069 /* FALL THROUGH */
09bef843 3070 case XATTRBLOCK:
748a9306 3071 case XBLOCK:
3280af22
NIS
3072 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3073 PL_expect = XSTATE;
a0d0e21e 3074 break;
09bef843 3075 case XATTRTERM:
a0d0e21e 3076 case XTERMBLOCK:
3280af22
NIS
3077 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3078 PL_expect = XSTATE;
a0d0e21e
LW
3079 break;
3080 default: {
3081 char *t;
3280af22
NIS
3082 if (PL_oldoldbufptr == PL_last_lop)
3083 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3084 else
3280af22 3085 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3086 s = skipspace(s);
09ecc4b6 3087 if (*s == '}')
a0d0e21e 3088 OPERATOR(HASHBRACK);
b8a4b1be
GS
3089 /* This hack serves to disambiguate a pair of curlies
3090 * as being a block or an anon hash. Normally, expectation
3091 * determines that, but in cases where we're not in a
3092 * position to expect anything in particular (like inside
3093 * eval"") we have to resolve the ambiguity. This code
3094 * covers the case where the first term in the curlies is a
3095 * quoted string. Most other cases need to be explicitly
3096 * disambiguated by prepending a `+' before the opening
3097 * curly in order to force resolution as an anon hash.
3098 *
3099 * XXX should probably propagate the outer expectation
3100 * into eval"" to rely less on this hack, but that could
3101 * potentially break current behavior of eval"".
3102 * GSAR 97-07-21
3103 */
3104 t = s;
3105 if (*s == '\'' || *s == '"' || *s == '`') {
3106 /* common case: get past first string, handling escapes */
3280af22 3107 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3108 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3109 t++;
3110 t++;
a0d0e21e 3111 }
b8a4b1be 3112 else if (*s == 'q') {
3280af22 3113 if (++t < PL_bufend
b8a4b1be 3114 && (!isALNUM(*t)
3280af22 3115 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3116 && !isALNUM(*t))))
3117 {
b8a4b1be
GS
3118 char *tmps;
3119 char open, close, term;
3120 I32 brackets = 1;
3121
3280af22 3122 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3123 t++;
3124 term = *t;
3125 open = term;
3126 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3127 term = tmps[5];
3128 close = term;
3129 if (open == close)
3280af22
NIS
3130 for (t++; t < PL_bufend; t++) {
3131 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3132 t++;
6d07e5e9 3133 else if (*t == open)
b8a4b1be
GS
3134 break;
3135 }
3136 else
3280af22
NIS
3137 for (t++; t < PL_bufend; t++) {
3138 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3139 t++;
6d07e5e9 3140 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3141 break;
3142 else if (*t == open)
3143 brackets++;
3144 }
3145 }
3146 t++;
a0d0e21e 3147 }
7e2040f0 3148 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3149 t += UTF8SKIP(t);
7e2040f0 3150 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3151 t += UTF8SKIP(t);
a0d0e21e 3152 }
3280af22 3153 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3154 t++;
b8a4b1be
GS
3155 /* if comma follows first term, call it an anon hash */
3156 /* XXX it could be a comma expression with loop modifiers */
3280af22 3157 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3158 || (*t == '=' && t[1] == '>')))
a0d0e21e 3159 OPERATOR(HASHBRACK);
3280af22 3160 if (PL_expect == XREF)
4e4e412b 3161 PL_expect = XTERM;
a0d0e21e 3162 else {
3280af22
NIS
3163 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3164 PL_expect = XSTATE;
a0d0e21e 3165 }
8990e307 3166 }
a0d0e21e 3167 break;
463ee0b2 3168 }
57843af0 3169 yylval.ival = CopLINE(PL_curcop);
79072805 3170 if (isSPACE(*s) || *s == '#')
3280af22 3171 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3172 TOKEN('{');
378cc40b 3173 case '}':
79072805
LW
3174 rightbracket:
3175 s++;
3280af22 3176 if (PL_lex_brackets <= 0)
d98d5fff 3177 yyerror("Unmatched right curly bracket");
463ee0b2 3178 else
3280af22 3179 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3180 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3181 PL_lex_formbrack = 0;
3182 if (PL_lex_state == LEX_INTERPNORMAL) {
3183 if (PL_lex_brackets == 0) {
9059aa12
LW
3184 if (PL_expect & XFAKEBRACK) {
3185 PL_expect &= XENUMMASK;
3280af22
NIS
3186 PL_lex_state = LEX_INTERPEND;
3187 PL_bufptr = s;
cea2e8a9 3188 return yylex(); /* ignore fake brackets */
79072805 3189 }
fa83b5b6 3190 if (*s == '-' && s[1] == '>')
3280af22 3191 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3192 else if (*s != '[' && *s != '{')
3280af22 3193 PL_lex_state = LEX_INTERPEND;
79072805
LW
3194 }
3195 }
9059aa12
LW
3196 if (PL_expect & XFAKEBRACK) {
3197 PL_expect &= XENUMMASK;
3280af22 3198 PL_bufptr = s;
cea2e8a9 3199 return yylex(); /* ignore fake brackets */
748a9306 3200 }
79072805
LW
3201 force_next('}');
3202 TOKEN(';');
378cc40b
LW
3203 case '&':
3204 s++;
3205 tmp = *s++;
3206 if (tmp == '&')
a0d0e21e 3207 AOPERATOR(ANDAND);
378cc40b 3208 s--;
3280af22 3209 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3210 if (ckWARN(WARN_SEMICOLON)
3211 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3212 {
57843af0 3213 CopLINE_dec(PL_curcop);
cea2e8a9 3214 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3215 CopLINE_inc(PL_curcop);
463ee0b2 3216 }
79072805 3217 BAop(OP_BIT_AND);
463ee0b2 3218 }
79072805 3219
3280af22
NIS
3220 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3221 if (*PL_tokenbuf) {
3222 PL_expect = XOPERATOR;
3223 force_ident(PL_tokenbuf, '&');
463ee0b2 3224 }
79072805
LW
3225 else
3226 PREREF('&');
c07a80fd 3227 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3228 TERM('&');
3229
378cc40b
LW
3230 case '|':
3231 s++;
3232 tmp = *s++;
3233 if (tmp == '|')
a0d0e21e 3234 AOPERATOR(OROR);
378cc40b 3235 s--;
79072805 3236 BOop(OP_BIT_OR);
378cc40b
LW
3237 case '=':
3238 s++;
3239 tmp = *s++;
3240 if (tmp == '=')
79072805
LW
3241 Eop(OP_EQ);
3242 if (tmp == '>')
3243 OPERATOR(',');
378cc40b 3244 if (tmp == '~')
79072805 3245 PMop(OP_MATCH);
599cee73 3246 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3247 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3248 s--;
3280af22
NIS
3249 if (PL_expect == XSTATE && isALPHA(tmp) &&
3250 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3251 {
3280af22
NIS
3252 if (PL_in_eval && !PL_rsfp) {
3253 d = PL_bufend;
a5f75d66
AD
3254 while (s < d) {
3255 if (*s++ == '\n') {
3256 incline(s);
3257 if (strnEQ(s,"=cut",4)) {
3258 s = strchr(s,'\n');
3259 if (s)
3260 s++;
3261 else
3262 s = d;
3263 incline(s);
3264 goto retry;
3265 }
3266 }
3267 }
3268 goto retry;
3269 }
3280af22
NIS
3270 s = PL_bufend;
3271 PL_doextract = TRUE;
a0d0e21e
LW
3272 goto retry;
3273 }
3280af22 3274 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3275 char *t;
51882d45 3276#ifdef PERL_STRICT_CR
bf4acbe4 3277 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3278#else
bf4acbe4 3279 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3280#endif
a0d0e21e
LW
3281 if (*t == '\n' || *t == '#') {
3282 s--;
3280af22 3283 PL_expect = XBLOCK;
a0d0e21e
LW
3284 goto leftbracket;
3285 }
79072805 3286 }
a0d0e21e
LW
3287 yylval.ival = 0;
3288 OPERATOR(ASSIGNOP);
378cc40b
LW
3289 case '!':
3290 s++;
3291 tmp = *s++;
3292 if (tmp == '=')
79072805 3293 Eop(OP_NE);
378cc40b 3294 if (tmp == '~')
79072805 3295 PMop(OP_NOT);
378cc40b
LW
3296 s--;
3297 OPERATOR('!');
3298 case '<':
3280af22 3299 if (PL_expect != XOPERATOR) {
93a17b20 3300 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3301 check_uni();
79072805
LW
3302 if (s[1] == '<')
3303 s = scan_heredoc(s);
3304 else
3305 s = scan_inputsymbol(s);
3306 TERM(sublex_start());
378cc40b
LW
3307 }
3308 s++;
3309 tmp = *s++;
3310 if (tmp == '<')
79072805 3311 SHop(OP_LEFT_SHIFT);
395c3793
LW
3312 if (tmp == '=') {
3313 tmp = *s++;
3314 if (tmp == '>')
79072805 3315 Eop(OP_NCMP);
395c3793 3316 s--;
79072805 3317 Rop(OP_LE);
395c3793 3318 }
378cc40b 3319 s--;
79072805 3320 Rop(OP_LT);
378cc40b
LW
3321 case '>':
3322 s++;
3323 tmp = *s++;
3324 if (tmp == '>')
79072805 3325 SHop(OP_RIGHT_SHIFT);
378cc40b 3326 if (tmp == '=')
79072805 3327 Rop(OP_GE);
378cc40b 3328 s--;
79072805 3329 Rop(OP_GT);
378cc40b
LW
3330
3331 case '$':
bbce6d69 3332 CLINE;
3333
3280af22
NIS
3334 if (PL_expect == XOPERATOR) {
3335 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3336 PL_expect = XTERM;
a0d0e21e 3337 depcom();
bbce6d69 3338 return ','; /* grandfather non-comma-format format */
a0d0e21e 3339 }
8990e307 3340 }
a0d0e21e 3341
7e2040f0 3342 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3343 PL_tokenbuf[0] = '@';
376b8730
SM
3344 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3345 sizeof PL_tokenbuf - 1, FALSE);
3346 if (PL_expect == XOPERATOR)
3347 no_op("Array length", s);
3280af22 3348 if (!PL_tokenbuf[1])
a0d0e21e 3349 PREREF(DOLSHARP);
3280af22
NIS
3350 PL_expect = XOPERATOR;
3351 PL_pending_ident = '#';
463ee0b2 3352 TOKEN(DOLSHARP);
79072805 3353 }
bbce6d69 3354
3280af22 3355 PL_tokenbuf[0] = '$';
376b8730
SM
3356 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3357 sizeof PL_tokenbuf - 1, FALSE);
3358 if (PL_expect == XOPERATOR)
3359 no_op("Scalar", s);
3280af22
NIS
3360 if (!PL_tokenbuf[1]) {
3361 if (s == PL_bufend)
bbce6d69 3362 yyerror("Final $ should be \\$ or $name");
3363 PREREF('$');
8990e307 3364 }
a0d0e21e 3365
bbce6d69 3366 /* This kludge not intended to be bulletproof. */
3280af22 3367 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3368 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3369 newSViv(PL_compiling.cop_arybase));
bbce6d69 3370 yylval.opval->op_private = OPpCONST_ARYBASE;
3371 TERM(THING);
3372 }
3373
ff68c719 3374 d = s;
69d2bceb 3375 tmp = (I32)*s;
3280af22 3376 if (PL_lex_state == LEX_NORMAL)
ff68c719 3377 s = skipspace(s);
3378
3280af22 3379 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3380 char *t;
3381 if (*s == '[') {
3280af22 3382 PL_tokenbuf[0] = '@';
599cee73 3383 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3384 for(t = s + 1;
7e2040f0 3385 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3386 t++) ;
a0d0e21e 3387 if (*t++ == ',') {
3280af22
NIS
3388 PL_bufptr = skipspace(PL_bufptr);
3389 while (t < PL_bufend && *t != ']')
bbce6d69 3390 t++;
cea2e8a9 3391 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3392 "Multidimensional syntax %.*s not supported",
3393 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3394 }
3395 }
bbce6d69 3396 }
3397 else if (*s == '{') {
3280af22 3398 PL_tokenbuf[0] = '%';
599cee73 3399 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3400 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3401 {
3280af22 3402 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3403 STRLEN len;
3404 for (t++; isSPACE(*t); t++) ;
7e2040f0 3405 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3406 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3407 for (; isSPACE(*t); t++) ;
864dbfa3 3408 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3409 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3410 "You need to quote \"%s\"", tmpbuf);
748a9306 3411 }
93a17b20
LW
3412 }
3413 }
2f3197b3 3414 }
bbce6d69 3415
3280af22 3416 PL_expect = XOPERATOR;
69d2bceb 3417 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3418 bool islop = (PL_last_lop == PL_oldoldbufptr);
3419 if (!islop || PL_last_lop_op == OP_GREPSTART)
3420 PL_expect = XOPERATOR;
bbce6d69 3421 else if (strchr("$@\"'`q", *s))
3280af22 3422 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3423 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3424 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3425 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3426 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3427 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3428 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3429 /* binary operators exclude handle interpretations */
3430 switch (tmp) {
3431 case -KEY_x:
3432 case -KEY_eq:
3433 case -KEY_ne:
3434 case -KEY_gt:
3435 case -KEY_lt:
3436 case -KEY_ge:
3437 case -KEY_le:
3438 case -KEY_cmp:
3439 break;
3440 default:
3280af22 3441 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3442 break;
3443 }
3444 }
68dc0745 3445 else {
3446 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3447 if (gv && GvCVu(gv))
3280af22 3448 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3449 }
93a17b20 3450 }
bbce6d69 3451 else if (isDIGIT(*s))
3280af22 3452 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3453 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3454 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3455 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3456 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3457 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3458 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3459 }
3280af22 3460 PL_pending_ident = '$';
79072805 3461 TOKEN('$');
378cc40b
LW
3462
3463 case '@':
3280af22 3464 if (PL_expect == XOPERATOR)
bbce6d69 3465 no_op("Array", s);
3280af22
NIS
3466 PL_tokenbuf[0] = '@';
3467 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3468 if (!PL_tokenbuf[1]) {
3469 if (s == PL_bufend)
bbce6d69 3470 yyerror("Final @ should be \\@ or @name");
3471 PREREF('@');
3472 }
3280af22 3473 if (PL_lex_state == LEX_NORMAL)
ff68c719 3474 s = skipspace(s);
3280af22 3475 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3476 if (*s == '{')
3280af22 3477 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3478
3479 /* Warn about @ where they meant $. */
599cee73 3480 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3481 if (*s == '[' || *s == '{') {
3482 char *t = s + 1;
7e2040f0 3483 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3484 t++;
3485 if (*t == '}' || *t == ']') {
3486 t++;
3280af22 3487 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3488 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3489 "Scalar value %.*s better written as $%.*s",
3280af22 3490 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3491 }
93a17b20
LW
3492 }
3493 }
463ee0b2 3494 }
3280af22 3495 PL_pending_ident = '@';
79072805 3496 TERM('@');
378cc40b
LW
3497
3498 case '/': /* may either be division or pattern */
3499 case '?': /* may either be conditional or pattern */
3280af22 3500 if (PL_expect != XOPERATOR) {
c277df42 3501 /* Disable warning on "study /blah/" */
3280af22
NIS
3502 if (PL_oldoldbufptr == PL_last_uni
3503 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3504 || memNE(PL_last_uni, "study", 5)
3505 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3506 check_uni();
8782bef2 3507 s = scan_pat(s,OP_MATCH);
79072805 3508 TERM(sublex_start());
378cc40b
LW
3509 }
3510 tmp = *s++;
a687059c 3511 if (tmp == '/')
79072805 3512 Mop(OP_DIVIDE);
378cc40b
LW
3513 OPERATOR(tmp);
3514
3515 case '.':
51882d45
GS
3516 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3517#ifdef PERL_STRICT_CR
3518 && s[1] == '\n'
3519#else
3520 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3521#endif
3522 && (s == PL_linestart || s[-1] == '\n') )
3523 {
3280af22
NIS
3524 PL_lex_formbrack = 0;
3525 PL_expect = XSTATE;
79072805
LW
3526 goto rightbracket;
3527 }
3280af22 3528 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3529 tmp = *s++;
a687059c
LW
3530 if (*s == tmp) {
3531 s++;
2f3197b3
LW
3532 if (*s == tmp) {
3533 s++;
79072805 3534 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3535 }
3536 else
79072805 3537 yylval.ival = 0;
378cc40b 3538 OPERATOR(DOTDOT);
a687059c 3539 }
3280af22 3540 if (PL_expect != XOPERATOR)
2f3197b3 3541 check_uni();
79072805 3542 Aop(OP_CONCAT);
378cc40b
LW
3543 }
3544 /* FALL THROUGH */
3545 case '0': case '1': case '2': case '3': case '4':
3546 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3547 s = scan_num(s, &yylval);
3280af22 3548 if (PL_expect == XOPERATOR)
8990e307 3549 no_op("Number",s);
79072805
LW
3550 TERM(THING);
3551
3552 case '\'':
09bef843 3553 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3554 if (PL_expect == XOPERATOR) {
3555 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3556 PL_expect = XTERM;
a0d0e21e
LW
3557 depcom();
3558 return ','; /* grandfather non-comma-format format */
3559 }
463ee0b2 3560 else
8990e307 3561 no_op("String",s);
463ee0b2 3562 }
79072805 3563 if (!s)
85e6fe83 3564 missingterm((char*)0);
79072805
LW
3565 yylval.ival = OP_CONST;
3566 TERM(sublex_start());
3567
3568 case '"':
09bef843 3569 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3570 if (PL_expect == XOPERATOR) {
3571 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3572 PL_expect = XTERM;
a0d0e21e
LW
3573 depcom();
3574 return ','; /* grandfather non-comma-format format */
3575 }
463ee0b2 3576 else
8990e307 3577 no_op("String",s);
463ee0b2 3578 }
79072805 3579 if (!s)
85e6fe83 3580 missingterm((char*)0);
4633a7c4 3581 yylval.ival = OP_CONST;
3280af22 3582 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3583 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {