This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Thinko in #7222.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
a790bc05 72/* XXX If this causes problems, set i_unistd=undef in the hint file. */
73#ifdef I_UNISTD
74# include <unistd.h> /* Needed for execv() */
75#endif
76
77
79072805
LW
78#ifdef ff_next
79#undef ff_next
d48672a2
LW
80#endif
81
a1a0e61e
TD
82#ifdef USE_PURE_BISON
83YYSTYPE* yylval_pointer = NULL;
84int* yychar_pointer = NULL;
22c35a8c
GS
85# undef yylval
86# undef yychar
e4bfbdd4
JH
87# define yylval (*yylval_pointer)
88# define yychar (*yychar_pointer)
89# define PERL_YYLEX_PARAM yylval_pointer,yychar_pointer
cea2e8a9
GS
90# undef yylex
91# define yylex() Perl_yylex(aTHX_ yylval_pointer, yychar_pointer)
a1a0e61e
TD
92#endif
93
79072805 94#include "keywords.h"
fe14fcc3 95
ffb4593c
NT
96/* CLINE is a macro that ensures PL_copline has a sane value */
97
ae986130
LW
98#ifdef CLINE
99#undef CLINE
100#endif
57843af0 101#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 102
ffb4593c
NT
103/*
104 * Convenience functions to return different tokens and prime the
9cbb5ea2 105 * lexer for the next token. They all take an argument.
ffb4593c
NT
106 *
107 * TOKEN : generic token (used for '(', DOLSHARP, etc)
108 * OPERATOR : generic operator
109 * AOPERATOR : assignment operator
110 * PREBLOCK : beginning the block after an if, while, foreach, ...
111 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
112 * PREREF : *EXPR where EXPR is not a simple identifier
113 * TERM : expression term
114 * LOOPX : loop exiting command (goto, last, dump, etc)
115 * FTST : file test operator
116 * FUN0 : zero-argument function
2d2e263d 117 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
118 * BOop : bitwise or or xor
119 * BAop : bitwise and
120 * SHop : shift operator
121 * PWop : power operator
9cbb5ea2 122 * PMop : pattern-matching operator
ffb4593c
NT
123 * Aop : addition-level operator
124 * Mop : multiplication-level operator
125 * Eop : equality-testing operator
126 * Rop : relational operator <= != gt
127 *
128 * Also see LOP and lop() below.
129 */
130
3280af22
NIS
131#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
132#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
133#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
134#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
135#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
136#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
137#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
138#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
139#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
140#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
141#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
142#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
143#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
144#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
145#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
146#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
147#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
148#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
149#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
150#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 151
a687059c
LW
152/* This bit of chicanery makes a unary function followed by
153 * a parenthesis into a function with one argument, highest precedence.
154 */
2f3197b3 155#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
156 PL_expect = XTERM, \
157 PL_bufptr = s, \
158 PL_last_uni = PL_oldbufptr, \
159 PL_last_lop_op = f, \
a687059c
LW
160 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
161
79072805 162#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
163 PL_bufptr = s, \
164 PL_last_uni = PL_oldbufptr, \
79072805
LW
165 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
166
9f68db38 167/* grandfather return to old style */
3280af22 168#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 169
ffb4593c
NT
170/*
171 * S_ao
172 *
173 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
174 * into an OP_ANDASSIGN or OP_ORASSIGN
175 */
176
76e3520e 177STATIC int
cea2e8a9 178S_ao(pTHX_ int toketype)
a0d0e21e 179{
3280af22
NIS
180 if (*PL_bufptr == '=') {
181 PL_bufptr++;
a0d0e21e
LW
182 if (toketype == ANDAND)
183 yylval.ival = OP_ANDASSIGN;
184 else if (toketype == OROR)
185 yylval.ival = OP_ORASSIGN;
186 toketype = ASSIGNOP;
187 }
188 return toketype;
189}
190
ffb4593c
NT
191/*
192 * S_no_op
193 * When Perl expects an operator and finds something else, no_op
194 * prints the warning. It always prints "<something> found where
195 * operator expected. It prints "Missing semicolon on previous line?"
196 * if the surprise occurs at the start of the line. "do you need to
197 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
198 * where the compiler doesn't know if foo is a method call or a function.
199 * It prints "Missing operator before end of line" if there's nothing
200 * after the missing operator, or "... before <...>" if there is something
201 * after the missing operator.
202 */
203
76e3520e 204STATIC void
cea2e8a9 205S_no_op(pTHX_ char *what, char *s)
463ee0b2 206{
3280af22
NIS
207 char *oldbp = PL_bufptr;
208 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 209
1189a94a
GS
210 if (!s)
211 s = oldbp;
07c798fb 212 else
1189a94a 213 PL_bufptr = s;
cea2e8a9 214 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 215 if (is_first)
cea2e8a9 216 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 217 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 218 char *t;
7e2040f0 219 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 220 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 221 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 222 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 223 }
07c798fb
HS
224 else {
225 assert(s >= oldbp);
cea2e8a9 226 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 227 }
3280af22 228 PL_bufptr = oldbp;
8990e307
LW
229}
230
ffb4593c
NT
231/*
232 * S_missingterm
233 * Complain about missing quote/regexp/heredoc terminator.
234 * If it's called with (char *)NULL then it cauterizes the line buffer.
235 * If we're in a delimited string and the delimiter is a control
236 * character, it's reformatted into a two-char sequence like ^C.
237 * This is fatal.
238 */
239
76e3520e 240STATIC void
cea2e8a9 241S_missingterm(pTHX_ char *s)
8990e307
LW
242{
243 char tmpbuf[3];
244 char q;
245 if (s) {
246 char *nl = strrchr(s,'\n');
d2719217 247 if (nl)
8990e307
LW
248 *nl = '\0';
249 }
9d116dd7
JH
250 else if (
251#ifdef EBCDIC
252 iscntrl(PL_multi_close)
253#else
254 PL_multi_close < 32 || PL_multi_close == 127
255#endif
256 ) {
8990e307 257 *tmpbuf = '^';
3280af22 258 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
259 s = "\\n";
260 tmpbuf[2] = '\0';
261 s = tmpbuf;
262 }
263 else {
3280af22 264 *tmpbuf = PL_multi_close;
8990e307
LW
265 tmpbuf[1] = '\0';
266 s = tmpbuf;
267 }
268 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 269 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 270}
79072805 271
ffb4593c
NT
272/*
273 * Perl_deprecate
ffb4593c
NT
274 */
275
79072805 276void
864dbfa3 277Perl_deprecate(pTHX_ char *s)
a0d0e21e 278{
d008e5eb 279 dTHR;
599cee73 280 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 281 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
282}
283
ffb4593c
NT
284/*
285 * depcom
9cbb5ea2 286 * Deprecate a comma-less variable list.
ffb4593c
NT
287 */
288
76e3520e 289STATIC void
cea2e8a9 290S_depcom(pTHX)
a0d0e21e
LW
291{
292 deprecate("comma-less variable list");
293}
294
ffb4593c 295/*
9cbb5ea2
GS
296 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
297 * utf16-to-utf8-reversed.
ffb4593c
NT
298 */
299
c39cd008
GS
300#ifdef PERL_CR_FILTER
301static void
302strip_return(SV *sv)
303{
304 register char *s = SvPVX(sv);
305 register char *e = s + SvCUR(sv);
306 /* outer loop optimized to do nothing if there are no CR-LFs */
307 while (s < e) {
308 if (*s++ == '\r' && *s == '\n') {
309 /* hit a CR-LF, need to copy the rest */
310 register char *d = s - 1;
311 *d++ = *s++;
312 while (s < e) {
313 if (*s == '\r' && s[1] == '\n')
314 s++;
315 *d++ = *s++;
316 }
317 SvCUR(sv) -= s - d;
318 return;
319 }
320 }
321}
a868473f 322
76e3520e 323STATIC I32
c39cd008 324S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 325{
c39cd008
GS
326 I32 count = FILTER_READ(idx+1, sv, maxlen);
327 if (count > 0 && !maxlen)
328 strip_return(sv);
329 return count;
a868473f
NIS
330}
331#endif
332
ffb4593c
NT
333/*
334 * Perl_lex_start
9cbb5ea2
GS
335 * Initialize variables. Uses the Perl save_stack to save its state (for
336 * recursive calls to the parser).
ffb4593c
NT
337 */
338
a0d0e21e 339void
864dbfa3 340Perl_lex_start(pTHX_ SV *line)
79072805 341{
0f15f207 342 dTHR;
8990e307
LW
343 char *s;
344 STRLEN len;
345
3280af22
NIS
346 SAVEI32(PL_lex_dojoin);
347 SAVEI32(PL_lex_brackets);
3280af22
NIS
348 SAVEI32(PL_lex_casemods);
349 SAVEI32(PL_lex_starts);
350 SAVEI32(PL_lex_state);
7766f137 351 SAVEVPTR(PL_lex_inpat);
3280af22 352 SAVEI32(PL_lex_inwhat);
18b09519
GS
353 if (PL_lex_state == LEX_KNOWNEXT) {
354 I32 toke = PL_nexttoke;
355 while (--toke >= 0) {
356 SAVEI32(PL_nexttype[toke]);
357 SAVEVPTR(PL_nextval[toke]);
358 }
359 SAVEI32(PL_nexttoke);
18b09519 360 }
57843af0 361 SAVECOPLINE(PL_curcop);
3280af22
NIS
362 SAVEPPTR(PL_bufptr);
363 SAVEPPTR(PL_bufend);
364 SAVEPPTR(PL_oldbufptr);
365 SAVEPPTR(PL_oldoldbufptr);
366 SAVEPPTR(PL_linestart);
367 SAVESPTR(PL_linestr);
368 SAVEPPTR(PL_lex_brackstack);
369 SAVEPPTR(PL_lex_casestack);
c76ac1ee 370 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
371 SAVESPTR(PL_lex_stuff);
372 SAVEI32(PL_lex_defer);
09bef843 373 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 374 SAVESPTR(PL_lex_repl);
bebdddfc
GS
375 SAVEINT(PL_expect);
376 SAVEINT(PL_lex_expect);
3280af22
NIS
377
378 PL_lex_state = LEX_NORMAL;
379 PL_lex_defer = 0;
380 PL_expect = XSTATE;
381 PL_lex_brackets = 0;
3280af22
NIS
382 New(899, PL_lex_brackstack, 120, char);
383 New(899, PL_lex_casestack, 12, char);
384 SAVEFREEPV(PL_lex_brackstack);
385 SAVEFREEPV(PL_lex_casestack);
386 PL_lex_casemods = 0;
387 *PL_lex_casestack = '\0';
388 PL_lex_dojoin = 0;
389 PL_lex_starts = 0;
390 PL_lex_stuff = Nullsv;
391 PL_lex_repl = Nullsv;
392 PL_lex_inpat = 0;
76be56bc 393 PL_nexttoke = 0;
3280af22 394 PL_lex_inwhat = 0;
09bef843 395 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
396 PL_linestr = line;
397 if (SvREADONLY(PL_linestr))
398 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
399 s = SvPV(PL_linestr, len);
8990e307 400 if (len && s[len-1] != ';') {
3280af22
NIS
401 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
402 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
403 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 404 }
3280af22
NIS
405 SvTEMP_off(PL_linestr);
406 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
407 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
408 SvREFCNT_dec(PL_rs);
79cb57f6 409 PL_rs = newSVpvn("\n", 1);
3280af22 410 PL_rsfp = 0;
79072805 411}
a687059c 412
ffb4593c
NT
413/*
414 * Perl_lex_end
9cbb5ea2
GS
415 * Finalizer for lexing operations. Must be called when the parser is
416 * done with the lexer.
ffb4593c
NT
417 */
418
463ee0b2 419void
864dbfa3 420Perl_lex_end(pTHX)
463ee0b2 421{
3280af22 422 PL_doextract = FALSE;
463ee0b2
LW
423}
424
ffb4593c
NT
425/*
426 * S_incline
427 * This subroutine has nothing to do with tilting, whether at windmills
428 * or pinball tables. Its name is short for "increment line". It
57843af0 429 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 430 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
431 * # line 500 "foo.pm"
432 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
433 */
434
76e3520e 435STATIC void
cea2e8a9 436S_incline(pTHX_ char *s)
463ee0b2 437{
0f15f207 438 dTHR;
463ee0b2
LW
439 char *t;
440 char *n;
73659bf1 441 char *e;
463ee0b2 442 char ch;
463ee0b2 443
57843af0 444 CopLINE_inc(PL_curcop);
463ee0b2
LW
445 if (*s++ != '#')
446 return;
bf4acbe4 447 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
448 if (strnEQ(s, "line", 4))
449 s += 4;
450 else
451 return;
452 if (*s == ' ' || *s == '\t')
453 s++;
454 else
455 return;
bf4acbe4 456 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
457 if (!isDIGIT(*s))
458 return;
459 n = s;
460 while (isDIGIT(*s))
461 s++;
bf4acbe4 462 while (SPACE_OR_TAB(*s))
463ee0b2 463 s++;
73659bf1 464 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 465 s++;
73659bf1
GS
466 e = t + 1;
467 }
463ee0b2 468 else {
463ee0b2 469 for (t = s; !isSPACE(*t); t++) ;
73659bf1 470 e = t;
463ee0b2 471 }
bf4acbe4 472 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
473 e++;
474 if (*e != '\n' && *e != '\0')
475 return; /* false alarm */
476
463ee0b2
LW
477 ch = *t;
478 *t = '\0';
f4dd75d9
GS
479 if (t - s > 0) {
480#ifdef USE_ITHREADS
481 Safefree(CopFILE(PL_curcop));
482#else
483 SvREFCNT_dec(CopFILEGV(PL_curcop));
484#endif
57843af0 485 CopFILE_set(PL_curcop, s);
f4dd75d9 486 }
463ee0b2 487 *t = ch;
57843af0 488 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
489}
490
ffb4593c
NT
491/*
492 * S_skipspace
493 * Called to gobble the appropriate amount and type of whitespace.
494 * Skips comments as well.
495 */
496
76e3520e 497STATIC char *
cea2e8a9 498S_skipspace(pTHX_ register char *s)
a687059c 499{
11343788 500 dTHR;
3280af22 501 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 502 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
503 s++;
504 return s;
505 }
506 for (;;) {
fd049845 507 STRLEN prevlen;
09bef843
SB
508 SSize_t oldprevlen, oldoldprevlen;
509 SSize_t oldloplen, oldunilen;
60e6418e
GS
510 while (s < PL_bufend && isSPACE(*s)) {
511 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
512 incline(s);
513 }
ffb4593c
NT
514
515 /* comment */
3280af22
NIS
516 if (s < PL_bufend && *s == '#') {
517 while (s < PL_bufend && *s != '\n')
463ee0b2 518 s++;
60e6418e 519 if (s < PL_bufend) {
463ee0b2 520 s++;
60e6418e
GS
521 if (PL_in_eval && !PL_rsfp) {
522 incline(s);
523 continue;
524 }
525 }
463ee0b2 526 }
ffb4593c
NT
527
528 /* only continue to recharge the buffer if we're at the end
529 * of the buffer, we're not reading from a source filter, and
530 * we're in normal lexing mode
531 */
09bef843
SB
532 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
533 PL_lex_state == LEX_FORMLINE)
463ee0b2 534 return s;
ffb4593c
NT
535
536 /* try to recharge the buffer */
9cbb5ea2
GS
537 if ((s = filter_gets(PL_linestr, PL_rsfp,
538 (prevlen = SvCUR(PL_linestr)))) == Nullch)
539 {
540 /* end of file. Add on the -p or -n magic */
3280af22
NIS
541 if (PL_minus_n || PL_minus_p) {
542 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
543 ";}continue{print or die qq(-p destination: $!\\n)" :
544 "");
3280af22
NIS
545 sv_catpv(PL_linestr,";}");
546 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
547 }
548 else
3280af22 549 sv_setpv(PL_linestr,";");
ffb4593c
NT
550
551 /* reset variables for next time we lex */
9cbb5ea2
GS
552 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
553 = SvPVX(PL_linestr);
3280af22 554 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
555
556 /* Close the filehandle. Could be from -P preprocessor,
557 * STDIN, or a regular file. If we were reading code from
558 * STDIN (because the commandline held no -e or filename)
559 * then we don't close it, we reset it so the code can
560 * read from STDIN too.
561 */
562
3280af22
NIS
563 if (PL_preprocess && !PL_in_eval)
564 (void)PerlProc_pclose(PL_rsfp);
565 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
566 PerlIO_clearerr(PL_rsfp);
8990e307 567 else
3280af22
NIS
568 (void)PerlIO_close(PL_rsfp);
569 PL_rsfp = Nullfp;
463ee0b2
LW
570 return s;
571 }
ffb4593c
NT
572
573 /* not at end of file, so we only read another line */
09bef843
SB
574 /* make corresponding updates to old pointers, for yyerror() */
575 oldprevlen = PL_oldbufptr - PL_bufend;
576 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
577 if (PL_last_uni)
578 oldunilen = PL_last_uni - PL_bufend;
579 if (PL_last_lop)
580 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
581 PL_linestart = PL_bufptr = s + prevlen;
582 PL_bufend = s + SvCUR(PL_linestr);
583 s = PL_bufptr;
09bef843
SB
584 PL_oldbufptr = s + oldprevlen;
585 PL_oldoldbufptr = s + oldoldprevlen;
586 if (PL_last_uni)
587 PL_last_uni = s + oldunilen;
588 if (PL_last_lop)
589 PL_last_lop = s + oldloplen;
a0d0e21e 590 incline(s);
ffb4593c
NT
591
592 /* debugger active and we're not compiling the debugger code,
593 * so store the line into the debugger's array of lines
594 */
3280af22 595 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
596 SV *sv = NEWSV(85,0);
597
598 sv_upgrade(sv, SVt_PVMG);
3280af22 599 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 600 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 601 }
463ee0b2 602 }
a687059c 603}
378cc40b 604
ffb4593c
NT
605/*
606 * S_check_uni
607 * Check the unary operators to ensure there's no ambiguity in how they're
608 * used. An ambiguous piece of code would be:
609 * rand + 5
610 * This doesn't mean rand() + 5. Because rand() is a unary operator,
611 * the +5 is its argument.
612 */
613
76e3520e 614STATIC void
cea2e8a9 615S_check_uni(pTHX)
ba106d47 616{
2f3197b3 617 char *s;
a0d0e21e 618 char *t;
0453d815 619 dTHR;
2f3197b3 620
3280af22 621 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 622 return;
3280af22
NIS
623 while (isSPACE(*PL_last_uni))
624 PL_last_uni++;
7e2040f0 625 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 626 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 627 return;
0453d815 628 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 629 char ch = *s;
0453d815
PM
630 *s = '\0';
631 Perl_warner(aTHX_ WARN_AMBIGUOUS,
632 "Warning: Use of \"%s\" without parens is ambiguous",
633 PL_last_uni);
634 *s = ch;
635 }
2f3197b3
LW
636}
637
ffb4593c
NT
638/* workaround to replace the UNI() macro with a function. Only the
639 * hints/uts.sh file mentions this. Other comments elsewhere in the
640 * source indicate Microport Unix might need it too.
641 */
642
ffed7fef
LW
643#ifdef CRIPPLED_CC
644
645#undef UNI
ffed7fef 646#define UNI(f) return uni(f,s)
ffed7fef 647
76e3520e 648STATIC int
cea2e8a9 649S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
650{
651 yylval.ival = f;
3280af22
NIS
652 PL_expect = XTERM;
653 PL_bufptr = s;
8f872242
NIS
654 PL_last_uni = PL_oldbufptr;
655 PL_last_lop_op = f;
ffed7fef
LW
656 if (*s == '(')
657 return FUNC1;
658 s = skipspace(s);
659 if (*s == '(')
660 return FUNC1;
661 else
662 return UNIOP;
663}
664
a0d0e21e
LW
665#endif /* CRIPPLED_CC */
666
ffb4593c
NT
667/*
668 * LOP : macro to build a list operator. Its behaviour has been replaced
669 * with a subroutine, S_lop() for which LOP is just another name.
670 */
671
a0d0e21e
LW
672#define LOP(f,x) return lop(f,x,s)
673
ffb4593c
NT
674/*
675 * S_lop
676 * Build a list operator (or something that might be one). The rules:
677 * - if we have a next token, then it's a list operator [why?]
678 * - if the next thing is an opening paren, then it's a function
679 * - else it's a list operator
680 */
681
76e3520e 682STATIC I32
a0be28da 683S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 684{
0f15f207 685 dTHR;
79072805 686 yylval.ival = f;
35c8bce7 687 CLINE;
3280af22
NIS
688 PL_expect = x;
689 PL_bufptr = s;
690 PL_last_lop = PL_oldbufptr;
691 PL_last_lop_op = f;
692 if (PL_nexttoke)
a0d0e21e 693 return LSTOP;
79072805
LW
694 if (*s == '(')
695 return FUNC;
696 s = skipspace(s);
697 if (*s == '(')
698 return FUNC;
699 else
700 return LSTOP;
701}
702
ffb4593c
NT
703/*
704 * S_force_next
9cbb5ea2 705 * When the lexer realizes it knows the next token (for instance,
ffb4593c 706 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
707 * to know what token to return the next time the lexer is called. Caller
708 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
709 * handles the token correctly.
ffb4593c
NT
710 */
711
76e3520e 712STATIC void
cea2e8a9 713S_force_next(pTHX_ I32 type)
79072805 714{
3280af22
NIS
715 PL_nexttype[PL_nexttoke] = type;
716 PL_nexttoke++;
717 if (PL_lex_state != LEX_KNOWNEXT) {
718 PL_lex_defer = PL_lex_state;
719 PL_lex_expect = PL_expect;
720 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
721 }
722}
723
ffb4593c
NT
724/*
725 * S_force_word
726 * When the lexer knows the next thing is a word (for instance, it has
727 * just seen -> and it knows that the next char is a word char, then
728 * it calls S_force_word to stick the next word into the PL_next lookahead.
729 *
730 * Arguments:
b1b65b59 731 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
732 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
733 * int check_keyword : if true, Perl checks to make sure the word isn't
734 * a keyword (do this if the word is a label, e.g. goto FOO)
735 * int allow_pack : if true, : characters will also be allowed (require,
736 * use, etc. do this)
9cbb5ea2 737 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
738 */
739
76e3520e 740STATIC char *
cea2e8a9 741S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 742{
463ee0b2
LW
743 register char *s;
744 STRLEN len;
745
746 start = skipspace(start);
747 s = start;
7e2040f0 748 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 749 (allow_pack && *s == ':') ||
15f0808c 750 (allow_initial_tick && *s == '\'') )
a0d0e21e 751 {
3280af22
NIS
752 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
753 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
754 return start;
755 if (token == METHOD) {
756 s = skipspace(s);
757 if (*s == '(')
3280af22 758 PL_expect = XTERM;
463ee0b2 759 else {
3280af22 760 PL_expect = XOPERATOR;
463ee0b2 761 }
79072805 762 }
3280af22
NIS
763 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
764 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
765 force_next(token);
766 }
767 return s;
768}
769
ffb4593c
NT
770/*
771 * S_force_ident
9cbb5ea2 772 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
773 * text only contains the "foo" portion. The first argument is a pointer
774 * to the "foo", and the second argument is the type symbol to prefix.
775 * Forces the next token to be a "WORD".
9cbb5ea2 776 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
777 */
778
76e3520e 779STATIC void
cea2e8a9 780S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
781{
782 if (s && *s) {
11343788 783 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 784 PL_nextval[PL_nexttoke].opval = o;
79072805 785 force_next(WORD);
748a9306 786 if (kind) {
e858de61 787 dTHR; /* just for in_eval */
11343788 788 o->op_private = OPpCONST_ENTERED;
55497cff 789 /* XXX see note in pp_entereval() for why we forgo typo
790 warnings if the symbol must be introduced in an eval.
791 GSAR 96-10-12 */
3280af22 792 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
793 kind == '$' ? SVt_PV :
794 kind == '@' ? SVt_PVAV :
795 kind == '%' ? SVt_PVHV :
796 SVt_PVGV
797 );
748a9306 798 }
79072805
LW
799 }
800}
801
1571675a
GS
802NV
803Perl_str_to_version(pTHX_ SV *sv)
804{
805 NV retval = 0.0;
806 NV nshift = 1.0;
807 STRLEN len;
808 char *start = SvPVx(sv,len);
3aa33fe5 809 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
810 char *end = start + len;
811 while (start < end) {
812 I32 skip;
813 UV n;
814 if (utf)
8e84507e 815 n = utf8_to_uv_chk((U8*)start, &skip, 0);
1571675a
GS
816 else {
817 n = *(U8*)start;
818 skip = 1;
819 }
820 retval += ((NV)n)/nshift;
821 start += skip;
822 nshift *= 1000;
823 }
824 return retval;
825}
826
ffb4593c
NT
827/*
828 * S_force_version
829 * Forces the next token to be a version number.
830 */
831
76e3520e 832STATIC char *
cea2e8a9 833S_force_version(pTHX_ char *s)
89bfa8cd 834{
835 OP *version = Nullop;
44dcb63b 836 char *d;
89bfa8cd 837
838 s = skipspace(s);
839
44dcb63b 840 d = s;
dd629d5b 841 if (*d == 'v')
44dcb63b 842 d++;
44dcb63b 843 if (isDIGIT(*d)) {
a7cb1f99 844 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 845 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 846 SV *ver;
89bfa8cd 847 s = scan_num(s);
89bfa8cd 848 version = yylval.opval;
dd629d5b
GS
849 ver = cSVOPx(version)->op_sv;
850 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 851 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
852 SvNVX(ver) = str_to_version(ver);
853 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 854 }
89bfa8cd 855 }
856 }
857
858 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 859 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 860 force_next(WORD);
861
862 return (s);
863}
864
ffb4593c
NT
865/*
866 * S_tokeq
867 * Tokenize a quoted string passed in as an SV. It finds the next
868 * chunk, up to end of string or a backslash. It may make a new
869 * SV containing that chunk (if HINT_NEW_STRING is on). It also
870 * turns \\ into \.
871 */
872
76e3520e 873STATIC SV *
cea2e8a9 874S_tokeq(pTHX_ SV *sv)
79072805
LW
875{
876 register char *s;
877 register char *send;
878 register char *d;
b3ac6de7
IZ
879 STRLEN len = 0;
880 SV *pv = sv;
79072805
LW
881
882 if (!SvLEN(sv))
b3ac6de7 883 goto finish;
79072805 884
a0d0e21e 885 s = SvPV_force(sv, len);
21a311ee 886 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 887 goto finish;
463ee0b2 888 send = s + len;
79072805
LW
889 while (s < send && *s != '\\')
890 s++;
891 if (s == send)
b3ac6de7 892 goto finish;
79072805 893 d = s;
3280af22 894 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 895 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
896 while (s < send) {
897 if (*s == '\\') {
a0d0e21e 898 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
899 s++; /* all that, just for this */
900 }
901 *d++ = *s++;
902 }
903 *d = '\0';
463ee0b2 904 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 905 finish:
3280af22 906 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 907 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
908 return sv;
909}
910
ffb4593c
NT
911/*
912 * Now come three functions related to double-quote context,
913 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
914 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
915 * interact with PL_lex_state, and create fake ( ... ) argument lists
916 * to handle functions and concatenation.
917 * They assume that whoever calls them will be setting up a fake
918 * join call, because each subthing puts a ',' after it. This lets
919 * "lower \luPpEr"
920 * become
921 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
922 *
923 * (I'm not sure whether the spurious commas at the end of lcfirst's
924 * arguments and join's arguments are created or not).
925 */
926
927/*
928 * S_sublex_start
929 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
930 *
931 * Pattern matching will set PL_lex_op to the pattern-matching op to
932 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
933 *
934 * OP_CONST and OP_READLINE are easy--just make the new op and return.
935 *
936 * Everything else becomes a FUNC.
937 *
938 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
939 * had an OP_CONST or OP_READLINE). This just sets us up for a
940 * call to S_sublex_push().
941 */
942
76e3520e 943STATIC I32
cea2e8a9 944S_sublex_start(pTHX)
79072805
LW
945{
946 register I32 op_type = yylval.ival;
79072805
LW
947
948 if (op_type == OP_NULL) {
3280af22
NIS
949 yylval.opval = PL_lex_op;
950 PL_lex_op = Nullop;
79072805
LW
951 return THING;
952 }
953 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 954 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
955
956 if (SvTYPE(sv) == SVt_PVIV) {
957 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
958 STRLEN len;
959 char *p;
960 SV *nsv;
961
962 p = SvPV(sv, len);
79cb57f6 963 nsv = newSVpvn(p, len);
01ec43d0
GS
964 if (SvUTF8(sv))
965 SvUTF8_on(nsv);
b3ac6de7
IZ
966 SvREFCNT_dec(sv);
967 sv = nsv;
968 }
969 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 970 PL_lex_stuff = Nullsv;
79072805
LW
971 return THING;
972 }
973
3280af22
NIS
974 PL_sublex_info.super_state = PL_lex_state;
975 PL_sublex_info.sub_inwhat = op_type;
976 PL_sublex_info.sub_op = PL_lex_op;
977 PL_lex_state = LEX_INTERPPUSH;
55497cff 978
3280af22
NIS
979 PL_expect = XTERM;
980 if (PL_lex_op) {
981 yylval.opval = PL_lex_op;
982 PL_lex_op = Nullop;
55497cff 983 return PMFUNC;
984 }
985 else
986 return FUNC;
987}
988
ffb4593c
NT
989/*
990 * S_sublex_push
991 * Create a new scope to save the lexing state. The scope will be
992 * ended in S_sublex_done. Returns a '(', starting the function arguments
993 * to the uc, lc, etc. found before.
994 * Sets PL_lex_state to LEX_INTERPCONCAT.
995 */
996
76e3520e 997STATIC I32
cea2e8a9 998S_sublex_push(pTHX)
55497cff 999{
0f15f207 1000 dTHR;
f46d017c 1001 ENTER;
55497cff 1002
3280af22
NIS
1003 PL_lex_state = PL_sublex_info.super_state;
1004 SAVEI32(PL_lex_dojoin);
1005 SAVEI32(PL_lex_brackets);
3280af22
NIS
1006 SAVEI32(PL_lex_casemods);
1007 SAVEI32(PL_lex_starts);
1008 SAVEI32(PL_lex_state);
7766f137 1009 SAVEVPTR(PL_lex_inpat);
3280af22 1010 SAVEI32(PL_lex_inwhat);
57843af0 1011 SAVECOPLINE(PL_curcop);
3280af22
NIS
1012 SAVEPPTR(PL_bufptr);
1013 SAVEPPTR(PL_oldbufptr);
1014 SAVEPPTR(PL_oldoldbufptr);
1015 SAVEPPTR(PL_linestart);
1016 SAVESPTR(PL_linestr);
1017 SAVEPPTR(PL_lex_brackstack);
1018 SAVEPPTR(PL_lex_casestack);
1019
1020 PL_linestr = PL_lex_stuff;
1021 PL_lex_stuff = Nullsv;
1022
9cbb5ea2
GS
1023 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1024 = SvPVX(PL_linestr);
3280af22
NIS
1025 PL_bufend += SvCUR(PL_linestr);
1026 SAVEFREESV(PL_linestr);
1027
1028 PL_lex_dojoin = FALSE;
1029 PL_lex_brackets = 0;
3280af22
NIS
1030 New(899, PL_lex_brackstack, 120, char);
1031 New(899, PL_lex_casestack, 12, char);
1032 SAVEFREEPV(PL_lex_brackstack);
1033 SAVEFREEPV(PL_lex_casestack);
1034 PL_lex_casemods = 0;
1035 *PL_lex_casestack = '\0';
1036 PL_lex_starts = 0;
1037 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1038 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1039
1040 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1041 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1042 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1043 else
3280af22 1044 PL_lex_inpat = Nullop;
79072805 1045
55497cff 1046 return '(';
79072805
LW
1047}
1048
ffb4593c
NT
1049/*
1050 * S_sublex_done
1051 * Restores lexer state after a S_sublex_push.
1052 */
1053
76e3520e 1054STATIC I32
cea2e8a9 1055S_sublex_done(pTHX)
79072805 1056{
3280af22
NIS
1057 if (!PL_lex_starts++) {
1058 PL_expect = XOPERATOR;
79cb57f6 1059 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1060 return THING;
1061 }
1062
3280af22
NIS
1063 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1064 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1065 return yylex();
79072805
LW
1066 }
1067
ffb4593c 1068 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1069 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1070 PL_linestr = PL_lex_repl;
1071 PL_lex_inpat = 0;
1072 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1073 PL_bufend += SvCUR(PL_linestr);
1074 SAVEFREESV(PL_linestr);
1075 PL_lex_dojoin = FALSE;
1076 PL_lex_brackets = 0;
3280af22
NIS
1077 PL_lex_casemods = 0;
1078 *PL_lex_casestack = '\0';
1079 PL_lex_starts = 0;
25da4f38 1080 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1081 PL_lex_state = LEX_INTERPNORMAL;
1082 PL_lex_starts++;
e9fa98b2
HS
1083 /* we don't clear PL_lex_repl here, so that we can check later
1084 whether this is an evalled subst; that means we rely on the
1085 logic to ensure sublex_done() is called again only via the
1086 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1087 }
e9fa98b2 1088 else {
3280af22 1089 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1090 PL_lex_repl = Nullsv;
1091 }
79072805 1092 return ',';
ffed7fef
LW
1093 }
1094 else {
f46d017c 1095 LEAVE;
3280af22
NIS
1096 PL_bufend = SvPVX(PL_linestr);
1097 PL_bufend += SvCUR(PL_linestr);
1098 PL_expect = XOPERATOR;
09bef843 1099 PL_sublex_info.sub_inwhat = 0;
79072805 1100 return ')';
ffed7fef
LW
1101 }
1102}
1103
02aa26ce
NT
1104/*
1105 scan_const
1106
1107 Extracts a pattern, double-quoted string, or transliteration. This
1108 is terrifying code.
1109
3280af22
NIS
1110 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1111 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1112 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1113
9b599b2a
GS
1114 Returns a pointer to the character scanned up to. Iff this is
1115 advanced from the start pointer supplied (ie if anything was
1116 successfully parsed), will leave an OP for the substring scanned
1117 in yylval. Caller must intuit reason for not parsing further
1118 by looking at the next characters herself.
1119
02aa26ce
NT
1120 In patterns:
1121 backslashes:
1122 double-quoted style: \r and \n
1123 regexp special ones: \D \s
1124 constants: \x3
1125 backrefs: \1 (deprecated in substitution replacements)
1126 case and quoting: \U \Q \E
1127 stops on @ and $, but not for $ as tail anchor
1128
1129 In transliterations:
1130 characters are VERY literal, except for - not at the start or end
1131 of the string, which indicates a range. scan_const expands the
1132 range to the full set of intermediate characters.
1133
1134 In double-quoted strings:
1135 backslashes:
1136 double-quoted style: \r and \n
1137 constants: \x3
1138 backrefs: \1 (deprecated)
1139 case and quoting: \U \Q \E
1140 stops on @ and $
1141
1142 scan_const does *not* construct ops to handle interpolated strings.
1143 It stops processing as soon as it finds an embedded $ or @ variable
1144 and leaves it to the caller to work out what's going on.
1145
1146 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1147
1148 $ in pattern could be $foo or could be tail anchor. Assumption:
1149 it's a tail anchor if $ is the last thing in the string, or if it's
1150 followed by one of ")| \n\t"
1151
1152 \1 (backreferences) are turned into $1
1153
1154 The structure of the code is
1155 while (there's a character to process) {
1156 handle transliteration ranges
1157 skip regexp comments
1158 skip # initiated comments in //x patterns
1159 check for embedded @foo
1160 check for embedded scalars
1161 if (backslash) {
1162 leave intact backslashes from leave (below)
1163 deprecate \1 in strings and sub replacements
1164 handle string-changing backslashes \l \U \Q \E, etc.
1165 switch (what was escaped) {
1166 handle - in a transliteration (becomes a literal -)
1167 handle \132 octal characters
1168 handle 0x15 hex characters
1169 handle \cV (control V)
1170 handle printf backslashes (\f, \r, \n, etc)
1171 } (end switch)
1172 } (end if backslash)
1173 } (end while character to read)
1174
1175*/
1176
76e3520e 1177STATIC char *
cea2e8a9 1178S_scan_const(pTHX_ char *start)
79072805 1179{
3280af22 1180 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1181 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1182 register char *s = start; /* start of the constant */
1183 register char *d = SvPVX(sv); /* destination for copies */
1184 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1185 bool didrange = FALSE; /* did we just finish a range? */
7e2040f0 1186 bool has_utf = FALSE; /* embedded \x{} */
02aa26ce 1187 I32 len; /* ? */
012bcf8d
GS
1188 UV uv;
1189
ac2262e3 1190 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1191 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1192 : UTF;
ac2262e3 1193 I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1194 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1195 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1196 : UTF;
dff6d3cd 1197 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1198 PL_lex_inpat
4a2d328f 1199 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1200 : "";
79072805
LW
1201
1202 while (s < send || dorange) {
02aa26ce 1203 /* get transliterations out of the way (they're most literal) */
3280af22 1204 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1205 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1206 if (dorange) {
02aa26ce 1207 I32 i; /* current expanded character */
8ada0baa 1208 I32 min; /* first character in range */
02aa26ce
NT
1209 I32 max; /* last character in range */
1210
1211 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1212 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1213 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1214 d -= 2; /* eat the first char and the - */
1215
8ada0baa
JH
1216 min = (U8)*d; /* first char in range */
1217 max = (U8)d[1]; /* last char in range */
1218
c2e66d9e 1219 if (min > max) {
01ec43d0
GS
1220 Perl_croak(aTHX_
1221 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1222 (char)min, (char)max);
c2e66d9e
GS
1223 }
1224
8ada0baa
JH
1225#ifndef ASCIIish
1226 if ((isLOWER(min) && isLOWER(max)) ||
1227 (isUPPER(min) && isUPPER(max))) {
1228 if (isLOWER(min)) {
1229 for (i = min; i <= max; i++)
1230 if (isLOWER(i))
1231 *d++ = i;
1232 } else {
1233 for (i = min; i <= max; i++)
1234 if (isUPPER(i))
1235 *d++ = i;
1236 }
1237 }
1238 else
1239#endif
1240 for (i = min; i <= max; i++)
1241 *d++ = i;
02aa26ce
NT
1242
1243 /* mark the range as done, and continue */
79072805 1244 dorange = FALSE;
01ec43d0 1245 didrange = TRUE;
79072805 1246 continue;
c2e66d9e 1247 }
02aa26ce
NT
1248
1249 /* range begins (ignore - as first or last char) */
79072805 1250 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1251 if (didrange) {
1fafa243 1252 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1253 }
a0ed51b3 1254 if (utf) {
a176fa2a 1255 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1256 s++;
1257 continue;
1258 }
79072805
LW
1259 dorange = TRUE;
1260 s++;
01ec43d0
GS
1261 }
1262 else {
1263 didrange = FALSE;
1264 }
79072805 1265 }
02aa26ce
NT
1266
1267 /* if we get here, we're not doing a transliteration */
1268
0f5d15d6
IZ
1269 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1270 except for the last char, which will be done separately. */
3280af22 1271 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1272 if (s[2] == '#') {
1273 while (s < send && *s != ')')
1274 *d++ = *s++;
155aba94
GS
1275 }
1276 else if (s[2] == '{' /* This should match regcomp.c */
1277 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1278 {
cc6b7395 1279 I32 count = 1;
0f5d15d6 1280 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1281 char c;
1282
d9f97599
GS
1283 while (count && (c = *regparse)) {
1284 if (c == '\\' && regparse[1])
1285 regparse++;
cc6b7395
IZ
1286 else if (c == '{')
1287 count++;
1288 else if (c == '}')
1289 count--;
d9f97599 1290 regparse++;
cc6b7395 1291 }
5bdf89e7
IZ
1292 if (*regparse != ')') {
1293 regparse--; /* Leave one char for continuation. */
cc6b7395 1294 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1295 }
0f5d15d6 1296 while (s < regparse)
cc6b7395
IZ
1297 *d++ = *s++;
1298 }
748a9306 1299 }
02aa26ce
NT
1300
1301 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1302 else if (*s == '#' && PL_lex_inpat &&
1303 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1304 while (s+1 < send && *s != '\n')
1305 *d++ = *s++;
1306 }
02aa26ce
NT
1307
1308 /* check for embedded arrays (@foo, @:foo, @'foo, @{foo}, @$foo) */
7e2040f0
GS
1309 else if (*s == '@' && s[1]
1310 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$", s[1])))
79072805 1311 break;
02aa26ce
NT
1312
1313 /* check for embedded scalars. only stop if we're sure it's a
1314 variable.
1315 */
79072805 1316 else if (*s == '$') {
3280af22 1317 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1318 break;
c277df42 1319 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1320 break; /* in regexp, $ might be tail anchor */
1321 }
02aa26ce 1322
a0ed51b3
LW
1323 /* (now in tr/// code again) */
1324
d008e5eb 1325 if (*s & 0x80 && thisutf) {
8e84507e 1326 (void)utf8_to_uv_chk((U8*)s, &len, 0);
012bcf8d
GS
1327 if (len == 1) {
1328 /* illegal UTF8, make it valid */
012bcf8d 1329 char *old_pvx = SvPVX(sv);
bee7c419
GS
1330 /* need space for one extra char (NOTE: SvCUR() not set here) */
1331 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
012bcf8d
GS
1332 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1333 }
1334 else {
1335 while (len--)
1336 *d++ = *s++;
1337 }
1338 has_utf = TRUE;
1339 continue;
a0ed51b3
LW
1340 }
1341
02aa26ce 1342 /* backslashes */
79072805
LW
1343 if (*s == '\\' && s+1 < send) {
1344 s++;
02aa26ce
NT
1345
1346 /* some backslashes we leave behind */
c9f97d15 1347 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1348 *d++ = '\\';
1349 *d++ = *s++;
1350 continue;
1351 }
02aa26ce
NT
1352
1353 /* deprecate \1 in strings and substitution replacements */
3280af22 1354 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1355 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1356 {
d008e5eb 1357 dTHR; /* only for ckWARN */
599cee73 1358 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1359 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1360 *--s = '$';
1361 break;
1362 }
02aa26ce
NT
1363
1364 /* string-change backslash escapes */
3280af22 1365 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1366 --s;
1367 break;
1368 }
02aa26ce
NT
1369
1370 /* if we get here, it's either a quoted -, or a digit */
79072805 1371 switch (*s) {
02aa26ce
NT
1372
1373 /* quoted - in transliterations */
79072805 1374 case '-':
3280af22 1375 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1376 *d++ = *s++;
1377 continue;
1378 }
1379 /* FALL THROUGH */
1380 default:
11b8faa4
JH
1381 {
1382 dTHR;
7e84c16c 1383 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1384 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1385 "Unrecognized escape \\%c passed through",
1386 *s);
1387 /* default action is to copy the quoted character */
1388 *d++ = *s++;
1389 continue;
1390 }
02aa26ce
NT
1391
1392 /* \132 indicates an octal constant */
79072805
LW
1393 case '0': case '1': case '2': case '3':
1394 case '4': case '5': case '6': case '7':
b21ed0a9 1395 len = 0; /* disallow underscores */
012bcf8d 1396 uv = (UV)scan_oct(s, 3, &len);
79072805 1397 s += len;
012bcf8d 1398 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1399
1400 /* \x24 indicates a hex constant */
79072805 1401 case 'x':
a0ed51b3
LW
1402 ++s;
1403 if (*s == '{') {
1404 char* e = strchr(s, '}');
adaeee49 1405 if (!e) {
a0ed51b3 1406 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1407 e = s;
1408 }
b21ed0a9 1409 len = 1; /* allow underscores */
012bcf8d
GS
1410 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1411 s = e + 1;
a0ed51b3
LW
1412 }
1413 else {
b21ed0a9 1414 len = 0; /* disallow underscores */
012bcf8d
GS
1415 uv = (UV)scan_hex(s, 2, &len);
1416 s += len;
1417 }
1418
1419 NUM_ESCAPE_INSERT:
1420 /* Insert oct or hex escaped character.
1421 * There will always enough room in sv since such escapes will
1422 * be longer than any utf8 sequence they can end up as
1423 */
1424 if (uv > 127) {
1425 if (!thisutf && !has_utf && uv > 255) {
1426 /* might need to recode whatever we have accumulated so far
1427 * if it contains any hibit chars
1428 */
1429 int hicount = 0;
1430 char *c;
1431 for (c = SvPVX(sv); c < d; c++) {
1432 if (*c & 0x80)
1433 hicount++;
1434 }
1435 if (hicount) {
1436 char *old_pvx = SvPVX(sv);
1437 char *src, *dst;
1438 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1439
1440 src = d - 1;
1441 d += hicount;
1442 dst = d - 1;
1443
1444 while (src < dst) {
1445 if (*src & 0x80) {
1446 dst--;
1447 uv_to_utf8((U8*)dst, (U8)*src--);
1448 dst--;
1449 }
1450 else {
1451 *dst-- = *src--;
1452 }
1453 }
1454 }
1455 }
1456
1457 if (thisutf || uv > 255) {
1458 d = (char*)uv_to_utf8((U8*)d, uv);
7e2040f0 1459 has_utf = TRUE;
012bcf8d 1460 }
a0ed51b3 1461 else {
012bcf8d 1462 *d++ = (char)uv;
a0ed51b3 1463 }
012bcf8d
GS
1464 }
1465 else {
1466 *d++ = (char)uv;
a0ed51b3 1467 }
79072805 1468 continue;
02aa26ce 1469
4a2d328f
IZ
1470 /* \N{latin small letter a} is a named character */
1471 case 'N':
423cee85
JH
1472 ++s;
1473 if (*s == '{') {
1474 char* e = strchr(s, '}');
155aba94 1475 SV *res;
423cee85
JH
1476 STRLEN len;
1477 char *str;
423cee85
JH
1478
1479 if (!e) {
5777a3f7 1480 yyerror("Missing right brace on \\N{}");
423cee85
JH
1481 e = s - 1;
1482 goto cont_scan;
1483 }
1484 res = newSVpvn(s + 1, e - s - 1);
1485 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1486 res, Nullsv, "\\N{...}" );
423cee85 1487 str = SvPV(res,len);
f08d6ad9
GS
1488 if (!has_utf && SvUTF8(res)) {
1489 char *ostart = SvPVX(sv);
1490 SvCUR_set(sv, d - ostart);
1491 SvPOK_on(sv);
e4f3eed8 1492 *d = '\0';
f08d6ad9 1493 sv_utf8_upgrade(sv);
d2f449dd
SB
1494 /* this just broke our allocation above... */
1495 SvGROW(sv, send - start);
f08d6ad9 1496 d = SvPVX(sv) + SvCUR(sv);
e1992b6d 1497 has_utf = TRUE;
f08d6ad9 1498 }
423cee85
JH
1499 if (len > e - s + 4) {
1500 char *odest = SvPVX(sv);
1501
1502 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1503 d = SvPVX(sv) + (d - odest);
1504 }
1505 Copy(str, d, len, char);
1506 d += len;
1507 SvREFCNT_dec(res);
1508 cont_scan:
1509 s = e + 1;
1510 }
1511 else
5777a3f7 1512 yyerror("Missing braces on \\N{}");
423cee85
JH
1513 continue;
1514
02aa26ce 1515 /* \c is a control character */
79072805
LW
1516 case 'c':
1517 s++;
9d116dd7
JH
1518#ifdef EBCDIC
1519 *d = *s++;
1520 if (isLOWER(*d))
1521 *d = toUPPER(*d);
774a9426
GS
1522 *d = toCTRL(*d);
1523 d++;
9d116dd7 1524#else
bbce6d69 1525 len = *s++;
1526 *d++ = toCTRL(len);
9d116dd7 1527#endif
79072805 1528 continue;
02aa26ce
NT
1529
1530 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1531 case 'b':
1532 *d++ = '\b';
1533 break;
1534 case 'n':
1535 *d++ = '\n';
1536 break;
1537 case 'r':
1538 *d++ = '\r';
1539 break;
1540 case 'f':
1541 *d++ = '\f';
1542 break;
1543 case 't':
1544 *d++ = '\t';
1545 break;
34a3fe2a
PP
1546#ifdef EBCDIC
1547 case 'e':
1548 *d++ = '\047'; /* CP 1047 */
1549 break;
1550 case 'a':
1551 *d++ = '\057'; /* CP 1047 */
1552 break;
1553#else
79072805
LW
1554 case 'e':
1555 *d++ = '\033';
1556 break;
1557 case 'a':
1558 *d++ = '\007';
1559 break;
34a3fe2a 1560#endif
02aa26ce
NT
1561 } /* end switch */
1562
79072805
LW
1563 s++;
1564 continue;
02aa26ce
NT
1565 } /* end if (backslash) */
1566
79072805 1567 *d++ = *s++;
02aa26ce
NT
1568 } /* while loop to process each character */
1569
1570 /* terminate the string and set up the sv */
79072805 1571 *d = '\0';
463ee0b2 1572 SvCUR_set(sv, d - SvPVX(sv));
79072805 1573 SvPOK_on(sv);
7e2040f0
GS
1574 if (has_utf)
1575 SvUTF8_on(sv);
79072805 1576
02aa26ce 1577 /* shrink the sv if we allocated more than we used */
79072805
LW
1578 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1579 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1580 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1581 }
02aa26ce 1582
9b599b2a 1583 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1584 if (s > PL_bufptr) {
1585 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1586 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1587 sv, Nullsv,
3280af22 1588 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1589 ? "tr"
3280af22 1590 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1591 ? "s"
1592 : "qq")));
79072805 1593 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1594 } else
8990e307 1595 SvREFCNT_dec(sv);
79072805
LW
1596 return s;
1597}
1598
ffb4593c
NT
1599/* S_intuit_more
1600 * Returns TRUE if there's more to the expression (e.g., a subscript),
1601 * FALSE otherwise.
ffb4593c
NT
1602 *
1603 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1604 *
1605 * ->[ and ->{ return TRUE
1606 * { and [ outside a pattern are always subscripts, so return TRUE
1607 * if we're outside a pattern and it's not { or [, then return FALSE
1608 * if we're in a pattern and the first char is a {
1609 * {4,5} (any digits around the comma) returns FALSE
1610 * if we're in a pattern and the first char is a [
1611 * [] returns FALSE
1612 * [SOMETHING] has a funky algorithm to decide whether it's a
1613 * character class or not. It has to deal with things like
1614 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1615 * anything else returns TRUE
1616 */
1617
9cbb5ea2
GS
1618/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1619
76e3520e 1620STATIC int
cea2e8a9 1621S_intuit_more(pTHX_ register char *s)
79072805 1622{
3280af22 1623 if (PL_lex_brackets)
79072805
LW
1624 return TRUE;
1625 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1626 return TRUE;
1627 if (*s != '{' && *s != '[')
1628 return FALSE;
3280af22 1629 if (!PL_lex_inpat)
79072805
LW
1630 return TRUE;
1631
1632 /* In a pattern, so maybe we have {n,m}. */
1633 if (*s == '{') {
1634 s++;
1635 if (!isDIGIT(*s))
1636 return TRUE;
1637 while (isDIGIT(*s))
1638 s++;
1639 if (*s == ',')
1640 s++;
1641 while (isDIGIT(*s))
1642 s++;
1643 if (*s == '}')
1644 return FALSE;
1645 return TRUE;
1646
1647 }
1648
1649 /* On the other hand, maybe we have a character class */
1650
1651 s++;
1652 if (*s == ']' || *s == '^')
1653 return FALSE;
1654 else {
ffb4593c 1655 /* this is terrifying, and it works */
79072805
LW
1656 int weight = 2; /* let's weigh the evidence */
1657 char seen[256];
f27ffc4a 1658 unsigned char un_char = 255, last_un_char;
93a17b20 1659 char *send = strchr(s,']');
3280af22 1660 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1661
1662 if (!send) /* has to be an expression */
1663 return TRUE;
1664
1665 Zero(seen,256,char);
1666 if (*s == '$')
1667 weight -= 3;
1668 else if (isDIGIT(*s)) {
1669 if (s[1] != ']') {
1670 if (isDIGIT(s[1]) && s[2] == ']')
1671 weight -= 10;
1672 }
1673 else
1674 weight -= 100;
1675 }
1676 for (; s < send; s++) {
1677 last_un_char = un_char;
1678 un_char = (unsigned char)*s;
1679 switch (*s) {
1680 case '@':
1681 case '&':
1682 case '$':
1683 weight -= seen[un_char] * 10;
7e2040f0 1684 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1685 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1686 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1687 weight -= 100;
1688 else
1689 weight -= 10;
1690 }
1691 else if (*s == '$' && s[1] &&
93a17b20
LW
1692 strchr("[#!%*<>()-=",s[1])) {
1693 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1694 weight -= 10;
1695 else
1696 weight -= 1;
1697 }
1698 break;
1699 case '\\':
1700 un_char = 254;
1701 if (s[1]) {
93a17b20 1702 if (strchr("wds]",s[1]))
79072805
LW
1703 weight += 100;
1704 else if (seen['\''] || seen['"'])
1705 weight += 1;
93a17b20 1706 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1707 weight += 40;
1708 else if (isDIGIT(s[1])) {
1709 weight += 40;
1710 while (s[1] && isDIGIT(s[1]))
1711 s++;
1712 }
1713 }
1714 else
1715 weight += 100;
1716 break;
1717 case '-':
1718 if (s[1] == '\\')
1719 weight += 50;
93a17b20 1720 if (strchr("aA01! ",last_un_char))
79072805 1721 weight += 30;
93a17b20 1722 if (strchr("zZ79~",s[1]))
79072805 1723 weight += 30;
f27ffc4a
GS
1724 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1725 weight -= 5; /* cope with negative subscript */
79072805
LW
1726 break;
1727 default:
93a17b20 1728 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1729 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1730 char *d = tmpbuf;
1731 while (isALPHA(*s))
1732 *d++ = *s++;
1733 *d = '\0';
1734 if (keyword(tmpbuf, d - tmpbuf))
1735 weight -= 150;
1736 }
1737 if (un_char == last_un_char + 1)
1738 weight += 5;
1739 weight -= seen[un_char];
1740 break;
1741 }
1742 seen[un_char]++;
1743 }
1744 if (weight >= 0) /* probably a character class */
1745 return FALSE;
1746 }
1747
1748 return TRUE;
1749}
ffed7fef 1750
ffb4593c
NT
1751/*
1752 * S_intuit_method
1753 *
1754 * Does all the checking to disambiguate
1755 * foo bar
1756 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1757 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1758 *
1759 * First argument is the stuff after the first token, e.g. "bar".
1760 *
1761 * Not a method if bar is a filehandle.
1762 * Not a method if foo is a subroutine prototyped to take a filehandle.
1763 * Not a method if it's really "Foo $bar"
1764 * Method if it's "foo $bar"
1765 * Not a method if it's really "print foo $bar"
1766 * Method if it's really "foo package::" (interpreted as package->foo)
1767 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1768 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1769 * =>
1770 */
1771
76e3520e 1772STATIC int
cea2e8a9 1773S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1774{
1775 char *s = start + (*start == '$');
3280af22 1776 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1777 STRLEN len;
1778 GV* indirgv;
1779
1780 if (gv) {
b6c543e3 1781 CV *cv;
a0d0e21e
LW
1782 if (GvIO(gv))
1783 return 0;
b6c543e3
IZ
1784 if ((cv = GvCVu(gv))) {
1785 char *proto = SvPVX(cv);
1786 if (proto) {
1787 if (*proto == ';')
1788 proto++;
1789 if (*proto == '*')
1790 return 0;
1791 }
1792 } else
a0d0e21e
LW
1793 gv = 0;
1794 }
8903cb82 1795 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1796 /* start is the beginning of the possible filehandle/object,
1797 * and s is the end of it
1798 * tmpbuf is a copy of it
1799 */
1800
a0d0e21e 1801 if (*start == '$') {
3280af22 1802 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1803 return 0;
1804 s = skipspace(s);
3280af22
NIS
1805 PL_bufptr = start;
1806 PL_expect = XREF;
a0d0e21e
LW
1807 return *s == '(' ? FUNCMETH : METHOD;
1808 }
1809 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1810 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1811 len -= 2;
1812 tmpbuf[len] = '\0';
1813 goto bare_package;
1814 }
1815 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1816 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1817 return 0;
1818 /* filehandle or package name makes it a method */
89bfa8cd 1819 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1820 s = skipspace(s);
3280af22 1821 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1822 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1823 bare_package:
3280af22 1824 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1825 newSVpvn(tmpbuf,len));
3280af22
NIS
1826 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1827 PL_expect = XTERM;
a0d0e21e 1828 force_next(WORD);
3280af22 1829 PL_bufptr = s;
a0d0e21e
LW
1830 return *s == '(' ? FUNCMETH : METHOD;
1831 }
1832 }
1833 return 0;
1834}
1835
ffb4593c
NT
1836/*
1837 * S_incl_perldb
1838 * Return a string of Perl code to load the debugger. If PERL5DB
1839 * is set, it will return the contents of that, otherwise a
1840 * compile-time require of perl5db.pl.
1841 */
1842
76e3520e 1843STATIC char*
cea2e8a9 1844S_incl_perldb(pTHX)
a0d0e21e 1845{
3280af22 1846 if (PL_perldb) {
76e3520e 1847 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1848
1849 if (pdb)
1850 return pdb;
61bb5906 1851 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1852 return "BEGIN { require 'perl5db.pl' }";
1853 }
1854 return "";
1855}
1856
1857
16d20bd9
AD
1858/* Encoded script support. filter_add() effectively inserts a
1859 * 'pre-processing' function into the current source input stream.
1860 * Note that the filter function only applies to the current source file
1861 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1862 *
1863 * The datasv parameter (which may be NULL) can be used to pass
1864 * private data to this instance of the filter. The filter function
1865 * can recover the SV using the FILTER_DATA macro and use it to
1866 * store private buffers and state information.
1867 *
1868 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1869 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1870 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1871 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1872 * private use must be set using malloc'd pointers.
1873 */
16d20bd9
AD
1874
1875SV *
864dbfa3 1876Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1877{
f4c556ac
GS
1878 if (!funcp)
1879 return Nullsv;
1880
3280af22
NIS
1881 if (!PL_rsfp_filters)
1882 PL_rsfp_filters = newAV();
16d20bd9 1883 if (!datasv)
8c52afec 1884 datasv = NEWSV(255,0);
16d20bd9 1885 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1886 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1887 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1888 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1889 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1890 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1891 av_unshift(PL_rsfp_filters, 1);
1892 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1893 return(datasv);
1894}
1895
1896
1897/* Delete most recently added instance of this filter function. */
a0d0e21e 1898void
864dbfa3 1899Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1900{
e0c19803 1901 SV *datasv;
f4c556ac 1902 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1903 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1904 return;
1905 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1906 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1907 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1908 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1909 IoANY(datasv) = (void *)NULL;
3280af22 1910 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1911
16d20bd9
AD
1912 return;
1913 }
1914 /* we need to search for the correct entry and clear it */
cea2e8a9 1915 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1916}
1917
1918
1919/* Invoke the n'th filter function for the current rsfp. */
1920I32
864dbfa3 1921Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1922
1923
1924 /* 0 = read one text line */
a0d0e21e 1925{
16d20bd9
AD
1926 filter_t funcp;
1927 SV *datasv = NULL;
e50aee73 1928
3280af22 1929 if (!PL_rsfp_filters)
16d20bd9 1930 return -1;
3280af22 1931 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1932 /* Provide a default input filter to make life easy. */
1933 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1934 DEBUG_P(PerlIO_printf(Perl_debug_log,
1935 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1936 if (maxlen) {
1937 /* Want a block */
1938 int len ;
1939 int old_len = SvCUR(buf_sv) ;
1940
1941 /* ensure buf_sv is large enough */
1942 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1943 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1944 if (PerlIO_error(PL_rsfp))
37120919
AD
1945 return -1; /* error */
1946 else
1947 return 0 ; /* end of file */
1948 }
16d20bd9
AD
1949 SvCUR_set(buf_sv, old_len + len) ;
1950 } else {
1951 /* Want a line */
3280af22
NIS
1952 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1953 if (PerlIO_error(PL_rsfp))
37120919
AD
1954 return -1; /* error */
1955 else
1956 return 0 ; /* end of file */
1957 }
16d20bd9
AD
1958 }
1959 return SvCUR(buf_sv);
1960 }
1961 /* Skip this filter slot if filter has been deleted */
3280af22 1962 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1963 DEBUG_P(PerlIO_printf(Perl_debug_log,
1964 "filter_read %d: skipped (filter deleted)\n",
1965 idx));
16d20bd9
AD
1966 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1967 }
1968 /* Get function pointer hidden within datasv */
4755096e 1969 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1970 DEBUG_P(PerlIO_printf(Perl_debug_log,
1971 "filter_read %d: via function %p (%s)\n",
1972 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1973 /* Call function. The function is expected to */
1974 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1975 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1976 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1977}
1978
76e3520e 1979STATIC char *
cea2e8a9 1980S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1981{
c39cd008 1982#ifdef PERL_CR_FILTER
3280af22 1983 if (!PL_rsfp_filters) {
c39cd008 1984 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1985 }
1986#endif
3280af22 1987 if (PL_rsfp_filters) {
16d20bd9 1988
55497cff 1989 if (!append)
1990 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1991 if (FILTER_READ(0, sv, 0) > 0)
1992 return ( SvPVX(sv) ) ;
1993 else
1994 return Nullch ;
1995 }
9d116dd7 1996 else
fd049845 1997 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1998}
1999
01ec43d0
GS
2000STATIC HV *
2001S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2002{
2003 GV *gv;
2004
01ec43d0 2005 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2006 return PL_curstash;
2007
2008 if (len > 2 &&
2009 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2010 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2011 {
2012 return GvHV(gv); /* Foo:: */
def3634b
GS
2013 }
2014
2015 /* use constant CLASS => 'MyClass' */
2016 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2017 SV *sv;
2018 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2019 pkgname = SvPV_nolen(sv);
2020 }
2021 }
2022
2023 return gv_stashpv(pkgname, FALSE);
2024}
a0d0e21e 2025
748a9306
LW
2026#ifdef DEBUGGING
2027 static char* exp_name[] =
09bef843
SB
2028 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2029 "ATTRTERM", "TERMBLOCK"
2030 };
748a9306 2031#endif
463ee0b2 2032
02aa26ce
NT
2033/*
2034 yylex
2035
2036 Works out what to call the token just pulled out of the input
2037 stream. The yacc parser takes care of taking the ops we return and
2038 stitching them into a tree.
2039
2040 Returns:
2041 PRIVATEREF
2042
2043 Structure:
2044 if read an identifier
2045 if we're in a my declaration
2046 croak if they tried to say my($foo::bar)
2047 build the ops for a my() declaration
2048 if it's an access to a my() variable
2049 are we in a sort block?
2050 croak if my($a); $a <=> $b
2051 build ops for access to a my() variable
2052 if in a dq string, and they've said @foo and we can't find @foo
2053 croak
2054 build ops for a bareword
2055 if we already built the token before, use it.
2056*/
2057
bf4acbe4
GS
2058#ifdef __SC__
2059#pragma segment Perl_yylex
2060#endif
864dbfa3
GS
2061int
2062#ifdef USE_PURE_BISON
cea2e8a9 2063Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
864dbfa3 2064#else
cea2e8a9 2065Perl_yylex(pTHX)
864dbfa3 2066#endif
378cc40b 2067{
11343788 2068 dTHR;
79072805 2069 register char *s;
378cc40b 2070 register char *d;
79072805 2071 register I32 tmp;
463ee0b2 2072 STRLEN len;
161b471a
NIS
2073 GV *gv = Nullgv;
2074 GV **gvp = 0;
a687059c 2075
a1a0e61e
TD
2076#ifdef USE_PURE_BISON
2077 yylval_pointer = lvalp;
2078 yychar_pointer = lcharp;
2079#endif
2080
02aa26ce 2081 /* check if there's an identifier for us to look at */
3280af22 2082 if (PL_pending_ident) {
02aa26ce 2083 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2084 char pit = PL_pending_ident;
2085 PL_pending_ident = 0;
bbce6d69 2086
02aa26ce
NT
2087 /* if we're in a my(), we can't allow dynamics here.
2088 $foo'bar has already been turned into $foo::bar, so
2089 just check for colons.
2090
2091 if it's a legal name, the OP is a PADANY.
2092 */
3280af22 2093 if (PL_in_my) {
77ca0c92 2094 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2095 if (strchr(PL_tokenbuf,':'))
2096 yyerror(Perl_form(aTHX_ "No package name allowed for "
2097 "variable %s in \"our\"",
2098 PL_tokenbuf));
77ca0c92
LW
2099 tmp = pad_allocmy(PL_tokenbuf);
2100 }
2101 else {
2102 if (strchr(PL_tokenbuf,':'))
2103 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2104
77ca0c92
LW
2105 yylval.opval = newOP(OP_PADANY, 0);
2106 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2107 return PRIVATEREF;
2108 }
bbce6d69 2109 }
2110
02aa26ce
NT
2111 /*
2112 build the ops for accesses to a my() variable.
2113
2114 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2115 then used in a comparison. This catches most, but not
2116 all cases. For instance, it catches
2117 sort { my($a); $a <=> $b }
2118 but not
2119 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2120 (although why you'd do that is anyone's guess).
2121 */
2122
3280af22 2123 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2124#ifdef USE_THREADS
54b9620d 2125 /* Check for single character per-thread SVs */
3280af22
NIS
2126 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2127 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2128 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2129 {
2faa37cc 2130 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2131 yylval.opval->op_targ = tmp;
2132 return PRIVATEREF;
2133 }
2134#endif /* USE_THREADS */
3280af22 2135 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2136 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2137 /* might be an "our" variable" */
f472eb5c 2138 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2139 /* build ops for a bareword */
f472eb5c
GS
2140 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2141 sv_catpvn(sym, "::", 2);
2142 sv_catpv(sym, PL_tokenbuf+1);
2143 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2144 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2145 gv_fetchpv(SvPVX(sym),
77ca0c92 2146 (PL_in_eval
f472eb5c
GS
2147 ? (GV_ADDMULTI | GV_ADDINEVAL)
2148 : TRUE
77ca0c92
LW
2149 ),
2150 ((PL_tokenbuf[0] == '$') ? SVt_PV
2151 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2152 : SVt_PVHV));
2153 return WORD;
2154 }
2155
02aa26ce 2156 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2157 if (PL_last_lop_op == OP_SORT &&
2158 PL_tokenbuf[0] == '$' &&
2159 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2160 && !PL_tokenbuf[2])
bbce6d69 2161 {
3280af22
NIS
2162 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2163 d < PL_bufend && *d != '\n';
a863c7d1
MB
2164 d++)
2165 {
2166 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2167 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2168 PL_tokenbuf);
a863c7d1 2169 }
bbce6d69 2170 }
2171 }
bbce6d69 2172
a863c7d1
MB
2173 yylval.opval = newOP(OP_PADANY, 0);
2174 yylval.opval->op_targ = tmp;
2175 return PRIVATEREF;
2176 }
bbce6d69 2177 }
2178
02aa26ce
NT
2179 /*
2180 Whine if they've said @foo in a doublequoted string,
2181 and @foo isn't a variable we can find in the symbol
2182 table.
2183 */
3280af22
NIS
2184 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2185 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2186 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2187 && ckWARN(WARN_AMBIGUOUS))
2188 {
2189 /* Downgraded from fatal to warning 20000522 mjd */
2190 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2191 "Possible unintended interpolation of %s in string",
2192 PL_tokenbuf);
2193 }
bbce6d69 2194 }
2195
02aa26ce 2196 /* build ops for a bareword */
3280af22 2197 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2198 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2199 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2200 ((PL_tokenbuf[0] == '$') ? SVt_PV
2201 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2202 : SVt_PVHV));
2203 return WORD;
2204 }
2205
02aa26ce
NT
2206 /* no identifier pending identification */
2207
3280af22 2208 switch (PL_lex_state) {
79072805
LW
2209#ifdef COMMENTARY
2210 case LEX_NORMAL: /* Some compilers will produce faster */
2211 case LEX_INTERPNORMAL: /* code if we comment these out. */
2212 break;
2213#endif
2214
09bef843 2215 /* when we've already built the next token, just pull it out of the queue */
79072805 2216 case LEX_KNOWNEXT:
3280af22
NIS
2217 PL_nexttoke--;
2218 yylval = PL_nextval[PL_nexttoke];
2219 if (!PL_nexttoke) {
2220 PL_lex_state = PL_lex_defer;
2221 PL_expect = PL_lex_expect;
2222 PL_lex_defer = LEX_NORMAL;
463ee0b2 2223 }
3280af22 2224 return(PL_nexttype[PL_nexttoke]);
79072805 2225
02aa26ce 2226 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2227 when we get here, PL_bufptr is at the \
02aa26ce 2228 */
79072805
LW
2229 case LEX_INTERPCASEMOD:
2230#ifdef DEBUGGING
3280af22 2231 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2232 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2233#endif
02aa26ce 2234 /* handle \E or end of string */
3280af22 2235 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2236 char oldmod;
02aa26ce
NT
2237
2238 /* if at a \E */
3280af22
NIS
2239 if (PL_lex_casemods) {
2240 oldmod = PL_lex_casestack[--PL_lex_casemods];
2241 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2242
3280af22
NIS
2243 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2244 PL_bufptr += 2;
2245 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2246 }
79072805
LW
2247 return ')';
2248 }
3280af22
NIS
2249 if (PL_bufptr != PL_bufend)
2250 PL_bufptr += 2;
2251 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2252 return yylex();
79072805
LW
2253 }
2254 else {
3280af22 2255 s = PL_bufptr + 1;
79072805
LW
2256 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2257 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2258 if (strchr("LU", *s) &&
3280af22 2259 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2260 {
3280af22 2261 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2262 return ')';
2263 }
3280af22
NIS
2264 if (PL_lex_casemods > 10) {
2265 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2266 if (newlb != PL_lex_casestack) {
a0d0e21e 2267 SAVEFREEPV(newlb);
3280af22 2268 PL_lex_casestack = newlb;
a0d0e21e
LW
2269 }
2270 }
3280af22
NIS
2271 PL_lex_casestack[PL_lex_casemods++] = *s;
2272 PL_lex_casestack[PL_lex_casemods] = '\0';
2273 PL_lex_state = LEX_INTERPCONCAT;
2274 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2275 force_next('(');
2276 if (*s == 'l')
3280af22 2277 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2278 else if (*s == 'u')
3280af22 2279 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2280 else if (*s == 'L')
3280af22 2281 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2282 else if (*s == 'U')
3280af22 2283 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2284 else if (*s == 'Q')
3280af22 2285 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2286 else
cea2e8a9 2287 Perl_croak(aTHX_ "panic: yylex");
3280af22 2288 PL_bufptr = s + 1;
79072805 2289 force_next(FUNC);
3280af22
NIS
2290 if (PL_lex_starts) {
2291 s = PL_bufptr;
2292 PL_lex_starts = 0;
79072805
LW
2293 Aop(OP_CONCAT);
2294 }
2295 else
cea2e8a9 2296 return yylex();
79072805
LW
2297 }
2298
55497cff 2299 case LEX_INTERPPUSH:
2300 return sublex_push();
2301
79072805 2302 case LEX_INTERPSTART:
3280af22 2303 if (PL_bufptr == PL_bufend)
79072805 2304 return sublex_done();
3280af22
NIS
2305 PL_expect = XTERM;
2306 PL_lex_dojoin = (*PL_bufptr == '@');
2307 PL_lex_state = LEX_INTERPNORMAL;
2308 if (PL_lex_dojoin) {
2309 PL_nextval[PL_nexttoke].ival = 0;
79072805 2310 force_next(',');
554b3eca 2311#ifdef USE_THREADS
533c011a
NIS
2312 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2313 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2314 force_next(PRIVATEREF);
2315#else
a0d0e21e 2316 force_ident("\"", '$');
554b3eca 2317#endif /* USE_THREADS */
3280af22 2318 PL_nextval[PL_nexttoke].ival = 0;
79072805 2319 force_next('$');
3280af22 2320 PL_nextval[PL_nexttoke].ival = 0;
79072805 2321 force_next('(');
3280af22 2322 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2323 force_next(FUNC);
2324 }
3280af22
NIS
2325 if (PL_lex_starts++) {
2326 s = PL_bufptr;
79072805
LW
2327 Aop(OP_CONCAT);
2328 }
cea2e8a9 2329 return yylex();
79072805
LW
2330
2331 case LEX_INTERPENDMAYBE:
3280af22
NIS
2332 if (intuit_more(PL_bufptr)) {
2333 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2334 break;
2335 }
2336 /* FALL THROUGH */
2337
2338 case LEX_INTERPEND:
3280af22
NIS
2339 if (PL_lex_dojoin) {
2340 PL_lex_dojoin = FALSE;
2341 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2342 return ')';
2343 }
43a16006 2344 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2345 && SvEVALED(PL_lex_repl))
43a16006 2346 {
e9fa98b2 2347 if (PL_bufptr != PL_bufend)
cea2e8a9 2348 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2349 PL_lex_repl = Nullsv;
2350 }
79072805
LW
2351 /* FALLTHROUGH */
2352 case LEX_INTERPCONCAT:
2353#ifdef DEBUGGING
3280af22 2354 if (PL_lex_brackets)
cea2e8a9 2355 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2356#endif
3280af22 2357 if (PL_bufptr == PL_bufend)
79072805
LW
2358 return sublex_done();
2359
3280af22
NIS
2360 if (SvIVX(PL_linestr) == '\'') {
2361 SV *sv = newSVsv(PL_linestr);
2362 if (!PL_lex_inpat)
76e3520e 2363 sv = tokeq(sv);
3280af22 2364 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2365 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2366 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2367 s = PL_bufend;
79072805
LW
2368 }
2369 else {
3280af22 2370 s = scan_const(PL_bufptr);
79072805 2371 if (*s == '\\')
3280af22 2372 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2373 else
3280af22 2374 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2375 }
2376
3280af22
NIS
2377 if (s != PL_bufptr) {
2378 PL_nextval[PL_nexttoke] = yylval;
2379 PL_expect = XTERM;
79072805 2380 force_next(THING);
3280af22 2381 if (PL_lex_starts++)
79072805
LW
2382 Aop(OP_CONCAT);
2383 else {
3280af22 2384 PL_bufptr = s;
cea2e8a9 2385 return yylex();
79072805
LW
2386 }
2387 }
2388
cea2e8a9 2389 return yylex();
a0d0e21e 2390 case LEX_FORMLINE:
3280af22
NIS
2391 PL_lex_state = LEX_NORMAL;
2392 s = scan_formline(PL_bufptr);
2393 if (!PL_lex_formbrack)
a0d0e21e
LW
2394 goto rightbracket;
2395 OPERATOR(';');
79072805
LW
2396 }
2397
3280af22
NIS
2398 s = PL_bufptr;
2399 PL_oldoldbufptr = PL_oldbufptr;
2400 PL_oldbufptr = s;
79072805 2401 DEBUG_p( {
bf49b057
GS
2402 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2403 exp_name[PL_expect], s);
79072805 2404 } )
463ee0b2
LW
2405
2406 retry:
378cc40b
LW
2407 switch (*s) {
2408 default:
7e2040f0 2409 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2410 goto keylookup;
cea2e8a9 2411 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2412 case 4:
2413 case 26:
2414 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2415 case 0:
3280af22
NIS
2416 if (!PL_rsfp) {
2417 PL_last_uni = 0;
2418 PL_last_lop = 0;
2419 if (PL_lex_brackets)
d98d5fff 2420 yyerror("Missing right curly or square bracket");
79072805 2421 TOKEN(0);
463ee0b2 2422 }
3280af22 2423 if (s++ < PL_bufend)
a687059c 2424 goto retry; /* ignore stray nulls */
3280af22
NIS
2425 PL_last_uni = 0;
2426 PL_last_lop = 0;
2427 if (!PL_in_eval && !PL_preambled) {
2428 PL_preambled = TRUE;
2429 sv_setpv(PL_linestr,incl_perldb());
2430 if (SvCUR(PL_linestr))
2431 sv_catpv(PL_linestr,";");
2432 if (PL_preambleav){
2433 while(AvFILLp(PL_preambleav) >= 0) {
2434 SV *tmpsv = av_shift(PL_preambleav);
2435 sv_catsv(PL_linestr, tmpsv);
2436 sv_catpv(PL_linestr, ";");
91b7def8 2437 sv_free(tmpsv);
2438 }
3280af22
NIS
2439 sv_free((SV*)PL_preambleav);
2440 PL_preambleav = NULL;
91b7def8 2441 }
3280af22
NIS
2442 if (PL_minus_n || PL_minus_p) {
2443 sv_catpv(PL_linestr, "LINE: while (<>) {");
2444 if (PL_minus_l)
2445 sv_catpv(PL_linestr,"chomp;");
2446 if (PL_minus_a) {
8fd239a7
CS
2447 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2448 if (gv)
2449 GvIMPORTED_AV_on(gv);
3280af22
NIS
2450 if (PL_minus_F) {
2451 if (strchr("/'\"", *PL_splitstr)
2452 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2453 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2454 else {
2455 char delim;
2456 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2457 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2458 delim = *s;
cea2e8a9 2459 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2460 "q" + (delim == '\''), delim);
3280af22 2461 for (s = PL_splitstr; *s; s++) {
54310121 2462 if (*s == '\\')
3280af22
NIS
2463 sv_catpvn(PL_linestr, "\\", 1);
2464 sv_catpvn(PL_linestr, s, 1);
54310121 2465 }
cea2e8a9 2466 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2467 }
2304df62
AD
2468 }
2469 else
3280af22 2470 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2471 }
79072805 2472 }
3280af22
NIS
2473 sv_catpv(PL_linestr, "\n");
2474 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2475 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2476 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2477 SV *sv = NEWSV(85,0);
2478
2479 sv_upgrade(sv, SVt_PVMG);
3280af22 2480 sv_setsv(sv,PL_linestr);
57843af0 2481 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2482 }
79072805 2483 goto retry;
a687059c 2484 }
e929a76b 2485 do {
01ec43d0
GS
2486 bool bof;
2487 bof = PL_rsfp && (PerlIO_tell(PL_rsfp) == 0); /* *Before* read! */
dea0fc0b
JH
2488 s = filter_gets(PL_linestr, PL_rsfp, 0);
2489 if (s == Nullch) {
e929a76b 2490 fake_eof:
3280af22
NIS
2491 if (PL_rsfp) {
2492 if (PL_preprocess && !PL_in_eval)
2493 (void)PerlProc_pclose(PL_rsfp);
2494 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2495 PerlIO_clearerr(PL_rsfp);
395c3793 2496 else
3280af22
NIS
2497 (void)PerlIO_close(PL_rsfp);
2498 PL_rsfp = Nullfp;
4a9ae47a 2499 PL_doextract = FALSE;
395c3793 2500 }
3280af22
NIS
2501 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2502 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2503 sv_catpv(PL_linestr,";}");
2504 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2505 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2506 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2507 goto retry;
2508 }
3280af22
NIS
2509 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2510 sv_setpv(PL_linestr,"");
79072805 2511 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2512 } else if (bof) {
2513 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2514 s = swallow_bom((U8*)s);
378cc40b 2515 }
3280af22 2516 if (PL_doextract) {
a0d0e21e 2517 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2518 PL_doextract = FALSE;
a0d0e21e
LW
2519
2520 /* Incest with pod. */
2521 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2522 sv_setpv(PL_linestr, "");
2523 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2524 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2525 PL_doextract = FALSE;
a0d0e21e 2526 }
b250498f 2527 }
463ee0b2 2528 incline(s);
3280af22
NIS
2529 } while (PL_doextract);
2530 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2531 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2532 SV *sv = NEWSV(85,0);
a687059c 2533
93a17b20 2534 sv_upgrade(sv, SVt_PVMG);
3280af22 2535 sv_setsv(sv,PL_linestr);
57843af0 2536 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2537 }
3280af22 2538 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2539 if (CopLINE(PL_curcop) == 1) {
3280af22 2540 while (s < PL_bufend && isSPACE(*s))
79072805 2541 s++;
a0d0e21e 2542 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2543 s++;
44a8e56a 2544 d = Nullch;
3280af22 2545 if (!PL_in_eval) {
44a8e56a 2546 if (*s == '#' && *(s+1) == '!')
2547 d = s + 2;
2548#ifdef ALTERNATE_SHEBANG
2549 else {
2550 static char as[] = ALTERNATE_SHEBANG;
2551 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2552 d = s + (sizeof(as) - 1);
2553 }
2554#endif /* ALTERNATE_SHEBANG */
2555 }
2556 if (d) {
b8378b72 2557 char *ipath;
774d564b 2558 char *ipathend;
b8378b72 2559
774d564b 2560 while (isSPACE(*d))
b8378b72
CS
2561 d++;
2562 ipath = d;
774d564b 2563 while (*d && !isSPACE(*d))
2564 d++;
2565 ipathend = d;
2566
2567#ifdef ARG_ZERO_IS_SCRIPT
2568 if (ipathend > ipath) {
2569 /*
2570 * HP-UX (at least) sets argv[0] to the script name,
2571 * which makes $^X incorrect. And Digital UNIX and Linux,
2572 * at least, set argv[0] to the basename of the Perl
2573 * interpreter. So, having found "#!", we'll set it right.
2574 */
2575 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2576 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2577 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2578 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2579 SvSETMAGIC(x);
2580 }
774d564b 2581 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2582 }
774d564b 2583#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2584
2585 /*
2586 * Look for options.
2587 */
748a9306 2588 d = instr(s,"perl -");
84e30d1a 2589 if (!d) {
748a9306 2590 d = instr(s,"perl");
84e30d1a
GS
2591#if defined(DOSISH)
2592 /* avoid getting into infinite loops when shebang
2593 * line contains "Perl" rather than "perl" */
2594 if (!d) {
2595 for (d = ipathend-4; d >= ipath; --d) {
2596 if ((*d == 'p' || *d == 'P')
2597 && !ibcmp(d, "perl", 4))
2598 {
2599 break;
2600 }
2601 }
2602 if (d < ipath)
2603 d = Nullch;
2604 }
2605#endif
2606 }
44a8e56a 2607#ifdef ALTERNATE_SHEBANG
2608 /*
2609 * If the ALTERNATE_SHEBANG on this system starts with a
2610 * character that can be part of a Perl expression, then if
2611 * we see it but not "perl", we're probably looking at the
2612 * start of Perl code, not a request to hand off to some
2613 * other interpreter. Similarly, if "perl" is there, but
2614 * not in the first 'word' of the line, we assume the line
2615 * contains the start of the Perl program.
44a8e56a 2616 */
2617 if (d && *s != '#') {
774d564b 2618 char *c = ipath;
44a8e56a 2619 while (*c && !strchr("; \t\r\n\f\v#", *c))
2620 c++;
2621 if (c < d)
2622 d = Nullch; /* "perl" not in first word; ignore */
2623 else
2624 *s = '#'; /* Don't try to parse shebang line */
2625 }
774d564b 2626#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2627#ifndef MACOS_TRADITIONAL
748a9306 2628 if (!d &&
44a8e56a 2629 *s == '#' &&
774d564b 2630 ipathend > ipath &&
3280af22 2631 !PL_minus_c &&
748a9306 2632 !instr(s,"indir") &&
3280af22 2633 instr(PL_origargv[0],"perl"))
748a9306 2634 {
9f68db38 2635 char **newargv;
9f68db38 2636
774d564b 2637 *ipathend = '\0';
2638 s = ipathend + 1;
3280af22 2639 while (s < PL_bufend && isSPACE(*s))
9f68db38 2640 s++;
3280af22
NIS
2641 if (s < PL_bufend) {
2642 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2643 newargv[1] = s;
3280af22 2644 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2645 s++;
2646 *s = '\0';
3280af22 2647 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2648 }
2649 else
3280af22 2650 newargv = PL_origargv;
774d564b 2651 newargv[0] = ipath;
80252599 2652 PerlProc_execv(ipath, newargv);
cea2e8a9 2653 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2654 }
bf4acbe4 2655#endif
748a9306 2656 if (d) {
3280af22
NIS
2657 U32 oldpdb = PL_perldb;
2658 bool oldn = PL_minus_n;
2659 bool oldp = PL_minus_p;
748a9306
LW
2660
2661 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2662 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2663
2664 if (*d++ == '-') {
8cc95fdb 2665 do {
2666 if (*d == 'M' || *d == 'm') {
2667 char *m = d;
2668 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2669 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2670 (int)(d - m), m);
2671 }
2672 d = moreswitches(d);
2673 } while (d);
155aba94
GS
2674 if ((PERLDB_LINE && !oldpdb) ||
2675 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2676 /* if we have already added "LINE: while (<>) {",
2677 we must not do it again */
748a9306 2678 {
3280af22
NIS
2679 sv_setpv(PL_linestr, "");
2680 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2681 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2682 PL_preambled = FALSE;
84902520 2683 if (PERLDB_LINE)
3280af22 2684 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2685 goto retry;
2686 }
a0d0e21e 2687 }
79072805 2688 }
9f68db38 2689 }
79072805 2690 }
3280af22
NIS
2691 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2692 PL_bufptr = s;
2693 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2694 return yylex();
ae986130 2695 }
378cc40b 2696 goto retry;
4fdae800 2697 case '\r':
6a27c188 2698#ifdef PERL_STRICT_CR
cea2e8a9
GS
2699 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2700 Perl_croak(aTHX_
cc507455 2701 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2702#endif
4fdae800 2703 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2704#ifdef MACOS_TRADITIONAL
2705 case '\312':
2706#endif
378cc40b
LW
2707 s++;
2708 goto retry;
378cc40b 2709 case '#':
e929a76b 2710 case '\n':
3280af22 2711 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2712 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2713 /* handle eval qq[#line 1 "foo"\n ...] */
2714 CopLINE_dec(PL_curcop);
2715 incline(s);
2716 }
3280af22 2717 d = PL_bufend;
a687059c 2718 while (s < d && *s != '\n')
378cc40b 2719 s++;
0f85fab0 2720 if (s < d)
378cc40b 2721 s++;
463ee0b2 2722 incline(s);
3280af22
NIS
2723 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2724 PL_bufptr = s;
2725 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2726 return yylex();
a687059c 2727 }
378cc40b 2728 }
a687059c 2729 else {
378cc40b 2730 *s = '\0';
3280af22 2731 PL_bufend = s;
a687059c 2732 }
378cc40b
LW
2733 goto retry;
2734 case '-':
79072805 2735 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2736 s++;
3280af22 2737 PL_bufptr = s;
748a9306
LW
2738 tmp = *s++;
2739
bf4acbe4 2740 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2741 s++;
2742
2743 if (strnEQ(s,"=>",2)) {
3280af22 2744 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2745 OPERATOR('-'); /* unary minus */
2746 }
3280af22
NIS
2747 PL_last_uni = PL_oldbufptr;
2748 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2749 switch (tmp) {
79072805
LW
2750 case 'r': FTST(OP_FTEREAD);
2751 case 'w': FTST(OP_FTEWRITE);
2752 case 'x': FTST(OP_FTEEXEC);
2753 case 'o': FTST(OP_FTEOWNED);
2754 case 'R': FTST(OP_FTRREAD);
2755 case 'W': FTST(OP_FTRWRITE);
2756 case 'X': FTST(OP_FTREXEC);
2757 case 'O': FTST(OP_FTROWNED);
2758 case 'e': FTST(OP_FTIS);
2759 case 'z': FTST(OP_FTZERO);
2760 case 's': FTST(OP_FTSIZE);
2761 case 'f': FTST(OP_FTFILE);
2762 case 'd': FTST(OP_FTDIR);
2763 case 'l': FTST(OP_FTLINK);
2764 case 'p': FTST(OP_FTPIPE);
2765 case 'S': FTST(OP_FTSOCK);
2766 case 'u': FTST(OP_FTSUID);
2767 case 'g': FTST(OP_FTSGID);
2768 case 'k': FTST(OP_FTSVTX);
2769 case 'b': FTST(OP_FTBLK);
2770 case 'c': FTST(OP_FTCHR);
2771 case 't': FTST(OP_FTTTY);
2772 case 'T': FTST(OP_FTTEXT);
2773 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2774 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2775 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2776 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2777 default:
cea2e8a9 2778 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2779 break;
2780 }
2781 }
a687059c
LW
2782 tmp = *s++;
2783 if (*s == tmp) {
2784 s++;
3280af22 2785 if (PL_expect == XOPERATOR)
79072805
LW
2786 TERM(POSTDEC);
2787 else
2788 OPERATOR(PREDEC);
2789 }
2790 else if (*s == '>') {
2791 s++;
2792 s = skipspace(s);
7e2040f0 2793 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2794 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2795 TOKEN(ARROW);
79072805 2796 }
748a9306
LW
2797 else if (*s == '$')
2798 OPERATOR(ARROW);
463ee0b2 2799 else
748a9306 2800 TERM(ARROW);
a687059c 2801 }
3280af22 2802 if (PL_expect == XOPERATOR)
79072805
LW
2803 Aop(OP_SUBTRACT);
2804 else {
3280af22 2805 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2806 check_uni();
79072805 2807 OPERATOR('-'); /* unary minus */
2f3197b3 2808 }
79072805 2809
378cc40b 2810 case '+':
a687059c
LW
2811 tmp = *s++;
2812 if (*s == tmp) {
378cc40b 2813 s++;
3280af22 2814 if (PL_expect == XOPERATOR)
79072805
LW
2815 TERM(POSTINC);
2816 else
2817 OPERATOR(PREINC);
378cc40b 2818 }
3280af22 2819 if (PL_expect == XOPERATOR)
79072805
LW
2820 Aop(OP_ADD);
2821 else {
3280af22 2822 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2823 check_uni();
a687059c 2824 OPERATOR('+');
2f3197b3 2825 }
a687059c 2826
378cc40b 2827 case '*':
3280af22
NIS
2828 if (PL_expect != XOPERATOR) {
2829 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2830 PL_expect = XOPERATOR;
2831 force_ident(PL_tokenbuf, '*');
2832 if (!*PL_tokenbuf)
a0d0e21e 2833 PREREF('*');
79072805 2834 TERM('*');
a687059c 2835 }
79072805
LW
2836 s++;
2837 if (*s == '*') {
a687059c 2838 s++;
79072805 2839 PWop(OP_POW);
a687059c 2840 }
79072805
LW
2841 Mop(OP_MULTIPLY);
2842
378cc40b 2843 case '%':
3280af22 2844 if (PL_expect == XOPERATOR) {
bbce6d69 2845 ++s;
2846 Mop(OP_MODULO);
a687059c 2847 }
3280af22
NIS
2848 PL_tokenbuf[0] = '%';
2849 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2850 if (!PL_tokenbuf[1]) {
2851 if (s == PL_bufend)
bbce6d69 2852 yyerror("Final % should be \\% or %name");
2853 PREREF('%');
a687059c 2854 }
3280af22 2855 PL_pending_ident = '%';
bbce6d69 2856 TERM('%');
a687059c 2857
378cc40b 2858 case '^':
79072805 2859 s++;
a0d0e21e 2860 BOop(OP_BIT_XOR);
79072805 2861 case '[':
3280af22 2862 PL_lex_brackets++;
79072805 2863 /* FALL THROUGH */
378cc40b 2864 case '~':
378cc40b 2865 case ',':
378cc40b
LW
2866 tmp = *s++;
2867 OPERATOR(tmp);
a0d0e21e
LW
2868 case ':':
2869 if (s[1] == ':') {
2870 len = 0;
2871 goto just_a_word;
2872 }
2873 s++;
09bef843
SB
2874 switch (PL_expect) {
2875 OP *attrs;
2876 case XOPERATOR:
2877 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2878 break;
2879 PL_bufptr = s; /* update in case we back off */
2880 goto grabattrs;
2881 case XATTRBLOCK:
2882 PL_expect = XBLOCK;
2883 goto grabattrs;
2884 case XATTRTERM:
2885 PL_expect = XTERMBLOCK;
2886 grabattrs:
2887 s = skipspace(s);
2888 attrs = Nullop;
7e2040f0 2889 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2890 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2891 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2892 if (tmp < 0) tmp = -tmp;
2893 switch (tmp) {
2894 case KEY_or:
2895 case KEY_and:
2896 case KEY_for:
2897 case KEY_unless:
2898 case KEY_if:
2899 case KEY_while:
2900 case KEY_until:
2901 goto got_attrs;
2902 default:
2903 break;
2904 }
2905 }
09bef843
SB
2906 if (*d == '(') {
2907 d = scan_str(d,TRUE,TRUE);
2908 if (!d) {
2909 if (PL_lex_stuff) {
2910 SvREFCNT_dec(PL_lex_stuff);
2911 PL_lex_stuff = Nullsv;
2912 }
2913 /* MUST advance bufptr here to avoid bogus
2914 "at end of line" context messages from yyerror().
2915 */
2916 PL_bufptr = s + len;
2917 yyerror("Unterminated attribute parameter in attribute list");
2918 if (attrs)
2919 op_free(attrs);
2920 return 0; /* EOF indicator */
2921 }
2922 }
2923 if (PL_lex_stuff) {
2924 SV *sv = newSVpvn(s, len);
2925 sv_catsv(sv, PL_lex_stuff);
2926 attrs = append_elem(OP_LIST, attrs,
2927 newSVOP(OP_CONST, 0, sv));
2928 SvREFCNT_dec(PL_lex_stuff);
2929 PL_lex_stuff = Nullsv;
2930 }
2931 else {
2932 attrs = append_elem(OP_LIST, attrs,
2933 newSVOP(OP_CONST, 0,
2934 newSVpvn(s, len)));
2935 }
2936 s = skipspace(d);
0120eecf 2937 if (*s == ':' && s[1] != ':')
09bef843 2938 s = skipspace(s+1);
0120eecf
GS
2939 else if (s == d)
2940 break; /* require real whitespace or :'s */
09bef843 2941 }
f9829d6b
GS
2942 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2943 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2944 char q = ((*s == '\'') ? '"' : '\'');
2945 /* If here for an expression, and parsed no attrs, back off. */
2946 if (tmp == '=' && !attrs) {
2947 s = PL_bufptr;
2948 break;
2949 }
2950 /* MUST advance bufptr here to avoid bogus "at end of line"
2951 context messages from yyerror().
2952 */
2953 PL_bufptr = s;
2954 if (!*s)
2955 yyerror("Unterminated attribute list");
2956 else
2957 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
2958 q, *s, q));
2959 if (attrs)
2960 op_free(attrs);
2961 OPERATOR(':');
2962 }
f9829d6b 2963 got_attrs:
09bef843
SB
2964 if (attrs) {
2965 PL_nextval[PL_nexttoke].opval = attrs;
2966 force_next(THING);
2967 }
2968 TOKEN(COLONATTR);
2969 }
a0d0e21e 2970 OPERATOR(':');
8990e307
LW
2971 case '(':
2972 s++;
3280af22
NIS
2973 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
2974 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 2975 else
3280af22 2976 PL_expect = XTERM;
a0d0e21e 2977 TOKEN('(');
378cc40b 2978 case ';':
f4dd75d9 2979 CLINE;
378cc40b
LW
2980 tmp = *s++;
2981 OPERATOR(tmp);
2982 case ')':
378cc40b 2983 tmp = *s++;
16d20bd9
AD
2984 s = skipspace(s);
2985 if (*s == '{')
2986 PREBLOCK(tmp);
378cc40b 2987 TERM(tmp);
79072805
LW
2988 case ']':
2989 s++;
3280af22 2990 if (PL_lex_brackets <= 0)
d98d5fff 2991 yyerror("Unmatched right square bracket");
463ee0b2 2992 else
3280af22
NIS
2993 --PL_lex_brackets;
2994 if (PL_lex_state == LEX_INTERPNORMAL) {
2995 if (PL_lex_brackets == 0) {
a0d0e21e 2996 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 2997 PL_lex_state = LEX_INTERPEND;
79072805
LW
2998 }
2999 }
4633a7c4 3000 TERM(']');
79072805
LW
3001 case '{':
3002 leftbracket:
79072805 3003 s++;
3280af22
NIS
3004 if (PL_lex_brackets > 100) {
3005 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3006 if (newlb != PL_lex_brackstack) {
8990e307 3007 SAVEFREEPV(newlb);
3280af22 3008 PL_lex_brackstack = newlb;
8990e307
LW
3009 }
3010 }
3280af22 3011 switch (PL_expect) {
a0d0e21e 3012 case XTERM:
3280af22 3013 if (PL_lex_formbrack) {
a0d0e21e
LW
3014 s--;
3015 PRETERMBLOCK(DO);
3016 }
3280af22
NIS
3017 if (PL_oldoldbufptr == PL_last_lop)
3018 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3019 else
3280af22 3020 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3021 OPERATOR(HASHBRACK);
a0d0e21e 3022 case XOPERATOR:
bf4acbe4 3023 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3024 s++;
44a8e56a 3025 d = s;
3280af22
NIS
3026 PL_tokenbuf[0] = '\0';
3027 if (d < PL_bufend && *d == '-') {
3028 PL_tokenbuf[0] = '-';
44a8e56a 3029 d++;
bf4acbe4 3030 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3031 d++;
3032 }
7e2040f0 3033 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3034 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3035 FALSE, &len);
bf4acbe4 3036 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3037 d++;
3038 if (*d == '}') {
3280af22 3039 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3040 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3041 if (minus)
3042 force_next('-');
748a9306
LW
3043 }
3044 }
3045 /* FALL THROUGH */
09bef843 3046 case XATTRBLOCK:
748a9306 3047 case XBLOCK:
3280af22
NIS
3048 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3049 PL_expect = XSTATE;
a0d0e21e 3050 break;
09bef843 3051 case XATTRTERM:
a0d0e21e 3052 case XTERMBLOCK:
3280af22
NIS
3053 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3054 PL_expect = XSTATE;
a0d0e21e
LW
3055 break;
3056 default: {
3057 char *t;
3280af22
NIS
3058 if (PL_oldoldbufptr == PL_last_lop)
3059 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3060 else
3280af22 3061 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3062 s = skipspace(s);
09ecc4b6 3063 if (*s == '}')
a0d0e21e 3064 OPERATOR(HASHBRACK);
b8a4b1be
GS
3065 /* This hack serves to disambiguate a pair of curlies
3066 * as being a block or an anon hash. Normally, expectation
3067 * determines that, but in cases where we're not in a
3068 * position to expect anything in particular (like inside
3069 * eval"") we have to resolve the ambiguity. This code
3070 * covers the case where the first term in the curlies is a
3071 * quoted string. Most other cases need to be explicitly
3072 * disambiguated by prepending a `+' before the opening
3073 * curly in order to force resolution as an anon hash.
3074 *
3075 * XXX should probably propagate the outer expectation
3076 * into eval"" to rely less on this hack, but that could
3077 * potentially break current behavior of eval"".
3078 * GSAR 97-07-21
3079 */
3080 t = s;
3081 if (*s == '\'' || *s == '"' || *s == '`') {
3082 /* common case: get past first string, handling escapes */
3280af22 3083 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3084 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3085 t++;
3086 t++;
a0d0e21e 3087 }
b8a4b1be 3088 else if (*s == 'q') {
3280af22 3089 if (++t < PL_bufend
b8a4b1be 3090 && (!isALNUM(*t)
3280af22 3091 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3092 && !isALNUM(*t))))
3093 {
b8a4b1be
GS
3094 char *tmps;
3095 char open, close, term;
3096 I32 brackets = 1;
3097
3280af22 3098 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3099 t++;
3100 term = *t;
3101 open = term;
3102 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3103 term = tmps[5];
3104 close = term;
3105 if (open == close)
3280af22
NIS
3106 for (t++; t < PL_bufend; t++) {
3107 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3108 t++;
6d07e5e9 3109 else if (*t == open)
b8a4b1be
GS
3110 break;
3111 }
3112 else
3280af22
NIS
3113 for (t++; t < PL_bufend; t++) {
3114 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3115 t++;
6d07e5e9 3116 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3117 break;
3118 else if (*t == open)
3119 brackets++;
3120 }
3121 }
3122 t++;
a0d0e21e 3123 }
7e2040f0 3124 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3125 t += UTF8SKIP(t);
7e2040f0 3126 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3127 t += UTF8SKIP(t);
a0d0e21e 3128 }
3280af22 3129 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3130 t++;
b8a4b1be
GS
3131 /* if comma follows first term, call it an anon hash */
3132 /* XXX it could be a comma expression with loop modifiers */
3280af22 3133 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3134 || (*t == '=' && t[1] == '>')))
a0d0e21e 3135 OPERATOR(HASHBRACK);
3280af22 3136 if (PL_expect == XREF)
4e4e412b 3137 PL_expect = XTERM;
a0d0e21e 3138 else {
3280af22
NIS
3139 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3140 PL_expect = XSTATE;
a0d0e21e 3141 }
8990e307 3142 }
a0d0e21e 3143 break;
463ee0b2 3144 }
57843af0 3145 yylval.ival = CopLINE(PL_curcop);
79072805 3146 if (isSPACE(*s) || *s == '#')
3280af22 3147 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3148 TOKEN('{');
378cc40b 3149 case '}':
79072805
LW
3150 rightbracket:
3151 s++;
3280af22 3152 if (PL_lex_brackets <= 0)
d98d5fff 3153 yyerror("Unmatched right curly bracket");
463ee0b2 3154 else
3280af22 3155 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3156 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3157 PL_lex_formbrack = 0;
3158 if (PL_lex_state == LEX_INTERPNORMAL) {
3159 if (PL_lex_brackets == 0) {
9059aa12
LW
3160 if (PL_expect & XFAKEBRACK) {
3161 PL_expect &= XENUMMASK;
3280af22
NIS
3162 PL_lex_state = LEX_INTERPEND;
3163 PL_bufptr = s;
cea2e8a9 3164 return yylex(); /* ignore fake brackets */
79072805 3165 }
fa83b5b6 3166 if (*s == '-' && s[1] == '>')
3280af22 3167 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3168 else if (*s != '[' && *s != '{')
3280af22 3169 PL_lex_state = LEX_INTERPEND;
79072805
LW
3170 }
3171 }
9059aa12
LW
3172 if (PL_expect & XFAKEBRACK) {
3173 PL_expect &= XENUMMASK;
3280af22 3174 PL_bufptr = s;
cea2e8a9 3175 return yylex(); /* ignore fake brackets */
748a9306 3176 }
79072805
LW
3177 force_next('}');
3178 TOKEN(';');
378cc40b
LW
3179 case '&':
3180 s++;
3181 tmp = *s++;
3182 if (tmp == '&')
a0d0e21e 3183 AOPERATOR(ANDAND);
378cc40b 3184 s--;
3280af22 3185 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3186 if (ckWARN(WARN_SEMICOLON)
3187 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3188 {
57843af0 3189 CopLINE_dec(PL_curcop);
cea2e8a9 3190 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3191 CopLINE_inc(PL_curcop);
463ee0b2 3192 }
79072805 3193 BAop(OP_BIT_AND);
463ee0b2 3194 }
79072805 3195
3280af22
NIS
3196 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3197 if (*PL_tokenbuf) {
3198 PL_expect = XOPERATOR;
3199 force_ident(PL_tokenbuf, '&');
463ee0b2 3200 }
79072805
LW
3201 else
3202 PREREF('&');
c07a80fd 3203 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3204 TERM('&');
3205
378cc40b
LW
3206 case '|':
3207 s++;
3208 tmp = *s++;
3209 if (tmp == '|')
a0d0e21e 3210 AOPERATOR(OROR);
378cc40b 3211 s--;
79072805 3212 BOop(OP_BIT_OR);
378cc40b
LW
3213 case '=':
3214 s++;
3215 tmp = *s++;
3216 if (tmp == '=')
79072805
LW
3217 Eop(OP_EQ);
3218 if (tmp == '>')
3219 OPERATOR(',');
378cc40b 3220 if (tmp == '~')
79072805 3221 PMop(OP_MATCH);
599cee73 3222 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3223 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3224 s--;
3280af22
NIS
3225 if (PL_expect == XSTATE && isALPHA(tmp) &&
3226 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3227 {
3280af22
NIS
3228 if (PL_in_eval && !PL_rsfp) {
3229 d = PL_bufend;
a5f75d66
AD
3230 while (s < d) {
3231 if (*s++ == '\n') {
3232 incline(s);
3233 if (strnEQ(s,"=cut",4)) {
3234 s = strchr(s,'\n');
3235 if (s)
3236 s++;
3237 else
3238 s = d;
3239 incline(s);
3240 goto retry;
3241 }
3242 }
3243 }
3244 goto retry;
3245 }
3280af22
NIS
3246 s = PL_bufend;
3247 PL_doextract = TRUE;
a0d0e21e
LW
3248 goto retry;
3249 }
3280af22 3250 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3251 char *t;
51882d45 3252#ifdef PERL_STRICT_CR
bf4acbe4 3253 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3254#else
bf4acbe4 3255 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3256#endif
a0d0e21e
LW
3257 if (*t == '\n' || *t == '#') {
3258 s--;
3280af22 3259 PL_expect = XBLOCK;
a0d0e21e
LW
3260 goto leftbracket;
3261 }
79072805 3262 }
a0d0e21e
LW
3263 yylval.ival = 0;
3264 OPERATOR(ASSIGNOP);
378cc40b
LW
3265 case '!':
3266 s++;
3267 tmp = *s++;
3268 if (tmp == '=')
79072805 3269 Eop(OP_NE);
378cc40b 3270 if (tmp == '~')
79072805 3271 PMop(OP_NOT);
378cc40b
LW
3272 s--;
3273 OPERATOR('!');
3274 case '<':
3280af22 3275 if (PL_expect != XOPERATOR) {
93a17b20 3276 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3277 check_uni();
79072805
LW
3278 if (s[1] == '<')
3279 s = scan_heredoc(s);
3280 else
3281 s = scan_inputsymbol(s);
3282 TERM(sublex_start());
378cc40b
LW
3283 }
3284 s++;
3285 tmp = *s++;
3286 if (tmp == '<')
79072805 3287 SHop(OP_LEFT_SHIFT);
395c3793
LW
3288 if (tmp == '=') {
3289 tmp = *s++;
3290 if (tmp == '>')
79072805 3291 Eop(OP_NCMP);
395c3793 3292 s--;
79072805 3293 Rop(OP_LE);
395c3793 3294 }
378cc40b 3295 s--;
79072805 3296 Rop(OP_LT);
378cc40b
LW
3297 case '>':
3298 s++;
3299 tmp = *s++;
3300 if (tmp == '>')
79072805 3301 SHop(OP_RIGHT_SHIFT);
378cc40b 3302 if (tmp == '=')
79072805 3303 Rop(OP_GE);
378cc40b 3304 s--;
79072805 3305 Rop(OP_GT);
378cc40b
LW
3306
3307 case '$':
bbce6d69 3308 CLINE;
3309
3280af22
NIS
3310 if (PL_expect == XOPERATOR) {
3311 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3312 PL_expect = XTERM;
a0d0e21e 3313 depcom();
bbce6d69 3314 return ','; /* grandfather non-comma-format format */
a0d0e21e 3315 }
8990e307 3316 }
a0d0e21e 3317
7e2040f0 3318 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3319 PL_tokenbuf[0] = '@';
376b8730
SM
3320 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3321 sizeof PL_tokenbuf - 1, FALSE);
3322 if (PL_expect == XOPERATOR)
3323 no_op("Array length", s);
3280af22 3324 if (!PL_tokenbuf[1])
a0d0e21e 3325 PREREF(DOLSHARP);
3280af22
NIS
3326 PL_expect = XOPERATOR;
3327 PL_pending_ident = '#';
463ee0b2 3328 TOKEN(DOLSHARP);
79072805 3329 }
bbce6d69 3330
3280af22 3331 PL_tokenbuf[0] = '$';
376b8730
SM
3332 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3333 sizeof PL_tokenbuf - 1, FALSE);
3334 if (PL_expect == XOPERATOR)
3335 no_op("Scalar", s);
3280af22
NIS
3336 if (!PL_tokenbuf[1]) {
3337 if (s == PL_bufend)
bbce6d69 3338 yyerror("Final $ should be \\$ or $name");
3339 PREREF('$');
8990e307 3340 }
a0d0e21e 3341
bbce6d69 3342 /* This kludge not intended to be bulletproof. */
3280af22 3343 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3344 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3345 newSViv(PL_compiling.cop_arybase));
bbce6d69 3346 yylval.opval->op_private = OPpCONST_ARYBASE;
3347 TERM(THING);
3348 }
3349
ff68c719 3350 d = s;
69d2bceb 3351 tmp = (I32)*s;
3280af22 3352 if (PL_lex_state == LEX_NORMAL)
ff68c719 3353 s = skipspace(s);
3354
3280af22 3355 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3356 char *t;
3357 if (*s == '[') {
3280af22 3358 PL_tokenbuf[0] = '@';
599cee73 3359 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3360 for(t = s + 1;
7e2040f0 3361 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3362 t++) ;
a0d0e21e 3363 if (*t++ == ',') {
3280af22
NIS
3364 PL_bufptr = skipspace(PL_bufptr);
3365 while (t < PL_bufend && *t != ']')
bbce6d69 3366 t++;
cea2e8a9 3367 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3368 "Multidimensional syntax %.*s not supported",
3369 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3370 }
3371 }
bbce6d69 3372 }
3373 else if (*s == '{') {
3280af22 3374 PL_tokenbuf[0] = '%';
599cee73 3375 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3376 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3377 {
3280af22 3378 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3379 STRLEN len;
3380 for (t++; isSPACE(*t); t++) ;
7e2040f0 3381 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3382 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3383 for (; isSPACE(*t); t++) ;
864dbfa3 3384 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3385 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3386 "You need to quote \"%s\"", tmpbuf);
748a9306 3387 }
93a17b20
LW
3388 }
3389 }
2f3197b3 3390 }
bbce6d69 3391
3280af22 3392 PL_expect = XOPERATOR;
69d2bceb 3393 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3394 bool islop = (PL_last_lop == PL_oldoldbufptr);
3395 if (!islop || PL_last_lop_op == OP_GREPSTART)
3396 PL_expect = XOPERATOR;
bbce6d69 3397 else if (strchr("$@\"'`q", *s))
3280af22 3398 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3399 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3400 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3401 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3402 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3403 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3404 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3405 /* binary operators exclude handle interpretations */
3406 switch (tmp) {
3407 case -KEY_x:
3408 case -KEY_eq:
3409 case -KEY_ne:
3410 case -KEY_gt:
3411 case -KEY_lt:
3412 case -KEY_ge:
3413 case -KEY_le:
3414 case -KEY_cmp:
3415 break;
3416 default:
3280af22 3417 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3418 break;
3419 }
3420 }
68dc0745 3421 else {
3422 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3423 if (gv && GvCVu(gv))
3280af22 3424 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3425 }
93a17b20 3426 }
bbce6d69 3427 else if (isDIGIT(*s))
3280af22 3428 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3429 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3430 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3431 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3432 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3433 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3434 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3435 }
3280af22 3436 PL_pending_ident = '$';
79072805 3437 TOKEN('$');
378cc40b
LW
3438
3439 case '@':
3280af22 3440 if (PL_expect == XOPERATOR)
bbce6d69 3441 no_op("Array", s);
3280af22
NIS
3442 PL_tokenbuf[0] = '@';
3443 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3444 if (!PL_tokenbuf[1]) {
3445 if (s == PL_bufend)
bbce6d69 3446 yyerror("Final @ should be \\@ or @name");
3447 PREREF('@');
3448 }
3280af22 3449 if (PL_lex_state == LEX_NORMAL)
ff68c719 3450 s = skipspace(s);
3280af22 3451 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3452 if (*s == '{')
3280af22 3453 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3454
3455 /* Warn about @ where they meant $. */
599cee73 3456 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3457 if (*s == '[' || *s == '{') {
3458 char *t = s + 1;
7e2040f0 3459 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3460 t++;
3461 if (*t == '}' || *t == ']') {
3462 t++;
3280af22 3463 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3464 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3465 "Scalar value %.*s better written as $%.*s",
3280af22 3466 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3467 }
93a17b20
LW
3468 }
3469 }
463ee0b2 3470 }
3280af22 3471 PL_pending_ident = '@';
79072805 3472 TERM('@');
378cc40b
LW
3473
3474 case '/': /* may either be division or pattern */
3475 case '?': /* may either be conditional or pattern */
3280af22 3476 if (PL_expect != XOPERATOR) {
c277df42 3477 /* Disable warning on "study /blah/" */
3280af22
NIS
3478 if (PL_oldoldbufptr == PL_last_uni
3479 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3480 || memNE(PL_last_uni, "study", 5)
3481 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3482 check_uni();
8782bef2 3483 s = scan_pat(s,OP_MATCH);
79072805 3484 TERM(sublex_start());
378cc40b
LW
3485 }
3486 tmp = *s++;
a687059c 3487 if (tmp == '/')
79072805 3488 Mop(OP_DIVIDE);
378cc40b
LW
3489 OPERATOR(tmp);
3490
3491 case '.':
51882d45
GS
3492 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3493#ifdef PERL_STRICT_CR
3494 && s[1] == '\n'
3495#else
3496 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3497#endif
3498 && (s == PL_linestart || s[-1] == '\n') )
3499 {
3280af22
NIS
3500 PL_lex_formbrack = 0;
3501 PL_expect = XSTATE;
79072805
LW
3502 goto rightbracket;
3503 }
3280af22 3504 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3505 tmp = *s++;
a687059c
LW
3506 if (*s == tmp) {
3507 s++;
2f3197b3
LW
3508 if (*s == tmp) {
3509 s++;
79072805 3510 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3511 }
3512 else
79072805 3513 yylval.ival = 0;
378cc40b 3514 OPERATOR(DOTDOT);
a687059c 3515 }
3280af22 3516 if (PL_expect != XOPERATOR)
2f3197b3 3517 check_uni();
79072805 3518 Aop(OP_CONCAT);
378cc40b
LW
3519 }
3520 /* FALL THROUGH */
3521 case '0': case '1': case '2': case '3': case '4':
3522 case '5': case '6': case '7': case '8': case '9':
79072805 3523 s = scan_num(s);
3280af22 3524 if (PL_expect == XOPERATOR)
8990e307 3525 no_op("Number",s);
79072805
LW
3526 TERM(THING);
3527
3528 case '\'':
09bef843 3529 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3530 if (PL_expect == XOPERATOR) {
3531 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3532 PL_expect = XTERM;
a0d0e21e
LW
3533 depcom();
3534 return ','; /* grandfather non-comma-format format */
3535 }
463ee0b2 3536 else
8990e307 3537 no_op("String",s);
463ee0b2 3538 }
79072805 3539 if (!s)
85e6fe83 3540 missingterm((char*)0);
79072805
LW
3541 yylval.ival = OP_CONST;
3542 TERM(sublex_start());
3543
3544 case '"':
09bef843 3545 s = scan_str(s,FALSE,FALSE);
3280af22
NIS
3546 if (PL_expect == XOPERATOR) {
3547 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3548 PL_expect = XTERM;
a0d0e21e
LW
3549 depcom();
3550 return ','; /* grandfather non-comma-format format */
3551 }
463ee0b2 3552 else
8990e307 3553 no_op("String",s);
463ee0b2 3554 }
79072805 3555 if (!s)
85e6fe83 3556 missingterm((char*)0);
4633a7c4 3557 yylval.ival = OP_CONST;
3280af22 3558 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
a0ed51b3 3559 if (*d == '$' || *d == '@' || *d == '\\' || *d & 0x80) {
4633a7c4
LW
3560 yylval.ival = OP_STRINGIFY;
3561 break;
3562 }
3563 }
79072805
LW
3564 TERM(sublex_start());
3565
3566 case '`':
09bef843 3567 s = scan_str(s,FALSE,FALSE);
3280af22 3568 if (PL_expect == XOPERATOR)
8990e307 3569 no_op("Backticks",s);
79072805 3570 if (!s)
85e6fe83 3571 missingterm((char*)0);
79072805
LW
3572 yylval.ival = OP_BACKTICK;
3573 set_csh();
3574 TERM(sublex_start());
3575
3576 case '\\':
3577 s++;
599cee73 3578 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3579 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73