This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
UTF8 output prework.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a 58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff 60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
IRC
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
82int yyactlevel = 0;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
IRC
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
e5edeb50 124 * Rop : relational operator <= != gt
ffb4593c
NT
125 *
126 * Also see LOP and lop() below.
127 */
128
3280af22
NIS
129#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
130#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
131#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
132#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
133#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
134#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
135#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
136#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
137#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
138#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
139#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
140#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
141#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
142#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
143#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
144#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
145#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
146#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
147#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
148#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 149
a687059c
LW
150/* This bit of chicanery makes a unary function followed by
151 * a parenthesis into a function with one argument, highest precedence.
152 */
2f3197b3 153#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
154 PL_expect = XTERM, \
155 PL_bufptr = s, \
156 PL_last_uni = PL_oldbufptr, \
157 PL_last_lop_op = f, \
a687059c
LW
158 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
159
79072805 160#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
79072805
LW
163 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
164
9f68db38 165/* grandfather return to old style */
3280af22 166#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 167
ffb4593c
NT
168/*
169 * S_ao
170 *
171 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
172 * into an OP_ANDASSIGN or OP_ORASSIGN
173 */
174
76e3520e 175STATIC int
cea2e8a9 176S_ao(pTHX_ int toketype)
a0d0e21e 177{
3280af22
NIS
178 if (*PL_bufptr == '=') {
179 PL_bufptr++;
a0d0e21e
LW
180 if (toketype == ANDAND)
181 yylval.ival = OP_ANDASSIGN;
182 else if (toketype == OROR)
183 yylval.ival = OP_ORASSIGN;
184 toketype = ASSIGNOP;
185 }
186 return toketype;
187}
188
ffb4593c
NT
189/*
190 * S_no_op
191 * When Perl expects an operator and finds something else, no_op
192 * prints the warning. It always prints "<something> found where
193 * operator expected. It prints "Missing semicolon on previous line?"
194 * if the surprise occurs at the start of the line. "do you need to
195 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
196 * where the compiler doesn't know if foo is a method call or a function.
197 * It prints "Missing operator before end of line" if there's nothing
198 * after the missing operator, or "... before <...>" if there is something
199 * after the missing operator.
200 */
201
76e3520e 202STATIC void
cea2e8a9 203S_no_op(pTHX_ char *what, char *s)
463ee0b2 204{
3280af22
NIS
205 char *oldbp = PL_bufptr;
206 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 207
1189a94a
GS
208 if (!s)
209 s = oldbp;
07c798fb 210 else
1189a94a 211 PL_bufptr = s;
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 221 }
07c798fb
HS
222 else {
223 assert(s >= oldbp);
cea2e8a9 224 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 225 }
3280af22 226 PL_bufptr = oldbp;
8990e307
LW
227}
228
ffb4593c
NT
229/*
230 * S_missingterm
231 * Complain about missing quote/regexp/heredoc terminator.
232 * If it's called with (char *)NULL then it cauterizes the line buffer.
233 * If we're in a delimited string and the delimiter is a control
234 * character, it's reformatted into a two-char sequence like ^C.
235 * This is fatal.
236 */
237
76e3520e 238STATIC void
cea2e8a9 239S_missingterm(pTHX_ char *s)
8990e307
LW
240{
241 char tmpbuf[3];
242 char q;
243 if (s) {
244 char *nl = strrchr(s,'\n');
d2719217 245 if (nl)
8990e307
LW
246 *nl = '\0';
247 }
9d116dd7
JH
248 else if (
249#ifdef EBCDIC
250 iscntrl(PL_multi_close)
251#else
252 PL_multi_close < 32 || PL_multi_close == 127
253#endif
254 ) {
8990e307 255 *tmpbuf = '^';
3280af22 256 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
257 s = "\\n";
258 tmpbuf[2] = '\0';
259 s = tmpbuf;
260 }
261 else {
3280af22 262 *tmpbuf = PL_multi_close;
8990e307
LW
263 tmpbuf[1] = '\0';
264 s = tmpbuf;
265 }
266 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 267 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 268}
79072805 269
ffb4593c
NT
270/*
271 * Perl_deprecate
ffb4593c
NT
272 */
273
79072805 274void
864dbfa3 275Perl_deprecate(pTHX_ char *s)
a0d0e21e 276{
599cee73 277 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 278 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
279}
280
ffb4593c
NT
281/*
282 * depcom
9cbb5ea2 283 * Deprecate a comma-less variable list.
ffb4593c
NT
284 */
285
76e3520e 286STATIC void
cea2e8a9 287S_depcom(pTHX)
a0d0e21e
LW
288{
289 deprecate("comma-less variable list");
290}
291
ffb4593c 292/*
9cbb5ea2
GS
293 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
294 * utf16-to-utf8-reversed.
ffb4593c
NT
295 */
296
c39cd008
GS
297#ifdef PERL_CR_FILTER
298static void
299strip_return(SV *sv)
300{
301 register char *s = SvPVX(sv);
302 register char *e = s + SvCUR(sv);
303 /* outer loop optimized to do nothing if there are no CR-LFs */
304 while (s < e) {
305 if (*s++ == '\r' && *s == '\n') {
306 /* hit a CR-LF, need to copy the rest */
307 register char *d = s - 1;
308 *d++ = *s++;
309 while (s < e) {
310 if (*s == '\r' && s[1] == '\n')
311 s++;
312 *d++ = *s++;
313 }
314 SvCUR(sv) -= s - d;
315 return;
316 }
317 }
318}
a868473f 319
76e3520e 320STATIC I32
c39cd008 321S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 322{
c39cd008
GS
323 I32 count = FILTER_READ(idx+1, sv, maxlen);
324 if (count > 0 && !maxlen)
325 strip_return(sv);
326 return count;
a868473f
NIS
327}
328#endif
329
ffb4593c
NT
330/*
331 * Perl_lex_start
9cbb5ea2
GS
332 * Initialize variables. Uses the Perl save_stack to save its state (for
333 * recursive calls to the parser).
ffb4593c
NT
334 */
335
a0d0e21e 336void
864dbfa3 337Perl_lex_start(pTHX_ SV *line)
79072805 338{
8990e307
LW
339 char *s;
340 STRLEN len;
341
3280af22
NIS
342 SAVEI32(PL_lex_dojoin);
343 SAVEI32(PL_lex_brackets);
3280af22
NIS
344 SAVEI32(PL_lex_casemods);
345 SAVEI32(PL_lex_starts);
346 SAVEI32(PL_lex_state);
7766f137 347 SAVEVPTR(PL_lex_inpat);
3280af22 348 SAVEI32(PL_lex_inwhat);
18b09519
GS
349 if (PL_lex_state == LEX_KNOWNEXT) {
350 I32 toke = PL_nexttoke;
351 while (--toke >= 0) {
352 SAVEI32(PL_nexttype[toke]);
353 SAVEVPTR(PL_nextval[toke]);
354 }
355 SAVEI32(PL_nexttoke);
18b09519 356 }
57843af0 357 SAVECOPLINE(PL_curcop);
3280af22
NIS
358 SAVEPPTR(PL_bufptr);
359 SAVEPPTR(PL_bufend);
360 SAVEPPTR(PL_oldbufptr);
361 SAVEPPTR(PL_oldoldbufptr);
362 SAVEPPTR(PL_linestart);
363 SAVESPTR(PL_linestr);
364 SAVEPPTR(PL_lex_brackstack);
365 SAVEPPTR(PL_lex_casestack);
c76ac1ee 366 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
367 SAVESPTR(PL_lex_stuff);
368 SAVEI32(PL_lex_defer);
09bef843 369 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 370 SAVESPTR(PL_lex_repl);
bebdddfc
GS
371 SAVEINT(PL_expect);
372 SAVEINT(PL_lex_expect);
3280af22
NIS
373
374 PL_lex_state = LEX_NORMAL;
375 PL_lex_defer = 0;
376 PL_expect = XSTATE;
377 PL_lex_brackets = 0;
3280af22
NIS
378 New(899, PL_lex_brackstack, 120, char);
379 New(899, PL_lex_casestack, 12, char);
380 SAVEFREEPV(PL_lex_brackstack);
381 SAVEFREEPV(PL_lex_casestack);
382 PL_lex_casemods = 0;
383 *PL_lex_casestack = '\0';
384 PL_lex_dojoin = 0;
385 PL_lex_starts = 0;
386 PL_lex_stuff = Nullsv;
387 PL_lex_repl = Nullsv;
388 PL_lex_inpat = 0;
76be56bc 389 PL_nexttoke = 0;
3280af22 390 PL_lex_inwhat = 0;
09bef843 391 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
392 PL_linestr = line;
393 if (SvREADONLY(PL_linestr))
394 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
395 s = SvPV(PL_linestr, len);
8990e307 396 if (len && s[len-1] != ';') {
3280af22
NIS
397 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
398 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
399 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 400 }
3280af22
NIS
401 SvTEMP_off(PL_linestr);
402 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
403 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
404 SvREFCNT_dec(PL_rs);
79cb57f6 405 PL_rs = newSVpvn("\n", 1);
3280af22 406 PL_rsfp = 0;
79072805 407}
a687059c 408
ffb4593c
NT
409/*
410 * Perl_lex_end
9cbb5ea2
GS
411 * Finalizer for lexing operations. Must be called when the parser is
412 * done with the lexer.
ffb4593c
NT
413 */
414
463ee0b2 415void
864dbfa3 416Perl_lex_end(pTHX)
463ee0b2 417{
3280af22 418 PL_doextract = FALSE;
463ee0b2
LW
419}
420
ffb4593c
NT
421/*
422 * S_incline
423 * This subroutine has nothing to do with tilting, whether at windmills
424 * or pinball tables. Its name is short for "increment line". It
57843af0 425 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 426 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
427 * # line 500 "foo.pm"
428 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
429 */
430
76e3520e 431STATIC void
cea2e8a9 432S_incline(pTHX_ char *s)
463ee0b2
LW
433{
434 char *t;
435 char *n;
73659bf1 436 char *e;
463ee0b2 437 char ch;
463ee0b2 438
57843af0 439 CopLINE_inc(PL_curcop);
463ee0b2
LW
440 if (*s++ != '#')
441 return;
bf4acbe4 442 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
443 if (strnEQ(s, "line", 4))
444 s += 4;
445 else
446 return;
447 if (*s == ' ' || *s == '\t')
448 s++;
449 else
450 return;
bf4acbe4 451 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
452 if (!isDIGIT(*s))
453 return;
454 n = s;
455 while (isDIGIT(*s))
456 s++;
bf4acbe4 457 while (SPACE_OR_TAB(*s))
463ee0b2 458 s++;
73659bf1 459 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 460 s++;
73659bf1
GS
461 e = t + 1;
462 }
463ee0b2 463 else {
463ee0b2 464 for (t = s; !isSPACE(*t); t++) ;
73659bf1 465 e = t;
463ee0b2 466 }
bf4acbe4 467 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
468 e++;
469 if (*e != '\n' && *e != '\0')
470 return; /* false alarm */
471
463ee0b2
LW
472 ch = *t;
473 *t = '\0';
f4dd75d9
GS
474 if (t - s > 0) {
475#ifdef USE_ITHREADS
476 Safefree(CopFILE(PL_curcop));
477#else
478 SvREFCNT_dec(CopFILEGV(PL_curcop));
479#endif
57843af0 480 CopFILE_set(PL_curcop, s);
f4dd75d9 481 }
463ee0b2 482 *t = ch;
57843af0 483 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
484}
485
ffb4593c
NT
486/*
487 * S_skipspace
488 * Called to gobble the appropriate amount and type of whitespace.
489 * Skips comments as well.
490 */
491
76e3520e 492STATIC char *
cea2e8a9 493S_skipspace(pTHX_ register char *s)
a687059c 494{
3280af22 495 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 496 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
497 s++;
498 return s;
499 }
500 for (;;) {
fd049845 501 STRLEN prevlen;
09bef843
SB
502 SSize_t oldprevlen, oldoldprevlen;
503 SSize_t oldloplen, oldunilen;
60e6418e
GS
504 while (s < PL_bufend && isSPACE(*s)) {
505 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
506 incline(s);
507 }
ffb4593c
NT
508
509 /* comment */
3280af22
NIS
510 if (s < PL_bufend && *s == '#') {
511 while (s < PL_bufend && *s != '\n')
463ee0b2 512 s++;
60e6418e 513 if (s < PL_bufend) {
463ee0b2 514 s++;
60e6418e
GS
515 if (PL_in_eval && !PL_rsfp) {
516 incline(s);
517 continue;
518 }
519 }
463ee0b2 520 }
ffb4593c
NT
521
522 /* only continue to recharge the buffer if we're at the end
523 * of the buffer, we're not reading from a source filter, and
524 * we're in normal lexing mode
525 */
09bef843
SB
526 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
527 PL_lex_state == LEX_FORMLINE)
463ee0b2 528 return s;
ffb4593c
NT
529
530 /* try to recharge the buffer */
9cbb5ea2
GS
531 if ((s = filter_gets(PL_linestr, PL_rsfp,
532 (prevlen = SvCUR(PL_linestr)))) == Nullch)
533 {
534 /* end of file. Add on the -p or -n magic */
3280af22
NIS
535 if (PL_minus_n || PL_minus_p) {
536 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
537 ";}continue{print or die qq(-p destination: $!\\n)" :
538 "");
3280af22
NIS
539 sv_catpv(PL_linestr,";}");
540 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
541 }
542 else
3280af22 543 sv_setpv(PL_linestr,";");
ffb4593c
NT
544
545 /* reset variables for next time we lex */
9cbb5ea2
GS
546 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
547 = SvPVX(PL_linestr);
3280af22 548 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
549
550 /* Close the filehandle. Could be from -P preprocessor,
551 * STDIN, or a regular file. If we were reading code from
552 * STDIN (because the commandline held no -e or filename)
553 * then we don't close it, we reset it so the code can
554 * read from STDIN too.
555 */
556
3280af22
NIS
557 if (PL_preprocess && !PL_in_eval)
558 (void)PerlProc_pclose(PL_rsfp);
559 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
560 PerlIO_clearerr(PL_rsfp);
8990e307 561 else
3280af22
NIS
562 (void)PerlIO_close(PL_rsfp);
563 PL_rsfp = Nullfp;
463ee0b2
LW
564 return s;
565 }
ffb4593c
NT
566
567 /* not at end of file, so we only read another line */
09bef843
SB
568 /* make corresponding updates to old pointers, for yyerror() */
569 oldprevlen = PL_oldbufptr - PL_bufend;
570 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
571 if (PL_last_uni)
572 oldunilen = PL_last_uni - PL_bufend;
573 if (PL_last_lop)
574 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
575 PL_linestart = PL_bufptr = s + prevlen;
576 PL_bufend = s + SvCUR(PL_linestr);
577 s = PL_bufptr;
09bef843
SB
578 PL_oldbufptr = s + oldprevlen;
579 PL_oldoldbufptr = s + oldoldprevlen;
580 if (PL_last_uni)
581 PL_last_uni = s + oldunilen;
582 if (PL_last_lop)
583 PL_last_lop = s + oldloplen;
a0d0e21e 584 incline(s);
ffb4593c
NT
585
586 /* debugger active and we're not compiling the debugger code,
587 * so store the line into the debugger's array of lines
588 */
3280af22 589 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
590 SV *sv = NEWSV(85,0);
591
592 sv_upgrade(sv, SVt_PVMG);
3280af22 593 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 594 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 595 }
463ee0b2 596 }
a687059c 597}
378cc40b 598
ffb4593c
NT
599/*
600 * S_check_uni
601 * Check the unary operators to ensure there's no ambiguity in how they're
602 * used. An ambiguous piece of code would be:
603 * rand + 5
604 * This doesn't mean rand() + 5. Because rand() is a unary operator,
605 * the +5 is its argument.
606 */
607
76e3520e 608STATIC void
cea2e8a9 609S_check_uni(pTHX)
ba106d47 610{
2f3197b3 611 char *s;
a0d0e21e 612 char *t;
2f3197b3 613
3280af22 614 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 615 return;
3280af22
NIS
616 while (isSPACE(*PL_last_uni))
617 PL_last_uni++;
7e2040f0 618 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 619 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 620 return;
0453d815 621 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 622 char ch = *s;
0453d815
PM
623 *s = '\0';
624 Perl_warner(aTHX_ WARN_AMBIGUOUS,
625 "Warning: Use of \"%s\" without parens is ambiguous",
626 PL_last_uni);
627 *s = ch;
628 }
2f3197b3
LW
629}
630
ffb4593c
NT
631/* workaround to replace the UNI() macro with a function. Only the
632 * hints/uts.sh file mentions this. Other comments elsewhere in the
633 * source indicate Microport Unix might need it too.
634 */
635
ffed7fef
LW
636#ifdef CRIPPLED_CC
637
638#undef UNI
ffed7fef 639#define UNI(f) return uni(f,s)
ffed7fef 640
76e3520e 641STATIC int
cea2e8a9 642S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
643{
644 yylval.ival = f;
3280af22
NIS
645 PL_expect = XTERM;
646 PL_bufptr = s;
8f872242
NIS
647 PL_last_uni = PL_oldbufptr;
648 PL_last_lop_op = f;
ffed7fef
LW
649 if (*s == '(')
650 return FUNC1;
651 s = skipspace(s);
652 if (*s == '(')
653 return FUNC1;
654 else
655 return UNIOP;
656}
657
a0d0e21e
LW
658#endif /* CRIPPLED_CC */
659
ffb4593c
NT
660/*
661 * LOP : macro to build a list operator. Its behaviour has been replaced
662 * with a subroutine, S_lop() for which LOP is just another name.
663 */
664
a0d0e21e
LW
665#define LOP(f,x) return lop(f,x,s)
666
ffb4593c
NT
667/*
668 * S_lop
669 * Build a list operator (or something that might be one). The rules:
670 * - if we have a next token, then it's a list operator [why?]
671 * - if the next thing is an opening paren, then it's a function
672 * - else it's a list operator
673 */
674
76e3520e 675STATIC I32
a0be28da 676S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 677{
79072805 678 yylval.ival = f;
35c8bce7 679 CLINE;
3280af22
NIS
680 PL_expect = x;
681 PL_bufptr = s;
682 PL_last_lop = PL_oldbufptr;
683 PL_last_lop_op = f;
684 if (PL_nexttoke)
a0d0e21e 685 return LSTOP;
79072805
LW
686 if (*s == '(')
687 return FUNC;
688 s = skipspace(s);
689 if (*s == '(')
690 return FUNC;
691 else
692 return LSTOP;
693}
694
ffb4593c
NT
695/*
696 * S_force_next
9cbb5ea2 697 * When the lexer realizes it knows the next token (for instance,
ffb4593c 698 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
699 * to know what token to return the next time the lexer is called. Caller
700 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
701 * handles the token correctly.
ffb4593c
NT
702 */
703
76e3520e 704STATIC void
cea2e8a9 705S_force_next(pTHX_ I32 type)
79072805 706{
3280af22
NIS
707 PL_nexttype[PL_nexttoke] = type;
708 PL_nexttoke++;
709 if (PL_lex_state != LEX_KNOWNEXT) {
710 PL_lex_defer = PL_lex_state;
711 PL_lex_expect = PL_expect;
712 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
713 }
714}
715
ffb4593c
NT
716/*
717 * S_force_word
718 * When the lexer knows the next thing is a word (for instance, it has
719 * just seen -> and it knows that the next char is a word char, then
720 * it calls S_force_word to stick the next word into the PL_next lookahead.
721 *
722 * Arguments:
b1b65b59 723 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
724 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
725 * int check_keyword : if true, Perl checks to make sure the word isn't
726 * a keyword (do this if the word is a label, e.g. goto FOO)
727 * int allow_pack : if true, : characters will also be allowed (require,
728 * use, etc. do this)
9cbb5ea2 729 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
730 */
731
76e3520e 732STATIC char *
cea2e8a9 733S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 734{
463ee0b2
LW
735 register char *s;
736 STRLEN len;
737
738 start = skipspace(start);
739 s = start;
7e2040f0 740 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 741 (allow_pack && *s == ':') ||
15f0808c 742 (allow_initial_tick && *s == '\'') )
a0d0e21e 743 {
3280af22
NIS
744 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
745 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
746 return start;
747 if (token == METHOD) {
748 s = skipspace(s);
749 if (*s == '(')
3280af22 750 PL_expect = XTERM;
463ee0b2 751 else {
3280af22 752 PL_expect = XOPERATOR;
463ee0b2 753 }
79072805 754 }
3280af22
NIS
755 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
756 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
757 force_next(token);
758 }
759 return s;
760}
761
ffb4593c
NT
762/*
763 * S_force_ident
9cbb5ea2 764 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
765 * text only contains the "foo" portion. The first argument is a pointer
766 * to the "foo", and the second argument is the type symbol to prefix.
767 * Forces the next token to be a "WORD".
9cbb5ea2 768 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
769 */
770
76e3520e 771STATIC void
cea2e8a9 772S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
773{
774 if (s && *s) {
11343788 775 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 776 PL_nextval[PL_nexttoke].opval = o;
79072805 777 force_next(WORD);
748a9306 778 if (kind) {
11343788 779 o->op_private = OPpCONST_ENTERED;
55497cff 780 /* XXX see note in pp_entereval() for why we forgo typo
781 warnings if the symbol must be introduced in an eval.
782 GSAR 96-10-12 */
3280af22 783 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
784 kind == '$' ? SVt_PV :
785 kind == '@' ? SVt_PVAV :
786 kind == '%' ? SVt_PVHV :
787 SVt_PVGV
788 );
748a9306 789 }
79072805
LW
790 }
791}
792
1571675a
GS
793NV
794Perl_str_to_version(pTHX_ SV *sv)
795{
796 NV retval = 0.0;
797 NV nshift = 1.0;
798 STRLEN len;
799 char *start = SvPVx(sv,len);
3aa33fe5 800 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
801 char *end = start + len;
802 while (start < end) {
ba210ebe 803 STRLEN skip;
1571675a
GS
804 UV n;
805 if (utf)
dcad2880 806 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
807 else {
808 n = *(U8*)start;
809 skip = 1;
810 }
811 retval += ((NV)n)/nshift;
812 start += skip;
813 nshift *= 1000;
814 }
815 return retval;
816}
817
ffb4593c
NT
818/*
819 * S_force_version
820 * Forces the next token to be a version number.
821 */
822
76e3520e 823STATIC char *
cea2e8a9 824S_force_version(pTHX_ char *s)
89bfa8cd 825{
826 OP *version = Nullop;
44dcb63b 827 char *d;
89bfa8cd 828
829 s = skipspace(s);
830
44dcb63b 831 d = s;
dd629d5b 832 if (*d == 'v')
44dcb63b 833 d++;
44dcb63b 834 if (isDIGIT(*d)) {
a7cb1f99 835 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 836 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 837 SV *ver;
b73d6f50 838 s = scan_num(s, &yylval);
89bfa8cd 839 version = yylval.opval;
dd629d5b
GS
840 ver = cSVOPx(version)->op_sv;
841 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 842 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
843 SvNVX(ver) = str_to_version(ver);
844 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 845 }
89bfa8cd 846 }
847 }
848
849 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 850 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd 851 force_next(WORD);
852
853 return (s);
854}
855
ffb4593c
NT
856/*
857 * S_tokeq
858 * Tokenize a quoted string passed in as an SV. It finds the next
859 * chunk, up to end of string or a backslash. It may make a new
860 * SV containing that chunk (if HINT_NEW_STRING is on). It also
861 * turns \\ into \.
862 */
863
76e3520e 864STATIC SV *
cea2e8a9 865S_tokeq(pTHX_ SV *sv)
79072805
LW
866{
867 register char *s;
868 register char *send;
869 register char *d;
b3ac6de7
IZ
870 STRLEN len = 0;
871 SV *pv = sv;
79072805
LW
872
873 if (!SvLEN(sv))
b3ac6de7 874 goto finish;
79072805 875
a0d0e21e 876 s = SvPV_force(sv, len);
21a311ee 877 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 878 goto finish;
463ee0b2 879 send = s + len;
79072805
LW
880 while (s < send && *s != '\\')
881 s++;
882 if (s == send)
b3ac6de7 883 goto finish;
79072805 884 d = s;
3280af22 885 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 886 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
887 while (s < send) {
888 if (*s == '\\') {
a0d0e21e 889 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
890 s++; /* all that, just for this */
891 }
892 *d++ = *s++;
893 }
894 *d = '\0';
463ee0b2 895 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 896 finish:
3280af22 897 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 898 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
899 return sv;
900}
901
ffb4593c
NT
902/*
903 * Now come three functions related to double-quote context,
904 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
905 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
906 * interact with PL_lex_state, and create fake ( ... ) argument lists
907 * to handle functions and concatenation.
908 * They assume that whoever calls them will be setting up a fake
909 * join call, because each subthing puts a ',' after it. This lets
910 * "lower \luPpEr"
911 * become
912 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
913 *
914 * (I'm not sure whether the spurious commas at the end of lcfirst's
915 * arguments and join's arguments are created or not).
916 */
917
918/*
919 * S_sublex_start
920 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
921 *
922 * Pattern matching will set PL_lex_op to the pattern-matching op to
923 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
924 *
925 * OP_CONST and OP_READLINE are easy--just make the new op and return.
926 *
927 * Everything else becomes a FUNC.
928 *
929 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
930 * had an OP_CONST or OP_READLINE). This just sets us up for a
931 * call to S_sublex_push().
932 */
933
76e3520e 934STATIC I32
cea2e8a9 935S_sublex_start(pTHX)
79072805
LW
936{
937 register I32 op_type = yylval.ival;
79072805
LW
938
939 if (op_type == OP_NULL) {
3280af22
NIS
940 yylval.opval = PL_lex_op;
941 PL_lex_op = Nullop;
79072805
LW
942 return THING;
943 }
944 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 945 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
946
947 if (SvTYPE(sv) == SVt_PVIV) {
948 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
949 STRLEN len;
950 char *p;
951 SV *nsv;
952
953 p = SvPV(sv, len);
79cb57f6 954 nsv = newSVpvn(p, len);
01ec43d0
GS
955 if (SvUTF8(sv))
956 SvUTF8_on(nsv);
b3ac6de7
IZ
957 SvREFCNT_dec(sv);
958 sv = nsv;
959 }
960 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 961 PL_lex_stuff = Nullsv;
79072805
LW
962 return THING;
963 }
964
3280af22
NIS
965 PL_sublex_info.super_state = PL_lex_state;
966 PL_sublex_info.sub_inwhat = op_type;
967 PL_sublex_info.sub_op = PL_lex_op;
968 PL_lex_state = LEX_INTERPPUSH;
55497cff 969
3280af22
NIS
970 PL_expect = XTERM;
971 if (PL_lex_op) {
972 yylval.opval = PL_lex_op;
973 PL_lex_op = Nullop;
55497cff 974 return PMFUNC;
975 }
976 else
977 return FUNC;
978}
979
ffb4593c
NT
980/*
981 * S_sublex_push
982 * Create a new scope to save the lexing state. The scope will be
983 * ended in S_sublex_done. Returns a '(', starting the function arguments
984 * to the uc, lc, etc. found before.
985 * Sets PL_lex_state to LEX_INTERPCONCAT.
986 */
987
76e3520e 988STATIC I32
cea2e8a9 989S_sublex_push(pTHX)
55497cff 990{
f46d017c 991 ENTER;
55497cff 992
3280af22
NIS
993 PL_lex_state = PL_sublex_info.super_state;
994 SAVEI32(PL_lex_dojoin);
995 SAVEI32(PL_lex_brackets);
3280af22
NIS
996 SAVEI32(PL_lex_casemods);
997 SAVEI32(PL_lex_starts);
998 SAVEI32(PL_lex_state);
7766f137 999 SAVEVPTR(PL_lex_inpat);
3280af22 1000 SAVEI32(PL_lex_inwhat);
57843af0 1001 SAVECOPLINE(PL_curcop);
3280af22
NIS
1002 SAVEPPTR(PL_bufptr);
1003 SAVEPPTR(PL_oldbufptr);
1004 SAVEPPTR(PL_oldoldbufptr);
1005 SAVEPPTR(PL_linestart);
1006 SAVESPTR(PL_linestr);
1007 SAVEPPTR(PL_lex_brackstack);
1008 SAVEPPTR(PL_lex_casestack);
1009
1010 PL_linestr = PL_lex_stuff;
1011 PL_lex_stuff = Nullsv;
1012
9cbb5ea2
GS
1013 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1014 = SvPVX(PL_linestr);
3280af22
NIS
1015 PL_bufend += SvCUR(PL_linestr);
1016 SAVEFREESV(PL_linestr);
1017
1018 PL_lex_dojoin = FALSE;
1019 PL_lex_brackets = 0;
3280af22
NIS
1020 New(899, PL_lex_brackstack, 120, char);
1021 New(899, PL_lex_casestack, 12, char);
1022 SAVEFREEPV(PL_lex_brackstack);
1023 SAVEFREEPV(PL_lex_casestack);
1024 PL_lex_casemods = 0;
1025 *PL_lex_casestack = '\0';
1026 PL_lex_starts = 0;
1027 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1028 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1029
1030 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1031 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1032 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1033 else
3280af22 1034 PL_lex_inpat = Nullop;
79072805 1035
55497cff 1036 return '(';
79072805
LW
1037}
1038
ffb4593c
NT
1039/*
1040 * S_sublex_done
1041 * Restores lexer state after a S_sublex_push.
1042 */
1043
76e3520e 1044STATIC I32
cea2e8a9 1045S_sublex_done(pTHX)
79072805 1046{
3280af22
NIS
1047 if (!PL_lex_starts++) {
1048 PL_expect = XOPERATOR;
79cb57f6 1049 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1050 return THING;
1051 }
1052
3280af22
NIS
1053 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1054 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1055 return yylex();
79072805
LW
1056 }
1057
ffb4593c 1058 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1059 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1060 PL_linestr = PL_lex_repl;
1061 PL_lex_inpat = 0;
1062 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1063 PL_bufend += SvCUR(PL_linestr);
1064 SAVEFREESV(PL_linestr);
1065 PL_lex_dojoin = FALSE;
1066 PL_lex_brackets = 0;
3280af22
NIS
1067 PL_lex_casemods = 0;
1068 *PL_lex_casestack = '\0';
1069 PL_lex_starts = 0;
25da4f38 1070 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1071 PL_lex_state = LEX_INTERPNORMAL;
1072 PL_lex_starts++;
e9fa98b2
HS
1073 /* we don't clear PL_lex_repl here, so that we can check later
1074 whether this is an evalled subst; that means we rely on the
1075 logic to ensure sublex_done() is called again only via the
1076 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1077 }
e9fa98b2 1078 else {
3280af22 1079 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1080 PL_lex_repl = Nullsv;
1081 }
79072805 1082 return ',';
ffed7fef
LW
1083 }
1084 else {
f46d017c 1085 LEAVE;
3280af22
NIS
1086 PL_bufend = SvPVX(PL_linestr);
1087 PL_bufend += SvCUR(PL_linestr);
1088 PL_expect = XOPERATOR;
09bef843 1089 PL_sublex_info.sub_inwhat = 0;
79072805 1090 return ')';
ffed7fef
LW
1091 }
1092}
1093
02aa26ce
NT
1094/*
1095 scan_const
1096
1097 Extracts a pattern, double-quoted string, or transliteration. This
1098 is terrifying code.
1099
3280af22
NIS
1100 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1101 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1102 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1103
9b599b2a
GS
1104 Returns a pointer to the character scanned up to. Iff this is
1105 advanced from the start pointer supplied (ie if anything was
1106 successfully parsed), will leave an OP for the substring scanned
1107 in yylval. Caller must intuit reason for not parsing further
1108 by looking at the next characters herself.
1109
02aa26ce
NT
1110 In patterns:
1111 backslashes:
1112 double-quoted style: \r and \n
1113 regexp special ones: \D \s
1114 constants: \x3
1115 backrefs: \1 (deprecated in substitution replacements)
1116 case and quoting: \U \Q \E
1117 stops on @ and $, but not for $ as tail anchor
1118
1119 In transliterations:
1120 characters are VERY literal, except for - not at the start or end
1121 of the string, which indicates a range. scan_const expands the
1122 range to the full set of intermediate characters.
1123
1124 In double-quoted strings:
1125 backslashes:
1126 double-quoted style: \r and \n
1127 constants: \x3
1128 backrefs: \1 (deprecated)
1129 case and quoting: \U \Q \E
1130 stops on @ and $
1131
1132 scan_const does *not* construct ops to handle interpolated strings.
1133 It stops processing as soon as it finds an embedded $ or @ variable
1134 and leaves it to the caller to work out what's going on.
1135
1136 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1137
1138 $ in pattern could be $foo or could be tail anchor. Assumption:
1139 it's a tail anchor if $ is the last thing in the string, or if it's
1140 followed by one of ")| \n\t"
1141
1142 \1 (backreferences) are turned into $1
1143
1144 The structure of the code is
1145 while (there's a character to process) {
1146 handle transliteration ranges
1147 skip regexp comments
1148 skip # initiated comments in //x patterns
1149 check for embedded @foo
1150 check for embedded scalars
1151 if (backslash) {
1152 leave intact backslashes from leave (below)
1153 deprecate \1 in strings and sub replacements
1154 handle string-changing backslashes \l \U \Q \E, etc.
1155 switch (what was escaped) {
1156 handle - in a transliteration (becomes a literal -)
1157 handle \132 octal characters
1158 handle 0x15 hex characters
1159 handle \cV (control V)
1160 handle printf backslashes (\f, \r, \n, etc)
1161 } (end switch)
1162 } (end if backslash)
1163 } (end while character to read)
1164
1165*/
1166
76e3520e 1167STATIC char *
cea2e8a9 1168S_scan_const(pTHX_ char *start)
79072805 1169{
3280af22 1170 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1171 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1172 register char *s = start; /* start of the constant */
1173 register char *d = SvPVX(sv); /* destination for copies */
1174 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1175 bool didrange = FALSE; /* did we just finish a range? */
89491803 1176 bool has_utf8 = FALSE; /* embedded \x{} */
012bcf8d
GS
1177 UV uv;
1178
ac2262e3 1179 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1180 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1181 : UTF;
89491803 1182 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1183 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1184 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1185 : UTF;
dff6d3cd 1186 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1187 PL_lex_inpat
4a2d328f 1188 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1189 : "";
79072805
LW
1190
1191 while (s < send || dorange) {
02aa26ce 1192 /* get transliterations out of the way (they're most literal) */
3280af22 1193 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1194 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1195 if (dorange) {
02aa26ce 1196 I32 i; /* current expanded character */
8ada0baa 1197 I32 min; /* first character in range */
02aa26ce
NT
1198 I32 max; /* last character in range */
1199
1200 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1201 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1202 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1203 d -= 2; /* eat the first char and the - */
1204
8ada0baa
JH
1205 min = (U8)*d; /* first char in range */
1206 max = (U8)d[1]; /* last char in range */
1207
c2e66d9e 1208 if (min > max) {
01ec43d0
GS
1209 Perl_croak(aTHX_
1210 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1211 (char)min, (char)max);
c2e66d9e
GS
1212 }
1213
8ada0baa
JH
1214#ifndef ASCIIish
1215 if ((isLOWER(min) && isLOWER(max)) ||
1216 (isUPPER(min) && isUPPER(max))) {
1217 if (isLOWER(min)) {
1218 for (i = min; i <= max; i++)
1219 if (isLOWER(i))
1220 *d++ = i;
1221 } else {
1222 for (i = min; i <= max; i++)
1223 if (isUPPER(i))
1224 *d++ = i;
1225 }
1226 }
1227 else
1228#endif
1229 for (i = min; i <= max; i++)
1230 *d++ = i;
02aa26ce
NT
1231
1232 /* mark the range as done, and continue */
79072805 1233 dorange = FALSE;
01ec43d0 1234 didrange = TRUE;
79072805 1235 continue;
c2e66d9e 1236 }
02aa26ce
NT
1237
1238 /* range begins (ignore - as first or last char) */
79072805 1239 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1240 if (didrange) {
1fafa243 1241 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1242 }
a0ed51b3 1243 if (utf) {
a176fa2a 1244 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1245 s++;
1246 continue;
1247 }
79072805
LW
1248 dorange = TRUE;
1249 s++;
01ec43d0
GS
1250 }
1251 else {
1252 didrange = FALSE;
1253 }
79072805 1254 }
02aa26ce
NT
1255
1256 /* if we get here, we're not doing a transliteration */
1257
0f5d15d6
IZ
1258 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1259 except for the last char, which will be done separately. */
3280af22 1260 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1261 if (s[2] == '#') {
1262 while (s < send && *s != ')')
1263 *d++ = *s++;
155aba94
GS
1264 }
1265 else if (s[2] == '{' /* This should match regcomp.c */
1266 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1267 {
cc6b7395 1268 I32 count = 1;
0f5d15d6 1269 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1270 char c;
1271
d9f97599
GS
1272 while (count && (c = *regparse)) {
1273 if (c == '\\' && regparse[1])
1274 regparse++;
cc6b7395
IZ
1275 else if (c == '{')
1276 count++;
1277 else if (c == '}')
1278 count--;
d9f97599 1279 regparse++;
cc6b7395 1280 }
5bdf89e7
IZ
1281 if (*regparse != ')') {
1282 regparse--; /* Leave one char for continuation. */
cc6b7395 1283 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1284 }
0f5d15d6 1285 while (s < regparse)
cc6b7395
IZ
1286 *d++ = *s++;
1287 }
748a9306 1288 }
02aa26ce
NT
1289
1290 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1291 else if (*s == '#' && PL_lex_inpat &&
1292 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1293 while (s+1 < send && *s != '\n')
1294 *d++ = *s++;
1295 }
02aa26ce 1296
5d1d4326
JH
1297 /* check for embedded arrays
1298 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1299 */
7e2040f0 1300 else if (*s == '@' && s[1]
5d1d4326 1301 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1302 break;
02aa26ce
NT
1303
1304 /* check for embedded scalars. only stop if we're sure it's a
1305 variable.
1306 */
79072805 1307 else if (*s == '$') {
3280af22 1308 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1309 break;
c277df42 1310 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1311 break; /* in regexp, $ might be tail anchor */
1312 }
02aa26ce 1313
a0ed51b3
LW
1314 /* (now in tr/// code again) */
1315
89491803 1316 if (*s & 0x80 && this_utf8) {
ba210ebe
JH
1317 STRLEN len;
1318 UV uv;
1319
dcad2880 1320 uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
cc366d4b 1321 if (len == (STRLEN)-1) {
fcc8fcf6 1322 /* Illegal UTF8 (a high-bit byte), make it valid. */
ba210ebe
JH
1323 char *old_pvx = SvPVX(sv);
1324 /* need space for one extra char (NOTE: SvCUR() not set here) */
1325 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1326 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1327 }
1328 else {
1329 while (len--)
1330 *d++ = *s++;
1331 }
89491803 1332 has_utf8 = TRUE;
ba210ebe 1333 continue;
a0ed51b3
LW
1334 }
1335
02aa26ce 1336 /* backslashes */
79072805
LW
1337 if (*s == '\\' && s+1 < send) {
1338 s++;
02aa26ce
NT
1339
1340 /* some backslashes we leave behind */
c9f97d15 1341 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1342 *d++ = '\\';
1343 *d++ = *s++;
1344 continue;
1345 }
02aa26ce
NT
1346
1347 /* deprecate \1 in strings and substitution replacements */
3280af22 1348 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1349 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1350 {
599cee73 1351 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1352 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1353 *--s = '$';
1354 break;
1355 }
02aa26ce
NT
1356
1357 /* string-change backslash escapes */
3280af22 1358 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1359 --s;
1360 break;
1361 }
02aa26ce
NT
1362
1363 /* if we get here, it's either a quoted -, or a digit */
79072805 1364 switch (*s) {
02aa26ce
NT
1365
1366 /* quoted - in transliterations */
79072805 1367 case '-':
3280af22 1368 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1369 *d++ = *s++;
1370 continue;
1371 }
1372 /* FALL THROUGH */
1373 default:
11b8faa4 1374 {
7e84c16c 1375 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1376 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1377 "Unrecognized escape \\%c passed through",
1378 *s);
1379 /* default action is to copy the quoted character */
1380 *d++ = *s++;
1381 continue;
1382 }
02aa26ce
NT
1383
1384 /* \132 indicates an octal constant */
79072805
LW
1385 case '0': case '1': case '2': case '3':
1386 case '4': case '5': case '6': case '7':
ba210ebe
JH
1387 {
1388 STRLEN len = 0; /* disallow underscores */
1389 uv = (UV)scan_oct(s, 3, &len);
1390 s += len;
1391 }
012bcf8d 1392 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1393
1394 /* \x24 indicates a hex constant */
79072805 1395 case 'x':
a0ed51b3
LW
1396 ++s;
1397 if (*s == '{') {
1398 char* e = strchr(s, '}');
adaeee49 1399 if (!e) {
a0ed51b3 1400 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1401 e = s;
1402 }
89491803 1403 else {
ba210ebe
JH
1404 STRLEN len = 1; /* allow underscores */
1405 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
89491803 1406 has_utf8 = TRUE;
ba210ebe
JH
1407 }
1408 s = e + 1;
a0ed51b3
LW
1409 }
1410 else {
ba210ebe
JH
1411 {
1412 STRLEN len = 0; /* disallow underscores */
1413 uv = (UV)scan_hex(s, 2, &len);
1414 s += len;
1415 }
012bcf8d
GS
1416 }
1417
1418 NUM_ESCAPE_INSERT:
1419 /* Insert oct or hex escaped character.
1420 * There will always enough room in sv since such escapes will
1421 * be longer than any utf8 sequence they can end up as
1422 */
89491803
SC
1423 if (uv > 127 || has_utf8) {
1424 if (!this_utf8 && !has_utf8 && uv > 255) {
012bcf8d
GS
1425 /* might need to recode whatever we have accumulated so far
1426 * if it contains any hibit chars
1427 */
1428 int hicount = 0;
1429 char *c;
1430 for (c = SvPVX(sv); c < d; c++) {
1431 if (*c & 0x80)
1432 hicount++;
1433 }
1434 if (hicount) {
1435 char *old_pvx = SvPVX(sv);
1436 char *src, *dst;
1437 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1438
1439 src = d - 1;
1440 d += hicount;
1441 dst = d - 1;
1442
1443 while (src < dst) {
1444 if (*src & 0x80) {
1445 dst--;
1446 uv_to_utf8((U8*)dst, (U8)*src--);
1447 dst--;
1448 }
1449 else {
1450 *dst-- = *src--;
1451 }
1452 }
1453 }
1454 }
1455
89491803 1456 if (has_utf8 || uv > 255) {
012bcf8d 1457 d = (char*)uv_to_utf8((U8*)d, uv);
89491803 1458 this_utf8 = TRUE;
012bcf8d 1459 }
a0ed51b3 1460 else {
012bcf8d 1461 *d++ = (char)uv;
a0ed51b3 1462 }
012bcf8d
GS
1463 }
1464 else {
1465 *d++ = (char)uv;
a0ed51b3 1466 }
79072805 1467 continue;
02aa26ce 1468
4a2d328f
IZ
1469 /* \N{latin small letter a} is a named character */
1470 case 'N':
423cee85
JH
1471 ++s;
1472 if (*s == '{') {
1473 char* e = strchr(s, '}');
155aba94 1474 SV *res;
423cee85
JH
1475 STRLEN len;
1476 char *str;
423cee85
JH
1477
1478 if (!e) {
5777a3f7 1479 yyerror("Missing right brace on \\N{}");
423cee85
JH
1480 e = s - 1;
1481 goto cont_scan;
1482 }
1483 res = newSVpvn(s + 1, e - s - 1);
1484 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1485 res, Nullsv, "\\N{...}" );
423cee85 1486 str = SvPV(res,len);
89491803 1487 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1488 char *ostart = SvPVX(sv);
1489 SvCUR_set(sv, d - ostart);
1490 SvPOK_on(sv);
e4f3eed8 1491 *d = '\0';
f08d6ad9 1492 sv_utf8_upgrade(sv);
d2f449dd
SB
1493 /* this just broke our allocation above... */
1494 SvGROW(sv, send - start);
f08d6ad9 1495 d = SvPVX(sv) + SvCUR(sv);
89491803 1496 has_utf8 = TRUE;
f08d6ad9 1497 }
423cee85
JH
1498 if (len > e - s + 4) {
1499 char *odest = SvPVX(sv);
1500
1501 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1502 d = SvPVX(sv) + (d - odest);
1503 }
1504 Copy(str, d, len, char);
1505 d += len;
1506 SvREFCNT_dec(res);
1507 cont_scan:
1508 s = e + 1;
1509 }
1510 else
5777a3f7 1511 yyerror("Missing braces on \\N{}");
423cee85
JH
1512 continue;
1513
02aa26ce 1514 /* \c is a control character */
79072805
LW
1515 case 'c':
1516 s++;
9d116dd7
JH
1517#ifdef EBCDIC
1518 *d = *s++;
1519 if (isLOWER(*d))
1520 *d = toUPPER(*d);
774a9426
GS
1521 *d = toCTRL(*d);
1522 d++;
9d116dd7 1523#else
ba210ebe
JH
1524 {
1525 U8 c = *s++;
1526 *d++ = toCTRL(c);
1527 }
9d116dd7 1528#endif
79072805 1529 continue;
02aa26ce
NT
1530
1531 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1532 case 'b':
1533 *d++ = '\b';
1534 break;
1535 case 'n':
1536 *d++ = '\n';
1537 break;
1538 case 'r':
1539 *d++ = '\r';
1540 break;
1541 case 'f':
1542 *d++ = '\f';
1543 break;
1544 case 't':
1545 *d++ = '\t';
1546 break;
34a3fe2a
PP
1547#ifdef EBCDIC
1548 case 'e':
1549 *d++ = '\047'; /* CP 1047 */
1550 break;
1551 case 'a':
1552 *d++ = '\057'; /* CP 1047 */
1553 break;
1554#else
79072805
LW
1555 case 'e':
1556 *d++ = '\033';
1557 break;
1558 case 'a':
1559 *d++ = '\007';
1560 break;
34a3fe2a 1561#endif
02aa26ce
NT
1562 } /* end switch */
1563
79072805
LW
1564 s++;
1565 continue;
02aa26ce
NT
1566 } /* end if (backslash) */
1567
79072805 1568 *d++ = *s++;
02aa26ce
NT
1569 } /* while loop to process each character */
1570
1571 /* terminate the string and set up the sv */
79072805 1572 *d = '\0';
463ee0b2 1573 SvCUR_set(sv, d - SvPVX(sv));
79072805 1574 SvPOK_on(sv);
89491803 1575 if (has_utf8)
7e2040f0 1576 SvUTF8_on(sv);
79072805 1577
02aa26ce 1578 /* shrink the sv if we allocated more than we used */
79072805
LW
1579 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1580 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1581 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1582 }
02aa26ce 1583
9b599b2a 1584 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1585 if (s > PL_bufptr) {
1586 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1587 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1588 sv, Nullsv,
3280af22 1589 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1590 ? "tr"
3280af22 1591 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1592 ? "s"
1593 : "qq")));
79072805 1594 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1595 } else
8990e307 1596 SvREFCNT_dec(sv);
79072805
LW
1597 return s;
1598}
1599
ffb4593c
NT
1600/* S_intuit_more
1601 * Returns TRUE if there's more to the expression (e.g., a subscript),
1602 * FALSE otherwise.
ffb4593c
NT
1603 *
1604 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1605 *
1606 * ->[ and ->{ return TRUE
1607 * { and [ outside a pattern are always subscripts, so return TRUE
1608 * if we're outside a pattern and it's not { or [, then return FALSE
1609 * if we're in a pattern and the first char is a {
1610 * {4,5} (any digits around the comma) returns FALSE
1611 * if we're in a pattern and the first char is a [
1612 * [] returns FALSE
1613 * [SOMETHING] has a funky algorithm to decide whether it's a
1614 * character class or not. It has to deal with things like
1615 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1616 * anything else returns TRUE
1617 */
1618
9cbb5ea2
GS
1619/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1620
76e3520e 1621STATIC int
cea2e8a9 1622S_intuit_more(pTHX_ register char *s)
79072805 1623{
3280af22 1624 if (PL_lex_brackets)
79072805
LW
1625 return TRUE;
1626 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1627 return TRUE;
1628 if (*s != '{' && *s != '[')
1629 return FALSE;
3280af22 1630 if (!PL_lex_inpat)
79072805
LW
1631 return TRUE;
1632
1633 /* In a pattern, so maybe we have {n,m}. */
1634 if (*s == '{') {
1635 s++;
1636 if (!isDIGIT(*s))
1637 return TRUE;
1638 while (isDIGIT(*s))
1639 s++;
1640 if (*s == ',')
1641 s++;
1642 while (isDIGIT(*s))
1643 s++;
1644 if (*s == '}')
1645 return FALSE;
1646 return TRUE;
1647
1648 }
1649
1650 /* On the other hand, maybe we have a character class */
1651
1652 s++;
1653 if (*s == ']' || *s == '^')
1654 return FALSE;
1655 else {
ffb4593c 1656 /* this is terrifying, and it works */
79072805
LW
1657 int weight = 2; /* let's weigh the evidence */
1658 char seen[256];
f27ffc4a 1659 unsigned char un_char = 255, last_un_char;
93a17b20 1660 char *send = strchr(s,']');
3280af22 1661 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1662
1663 if (!send) /* has to be an expression */
1664 return TRUE;
1665
1666 Zero(seen,256,char);
1667 if (*s == '$')
1668 weight -= 3;
1669 else if (isDIGIT(*s)) {
1670 if (s[1] != ']') {
1671 if (isDIGIT(s[1]) && s[2] == ']')
1672 weight -= 10;
1673 }
1674 else
1675 weight -= 100;
1676 }
1677 for (; s < send; s++) {
1678 last_un_char = un_char;
1679 un_char = (unsigned char)*s;
1680 switch (*s) {
1681 case '@':
1682 case '&':
1683 case '$':
1684 weight -= seen[un_char] * 10;
7e2040f0 1685 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1686 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1687 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1688 weight -= 100;
1689 else
1690 weight -= 10;
1691 }
1692 else if (*s == '$' && s[1] &&
93a17b20
LW
1693 strchr("[#!%*<>()-=",s[1])) {
1694 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1695 weight -= 10;
1696 else
1697 weight -= 1;
1698 }
1699 break;
1700 case '\\':
1701 un_char = 254;
1702 if (s[1]) {
93a17b20 1703 if (strchr("wds]",s[1]))
79072805
LW
1704 weight += 100;
1705 else if (seen['\''] || seen['"'])
1706 weight += 1;
93a17b20 1707 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1708 weight += 40;
1709 else if (isDIGIT(s[1])) {
1710 weight += 40;
1711 while (s[1] && isDIGIT(s[1]))
1712 s++;
1713 }
1714 }
1715 else
1716 weight += 100;
1717 break;
1718 case '-':
1719 if (s[1] == '\\')
1720 weight += 50;
93a17b20 1721 if (strchr("aA01! ",last_un_char))
79072805 1722 weight += 30;
93a17b20 1723 if (strchr("zZ79~",s[1]))
79072805 1724 weight += 30;
f27ffc4a
GS
1725 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1726 weight -= 5; /* cope with negative subscript */
79072805
LW
1727 break;
1728 default:
93a17b20 1729 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1730 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1731 char *d = tmpbuf;
1732 while (isALPHA(*s))
1733 *d++ = *s++;
1734 *d = '\0';
1735 if (keyword(tmpbuf, d - tmpbuf))
1736 weight -= 150;
1737 }
1738 if (un_char == last_un_char + 1)
1739 weight += 5;
1740 weight -= seen[un_char];
1741 break;
1742 }
1743 seen[un_char]++;
1744 }
1745 if (weight >= 0) /* probably a character class */
1746 return FALSE;
1747 }
1748
1749 return TRUE;
1750}
ffed7fef 1751
ffb4593c
NT
1752/*
1753 * S_intuit_method
1754 *
1755 * Does all the checking to disambiguate
1756 * foo bar
1757 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1758 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1759 *
1760 * First argument is the stuff after the first token, e.g. "bar".
1761 *
1762 * Not a method if bar is a filehandle.
1763 * Not a method if foo is a subroutine prototyped to take a filehandle.
1764 * Not a method if it's really "Foo $bar"
1765 * Method if it's "foo $bar"
1766 * Not a method if it's really "print foo $bar"
1767 * Method if it's really "foo package::" (interpreted as package->foo)
1768 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1769 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1770 * =>
1771 */
1772
76e3520e 1773STATIC int
cea2e8a9 1774S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1775{
1776 char *s = start + (*start == '$');
3280af22 1777 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1778 STRLEN len;
1779 GV* indirgv;
1780
1781 if (gv) {
b6c543e3 1782 CV *cv;
a0d0e21e
LW
1783 if (GvIO(gv))
1784 return 0;
b6c543e3
IZ
1785 if ((cv = GvCVu(gv))) {
1786 char *proto = SvPVX(cv);
1787 if (proto) {
1788 if (*proto == ';')
1789 proto++;
1790 if (*proto == '*')
1791 return 0;
1792 }
1793 } else
a0d0e21e
LW
1794 gv = 0;
1795 }
8903cb82 1796 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1797 /* start is the beginning of the possible filehandle/object,
1798 * and s is the end of it
1799 * tmpbuf is a copy of it
1800 */
1801
a0d0e21e 1802 if (*start == '$') {
3280af22 1803 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1804 return 0;
1805 s = skipspace(s);
3280af22
NIS
1806 PL_bufptr = start;
1807 PL_expect = XREF;
a0d0e21e
LW
1808 return *s == '(' ? FUNCMETH : METHOD;
1809 }
1810 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1811 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1812 len -= 2;
1813 tmpbuf[len] = '\0';
1814 goto bare_package;
1815 }
1816 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1817 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1818 return 0;
1819 /* filehandle or package name makes it a method */
89bfa8cd 1820 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1821 s = skipspace(s);
3280af22 1822 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1823 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1824 bare_package:
3280af22 1825 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1826 newSVpvn(tmpbuf,len));
3280af22
NIS
1827 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1828 PL_expect = XTERM;
a0d0e21e 1829 force_next(WORD);
3280af22 1830 PL_bufptr = s;
a0d0e21e
LW
1831 return *s == '(' ? FUNCMETH : METHOD;
1832 }
1833 }
1834 return 0;
1835}
1836
ffb4593c
NT
1837/*
1838 * S_incl_perldb
1839 * Return a string of Perl code to load the debugger. If PERL5DB
1840 * is set, it will return the contents of that, otherwise a
1841 * compile-time require of perl5db.pl.
1842 */
1843
76e3520e 1844STATIC char*
cea2e8a9 1845S_incl_perldb(pTHX)
a0d0e21e 1846{
3280af22 1847 if (PL_perldb) {
76e3520e 1848 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1849
1850 if (pdb)
1851 return pdb;
61bb5906 1852 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1853 return "BEGIN { require 'perl5db.pl' }";
1854 }
1855 return "";
1856}
1857
1858
16d20bd9
AD
1859/* Encoded script support. filter_add() effectively inserts a
1860 * 'pre-processing' function into the current source input stream.
1861 * Note that the filter function only applies to the current source file
1862 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1863 *
1864 * The datasv parameter (which may be NULL) can be used to pass
1865 * private data to this instance of the filter. The filter function
1866 * can recover the SV using the FILTER_DATA macro and use it to
1867 * store private buffers and state information.
1868 *
1869 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1870 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1871 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1872 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1873 * private use must be set using malloc'd pointers.
1874 */
16d20bd9
AD
1875
1876SV *
864dbfa3 1877Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1878{
f4c556ac
GS
1879 if (!funcp)
1880 return Nullsv;
1881
3280af22
NIS
1882 if (!PL_rsfp_filters)
1883 PL_rsfp_filters = newAV();
16d20bd9 1884 if (!datasv)
8c52afec 1885 datasv = NEWSV(255,0);
16d20bd9 1886 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1887 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1888 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1889 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1890 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1891 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1892 av_unshift(PL_rsfp_filters, 1);
1893 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1894 return(datasv);
1895}
1896
1897
1898/* Delete most recently added instance of this filter function. */
a0d0e21e 1899void
864dbfa3 1900Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1901{
e0c19803 1902 SV *datasv;
f4c556ac 1903 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1904 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1905 return;
1906 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1907 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1908 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1909 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1910 IoANY(datasv) = (void *)NULL;
3280af22 1911 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1912
16d20bd9
AD
1913 return;
1914 }
1915 /* we need to search for the correct entry and clear it */
cea2e8a9 1916 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1917}
1918
1919
1920/* Invoke the n'th filter function for the current rsfp. */
1921I32
864dbfa3 1922Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1923
1924
1925 /* 0 = read one text line */
a0d0e21e 1926{
16d20bd9
AD
1927 filter_t funcp;
1928 SV *datasv = NULL;
e50aee73 1929
3280af22 1930 if (!PL_rsfp_filters)
16d20bd9 1931 return -1;
3280af22 1932 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1933 /* Provide a default input filter to make life easy. */
1934 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1935 DEBUG_P(PerlIO_printf(Perl_debug_log,
1936 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1937 if (maxlen) {
1938 /* Want a block */
1939 int len ;
1940 int old_len = SvCUR(buf_sv) ;
1941
1942 /* ensure buf_sv is large enough */
1943 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1944 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1945 if (PerlIO_error(PL_rsfp))
37120919
AD
1946 return -1; /* error */
1947 else
1948 return 0 ; /* end of file */
1949 }
16d20bd9
AD
1950 SvCUR_set(buf_sv, old_len + len) ;
1951 } else {
1952 /* Want a line */
3280af22
NIS
1953 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1954 if (PerlIO_error(PL_rsfp))
37120919
AD
1955 return -1; /* error */
1956 else
1957 return 0 ; /* end of file */
1958 }
16d20bd9
AD
1959 }
1960 return SvCUR(buf_sv);
1961 }
1962 /* Skip this filter slot if filter has been deleted */
3280af22 1963 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1964 DEBUG_P(PerlIO_printf(Perl_debug_log,
1965 "filter_read %d: skipped (filter deleted)\n",
1966 idx));
16d20bd9
AD
1967 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1968 }
1969 /* Get function pointer hidden within datasv */
4755096e 1970 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1971 DEBUG_P(PerlIO_printf(Perl_debug_log,
1972 "filter_read %d: via function %p (%s)\n",
1973 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1974 /* Call function. The function is expected to */
1975 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1976 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1977 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1978}
1979
76e3520e 1980STATIC char *
cea2e8a9 1981S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1982{
c39cd008 1983#ifdef PERL_CR_FILTER
3280af22 1984 if (!PL_rsfp_filters) {
c39cd008 1985 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1986 }
1987#endif
3280af22 1988 if (PL_rsfp_filters) {
16d20bd9 1989
55497cff 1990 if (!append)
1991 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
1992 if (FILTER_READ(0, sv, 0) > 0)
1993 return ( SvPVX(sv) ) ;
1994 else
1995 return Nullch ;
1996 }
9d116dd7 1997 else
fd049845 1998 return (sv_gets(sv, fp, append));
a0d0e21e
LW
1999}
2000
01ec43d0
GS
2001STATIC HV *
2002S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2003{
2004 GV *gv;
2005
01ec43d0 2006 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2007 return PL_curstash;
2008
2009 if (len > 2 &&
2010 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2011 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2012 {
2013 return GvHV(gv); /* Foo:: */
def3634b
GS
2014 }
2015
2016 /* use constant CLASS => 'MyClass' */
2017 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2018 SV *sv;
2019 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2020 pkgname = SvPV_nolen(sv);
2021 }
2022 }
2023
2024 return gv_stashpv(pkgname, FALSE);
2025}
a0d0e21e 2026
748a9306
LW
2027#ifdef DEBUGGING
2028 static char* exp_name[] =
09bef843
SB
2029 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2030 "ATTRTERM", "TERMBLOCK"
2031 };
748a9306 2032#endif
463ee0b2 2033
02aa26ce
NT
2034/*
2035 yylex
2036
2037 Works out what to call the token just pulled out of the input
2038 stream. The yacc parser takes care of taking the ops we return and
2039 stitching them into a tree.
2040
2041 Returns:
2042 PRIVATEREF
2043
2044 Structure:
2045 if read an identifier
2046 if we're in a my declaration
2047 croak if they tried to say my($foo::bar)
2048 build the ops for a my() declaration
2049 if it's an access to a my() variable
2050 are we in a sort block?
2051 croak if my($a); $a <=> $b
2052 build ops for access to a my() variable
2053 if in a dq string, and they've said @foo and we can't find @foo
2054 croak
2055 build ops for a bareword
2056 if we already built the token before, use it.
2057*/
2058
dba4d153 2059#ifdef USE_PURE_BISON
bf4acbe4 2060#ifdef __SC__
dba4d153 2061#pragma segment Perl_yylex_r
bf4acbe4 2062#endif
864dbfa3 2063int
dba4d153 2064Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2065{
20141f0e
IRC
2066 int r;
2067
20141f0e
IRC
2068 yylval_pointer[yyactlevel] = lvalp;
2069 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2070 yyactlevel++;
2071 if (yyactlevel >= YYMAXLEVEL)
2072 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2073
dba4d153 2074 r = Perl_yylex(aTHX);
20141f0e 2075
20141f0e 2076 yyactlevel--;
20141f0e
IRC
2077
2078 return r;
2079}
dba4d153 2080#endif
20141f0e 2081
dba4d153
JH
2082#ifdef __SC__
2083#pragma segment Perl_yylex
2084#endif
2085
2086int
2087#ifdef USE_PURE_BISON
2088Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
2089#else
2090Perl_yylex(pTHX)
2091#endif
20141f0e 2092{
79072805 2093 register char *s;
378cc40b 2094 register char *d;
79072805 2095 register I32 tmp;
463ee0b2 2096 STRLEN len;
161b471a
NIS
2097 GV *gv = Nullgv;
2098 GV **gvp = 0;
a687059c 2099
02aa26ce 2100 /* check if there's an identifier for us to look at */
3280af22 2101 if (PL_pending_ident) {
02aa26ce 2102 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2103 char pit = PL_pending_ident;
2104 PL_pending_ident = 0;
bbce6d69 2105
607df283
SC
2106 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2107 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2108
02aa26ce
NT
2109 /* if we're in a my(), we can't allow dynamics here.
2110 $foo'bar has already been turned into $foo::bar, so
2111 just check for colons.
2112
2113 if it's a legal name, the OP is a PADANY.
2114 */
3280af22 2115 if (PL_in_my) {
77ca0c92 2116 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2117 if (strchr(PL_tokenbuf,':'))
2118 yyerror(Perl_form(aTHX_ "No package name allowed for "
2119 "variable %s in \"our\"",
2120 PL_tokenbuf));
77ca0c92
LW
2121 tmp = pad_allocmy(PL_tokenbuf);
2122 }
2123 else {
2124 if (strchr(PL_tokenbuf,':'))
2125 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2126
77ca0c92
LW
2127 yylval.opval = newOP(OP_PADANY, 0);
2128 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2129 return PRIVATEREF;
2130 }
bbce6d69 2131 }
2132
02aa26ce
NT
2133 /*
2134 build the ops for accesses to a my() variable.
2135
2136 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2137 then used in a comparison. This catches most, but not
2138 all cases. For instance, it catches
2139 sort { my($a); $a <=> $b }
2140 but not
2141 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2142 (although why you'd do that is anyone's guess).
2143 */
2144
3280af22 2145 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2146#ifdef USE_THREADS
54b9620d 2147 /* Check for single character per-thread SVs */
3280af22
NIS
2148 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2149 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2150 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2151 {
2faa37cc 2152 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2153 yylval.opval->op_targ = tmp;
2154 return PRIVATEREF;
2155 }
2156#endif /* USE_THREADS */
3280af22 2157 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2158 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2159 /* might be an "our" variable" */
f472eb5c 2160 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2161 /* build ops for a bareword */
f472eb5c
GS
2162 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2163 sv_catpvn(sym, "::", 2);
2164 sv_catpv(sym, PL_tokenbuf+1);
2165 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2166 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2167 gv_fetchpv(SvPVX(sym),
77ca0c92 2168 (PL_in_eval
f472eb5c
GS
2169 ? (GV_ADDMULTI | GV_ADDINEVAL)
2170 : TRUE
77ca0c92
LW
2171 ),
2172 ((PL_tokenbuf[0] == '$') ? SVt_PV
2173 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2174 : SVt_PVHV));
2175 return WORD;
2176 }
2177
02aa26ce 2178 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2179 if (PL_last_lop_op == OP_SORT &&
2180 PL_tokenbuf[0] == '$' &&
2181 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2182 && !PL_tokenbuf[2])
bbce6d69 2183 {
3280af22
NIS
2184 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2185 d < PL_bufend && *d != '\n';
a863c7d1
MB
2186 d++)
2187 {
2188 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2189 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2190 PL_tokenbuf);
a863c7d1 2191 }
bbce6d69 2192 }
2193 }
bbce6d69 2194
a863c7d1
MB
2195 yylval.opval = newOP(OP_PADANY, 0);
2196 yylval.opval->op_targ = tmp;
2197 return PRIVATEREF;
2198 }
bbce6d69 2199 }
2200
02aa26ce
NT
2201 /*
2202 Whine if they've said @foo in a doublequoted string,
2203 and @foo isn't a variable we can find in the symbol
2204 table.
2205 */
3280af22
NIS
2206 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2207 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2208 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2209 && ckWARN(WARN_AMBIGUOUS))
2210 {
2211 /* Downgraded from fatal to warning 20000522 mjd */
2212 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2213 "Possible unintended interpolation of %s in string",
2214 PL_tokenbuf);
2215 }
bbce6d69 2216 }
2217
02aa26ce 2218 /* build ops for a bareword */
3280af22 2219 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2220 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2221 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2222 ((PL_tokenbuf[0] == '$') ? SVt_PV
2223 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2224 : SVt_PVHV));
2225 return WORD;
2226 }
2227
02aa26ce
NT
2228 /* no identifier pending identification */
2229
3280af22 2230 switch (PL_lex_state) {
79072805
LW
2231#ifdef COMMENTARY
2232 case LEX_NORMAL: /* Some compilers will produce faster */
2233 case LEX_INTERPNORMAL: /* code if we comment these out. */
2234 break;
2235#endif
2236
09bef843 2237 /* when we've already built the next token, just pull it out of the queue */
79072805 2238 case LEX_KNOWNEXT:
3280af22
NIS
2239 PL_nexttoke--;
2240 yylval = PL_nextval[PL_nexttoke];
2241 if (!PL_nexttoke) {
2242 PL_lex_state = PL_lex_defer;
2243 PL_expect = PL_lex_expect;
2244 PL_lex_defer = LEX_NORMAL;
463ee0b2 2245 }
607df283 2246 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2247 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2248 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2249
3280af22 2250 return(PL_nexttype[PL_nexttoke]);
79072805 2251
02aa26ce 2252 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2253 when we get here, PL_bufptr is at the \
02aa26ce 2254 */
79072805
LW
2255 case LEX_INTERPCASEMOD:
2256#ifdef DEBUGGING
3280af22 2257 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2258 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2259#endif
02aa26ce 2260 /* handle \E or end of string */
3280af22 2261 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2262 char oldmod;
02aa26ce
NT
2263
2264 /* if at a \E */
3280af22
NIS
2265 if (PL_lex_casemods) {
2266 oldmod = PL_lex_casestack[--PL_lex_casemods];
2267 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2268
3280af22
NIS
2269 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2270 PL_bufptr += 2;
2271 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2272 }
79072805
LW
2273 return ')';
2274 }
3280af22
NIS
2275 if (PL_bufptr != PL_bufend)
2276 PL_bufptr += 2;
2277 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2278 return yylex();
79072805
LW
2279 }
2280 else {
607df283
SC
2281 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2282 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2283 s = PL_bufptr + 1;
79072805
LW
2284 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2285 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2286 if (strchr("LU", *s) &&
3280af22 2287 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2288 {
3280af22 2289 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2290 return ')';
2291 }
3280af22
NIS
2292 if (PL_lex_casemods > 10) {
2293 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2294 if (newlb != PL_lex_casestack) {
a0d0e21e 2295 SAVEFREEPV(newlb);
3280af22 2296 PL_lex_casestack = newlb;
a0d0e21e
LW
2297 }
2298 }
3280af22
NIS
2299 PL_lex_casestack[PL_lex_casemods++] = *s;
2300 PL_lex_casestack[PL_lex_casemods] = '\0';
2301 PL_lex_state = LEX_INTERPCONCAT;
2302 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2303 force_next('(');
2304 if (*s == 'l')
3280af22 2305 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2306 else if (*s == 'u')
3280af22 2307 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2308 else if (*s == 'L')
3280af22 2309 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2310 else if (*s == 'U')
3280af22 2311 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2312 else if (*s == 'Q')
3280af22 2313 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2314 else
cea2e8a9 2315 Perl_croak(aTHX_ "panic: yylex");
3280af22 2316 PL_bufptr = s + 1;
79072805 2317 force_next(FUNC);
3280af22
NIS
2318 if (PL_lex_starts) {
2319 s = PL_bufptr;
2320 PL_lex_starts = 0;
79072805
LW
2321 Aop(OP_CONCAT);
2322 }
2323 else
cea2e8a9 2324 return yylex();
79072805
LW
2325 }
2326
55497cff 2327 case LEX_INTERPPUSH:
2328 return sublex_push();
2329
79072805 2330 case LEX_INTERPSTART:
3280af22 2331 if (PL_bufptr == PL_bufend)
79072805 2332 return sublex_done();
607df283
SC
2333 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2334 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2335 PL_expect = XTERM;
2336 PL_lex_dojoin = (*PL_bufptr == '@');
2337 PL_lex_state = LEX_INTERPNORMAL;
2338 if (PL_lex_dojoin) {
2339 PL_nextval[PL_nexttoke].ival = 0;
79072805 2340 force_next(',');
554b3eca 2341#ifdef USE_THREADS
533c011a
NIS
2342 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2343 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2344 force_next(PRIVATEREF);
2345#else
a0d0e21e 2346 force_ident("\"", '$');
554b3eca 2347#endif /* USE_THREADS */
3280af22 2348 PL_nextval[PL_nexttoke].ival = 0;
79072805 2349 force_next('$');
3280af22 2350 PL_nextval[PL_nexttoke].ival = 0;
79072805 2351 force_next('(');
3280af22 2352 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2353 force_next(FUNC);
2354 }
3280af22
NIS
2355 if (PL_lex_starts++) {
2356 s = PL_bufptr;
79072805
LW
2357 Aop(OP_CONCAT);
2358 }
cea2e8a9 2359 return yylex();
79072805
LW
2360
2361 case LEX_INTERPENDMAYBE:
3280af22
NIS
2362 if (intuit_more(PL_bufptr)) {
2363 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2364 break;
2365 }
2366 /* FALL THROUGH */
2367
2368 case LEX_INTERPEND:
3280af22
NIS
2369 if (PL_lex_dojoin) {
2370 PL_lex_dojoin = FALSE;
2371 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2372 return ')';
2373 }
43a16006 2374 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2375 && SvEVALED(PL_lex_repl))
43a16006 2376 {
e9fa98b2 2377 if (PL_bufptr != PL_bufend)
cea2e8a9 2378 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2379 PL_lex_repl = Nullsv;
2380 }
79072805
LW
2381 /* FALLTHROUGH */
2382 case LEX_INTERPCONCAT:
2383#ifdef DEBUGGING
3280af22 2384 if (PL_lex_brackets)
cea2e8a9 2385 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2386#endif
3280af22 2387 if (PL_bufptr == PL_bufend)
79072805
LW
2388 return sublex_done();
2389
3280af22
NIS
2390 if (SvIVX(PL_linestr) == '\'') {
2391 SV *sv = newSVsv(PL_linestr);
2392 if (!PL_lex_inpat)
76e3520e 2393 sv = tokeq(sv);
3280af22 2394 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2395 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2396 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2397 s = PL_bufend;
79072805
LW
2398 }
2399 else {
3280af22 2400 s = scan_const(PL_bufptr);
79072805 2401 if (*s == '\\')
3280af22 2402 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2403 else
3280af22 2404 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2405 }
2406
3280af22
NIS
2407 if (s != PL_bufptr) {
2408 PL_nextval[PL_nexttoke] = yylval;
2409 PL_expect = XTERM;
79072805 2410 force_next(THING);
3280af22 2411 if (PL_lex_starts++)
79072805
LW
2412 Aop(OP_CONCAT);
2413 else {
3280af22 2414 PL_bufptr = s;
cea2e8a9 2415 return yylex();
79072805
LW
2416 }
2417 }
2418
cea2e8a9 2419 return yylex();
a0d0e21e 2420 case LEX_FORMLINE:
3280af22
NIS
2421 PL_lex_state = LEX_NORMAL;
2422 s = scan_formline(PL_bufptr);
2423 if (!PL_lex_formbrack)
a0d0e21e
LW
2424 goto rightbracket;
2425 OPERATOR(';');
79072805
LW
2426 }
2427
3280af22
NIS
2428 s = PL_bufptr;
2429 PL_oldoldbufptr = PL_oldbufptr;
2430 PL_oldbufptr = s;
607df283 2431 DEBUG_T( {
bf49b057
GS
2432 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2433 exp_name[PL_expect], s);
79072805 2434 } )
463ee0b2
LW
2435
2436 retry:
378cc40b
LW
2437 switch (*s) {
2438 default:
7e2040f0 2439 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2440 goto keylookup;
cea2e8a9 2441 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2442 case 4:
2443 case 26:
2444 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2445 case 0:
3280af22
NIS
2446 if (!PL_rsfp) {
2447 PL_last_uni = 0;
2448 PL_last_lop = 0;
2449 if (PL_lex_brackets)
d98d5fff 2450 yyerror("Missing right curly or square bracket");
607df283
SC
2451 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2452 "### Tokener got EOF\n");
2453 } )
79072805 2454 TOKEN(0);
463ee0b2 2455 }
3280af22 2456 if (s++ < PL_bufend)
a687059c 2457 goto retry; /* ignore stray nulls */
3280af22
NIS
2458 PL_last_uni = 0;
2459 PL_last_lop = 0;
2460 if (!PL_in_eval && !PL_preambled) {
2461 PL_preambled = TRUE;
2462 sv_setpv(PL_linestr,incl_perldb());
2463 if (SvCUR(PL_linestr))
2464 sv_catpv(PL_linestr,";");
2465 if (PL_preambleav){
2466 while(AvFILLp(PL_preambleav) >= 0) {
2467 SV *tmpsv = av_shift(PL_preambleav);
2468 sv_catsv(PL_linestr, tmpsv);
2469 sv_catpv(PL_linestr, ";");
91b7def8 2470 sv_free(tmpsv);
2471 }
3280af22
NIS
2472 sv_free((SV*)PL_preambleav);
2473 PL_preambleav = NULL;
91b7def8 2474 }
3280af22
NIS
2475 if (PL_minus_n || PL_minus_p) {
2476 sv_catpv(PL_linestr, "LINE: while (<>) {");
2477 if (PL_minus_l)
2478 sv_catpv(PL_linestr,"chomp;");
2479 if (PL_minus_a) {
8fd239a7
CS
2480 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2481 if (gv)
2482 GvIMPORTED_AV_on(gv);
3280af22
NIS
2483 if (PL_minus_F) {
2484 if (strchr("/'\"", *PL_splitstr)
2485 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2486 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2487 else {
2488 char delim;
2489 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2490 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2491 delim = *s;
cea2e8a9 2492 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2493 "q" + (delim == '\''), delim);
3280af22 2494 for (s = PL_splitstr; *s; s++) {
54310121 2495 if (*s == '\\')
3280af22
NIS
2496 sv_catpvn(PL_linestr, "\\", 1);
2497 sv_catpvn(PL_linestr, s, 1);
54310121 2498 }
cea2e8a9 2499 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2500 }
2304df62
AD
2501 }
2502 else
3280af22 2503 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2504 }
79072805 2505 }
3280af22
NIS
2506 sv_catpv(PL_linestr, "\n");
2507 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2508 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2509 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2510 SV *sv = NEWSV(85,0);
2511
2512 sv_upgrade(sv, SVt_PVMG);
3280af22 2513 sv_setsv(sv,PL_linestr);
57843af0 2514 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2515 }
79072805 2516 goto retry;
a687059c 2517 }
e929a76b 2518 do {
226017aa
DD
2519 bool bof = PL_rsfp ? TRUE : FALSE;
2520 if (bof) {
2521#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2522# ifdef __GNU_LIBRARY__
2523# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2524# define FTELL_FOR_PIPE_IS_BROKEN
2525# endif
e3f494f1
JH
2526# else
2527# ifdef __GLIBC__
2528# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2529# define FTELL_FOR_PIPE_IS_BROKEN
2530# endif
2531# endif
226017aa
DD
2532# endif
2533#endif
2534#ifdef FTELL_FOR_PIPE_IS_BROKEN
2535 /* This loses the possibility to detect the bof
2536 * situation on perl -P when the libc5 is being used.
2537 * Workaround? Maybe attach some extra state to PL_rsfp?
2538 */
2539 if (!PL_preprocess)
2540 bof = PerlIO_tell(PL_rsfp) == 0;
2541#else
2542 bof = PerlIO_tell(PL_rsfp) == 0;
2543#endif
2544 }
dea0fc0b
JH
2545 s = filter_gets(PL_linestr, PL_rsfp, 0);
2546 if (s == Nullch) {
e929a76b 2547 fake_eof:
3280af22
NIS
2548 if (PL_rsfp) {
2549 if (PL_preprocess && !PL_in_eval)
2550 (void)PerlProc_pclose(PL_rsfp);
2551 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2552 PerlIO_clearerr(PL_rsfp);
395c3793 2553 else
3280af22
NIS
2554 (void)PerlIO_close(PL_rsfp);
2555 PL_rsfp = Nullfp;
4a9ae47a 2556 PL_doextract = FALSE;
395c3793 2557 }
3280af22
NIS
2558 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2559 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2560 sv_catpv(PL_linestr,";}");
2561 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2562 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2563 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2564 goto retry;
2565 }
3280af22
NIS
2566 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2567 sv_setpv(PL_linestr,"");
79072805 2568 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2569 } else if (bof) {
2570 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2571 s = swallow_bom((U8*)s);
378cc40b 2572 }
3280af22 2573 if (PL_doextract) {
a0d0e21e 2574 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2575 PL_doextract = FALSE;
a0d0e21e
LW
2576
2577 /* Incest with pod. */
2578 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2579 sv_setpv(PL_linestr, "");
2580 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2582 PL_doextract = FALSE;
a0d0e21e 2583 }
b250498f 2584 }
463ee0b2 2585 incline(s);
3280af22
NIS
2586 } while (PL_doextract);
2587 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2588 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2589 SV *sv = NEWSV(85,0);
a687059c 2590
93a17b20 2591 sv_upgrade(sv, SVt_PVMG);
3280af22 2592 sv_setsv(sv,PL_linestr);
57843af0 2593 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2594 }
3280af22 2595 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2596 if (CopLINE(PL_curcop) == 1) {
3280af22 2597 while (s < PL_bufend && isSPACE(*s))
79072805 2598 s++;
a0d0e21e 2599 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2600 s++;
44a8e56a 2601 d = Nullch;
3280af22 2602 if (!PL_in_eval) {
44a8e56a 2603 if (*s == '#' && *(s+1) == '!')
2604 d = s + 2;
2605#ifdef ALTERNATE_SHEBANG
2606 else {
2607 static char as[] = ALTERNATE_SHEBANG;
2608 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2609 d = s + (sizeof(as) - 1);
2610 }
2611#endif /* ALTERNATE_SHEBANG */
2612 }
2613 if (d) {
b8378b72 2614 char *ipath;
774d564b 2615 char *ipathend;
b8378b72 2616
774d564b 2617 while (isSPACE(*d))
b8378b72
CS
2618 d++;
2619 ipath = d;
774d564b 2620 while (*d && !isSPACE(*d))
2621 d++;
2622 ipathend = d;
2623
2624#ifdef ARG_ZERO_IS_SCRIPT
2625 if (ipathend > ipath) {
2626 /*
2627 * HP-UX (at least) sets argv[0] to the script name,
2628 * which makes $^X incorrect. And Digital UNIX and Linux,
2629 * at least, set argv[0] to the basename of the Perl
2630 * interpreter. So, having found "#!", we'll set it right.
2631 */
2632 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2633 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2634 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2635 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2636 SvSETMAGIC(x);
2637 }
774d564b 2638 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2639 }
774d564b 2640#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2641
2642 /*
2643 * Look for options.
2644 */
748a9306 2645 d = instr(s,"perl -");
84e30d1a 2646 if (!d) {
748a9306 2647 d = instr(s,"perl");
84e30d1a
GS
2648#if defined(DOSISH)
2649 /* avoid getting into infinite loops when shebang
2650 * line contains "Perl" rather than "perl" */
2651 if (!d) {
2652 for (d = ipathend-4; d >= ipath; --d) {
2653 if ((*d == 'p' || *d == 'P')
2654 && !ibcmp(d, "perl", 4))
2655 {
2656 break;
2657 }
2658 }
2659 if (d < ipath)
2660 d = Nullch;
2661 }
2662#endif
2663 }
44a8e56a 2664#ifdef ALTERNATE_SHEBANG
2665 /*
2666 * If the ALTERNATE_SHEBANG on this system starts with a
2667 * character that can be part of a Perl expression, then if
2668 * we see it but not "perl", we're probably looking at the
2669 * start of Perl code, not a request to hand off to some
2670 * other interpreter. Similarly, if "perl" is there, but
2671 * not in the first 'word' of the line, we assume the line
2672 * contains the start of the Perl program.
44a8e56a 2673 */
2674 if (d && *s != '#') {
774d564b 2675 char *c = ipath;
44a8e56a 2676 while (*c && !strchr("; \t\r\n\f\v#", *c))
2677 c++;
2678 if (c < d)
2679 d = Nullch; /* "perl" not in first word; ignore */
2680 else
2681 *s = '#'; /* Don't try to parse shebang line */
2682 }
774d564b 2683#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2684#ifndef MACOS_TRADITIONAL
748a9306 2685 if (!d &&
44a8e56a 2686 *s == '#' &&
774d564b 2687 ipathend > ipath &&
3280af22 2688 !PL_minus_c &&
748a9306 2689 !instr(s,"indir") &&
3280af22 2690 instr(PL_origargv[0],"perl"))
748a9306 2691 {
9f68db38 2692 char **newargv;
9f68db38 2693
774d564b 2694 *ipathend = '\0';
2695 s = ipathend + 1;
3280af22 2696 while (s < PL_bufend && isSPACE(*s))
9f68db38 2697 s++;
3280af22
NIS
2698 if (s < PL_bufend) {
2699 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2700 newargv[1] = s;
3280af22 2701 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2702 s++;
2703 *s = '\0';
3280af22 2704 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2705 }
2706 else
3280af22 2707 newargv = PL_origargv;
774d564b 2708 newargv[0] = ipath;
b4748376 2709 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2710 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2711 }
bf4acbe4 2712#endif
748a9306 2713 if (d) {
3280af22
NIS
2714 U32 oldpdb = PL_perldb;
2715 bool oldn = PL_minus_n;
2716 bool oldp = PL_minus_p;
748a9306
LW
2717
2718 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2719 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2720
2721 if (*d++ == '-') {
8cc95fdb 2722 do {
2723 if (*d == 'M' || *d == 'm') {
2724 char *m = d;
2725 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2726 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2727 (int)(d - m), m);
2728 }
2729 d = moreswitches(d);
2730 } while (d);
155aba94
GS
2731 if ((PERLDB_LINE && !oldpdb) ||
2732 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2733 /* if we have already added "LINE: while (<>) {",
2734 we must not do it again */
748a9306 2735 {
3280af22
NIS
2736 sv_setpv(PL_linestr, "");
2737 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2738 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2739 PL_preambled = FALSE;
84902520 2740 if (PERLDB_LINE)
3280af22 2741 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2742 goto retry;
2743 }
a0d0e21e 2744 }
79072805 2745 }
9f68db38 2746 }
79072805 2747 }
3280af22
NIS
2748 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2749 PL_bufptr = s;
2750 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2751 return yylex();
ae986130 2752 }
378cc40b 2753 goto retry;
4fdae800 2754 case '\r':
6a27c188 2755#ifdef PERL_STRICT_CR
cea2e8a9
GS
2756 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2757 Perl_croak(aTHX_
cc507455 2758 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2759#endif
4fdae800 2760 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2761#ifdef MACOS_TRADITIONAL
2762 case '\312':
2763#endif
378cc40b
LW
2764 s++;
2765 goto retry;
378cc40b 2766 case '#':
e929a76b 2767 case '\n':
3280af22 2768 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2769 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2770 /* handle eval qq[#line 1 "foo"\n ...] */
2771 CopLINE_dec(PL_curcop);
2772 incline(s);
2773 }
3280af22 2774 d = PL_bufend;
a687059c 2775 while (s < d && *s != '\n')
378cc40b 2776 s++;
0f85fab0 2777 if (s < d)
378cc40b 2778 s++;
463ee0b2 2779 incline(s);
3280af22
NIS
2780 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2781 PL_bufptr = s;
2782 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2783 return yylex();
a687059c 2784 }
378cc40b 2785 }
a687059c 2786 else {
378cc40b 2787 *s = '\0';
3280af22 2788 PL_bufend = s;
a687059c 2789 }
378cc40b
LW
2790 goto retry;
2791 case '-':
79072805 2792 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2793 I32 ftst = 0;
2794
378cc40b 2795 s++;
3280af22 2796 PL_bufptr = s;
748a9306
LW
2797 tmp = *s++;
2798
bf4acbe4 2799 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2800 s++;
2801
2802 if (strnEQ(s,"=>",2)) {
3280af22 2803 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
607df283
SC
2804 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2805 "### Saw unary minus before =>, forcing word '%s'\n", s);
2806 } )
748a9306
LW
2807 OPERATOR('-'); /* unary minus */
2808 }
3280af22 2809 PL_last_uni = PL_oldbufptr;
748a9306 2810 switch (tmp) {
e5edeb50
JH
2811 case 'r': ftst = OP_FTEREAD; break;
2812 case 'w': ftst = OP_FTEWRITE; break;
2813 case 'x': ftst = OP_FTEEXEC; break;
2814 case 'o': ftst = OP_FTEOWNED; break;
2815 case 'R': ftst = OP_FTRREAD; break;
2816 case 'W': ftst = OP_FTRWRITE; break;
2817 case 'X': ftst = OP_FTREXEC; break;
2818 case 'O': ftst = OP_FTROWNED; break;
2819 case 'e': ftst = OP_FTIS; break;
2820 case 'z': ftst = OP_FTZERO; break;
2821 case 's': ftst = OP_FTSIZE; break;
2822 case 'f': ftst = OP_FTFILE; break;
2823 case 'd': ftst = OP_FTDIR; break;
2824 case 'l': ftst = OP_FTLINK; break;
2825 case 'p': ftst = OP_FTPIPE; break;
2826 case 'S': ftst = OP_FTSOCK; break;
2827 case 'u': ftst = OP_FTSUID; break;
2828 case 'g': ftst = OP_FTSGID; break;
2829 case 'k': ftst = OP_FTSVTX; break;
2830 case 'b': ftst = OP_FTBLK; break;
2831 case 'c': ftst = OP_FTCHR; break;
2832 case 't': ftst = OP_FTTTY; break;
2833 case 'T': ftst = OP_FTTEXT; break;
2834 case 'B': ftst = OP_FTBINARY; break;
2835 case 'M': case 'A': case 'C':
2836 gv_fetchpv("\024",TRUE, SVt_PV);
2837 switch (tmp) {
2838 case 'M': ftst = OP_FTMTIME; break;
2839 case 'A': ftst = OP_FTATIME; break;
2840 case 'C': ftst = OP_FTCTIME; break;
2841 default: break;
2842 }
2843 break;
378cc40b 2844 default:
378cc40b
LW
2845 break;
2846 }
e5edeb50
JH
2847 if (ftst) {
2848 PL_last_lop_op = ftst;
2849 DEBUG_T( { PerlIO_printf(Perl_debug_log,
2850 "### Saw file test %c\n", ftst);
2851 } )
2852 if (*s == '(' && ckWARN(WARN_AMBIGUOUS))
2853 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2854 "Ambiguous -%c() resolved as a file test",
7b3fae3c 2855 tmp);
e5edeb50
JH
2856 FTST(ftst);
2857 }
2858 else {
2859 /* Assume it was a minus followed by a one-letter named
2860 * subroutine call (or a -bareword), then. */
2861 s -= 2;
2862 }
378cc40b 2863 }
a687059c
LW
2864 tmp = *s++;
2865 if (*s == tmp) {
2866 s++;
3280af22 2867 if (PL_expect == XOPERATOR)
79072805
LW
2868 TERM(POSTDEC);
2869 else
2870 OPERATOR(PREDEC);
2871 }
2872 else if (*s == '>') {
2873 s++;
2874 s = skipspace(s);
7e2040f0 2875 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2876 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2877 TOKEN(ARROW);
79072805 2878 }
748a9306
LW
2879 else if (*s == '$')
2880 OPERATOR(ARROW);
463ee0b2 2881 else
748a9306 2882 TERM(ARROW);
a687059c 2883 }
3280af22 2884 if (PL_expect == XOPERATOR)
79072805
LW
2885 Aop(OP_SUBTRACT);
2886 else {
3280af22 2887 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2888 check_uni();
79072805 2889 OPERATOR('-'); /* unary minus */
2f3197b3 2890 }
79072805 2891
378cc40b 2892 case '+':
a687059c
LW
2893 tmp = *s++;
2894 if (*s == tmp) {
378cc40b 2895 s++;
3280af22 2896 if (PL_expect == XOPERATOR)
79072805
LW
2897 TERM(POSTINC);
2898 else
2899 OPERATOR(PREINC);
378cc40b 2900 }
3280af22 2901 if (PL_expect == XOPERATOR)
79072805
LW
2902 Aop(OP_ADD);
2903 else {
3280af22 2904 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2905 check_uni();
a687059c 2906 OPERATOR('+');
2f3197b3 2907 }
a687059c 2908
378cc40b 2909 case '*':
3280af22
NIS
2910 if (PL_expect != XOPERATOR) {
2911 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2912 PL_expect = XOPERATOR;
2913 force_ident(PL_tokenbuf, '*');
2914 if (!*PL_tokenbuf)
a0d0e21e 2915 PREREF('*');
79072805 2916 TERM('*');
a687059c 2917 }
79072805
LW
2918 s++;
2919 if (*s == '*') {
a687059c 2920 s++;
79072805 2921 PWop(OP_POW);
a687059c 2922 }
79072805
LW
2923 Mop(OP_MULTIPLY);
2924
378cc40b 2925 case '%':
3280af22 2926 if (PL_expect == XOPERATOR) {
bbce6d69 2927 ++s;
2928 Mop(OP_MODULO);
a687059c 2929 }
3280af22
NIS
2930 PL_tokenbuf[0] = '%';
2931 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2932 if (!PL_tokenbuf[1]) {
2933 if (s == PL_bufend)
bbce6d69 2934 yyerror("Final % should be \\% or %name");
2935 PREREF('%');
a687059c 2936 }
3280af22 2937 PL_pending_ident = '%';
bbce6d69 2938 TERM('%');
a687059c 2939
378cc40b 2940 case '^':
79072805 2941 s++;
a0d0e21e 2942 BOop(OP_BIT_XOR);
79072805 2943 case '[':
3280af22 2944 PL_lex_brackets++;
79072805 2945 /* FALL THROUGH */
378cc40b 2946 case '~':
378cc40b 2947 case ',':
378cc40b
LW
2948 tmp = *s++;
2949 OPERATOR(tmp);
a0d0e21e
LW
2950 case ':':
2951 if (s[1] == ':') {
2952 len = 0;
2953 goto just_a_word;
2954 }
2955 s++;
09bef843
SB
2956 switch (PL_expect) {
2957 OP *attrs;
2958 case XOPERATOR:
2959 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2960 break;
2961 PL_bufptr = s; /* update in case we back off */
2962 goto grabattrs;
2963 case XATTRBLOCK:
2964 PL_expect = XBLOCK;
2965 goto grabattrs;
2966 case XATTRTERM:
2967 PL_expect = XTERMBLOCK;
2968 grabattrs:
2969 s = skipspace(s);
2970 attrs = Nullop;
7e2040f0 2971 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2972 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2973 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2974 if (tmp < 0) tmp = -tmp;
2975 switch (tmp) {
2976 case KEY_or:
2977 case KEY_and:
2978 case KEY_for:
2979 case KEY_unless:
2980 case KEY_if:
2981 case KEY_while:
2982 case KEY_until:
2983 goto got_attrs;
2984 default:
2985 break;
2986 }
2987 }
09bef843
SB
2988 if (*d == '(') {
2989 d = scan_str(d,TRUE,TRUE);
2990 if (!d) {
2991 if (PL_lex_stuff) {
2992 SvREFCNT_dec(PL_lex_stuff);
2993 PL_lex_stuff = Nullsv;
2994 }
2995 /* MUST advance bufptr here to avoid bogus
2996 "at end of line" context messages from yyerror().
2997 */
2998 PL_bufptr = s + len;
2999 yyerror("Unterminated attribute parameter in attribute list");
3000 if (attrs)
3001 op_free(attrs);
3002 return 0; /* EOF indicator */
3003 }
3004 }
3005 if (PL_lex_stuff) {
3006 SV *sv = newSVpvn(s, len);
3007 sv_catsv(sv, PL_lex_stuff);
3008 attrs = append_elem(OP_LIST, attrs,
3009 newSVOP(OP_CONST, 0, sv));
3010 SvREFCNT_dec(PL_lex_stuff);
3011 PL_lex_stuff = Nullsv;
3012 }
3013 else {
3014 attrs = append_elem(OP_LIST, attrs,
3015 newSVOP(OP_CONST, 0,
3016 newSVpvn(s, len)));
3017 }
3018 s = skipspace(d);
0120eecf 3019 if (*s == ':' && s[1] != ':')
09bef843 3020 s = skipspace(s+1);
0120eecf
GS
3021 else if (s == d)
3022 break; /* require real whitespace or :'s */
09bef843 3023 }
f9829d6b
GS
3024 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3025 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3026 char q = ((*s == '\'') ? '"' : '\'');
3027 /* If here for an expression, and parsed no attrs, back off. */
3028 if (tmp == '=' && !attrs) {
3029 s = PL_bufptr;
3030 break;
3031 }
3032 /* MUST advance bufptr here to avoid bogus "at end of line"
3033 context messages from yyerror().
3034 */
3035 PL_bufptr = s;
3036 if (!*s)
3037 yyerror("Unterminated attribute list");
3038 else
3039 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3040 q, *s, q));
3041 if (attrs)
3042 op_free(attrs);
3043 OPERATOR(':');
3044 }
f9829d6b 3045 got_attrs:
09bef843
SB
3046 if (attrs) {
3047 PL_nextval[PL_nexttoke].opval = attrs;
3048 force_next(THING);
3049 }
3050 TOKEN(COLONATTR);
3051 }
a0d0e21e 3052 OPERATOR(':');
8990e307
LW
3053 case '(':
3054 s++;
3280af22
NIS
3055 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3056 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3057 else
3280af22 3058 PL_expect = XTERM;
a0d0e21e 3059 TOKEN('(');
378cc40b 3060 case ';':
f4dd75d9 3061 CLINE;
378cc40b
LW
3062 tmp = *s++;
3063 OPERATOR(tmp);
3064 case ')':
378cc40b 3065 tmp = *s++;
16d20bd9
AD
3066 s = skipspace(s);
3067 if (*s == '{')
3068 PREBLOCK(tmp);
378cc40b 3069 TERM(tmp);
79072805
LW
3070 case ']':
3071 s++;
3280af22 3072 if (PL_lex_brackets <= 0)
d98d5fff 3073 yyerror("Unmatched right square bracket");
463ee0b2 3074 else
3280af22
NIS
3075 --PL_lex_brackets;
3076 if (PL_lex_state == LEX_INTERPNORMAL) {
3077 if (PL_lex_brackets == 0) {
a0d0e21e 3078 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3079 PL_lex_state = LEX_INTERPEND;
79072805
LW
3080 }
3081 }
4633a7c4 3082 TERM(']');
79072805
LW
3083 case '{':
3084 leftbracket:
79072805 3085 s++;
3280af22
NIS
3086 if (PL_lex_brackets > 100) {
3087 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3088 if (newlb != PL_lex_brackstack) {
8990e307 3089 SAVEFREEPV(newlb);
3280af22 3090 PL_lex_brackstack = newlb;
8990e307
LW
3091 }
3092 }
3280af22 3093 switch (PL_expect) {
a0d0e21e 3094 case XTERM:
3280af22 3095 if (PL_lex_formbrack) {
a0d0e21e
LW
3096 s--;
3097 PRETERMBLOCK(DO);
3098 }
3280af22
NIS
3099 if (PL_oldoldbufptr == PL_last_lop)
3100 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3101 else
3280af22 3102 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3103 OPERATOR(HASHBRACK);
a0d0e21e 3104 case XOPERATOR:
bf4acbe4 3105 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3106 s++;
44a8e56a 3107 d = s;
3280af22
NIS
3108 PL_tokenbuf[0] = '\0';
3109 if (d < PL_bufend && *d == '-') {
3110 PL_tokenbuf[0] = '-';
44a8e56a 3111 d++;
bf4acbe4 3112 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3113 d++;
3114 }
7e2040f0 3115 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3116 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3117 FALSE, &len);
bf4acbe4 3118 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3119 d++;
3120 if (*d == '}') {
3280af22 3121 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3122 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3123 if (minus)
3124 force_next('-');
748a9306
LW
3125 }
3126 }
3127 /* FALL THROUGH */
09bef843 3128 case XATTRBLOCK:
748a9306 3129 case XBLOCK:
3280af22
NIS
3130 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3131 PL_expect = XSTATE;
a0d0e21e 3132 break;
09bef843 3133 case XATTRTERM:
a0d0e21e 3134 case XTERMBLOCK:
3280af22
NIS
3135 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3136 PL_expect = XSTATE;
a0d0e21e
LW
3137 break;
3138 default: {
3139 char *t;
3280af22
NIS
3140 if (PL_oldoldbufptr == PL_last_lop)
3141 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3142 else
3280af22 3143 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3144 s = skipspace(s);
09ecc4b6 3145 if (*s == '}')
a0d0e21e 3146 OPERATOR(HASHBRACK);
b8a4b1be
GS
3147 /* This hack serves to disambiguate a pair of curlies
3148 * as being a block or an anon hash. Normally, expectation
3149 * determines that, but in cases where we're not in a
3150 * position to expect anything in particular (like inside
3151 * eval"") we have to resolve the ambiguity. This code
3152 * covers the case where the first term in the curlies is a
3153 * quoted string. Most other cases need to be explicitly
3154 * disambiguated by prepending a `+' before the opening
3155 * curly in order to force resolution as an anon hash.
3156 *
3157 * XXX should probably propagate the outer expectation
3158 * into eval"" to rely less on this hack, but that could
3159 * potentially break current behavior of eval"".
3160 * GSAR 97-07-21
3161 */
3162 t = s;
3163 if (*s == '\'' || *s == '"' || *s == '`') {
3164 /* common case: get past first string, handling escapes */
3280af22 3165 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3166 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3167 t++;
3168 t++;
a0d0e21e 3169 }
b8a4b1be 3170 else if (*s == 'q') {
3280af22 3171 if (++t < PL_bufend
b8a4b1be 3172 && (!isALNUM(*t)
3280af22 3173 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3174 && !isALNUM(*t))))
3175 {
b8a4b1be
GS
3176 char *tmps;
3177 char open, close, term;
3178 I32 brackets = 1;
3179
3280af22 3180 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3181 t++;
3182 term = *t;
3183 open = term;
3184 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3185 term = tmps[5];
3186 close = term;
3187 if (open == close)
3280af22
NIS
3188 for (t++; t < PL_bufend; t++) {
3189 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3190 t++;
6d07e5e9 3191 else if (*t == open)
b8a4b1be
GS
3192 break;
3193 }
3194 else
3280af22
NIS
3195 for (t++; t < PL_bufend; t++) {
3196 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3197 t++;
6d07e5e9 3198 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3199 break;
3200 else if (*t == open)
3201 brackets++;
3202 }
3203 }
3204 t++;
a0d0e21e 3205 }
7e2040f0 3206 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3207 t += UTF8SKIP(t);
7e2040f0 3208 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3209 t += UTF8SKIP(t);
a0d0e21e 3210 }
3280af22 3211 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3212 t++;
b8a4b1be
GS
3213 /* if comma follows first term, call it an anon hash */
3214 /* XXX it could be a comma expression with loop modifiers */
3280af22 3215 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3216 || (*t == '=' && t[1] == '>')))
a0d0e21e 3217 OPERATOR(HASHBRACK);
3280af22 3218 if (PL_expect == XREF)
4e4e412b 3219 PL_expect = XTERM;
a0d0e21e 3220 else {
3280af22
NIS
3221 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3222 PL_expect = XSTATE;
a0d0e21e 3223 }
8990e307 3224 }
a0d0e21e 3225 break;
463ee0b2 3226 }
57843af0 3227 yylval.ival = CopLINE(PL_curcop);
79072805 3228 if (isSPACE(*s) || *s == '#')
3280af22 3229 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3230 TOKEN('{');
378cc40b 3231 case '}':
79072805
LW
3232 rightbracket:
3233 s++;
3280af22 3234 if (PL_lex_brackets <= 0)
d98d5fff 3235 yyerror("Unmatched right curly bracket");
463ee0b2 3236 else
3280af22 3237 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3238 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3239 PL_lex_formbrack = 0;
3240 if (PL_lex_state == LEX_INTERPNORMAL) {
3241 if (PL_lex_brackets == 0) {
9059aa12
LW
3242 if (PL_expect & XFAKEBRACK) {
3243 PL_expect &= XENUMMASK;
3280af22
NIS
3244 PL_lex_state = LEX_INTERPEND;
3245 PL_bufptr = s;
cea2e8a9 3246 return yylex(); /* ignore fake brackets */
79072805 3247 }
fa83b5b6 3248 if (*s == '-' && s[1] == '>')
3280af22 3249 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3250 else if (*s != '[' && *s != '{')
3280af22 3251 PL_lex_state = LEX_INTERPEND;
79072805
LW
3252 }
3253 }
9059aa12
LW
3254 if (PL_expect & XFAKEBRACK) {
3255 PL_expect &= XENUMMASK;
3280af22 3256 PL_bufptr = s;
cea2e8a9 3257 return yylex(); /* ignore fake brackets */
748a9306 3258 }
79072805
LW
3259 force_next('}');
3260 TOKEN(';');
378cc40b
LW
3261 case '&':
3262 s++;
3263 tmp = *s++;
3264 if (tmp == '&')
a0d0e21e 3265 AOPERATOR(ANDAND);
378cc40b 3266 s--;
3280af22 3267 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3268 if (ckWARN(WARN_SEMICOLON)
3269 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3270 {
57843af0 3271 CopLINE_dec(PL_curcop);
cea2e8a9 3272 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3273 CopLINE_inc(PL_curcop);
463ee0b2 3274 }
79072805 3275 BAop(OP_BIT_AND);
463ee0b2 3276 }
79072805 3277
3280af22
NIS
3278 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3279 if (*PL_tokenbuf) {
3280 PL_expect = XOPERATOR;
3281 force_ident(PL_tokenbuf, '&');
463ee0b2 3282 }
79072805
LW
3283 else
3284 PREREF('&');
c07a80fd 3285 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3286 TERM('&');
3287
378cc40b
LW
3288 case '|':
3289 s++;
3290 tmp = *s++;
3291 if (tmp == '|')
a0d0e21e 3292 AOPERATOR(OROR);
378cc40b 3293 s--;
79072805 3294 BOop(OP_BIT_OR);
378cc40b
LW
3295 case '=':
3296 s++;
3297 tmp = *s++;
3298 if (tmp == '=')
79072805
LW
3299 Eop(OP_EQ);
3300 if (tmp == '>')
3301 OPERATOR(',');
378cc40b 3302 if (tmp == '~')
79072805 3303 PMop(OP_MATCH);
599cee73 3304 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3305 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3306 s--;
3280af22
NIS
3307 if (PL_expect == XSTATE && isALPHA(tmp) &&
3308 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3309 {
3280af22
NIS
3310 if (PL_in_eval && !PL_rsfp) {
3311 d = PL_bufend;
a5f75d66
AD
3312 while (s < d) {
3313 if (*s++ == '\n') {
3314 incline(s);
3315 if (strnEQ(s,"=cut",4)) {
3316 s = strchr(s,'\n');
3317 if (s)
3318 s++;
3319 else
3320 s = d;
3321 incline(s);
3322 goto retry;
3323 }
3324 }
3325 }
3326 goto retry;
3327 }
3280af22
NIS
3328 s = PL_bufend;
3329 PL_doextract = TRUE;
a0d0e21e
LW
3330 goto retry;
3331 }
3280af22 3332 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3333 char *t;
51882d45 3334#ifdef PERL_STRICT_CR
bf4acbe4 3335 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3336#else
bf4acbe4 3337 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3338#endif
a0d0e21e
LW
3339 if (*t == '\n' || *t == '#') {
3340 s--;
3280af22 3341 PL_expect = XBLOCK;
a0d0e21e
LW
3342 goto leftbracket;
3343 }
79072805 3344 }
a0d0e21e
LW
3345 yylval.ival = 0;
3346 OPERATOR(ASSIGNOP);
378cc40b
LW
3347 case '!':
3348 s++;
3349 tmp = *s++;
3350 if (tmp == '=')
79072805 3351 Eop(OP_NE);
378cc40b 3352 if (tmp == '~')
79072805 3353 PMop(OP_NOT);
378cc40b
LW
3354 s--;
3355 OPERATOR('!');
3356 case '<':
3280af22 3357 if (PL_expect != XOPERATOR) {
93a17b20 3358 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3359 check_uni();
79072805
LW
3360 if (s[1] == '<')
3361 s = scan_heredoc(s);
3362 else
3363 s = scan_inputsymbol(s);
3364 TERM(sublex_start());
378cc40b
LW
3365 }
3366 s++;
3367 tmp = *s++;
3368 if (tmp == '<')
79072805 3369 SHop(OP_LEFT_SHIFT);
395c3793
LW
3370 if (tmp == '=') {
3371 tmp = *s++;
3372 if (tmp == '>')
79072805 3373 Eop(OP_NCMP);
395c3793 3374 s--;
79072805 3375 Rop(OP_LE);
395c3793 3376 }
378cc40b 3377 s--;
79072805 3378 Rop(OP_LT);
378cc40b
LW
3379 case '>':
3380 s++;
3381 tmp = *s++;
3382 if (tmp == '>')
79072805 3383 SHop(OP_RIGHT_SHIFT);
378cc40b 3384 if (tmp == '=')
79072805 3385 Rop(OP_GE);
378cc40b 3386 s--;
79072805 3387 Rop(OP_GT);
378cc40b
LW
3388
3389 case '$':
bbce6d69 3390 CLINE;
3391
3280af22
NIS
3392 if (PL_expect == XOPERATOR) {
3393 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3394 PL_expect = XTERM;
a0d0e21e 3395 depcom();
bbce6d69 3396 return ','; /* grandfather non-comma-format format */
a0d0e21e 3397 }
8990e307 3398 }
a0d0e21e 3399
7e2040f0 3400 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3401 PL_tokenbuf[0] = '@';
376b8730
SM
3402 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3403 sizeof PL_tokenbuf - 1, FALSE);
3404 if (PL_expect == XOPERATOR)
3405 no_op("Array length", s);
3280af22 3406 if (!PL_tokenbuf[1])
a0d0e21e 3407 PREREF(DOLSHARP);
3280af22
NIS
3408 PL_expect = XOPERATOR;
3409 PL_pending_ident = '#';
463ee0b2 3410 TOKEN(DOLSHARP);
79072805 3411 }
bbce6d69 3412
3280af22 3413 PL_tokenbuf[0] = '$';
376b8730
SM
3414 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3415 sizeof PL_tokenbuf - 1, FALSE);
3416 if (PL_expect == XOPERATOR)
3417 no_op("Scalar", s);
3280af22
NIS
3418 if (!PL_tokenbuf[1]) {
3419 if (s == PL_bufend)
bbce6d69 3420 yyerror("Final $ should be \\$ or $name");
3421 PREREF('$');
8990e307 3422 }
a0d0e21e 3423
bbce6d69 3424 /* This kludge not intended to be bulletproof. */
3280af22 3425 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3426 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3427 newSViv(PL_compiling.cop_arybase));
bbce6d69 3428 yylval.opval->op_private = OPpCONST_ARYBASE;
3429 TERM(THING);
3430 }
3431
ff68c719 3432 d = s;
69d2bceb 3433 tmp = (I32)*s;
3280af22 3434 if (PL_lex_state == LEX_NORMAL)
ff68c719 3435 s = skipspace(s);
3436
3280af22 3437 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3438 char *t;
3439 if (*s == '[') {
3280af22 3440 PL_tokenbuf[0] = '@';
599cee73 3441 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3442 for(t = s + 1;
7e2040f0 3443 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3444 t++) ;
a0d0e21e 3445 if (*t++ == ',') {
3280af22
NIS
3446 PL_bufptr = skipspace(PL_bufptr);
3447 while (t < PL_bufend && *t != ']')
bbce6d69 3448 t++;
cea2e8a9 3449 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3450 "Multidimensional syntax %.*s not supported",
3451 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3452 }
3453 }
bbce6d69 3454 }
3455 else if (*s == '{') {
3280af22 3456 PL_tokenbuf[0] = '%';
599cee73 3457 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3458 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3459 {
3280af22 3460 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3461 STRLEN len;
3462 for (t++; isSPACE(*t); t++) ;
7e2040f0 3463 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3464 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3465 for (; isSPACE(*t); t++) ;
864dbfa3 3466 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3467 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3468 "You need to quote \"%s\"", tmpbuf);
748a9306 3469 }
93a17b20
LW
3470 }
3471 }
2f3197b3 3472 }
bbce6d69 3473
3280af22 3474 PL_expect = XOPERATOR;
69d2bceb 3475 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3476 bool islop = (PL_last_lop == PL_oldoldbufptr);
3477 if (!islop || PL_last_lop_op == OP_GREPSTART)
3478 PL_expect = XOPERATOR;
bbce6d69 3479 else if (strchr("$@\"'`q", *s))
3280af22 3480 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3481 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3482 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3483 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3484 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3485 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3486 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3487 /* binary operators exclude handle interpretations */
3488 switch (tmp) {
3489 case -KEY_x:
3490 case -KEY_eq:
3491 case -KEY_ne:
3492 case -KEY_gt:
3493 case -KEY_lt:
3494 case -KEY_ge:
3495 case -KEY_le:
3496 case -KEY_cmp:
3497 break;
3498 default:
3280af22 3499 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3500 break;
3501 }
3502 }
68dc0745 3503 else {
3504 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3505 if (gv && GvCVu(gv))
3280af22 3506 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3507 }
93a17b20 3508 }
bbce6d69 3509 else if (isDIGIT(*s))
3280af22 3510 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3511 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3512 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3513 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3514 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3515 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3516 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3517 }
3280af22 3518 PL_pending_ident = '$';
79072805 3519 TOKEN('$');
378cc40b
LW
3520
3521 case '@':
3280af22 3522 if (PL_expect == XOPERATOR)
bbce6d69 3523 no_op("Array", s);
3280af22
NIS
3524 PL_tokenbuf[0] = '@';
3525 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3526 if (!PL_tokenbuf[1]) {
3527 if (s == PL_bufend)
bbce6d69 3528 yyerror("Final @ should be \\@ or @name");
3529 PREREF('@');
3530 }
3280af22 3531 if (PL_lex_state == LEX_NORMAL)
ff68c719 3532 s = skipspace(s);
3280af22 3533 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3534 if (*s == '{')
3280af22 3535 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3536
3537 /* Warn about @ where they meant $. */
599cee73 3538 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3539 if (*s == '[' || *s == '{') {
3540 char *t = s + 1;
7e2040f0 3541 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3542 t++;
3543 if (*t == '}' || *t == ']') {
3544 t++;
3280af22 3545 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3546 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3547 "Scalar value %.*s better written as $%.*s",
3280af22 3548 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3549 }
93a17b20
LW
3550 }
3551 }
463ee0b2 3552 }
3280af22 3553 PL_pending_ident = '@';
79072805 3554 TERM('@');
378cc40b
LW
3555
3556 case '/': /* may either be division or pattern */
3557 case '?': /* may either be conditional or pattern */
3280af22 3558 if (PL_expect != XOPERATOR) {
c277df42 3559 /* Disable warning on "study /blah/" */
3280af22
NIS
3560 if (PL_oldoldbufptr == PL_last_uni
3561 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3562 || memNE(PL_last_uni, "study", 5)
3563 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3564 check_uni();
8782bef2 3565 s = scan_pat(s,OP_MATCH);
79072805 3566 TERM(sublex_start());
378cc40b
LW
3567 }
3568 tmp = *s++;
a687059c 3569 if (tmp == '/')
79072805 3570 Mop(OP_DIVIDE);
378cc40b
LW
3571 OPERATOR(tmp);
3572
3573 case '.':
51882d45
GS
3574 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3575#ifdef PERL_STRICT_CR
3576 && s[1] == '\n'
3577#else
3578 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3579#endif
3580 && (s == PL_linestart || s[-1] == '\n') )
3581 {
3280af22
NIS
3582 PL_lex_formbrack = 0;
3583 PL_expect = XSTATE;
79072805
LW
3584 goto rightbracket;
3585 }
3280af22 3586 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3587 tmp = *s++;
a687059c
LW
3588 if (*s == tmp) {
3589 s++;