This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH 5.7.0] OUT keyword for xsubpp
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
3818b22b 3 * Copyright (c) 1991-2000, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
ffb4593c
NT
16 * parser, perly.y.
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
7e2040f0 39/*#define UTF (SvUTF8(PL_linestr) && !(PL_hints & HINT_BYTE))*/
a0ed51b3
LW
40#define UTF (PL_hints & HINT_UTF8)
41
2b92dfce
GS
42/* In variables name $^X, these are the legal values for X.
43 * 1999-02-27 mjd-perl-patch@plover.com */
44#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
45
bf4acbe4
GS
46/* On MacOS, respect nonbreaking spaces */
47#ifdef MACOS_TRADITIONAL
48#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
49#else
50#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
51#endif
52
ffb4593c
NT
53/* LEX_* are values for PL_lex_state, the state of the lexer.
54 * They are arranged oddly so that the guard on the switch statement
79072805
LW
55 * can get by with a single comparison (if the compiler is smart enough).
56 */
57
fb73857a
PP
58/* #define LEX_NOTPARSING 11 is done in perl.h. */
59
55497cff
PP
60#define LEX_NORMAL 10
61#define LEX_INTERPNORMAL 9
62#define LEX_INTERPCASEMOD 8
63#define LEX_INTERPPUSH 7
64#define LEX_INTERPSTART 6
65#define LEX_INTERPEND 5
66#define LEX_INTERPENDMAYBE 4
67#define LEX_INTERPCONCAT 3
68#define LEX_INTERPCONST 2
69#define LEX_FORMLINE 1
70#define LEX_KNOWNEXT 0
79072805 71
79072805
LW
72#ifdef ff_next
73#undef ff_next
d48672a2
LW
74#endif
75
a1a0e61e 76#ifdef USE_PURE_BISON
dba4d153
JH
77# ifndef YYMAXLEVEL
78# define YYMAXLEVEL 100
79# endif
20141f0e
RI
80YYSTYPE* yylval_pointer[YYMAXLEVEL];
81int* yychar_pointer[YYMAXLEVEL];
82int yyactlevel = 0;
22c35a8c
GS
83# undef yylval
84# undef yychar
20141f0e
RI
85# define yylval (*yylval_pointer[yyactlevel])
86# define yychar (*yychar_pointer[yyactlevel])
87# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
88# undef yylex
dba4d153 89# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
90#endif
91
79072805 92#include "keywords.h"
fe14fcc3 93
ffb4593c
NT
94/* CLINE is a macro that ensures PL_copline has a sane value */
95
ae986130
LW
96#ifdef CLINE
97#undef CLINE
98#endif
57843af0 99#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 100
ffb4593c
NT
101/*
102 * Convenience functions to return different tokens and prime the
9cbb5ea2 103 * lexer for the next token. They all take an argument.
ffb4593c
NT
104 *
105 * TOKEN : generic token (used for '(', DOLSHARP, etc)
106 * OPERATOR : generic operator
107 * AOPERATOR : assignment operator
108 * PREBLOCK : beginning the block after an if, while, foreach, ...
109 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
110 * PREREF : *EXPR where EXPR is not a simple identifier
111 * TERM : expression term
112 * LOOPX : loop exiting command (goto, last, dump, etc)
113 * FTST : file test operator
114 * FUN0 : zero-argument function
2d2e263d 115 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
116 * BOop : bitwise or or xor
117 * BAop : bitwise and
118 * SHop : shift operator
119 * PWop : power operator
9cbb5ea2 120 * PMop : pattern-matching operator
ffb4593c
NT
121 * Aop : addition-level operator
122 * Mop : multiplication-level operator
123 * Eop : equality-testing operator
124 * Rop : relational operator <= != gt
125 *
126 * Also see LOP and lop() below.
127 */
128
3280af22
NIS
129#define TOKEN(retval) return (PL_bufptr = s,(int)retval)
130#define OPERATOR(retval) return (PL_expect = XTERM,PL_bufptr = s,(int)retval)
131#define AOPERATOR(retval) return ao((PL_expect = XTERM,PL_bufptr = s,(int)retval))
132#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
133#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
134#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s,(int)retval)
135#define TERM(retval) return (CLINE, PL_expect = XOPERATOR,PL_bufptr = s,(int)retval)
136#define LOOPX(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
137#define FTST(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
138#define FUN0(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
139#define FUN1(f) return(yylval.ival = f,PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
140#define BOop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
141#define BAop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
142#define SHop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
143#define PWop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
144#define PMop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
145#define Aop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
146#define Mop(f) return ao((yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
147#define Eop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
148#define Rop(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 149
a687059c
LW
150/* This bit of chicanery makes a unary function followed by
151 * a parenthesis into a function with one argument, highest precedence.
152 */
2f3197b3 153#define UNI(f) return(yylval.ival = f, \
3280af22
NIS
154 PL_expect = XTERM, \
155 PL_bufptr = s, \
156 PL_last_uni = PL_oldbufptr, \
157 PL_last_lop_op = f, \
a687059c
LW
158 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
159
79072805 160#define UNIBRACK(f) return(yylval.ival = f, \
3280af22
NIS
161 PL_bufptr = s, \
162 PL_last_uni = PL_oldbufptr, \
79072805
LW
163 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
164
9f68db38 165/* grandfather return to old style */
3280af22 166#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 167
ffb4593c
NT
168/*
169 * S_ao
170 *
171 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
172 * into an OP_ANDASSIGN or OP_ORASSIGN
173 */
174
76e3520e 175STATIC int
cea2e8a9 176S_ao(pTHX_ int toketype)
a0d0e21e 177{
3280af22
NIS
178 if (*PL_bufptr == '=') {
179 PL_bufptr++;
a0d0e21e
LW
180 if (toketype == ANDAND)
181 yylval.ival = OP_ANDASSIGN;
182 else if (toketype == OROR)
183 yylval.ival = OP_ORASSIGN;
184 toketype = ASSIGNOP;
185 }
186 return toketype;
187}
188
ffb4593c
NT
189/*
190 * S_no_op
191 * When Perl expects an operator and finds something else, no_op
192 * prints the warning. It always prints "<something> found where
193 * operator expected. It prints "Missing semicolon on previous line?"
194 * if the surprise occurs at the start of the line. "do you need to
195 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
196 * where the compiler doesn't know if foo is a method call or a function.
197 * It prints "Missing operator before end of line" if there's nothing
198 * after the missing operator, or "... before <...>" if there is something
199 * after the missing operator.
200 */
201
76e3520e 202STATIC void
cea2e8a9 203S_no_op(pTHX_ char *what, char *s)
463ee0b2 204{
3280af22
NIS
205 char *oldbp = PL_bufptr;
206 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 207
1189a94a
GS
208 if (!s)
209 s = oldbp;
07c798fb 210 else
1189a94a 211 PL_bufptr = s;
cea2e8a9 212 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 213 if (is_first)
cea2e8a9 214 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 215 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 216 char *t;
7e2040f0 217 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 218 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 219 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 220 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 221 }
07c798fb
HS
222 else {
223 assert(s >= oldbp);
cea2e8a9 224 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 225 }
3280af22 226 PL_bufptr = oldbp;
8990e307
LW
227}
228
ffb4593c
NT
229/*
230 * S_missingterm
231 * Complain about missing quote/regexp/heredoc terminator.
232 * If it's called with (char *)NULL then it cauterizes the line buffer.
233 * If we're in a delimited string and the delimiter is a control
234 * character, it's reformatted into a two-char sequence like ^C.
235 * This is fatal.
236 */
237
76e3520e 238STATIC void
cea2e8a9 239S_missingterm(pTHX_ char *s)
8990e307
LW
240{
241 char tmpbuf[3];
242 char q;
243 if (s) {
244 char *nl = strrchr(s,'\n');
d2719217 245 if (nl)
8990e307
LW
246 *nl = '\0';
247 }
9d116dd7
JH
248 else if (
249#ifdef EBCDIC
250 iscntrl(PL_multi_close)
251#else
252 PL_multi_close < 32 || PL_multi_close == 127
253#endif
254 ) {
8990e307 255 *tmpbuf = '^';
3280af22 256 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
257 s = "\\n";
258 tmpbuf[2] = '\0';
259 s = tmpbuf;
260 }
261 else {
3280af22 262 *tmpbuf = PL_multi_close;
8990e307
LW
263 tmpbuf[1] = '\0';
264 s = tmpbuf;
265 }
266 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 267 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 268}
79072805 269
ffb4593c
NT
270/*
271 * Perl_deprecate
ffb4593c
NT
272 */
273
79072805 274void
864dbfa3 275Perl_deprecate(pTHX_ char *s)
a0d0e21e 276{
d008e5eb 277 dTHR;
599cee73 278 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 279 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
280}
281
ffb4593c
NT
282/*
283 * depcom
9cbb5ea2 284 * Deprecate a comma-less variable list.
ffb4593c
NT
285 */
286
76e3520e 287STATIC void
cea2e8a9 288S_depcom(pTHX)
a0d0e21e
LW
289{
290 deprecate("comma-less variable list");
291}
292
ffb4593c 293/*
9cbb5ea2
GS
294 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
295 * utf16-to-utf8-reversed.
ffb4593c
NT
296 */
297
c39cd008
GS
298#ifdef PERL_CR_FILTER
299static void
300strip_return(SV *sv)
301{
302 register char *s = SvPVX(sv);
303 register char *e = s + SvCUR(sv);
304 /* outer loop optimized to do nothing if there are no CR-LFs */
305 while (s < e) {
306 if (*s++ == '\r' && *s == '\n') {
307 /* hit a CR-LF, need to copy the rest */
308 register char *d = s - 1;
309 *d++ = *s++;
310 while (s < e) {
311 if (*s == '\r' && s[1] == '\n')
312 s++;
313 *d++ = *s++;
314 }
315 SvCUR(sv) -= s - d;
316 return;
317 }
318 }
319}
a868473f 320
76e3520e 321STATIC I32
c39cd008 322S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 323{
c39cd008
GS
324 I32 count = FILTER_READ(idx+1, sv, maxlen);
325 if (count > 0 && !maxlen)
326 strip_return(sv);
327 return count;
a868473f
NIS
328}
329#endif
330
ffb4593c
NT
331/*
332 * Perl_lex_start
9cbb5ea2
GS
333 * Initialize variables. Uses the Perl save_stack to save its state (for
334 * recursive calls to the parser).
ffb4593c
NT
335 */
336
a0d0e21e 337void
864dbfa3 338Perl_lex_start(pTHX_ SV *line)
79072805 339{
0f15f207 340 dTHR;
8990e307
LW
341 char *s;
342 STRLEN len;
343
3280af22
NIS
344 SAVEI32(PL_lex_dojoin);
345 SAVEI32(PL_lex_brackets);
3280af22
NIS
346 SAVEI32(PL_lex_casemods);
347 SAVEI32(PL_lex_starts);
348 SAVEI32(PL_lex_state);
7766f137 349 SAVEVPTR(PL_lex_inpat);
3280af22 350 SAVEI32(PL_lex_inwhat);
18b09519
GS
351 if (PL_lex_state == LEX_KNOWNEXT) {
352 I32 toke = PL_nexttoke;
353 while (--toke >= 0) {
354 SAVEI32(PL_nexttype[toke]);
355 SAVEVPTR(PL_nextval[toke]);
356 }
357 SAVEI32(PL_nexttoke);
18b09519 358 }
57843af0 359 SAVECOPLINE(PL_curcop);
3280af22
NIS
360 SAVEPPTR(PL_bufptr);
361 SAVEPPTR(PL_bufend);
362 SAVEPPTR(PL_oldbufptr);
363 SAVEPPTR(PL_oldoldbufptr);
364 SAVEPPTR(PL_linestart);
365 SAVESPTR(PL_linestr);
366 SAVEPPTR(PL_lex_brackstack);
367 SAVEPPTR(PL_lex_casestack);
c76ac1ee 368 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
369 SAVESPTR(PL_lex_stuff);
370 SAVEI32(PL_lex_defer);
09bef843 371 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 372 SAVESPTR(PL_lex_repl);
bebdddfc
GS
373 SAVEINT(PL_expect);
374 SAVEINT(PL_lex_expect);
3280af22
NIS
375
376 PL_lex_state = LEX_NORMAL;
377 PL_lex_defer = 0;
378 PL_expect = XSTATE;
379 PL_lex_brackets = 0;
3280af22
NIS
380 New(899, PL_lex_brackstack, 120, char);
381 New(899, PL_lex_casestack, 12, char);
382 SAVEFREEPV(PL_lex_brackstack);
383 SAVEFREEPV(PL_lex_casestack);
384 PL_lex_casemods = 0;
385 *PL_lex_casestack = '\0';
386 PL_lex_dojoin = 0;
387 PL_lex_starts = 0;
388 PL_lex_stuff = Nullsv;
389 PL_lex_repl = Nullsv;
390 PL_lex_inpat = 0;
76be56bc 391 PL_nexttoke = 0;
3280af22 392 PL_lex_inwhat = 0;
09bef843 393 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
394 PL_linestr = line;
395 if (SvREADONLY(PL_linestr))
396 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
397 s = SvPV(PL_linestr, len);
8990e307 398 if (len && s[len-1] != ';') {
3280af22
NIS
399 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
400 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
401 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 402 }
3280af22
NIS
403 SvTEMP_off(PL_linestr);
404 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
405 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
406 SvREFCNT_dec(PL_rs);
79cb57f6 407 PL_rs = newSVpvn("\n", 1);
3280af22 408 PL_rsfp = 0;
79072805 409}
a687059c 410
ffb4593c
NT
411/*
412 * Perl_lex_end
9cbb5ea2
GS
413 * Finalizer for lexing operations. Must be called when the parser is
414 * done with the lexer.
ffb4593c
NT
415 */
416
463ee0b2 417void
864dbfa3 418Perl_lex_end(pTHX)
463ee0b2 419{
3280af22 420 PL_doextract = FALSE;
463ee0b2
LW
421}
422
ffb4593c
NT
423/*
424 * S_incline
425 * This subroutine has nothing to do with tilting, whether at windmills
426 * or pinball tables. Its name is short for "increment line". It
57843af0 427 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 428 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
429 * # line 500 "foo.pm"
430 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
431 */
432
76e3520e 433STATIC void
cea2e8a9 434S_incline(pTHX_ char *s)
463ee0b2 435{
0f15f207 436 dTHR;
463ee0b2
LW
437 char *t;
438 char *n;
73659bf1 439 char *e;
463ee0b2 440 char ch;
463ee0b2 441
57843af0 442 CopLINE_inc(PL_curcop);
463ee0b2
LW
443 if (*s++ != '#')
444 return;
bf4acbe4 445 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
446 if (strnEQ(s, "line", 4))
447 s += 4;
448 else
449 return;
450 if (*s == ' ' || *s == '\t')
451 s++;
452 else
453 return;
bf4acbe4 454 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
455 if (!isDIGIT(*s))
456 return;
457 n = s;
458 while (isDIGIT(*s))
459 s++;
bf4acbe4 460 while (SPACE_OR_TAB(*s))
463ee0b2 461 s++;
73659bf1 462 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 463 s++;
73659bf1
GS
464 e = t + 1;
465 }
463ee0b2 466 else {
463ee0b2 467 for (t = s; !isSPACE(*t); t++) ;
73659bf1 468 e = t;
463ee0b2 469 }
bf4acbe4 470 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
471 e++;
472 if (*e != '\n' && *e != '\0')
473 return; /* false alarm */
474
463ee0b2
LW
475 ch = *t;
476 *t = '\0';
f4dd75d9
GS
477 if (t - s > 0) {
478#ifdef USE_ITHREADS
479 Safefree(CopFILE(PL_curcop));
480#else
481 SvREFCNT_dec(CopFILEGV(PL_curcop));
482#endif
57843af0 483 CopFILE_set(PL_curcop, s);
f4dd75d9 484 }
463ee0b2 485 *t = ch;
57843af0 486 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
487}
488
ffb4593c
NT
489/*
490 * S_skipspace
491 * Called to gobble the appropriate amount and type of whitespace.
492 * Skips comments as well.
493 */
494
76e3520e 495STATIC char *
cea2e8a9 496S_skipspace(pTHX_ register char *s)
a687059c 497{
11343788 498 dTHR;
3280af22 499 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 500 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
501 s++;
502 return s;
503 }
504 for (;;) {
fd049845 505 STRLEN prevlen;
09bef843
SB
506 SSize_t oldprevlen, oldoldprevlen;
507 SSize_t oldloplen, oldunilen;
60e6418e
GS
508 while (s < PL_bufend && isSPACE(*s)) {
509 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
510 incline(s);
511 }
ffb4593c
NT
512
513 /* comment */
3280af22
NIS
514 if (s < PL_bufend && *s == '#') {
515 while (s < PL_bufend && *s != '\n')
463ee0b2 516 s++;
60e6418e 517 if (s < PL_bufend) {
463ee0b2 518 s++;
60e6418e
GS
519 if (PL_in_eval && !PL_rsfp) {
520 incline(s);
521 continue;
522 }
523 }
463ee0b2 524 }
ffb4593c
NT
525
526 /* only continue to recharge the buffer if we're at the end
527 * of the buffer, we're not reading from a source filter, and
528 * we're in normal lexing mode
529 */
09bef843
SB
530 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
531 PL_lex_state == LEX_FORMLINE)
463ee0b2 532 return s;
ffb4593c
NT
533
534 /* try to recharge the buffer */
9cbb5ea2
GS
535 if ((s = filter_gets(PL_linestr, PL_rsfp,
536 (prevlen = SvCUR(PL_linestr)))) == Nullch)
537 {
538 /* end of file. Add on the -p or -n magic */
3280af22
NIS
539 if (PL_minus_n || PL_minus_p) {
540 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
541 ";}continue{print or die qq(-p destination: $!\\n)" :
542 "");
3280af22
NIS
543 sv_catpv(PL_linestr,";}");
544 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
545 }
546 else
3280af22 547 sv_setpv(PL_linestr,";");
ffb4593c
NT
548
549 /* reset variables for next time we lex */
9cbb5ea2
GS
550 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
551 = SvPVX(PL_linestr);
3280af22 552 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
ffb4593c
NT
553
554 /* Close the filehandle. Could be from -P preprocessor,
555 * STDIN, or a regular file. If we were reading code from
556 * STDIN (because the commandline held no -e or filename)
557 * then we don't close it, we reset it so the code can
558 * read from STDIN too.
559 */
560
3280af22
NIS
561 if (PL_preprocess && !PL_in_eval)
562 (void)PerlProc_pclose(PL_rsfp);
563 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
564 PerlIO_clearerr(PL_rsfp);
8990e307 565 else
3280af22
NIS
566 (void)PerlIO_close(PL_rsfp);
567 PL_rsfp = Nullfp;
463ee0b2
LW
568 return s;
569 }
ffb4593c
NT
570
571 /* not at end of file, so we only read another line */
09bef843
SB
572 /* make corresponding updates to old pointers, for yyerror() */
573 oldprevlen = PL_oldbufptr - PL_bufend;
574 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
575 if (PL_last_uni)
576 oldunilen = PL_last_uni - PL_bufend;
577 if (PL_last_lop)
578 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
579 PL_linestart = PL_bufptr = s + prevlen;
580 PL_bufend = s + SvCUR(PL_linestr);
581 s = PL_bufptr;
09bef843
SB
582 PL_oldbufptr = s + oldprevlen;
583 PL_oldoldbufptr = s + oldoldprevlen;
584 if (PL_last_uni)
585 PL_last_uni = s + oldunilen;
586 if (PL_last_lop)
587 PL_last_lop = s + oldloplen;
a0d0e21e 588 incline(s);
ffb4593c
NT
589
590 /* debugger active and we're not compiling the debugger code,
591 * so store the line into the debugger's array of lines
592 */
3280af22 593 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
594 SV *sv = NEWSV(85,0);
595
596 sv_upgrade(sv, SVt_PVMG);
3280af22 597 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 598 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 599 }
463ee0b2 600 }
a687059c 601}
378cc40b 602
ffb4593c
NT
603/*
604 * S_check_uni
605 * Check the unary operators to ensure there's no ambiguity in how they're
606 * used. An ambiguous piece of code would be:
607 * rand + 5
608 * This doesn't mean rand() + 5. Because rand() is a unary operator,
609 * the +5 is its argument.
610 */
611
76e3520e 612STATIC void
cea2e8a9 613S_check_uni(pTHX)
ba106d47 614{
2f3197b3 615 char *s;
a0d0e21e 616 char *t;
0453d815 617 dTHR;
2f3197b3 618
3280af22 619 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 620 return;
3280af22
NIS
621 while (isSPACE(*PL_last_uni))
622 PL_last_uni++;
7e2040f0 623 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 624 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 625 return;
0453d815 626 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 627 char ch = *s;
0453d815
PM
628 *s = '\0';
629 Perl_warner(aTHX_ WARN_AMBIGUOUS,
630 "Warning: Use of \"%s\" without parens is ambiguous",
631 PL_last_uni);
632 *s = ch;
633 }
2f3197b3
LW
634}
635
ffb4593c
NT
636/* workaround to replace the UNI() macro with a function. Only the
637 * hints/uts.sh file mentions this. Other comments elsewhere in the
638 * source indicate Microport Unix might need it too.
639 */
640
ffed7fef
LW
641#ifdef CRIPPLED_CC
642
643#undef UNI
ffed7fef 644#define UNI(f) return uni(f,s)
ffed7fef 645
76e3520e 646STATIC int
cea2e8a9 647S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
648{
649 yylval.ival = f;
3280af22
NIS
650 PL_expect = XTERM;
651 PL_bufptr = s;
8f872242
NIS
652 PL_last_uni = PL_oldbufptr;
653 PL_last_lop_op = f;
ffed7fef
LW
654 if (*s == '(')
655 return FUNC1;
656 s = skipspace(s);
657 if (*s == '(')
658 return FUNC1;
659 else
660 return UNIOP;
661}
662
a0d0e21e
LW
663#endif /* CRIPPLED_CC */
664
ffb4593c
NT
665/*
666 * LOP : macro to build a list operator. Its behaviour has been replaced
667 * with a subroutine, S_lop() for which LOP is just another name.
668 */
669
a0d0e21e
LW
670#define LOP(f,x) return lop(f,x,s)
671
ffb4593c
NT
672/*
673 * S_lop
674 * Build a list operator (or something that might be one). The rules:
675 * - if we have a next token, then it's a list operator [why?]
676 * - if the next thing is an opening paren, then it's a function
677 * - else it's a list operator
678 */
679
76e3520e 680STATIC I32
a0be28da 681S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 682{
0f15f207 683 dTHR;
79072805 684 yylval.ival = f;
35c8bce7 685 CLINE;
3280af22
NIS
686 PL_expect = x;
687 PL_bufptr = s;
688 PL_last_lop = PL_oldbufptr;
689 PL_last_lop_op = f;
690 if (PL_nexttoke)
a0d0e21e 691 return LSTOP;
79072805
LW
692 if (*s == '(')
693 return FUNC;
694 s = skipspace(s);
695 if (*s == '(')
696 return FUNC;
697 else
698 return LSTOP;
699}
700
ffb4593c
NT
701/*
702 * S_force_next
9cbb5ea2 703 * When the lexer realizes it knows the next token (for instance,
ffb4593c 704 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
705 * to know what token to return the next time the lexer is called. Caller
706 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
707 * handles the token correctly.
ffb4593c
NT
708 */
709
76e3520e 710STATIC void
cea2e8a9 711S_force_next(pTHX_ I32 type)
79072805 712{
3280af22
NIS
713 PL_nexttype[PL_nexttoke] = type;
714 PL_nexttoke++;
715 if (PL_lex_state != LEX_KNOWNEXT) {
716 PL_lex_defer = PL_lex_state;
717 PL_lex_expect = PL_expect;
718 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
719 }
720}
721
ffb4593c
NT
722/*
723 * S_force_word
724 * When the lexer knows the next thing is a word (for instance, it has
725 * just seen -> and it knows that the next char is a word char, then
726 * it calls S_force_word to stick the next word into the PL_next lookahead.
727 *
728 * Arguments:
b1b65b59 729 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
730 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
731 * int check_keyword : if true, Perl checks to make sure the word isn't
732 * a keyword (do this if the word is a label, e.g. goto FOO)
733 * int allow_pack : if true, : characters will also be allowed (require,
734 * use, etc. do this)
9cbb5ea2 735 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
736 */
737
76e3520e 738STATIC char *
cea2e8a9 739S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 740{
463ee0b2
LW
741 register char *s;
742 STRLEN len;
743
744 start = skipspace(start);
745 s = start;
7e2040f0 746 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 747 (allow_pack && *s == ':') ||
15f0808c 748 (allow_initial_tick && *s == '\'') )
a0d0e21e 749 {
3280af22
NIS
750 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
751 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
752 return start;
753 if (token == METHOD) {
754 s = skipspace(s);
755 if (*s == '(')
3280af22 756 PL_expect = XTERM;
463ee0b2 757 else {
3280af22 758 PL_expect = XOPERATOR;
463ee0b2 759 }
79072805 760 }
3280af22
NIS
761 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
762 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
763 force_next(token);
764 }
765 return s;
766}
767
ffb4593c
NT
768/*
769 * S_force_ident
9cbb5ea2 770 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
771 * text only contains the "foo" portion. The first argument is a pointer
772 * to the "foo", and the second argument is the type symbol to prefix.
773 * Forces the next token to be a "WORD".
9cbb5ea2 774 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
775 */
776
76e3520e 777STATIC void
cea2e8a9 778S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
779{
780 if (s && *s) {
11343788 781 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 782 PL_nextval[PL_nexttoke].opval = o;
79072805 783 force_next(WORD);
748a9306 784 if (kind) {
e858de61 785 dTHR; /* just for in_eval */
11343788 786 o->op_private = OPpCONST_ENTERED;
55497cff
PP
787 /* XXX see note in pp_entereval() for why we forgo typo
788 warnings if the symbol must be introduced in an eval.
789 GSAR 96-10-12 */
3280af22 790 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
791 kind == '$' ? SVt_PV :
792 kind == '@' ? SVt_PVAV :
793 kind == '%' ? SVt_PVHV :
794 SVt_PVGV
795 );
748a9306 796 }
79072805
LW
797 }
798}
799
1571675a
GS
800NV
801Perl_str_to_version(pTHX_ SV *sv)
802{
803 NV retval = 0.0;
804 NV nshift = 1.0;
805 STRLEN len;
806 char *start = SvPVx(sv,len);
3aa33fe5 807 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
808 char *end = start + len;
809 while (start < end) {
ba210ebe 810 STRLEN skip;
1571675a
GS
811 UV n;
812 if (utf)
dcad2880 813 n = utf8_to_uv((U8*)start, len, &skip, 0);
1571675a
GS
814 else {
815 n = *(U8*)start;
816 skip = 1;
817 }
818 retval += ((NV)n)/nshift;
819 start += skip;
820 nshift *= 1000;
821 }
822 return retval;
823}
824
ffb4593c
NT
825/*
826 * S_force_version
827 * Forces the next token to be a version number.
828 */
829
76e3520e 830STATIC char *
cea2e8a9 831S_force_version(pTHX_ char *s)
89bfa8cd
PP
832{
833 OP *version = Nullop;
44dcb63b 834 char *d;
89bfa8cd
PP
835
836 s = skipspace(s);
837
44dcb63b 838 d = s;
dd629d5b 839 if (*d == 'v')
44dcb63b 840 d++;
44dcb63b 841 if (isDIGIT(*d)) {
a7cb1f99 842 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 843 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 844 SV *ver;
b73d6f50 845 s = scan_num(s, &yylval);
89bfa8cd 846 version = yylval.opval;
dd629d5b
GS
847 ver = cSVOPx(version)->op_sv;
848 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 849 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
850 SvNVX(ver) = str_to_version(ver);
851 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 852 }
89bfa8cd
PP
853 }
854 }
855
856 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 857 PL_nextval[PL_nexttoke].opval = version;
89bfa8cd
PP
858 force_next(WORD);
859
860 return (s);
861}
862
ffb4593c
NT
863/*
864 * S_tokeq
865 * Tokenize a quoted string passed in as an SV. It finds the next
866 * chunk, up to end of string or a backslash. It may make a new
867 * SV containing that chunk (if HINT_NEW_STRING is on). It also
868 * turns \\ into \.
869 */
870
76e3520e 871STATIC SV *
cea2e8a9 872S_tokeq(pTHX_ SV *sv)
79072805
LW
873{
874 register char *s;
875 register char *send;
876 register char *d;
b3ac6de7
IZ
877 STRLEN len = 0;
878 SV *pv = sv;
79072805
LW
879
880 if (!SvLEN(sv))
b3ac6de7 881 goto finish;
79072805 882
a0d0e21e 883 s = SvPV_force(sv, len);
21a311ee 884 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 885 goto finish;
463ee0b2 886 send = s + len;
79072805
LW
887 while (s < send && *s != '\\')
888 s++;
889 if (s == send)
b3ac6de7 890 goto finish;
79072805 891 d = s;
3280af22 892 if ( PL_hints & HINT_NEW_STRING )
79cb57f6 893 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
79072805
LW
894 while (s < send) {
895 if (*s == '\\') {
a0d0e21e 896 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
897 s++; /* all that, just for this */
898 }
899 *d++ = *s++;
900 }
901 *d = '\0';
463ee0b2 902 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 903 finish:
3280af22 904 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 905 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
906 return sv;
907}
908
ffb4593c
NT
909/*
910 * Now come three functions related to double-quote context,
911 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
912 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
913 * interact with PL_lex_state, and create fake ( ... ) argument lists
914 * to handle functions and concatenation.
915 * They assume that whoever calls them will be setting up a fake
916 * join call, because each subthing puts a ',' after it. This lets
917 * "lower \luPpEr"
918 * become
919 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
920 *
921 * (I'm not sure whether the spurious commas at the end of lcfirst's
922 * arguments and join's arguments are created or not).
923 */
924
925/*
926 * S_sublex_start
927 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
928 *
929 * Pattern matching will set PL_lex_op to the pattern-matching op to
930 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
931 *
932 * OP_CONST and OP_READLINE are easy--just make the new op and return.
933 *
934 * Everything else becomes a FUNC.
935 *
936 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
937 * had an OP_CONST or OP_READLINE). This just sets us up for a
938 * call to S_sublex_push().
939 */
940
76e3520e 941STATIC I32
cea2e8a9 942S_sublex_start(pTHX)
79072805
LW
943{
944 register I32 op_type = yylval.ival;
79072805
LW
945
946 if (op_type == OP_NULL) {
3280af22
NIS
947 yylval.opval = PL_lex_op;
948 PL_lex_op = Nullop;
79072805
LW
949 return THING;
950 }
951 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 952 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
953
954 if (SvTYPE(sv) == SVt_PVIV) {
955 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
956 STRLEN len;
957 char *p;
958 SV *nsv;
959
960 p = SvPV(sv, len);
79cb57f6 961 nsv = newSVpvn(p, len);
01ec43d0
GS
962 if (SvUTF8(sv))
963 SvUTF8_on(nsv);
b3ac6de7
IZ
964 SvREFCNT_dec(sv);
965 sv = nsv;
966 }
967 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 968 PL_lex_stuff = Nullsv;
79072805
LW
969 return THING;
970 }
971
3280af22
NIS
972 PL_sublex_info.super_state = PL_lex_state;
973 PL_sublex_info.sub_inwhat = op_type;
974 PL_sublex_info.sub_op = PL_lex_op;
975 PL_lex_state = LEX_INTERPPUSH;
55497cff 976
3280af22
NIS
977 PL_expect = XTERM;
978 if (PL_lex_op) {
979 yylval.opval = PL_lex_op;
980 PL_lex_op = Nullop;
55497cff
PP
981 return PMFUNC;
982 }
983 else
984 return FUNC;
985}
986
ffb4593c
NT
987/*
988 * S_sublex_push
989 * Create a new scope to save the lexing state. The scope will be
990 * ended in S_sublex_done. Returns a '(', starting the function arguments
991 * to the uc, lc, etc. found before.
992 * Sets PL_lex_state to LEX_INTERPCONCAT.
993 */
994
76e3520e 995STATIC I32
cea2e8a9 996S_sublex_push(pTHX)
55497cff 997{
0f15f207 998 dTHR;
f46d017c 999 ENTER;
55497cff 1000
3280af22
NIS
1001 PL_lex_state = PL_sublex_info.super_state;
1002 SAVEI32(PL_lex_dojoin);
1003 SAVEI32(PL_lex_brackets);
3280af22
NIS
1004 SAVEI32(PL_lex_casemods);
1005 SAVEI32(PL_lex_starts);
1006 SAVEI32(PL_lex_state);
7766f137 1007 SAVEVPTR(PL_lex_inpat);
3280af22 1008 SAVEI32(PL_lex_inwhat);
57843af0 1009 SAVECOPLINE(PL_curcop);
3280af22
NIS
1010 SAVEPPTR(PL_bufptr);
1011 SAVEPPTR(PL_oldbufptr);
1012 SAVEPPTR(PL_oldoldbufptr);
1013 SAVEPPTR(PL_linestart);
1014 SAVESPTR(PL_linestr);
1015 SAVEPPTR(PL_lex_brackstack);
1016 SAVEPPTR(PL_lex_casestack);
1017
1018 PL_linestr = PL_lex_stuff;
1019 PL_lex_stuff = Nullsv;
1020
9cbb5ea2
GS
1021 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1022 = SvPVX(PL_linestr);
3280af22
NIS
1023 PL_bufend += SvCUR(PL_linestr);
1024 SAVEFREESV(PL_linestr);
1025
1026 PL_lex_dojoin = FALSE;
1027 PL_lex_brackets = 0;
3280af22
NIS
1028 New(899, PL_lex_brackstack, 120, char);
1029 New(899, PL_lex_casestack, 12, char);
1030 SAVEFREEPV(PL_lex_brackstack);
1031 SAVEFREEPV(PL_lex_casestack);
1032 PL_lex_casemods = 0;
1033 *PL_lex_casestack = '\0';
1034 PL_lex_starts = 0;
1035 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1036 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1037
1038 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1039 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1040 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1041 else
3280af22 1042 PL_lex_inpat = Nullop;
79072805 1043
55497cff 1044 return '(';
79072805
LW
1045}
1046
ffb4593c
NT
1047/*
1048 * S_sublex_done
1049 * Restores lexer state after a S_sublex_push.
1050 */
1051
76e3520e 1052STATIC I32
cea2e8a9 1053S_sublex_done(pTHX)
79072805 1054{
3280af22
NIS
1055 if (!PL_lex_starts++) {
1056 PL_expect = XOPERATOR;
79cb57f6 1057 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn("",0));
79072805
LW
1058 return THING;
1059 }
1060
3280af22
NIS
1061 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1062 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1063 return yylex();
79072805
LW
1064 }
1065
ffb4593c 1066 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1067 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1068 PL_linestr = PL_lex_repl;
1069 PL_lex_inpat = 0;
1070 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1071 PL_bufend += SvCUR(PL_linestr);
1072 SAVEFREESV(PL_linestr);
1073 PL_lex_dojoin = FALSE;
1074 PL_lex_brackets = 0;
3280af22
NIS
1075 PL_lex_casemods = 0;
1076 *PL_lex_casestack = '\0';
1077 PL_lex_starts = 0;
25da4f38 1078 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1079 PL_lex_state = LEX_INTERPNORMAL;
1080 PL_lex_starts++;
e9fa98b2
HS
1081 /* we don't clear PL_lex_repl here, so that we can check later
1082 whether this is an evalled subst; that means we rely on the
1083 logic to ensure sublex_done() is called again only via the
1084 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1085 }
e9fa98b2 1086 else {
3280af22 1087 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1088 PL_lex_repl = Nullsv;
1089 }
79072805 1090 return ',';
ffed7fef
LW
1091 }
1092 else {
f46d017c 1093 LEAVE;
3280af22
NIS
1094 PL_bufend = SvPVX(PL_linestr);
1095 PL_bufend += SvCUR(PL_linestr);
1096 PL_expect = XOPERATOR;
09bef843 1097 PL_sublex_info.sub_inwhat = 0;
79072805 1098 return ')';
ffed7fef
LW
1099 }
1100}
1101
02aa26ce
NT
1102/*
1103 scan_const
1104
1105 Extracts a pattern, double-quoted string, or transliteration. This
1106 is terrifying code.
1107
3280af22
NIS
1108 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1109 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1110 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1111
9b599b2a
GS
1112 Returns a pointer to the character scanned up to. Iff this is
1113 advanced from the start pointer supplied (ie if anything was
1114 successfully parsed), will leave an OP for the substring scanned
1115 in yylval. Caller must intuit reason for not parsing further
1116 by looking at the next characters herself.
1117
02aa26ce
NT
1118 In patterns:
1119 backslashes:
1120 double-quoted style: \r and \n
1121 regexp special ones: \D \s
1122 constants: \x3
1123 backrefs: \1 (deprecated in substitution replacements)
1124 case and quoting: \U \Q \E
1125 stops on @ and $, but not for $ as tail anchor
1126
1127 In transliterations:
1128 characters are VERY literal, except for - not at the start or end
1129 of the string, which indicates a range. scan_const expands the
1130 range to the full set of intermediate characters.
1131
1132 In double-quoted strings:
1133 backslashes:
1134 double-quoted style: \r and \n
1135 constants: \x3
1136 backrefs: \1 (deprecated)
1137 case and quoting: \U \Q \E
1138 stops on @ and $
1139
1140 scan_const does *not* construct ops to handle interpolated strings.
1141 It stops processing as soon as it finds an embedded $ or @ variable
1142 and leaves it to the caller to work out what's going on.
1143
1144 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1145
1146 $ in pattern could be $foo or could be tail anchor. Assumption:
1147 it's a tail anchor if $ is the last thing in the string, or if it's
1148 followed by one of ")| \n\t"
1149
1150 \1 (backreferences) are turned into $1
1151
1152 The structure of the code is
1153 while (there's a character to process) {
1154 handle transliteration ranges
1155 skip regexp comments
1156 skip # initiated comments in //x patterns
1157 check for embedded @foo
1158 check for embedded scalars
1159 if (backslash) {
1160 leave intact backslashes from leave (below)
1161 deprecate \1 in strings and sub replacements
1162 handle string-changing backslashes \l \U \Q \E, etc.
1163 switch (what was escaped) {
1164 handle - in a transliteration (becomes a literal -)
1165 handle \132 octal characters
1166 handle 0x15 hex characters
1167 handle \cV (control V)
1168 handle printf backslashes (\f, \r, \n, etc)
1169 } (end switch)
1170 } (end if backslash)
1171 } (end while character to read)
1172
1173*/
1174
76e3520e 1175STATIC char *
cea2e8a9 1176S_scan_const(pTHX_ char *start)
79072805 1177{
3280af22 1178 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1179 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1180 register char *s = start; /* start of the constant */
1181 register char *d = SvPVX(sv); /* destination for copies */
1182 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1183 bool didrange = FALSE; /* did we just finish a range? */
89491803 1184 bool has_utf8 = FALSE; /* embedded \x{} */
012bcf8d
GS
1185 UV uv;
1186
ac2262e3 1187 I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
a0ed51b3
LW
1188 ? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
1189 : UTF;
89491803 1190 I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
9cbb5ea2
GS
1191 ? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
1192 OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
a0ed51b3 1193 : UTF;
dff6d3cd 1194 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1195 PL_lex_inpat
4a2d328f 1196 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1197 : "";
79072805
LW
1198
1199 while (s < send || dorange) {
02aa26ce 1200 /* get transliterations out of the way (they're most literal) */
3280af22 1201 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1202 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1203 if (dorange) {
02aa26ce 1204 I32 i; /* current expanded character */
8ada0baa 1205 I32 min; /* first character in range */
02aa26ce
NT
1206 I32 max; /* last character in range */
1207
1208 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1209 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1210 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1211 d -= 2; /* eat the first char and the - */
1212
8ada0baa
JH
1213 min = (U8)*d; /* first char in range */
1214 max = (U8)d[1]; /* last char in range */
1215
c2e66d9e 1216 if (min > max) {
01ec43d0
GS
1217 Perl_croak(aTHX_
1218 "Invalid [] range \"%c-%c\" in transliteration operator",
d2560b70 1219 (char)min, (char)max);
c2e66d9e
GS
1220 }
1221
8ada0baa
JH
1222#ifndef ASCIIish
1223 if ((isLOWER(min) && isLOWER(max)) ||
1224 (isUPPER(min) && isUPPER(max))) {
1225 if (isLOWER(min)) {
1226 for (i = min; i <= max; i++)
1227 if (isLOWER(i))
1228 *d++ = i;
1229 } else {
1230 for (i = min; i <= max; i++)
1231 if (isUPPER(i))
1232 *d++ = i;
1233 }
1234 }
1235 else
1236#endif
1237 for (i = min; i <= max; i++)
1238 *d++ = i;
02aa26ce
NT
1239
1240 /* mark the range as done, and continue */
79072805 1241 dorange = FALSE;
01ec43d0 1242 didrange = TRUE;
79072805 1243 continue;
c2e66d9e 1244 }
02aa26ce
NT
1245
1246 /* range begins (ignore - as first or last char) */
79072805 1247 else if (*s == '-' && s+1 < send && s != start) {
01ec43d0 1248 if (didrange) {
1fafa243 1249 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1250 }
a0ed51b3 1251 if (utf) {
a176fa2a 1252 *d++ = (char)0xff; /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1253 s++;
1254 continue;
1255 }
79072805
LW
1256 dorange = TRUE;
1257 s++;
01ec43d0
GS
1258 }
1259 else {
1260 didrange = FALSE;
1261 }
79072805 1262 }
02aa26ce
NT
1263
1264 /* if we get here, we're not doing a transliteration */
1265
0f5d15d6
IZ
1266 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1267 except for the last char, which will be done separately. */
3280af22 1268 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1269 if (s[2] == '#') {
1270 while (s < send && *s != ')')
1271 *d++ = *s++;
155aba94
GS
1272 }
1273 else if (s[2] == '{' /* This should match regcomp.c */
1274 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1275 {
cc6b7395 1276 I32 count = 1;
0f5d15d6 1277 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1278 char c;
1279
d9f97599
GS
1280 while (count && (c = *regparse)) {
1281 if (c == '\\' && regparse[1])
1282 regparse++;
cc6b7395
IZ
1283 else if (c == '{')
1284 count++;
1285 else if (c == '}')
1286 count--;
d9f97599 1287 regparse++;
cc6b7395 1288 }
5bdf89e7
IZ
1289 if (*regparse != ')') {
1290 regparse--; /* Leave one char for continuation. */
cc6b7395 1291 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1292 }
0f5d15d6 1293 while (s < regparse)
cc6b7395
IZ
1294 *d++ = *s++;
1295 }
748a9306 1296 }
02aa26ce
NT
1297
1298 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1299 else if (*s == '#' && PL_lex_inpat &&
1300 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306
LW
1301 while (s+1 < send && *s != '\n')
1302 *d++ = *s++;
1303 }
02aa26ce 1304
5d1d4326
JH
1305 /* check for embedded arrays
1306 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1307 */
7e2040f0 1308 else if (*s == '@' && s[1]
5d1d4326 1309 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1310 break;
02aa26ce
NT
1311
1312 /* check for embedded scalars. only stop if we're sure it's a
1313 variable.
1314 */
79072805 1315 else if (*s == '$') {
3280af22 1316 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1317 break;
c277df42 1318 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1319 break; /* in regexp, $ might be tail anchor */
1320 }
02aa26ce 1321
a0ed51b3
LW
1322 /* (now in tr/// code again) */
1323
89491803 1324 if (*s & 0x80 && this_utf8) {
ba210ebe
JH
1325 STRLEN len;
1326 UV uv;
1327
dcad2880 1328 uv = utf8_to_uv((U8*)s, send - s, &len, UTF8_CHECK_ONLY);
cc366d4b 1329 if (len == (STRLEN)-1) {
fcc8fcf6 1330 /* Illegal UTF8 (a high-bit byte), make it valid. */
ba210ebe
JH
1331 char *old_pvx = SvPVX(sv);
1332 /* need space for one extra char (NOTE: SvCUR() not set here) */
1333 d = SvGROW(sv, SvLEN(sv) + 1) + (d - old_pvx);
1334 d = (char*)uv_to_utf8((U8*)d, (U8)*s++);
1335 }
1336 else {
1337 while (len--)
1338 *d++ = *s++;
1339 }
89491803 1340 has_utf8 = TRUE;
ba210ebe 1341 continue;
a0ed51b3
LW
1342 }
1343
02aa26ce 1344 /* backslashes */
79072805
LW
1345 if (*s == '\\' && s+1 < send) {
1346 s++;
02aa26ce
NT
1347
1348 /* some backslashes we leave behind */
c9f97d15 1349 if (*leaveit && *s && strchr(leaveit, *s)) {
79072805
LW
1350 *d++ = '\\';
1351 *d++ = *s++;
1352 continue;
1353 }
02aa26ce
NT
1354
1355 /* deprecate \1 in strings and substitution replacements */
3280af22 1356 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1357 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1358 {
d008e5eb 1359 dTHR; /* only for ckWARN */
599cee73 1360 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1361 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1362 *--s = '$';
1363 break;
1364 }
02aa26ce
NT
1365
1366 /* string-change backslash escapes */
3280af22 1367 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1368 --s;
1369 break;
1370 }
02aa26ce
NT
1371
1372 /* if we get here, it's either a quoted -, or a digit */
79072805 1373 switch (*s) {
02aa26ce
NT
1374
1375 /* quoted - in transliterations */
79072805 1376 case '-':
3280af22 1377 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1378 *d++ = *s++;
1379 continue;
1380 }
1381 /* FALL THROUGH */
1382 default:
11b8faa4
JH
1383 {
1384 dTHR;
7e84c16c 1385 if (ckWARN(WARN_MISC) && isALNUM(*s))
e476b1b5 1386 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1387 "Unrecognized escape \\%c passed through",
1388 *s);
1389 /* default action is to copy the quoted character */
1390 *d++ = *s++;
1391 continue;
1392 }
02aa26ce
NT
1393
1394 /* \132 indicates an octal constant */
79072805
LW
1395 case '0': case '1': case '2': case '3':
1396 case '4': case '5': case '6': case '7':
ba210ebe
JH
1397 {
1398 STRLEN len = 0; /* disallow underscores */
1399 uv = (UV)scan_oct(s, 3, &len);
1400 s += len;
1401 }
012bcf8d 1402 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1403
1404 /* \x24 indicates a hex constant */
79072805 1405 case 'x':
a0ed51b3
LW
1406 ++s;
1407 if (*s == '{') {
1408 char* e = strchr(s, '}');
adaeee49 1409 if (!e) {
a0ed51b3 1410 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1411 e = s;
1412 }
89491803 1413 else {
ba210ebe
JH
1414 STRLEN len = 1; /* allow underscores */
1415 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
89491803 1416 has_utf8 = TRUE;
ba210ebe
JH
1417 }
1418 s = e + 1;
a0ed51b3
LW
1419 }
1420 else {
ba210ebe
JH
1421 {
1422 STRLEN len = 0; /* disallow underscores */
1423 uv = (UV)scan_hex(s, 2, &len);
1424 s += len;
1425 }
012bcf8d
GS
1426 }
1427
1428 NUM_ESCAPE_INSERT:
1429 /* Insert oct or hex escaped character.
1430 * There will always enough room in sv since such escapes will
1431 * be longer than any utf8 sequence they can end up as
1432 */
89491803
SC
1433 if (uv > 127 || has_utf8) {
1434 if (!this_utf8 && !has_utf8 && uv > 255) {
012bcf8d
GS
1435 /* might need to recode whatever we have accumulated so far
1436 * if it contains any hibit chars
1437 */
1438 int hicount = 0;
1439 char *c;
1440 for (c = SvPVX(sv); c < d; c++) {
1441 if (*c & 0x80)
1442 hicount++;
1443 }
1444 if (hicount) {
1445 char *old_pvx = SvPVX(sv);
1446 char *src, *dst;
1447 d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
1448
1449 src = d - 1;
1450 d += hicount;
1451 dst = d - 1;
1452
1453 while (src < dst) {
1454 if (*src & 0x80) {
1455 dst--;
1456 uv_to_utf8((U8*)dst, (U8)*src--);
1457 dst--;
1458 }
1459 else {
1460 *dst-- = *src--;
1461 }
1462 }
1463 }
1464 }
1465
89491803 1466 if (has_utf8 || uv > 255) {
012bcf8d 1467 d = (char*)uv_to_utf8((U8*)d, uv);
89491803 1468 this_utf8 = TRUE;
012bcf8d 1469 }
a0ed51b3 1470 else {
012bcf8d 1471 *d++ = (char)uv;
a0ed51b3 1472 }
012bcf8d
GS
1473 }
1474 else {
1475 *d++ = (char)uv;
a0ed51b3 1476 }
79072805 1477 continue;
02aa26ce 1478
4a2d328f
IZ
1479 /* \N{latin small letter a} is a named character */
1480 case 'N':
423cee85
JH
1481 ++s;
1482 if (*s == '{') {
1483 char* e = strchr(s, '}');
155aba94 1484 SV *res;
423cee85
JH
1485 STRLEN len;
1486 char *str;
423cee85
JH
1487
1488 if (!e) {
5777a3f7 1489 yyerror("Missing right brace on \\N{}");
423cee85
JH
1490 e = s - 1;
1491 goto cont_scan;
1492 }
1493 res = newSVpvn(s + 1, e - s - 1);
1494 res = new_constant( Nullch, 0, "charnames",
5777a3f7 1495 res, Nullsv, "\\N{...}" );
423cee85 1496 str = SvPV(res,len);
89491803 1497 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1498 char *ostart = SvPVX(sv);
1499 SvCUR_set(sv, d - ostart);
1500 SvPOK_on(sv);
e4f3eed8 1501 *d = '\0';
f08d6ad9 1502 sv_utf8_upgrade(sv);
d2f449dd
SB
1503 /* this just broke our allocation above... */
1504 SvGROW(sv, send - start);
f08d6ad9 1505 d = SvPVX(sv) + SvCUR(sv);
89491803 1506 has_utf8 = TRUE;
f08d6ad9 1507 }
423cee85
JH
1508 if (len > e - s + 4) {
1509 char *odest = SvPVX(sv);
1510
1511 SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
1512 d = SvPVX(sv) + (d - odest);
1513 }
1514 Copy(str, d, len, char);
1515 d += len;
1516 SvREFCNT_dec(res);
1517 cont_scan:
1518 s = e + 1;
1519 }
1520 else
5777a3f7 1521 yyerror("Missing braces on \\N{}");
423cee85
JH
1522 continue;
1523
02aa26ce 1524 /* \c is a control character */
79072805
LW
1525 case 'c':
1526 s++;
9d116dd7
JH
1527#ifdef EBCDIC
1528 *d = *s++;
1529 if (isLOWER(*d))
1530 *d = toUPPER(*d);
774a9426
GS
1531 *d = toCTRL(*d);
1532 d++;
9d116dd7 1533#else
ba210ebe
JH
1534 {
1535 U8 c = *s++;
1536 *d++ = toCTRL(c);
1537 }
9d116dd7 1538#endif
79072805 1539 continue;
02aa26ce
NT
1540
1541 /* printf-style backslashes, formfeeds, newlines, etc */
79072805
LW
1542 case 'b':
1543 *d++ = '\b';
1544 break;
1545 case 'n':
1546 *d++ = '\n';
1547 break;
1548 case 'r':
1549 *d++ = '\r';
1550 break;
1551 case 'f':
1552 *d++ = '\f';
1553 break;
1554 case 't':
1555 *d++ = '\t';
1556 break;
34a3fe2a
PP
1557#ifdef EBCDIC
1558 case 'e':
1559 *d++ = '\047'; /* CP 1047 */
1560 break;
1561 case 'a':
1562 *d++ = '\057'; /* CP 1047 */
1563 break;
1564#else
79072805
LW
1565 case 'e':
1566 *d++ = '\033';
1567 break;
1568 case 'a':
1569 *d++ = '\007';
1570 break;
34a3fe2a 1571#endif
02aa26ce
NT
1572 } /* end switch */
1573
79072805
LW
1574 s++;
1575 continue;
02aa26ce
NT
1576 } /* end if (backslash) */
1577
79072805 1578 *d++ = *s++;
02aa26ce
NT
1579 } /* while loop to process each character */
1580
1581 /* terminate the string and set up the sv */
79072805 1582 *d = '\0';
463ee0b2 1583 SvCUR_set(sv, d - SvPVX(sv));
79072805 1584 SvPOK_on(sv);
89491803 1585 if (has_utf8)
7e2040f0 1586 SvUTF8_on(sv);
79072805 1587
02aa26ce 1588 /* shrink the sv if we allocated more than we used */
79072805
LW
1589 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1590 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1591 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1592 }
02aa26ce 1593
9b599b2a 1594 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1595 if (s > PL_bufptr) {
1596 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
1597 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1598 sv, Nullsv,
3280af22 1599 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1600 ? "tr"
3280af22 1601 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1602 ? "s"
1603 : "qq")));
79072805 1604 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1605 } else
8990e307 1606 SvREFCNT_dec(sv);
79072805
LW
1607 return s;
1608}
1609
ffb4593c
NT
1610/* S_intuit_more
1611 * Returns TRUE if there's more to the expression (e.g., a subscript),
1612 * FALSE otherwise.
ffb4593c
NT
1613 *
1614 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1615 *
1616 * ->[ and ->{ return TRUE
1617 * { and [ outside a pattern are always subscripts, so return TRUE
1618 * if we're outside a pattern and it's not { or [, then return FALSE
1619 * if we're in a pattern and the first char is a {
1620 * {4,5} (any digits around the comma) returns FALSE
1621 * if we're in a pattern and the first char is a [
1622 * [] returns FALSE
1623 * [SOMETHING] has a funky algorithm to decide whether it's a
1624 * character class or not. It has to deal with things like
1625 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1626 * anything else returns TRUE
1627 */
1628
9cbb5ea2
GS
1629/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1630
76e3520e 1631STATIC int
cea2e8a9 1632S_intuit_more(pTHX_ register char *s)
79072805 1633{
3280af22 1634 if (PL_lex_brackets)
79072805
LW
1635 return TRUE;
1636 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1637 return TRUE;
1638 if (*s != '{' && *s != '[')
1639 return FALSE;
3280af22 1640 if (!PL_lex_inpat)
79072805
LW
1641 return TRUE;
1642
1643 /* In a pattern, so maybe we have {n,m}. */
1644 if (*s == '{') {
1645 s++;
1646 if (!isDIGIT(*s))
1647 return TRUE;
1648 while (isDIGIT(*s))
1649 s++;
1650 if (*s == ',')
1651 s++;
1652 while (isDIGIT(*s))
1653 s++;
1654 if (*s == '}')
1655 return FALSE;
1656 return TRUE;
1657
1658 }
1659
1660 /* On the other hand, maybe we have a character class */
1661
1662 s++;
1663 if (*s == ']' || *s == '^')
1664 return FALSE;
1665 else {
ffb4593c 1666 /* this is terrifying, and it works */
79072805
LW
1667 int weight = 2; /* let's weigh the evidence */
1668 char seen[256];
f27ffc4a 1669 unsigned char un_char = 255, last_un_char;
93a17b20 1670 char *send = strchr(s,']');
3280af22 1671 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1672
1673 if (!send) /* has to be an expression */
1674 return TRUE;
1675
1676 Zero(seen,256,char);
1677 if (*s == '$')
1678 weight -= 3;
1679 else if (isDIGIT(*s)) {
1680 if (s[1] != ']') {
1681 if (isDIGIT(s[1]) && s[2] == ']')
1682 weight -= 10;
1683 }
1684 else
1685 weight -= 100;
1686 }
1687 for (; s < send; s++) {
1688 last_un_char = un_char;
1689 un_char = (unsigned char)*s;
1690 switch (*s) {
1691 case '@':
1692 case '&':
1693 case '$':
1694 weight -= seen[un_char] * 10;
7e2040f0 1695 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1696 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1697 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1698 weight -= 100;
1699 else
1700 weight -= 10;
1701 }
1702 else if (*s == '$' && s[1] &&
93a17b20
LW
1703 strchr("[#!%*<>()-=",s[1])) {
1704 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1705 weight -= 10;
1706 else
1707 weight -= 1;
1708 }
1709 break;
1710 case '\\':
1711 un_char = 254;
1712 if (s[1]) {
93a17b20 1713 if (strchr("wds]",s[1]))
79072805
LW
1714 weight += 100;
1715 else if (seen['\''] || seen['"'])
1716 weight += 1;
93a17b20 1717 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1718 weight += 40;
1719 else if (isDIGIT(s[1])) {
1720 weight += 40;
1721 while (s[1] && isDIGIT(s[1]))
1722 s++;
1723 }
1724 }
1725 else
1726 weight += 100;
1727 break;
1728 case '-':
1729 if (s[1] == '\\')
1730 weight += 50;
93a17b20 1731 if (strchr("aA01! ",last_un_char))
79072805 1732 weight += 30;
93a17b20 1733 if (strchr("zZ79~",s[1]))
79072805 1734 weight += 30;
f27ffc4a
GS
1735 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1736 weight -= 5; /* cope with negative subscript */
79072805
LW
1737 break;
1738 default:
93a17b20 1739 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1740 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1741 char *d = tmpbuf;
1742 while (isALPHA(*s))
1743 *d++ = *s++;
1744 *d = '\0';
1745 if (keyword(tmpbuf, d - tmpbuf))
1746 weight -= 150;
1747 }
1748 if (un_char == last_un_char + 1)
1749 weight += 5;
1750 weight -= seen[un_char];
1751 break;
1752 }
1753 seen[un_char]++;
1754 }
1755 if (weight >= 0) /* probably a character class */
1756 return FALSE;
1757 }
1758
1759 return TRUE;
1760}
ffed7fef 1761
ffb4593c
NT
1762/*
1763 * S_intuit_method
1764 *
1765 * Does all the checking to disambiguate
1766 * foo bar
1767 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1768 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1769 *
1770 * First argument is the stuff after the first token, e.g. "bar".
1771 *
1772 * Not a method if bar is a filehandle.
1773 * Not a method if foo is a subroutine prototyped to take a filehandle.
1774 * Not a method if it's really "Foo $bar"
1775 * Method if it's "foo $bar"
1776 * Not a method if it's really "print foo $bar"
1777 * Method if it's really "foo package::" (interpreted as package->foo)
1778 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1779 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1780 * =>
1781 */
1782
76e3520e 1783STATIC int
cea2e8a9 1784S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1785{
1786 char *s = start + (*start == '$');
3280af22 1787 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1788 STRLEN len;
1789 GV* indirgv;
1790
1791 if (gv) {
b6c543e3 1792 CV *cv;
a0d0e21e
LW
1793 if (GvIO(gv))
1794 return 0;
b6c543e3
IZ
1795 if ((cv = GvCVu(gv))) {
1796 char *proto = SvPVX(cv);
1797 if (proto) {
1798 if (*proto == ';')
1799 proto++;
1800 if (*proto == '*')
1801 return 0;
1802 }
1803 } else
a0d0e21e
LW
1804 gv = 0;
1805 }
8903cb82 1806 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1807 /* start is the beginning of the possible filehandle/object,
1808 * and s is the end of it
1809 * tmpbuf is a copy of it
1810 */
1811
a0d0e21e 1812 if (*start == '$') {
3280af22 1813 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1814 return 0;
1815 s = skipspace(s);
3280af22
NIS
1816 PL_bufptr = start;
1817 PL_expect = XREF;
a0d0e21e
LW
1818 return *s == '(' ? FUNCMETH : METHOD;
1819 }
1820 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1821 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1822 len -= 2;
1823 tmpbuf[len] = '\0';
1824 goto bare_package;
1825 }
1826 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1827 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1828 return 0;
1829 /* filehandle or package name makes it a method */
89bfa8cd 1830 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1831 s = skipspace(s);
3280af22 1832 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1833 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1834 bare_package:
3280af22 1835 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1836 newSVpvn(tmpbuf,len));
3280af22
NIS
1837 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1838 PL_expect = XTERM;
a0d0e21e 1839 force_next(WORD);
3280af22 1840 PL_bufptr = s;
a0d0e21e
LW
1841 return *s == '(' ? FUNCMETH : METHOD;
1842 }
1843 }
1844 return 0;
1845}
1846
ffb4593c
NT
1847/*
1848 * S_incl_perldb
1849 * Return a string of Perl code to load the debugger. If PERL5DB
1850 * is set, it will return the contents of that, otherwise a
1851 * compile-time require of perl5db.pl.
1852 */
1853
76e3520e 1854STATIC char*
cea2e8a9 1855S_incl_perldb(pTHX)
a0d0e21e 1856{
3280af22 1857 if (PL_perldb) {
76e3520e 1858 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1859
1860 if (pdb)
1861 return pdb;
61bb5906 1862 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1863 return "BEGIN { require 'perl5db.pl' }";
1864 }
1865 return "";
1866}
1867
1868
16d20bd9
AD
1869/* Encoded script support. filter_add() effectively inserts a
1870 * 'pre-processing' function into the current source input stream.
1871 * Note that the filter function only applies to the current source file
1872 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1873 *
1874 * The datasv parameter (which may be NULL) can be used to pass
1875 * private data to this instance of the filter. The filter function
1876 * can recover the SV using the FILTER_DATA macro and use it to
1877 * store private buffers and state information.
1878 *
1879 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1880 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1881 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1882 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1883 * private use must be set using malloc'd pointers.
1884 */
16d20bd9
AD
1885
1886SV *
864dbfa3 1887Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1888{
f4c556ac
GS
1889 if (!funcp)
1890 return Nullsv;
1891
3280af22
NIS
1892 if (!PL_rsfp_filters)
1893 PL_rsfp_filters = newAV();
16d20bd9 1894 if (!datasv)
8c52afec 1895 datasv = NEWSV(255,0);
16d20bd9 1896 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1897 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1898 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1899 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1900 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1901 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1902 av_unshift(PL_rsfp_filters, 1);
1903 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1904 return(datasv);
1905}
1906
1907
1908/* Delete most recently added instance of this filter function. */
a0d0e21e 1909void
864dbfa3 1910Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1911{
e0c19803 1912 SV *datasv;
f4c556ac 1913 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1914 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1915 return;
1916 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1917 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1918 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1919 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1920 IoANY(datasv) = (void *)NULL;
3280af22 1921 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1922
16d20bd9
AD
1923 return;
1924 }
1925 /* we need to search for the correct entry and clear it */
cea2e8a9 1926 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1927}
1928
1929
1930/* Invoke the n'th filter function for the current rsfp. */
1931I32
864dbfa3 1932Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
8ac85365
NIS
1933
1934
1935 /* 0 = read one text line */
a0d0e21e 1936{
16d20bd9
AD
1937 filter_t funcp;
1938 SV *datasv = NULL;
e50aee73 1939
3280af22 1940 if (!PL_rsfp_filters)
16d20bd9 1941 return -1;
3280af22 1942 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
1943 /* Provide a default input filter to make life easy. */
1944 /* Note that we append to the line. This is handy. */
f4c556ac
GS
1945 DEBUG_P(PerlIO_printf(Perl_debug_log,
1946 "filter_read %d: from rsfp\n", idx));
16d20bd9
AD
1947 if (maxlen) {
1948 /* Want a block */
1949 int len ;
1950 int old_len = SvCUR(buf_sv) ;
1951
1952 /* ensure buf_sv is large enough */
1953 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
1954 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
1955 if (PerlIO_error(PL_rsfp))
37120919
AD
1956 return -1; /* error */
1957 else
1958 return 0 ; /* end of file */
1959 }
16d20bd9
AD
1960 SvCUR_set(buf_sv, old_len + len) ;
1961 } else {
1962 /* Want a line */
3280af22
NIS
1963 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
1964 if (PerlIO_error(PL_rsfp))
37120919
AD
1965 return -1; /* error */
1966 else
1967 return 0 ; /* end of file */
1968 }
16d20bd9
AD
1969 }
1970 return SvCUR(buf_sv);
1971 }
1972 /* Skip this filter slot if filter has been deleted */
3280af22 1973 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
1974 DEBUG_P(PerlIO_printf(Perl_debug_log,
1975 "filter_read %d: skipped (filter deleted)\n",
1976 idx));
16d20bd9
AD
1977 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
1978 }
1979 /* Get function pointer hidden within datasv */
4755096e 1980 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
1981 DEBUG_P(PerlIO_printf(Perl_debug_log,
1982 "filter_read %d: via function %p (%s)\n",
1983 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
1984 /* Call function. The function is expected to */
1985 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 1986 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 1987 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
1988}
1989
76e3520e 1990STATIC char *
cea2e8a9 1991S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 1992{
c39cd008 1993#ifdef PERL_CR_FILTER
3280af22 1994 if (!PL_rsfp_filters) {
c39cd008 1995 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
1996 }
1997#endif
3280af22 1998 if (PL_rsfp_filters) {
16d20bd9 1999
55497cff
PP
2000 if (!append)
2001 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2002 if (FILTER_READ(0, sv, 0) > 0)
2003 return ( SvPVX(sv) ) ;
2004 else
2005 return Nullch ;
2006 }
9d116dd7 2007 else
fd049845 2008 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2009}
2010
01ec43d0
GS
2011STATIC HV *
2012S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2013{
2014 GV *gv;
2015
01ec43d0 2016 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2017 return PL_curstash;
2018
2019 if (len > 2 &&
2020 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2021 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2022 {
2023 return GvHV(gv); /* Foo:: */
def3634b
GS
2024 }
2025
2026 /* use constant CLASS => 'MyClass' */
2027 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2028 SV *sv;
2029 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2030 pkgname = SvPV_nolen(sv);
2031 }
2032 }
2033
2034 return gv_stashpv(pkgname, FALSE);
2035}
a0d0e21e 2036
748a9306
LW
2037#ifdef DEBUGGING
2038 static char* exp_name[] =
09bef843
SB
2039 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2040 "ATTRTERM", "TERMBLOCK"
2041 };
748a9306 2042#endif
463ee0b2 2043
02aa26ce
NT
2044/*
2045 yylex
2046
2047 Works out what to call the token just pulled out of the input
2048 stream. The yacc parser takes care of taking the ops we return and
2049 stitching them into a tree.
2050
2051 Returns:
2052 PRIVATEREF
2053
2054 Structure:
2055 if read an identifier
2056 if we're in a my declaration
2057 croak if they tried to say my($foo::bar)
2058 build the ops for a my() declaration
2059 if it's an access to a my() variable
2060 are we in a sort block?
2061 croak if my($a); $a <=> $b
2062 build ops for access to a my() variable
2063 if in a dq string, and they've said @foo and we can't find @foo
2064 croak
2065 build ops for a bareword
2066 if we already built the token before, use it.
2067*/
2068
dba4d153 2069#ifdef USE_PURE_BISON
bf4acbe4 2070#ifdef __SC__
dba4d153 2071#pragma segment Perl_yylex_r
bf4acbe4 2072#endif
864dbfa3 2073int
dba4d153 2074Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2075{
b73d6f50 2076 dTHR;
20141f0e
RI
2077 int r;
2078
20141f0e
RI
2079 yylval_pointer[yyactlevel] = lvalp;
2080 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
RI
2081 yyactlevel++;
2082 if (yyactlevel >= YYMAXLEVEL)
2083 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2084
dba4d153 2085 r = Perl_yylex(aTHX);
20141f0e 2086
20141f0e 2087 yyactlevel--;
20141f0e
RI
2088
2089 return r;
2090}
dba4d153 2091#endif
20141f0e 2092
dba4d153
JH
2093#ifdef __SC__
2094#pragma segment Perl_yylex
2095#endif
2096
2097int
2098#ifdef USE_PURE_BISON
2099Perl_yylex(pTHX_ YYSTYPE *lvalp, int *lcharp)
2100#else
2101Perl_yylex(pTHX)
2102#endif
20141f0e 2103{
11343788 2104 dTHR;
79072805 2105 register char *s;
378cc40b 2106 register char *d;
79072805 2107 register I32 tmp;
463ee0b2 2108 STRLEN len;
161b471a
NIS
2109 GV *gv = Nullgv;
2110 GV **gvp = 0;
a687059c 2111
02aa26ce 2112 /* check if there's an identifier for us to look at */
3280af22 2113 if (PL_pending_ident) {
02aa26ce 2114 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2115 char pit = PL_pending_ident;
2116 PL_pending_ident = 0;
bbce6d69 2117
02aa26ce
NT
2118 /* if we're in a my(), we can't allow dynamics here.
2119 $foo'bar has already been turned into $foo::bar, so
2120 just check for colons.
2121
2122 if it's a legal name, the OP is a PADANY.
2123 */
3280af22 2124 if (PL_in_my) {
77ca0c92 2125 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2126 if (strchr(PL_tokenbuf,':'))
2127 yyerror(Perl_form(aTHX_ "No package name allowed for "
2128 "variable %s in \"our\"",
2129 PL_tokenbuf));
77ca0c92
LW
2130 tmp = pad_allocmy(PL_tokenbuf);
2131 }
2132 else {
2133 if (strchr(PL_tokenbuf,':'))
2134 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2135
77ca0c92
LW
2136 yylval.opval = newOP(OP_PADANY, 0);
2137 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2138 return PRIVATEREF;
2139 }
bbce6d69
PP
2140 }
2141
02aa26ce
NT
2142 /*
2143 build the ops for accesses to a my() variable.
2144
2145 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2146 then used in a comparison. This catches most, but not
2147 all cases. For instance, it catches
2148 sort { my($a); $a <=> $b }
2149 but not
2150 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2151 (although why you'd do that is anyone's guess).
2152 */
2153
3280af22 2154 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2155#ifdef USE_THREADS
54b9620d 2156 /* Check for single character per-thread SVs */
3280af22
NIS
2157 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2158 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2159 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2160 {
2faa37cc 2161 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2162 yylval.opval->op_targ = tmp;
2163 return PRIVATEREF;
2164 }
2165#endif /* USE_THREADS */
3280af22 2166 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2167 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2168 /* might be an "our" variable" */
f472eb5c 2169 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2170 /* build ops for a bareword */
f472eb5c
GS
2171 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2172 sv_catpvn(sym, "::", 2);
2173 sv_catpv(sym, PL_tokenbuf+1);
2174 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2175 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2176 gv_fetchpv(SvPVX(sym),
77ca0c92 2177 (PL_in_eval
f472eb5c
GS
2178 ? (GV_ADDMULTI | GV_ADDINEVAL)
2179 : TRUE
77ca0c92
LW
2180 ),
2181 ((PL_tokenbuf[0] == '$') ? SVt_PV
2182 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2183 : SVt_PVHV));
2184 return WORD;
2185 }
2186
02aa26ce 2187 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2188 if (PL_last_lop_op == OP_SORT &&
2189 PL_tokenbuf[0] == '$' &&
2190 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2191 && !PL_tokenbuf[2])
bbce6d69 2192 {
3280af22
NIS
2193 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2194 d < PL_bufend && *d != '\n';
a863c7d1
MB
2195 d++)
2196 {
2197 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2198 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2199 PL_tokenbuf);
a863c7d1 2200 }
bbce6d69
PP
2201 }
2202 }
bbce6d69 2203
a863c7d1
MB
2204 yylval.opval = newOP(OP_PADANY, 0);
2205 yylval.opval->op_targ = tmp;
2206 return PRIVATEREF;
2207 }
bbce6d69
PP
2208 }
2209
02aa26ce
NT
2210 /*
2211 Whine if they've said @foo in a doublequoted string,
2212 and @foo isn't a variable we can find in the symbol
2213 table.
2214 */
3280af22
NIS
2215 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2216 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2217 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2218 && ckWARN(WARN_AMBIGUOUS))
2219 {
2220 /* Downgraded from fatal to warning 20000522 mjd */
2221 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2222 "Possible unintended interpolation of %s in string",
2223 PL_tokenbuf);
2224 }
bbce6d69
PP
2225 }
2226
02aa26ce 2227 /* build ops for a bareword */
3280af22 2228 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2229 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2230 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2231 ((PL_tokenbuf[0] == '$') ? SVt_PV
2232 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69
PP
2233 : SVt_PVHV));
2234 return WORD;
2235 }
2236
02aa26ce
NT
2237 /* no identifier pending identification */
2238
3280af22 2239 switch (PL_lex_state) {
79072805
LW
2240#ifdef COMMENTARY
2241 case LEX_NORMAL: /* Some compilers will produce faster */
2242 case LEX_INTERPNORMAL: /* code if we comment these out. */
2243 break;
2244#endif
2245
09bef843 2246 /* when we've already built the next token, just pull it out of the queue */
79072805 2247 case LEX_KNOWNEXT:
3280af22
NIS
2248 PL_nexttoke--;
2249 yylval = PL_nextval[PL_nexttoke];
2250 if (!PL_nexttoke) {
2251 PL_lex_state = PL_lex_defer;
2252 PL_expect = PL_lex_expect;
2253 PL_lex_defer = LEX_NORMAL;
463ee0b2 2254 }
3280af22 2255 return(PL_nexttype[PL_nexttoke]);
79072805 2256
02aa26ce 2257 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2258 when we get here, PL_bufptr is at the \
02aa26ce 2259 */
79072805
LW
2260 case LEX_INTERPCASEMOD:
2261#ifdef DEBUGGING
3280af22 2262 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2263 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2264#endif
02aa26ce 2265 /* handle \E or end of string */
3280af22 2266 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2267 char oldmod;
02aa26ce
NT
2268
2269 /* if at a \E */
3280af22
NIS
2270 if (PL_lex_casemods) {
2271 oldmod = PL_lex_casestack[--PL_lex_casemods];
2272 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2273
3280af22
NIS
2274 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2275 PL_bufptr += 2;
2276 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2277 }
79072805
LW
2278 return ')';
2279 }
3280af22
NIS
2280 if (PL_bufptr != PL_bufend)
2281 PL_bufptr += 2;
2282 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2283 return yylex();
79072805
LW
2284 }
2285 else {
3280af22 2286 s = PL_bufptr + 1;
79072805
LW
2287 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2288 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2289 if (strchr("LU", *s) &&
3280af22 2290 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2291 {
3280af22 2292 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2293 return ')';
2294 }
3280af22
NIS
2295 if (PL_lex_casemods > 10) {
2296 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2297 if (newlb != PL_lex_casestack) {
a0d0e21e 2298 SAVEFREEPV(newlb);
3280af22 2299 PL_lex_casestack = newlb;
a0d0e21e
LW
2300 }
2301 }
3280af22
NIS
2302 PL_lex_casestack[PL_lex_casemods++] = *s;
2303 PL_lex_casestack[PL_lex_casemods] = '\0';
2304 PL_lex_state = LEX_INTERPCONCAT;
2305 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2306 force_next('(');
2307 if (*s == 'l')
3280af22 2308 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2309 else if (*s == 'u')
3280af22 2310 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2311 else if (*s == 'L')
3280af22 2312 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2313 else if (*s == 'U')
3280af22 2314 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2315 else if (*s == 'Q')
3280af22 2316 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2317 else
cea2e8a9 2318 Perl_croak(aTHX_ "panic: yylex");
3280af22 2319 PL_bufptr = s + 1;
79072805 2320 force_next(FUNC);
3280af22
NIS
2321 if (PL_lex_starts) {
2322 s = PL_bufptr;
2323 PL_lex_starts = 0;
79072805
LW
2324 Aop(OP_CONCAT);
2325 }
2326 else
cea2e8a9 2327 return yylex();
79072805
LW
2328 }
2329
55497cff
PP
2330 case LEX_INTERPPUSH:
2331 return sublex_push();
2332
79072805 2333 case LEX_INTERPSTART:
3280af22 2334 if (PL_bufptr == PL_bufend)
79072805 2335 return sublex_done();
3280af22
NIS
2336 PL_expect = XTERM;
2337 PL_lex_dojoin = (*PL_bufptr == '@');
2338 PL_lex_state = LEX_INTERPNORMAL;
2339 if (PL_lex_dojoin) {
2340 PL_nextval[PL_nexttoke].ival = 0;
79072805 2341 force_next(',');
554b3eca 2342#ifdef USE_THREADS
533c011a
NIS
2343 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2344 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2345 force_next(PRIVATEREF);
2346#else
a0d0e21e 2347 force_ident("\"", '$');
554b3eca 2348#endif /* USE_THREADS */
3280af22 2349 PL_nextval[PL_nexttoke].ival = 0;
79072805 2350 force_next('$');
3280af22 2351 PL_nextval[PL_nexttoke].ival = 0;
79072805 2352 force_next('(');
3280af22 2353 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2354 force_next(FUNC);
2355 }
3280af22
NIS
2356 if (PL_lex_starts++) {
2357 s = PL_bufptr;
79072805
LW
2358 Aop(OP_CONCAT);
2359 }
cea2e8a9 2360 return yylex();
79072805
LW
2361
2362 case LEX_INTERPENDMAYBE:
3280af22
NIS
2363 if (intuit_more(PL_bufptr)) {
2364 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2365 break;
2366 }
2367 /* FALL THROUGH */
2368
2369 case LEX_INTERPEND:
3280af22
NIS
2370 if (PL_lex_dojoin) {
2371 PL_lex_dojoin = FALSE;
2372 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2373 return ')';
2374 }
43a16006 2375 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2376 && SvEVALED(PL_lex_repl))
43a16006 2377 {
e9fa98b2 2378 if (PL_bufptr != PL_bufend)
cea2e8a9 2379 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2380 PL_lex_repl = Nullsv;
2381 }
79072805
LW
2382 /* FALLTHROUGH */
2383 case LEX_INTERPCONCAT:
2384#ifdef DEBUGGING
3280af22 2385 if (PL_lex_brackets)
cea2e8a9 2386 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2387#endif
3280af22 2388 if (PL_bufptr == PL_bufend)
79072805
LW
2389 return sublex_done();
2390
3280af22
NIS
2391 if (SvIVX(PL_linestr) == '\'') {
2392 SV *sv = newSVsv(PL_linestr);
2393 if (!PL_lex_inpat)
76e3520e 2394 sv = tokeq(sv);
3280af22 2395 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2396 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2397 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2398 s = PL_bufend;
79072805
LW
2399 }
2400 else {
3280af22 2401 s = scan_const(PL_bufptr);
79072805 2402 if (*s == '\\')
3280af22 2403 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2404 else
3280af22 2405 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2406 }
2407
3280af22
NIS
2408 if (s != PL_bufptr) {
2409 PL_nextval[PL_nexttoke] = yylval;
2410 PL_expect = XTERM;
79072805 2411 force_next(THING);
3280af22 2412 if (PL_lex_starts++)
79072805
LW
2413 Aop(OP_CONCAT);
2414 else {
3280af22 2415 PL_bufptr = s;
cea2e8a9 2416 return yylex();
79072805
LW
2417 }
2418 }
2419
cea2e8a9 2420 return yylex();
a0d0e21e 2421 case LEX_FORMLINE:
3280af22
NIS
2422 PL_lex_state = LEX_NORMAL;
2423 s = scan_formline(PL_bufptr);
2424 if (!PL_lex_formbrack)
a0d0e21e
LW
2425 goto rightbracket;
2426 OPERATOR(';');
79072805
LW
2427 }
2428
3280af22
NIS
2429 s = PL_bufptr;
2430 PL_oldoldbufptr = PL_oldbufptr;
2431 PL_oldbufptr = s;
79072805 2432 DEBUG_p( {
bf49b057
GS
2433 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2434 exp_name[PL_expect], s);
79072805 2435 } )
463ee0b2
LW
2436
2437 retry:
378cc40b
LW
2438 switch (*s) {
2439 default:
7e2040f0 2440 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2441 goto keylookup;
cea2e8a9 2442 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2443 case 4:
2444 case 26:
2445 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2446 case 0:
3280af22
NIS
2447 if (!PL_rsfp) {
2448 PL_last_uni = 0;
2449 PL_last_lop = 0;
2450 if (PL_lex_brackets)
d98d5fff 2451 yyerror("Missing right curly or square bracket");
79072805 2452 TOKEN(0);
463ee0b2 2453 }
3280af22 2454 if (s++ < PL_bufend)
a687059c 2455 goto retry; /* ignore stray nulls */
3280af22
NIS
2456 PL_last_uni = 0;
2457 PL_last_lop = 0;
2458 if (!PL_in_eval && !PL_preambled) {
2459 PL_preambled = TRUE;
2460 sv_setpv(PL_linestr,incl_perldb());
2461 if (SvCUR(PL_linestr))
2462 sv_catpv(PL_linestr,";");
2463 if (PL_preambleav){
2464 while(AvFILLp(PL_preambleav) >= 0) {
2465 SV *tmpsv = av_shift(PL_preambleav);
2466 sv_catsv(PL_linestr, tmpsv);
2467 sv_catpv(PL_linestr, ";");
91b7def8
PP
2468 sv_free(tmpsv);
2469 }
3280af22
NIS
2470 sv_free((SV*)PL_preambleav);
2471 PL_preambleav = NULL;
91b7def8 2472 }
3280af22
NIS
2473 if (PL_minus_n || PL_minus_p) {
2474 sv_catpv(PL_linestr, "LINE: while (<>) {");
2475 if (PL_minus_l)
2476 sv_catpv(PL_linestr,"chomp;");
2477 if (PL_minus_a) {
8fd239a7
CS
2478 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2479 if (gv)
2480 GvIMPORTED_AV_on(gv);
3280af22
NIS
2481 if (PL_minus_F) {
2482 if (strchr("/'\"", *PL_splitstr)
2483 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2484 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121
PP
2485 else {
2486 char delim;
2487 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2488 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2489 delim = *s;
cea2e8a9 2490 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2491 "q" + (delim == '\''), delim);
3280af22 2492 for (s = PL_splitstr; *s; s++) {
54310121 2493 if (*s == '\\')
3280af22
NIS
2494 sv_catpvn(PL_linestr, "\\", 1);
2495 sv_catpvn(PL_linestr, s, 1);
54310121 2496 }
cea2e8a9 2497 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2498 }
2304df62
AD
2499 }
2500 else
3280af22 2501 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2502 }
79072805 2503 }
3280af22
NIS
2504 sv_catpv(PL_linestr, "\n");
2505 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2506 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2507 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2508 SV *sv = NEWSV(85,0);
2509
2510 sv_upgrade(sv, SVt_PVMG);
3280af22 2511 sv_setsv(sv,PL_linestr);
57843af0 2512 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2513 }
79072805 2514 goto retry;
a687059c 2515 }
e929a76b 2516 do {
226017aa
DD
2517 bool bof = PL_rsfp ? TRUE : FALSE;
2518 if (bof) {
2519#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2520# ifdef __GNU_LIBRARY__
2521# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2522# define FTELL_FOR_PIPE_IS_BROKEN
2523# endif
e3f494f1
JH
2524# else
2525# ifdef __GLIBC__
2526# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2527# define FTELL_FOR_PIPE_IS_BROKEN
2528# endif
2529# endif
226017aa
DD
2530# endif
2531#endif
2532#ifdef FTELL_FOR_PIPE_IS_BROKEN
2533 /* This loses the possibility to detect the bof
2534 * situation on perl -P when the libc5 is being used.
2535 * Workaround? Maybe attach some extra state to PL_rsfp?
2536 */
2537 if (!PL_preprocess)
2538 bof = PerlIO_tell(PL_rsfp) == 0;
2539#else
2540 bof = PerlIO_tell(PL_rsfp) == 0;
2541#endif
2542 }
dea0fc0b
JH
2543 s = filter_gets(PL_linestr, PL_rsfp, 0);
2544 if (s == Nullch) {
e929a76b 2545 fake_eof:
3280af22
NIS
2546 if (PL_rsfp) {
2547 if (PL_preprocess && !PL_in_eval)
2548 (void)PerlProc_pclose(PL_rsfp);
2549 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2550 PerlIO_clearerr(PL_rsfp);
395c3793 2551 else
3280af22
NIS
2552 (void)PerlIO_close(PL_rsfp);
2553 PL_rsfp = Nullfp;
4a9ae47a 2554 PL_doextract = FALSE;
395c3793 2555 }
3280af22
NIS
2556 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2557 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2558 sv_catpv(PL_linestr,";}");
2559 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2560 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2561 PL_minus_n = PL_minus_p = 0;
e929a76b
LW
2562 goto retry;
2563 }
3280af22
NIS
2564 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2565 sv_setpv(PL_linestr,"");
79072805 2566 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
dea0fc0b
JH
2567 } else if (bof) {
2568 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2569 s = swallow_bom((U8*)s);
378cc40b 2570 }
3280af22 2571 if (PL_doextract) {
a0d0e21e 2572 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2573 PL_doextract = FALSE;
a0d0e21e
LW
2574
2575 /* Incest with pod. */
2576 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2577 sv_setpv(PL_linestr, "");
2578 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2579 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2580 PL_doextract = FALSE;
a0d0e21e 2581 }
b250498f 2582 }
463ee0b2 2583 incline(s);
3280af22
NIS
2584 } while (PL_doextract);
2585 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2586 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2587 SV *sv = NEWSV(85,0);
a687059c 2588
93a17b20 2589 sv_upgrade(sv, SVt_PVMG);
3280af22 2590 sv_setsv(sv,PL_linestr);
57843af0 2591 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2592 }
3280af22 2593 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
57843af0 2594 if (CopLINE(PL_curcop) == 1) {
3280af22 2595 while (s < PL_bufend && isSPACE(*s))
79072805 2596 s++;
a0d0e21e 2597 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2598 s++;
44a8e56a 2599 d = Nullch;
3280af22 2600 if (!PL_in_eval) {
44a8e56a
PP
2601 if (*s == '#' && *(s+1) == '!')
2602 d = s + 2;
2603#ifdef ALTERNATE_SHEBANG
2604 else {
2605 static char as[] = ALTERNATE_SHEBANG;
2606 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2607 d = s + (sizeof(as) - 1);
2608 }
2609#endif /* ALTERNATE_SHEBANG */
2610 }
2611 if (d) {
b8378b72 2612 char *ipath;
774d564b 2613 char *ipathend;
b8378b72 2614
774d564b 2615 while (isSPACE(*d))
b8378b72
CS
2616 d++;
2617 ipath = d;
774d564b
PP
2618 while (*d && !isSPACE(*d))
2619 d++;
2620 ipathend = d;
2621
2622#ifdef ARG_ZERO_IS_SCRIPT
2623 if (ipathend > ipath) {
2624 /*
2625 * HP-UX (at least) sets argv[0] to the script name,
2626 * which makes $^X incorrect. And Digital UNIX and Linux,
2627 * at least, set argv[0] to the basename of the Perl
2628 * interpreter. So, having found "#!", we'll set it right.
2629 */
2630 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2631 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2632 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2633 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2634 SvSETMAGIC(x);
2635 }
774d564b 2636 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2637 }
774d564b 2638#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2639
2640 /*
2641 * Look for options.
2642 */
748a9306 2643 d = instr(s,"perl -");
84e30d1a 2644 if (!d) {
748a9306 2645 d = instr(s,"perl");
84e30d1a
GS
2646#if defined(DOSISH)
2647 /* avoid getting into infinite loops when shebang
2648 * line contains "Perl" rather than "perl" */
2649 if (!d) {
2650 for (d = ipathend-4; d >= ipath; --d) {
2651 if ((*d == 'p' || *d == 'P')
2652 && !ibcmp(d, "perl", 4))
2653 {
2654 break;
2655 }
2656 }
2657 if (d < ipath)
2658 d = Nullch;
2659 }
2660#endif
2661 }
44a8e56a
PP
2662#ifdef ALTERNATE_SHEBANG
2663 /*
2664 * If the ALTERNATE_SHEBANG on this system starts with a
2665 * character that can be part of a Perl expression, then if
2666 * we see it but not "perl", we're probably looking at the
2667 * start of Perl code, not a request to hand off to some
2668 * other interpreter. Similarly, if "perl" is there, but
2669 * not in the first 'word' of the line, we assume the line
2670 * contains the start of the Perl program.
44a8e56a
PP
2671 */
2672 if (d && *s != '#') {
774d564b 2673 char *c = ipath;
44a8e56a
PP
2674 while (*c && !strchr("; \t\r\n\f\v#", *c))
2675 c++;
2676 if (c < d)
2677 d = Nullch; /* "perl" not in first word; ignore */
2678 else
2679 *s = '#'; /* Don't try to parse shebang line */
2680 }
774d564b 2681#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2682#ifndef MACOS_TRADITIONAL
748a9306 2683 if (!d &&
44a8e56a 2684 *s == '#' &&
774d564b 2685 ipathend > ipath &&
3280af22 2686 !PL_minus_c &&
748a9306 2687 !instr(s,"indir") &&
3280af22 2688 instr(PL_origargv[0],"perl"))
748a9306 2689 {
9f68db38 2690 char **newargv;
9f68db38 2691
774d564b
PP
2692 *ipathend = '\0';
2693 s = ipathend + 1;
3280af22 2694 while (s < PL_bufend && isSPACE(*s))
9f68db38 2695 s++;
3280af22
NIS
2696 if (s < PL_bufend) {
2697 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2698 newargv[1] = s;
3280af22 2699 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2700 s++;
2701 *s = '\0';
3280af22 2702 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2703 }
2704 else
3280af22 2705 newargv = PL_origargv;
774d564b 2706 newargv[0] = ipath;
80252599 2707 PerlProc_execv(ipath, newargv);
cea2e8a9 2708 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2709 }
bf4acbe4 2710#endif
748a9306 2711 if (d) {
3280af22
NIS
2712 U32 oldpdb = PL_perldb;
2713 bool oldn = PL_minus_n;
2714 bool oldp = PL_minus_p;
748a9306
LW
2715
2716 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2717 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2718
2719 if (*d++ == '-') {
8cc95fdb
PP
2720 do {
2721 if (*d == 'M' || *d == 'm') {
2722 char *m = d;
2723 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2724 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2725 (int)(d - m), m);
2726 }
2727 d = moreswitches(d);
2728 } while (d);
155aba94
GS
2729 if ((PERLDB_LINE && !oldpdb) ||
2730 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2731 /* if we have already added "LINE: while (<>) {",
2732 we must not do it again */
748a9306 2733 {
3280af22
NIS
2734 sv_setpv(PL_linestr, "");
2735 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2736 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
2737 PL_preambled = FALSE;
84902520 2738 if (PERLDB_LINE)
3280af22 2739 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2740 goto retry;
2741 }
a0d0e21e 2742 }
79072805 2743 }
9f68db38 2744 }
79072805 2745 }
3280af22
NIS
2746 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2747 PL_bufptr = s;
2748 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2749 return yylex();
ae986130 2750 }
378cc40b 2751 goto retry;
4fdae800 2752 case '\r':
6a27c188 2753#ifdef PERL_STRICT_CR
cea2e8a9
GS
2754 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
2755 Perl_croak(aTHX_
cc507455 2756 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2757#endif
4fdae800 2758 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2759#ifdef MACOS_TRADITIONAL
2760 case '\312':
2761#endif
378cc40b
LW
2762 s++;
2763 goto retry;
378cc40b 2764 case '#':
e929a76b 2765 case '\n':
3280af22 2766 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2767 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2768 /* handle eval qq[#line 1 "foo"\n ...] */
2769 CopLINE_dec(PL_curcop);
2770 incline(s);
2771 }
3280af22 2772 d = PL_bufend;
a687059c 2773 while (s < d && *s != '\n')
378cc40b 2774 s++;
0f85fab0 2775 if (s < d)
378cc40b 2776 s++;
463ee0b2 2777 incline(s);
3280af22
NIS
2778 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2779 PL_bufptr = s;
2780 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2781 return yylex();
a687059c 2782 }
378cc40b 2783 }
a687059c 2784 else {
378cc40b 2785 *s = '\0';
3280af22 2786 PL_bufend = s;
a687059c 2787 }
378cc40b
LW
2788 goto retry;
2789 case '-':
79072805 2790 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
378cc40b 2791 s++;
3280af22 2792 PL_bufptr = s;
748a9306
LW
2793 tmp = *s++;
2794
bf4acbe4 2795 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2796 s++;
2797
2798 if (strnEQ(s,"=>",2)) {
3280af22 2799 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
748a9306
LW
2800 OPERATOR('-'); /* unary minus */
2801 }
3280af22
NIS
2802 PL_last_uni = PL_oldbufptr;
2803 PL_last_lop_op = OP_FTEREAD; /* good enough */
748a9306 2804 switch (tmp) {
79072805
LW
2805 case 'r': FTST(OP_FTEREAD);
2806 case 'w': FTST(OP_FTEWRITE);
2807 case 'x': FTST(OP_FTEEXEC);
2808 case 'o': FTST(OP_FTEOWNED);
2809 case 'R': FTST(OP_FTRREAD);
2810 case 'W': FTST(OP_FTRWRITE);
2811 case 'X': FTST(OP_FTREXEC);
2812 case 'O': FTST(OP_FTROWNED);
2813 case 'e': FTST(OP_FTIS);
2814 case 'z': FTST(OP_FTZERO);
2815 case 's': FTST(OP_FTSIZE);
2816 case 'f': FTST(OP_FTFILE);
2817 case 'd': FTST(OP_FTDIR);
2818 case 'l': FTST(OP_FTLINK);
2819 case 'p': FTST(OP_FTPIPE);
2820 case 'S': FTST(OP_FTSOCK);
2821 case 'u': FTST(OP_FTSUID);
2822 case 'g': FTST(OP_FTSGID);
2823 case 'k': FTST(OP_FTSVTX);
2824 case 'b': FTST(OP_FTBLK);
2825 case 'c': FTST(OP_FTCHR);
2826 case 't': FTST(OP_FTTTY);
2827 case 'T': FTST(OP_FTTEXT);
2828 case 'B': FTST(OP_FTBINARY);
85e6fe83
LW
2829 case 'M': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTMTIME);
2830 case 'A': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTATIME);
2831 case 'C': gv_fetchpv("\024",TRUE, SVt_PV); FTST(OP_FTCTIME);
378cc40b 2832 default:
cea2e8a9 2833 Perl_croak(aTHX_ "Unrecognized file test: -%c", (int)tmp);
378cc40b
LW
2834 break;
2835 }
2836 }
a687059c
LW
2837 tmp = *s++;
2838 if (*s == tmp) {
2839 s++;
3280af22 2840 if (PL_expect == XOPERATOR)
79072805
LW
2841 TERM(POSTDEC);
2842 else
2843 OPERATOR(PREDEC);
2844 }
2845 else if (*s == '>') {
2846 s++;
2847 s = skipspace(s);
7e2040f0 2848 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2849 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2850 TOKEN(ARROW);
79072805 2851 }
748a9306
LW
2852 else if (*s == '$')
2853 OPERATOR(ARROW);
463ee0b2 2854 else
748a9306 2855 TERM(ARROW);
a687059c 2856 }
3280af22 2857 if (PL_expect == XOPERATOR)
79072805
LW
2858 Aop(OP_SUBTRACT);
2859 else {
3280af22 2860 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2861 check_uni();
79072805 2862 OPERATOR('-'); /* unary minus */
2f3197b3 2863 }
79072805 2864
378cc40b 2865 case '+':
a687059c
LW
2866 tmp = *s++;
2867 if (*s == tmp) {
378cc40b 2868 s++;
3280af22 2869 if (PL_expect == XOPERATOR)
79072805
LW
2870 TERM(POSTINC);
2871 else
2872 OPERATOR(PREINC);
378cc40b 2873 }
3280af22 2874 if (PL_expect == XOPERATOR)
79072805
LW
2875 Aop(OP_ADD);
2876 else {
3280af22 2877 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2878 check_uni();
a687059c 2879 OPERATOR('+');
2f3197b3 2880 }
a687059c 2881
378cc40b 2882 case '*':
3280af22
NIS
2883 if (PL_expect != XOPERATOR) {
2884 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2885 PL_expect = XOPERATOR;
2886 force_ident(PL_tokenbuf, '*');
2887 if (!*PL_tokenbuf)
a0d0e21e 2888 PREREF('*');
79072805 2889 TERM('*');
a687059c 2890 }
79072805
LW
2891 s++;
2892 if (*s == '*') {
a687059c 2893 s++;
79072805 2894 PWop(OP_POW);
a687059c 2895 }
79072805
LW
2896 Mop(OP_MULTIPLY);
2897
378cc40b 2898 case '%':
3280af22 2899 if (PL_expect == XOPERATOR) {
bbce6d69
PP
2900 ++s;
2901 Mop(OP_MODULO);
a687059c 2902 }
3280af22
NIS
2903 PL_tokenbuf[0] = '%';
2904 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2905 if (!PL_tokenbuf[1]) {
2906 if (s == PL_bufend)
bbce6d69
PP
2907 yyerror("Final % should be \\% or %name");
2908 PREREF('%');
a687059c 2909 }
3280af22 2910 PL_pending_ident = '%';
bbce6d69 2911 TERM('%');
a687059c 2912
378cc40b 2913 case '^':
79072805 2914 s++;
a0d0e21e 2915 BOop(OP_BIT_XOR);
79072805 2916 case '[':
3280af22 2917 PL_lex_brackets++;
79072805 2918 /* FALL THROUGH */
378cc40b 2919 case '~':
378cc40b 2920 case ',':
378cc40b
LW
2921 tmp = *s++;
2922 OPERATOR(tmp);
a0d0e21e
LW
2923 case ':':
2924 if (s[1] == ':') {
2925 len = 0;
2926 goto just_a_word;
2927 }
2928 s++;
09bef843
SB
2929 switch (PL_expect) {
2930 OP *attrs;
2931 case XOPERATOR:
2932 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2933 break;
2934 PL_bufptr = s; /* update in case we back off */
2935 goto grabattrs;
2936 case XATTRBLOCK:
2937 PL_expect = XBLOCK;
2938 goto grabattrs;
2939 case XATTRTERM:
2940 PL_expect = XTERMBLOCK;
2941 grabattrs:
2942 s = skipspace(s);
2943 attrs = Nullop;
7e2040f0 2944 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2945 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2946 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2947 if (tmp < 0) tmp = -tmp;
2948 switch (tmp) {
2949 case KEY_or:
2950 case KEY_and:
2951 case KEY_for:
2952 case KEY_unless:
2953 case KEY_if:
2954 case KEY_while:
2955 case KEY_until:
2956 goto got_attrs;
2957 default:
2958 break;
2959 }
2960 }
09bef843
SB
2961 if (*d == '(') {
2962 d = scan_str(d,TRUE,TRUE);
2963 if (!d) {
2964 if (PL_lex_stuff) {
2965 SvREFCNT_dec(PL_lex_stuff);
2966 PL_lex_stuff = Nullsv;
2967 }
2968 /* MUST advance bufptr here to avoid bogus
2969 "at end of line" context messages from yyerror().
2970 */
2971 PL_bufptr = s + len;
2972 yyerror("Unterminated attribute parameter in attribute list");
2973 if (attrs)
2974 op_free(attrs);
2975 return 0; /* EOF indicator */
2976 }
2977 }
2978 if (PL_lex_stuff) {
2979 SV *sv = newSVpvn(s, len);
2980 sv_catsv(sv, PL_lex_stuff);
2981 attrs = append_elem(OP_LIST, attrs,
2982 newSVOP(OP_CONST, 0, sv));
2983 SvREFCNT_dec(PL_lex_stuff);
2984 PL_lex_stuff = Nullsv;
2985 }
2986 else {
2987 attrs = append_elem(OP_LIST, attrs,
2988 newSVOP(OP_CONST, 0,
2989 newSVpvn(s, len)));
2990 }
2991 s = skipspace(d);
0120eecf 2992 if (*s == ':' && s[1] != ':')
09bef843 2993 s = skipspace(s+1);
0120eecf
GS
2994 else if (s == d)
2995 break; /* require real whitespace or :'s */
09bef843 2996 }
f9829d6b
GS
2997 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
2998 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
2999 char q = ((*s == '\'') ? '"' : '\'');
3000 /* If here for an expression, and parsed no attrs, back off. */
3001 if (tmp == '=' && !attrs) {
3002 s = PL_bufptr;
3003 break;
3004 }
3005 /* MUST advance bufptr here to avoid bogus "at end of line"
3006 context messages from yyerror().
3007 */
3008 PL_bufptr = s;
3009 if (!*s)
3010 yyerror("Unterminated attribute list");
3011 else
3012 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3013 q, *s, q));
3014 if (attrs)
3015 op_free(attrs);
3016 OPERATOR(':');
3017 }
f9829d6b 3018 got_attrs:
09bef843
SB
3019 if (attrs) {
3020 PL_nextval[PL_nexttoke].opval = attrs;
3021 force_next(THING);
3022 }
3023 TOKEN(COLONATTR);
3024 }
a0d0e21e 3025 OPERATOR(':');
8990e307
LW
3026 case '(':
3027 s++;
3280af22
NIS
3028 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3029 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3030 else
3280af22 3031 PL_expect = XTERM;
a0d0e21e 3032 TOKEN('(');
378cc40b 3033 case ';':
f4dd75d9 3034 CLINE;
378cc40b
LW
3035 tmp = *s++;
3036 OPERATOR(tmp);
3037 case ')':
378cc40b 3038 tmp = *s++;
16d20bd9
AD
3039 s = skipspace(s);
3040 if (*s == '{')
3041 PREBLOCK(tmp);
378cc40b 3042 TERM(tmp);
79072805
LW
3043 case ']':
3044 s++;
3280af22 3045 if (PL_lex_brackets <= 0)
d98d5fff 3046 yyerror("Unmatched right square bracket");
463ee0b2 3047 else
3280af22
NIS
3048 --PL_lex_brackets;
3049 if (PL_lex_state == LEX_INTERPNORMAL) {
3050 if (PL_lex_brackets == 0) {
a0d0e21e 3051 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3052 PL_lex_state = LEX_INTERPEND;
79072805
LW
3053 }
3054 }
4633a7c4 3055 TERM(']');
79072805
LW
3056 case '{':
3057 leftbracket:
79072805 3058 s++;
3280af22
NIS
3059 if (PL_lex_brackets > 100) {
3060 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3061 if (newlb != PL_lex_brackstack) {
8990e307 3062 SAVEFREEPV(newlb);
3280af22 3063 PL_lex_brackstack = newlb;
8990e307
LW
3064 }
3065 }
3280af22 3066 switch (PL_expect) {
a0d0e21e 3067 case XTERM:
3280af22 3068 if (PL_lex_formbrack) {
a0d0e21e
LW
3069 s--;
3070 PRETERMBLOCK(DO);
3071 }
3280af22
NIS
3072 if (PL_oldoldbufptr == PL_last_lop)
3073 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3074 else
3280af22 3075 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3076 OPERATOR(HASHBRACK);
a0d0e21e 3077 case XOPERATOR:
bf4acbe4 3078 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3079 s++;
44a8e56a 3080 d = s;
3280af22
NIS
3081 PL_tokenbuf[0] = '\0';
3082 if (d < PL_bufend && *d == '-') {
3083 PL_tokenbuf[0] = '-';
44a8e56a 3084 d++;
bf4acbe4 3085 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3086 d++;
3087 }
7e2040f0 3088 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3089 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3090 FALSE, &len);
bf4acbe4 3091 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3092 d++;
3093 if (*d == '}') {
3280af22 3094 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3095 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3096 if (minus)
3097 force_next('-');
748a9306
LW
3098 }
3099 }
3100 /* FALL THROUGH */
09bef843 3101 case XATTRBLOCK:
748a9306 3102 case XBLOCK:
3280af22
NIS
3103 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3104 PL_expect = XSTATE;
a0d0e21e 3105 break;
09bef843 3106 case XATTRTERM:
a0d0e21e 3107 case XTERMBLOCK:
3280af22
NIS
3108 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3109 PL_expect = XSTATE;
a0d0e21e
LW
3110 break;
3111 default: {
3112 char *t;
3280af22
NIS
3113 if (PL_oldoldbufptr == PL_last_lop)
3114 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3115 else
3280af22 3116 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3117 s = skipspace(s);
09ecc4b6 3118 if (*s == '}')
a0d0e21e 3119 OPERATOR(HASHBRACK);
b8a4b1be
GS
3120 /* This hack serves to disambiguate a pair of curlies
3121 * as being a block or an anon hash. Normally, expectation
3122 * determines that, but in cases where we're not in a
3123 * position to expect anything in particular (like inside
3124 * eval"") we have to resolve the ambiguity. This code
3125 * covers the case where the first term in the curlies is a
3126 * quoted string. Most other cases need to be explicitly
3127 * disambiguated by prepending a `+' before the opening
3128 * curly in order to force resolution as an anon hash.
3129 *
3130 * XXX should probably propagate the outer expectation
3131 * into eval"" to rely less on this hack, but that could
3132 * potentially break current behavior of eval"".
3133 * GSAR 97-07-21
3134 */
3135 t = s;
3136 if (*s == '\'' || *s == '"' || *s == '`') {
3137 /* common case: get past first string, handling escapes */
3280af22 3138 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3139 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3140 t++;
3141 t++;
a0d0e21e 3142 }
b8a4b1be 3143 else if (*s == 'q') {
3280af22 3144 if (++t < PL_bufend
b8a4b1be 3145 && (!isALNUM(*t)
3280af22 3146 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3147 && !isALNUM(*t))))
3148 {
b8a4b1be
GS
3149 char *tmps;
3150 char open, close, term;
3151 I32 brackets = 1;
3152
3280af22 3153 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3154 t++;
3155 term = *t;
3156 open = term;
3157 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3158 term = tmps[5];
3159 close = term;
3160 if (open == close)
3280af22
NIS
3161 for (t++; t < PL_bufend; t++) {
3162 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3163 t++;
6d07e5e9 3164 else if (*t == open)
b8a4b1be
GS
3165 break;
3166 }
3167 else
3280af22
NIS
3168 for (t++; t < PL_bufend; t++) {
3169 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3170 t++;
6d07e5e9 3171 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3172 break;
3173 else if (*t == open)
3174 brackets++;
3175 }
3176 }
3177 t++;
a0d0e21e 3178 }
7e2040f0 3179 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3180 t += UTF8SKIP(t);
7e2040f0 3181 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3182 t += UTF8SKIP(t);
a0d0e21e 3183 }
3280af22 3184 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3185 t++;
b8a4b1be
GS
3186 /* if comma follows first term, call it an anon hash */
3187 /* XXX it could be a comma expression with loop modifiers */
3280af22 3188 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3189 || (*t == '=' && t[1] == '>')))
a0d0e21e 3190 OPERATOR(HASHBRACK);
3280af22 3191 if (PL_expect == XREF)
4e4e412b 3192 PL_expect = XTERM;
a0d0e21e 3193 else {
3280af22
NIS
3194 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3195 PL_expect = XSTATE;
a0d0e21e 3196 }
8990e307 3197 }
a0d0e21e 3198 break;
463ee0b2 3199 }
57843af0 3200 yylval.ival = CopLINE(PL_curcop);
79072805 3201 if (isSPACE(*s) || *s == '#')
3280af22 3202 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3203 TOKEN('{');
378cc40b 3204 case '}':
79072805
LW
3205 rightbracket:
3206 s++;
3280af22 3207 if (PL_lex_brackets <= 0)
d98d5fff 3208 yyerror("Unmatched right curly bracket");
463ee0b2 3209 else
3280af22 3210 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3211 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3212 PL_lex_formbrack = 0;
3213 if (PL_lex_state == LEX_INTERPNORMAL) {
3214 if (PL_lex_brackets == 0) {
9059aa12
LW
3215 if (PL_expect & XFAKEBRACK) {
3216 PL_expect &= XENUMMASK;
3280af22
NIS
3217 PL_lex_state = LEX_INTERPEND;
3218 PL_bufptr = s;
cea2e8a9 3219 return yylex(); /* ignore fake brackets */
79072805 3220 }
fa83b5b6 3221 if (*s == '-' && s[1] == '>')
3280af22 3222 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3223 else if (*s != '[' && *s != '{')
3280af22 3224 PL_lex_state = LEX_INTERPEND;
79072805
LW
3225 }
3226 }
9059aa12
LW
3227 if (PL_expect & XFAKEBRACK) {
3228 PL_expect &= XENUMMASK;
3280af22 3229 PL_bufptr = s;
cea2e8a9 3230 return yylex(); /* ignore fake brackets */
748a9306 3231 }
79072805
LW
3232 force_next('}');
3233 TOKEN(';');
378cc40b
LW
3234 case '&':
3235 s++;
3236 tmp = *s++;
3237 if (tmp == '&')
a0d0e21e 3238 AOPERATOR(ANDAND);
378cc40b 3239 s--;
3280af22 3240 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3241 if (ckWARN(WARN_SEMICOLON)
3242 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3243 {
57843af0 3244 CopLINE_dec(PL_curcop);
cea2e8a9 3245 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3246 CopLINE_inc(PL_curcop);
463ee0b2 3247 }
79072805 3248 BAop(OP_BIT_AND);
463ee0b2 3249 }
79072805 3250
3280af22
NIS
3251 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3252 if (*PL_tokenbuf) {
3253 PL_expect = XOPERATOR;
3254 force_ident(PL_tokenbuf, '&');
463ee0b2 3255 }
79072805
LW
3256 else
3257 PREREF('&');
c07a80fd 3258 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3259 TERM('&');
3260
378cc40b
LW
3261 case '|':
3262 s++;
3263 tmp = *s++;
3264 if (tmp == '|')
a0d0e21e 3265 AOPERATOR(OROR);
378cc40b 3266 s--;
79072805 3267 BOop(OP_BIT_OR);
378cc40b
LW
3268 case '=':
3269 s++;
3270 tmp = *s++;
3271 if (tmp == '=')
79072805
LW
3272 Eop(OP_EQ);
3273 if (tmp == '>')
3274 OPERATOR(',');
378cc40b 3275 if (tmp == '~')
79072805 3276 PMop(OP_MATCH);
599cee73 3277 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3278 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3279 s--;
3280af22
NIS
3280 if (PL_expect == XSTATE && isALPHA(tmp) &&
3281 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3282 {
3280af22
NIS
3283 if (PL_in_eval && !PL_rsfp) {
3284 d = PL_bufend;
a5f75d66
AD
3285 while (s < d) {
3286 if (*s++ == '\n') {
3287 incline(s);
3288 if (strnEQ(s,"=cut",4)) {
3289 s = strchr(s,'\n');
3290 if (s)
3291 s++;
3292 else
3293 s = d;
3294 incline(s);
3295 goto retry;
3296 }
3297 }
3298 }
3299 goto retry;
3300 }
3280af22
NIS
3301 s = PL_bufend;
3302 PL_doextract = TRUE;
a0d0e21e
LW
3303 goto retry;
3304 }
3280af22 3305 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3306 char *t;
51882d45 3307#ifdef PERL_STRICT_CR
bf4acbe4 3308 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3309#else
bf4acbe4 3310 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3311#endif
a0d0e21e
LW
3312 if (*t == '\n' || *t == '#') {
3313 s--;
3280af22 3314 PL_expect = XBLOCK;
a0d0e21e
LW
3315 goto leftbracket;
3316 }
79072805 3317 }
a0d0e21e
LW
3318 yylval.ival = 0;
3319 OPERATOR(ASSIGNOP);
378cc40b
LW
3320 case '!':
3321 s++;
3322 tmp = *s++;
3323 if (tmp == '=')
79072805 3324 Eop(OP_NE);
378cc40b 3325 if (tmp == '~')
79072805 3326 PMop(OP_NOT);
378cc40b
LW
3327 s--;
3328 OPERATOR('!');
3329 case '<':
3280af22 3330 if (PL_expect != XOPERATOR) {
93a17b20 3331 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3332 check_uni();
79072805
LW
3333 if (s[1] == '<')
3334 s = scan_heredoc(s);
3335 else
3336 s = scan_inputsymbol(s);
3337 TERM(sublex_start());
378cc40b
LW
3338 }
3339 s++;
3340 tmp = *s++;
3341 if (tmp == '<')
79072805 3342 SHop(OP_LEFT_SHIFT);
395c3793
LW
3343 if (tmp == '=') {
3344 tmp = *s++;
3345 if (tmp == '>')
79072805 3346 Eop(OP_NCMP);
395c3793 3347 s--;
79072805 3348 Rop(OP_LE);
395c3793 3349 }
378cc40b 3350 s--;
79072805 3351 Rop(OP_LT);
378cc40b
LW
3352 case '>':
3353 s++;
3354 tmp = *s++;
3355 if (tmp == '>')
79072805 3356 SHop(OP_RIGHT_SHIFT);
378cc40b 3357 if (tmp == '=')
79072805 3358 Rop(OP_GE);
378cc40b 3359 s--;
79072805 3360 Rop(OP_GT);
378cc40b
LW
3361
3362 case '$':
bbce6d69
PP
3363 CLINE;
3364
3280af22
NIS
3365 if (PL_expect == XOPERATOR) {
3366 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3367 PL_expect = XTERM;
a0d0e21e 3368 depcom();
bbce6d69 3369 return ','; /* grandfather non-comma-format format */
a0d0e21e 3370 }
8990e307 3371 }
a0d0e21e 3372
7e2040f0 3373 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3374 PL_tokenbuf[0] = '@';
376b8730
SM
3375 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3376 sizeof PL_tokenbuf - 1, FALSE);
3377 if (PL_expect == XOPERATOR)
3378 no_op("Array length", s);
3280af22 3379 if (!PL_tokenbuf[1])
a0d0e21e 3380 PREREF(DOLSHARP);
3280af22
NIS
3381 PL_expect = XOPERATOR;
3382 PL_pending_ident = '#';
463ee0b2 3383 TOKEN(DOLSHARP);
79072805 3384 }
bbce6d69 3385
3280af22 3386 PL_tokenbuf[0] = '$';
376b8730
SM
3387 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3388 sizeof PL_tokenbuf - 1, FALSE);
3389 if (PL_expect == XOPERATOR)
3390 no_op("Scalar", s);
3280af22
NIS
3391 if (!PL_tokenbuf[1]) {
3392 if (s == PL_bufend)
bbce6d69
PP
3393 yyerror("Final $ should be \\$ or $name");
3394 PREREF('$');
8990e307 3395 }
a0d0e21e 3396
bbce6d69 3397 /* This kludge not intended to be bulletproof. */
3280af22 3398 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3399 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3400 newSViv(PL_compiling.cop_arybase));
bbce6d69
PP
3401 yylval.opval->op_private = OPpCONST_ARYBASE;
3402 TERM(THING);
3403 }
3404
ff68c719 3405 d = s;
69d2bceb 3406 tmp = (I32)*s;
3280af22 3407 if (PL_lex_state == LEX_NORMAL)
ff68c719
PP
3408 s = skipspace(s);
3409
3280af22 3410 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69
PP
3411 char *t;
3412 if (*s == '[') {
3280af22 3413 PL_tokenbuf[0] = '@';
599cee73 3414 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3415 for(t = s + 1;
7e2040f0 3416 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3417 t++) ;
a0d0e21e 3418 if (*t++ == ',') {
3280af22
NIS
3419 PL_bufptr = skipspace(PL_bufptr);
3420 while (t < PL_bufend && *t != ']')
bbce6d69 3421 t++;
cea2e8a9 3422 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3423 "Multidimensional syntax %.*s not supported",
3424 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3425 }
3426 }
bbce6d69
PP
3427 }
3428 else if (*s == '{') {
3280af22 3429 PL_tokenbuf[0] = '%';
599cee73 3430 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69
PP
3431 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3432 {
3280af22 3433 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3434 STRLEN len;
3435 for (t++; isSPACE(*t); t++) ;
7e2040f0 3436 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3437 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3438 for (; isSPACE(*t); t++) ;
864dbfa3 3439 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3440 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3441 "You need to quote \"%s\"", tmpbuf);
748a9306 3442 }
93a17b20
LW
3443 }
3444 }
2f3197b3 3445 }
bbce6d69 3446
3280af22 3447 PL_expect = XOPERATOR;
69d2bceb 3448 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3449 bool islop = (PL_last_lop == PL_oldoldbufptr);
3450 if (!islop || PL_last_lop_op == OP_GREPSTART)
3451 PL_expect = XOPERATOR;
bbce6d69 3452 else if (strchr("$@\"'`q", *s))
3280af22 3453 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3454 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3455 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3456 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3457 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3458 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3459 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3460 /* binary operators exclude handle interpretations */
3461 switch (tmp) {
3462 case -KEY_x:
3463 case -KEY_eq:
3464 case -KEY_ne:
3465 case -KEY_gt:
3466 case -KEY_lt:
3467 case -KEY_ge:
3468 case -KEY_le:
3469 case -KEY_cmp:
3470 break;
3471 default:
3280af22 3472 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3473 break;
3474 }
3475 }
68dc0745
PP
3476 else {
3477 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3478 if (gv && GvCVu(gv))
3280af22 3479 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3480 }
93a17b20 3481 }
bbce6d69 3482 else if (isDIGIT(*s))
3280af22 3483 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3484 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3485 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3486 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3487 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3488 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3489 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3490 }
3280af22 3491 PL_pending_ident = '$';
79072805 3492 TOKEN('$');
378cc40b
LW
3493
3494 case '@':
3280af22 3495 if (PL_expect == XOPERATOR)
bbce6d69 3496 no_op("Array", s);
3280af22
NIS
3497 PL_tokenbuf[0] = '@';
3498 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3499 if (!PL_tokenbuf[1]) {
3500 if (s == PL_bufend)
bbce6d69
PP
3501 yyerror("Final @ should be \\@ or @name");
3502 PREREF('@');
3503 }
3280af22 3504 if (PL_lex_state == LEX_NORMAL)
ff68c719 3505 s = skipspace(s);
3280af22 3506 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3507 if (*s == '{')
3280af22 3508 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3509
3510 /* Warn about @ where they meant $. */
599cee73 3511 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3512 if (*s == '[' || *s == '{') {
3513 char *t = s + 1;
7e2040f0 3514 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3515 t++;
3516 if (*t == '}' || *t == ']') {
3517 t++;
3280af22 3518 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3519 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3520 "Scalar value %.*s better written as $%.*s",
3280af22 3521 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3522 }
93a17b20
LW
3523 }
3524 }
463ee0b2 3525 }
3280af22 3526 PL_pending_ident = '@';
79072805 3527 TERM('@');
378cc40b
LW
3528
3529 case '/': /* may either be division or pattern */
3530 case '?': /* may either be conditional or pattern */
3280af22 3531 if (PL_expect != XOPERATOR) {
c277df42 3532 /* Disable warning on "study /blah/" */
3280af22
NIS
3533 if (PL_oldoldbufptr == PL_last_uni
3534 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3535 || memNE(PL_last_uni, "study", 5)
3536 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3537 check_uni();
8782bef2 3538 s = scan_pat(s,OP_MATCH);
79072805 3539 TERM(sublex_start());
378cc40b
LW
3540 }
3541 tmp = *s++;
a687059c 3542 if (tmp == '/')
79072805 3543 Mop(OP_DIVIDE);
378cc40b
LW
3544 OPERATOR(tmp);
3545
3546 case '.':
51882d45
GS
3547 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3548#ifdef PERL_STRICT_CR
3549 && s[1] == '\n'
3550#else
3551 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3552#endif
3553 && (s == PL_linestart || s[-1] == '\n') )
3554 {
3280af22
NIS
3555 PL_lex_formbrack = 0;
3556 PL_expect = XSTATE;
79072805
LW
3557 goto rightbracket;
3558 }
3280af22 3559 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3560 tmp = *s++;
a687059c
LW
3561 if (*s == tmp) {
3562 s++;
2f3197b3
LW
3563 if (*s == tmp) {
3564 s++;
79072805 3565 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3566 }
3567 else
79072805 3568 yylval.ival = 0;
378cc40b 3569 OPERATOR(DOTDOT);
a687059c 3570 }
3280af22 3571 if (PL_expect != XOPERATOR)
2f3197b3 3572 check_uni();
79072805 3573 Aop(OP_CONCAT);
378cc40b
LW
3574 }
3575 /* FALL THROUGH */
3576 case '0': case '1': case '2': case '3': case '4':
3577 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3578 s = scan_num(s, &yylval);
3280af22 3579 if (PL_expect == XOPERATOR)