This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Integrate change #18420 from maint-5.8:
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff 65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
IRC
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
IRC
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
6f33ba73 170#define UNI2(f,x) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c 176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
177#define UNI(f) UNI2(f,XTERM)
178#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 179
79072805 180#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 181 REPORT("uni",f) \
3280af22
NIS
182 PL_bufptr = s, \
183 PL_last_uni = PL_oldbufptr, \
79072805
LW
184 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
185
9f68db38 186/* grandfather return to old style */
3280af22 187#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 188
8fa7f367
JH
189#ifdef DEBUGGING
190
2d00ba3b 191STATIC void
61b2116b 192S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 193{
998054bd 194 DEBUG_T({
9c5ffd7c 195 SV* report = newSVpv(thing, 0);
29b291f7
RB
196 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
197 (IV)rv);
998054bd
SC
198
199 if (s - PL_bufptr > 0)
200 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
201 else {
202 if (PL_oldbufptr && *PL_oldbufptr)
203 sv_catpv(report, PL_tokenbuf);
204 }
205 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 206 });
998054bd
SC
207}
208
8fa7f367
JH
209#endif
210
ffb4593c
NT
211/*
212 * S_ao
213 *
c963b151
BD
214 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
215 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
216 */
217
76e3520e 218STATIC int
cea2e8a9 219S_ao(pTHX_ int toketype)
a0d0e21e 220{
3280af22
NIS
221 if (*PL_bufptr == '=') {
222 PL_bufptr++;
a0d0e21e
LW
223 if (toketype == ANDAND)
224 yylval.ival = OP_ANDASSIGN;
225 else if (toketype == OROR)
226 yylval.ival = OP_ORASSIGN;
c963b151
BD
227 else if (toketype == DORDOR)
228 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
229 toketype = ASSIGNOP;
230 }
231 return toketype;
232}
233
ffb4593c
NT
234/*
235 * S_no_op
236 * When Perl expects an operator and finds something else, no_op
237 * prints the warning. It always prints "<something> found where
238 * operator expected. It prints "Missing semicolon on previous line?"
239 * if the surprise occurs at the start of the line. "do you need to
240 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
241 * where the compiler doesn't know if foo is a method call or a function.
242 * It prints "Missing operator before end of line" if there's nothing
243 * after the missing operator, or "... before <...>" if there is something
244 * after the missing operator.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_no_op(pTHX_ char *what, char *s)
463ee0b2 249{
3280af22
NIS
250 char *oldbp = PL_bufptr;
251 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 252
1189a94a
GS
253 if (!s)
254 s = oldbp;
07c798fb 255 else
1189a94a 256 PL_bufptr = s;
cea2e8a9 257 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 258 if (is_first)
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 260 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 261 char *t;
7e2040f0 262 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 263 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 264 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 265 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 266 }
07c798fb
HS
267 else {
268 assert(s >= oldbp);
cea2e8a9 269 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 270 }
3280af22 271 PL_bufptr = oldbp;
8990e307
LW
272}
273
ffb4593c
NT
274/*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
76e3520e 283STATIC void
cea2e8a9 284S_missingterm(pTHX_ char *s)
8990e307
LW
285{
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
d2719217 290 if (nl)
8990e307
LW
291 *nl = '\0';
292 }
9d116dd7
JH
293 else if (
294#ifdef EBCDIC
295 iscntrl(PL_multi_close)
296#else
297 PL_multi_close < 32 || PL_multi_close == 127
298#endif
299 ) {
8990e307 300 *tmpbuf = '^';
3280af22 301 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
eb160463 307 *tmpbuf = (char)PL_multi_close;
8990e307
LW
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 313}
79072805 314
ffb4593c
NT
315/*
316 * Perl_deprecate
ffb4593c
NT
317 */
318
79072805 319void
864dbfa3 320Perl_deprecate(pTHX_ char *s)
a0d0e21e 321{
599cee73 322 if (ckWARN(WARN_DEPRECATED))
9014280d 323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
324}
325
12bcd1a6
PM
326void
327Perl_deprecate_old(pTHX_ char *s)
328{
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340}
341
ffb4593c
NT
342/*
343 * depcom
9cbb5ea2 344 * Deprecate a comma-less variable list.
ffb4593c
NT
345 */
346
76e3520e 347STATIC void
cea2e8a9 348S_depcom(pTHX)
a0d0e21e 349{
12bcd1a6 350 deprecate_old("comma-less variable list");
a0d0e21e
LW
351}
352
ffb4593c 353/*
9cbb5ea2
GS
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
ffb4593c
NT
356 */
357
c39cd008
GS
358#ifdef PERL_CR_FILTER
359static void
360strip_return(SV *sv)
361{
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379}
a868473f 380
76e3520e 381STATIC I32
c39cd008 382S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 383{
c39cd008
GS
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
a868473f
NIS
388}
389#endif
390
ffb4593c
NT
391/*
392 * Perl_lex_start
9cbb5ea2
GS
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
ffb4593c
NT
395 */
396
a0d0e21e 397void
864dbfa3 398Perl_lex_start(pTHX_ SV *line)
79072805 399{
8990e307
LW
400 char *s;
401 STRLEN len;
402
3280af22
NIS
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
3280af22
NIS
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
7766f137 408 SAVEVPTR(PL_lex_inpat);
3280af22 409 SAVEI32(PL_lex_inwhat);
18b09519
GS
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
18b09519 417 }
57843af0 418 SAVECOPLINE(PL_curcop);
3280af22
NIS
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
3280af22
NIS
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
8edd5f42
RGS
427 SAVEGENERICPV(PL_lex_brackstack);
428 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
09bef843 432 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 433 SAVESPTR(PL_lex_repl);
bebdddfc
GS
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
3280af22
NIS
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
3280af22
NIS
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
443 PL_lex_casemods = 0;
444 *PL_lex_casestack = '\0';
445 PL_lex_dojoin = 0;
446 PL_lex_starts = 0;
447 PL_lex_stuff = Nullsv;
448 PL_lex_repl = Nullsv;
449 PL_lex_inpat = 0;
76be56bc 450 PL_nexttoke = 0;
3280af22 451 PL_lex_inwhat = 0;
09bef843 452 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
453 PL_linestr = line;
454 if (SvREADONLY(PL_linestr))
455 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
456 s = SvPV(PL_linestr, len);
6f27f9a7 457 if (!len || s[len-1] != ';') {
3280af22
NIS
458 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
459 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
460 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 461 }
3280af22
NIS
462 SvTEMP_off(PL_linestr);
463 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
464 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 465 PL_last_lop = PL_last_uni = Nullch;
3280af22 466 PL_rsfp = 0;
79072805 467}
a687059c 468
ffb4593c
NT
469/*
470 * Perl_lex_end
9cbb5ea2
GS
471 * Finalizer for lexing operations. Must be called when the parser is
472 * done with the lexer.
ffb4593c
NT
473 */
474
463ee0b2 475void
864dbfa3 476Perl_lex_end(pTHX)
463ee0b2 477{
3280af22 478 PL_doextract = FALSE;
463ee0b2
LW
479}
480
ffb4593c
NT
481/*
482 * S_incline
483 * This subroutine has nothing to do with tilting, whether at windmills
484 * or pinball tables. Its name is short for "increment line". It
57843af0 485 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 486 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
487 * # line 500 "foo.pm"
488 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
489 */
490
76e3520e 491STATIC void
cea2e8a9 492S_incline(pTHX_ char *s)
463ee0b2
LW
493{
494 char *t;
495 char *n;
73659bf1 496 char *e;
463ee0b2 497 char ch;
463ee0b2 498
57843af0 499 CopLINE_inc(PL_curcop);
463ee0b2
LW
500 if (*s++ != '#')
501 return;
bf4acbe4 502 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
503 if (strnEQ(s, "line", 4))
504 s += 4;
505 else
506 return;
084592ab 507 if (SPACE_OR_TAB(*s))
73659bf1 508 s++;
4e553d73 509 else
73659bf1 510 return;
bf4acbe4 511 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
512 if (!isDIGIT(*s))
513 return;
514 n = s;
515 while (isDIGIT(*s))
516 s++;
bf4acbe4 517 while (SPACE_OR_TAB(*s))
463ee0b2 518 s++;
73659bf1 519 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 520 s++;
73659bf1
GS
521 e = t + 1;
522 }
463ee0b2 523 else {
463ee0b2 524 for (t = s; !isSPACE(*t); t++) ;
73659bf1 525 e = t;
463ee0b2 526 }
bf4acbe4 527 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
528 e++;
529 if (*e != '\n' && *e != '\0')
530 return; /* false alarm */
531
463ee0b2
LW
532 ch = *t;
533 *t = '\0';
f4dd75d9 534 if (t - s > 0) {
05ec9bb3 535 CopFILE_free(PL_curcop);
57843af0 536 CopFILE_set(PL_curcop, s);
f4dd75d9 537 }
463ee0b2 538 *t = ch;
57843af0 539 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
540}
541
ffb4593c
NT
542/*
543 * S_skipspace
544 * Called to gobble the appropriate amount and type of whitespace.
545 * Skips comments as well.
546 */
547
76e3520e 548STATIC char *
cea2e8a9 549S_skipspace(pTHX_ register char *s)
a687059c 550{
3280af22 551 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 552 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
553 s++;
554 return s;
555 }
556 for (;;) {
fd049845 557 STRLEN prevlen;
09bef843 558 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 559 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
560 while (s < PL_bufend && isSPACE(*s)) {
561 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
562 incline(s);
563 }
ffb4593c
NT
564
565 /* comment */
3280af22
NIS
566 if (s < PL_bufend && *s == '#') {
567 while (s < PL_bufend && *s != '\n')
463ee0b2 568 s++;
60e6418e 569 if (s < PL_bufend) {
463ee0b2 570 s++;
60e6418e
GS
571 if (PL_in_eval && !PL_rsfp) {
572 incline(s);
573 continue;
574 }
575 }
463ee0b2 576 }
ffb4593c
NT
577
578 /* only continue to recharge the buffer if we're at the end
579 * of the buffer, we're not reading from a source filter, and
580 * we're in normal lexing mode
581 */
09bef843
SB
582 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
583 PL_lex_state == LEX_FORMLINE)
463ee0b2 584 return s;
ffb4593c
NT
585
586 /* try to recharge the buffer */
9cbb5ea2
GS
587 if ((s = filter_gets(PL_linestr, PL_rsfp,
588 (prevlen = SvCUR(PL_linestr)))) == Nullch)
589 {
590 /* end of file. Add on the -p or -n magic */
3280af22
NIS
591 if (PL_minus_n || PL_minus_p) {
592 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
593 ";}continue{print or die qq(-p destination: $!\\n)" :
594 "");
3280af22
NIS
595 sv_catpv(PL_linestr,";}");
596 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
597 }
598 else
3280af22 599 sv_setpv(PL_linestr,";");
ffb4593c
NT
600
601 /* reset variables for next time we lex */
9cbb5ea2
GS
602 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
603 = SvPVX(PL_linestr);
3280af22 604 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 605 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
606
607 /* Close the filehandle. Could be from -P preprocessor,
608 * STDIN, or a regular file. If we were reading code from
609 * STDIN (because the commandline held no -e or filename)
610 * then we don't close it, we reset it so the code can
611 * read from STDIN too.
612 */
613
3280af22
NIS
614 if (PL_preprocess && !PL_in_eval)
615 (void)PerlProc_pclose(PL_rsfp);
616 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
617 PerlIO_clearerr(PL_rsfp);
8990e307 618 else
3280af22
NIS
619 (void)PerlIO_close(PL_rsfp);
620 PL_rsfp = Nullfp;
463ee0b2
LW
621 return s;
622 }
ffb4593c
NT
623
624 /* not at end of file, so we only read another line */
09bef843
SB
625 /* make corresponding updates to old pointers, for yyerror() */
626 oldprevlen = PL_oldbufptr - PL_bufend;
627 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
628 if (PL_last_uni)
629 oldunilen = PL_last_uni - PL_bufend;
630 if (PL_last_lop)
631 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
632 PL_linestart = PL_bufptr = s + prevlen;
633 PL_bufend = s + SvCUR(PL_linestr);
634 s = PL_bufptr;
09bef843
SB
635 PL_oldbufptr = s + oldprevlen;
636 PL_oldoldbufptr = s + oldoldprevlen;
637 if (PL_last_uni)
638 PL_last_uni = s + oldunilen;
639 if (PL_last_lop)
640 PL_last_lop = s + oldloplen;
a0d0e21e 641 incline(s);
ffb4593c
NT
642
643 /* debugger active and we're not compiling the debugger code,
644 * so store the line into the debugger's array of lines
645 */
3280af22 646 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
647 SV *sv = NEWSV(85,0);
648
649 sv_upgrade(sv, SVt_PVMG);
3280af22 650 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
651 (void)SvIOK_on(sv);
652 SvIVX(sv) = 0;
57843af0 653 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 654 }
463ee0b2 655 }
a687059c 656}
378cc40b 657
ffb4593c
NT
658/*
659 * S_check_uni
660 * Check the unary operators to ensure there's no ambiguity in how they're
661 * used. An ambiguous piece of code would be:
662 * rand + 5
663 * This doesn't mean rand() + 5. Because rand() is a unary operator,
664 * the +5 is its argument.
665 */
666
76e3520e 667STATIC void
cea2e8a9 668S_check_uni(pTHX)
ba106d47 669{
2f3197b3 670 char *s;
a0d0e21e 671 char *t;
2f3197b3 672
3280af22 673 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 674 return;
3280af22
NIS
675 while (isSPACE(*PL_last_uni))
676 PL_last_uni++;
7e2040f0 677 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 678 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 679 return;
0453d815 680 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 681 char ch = *s;
0453d815 682 *s = '\0';
9014280d 683 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 684 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
685 PL_last_uni);
686 *s = ch;
687 }
2f3197b3
LW
688}
689
ffb4593c
NT
690/*
691 * LOP : macro to build a list operator. Its behaviour has been replaced
692 * with a subroutine, S_lop() for which LOP is just another name.
693 */
694
a0d0e21e
LW
695#define LOP(f,x) return lop(f,x,s)
696
ffb4593c
NT
697/*
698 * S_lop
699 * Build a list operator (or something that might be one). The rules:
700 * - if we have a next token, then it's a list operator [why?]
701 * - if the next thing is an opening paren, then it's a function
702 * - else it's a list operator
703 */
704
76e3520e 705STATIC I32
a0be28da 706S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 707{
79072805 708 yylval.ival = f;
35c8bce7 709 CLINE;
075953c3 710 REPORT("lop", f)
3280af22
NIS
711 PL_expect = x;
712 PL_bufptr = s;
713 PL_last_lop = PL_oldbufptr;
eb160463 714 PL_last_lop_op = (OPCODE)f;
3280af22 715 if (PL_nexttoke)
a0d0e21e 716 return LSTOP;
79072805
LW
717 if (*s == '(')
718 return FUNC;
719 s = skipspace(s);
720 if (*s == '(')
721 return FUNC;
722 else
723 return LSTOP;
724}
725
ffb4593c
NT
726/*
727 * S_force_next
9cbb5ea2 728 * When the lexer realizes it knows the next token (for instance,
ffb4593c 729 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
730 * to know what token to return the next time the lexer is called. Caller
731 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
732 * handles the token correctly.
ffb4593c
NT
733 */
734
4e553d73 735STATIC void
cea2e8a9 736S_force_next(pTHX_ I32 type)
79072805 737{
3280af22
NIS
738 PL_nexttype[PL_nexttoke] = type;
739 PL_nexttoke++;
740 if (PL_lex_state != LEX_KNOWNEXT) {
741 PL_lex_defer = PL_lex_state;
742 PL_lex_expect = PL_expect;
743 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
744 }
745}
746
ffb4593c
NT
747/*
748 * S_force_word
749 * When the lexer knows the next thing is a word (for instance, it has
750 * just seen -> and it knows that the next char is a word char, then
751 * it calls S_force_word to stick the next word into the PL_next lookahead.
752 *
753 * Arguments:
b1b65b59 754 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
755 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
756 * int check_keyword : if true, Perl checks to make sure the word isn't
757 * a keyword (do this if the word is a label, e.g. goto FOO)
758 * int allow_pack : if true, : characters will also be allowed (require,
759 * use, etc. do this)
9cbb5ea2 760 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
761 */
762
76e3520e 763STATIC char *
cea2e8a9 764S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 765{
463ee0b2
LW
766 register char *s;
767 STRLEN len;
4e553d73 768
463ee0b2
LW
769 start = skipspace(start);
770 s = start;
7e2040f0 771 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 772 (allow_pack && *s == ':') ||
15f0808c 773 (allow_initial_tick && *s == '\'') )
a0d0e21e 774 {
3280af22
NIS
775 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
776 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
777 return start;
778 if (token == METHOD) {
779 s = skipspace(s);
780 if (*s == '(')
3280af22 781 PL_expect = XTERM;
463ee0b2 782 else {
3280af22 783 PL_expect = XOPERATOR;
463ee0b2 784 }
79072805 785 }
3280af22
NIS
786 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
787 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
788 force_next(token);
789 }
790 return s;
791}
792
ffb4593c
NT
793/*
794 * S_force_ident
9cbb5ea2 795 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
796 * text only contains the "foo" portion. The first argument is a pointer
797 * to the "foo", and the second argument is the type symbol to prefix.
798 * Forces the next token to be a "WORD".
9cbb5ea2 799 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
800 */
801
76e3520e 802STATIC void
cea2e8a9 803S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
804{
805 if (s && *s) {
11343788 806 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 807 PL_nextval[PL_nexttoke].opval = o;
79072805 808 force_next(WORD);
748a9306 809 if (kind) {
11343788 810 o->op_private = OPpCONST_ENTERED;
55497cff 811 /* XXX see note in pp_entereval() for why we forgo typo
812 warnings if the symbol must be introduced in an eval.
813 GSAR 96-10-12 */
3280af22 814 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
815 kind == '$' ? SVt_PV :
816 kind == '@' ? SVt_PVAV :
817 kind == '%' ? SVt_PVHV :
818 SVt_PVGV
819 );
748a9306 820 }
79072805
LW
821 }
822}
823
1571675a
GS
824NV
825Perl_str_to_version(pTHX_ SV *sv)
826{
827 NV retval = 0.0;
828 NV nshift = 1.0;
829 STRLEN len;
830 char *start = SvPVx(sv,len);
3aa33fe5 831 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
832 char *end = start + len;
833 while (start < end) {
ba210ebe 834 STRLEN skip;
1571675a
GS
835 UV n;
836 if (utf)
9041c2e3 837 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
838 else {
839 n = *(U8*)start;
840 skip = 1;
841 }
842 retval += ((NV)n)/nshift;
843 start += skip;
844 nshift *= 1000;
845 }
846 return retval;
847}
848
4e553d73 849/*
ffb4593c
NT
850 * S_force_version
851 * Forces the next token to be a version number.
e759cc13
RGS
852 * If the next token appears to be an invalid version number, (e.g. "v2b"),
853 * and if "guessing" is TRUE, then no new token is created (and the caller
854 * must use an alternative parsing method).
ffb4593c
NT
855 */
856
76e3520e 857STATIC char *
e759cc13 858S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 859{
860 OP *version = Nullop;
44dcb63b 861 char *d;
89bfa8cd 862
863 s = skipspace(s);
864
44dcb63b 865 d = s;
dd629d5b 866 if (*d == 'v')
44dcb63b 867 d++;
44dcb63b 868 if (isDIGIT(*d)) {
e759cc13
RGS
869 while (isDIGIT(*d) || *d == '_' || *d == '.')
870 d++;
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd 881 }
e759cc13
RGS
882 else if (guessing)
883 return s;
89bfa8cd 884 }
885
886 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 887 PL_nextval[PL_nexttoke].opval = version;
4e553d73 888 force_next(WORD);
89bfa8cd 889
e759cc13 890 return s;
89bfa8cd 891}
892
ffb4593c
NT
893/*
894 * S_tokeq
895 * Tokenize a quoted string passed in as an SV. It finds the next
896 * chunk, up to end of string or a backslash. It may make a new
897 * SV containing that chunk (if HINT_NEW_STRING is on). It also
898 * turns \\ into \.
899 */
900
76e3520e 901STATIC SV *
cea2e8a9 902S_tokeq(pTHX_ SV *sv)
79072805
LW
903{
904 register char *s;
905 register char *send;
906 register char *d;
b3ac6de7
IZ
907 STRLEN len = 0;
908 SV *pv = sv;
79072805
LW
909
910 if (!SvLEN(sv))
b3ac6de7 911 goto finish;
79072805 912
a0d0e21e 913 s = SvPV_force(sv, len);
21a311ee 914 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 915 goto finish;
463ee0b2 916 send = s + len;
79072805
LW
917 while (s < send && *s != '\\')
918 s++;
919 if (s == send)
b3ac6de7 920 goto finish;
79072805 921 d = s;
be4731d2 922 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 923 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
924 if (SvUTF8(sv))
925 SvUTF8_on(pv);
926 }
79072805
LW
927 while (s < send) {
928 if (*s == '\\') {
a0d0e21e 929 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
930 s++; /* all that, just for this */
931 }
932 *d++ = *s++;
933 }
934 *d = '\0';
463ee0b2 935 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 936 finish:
3280af22 937 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 938 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
939 return sv;
940}
941
ffb4593c
NT
942/*
943 * Now come three functions related to double-quote context,
944 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
945 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
946 * interact with PL_lex_state, and create fake ( ... ) argument lists
947 * to handle functions and concatenation.
948 * They assume that whoever calls them will be setting up a fake
949 * join call, because each subthing puts a ',' after it. This lets
950 * "lower \luPpEr"
951 * become
952 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
953 *
954 * (I'm not sure whether the spurious commas at the end of lcfirst's
955 * arguments and join's arguments are created or not).
956 */
957
958/*
959 * S_sublex_start
960 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
961 *
962 * Pattern matching will set PL_lex_op to the pattern-matching op to
963 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
964 *
965 * OP_CONST and OP_READLINE are easy--just make the new op and return.
966 *
967 * Everything else becomes a FUNC.
968 *
969 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
970 * had an OP_CONST or OP_READLINE). This just sets us up for a
971 * call to S_sublex_push().
972 */
973
76e3520e 974STATIC I32
cea2e8a9 975S_sublex_start(pTHX)
79072805
LW
976{
977 register I32 op_type = yylval.ival;
79072805
LW
978
979 if (op_type == OP_NULL) {
3280af22
NIS
980 yylval.opval = PL_lex_op;
981 PL_lex_op = Nullop;
79072805
LW
982 return THING;
983 }
984 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 985 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
986
987 if (SvTYPE(sv) == SVt_PVIV) {
988 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
989 STRLEN len;
990 char *p;
991 SV *nsv;
992
993 p = SvPV(sv, len);
79cb57f6 994 nsv = newSVpvn(p, len);
01ec43d0
GS
995 if (SvUTF8(sv))
996 SvUTF8_on(nsv);
b3ac6de7
IZ
997 SvREFCNT_dec(sv);
998 sv = nsv;
4e553d73 999 }
b3ac6de7 1000 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1001 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1002 /* Allow <FH> // "foo" */
1003 if (op_type == OP_READLINE)
1004 PL_expect = XTERMORDORDOR;
79072805
LW
1005 return THING;
1006 }
1007
3280af22
NIS
1008 PL_sublex_info.super_state = PL_lex_state;
1009 PL_sublex_info.sub_inwhat = op_type;
1010 PL_sublex_info.sub_op = PL_lex_op;
1011 PL_lex_state = LEX_INTERPPUSH;
55497cff 1012
3280af22
NIS
1013 PL_expect = XTERM;
1014 if (PL_lex_op) {
1015 yylval.opval = PL_lex_op;
1016 PL_lex_op = Nullop;
55497cff 1017 return PMFUNC;
1018 }
1019 else
1020 return FUNC;
1021}
1022
ffb4593c
NT
1023/*
1024 * S_sublex_push
1025 * Create a new scope to save the lexing state. The scope will be
1026 * ended in S_sublex_done. Returns a '(', starting the function arguments
1027 * to the uc, lc, etc. found before.
1028 * Sets PL_lex_state to LEX_INTERPCONCAT.
1029 */
1030
76e3520e 1031STATIC I32
cea2e8a9 1032S_sublex_push(pTHX)
55497cff 1033{
f46d017c 1034 ENTER;
55497cff 1035
3280af22
NIS
1036 PL_lex_state = PL_sublex_info.super_state;
1037 SAVEI32(PL_lex_dojoin);
1038 SAVEI32(PL_lex_brackets);
3280af22
NIS
1039 SAVEI32(PL_lex_casemods);
1040 SAVEI32(PL_lex_starts);
1041 SAVEI32(PL_lex_state);
7766f137 1042 SAVEVPTR(PL_lex_inpat);
3280af22 1043 SAVEI32(PL_lex_inwhat);
57843af0 1044 SAVECOPLINE(PL_curcop);
3280af22 1045 SAVEPPTR(PL_bufptr);
8452ff4b 1046 SAVEPPTR(PL_bufend);
3280af22
NIS
1047 SAVEPPTR(PL_oldbufptr);
1048 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1049 SAVEPPTR(PL_last_lop);
1050 SAVEPPTR(PL_last_uni);
3280af22
NIS
1051 SAVEPPTR(PL_linestart);
1052 SAVESPTR(PL_linestr);
8edd5f42
RGS
1053 SAVEGENERICPV(PL_lex_brackstack);
1054 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1055
1056 PL_linestr = PL_lex_stuff;
1057 PL_lex_stuff = Nullsv;
1058
9cbb5ea2
GS
1059 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1060 = SvPVX(PL_linestr);
3280af22 1061 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1062 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1063 SAVEFREESV(PL_linestr);
1064
1065 PL_lex_dojoin = FALSE;
1066 PL_lex_brackets = 0;
3280af22
NIS
1067 New(899, PL_lex_brackstack, 120, char);
1068 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1069 PL_lex_casemods = 0;
1070 *PL_lex_casestack = '\0';
1071 PL_lex_starts = 0;
1072 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1073 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1074
1075 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1076 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1077 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1078 else
3280af22 1079 PL_lex_inpat = Nullop;
79072805 1080
55497cff 1081 return '(';
79072805
LW
1082}
1083
ffb4593c
NT
1084/*
1085 * S_sublex_done
1086 * Restores lexer state after a S_sublex_push.
1087 */
1088
76e3520e 1089STATIC I32
cea2e8a9 1090S_sublex_done(pTHX)
79072805 1091{
3280af22 1092 if (!PL_lex_starts++) {
9aa983d2
JH
1093 SV *sv = newSVpvn("",0);
1094 if (SvUTF8(PL_linestr))
1095 SvUTF8_on(sv);
3280af22 1096 PL_expect = XOPERATOR;
9aa983d2 1097 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1098 return THING;
1099 }
1100
3280af22
NIS
1101 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1102 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1103 return yylex();
79072805
LW
1104 }
1105
ffb4593c 1106 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1107 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1108 PL_linestr = PL_lex_repl;
1109 PL_lex_inpat = 0;
1110 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1111 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1112 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1113 SAVEFREESV(PL_linestr);
1114 PL_lex_dojoin = FALSE;
1115 PL_lex_brackets = 0;
3280af22
NIS
1116 PL_lex_casemods = 0;
1117 *PL_lex_casestack = '\0';
1118 PL_lex_starts = 0;
25da4f38 1119 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1120 PL_lex_state = LEX_INTERPNORMAL;
1121 PL_lex_starts++;
e9fa98b2
HS
1122 /* we don't clear PL_lex_repl here, so that we can check later
1123 whether this is an evalled subst; that means we rely on the
1124 logic to ensure sublex_done() is called again only via the
1125 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1126 }
e9fa98b2 1127 else {
3280af22 1128 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1129 PL_lex_repl = Nullsv;
1130 }
79072805 1131 return ',';
ffed7fef
LW
1132 }
1133 else {
f46d017c 1134 LEAVE;
3280af22
NIS
1135 PL_bufend = SvPVX(PL_linestr);
1136 PL_bufend += SvCUR(PL_linestr);
1137 PL_expect = XOPERATOR;
09bef843 1138 PL_sublex_info.sub_inwhat = 0;
79072805 1139 return ')';
ffed7fef
LW
1140 }
1141}
1142
02aa26ce
NT
1143/*
1144 scan_const
1145
1146 Extracts a pattern, double-quoted string, or transliteration. This
1147 is terrifying code.
1148
3280af22
NIS
1149 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1150 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1151 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1152
9b599b2a
GS
1153 Returns a pointer to the character scanned up to. Iff this is
1154 advanced from the start pointer supplied (ie if anything was
1155 successfully parsed), will leave an OP for the substring scanned
1156 in yylval. Caller must intuit reason for not parsing further
1157 by looking at the next characters herself.
1158
02aa26ce
NT
1159 In patterns:
1160 backslashes:
1161 double-quoted style: \r and \n
1162 regexp special ones: \D \s
1163 constants: \x3
1164 backrefs: \1 (deprecated in substitution replacements)
1165 case and quoting: \U \Q \E
1166 stops on @ and $, but not for $ as tail anchor
1167
1168 In transliterations:
1169 characters are VERY literal, except for - not at the start or end
1170 of the string, which indicates a range. scan_const expands the
1171 range to the full set of intermediate characters.
1172
1173 In double-quoted strings:
1174 backslashes:
1175 double-quoted style: \r and \n
1176 constants: \x3
1177 backrefs: \1 (deprecated)
1178 case and quoting: \U \Q \E
1179 stops on @ and $
1180
1181 scan_const does *not* construct ops to handle interpolated strings.
1182 It stops processing as soon as it finds an embedded $ or @ variable
1183 and leaves it to the caller to work out what's going on.
1184
da6eedaa 1185 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1186
1187 $ in pattern could be $foo or could be tail anchor. Assumption:
1188 it's a tail anchor if $ is the last thing in the string, or if it's
1189 followed by one of ")| \n\t"
1190
1191 \1 (backreferences) are turned into $1
1192
1193 The structure of the code is
1194 while (there's a character to process) {
1195 handle transliteration ranges
1196 skip regexp comments
1197 skip # initiated comments in //x patterns
1198 check for embedded @foo
1199 check for embedded scalars
1200 if (backslash) {
1201 leave intact backslashes from leave (below)
1202 deprecate \1 in strings and sub replacements
1203 handle string-changing backslashes \l \U \Q \E, etc.
1204 switch (what was escaped) {
1205 handle - in a transliteration (becomes a literal -)
1206 handle \132 octal characters
1207 handle 0x15 hex characters
1208 handle \cV (control V)
1209 handle printf backslashes (\f, \r, \n, etc)
1210 } (end switch)
1211 } (end if backslash)
1212 } (end while character to read)
4e553d73 1213
02aa26ce
NT
1214*/
1215
76e3520e 1216STATIC char *
cea2e8a9 1217S_scan_const(pTHX_ char *start)
79072805 1218{
3280af22 1219 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1220 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1221 register char *s = start; /* start of the constant */
1222 register char *d = SvPVX(sv); /* destination for copies */
1223 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1224 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1225 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1226 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1227 UV uv;
1228
dff6d3cd 1229 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1230 PL_lex_inpat
4a2d328f 1231 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1232 : "";
79072805 1233
2b9d42f0
NIS
1234 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1235 /* If we are doing a trans and we know we want UTF8 set expectation */
1236 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1237 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1238 }
1239
1240
79072805 1241 while (s < send || dorange) {
02aa26ce 1242 /* get transliterations out of the way (they're most literal) */
3280af22 1243 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1244 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1245 if (dorange) {
1ba5c669
JH
1246 I32 i; /* current expanded character */
1247 I32 min; /* first character in range */
1248 I32 max; /* last character in range */
02aa26ce 1249
2b9d42f0 1250 if (has_utf8) {
8973db79
JH
1251 char *c = (char*)utf8_hop((U8*)d, -1);
1252 char *e = d++;
1253 while (e-- > c)
1254 *(e + 1) = *e;
25716404 1255 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1256 /* mark the range as done, and continue */
1257 dorange = FALSE;
1258 didrange = TRUE;
1259 continue;
1260 }
2b9d42f0 1261
02aa26ce 1262 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1263 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1264 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1265 d -= 2; /* eat the first char and the - */
1266
8ada0baa
JH
1267 min = (U8)*d; /* first char in range */
1268 max = (U8)d[1]; /* last char in range */
1269
c2e66d9e 1270 if (min > max) {
01ec43d0 1271 Perl_croak(aTHX_
d1573ac7 1272 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1273 (char)min, (char)max);
c2e66d9e
GS
1274 }
1275
c7f1f016 1276#ifdef EBCDIC
8ada0baa
JH
1277 if ((isLOWER(min) && isLOWER(max)) ||
1278 (isUPPER(min) && isUPPER(max))) {
1279 if (isLOWER(min)) {
1280 for (i = min; i <= max; i++)
1281 if (isLOWER(i))
db42d148 1282 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1283 } else {
1284 for (i = min; i <= max; i++)
1285 if (isUPPER(i))
db42d148 1286 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1287 }
1288 }
1289 else
1290#endif
1291 for (i = min; i <= max; i++)
eb160463 1292 *d++ = (char)i;
02aa26ce
NT
1293
1294 /* mark the range as done, and continue */
79072805 1295 dorange = FALSE;
01ec43d0 1296 didrange = TRUE;
79072805 1297 continue;
4e553d73 1298 }
02aa26ce
NT
1299
1300 /* range begins (ignore - as first or last char) */
79072805 1301 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1302 if (didrange) {
1fafa243 1303 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1304 }
2b9d42f0 1305 if (has_utf8) {
25716404 1306 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1307 s++;
1308 continue;
1309 }
79072805
LW
1310 dorange = TRUE;
1311 s++;
01ec43d0
GS
1312 }
1313 else {
1314 didrange = FALSE;
1315 }
79072805 1316 }
02aa26ce
NT
1317
1318 /* if we get here, we're not doing a transliteration */
1319
0f5d15d6
IZ
1320 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1321 except for the last char, which will be done separately. */
3280af22 1322 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1323 if (s[2] == '#') {
1324 while (s < send && *s != ')')
db42d148 1325 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1326 }
1327 else if (s[2] == '{' /* This should match regcomp.c */
1328 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1329 {
cc6b7395 1330 I32 count = 1;
0f5d15d6 1331 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1332 char c;
1333
d9f97599
GS
1334 while (count && (c = *regparse)) {
1335 if (c == '\\' && regparse[1])
1336 regparse++;
4e553d73 1337 else if (c == '{')
cc6b7395 1338 count++;
4e553d73 1339 else if (c == '}')
cc6b7395 1340 count--;
d9f97599 1341 regparse++;
cc6b7395 1342 }
5bdf89e7
IZ
1343 if (*regparse != ')') {
1344 regparse--; /* Leave one char for continuation. */
cc6b7395 1345 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1346 }
0f5d15d6 1347 while (s < regparse)
db42d148 1348 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1349 }
748a9306 1350 }
02aa26ce
NT
1351
1352 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1353 else if (*s == '#' && PL_lex_inpat &&
1354 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1355 while (s+1 < send && *s != '\n')
db42d148 1356 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1357 }
02aa26ce 1358
5d1d4326 1359 /* check for embedded arrays
da6eedaa 1360 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1361 */
7e2040f0 1362 else if (*s == '@' && s[1]
5d1d4326 1363 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1364 break;
02aa26ce
NT
1365
1366 /* check for embedded scalars. only stop if we're sure it's a
1367 variable.
1368 */
79072805 1369 else if (*s == '$') {
3280af22 1370 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1371 break;
6002328a 1372 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1373 break; /* in regexp, $ might be tail anchor */
1374 }
02aa26ce 1375
2b9d42f0
NIS
1376 /* End of else if chain - OP_TRANS rejoin rest */
1377
02aa26ce 1378 /* backslashes */
79072805
LW
1379 if (*s == '\\' && s+1 < send) {
1380 s++;
02aa26ce
NT
1381
1382 /* some backslashes we leave behind */
c9f97d15 1383 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1384 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1385 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1386 continue;
1387 }
02aa26ce
NT
1388
1389 /* deprecate \1 in strings and substitution replacements */
3280af22 1390 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1391 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1392 {
599cee73 1393 if (ckWARN(WARN_SYNTAX))
9014280d 1394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1395 *--s = '$';
1396 break;
1397 }
02aa26ce
NT
1398
1399 /* string-change backslash escapes */
3280af22 1400 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1401 --s;
1402 break;
1403 }
02aa26ce
NT
1404
1405 /* if we get here, it's either a quoted -, or a digit */
79072805 1406 switch (*s) {
02aa26ce
NT
1407
1408 /* quoted - in transliterations */
79072805 1409 case '-':
3280af22 1410 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1411 *d++ = *s++;
1412 continue;
1413 }
1414 /* FALL THROUGH */
1415 default:
11b8faa4 1416 {
707afd92
MS
1417 if (ckWARN(WARN_MISC) &&
1418 isALNUM(*s) &&
1419 *s != '_')
9014280d 1420 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1421 "Unrecognized escape \\%c passed through",
1422 *s);
1423 /* default action is to copy the quoted character */
f9a63242 1424 goto default_action;
11b8faa4 1425 }
02aa26ce
NT
1426
1427 /* \132 indicates an octal constant */
79072805
LW
1428 case '0': case '1': case '2': case '3':
1429 case '4': case '5': case '6': case '7':
ba210ebe 1430 {
53305cf1
NC
1431 I32 flags = 0;
1432 STRLEN len = 3;
1433 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1434 s += len;
1435 }
012bcf8d 1436 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1437
1438 /* \x24 indicates a hex constant */
79072805 1439 case 'x':
a0ed51b3
LW
1440 ++s;
1441 if (*s == '{') {
1442 char* e = strchr(s, '}');
a4c04bdc
NC
1443 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1444 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1445 STRLEN len;
355860ce 1446
53305cf1 1447 ++s;
adaeee49 1448 if (!e) {
a0ed51b3 1449 yyerror("Missing right brace on \\x{}");
355860ce 1450 continue;
ba210ebe 1451 }
53305cf1
NC
1452 len = e - s;
1453 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1454 s = e + 1;
a0ed51b3
LW
1455 }
1456 else {
ba210ebe 1457 {
53305cf1 1458 STRLEN len = 2;
a4c04bdc 1459 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1460 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1461 s += len;
1462 }
012bcf8d
GS
1463 }
1464
1465 NUM_ESCAPE_INSERT:
1466 /* Insert oct or hex escaped character.
301d3d20 1467 * There will always enough room in sv since such
db42d148 1468 * escapes will be longer than any UTF-8 sequence
301d3d20 1469 * they can end up as. */
ba7cea30 1470
c7f1f016
NIS
1471 /* We need to map to chars to ASCII before doing the tests
1472 to cover EBCDIC
1473 */
c4d5f83a 1474 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1475 if (!has_utf8 && uv > 255) {
301d3d20
JH
1476 /* Might need to recode whatever we have
1477 * accumulated so far if it contains any
1478 * hibit chars.
1479 *
1480 * (Can't we keep track of that and avoid
1481 * this rescan? --jhi)
012bcf8d 1482 */
c7f1f016 1483 int hicount = 0;
63cd0674
NIS
1484 U8 *c;
1485 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1486 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1487 hicount++;
db42d148 1488 }
012bcf8d 1489 }
63cd0674 1490 if (hicount) {
db42d148
NIS
1491 STRLEN offset = d - SvPVX(sv);
1492 U8 *src, *dst;
1493 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1494 src = (U8 *)d - 1;
1495 dst = src+hicount;
1496 d += hicount;
1497 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1498 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1499 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1500 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1501 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1502 }
1503 else {
63cd0674 1504 *dst-- = *src;
012bcf8d 1505 }
c7f1f016 1506 src--;
012bcf8d
GS
1507 }
1508 }
1509 }
1510
9aa983d2 1511 if (has_utf8 || uv > 255) {
9041c2e3 1512 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1513 has_utf8 = TRUE;
f9a63242
JH
1514 if (PL_lex_inwhat == OP_TRANS &&
1515 PL_sublex_info.sub_op) {
1516 PL_sublex_info.sub_op->op_private |=
1517 (PL_lex_repl ? OPpTRANS_FROM_UTF
1518 : OPpTRANS_TO_UTF);
f9a63242 1519 }
012bcf8d 1520 }
a0ed51b3 1521 else {
012bcf8d 1522 *d++ = (char)uv;
a0ed51b3 1523 }
012bcf8d
GS
1524 }
1525 else {
c4d5f83a 1526 *d++ = (char) uv;
a0ed51b3 1527 }
79072805 1528 continue;
02aa26ce 1529
b239daa5 1530 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1531 case 'N':
55eda711 1532 ++s;
423cee85
JH
1533 if (*s == '{') {
1534 char* e = strchr(s, '}');
155aba94 1535 SV *res;
423cee85
JH
1536 STRLEN len;
1537 char *str;
4e553d73 1538
423cee85 1539 if (!e) {
5777a3f7 1540 yyerror("Missing right brace on \\N{}");
423cee85
JH
1541 e = s - 1;
1542 goto cont_scan;
1543 }
dbc0d4f2
JH
1544 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1545 /* \N{U+...} */
1546 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1547 PERL_SCAN_DISALLOW_PREFIX;
1548 s += 3;
1549 len = e - s;
1550 uv = grok_hex(s, &len, &flags, NULL);
1551 s = e + 1;
1552 goto NUM_ESCAPE_INSERT;
1553 }
55eda711
JH
1554 res = newSVpvn(s + 1, e - s - 1);
1555 res = new_constant( Nullch, 0, "charnames",
1556 res, Nullsv, "\\N{...}" );
f9a63242
JH
1557 if (has_utf8)
1558 sv_utf8_upgrade(res);
423cee85 1559 str = SvPV(res,len);
1c47067b
JH
1560#ifdef EBCDIC_NEVER_MIND
1561 /* charnames uses pack U and that has been
1562 * recently changed to do the below uni->native
1563 * mapping, so this would be redundant (and wrong,
1564 * the code point would be doubly converted).
1565 * But leave this in just in case the pack U change
1566 * gets revoked, but the semantics is still
1567 * desireable for charnames. --jhi */
cddc7ef4
JH
1568 {
1569 UV uv = utf8_to_uvchr((U8*)str, 0);
1570
1571 if (uv < 0x100) {
1572 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1573
1574 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1575 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1576 str = SvPV(res, len);
1577 }
1578 }
1579#endif
89491803 1580 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1581 char *ostart = SvPVX(sv);
1582 SvCUR_set(sv, d - ostart);
1583 SvPOK_on(sv);
e4f3eed8 1584 *d = '\0';
f08d6ad9 1585 sv_utf8_upgrade(sv);
d2f449dd 1586 /* this just broke our allocation above... */
eb160463 1587 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1588 d = SvPVX(sv) + SvCUR(sv);
89491803 1589 has_utf8 = TRUE;
f08d6ad9 1590 }
eb160463 1591 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1592 char *odest = SvPVX(sv);
1593
8973db79 1594 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1595 d = SvPVX(sv) + (d - odest);
1596 }
1597 Copy(str, d, len, char);
1598 d += len;
1599 SvREFCNT_dec(res);
1600 cont_scan:
1601 s = e + 1;
1602 }
1603 else
5777a3f7 1604 yyerror("Missing braces on \\N{}");
423cee85
JH
1605 continue;
1606
02aa26ce 1607 /* \c is a control character */
79072805
LW
1608 case 'c':
1609 s++;
961ce445 1610 if (s < send) {
ba210ebe 1611 U8 c = *s++;
c7f1f016
NIS
1612#ifdef EBCDIC
1613 if (isLOWER(c))
1614 c = toUPPER(c);
1615#endif
db42d148 1616 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1617 }
961ce445
RGS
1618 else {
1619 yyerror("Missing control char name in \\c");
1620 }
79072805 1621 continue;
02aa26ce
NT
1622
1623 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1624 case 'b':
db42d148 1625 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1626 break;
1627 case 'n':
db42d148 1628 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1629 break;
1630 case 'r':
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1632 break;
1633 case 'f':
db42d148 1634 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1635 break;
1636 case 't':
db42d148 1637 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1638 break;
34a3fe2a 1639 case 'e':
db42d148 1640 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1641 break;
1642 case 'a':
db42d148 1643 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1644 break;
02aa26ce
NT
1645 } /* end switch */
1646
79072805
LW
1647 s++;
1648 continue;
02aa26ce
NT
1649 } /* end if (backslash) */
1650
f9a63242 1651 default_action:
2b9d42f0
NIS
1652 /* If we started with encoded form, or already know we want it
1653 and then encode the next character */
1654 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1655 STRLEN len = 1;
1656 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1657 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1658 s += len;
1659 if (need > len) {
1660 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1661 STRLEN off = d - SvPVX(sv);
1662 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1663 }
1664 d = (char*)uvchr_to_utf8((U8*)d, uv);
1665 has_utf8 = TRUE;
1666 }
1667 else {
1668 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1669 }
02aa26ce
NT
1670 } /* while loop to process each character */
1671
1672 /* terminate the string and set up the sv */
79072805 1673 *d = '\0';
463ee0b2 1674 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1675 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1676 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1677
79072805 1678 SvPOK_on(sv);
9f4817db 1679 if (PL_encoding && !has_utf8) {
d0063567
DK
1680 sv_recode_to_utf8(sv, PL_encoding);
1681 if (SvUTF8(sv))
1682 has_utf8 = TRUE;
9f4817db 1683 }
2b9d42f0 1684 if (has_utf8) {
7e2040f0 1685 SvUTF8_on(sv);
2b9d42f0 1686 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1687 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1688 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1689 }
1690 }
79072805 1691
02aa26ce 1692 /* shrink the sv if we allocated more than we used */
79072805
LW
1693 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1694 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1695 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1696 }
02aa26ce 1697
9b599b2a 1698 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1699 if (s > PL_bufptr) {
1700 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1701 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1702 sv, Nullsv,
4e553d73 1703 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1704 ? "tr"
3280af22 1705 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1706 ? "s"
1707 : "qq")));
79072805 1708 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1709 } else
8990e307 1710 SvREFCNT_dec(sv);
79072805
LW
1711 return s;
1712}
1713
ffb4593c
NT
1714/* S_intuit_more
1715 * Returns TRUE if there's more to the expression (e.g., a subscript),
1716 * FALSE otherwise.
ffb4593c
NT
1717 *
1718 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1719 *
1720 * ->[ and ->{ return TRUE
1721 * { and [ outside a pattern are always subscripts, so return TRUE
1722 * if we're outside a pattern and it's not { or [, then return FALSE
1723 * if we're in a pattern and the first char is a {
1724 * {4,5} (any digits around the comma) returns FALSE
1725 * if we're in a pattern and the first char is a [
1726 * [] returns FALSE
1727 * [SOMETHING] has a funky algorithm to decide whether it's a
1728 * character class or not. It has to deal with things like
1729 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1730 * anything else returns TRUE
1731 */
1732
9cbb5ea2
GS
1733/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1734
76e3520e 1735STATIC int
cea2e8a9 1736S_intuit_more(pTHX_ register char *s)
79072805 1737{
3280af22 1738 if (PL_lex_brackets)
79072805
LW
1739 return TRUE;
1740 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1741 return TRUE;
1742 if (*s != '{' && *s != '[')
1743 return FALSE;
3280af22 1744 if (!PL_lex_inpat)
79072805
LW
1745 return TRUE;
1746
1747 /* In a pattern, so maybe we have {n,m}. */
1748 if (*s == '{') {
1749 s++;
1750 if (!isDIGIT(*s))
1751 return TRUE;
1752 while (isDIGIT(*s))
1753 s++;
1754 if (*s == ',')
1755 s++;
1756 while (isDIGIT(*s))
1757 s++;
1758 if (*s == '}')
1759 return FALSE;
1760 return TRUE;
1761
1762 }
1763
1764 /* On the other hand, maybe we have a character class */
1765
1766 s++;
1767 if (*s == ']' || *s == '^')
1768 return FALSE;
1769 else {
ffb4593c 1770 /* this is terrifying, and it works */
79072805
LW
1771 int weight = 2; /* let's weigh the evidence */
1772 char seen[256];
f27ffc4a 1773 unsigned char un_char = 255, last_un_char;
93a17b20 1774 char *send = strchr(s,']');
3280af22 1775 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1776
1777 if (!send) /* has to be an expression */
1778 return TRUE;
1779
1780 Zero(seen,256,char);
1781 if (*s == '$')
1782 weight -= 3;
1783 else if (isDIGIT(*s)) {
1784 if (s[1] != ']') {
1785 if (isDIGIT(s[1]) && s[2] == ']')
1786 weight -= 10;
1787 }
1788 else
1789 weight -= 100;
1790 }
1791 for (; s < send; s++) {
1792 last_un_char = un_char;
1793 un_char = (unsigned char)*s;
1794 switch (*s) {
1795 case '@':
1796 case '&':
1797 case '$':
1798 weight -= seen[un_char] * 10;
7e2040f0 1799 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1800 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1801 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1802 weight -= 100;
1803 else
1804 weight -= 10;
1805 }
1806 else if (*s == '$' && s[1] &&
93a17b20
LW
1807 strchr("[#!%*<>()-=",s[1])) {
1808 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1809 weight -= 10;
1810 else
1811 weight -= 1;
1812 }
1813 break;
1814 case '\\':
1815 un_char = 254;
1816 if (s[1]) {
93a17b20 1817 if (strchr("wds]",s[1]))
79072805
LW
1818 weight += 100;
1819 else if (seen['\''] || seen['"'])
1820 weight += 1;
93a17b20 1821 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1822 weight += 40;
1823 else if (isDIGIT(s[1])) {
1824 weight += 40;
1825 while (s[1] && isDIGIT(s[1]))
1826 s++;
1827 }
1828 }
1829 else
1830 weight += 100;
1831 break;
1832 case '-':
1833 if (s[1] == '\\')
1834 weight += 50;
93a17b20 1835 if (strchr("aA01! ",last_un_char))
79072805 1836 weight += 30;
93a17b20 1837 if (strchr("zZ79~",s[1]))
79072805 1838 weight += 30;
f27ffc4a
GS
1839 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1840 weight -= 5; /* cope with negative subscript */
79072805
LW
1841 break;
1842 default:
93a17b20 1843 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1844 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1845 char *d = tmpbuf;
1846 while (isALPHA(*s))
1847 *d++ = *s++;
1848 *d = '\0';
1849 if (keyword(tmpbuf, d - tmpbuf))
1850 weight -= 150;
1851 }
1852 if (un_char == last_un_char + 1)
1853 weight += 5;
1854 weight -= seen[un_char];
1855 break;
1856 }
1857 seen[un_char]++;
1858 }
1859 if (weight >= 0) /* probably a character class */
1860 return FALSE;
1861 }
1862
1863 return TRUE;
1864}
ffed7fef 1865
ffb4593c
NT
1866/*
1867 * S_intuit_method
1868 *
1869 * Does all the checking to disambiguate
1870 * foo bar
1871 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1872 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1873 *
1874 * First argument is the stuff after the first token, e.g. "bar".
1875 *
1876 * Not a method if bar is a filehandle.
1877 * Not a method if foo is a subroutine prototyped to take a filehandle.
1878 * Not a method if it's really "Foo $bar"
1879 * Method if it's "foo $bar"
1880 * Not a method if it's really "print foo $bar"
1881 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1882 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1883 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1884 * =>
1885 */
1886
76e3520e 1887STATIC int
cea2e8a9 1888S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1889{
1890 char *s = start + (*start == '$');
3280af22 1891 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1892 STRLEN len;
1893 GV* indirgv;
1894
1895 if (gv) {
b6c543e3 1896 CV *cv;
a0d0e21e
LW
1897 if (GvIO(gv))
1898 return 0;
b6c543e3
IZ
1899 if ((cv = GvCVu(gv))) {
1900 char *proto = SvPVX(cv);
1901 if (proto) {
1902 if (*proto == ';')
1903 proto++;
1904 if (*proto == '*')
1905 return 0;
1906 }
1907 } else
a0d0e21e
LW
1908 gv = 0;
1909 }
8903cb82 1910 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1911 /* start is the beginning of the possible filehandle/object,
1912 * and s is the end of it
1913 * tmpbuf is a copy of it
1914 */
1915
a0d0e21e 1916 if (*start == '$') {
3280af22 1917 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1918 return 0;
1919 s = skipspace(s);
3280af22
NIS
1920 PL_bufptr = start;
1921 PL_expect = XREF;
a0d0e21e
LW
1922 return *s == '(' ? FUNCMETH : METHOD;
1923 }
1924 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1925 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1926 len -= 2;
1927 tmpbuf[len] = '\0';
1928 goto bare_package;
1929 }
1930 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1931 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1932 return 0;
1933 /* filehandle or package name makes it a method */
89bfa8cd 1934 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1935 s = skipspace(s);
3280af22 1936 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1937 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1938 bare_package:
3280af22 1939 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1940 newSVpvn(tmpbuf,len));
3280af22
NIS
1941 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1942 PL_expect = XTERM;
a0d0e21e 1943 force_next(WORD);
3280af22 1944 PL_bufptr = s;
a0d0e21e
LW
1945 return *s == '(' ? FUNCMETH : METHOD;
1946 }
1947 }
1948 return 0;
1949}
1950
ffb4593c
NT
1951/*
1952 * S_incl_perldb
1953 * Return a string of Perl code to load the debugger. If PERL5DB
1954 * is set, it will return the contents of that, otherwise a
1955 * compile-time require of perl5db.pl.
1956 */
1957
76e3520e 1958STATIC char*
cea2e8a9 1959S_incl_perldb(pTHX)
a0d0e21e 1960{
3280af22 1961 if (PL_perldb) {
76e3520e 1962 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1963
1964 if (pdb)
1965 return pdb;
93189314 1966 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1967 return "BEGIN { require 'perl5db.pl' }";
1968 }
1969 return "";
1970}
1971
1972
16d20bd9 1973/* Encoded script support. filter_add() effectively inserts a
4e553d73 1974 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1975 * Note that the filter function only applies to the current source file
1976 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1977 *
1978 * The datasv parameter (which may be NULL) can be used to pass
1979 * private data to this instance of the filter. The filter function
1980 * can recover the SV using the FILTER_DATA macro and use it to
1981 * store private buffers and state information.
1982 *
1983 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1984 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1985 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1986 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1987 * private use must be set using malloc'd pointers.
1988 */
16d20bd9
AD
1989
1990SV *
864dbfa3 1991Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1992{
f4c556ac
GS
1993 if (!funcp)
1994 return Nullsv;
1995
3280af22
NIS
1996 if (!PL_rsfp_filters)
1997 PL_rsfp_filters = newAV();
16d20bd9 1998 if (!datasv)
8c52afec 1999 datasv = NEWSV(255,0);
16d20bd9 2000 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2001 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2002 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2003 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2004 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2005 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2006 av_unshift(PL_rsfp_filters, 1);
2007 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2008 return(datasv);
2009}
4e553d73 2010
16d20bd9
AD
2011
2012/* Delete most recently added instance of this filter function. */
a0d0e21e 2013void
864dbfa3 2014Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2015{
e0c19803 2016 SV *datasv;
fe5a182c 2017 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2018 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2019 return;
2020 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2021 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2022 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2023 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2024 IoANY(datasv) = (void *)NULL;
3280af22 2025 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2026
16d20bd9
AD
2027 return;
2028 }
2029 /* we need to search for the correct entry and clear it */
cea2e8a9 2030 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2031}
2032
2033
2034/* Invoke the n'th filter function for the current rsfp. */
2035I32
864dbfa3 2036Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2037
2038
8ac85365 2039 /* 0 = read one text line */
a0d0e21e 2040{
16d20bd9
AD
2041 filter_t funcp;
2042 SV *datasv = NULL;
e50aee73 2043
3280af22 2044 if (!PL_rsfp_filters)
16d20bd9 2045 return -1;
3280af22 2046 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2047 /* Provide a default input filter to make life easy. */
2048 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2049 DEBUG_P(PerlIO_printf(Perl_debug_log,
2050 "filter_read %d: from rsfp\n", idx));
4e553d73 2051 if (maxlen) {
16d20bd9
AD
2052 /* Want a block */
2053 int len ;
2054 int old_len = SvCUR(buf_sv) ;
2055
2056 /* ensure buf_sv is large enough */
eb160463 2057 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2058 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2059 if (PerlIO_error(PL_rsfp))
37120919
AD
2060 return -1; /* error */
2061 else
2062 return 0 ; /* end of file */
2063 }
16d20bd9
AD
2064 SvCUR_set(buf_sv, old_len + len) ;
2065 } else {
2066 /* Want a line */
3280af22
NIS
2067 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2068 if (PerlIO_error(PL_rsfp))
37120919
AD
2069 return -1; /* error */
2070 else
2071 return 0 ; /* end of file */
2072 }
16d20bd9
AD
2073 }
2074 return SvCUR(buf_sv);
2075 }
2076 /* Skip this filter slot if filter has been deleted */
3280af22 2077 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2078 DEBUG_P(PerlIO_printf(Perl_debug_log,
2079 "filter_read %d: skipped (filter deleted)\n",
2080 idx));
16d20bd9
AD
2081 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2082 }
2083 /* Get function pointer hidden within datasv */
4755096e 2084 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2085 DEBUG_P(PerlIO_printf(Perl_debug_log,
2086 "filter_read %d: via function %p (%s)\n",
fe5a182c 2087 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2088 /* Call function. The function is expected to */
2089 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2090 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2091 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2092}
2093
76e3520e 2094STATIC char *
cea2e8a9 2095S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2096{
c39cd008 2097#ifdef PERL_CR_FILTER
3280af22 2098 if (!PL_rsfp_filters) {
c39cd008 2099 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2100 }
2101#endif
3280af22 2102 if (PL_rsfp_filters) {
16d20bd9 2103
55497cff 2104 if (!append)
2105 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2106 if (FILTER_READ(0, sv, 0) > 0)
2107 return ( SvPVX(sv) ) ;
2108 else
2109 return Nullch ;
2110 }
9d116dd7 2111 else
fd049845 2112 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2113}
2114
01ec43d0
GS
2115STATIC HV *
2116S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2117{
2118 GV *gv;
2119
01ec43d0 2120 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2121 return PL_curstash;
2122
2123 if (len > 2 &&
2124 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2125 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2126 {
2127 return GvHV(gv); /* Foo:: */
def3634b
GS
2128 }
2129
2130 /* use constant CLASS => 'MyClass' */
2131 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2132 SV *sv;
2133 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2134 pkgname = SvPV_nolen(sv);
2135 }
2136 }
2137
2138 return gv_stashpv(pkgname, FALSE);
2139}
a0d0e21e 2140
748a9306
LW
2141#ifdef DEBUGGING
2142 static char* exp_name[] =
09bef843
SB
2143 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2144 "ATTRTERM", "TERMBLOCK"
2145 };
748a9306 2146#endif
463ee0b2 2147
02aa26ce
NT
2148/*
2149 yylex
2150
2151 Works out what to call the token just pulled out of the input
2152 stream. The yacc parser takes care of taking the ops we return and
2153 stitching them into a tree.
2154
2155 Returns:
2156 PRIVATEREF
2157
2158 Structure:
2159 if read an identifier
2160 if we're in a my declaration
2161 croak if they tried to say my($foo::bar)
2162 build the ops for a my() declaration
2163 if it's an access to a my() variable
2164 are we in a sort block?
2165 croak if my($a); $a <=> $b
2166 build ops for access to a my() variable
2167 if in a dq string, and they've said @foo and we can't find @foo
2168 croak
2169 build ops for a bareword
2170 if we already built the token before, use it.
2171*/
2172
dba4d153 2173#ifdef USE_PURE_BISON
864dbfa3 2174int
dba4d153 2175Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2176{
20141f0e
IRC
2177 int r;
2178
6f202aea 2179 yyactlevel++;
20141f0e
IRC
2180 yylval_pointer[yyactlevel] = lvalp;
2181 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2182 if (yyactlevel >= YYMAXLEVEL)
2183 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2184
dba4d153 2185 r = Perl_yylex(aTHX);
20141f0e 2186
d8ae6756
IRC
2187 if (yyactlevel > 0)
2188 yyactlevel--;
20141f0e
IRC
2189
2190 return r;
2191}
dba4d153 2192#endif
20141f0e 2193
dba4d153
JH
2194#ifdef __SC__
2195#pragma segment Perl_yylex
2196#endif
dba4d153 2197int
dba4d153 2198Perl_yylex(pTHX)
20141f0e 2199{
79072805 2200 register char *s;
378cc40b 2201 register char *d;
79072805 2202 register I32 tmp;
463ee0b2 2203 STRLEN len;
161b471a
NIS
2204 GV *gv = Nullgv;
2205 GV **gvp = 0;
aa7440fb 2206 bool bof = FALSE;
1d239bbb 2207 I32 orig_keyword = 0;
a687059c 2208
02aa26ce 2209 /* check if there's an identifier for us to look at */
ba979b31 2210 if (PL_pending_ident)
e930465f 2211 return S_pending_ident(aTHX);
bbce6d69 2212
02aa26ce
NT
2213 /* no identifier pending identification */
2214
3280af22 2215 switch (PL_lex_state) {
79072805
LW
2216#ifdef COMMENTARY
2217 case LEX_NORMAL: /* Some compilers will produce faster */
2218 case LEX_INTERPNORMAL: /* code if we comment these out. */
2219 break;
2220#endif
2221
09bef843 2222 /* when we've already built the next token, just pull it out of the queue */
79072805 2223 case LEX_KNOWNEXT:
3280af22
NIS
2224 PL_nexttoke--;
2225 yylval = PL_nextval[PL_nexttoke];
2226 if (!PL_nexttoke) {
2227 PL_lex_state = PL_lex_defer;
2228 PL_expect = PL_lex_expect;
2229 PL_lex_defer = LEX_NORMAL;
463ee0b2 2230 }
607df283 2231 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2232 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2233 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2234
3280af22 2235 return(PL_nexttype[PL_nexttoke]);
79072805 2236
02aa26ce 2237 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2238 when we get here, PL_bufptr is at the \
02aa26ce 2239 */
79072805
LW
2240 case LEX_INTERPCASEMOD:
2241#ifdef DEBUGGING
3280af22 2242 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2243 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2244#endif
02aa26ce 2245 /* handle \E or end of string */
3280af22 2246 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2247 char oldmod;
02aa26ce
NT
2248
2249 /* if at a \E */
3280af22
NIS
2250 if (PL_lex_casemods) {
2251 oldmod = PL_lex_casestack[--PL_lex_casemods];
2252 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2253
3280af22
NIS
2254 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2255 PL_bufptr += 2;
2256 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2257 }
79072805
LW
2258 return ')';
2259 }
3280af22
NIS
2260 if (PL_bufptr != PL_bufend)
2261 PL_bufptr += 2;
2262 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2263 return yylex();
79072805
LW
2264 }
2265 else {
607df283 2266 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2267 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2268 s = PL_bufptr + 1;
79072805 2269 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
eb160463 2270 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
a0d0e21e 2271 if (strchr("LU", *s) &&
3280af22 2272 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2273 {
3280af22 2274 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2275 return ')';
2276 }
3280af22 2277 if (PL_lex_casemods > 10) {
8edd5f42 2278 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
a0d0e21e 2279 }
3280af22
NIS
2280 PL_lex_casestack[PL_lex_casemods++] = *s;
2281 PL_lex_casestack[PL_lex_casemods] = '\0';
2282 PL_lex_state = LEX_INTERPCONCAT;
2283 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2284 force_next('(');
2285 if (*s == 'l')
3280af22 2286 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2287 else if (*s == 'u')
3280af22 2288 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2289 else if (*s == 'L')
3280af22 2290 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2291 else if (*s == 'U')
3280af22 2292 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2293 else if (*s == 'Q')
3280af22 2294 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2295 else
cea2e8a9 2296 Perl_croak(aTHX_ "panic: yylex");
3280af22 2297 PL_bufptr = s + 1;
79072805 2298 force_next(FUNC);
3280af22
NIS
2299 if (PL_lex_starts) {
2300 s = PL_bufptr;
2301 PL_lex_starts = 0;
79072805
LW
2302 Aop(OP_CONCAT);
2303 }
2304 else
cea2e8a9 2305 return yylex();
79072805
LW
2306 }
2307
55497cff 2308 case LEX_INTERPPUSH:
2309 return sublex_push();
2310
79072805 2311 case LEX_INTERPSTART:
3280af22 2312 if (PL_bufptr == PL_bufend)
79072805 2313 return sublex_done();
607df283 2314 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2315 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2316 PL_expect = XTERM;
2317 PL_lex_dojoin = (*PL_bufptr == '@');
2318 PL_lex_state = LEX_INTERPNORMAL;
2319 if (PL_lex_dojoin) {
2320 PL_nextval[PL_nexttoke].ival = 0;
79072805 2321 force_next(',');
a0d0e21e 2322 force_ident("\"", '$');
3280af22 2323 PL_nextval[PL_nexttoke].ival = 0;
79072805 2324 force_next('$');
3280af22 2325 PL_nextval[PL_nexttoke].ival = 0;
79072805 2326 force_next('(');
3280af22 2327 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2328 force_next(FUNC);
2329 }
3280af22
NIS
2330 if (PL_lex_starts++) {
2331 s = PL_bufptr;
79072805
LW
2332 Aop(OP_CONCAT);
2333 }
cea2e8a9 2334 return yylex();
79072805
LW
2335
2336 case LEX_INTERPENDMAYBE:
3280af22
NIS
2337 if (intuit_more(PL_bufptr)) {
2338 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2339 break;
2340 }
2341 /* FALL THROUGH */
2342
2343 case LEX_INTERPEND:
3280af22
NIS
2344 if (PL_lex_dojoin) {
2345 PL_lex_dojoin = FALSE;
2346 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2347 return ')';
2348 }
43a16006 2349 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2350 && SvEVALED(PL_lex_repl))
43a16006 2351 {
e9fa98b2 2352 if (PL_bufptr != PL_bufend)
cea2e8a9 2353 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2354 PL_lex_repl = Nullsv;
2355 }
79072805
LW
2356 /* FALLTHROUGH */
2357 case LEX_INTERPCONCAT:
2358#ifdef DEBUGGING
3280af22 2359 if (PL_lex_brackets)
cea2e8a9 2360 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2361#endif
3280af22 2362 if (PL_bufptr == PL_bufend)
79072805
LW
2363 return sublex_done();
2364
3280af22
NIS
2365 if (SvIVX(PL_linestr) == '\'') {
2366 SV *sv = newSVsv(PL_linestr);
2367 if (!PL_lex_inpat)
76e3520e 2368 sv = tokeq(sv);
3280af22 2369 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2370 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2371 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2372 s = PL_bufend;
79072805
LW
2373 }
2374 else {
3280af22 2375 s = scan_const(PL_bufptr);
79072805 2376 if (*s == '\\')
3280af22 2377 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2378 else
3280af22 2379 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2380 }
2381
3280af22
NIS
2382 if (s != PL_bufptr) {
2383 PL_nextval[PL_nexttoke] = yylval;
2384 PL_expect = XTERM;
79072805 2385 force_next(THING);
3280af22 2386 if (PL_lex_starts++)
79072805
LW
2387 Aop(OP_CONCAT);
2388 else {
3280af22 2389 PL_bufptr = s;
cea2e8a9 2390 return yylex();
79072805
LW
2391 }
2392 }
2393
cea2e8a9 2394 return yylex();
a0d0e21e 2395 case LEX_FORMLINE:
3280af22
NIS
2396 PL_lex_state = LEX_NORMAL;
2397 s = scan_formline(PL_bufptr);
2398 if (!PL_lex_formbrack)
a0d0e21e
LW
2399 goto rightbracket;
2400 OPERATOR(';');
79072805
LW
2401 }
2402
3280af22
NIS
2403 s = PL_bufptr;
2404 PL_oldoldbufptr = PL_oldbufptr;
2405 PL_oldbufptr = s;
607df283 2406 DEBUG_T( {
bf49b057
GS
2407 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2408 exp_name[PL_expect], s);
5f80b19c 2409 } );
463ee0b2
LW
2410
2411 retry:
378cc40b
LW
2412 switch (*s) {
2413 default:
7e2040f0 2414 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2415 goto keylookup;
cea2e8a9 2416 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2417 case 4:
2418 case 26:
2419 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2420 case 0:
3280af22
NIS
2421 if (!PL_rsfp) {
2422 PL_last_uni = 0;
2423 PL_last_lop = 0;
2424 if (PL_lex_brackets)
d98d5fff 2425 yyerror("Missing right curly or square bracket");
4e553d73 2426 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2427 "### Tokener got EOF\n");
5f80b19c 2428 } );
79072805 2429 TOKEN(0);
463ee0b2 2430 }
3280af22 2431 if (s++ < PL_bufend)
a687059c 2432 goto retry; /* ignore stray nulls */
3280af22
NIS
2433 PL_last_uni = 0;
2434 PL_last_lop = 0;
2435 if (!PL_in_eval && !PL_preambled) {
2436 PL_preambled = TRUE;
2437 sv_setpv(PL_linestr,incl_perldb());
2438 if (SvCUR(PL_linestr))
2439 sv_catpv(PL_linestr,";");
2440 if (PL_preambleav){
2441 while(AvFILLp(PL_preambleav) >= 0) {
2442 SV *tmpsv = av_shift(PL_preambleav);
2443 sv_catsv(PL_linestr, tmpsv);
2444 sv_catpv(PL_linestr, ";");
91b7def8 2445 sv_free(tmpsv);
2446 }
3280af22
NIS
2447 sv_free((SV*)PL_preambleav);
2448 PL_preambleav = NULL;
91b7def8 2449 }
3280af22
NIS
2450 if (PL_minus_n || PL_minus_p) {
2451 sv_catpv(PL_linestr, "LINE: while (<>) {");
2452 if (PL_minus_l)
2453 sv_catpv(PL_linestr,"chomp;");
2454 if (PL_minus_a) {
3280af22
NIS
2455 if (PL_minus_F) {
2456 if (strchr("/'\"", *PL_splitstr)
2457 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2458 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2459 else {
2460 char delim;
2461 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2462 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2463 delim = *s;
75c72d73 2464 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2465 "q" + (delim == '\''), delim);
3280af22 2466 for (s = PL_splitstr; *s; s++) {
54310121 2467 if (*s == '\\')
3280af22
NIS
2468 sv_catpvn(PL_linestr, "\\", 1);
2469 sv_catpvn(PL_linestr, s, 1);
54310121 2470 }
cea2e8a9 2471 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2472 }
2304df62
AD
2473 }
2474 else
75c72d73 2475 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2476 }
79072805 2477 }
3280af22
NIS
2478 sv_catpv(PL_linestr, "\n");
2479 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2480 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2481 PL_last_lop = PL_last_uni = Nullch;
3280af22 2482 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2483 SV *sv = NEWSV(85,0);
2484
2485 sv_upgrade(sv, SVt_PVMG);
3280af22 2486 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2487 (void)SvIOK_on(sv);
2488 SvIVX(sv) = 0;
57843af0 2489 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2490 }
79072805 2491 goto retry;
a687059c 2492 }
e929a76b 2493 do {
aa7440fb 2494 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2495 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2496 fake_eof:
2497 if (PL_rsfp) {
2498 if (PL_preprocess && !PL_in_eval)
2499 (void)PerlProc_pclose(PL_rsfp);
2500 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2501 PerlIO_clearerr(PL_rsfp);
2502 else
2503 (void)PerlIO_close(PL_rsfp);
2504 PL_rsfp = Nullfp;
2505 PL_doextract = FALSE;
2506 }
2507 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2508 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2509 sv_catpv(PL_linestr,";}");
2510 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2511 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2512 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2513 PL_minus_n = PL_minus_p = 0;
2514 goto retry;
2515 }
2516 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2517 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2518 sv_setpv(PL_linestr,"");
2519 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2520 }
2521 /* if it looks like the start of a BOM, check if it in fact is */
2522 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2523#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2524# ifdef __GNU_LIBRARY__
2525# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2526# define FTELL_FOR_PIPE_IS_BROKEN
2527# endif
e3f494f1
JH
2528# else
2529# ifdef __GLIBC__
2530# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2531# define FTELL_FOR_PIPE_IS_BROKEN
2532# endif
2533# endif
226017aa
DD
2534# endif
2535#endif
2536#ifdef FTELL_FOR_PIPE_IS_BROKEN
2537 /* This loses the possibility to detect the bof
2538 * situation on perl -P when the libc5 is being used.
2539 * Workaround? Maybe attach some extra state to PL_rsfp?
2540 */
2541 if (!PL_preprocess)
7e28d3af 2542 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2543#else
eb160463 2544 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2545#endif
7e28d3af 2546 if (bof) {
3280af22 2547 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2548 s = swallow_bom((U8*)s);
e929a76b 2549 }
378cc40b 2550 }
3280af22 2551 if (PL_doextract) {
a0d0e21e
LW
2552 /* Incest with pod. */
2553 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2554 sv_setpv(PL_linestr, "");
2555 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2556 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2557 PL_last_lop = PL_last_uni = Nullch;
3280af22 2558 PL_doextract = FALSE;
a0d0e21e 2559 }
4e553d73 2560 }
463ee0b2 2561 incline(s);
3280af22
NIS
2562 } while (PL_doextract);
2563 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2564 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2565 SV *sv = NEWSV(85,0);
a687059c 2566
93a17b20 2567 sv_upgrade(sv, SVt_PVMG);
3280af22 2568 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2569 (void)SvIOK_on(sv);
2570 SvIVX(sv) = 0;
57843af0 2571 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2572 }
3280af22 2573 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2574 PL_last_lop = PL_last_uni = Nullch;
57843af0 2575 if (CopLINE(PL_curcop) == 1) {
3280af22 2576 while (s < PL_bufend && isSPACE(*s))
79072805 2577 s++;
a0d0e21e 2578 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2579 s++;
44a8e56a 2580 d = Nullch;
3280af22 2581 if (!PL_in_eval) {
44a8e56a 2582 if (*s == '#' && *(s+1) == '!')
2583 d = s + 2;
2584#ifdef ALTERNATE_SHEBANG
2585 else {
2586 static char as[] = ALTERNATE_SHEBANG;
2587 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2588 d = s + (sizeof(as) - 1);
2589 }
2590#endif /* ALTERNATE_SHEBANG */
2591 }
2592 if (d) {
b8378b72 2593 char *ipath;
774d564b 2594 char *ipathend;
b8378b72 2595
774d564b 2596 while (isSPACE(*d))
b8378b72
CS
2597 d++;
2598 ipath = d;
774d564b 2599 while (*d && !isSPACE(*d))
2600 d++;
2601 ipathend = d;
2602
2603#ifdef ARG_ZERO_IS_SCRIPT
2604 if (ipathend > ipath) {
2605 /*
2606 * HP-UX (at least) sets argv[0] to the script name,
2607 * which makes $^X incorrect. And Digital UNIX and Linux,
2608 * at least, set argv[0] to the basename of the Perl
2609 * interpreter. So, having found "#!", we'll set it right.
2610 */
ee2f7564 2611 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2612 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2613 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2614 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2615 SvSETMAGIC(x);
2616 }
556c1dec
JH
2617 else {
2618 STRLEN blen;
2619 STRLEN llen;
2620 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2621 char *lstart = SvPV(x,llen);
2622 if (llen < blen) {
2623 bstart += blen - llen;
2624 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2625 sv_setpvn(x, ipath, ipathend - ipath);
2626 SvSETMAGIC(x);
2627 }
2628 }
2629 }
774d564b 2630 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2631 }
774d564b 2632#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2633
2634 /*
2635 * Look for options.
2636 */
748a9306 2637 d = instr(s,"perl -");
84e30d1a 2638 if (!d) {
748a9306 2639 d = instr(s,"perl");
84e30d1a
GS
2640#if defined(DOSISH)
2641 /* avoid getting into infinite loops when shebang
2642 * line contains "Perl" rather than "perl" */
2643 if (!d) {
2644 for (d = ipathend-4; d >= ipath; --d) {
2645 if ((*d == 'p' || *d == 'P')
2646 && !ibcmp(d, "perl", 4))
2647 {
2648 break;
2649 }
2650 }
2651 if (d < ipath)
2652 d = Nullch;
2653 }
2654#endif
2655 }
44a8e56a 2656#ifdef ALTERNATE_SHEBANG
2657 /*
2658 * If the ALTERNATE_SHEBANG on this system starts with a
2659 * character that can be part of a Perl expression, then if
2660 * we see it but not "perl", we're probably looking at the
2661 * start of Perl code, not a request to hand off to some
2662 * other interpreter. Similarly, if "perl" is there, but
2663 * not in the first 'word' of the line, we assume the line
2664 * contains the start of the Perl program.
44a8e56a 2665 */
2666 if (d && *s != '#') {
774d564b 2667 char *c = ipath;
44a8e56a 2668 while (*c && !strchr("; \t\r\n\f\v#", *c))
2669 c++;
2670 if (c < d)
2671 d = Nullch; /* "perl" not in first word; ignore */
2672 else
2673 *s = '#'; /* Don't try to parse shebang line */
2674 }
774d564b 2675#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2676#ifndef MACOS_TRADITIONAL
748a9306 2677 if (!d &&
44a8e56a 2678 *s == '#' &&
774d564b 2679 ipathend > ipath &&
3280af22 2680 !PL_minus_c &&
748a9306 2681 !instr(s,"indir") &&
3280af22 2682 instr(PL_origargv[0],"perl"))
748a9306 2683 {
9f68db38 2684 char **newargv;
9f68db38 2685
774d564b 2686 *ipathend = '\0';
2687 s = ipathend + 1;
3280af22 2688 while (s < PL_bufend && isSPACE(*s))
9f68db38 2689 s++;
3280af22
NIS
2690 if (s < PL_bufend) {
2691 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2692 newargv[1] = s;
3280af22 2693 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2694 s++;
2695 *s = '\0';
3280af22 2696 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2697 }
2698 else
3280af22 2699 newargv = PL_origargv;
774d564b 2700 newargv[0] = ipath;
b4748376 2701 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2702 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2703 }
bf4acbe4 2704#endif
748a9306 2705 if (d) {
3280af22
NIS
2706 U32 oldpdb = PL_perldb;
2707 bool oldn = PL_minus_n;
2708 bool oldp = PL_minus_p;
748a9306
LW
2709
2710 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2711 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2712
2713 if (*d++ == '-') {
a11ec5a9 2714 bool switches_done = PL_doswitches;
8cc95fdb 2715 do {
2716 if (*d == 'M' || *d == 'm') {
2717 char *m = d;
2718 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2719 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2720 (int)(d - m), m);
2721 }
2722 d = moreswitches(d);
2723 } while (d);
155aba94
GS
2724 if ((PERLDB_LINE && !oldpdb) ||
2725 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2726 /* if we have already added "LINE: while (<>) {",
2727 we must not do it again */
748a9306 2728 {
3280af22
NIS
2729 sv_setpv(PL_linestr, "");
2730 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2732 PL_last_lop = PL_last_uni = Nullch;
3280af22 2733 PL_preambled = FALSE;
84902520 2734 if (PERLDB_LINE)
3280af22 2735 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2736 goto retry;
2737 }
a11ec5a9
RGS
2738 if (PL_doswitches && !switches_done) {
2739 int argc = PL_origargc;
2740 char **argv = PL_origargv;
2741 do {
2742 argc--,argv++;
2743 } while (argc && argv[0][0] == '-' && argv[0][1]);
2744 init_argv_symbols(argc,argv);
2745 }
a0d0e21e 2746 }
79072805 2747 }
9f68db38 2748 }
79072805 2749 }
3280af22
NIS
2750 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2751 PL_bufptr = s;
2752 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2753 return yylex();
ae986130 2754 }
378cc40b 2755 goto retry;
4fdae800 2756 case '\r':
6a27c188 2757#ifdef PERL_STRICT_CR
cea2e8a9 2758 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2759 Perl_croak(aTHX_
cc507455 2760 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2761#endif
4fdae800 2762 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2763#ifdef MACOS_TRADITIONAL
2764 case '\312':
2765#endif
378cc40b
LW
2766 s++;
2767 goto retry;
378cc40b 2768 case '#':
e929a76b 2769 case '\n':
3280af22 2770 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2771 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2772 /* handle eval qq[#line 1 "foo"\n ...] */
2773 CopLINE_dec(PL_curcop);
2774 incline(s);
2775 }
3280af22 2776 d = PL_bufend;
a687059c 2777 while (s < d && *s != '\n')
378cc40b 2778 s++;
0f85fab0 2779 if (s < d)
378cc40b 2780 s++;
78c267c1 2781 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2782 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2783 incline(s);
3280af22
NIS
2784 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2785 PL_bufptr = s;
2786 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2787 return yylex();
a687059c 2788 }
378cc40b 2789 }
a687059c 2790 else {
378cc40b 2791 *s = '\0';
3280af22 2792 PL_bufend = s;
a687059c 2793 }
378cc40b
LW
2794 goto retry;
2795 case '-':
79072805 2796 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2797 I32 ftst = 0;
2798
378cc40b 2799 s++;
3280af22 2800 PL_bufptr = s;
748a9306
LW
2801 tmp = *s++;
2802
bf4acbe4 2803 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2804 s++;
2805
2806 if (strnEQ(s,"=>",2)) {
3280af22 2807 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2808 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2809 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2810 } );
748a9306
LW
2811 OPERATOR('-'); /* unary minus */
2812 }
3280af22 2813 PL_last_uni = PL_oldbufptr;
748a9306 2814 switch (tmp) {
e5edeb50
JH
2815 case 'r': ftst = OP_FTEREAD; break;
2816 case 'w': ftst = OP_FTEWRITE; break;
2817 case 'x': ftst = OP_FTEEXEC; break;
2818 case 'o': ftst = OP_FTEOWNED; break;
2819 case 'R': ftst = OP_FTRREAD; break;
2820 case 'W': ftst = OP_FTRWRITE; break;
2821 case 'X': ftst = OP_FTREXEC; break;
2822 case 'O': ftst = OP_FTROWNED; break;
2823 case 'e': ftst = OP_FTIS; break;
2824 case 'z': ftst = OP_FTZERO; break;
2825 case 's': ftst = OP_FTSIZE; break;
2826 case 'f': ftst = OP_FTFILE; break;
2827 case 'd': ftst = OP_FTDIR; break;
2828 case 'l': ftst = OP_FTLINK; break;
2829 case 'p': ftst = OP_FTPIPE; break;
2830 case 'S': ftst = OP_FTSOCK; break;
2831 case 'u': ftst = OP_FTSUID; break;
2832 case 'g': ftst = OP_FTSGID; break;
2833 case 'k': ftst = OP_FTSVTX; break;
2834 case 'b': ftst = OP_FTBLK; break;
2835 case 'c': ftst = OP_FTCHR; break;
2836 case 't': ftst = OP_FTTTY; break;
2837 case 'T': ftst = OP_FTTEXT; break;
2838 case 'B': ftst = OP_FTBINARY; break;
2839 case 'M': case 'A': case 'C':
2840 gv_fetchpv("\024",TRUE, SVt_PV);
2841 switch (tmp) {
2842 case 'M': ftst = OP_FTMTIME; break;
2843 case 'A': ftst = OP_FTATIME; break;
2844 case 'C': ftst = OP_FTCTIME; break;
2845 default: break;
2846 }
2847 break;
378cc40b 2848 default:
378cc40b
LW
2849 break;
2850 }
e5edeb50 2851 if (ftst) {
eb160463 2852 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2853 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2854 "### Saw file test %c\n", (int)ftst);
5f80b19c 2855 } );
e5edeb50
JH
2856 FTST(ftst);
2857 }
2858 else {
2859 /* Assume it was a minus followed by a one-letter named
2860 * subroutine call (or a -bareword), then. */
95c31fe3 2861 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2862 "### %c looked like a file test but was not\n",
2863 (int)ftst);
5f80b19c 2864 } );
e5edeb50
JH
2865 s -= 2;
2866 }
378cc40b 2867 }
a687059c
LW
2868 tmp = *s++;
2869 if (*s == tmp) {
2870 s++;
3280af22 2871 if (PL_expect == XOPERATOR)
79072805
LW
2872 TERM(POSTDEC);
2873 else
2874 OPERATOR(PREDEC);
2875 }
2876 else if (*s == '>') {
2877 s++;
2878 s = skipspace(s);
7e2040f0 2879 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2880 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2881 TOKEN(ARROW);
79072805 2882 }
748a9306
LW
2883 else if (*s == '$')
2884 OPERATOR(ARROW);
463ee0b2 2885 else
748a9306 2886 TERM(ARROW);
a687059c 2887 }
3280af22 2888 if (PL_expect == XOPERATOR)
79072805
LW
2889 Aop(OP_SUBTRACT);
2890 else {
3280af22 2891 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2892 check_uni();
79072805 2893 OPERATOR('-'); /* unary minus */
2f3197b3 2894 }
79072805 2895
378cc40b 2896 case '+':
a687059c
LW
2897 tmp = *s++;
2898 if (*s == tmp) {
378cc40b 2899 s++;
3280af22 2900 if (PL_expect == XOPERATOR)
79072805
LW
2901 TERM(POSTINC);
2902 else
2903 OPERATOR(PREINC);
378cc40b 2904 }
3280af22 2905 if (PL_expect == XOPERATOR)
79072805
LW
2906 Aop(OP_ADD);
2907 else {
3280af22 2908 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2909 check_uni();
a687059c 2910 OPERATOR('+');
2f3197b3 2911 }
a687059c 2912
378cc40b 2913 case '*':
3280af22
NIS
2914 if (PL_expect != XOPERATOR) {
2915 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2916 PL_expect = XOPERATOR;
2917 force_ident(PL_tokenbuf, '*');
2918 if (!*PL_tokenbuf)
a0d0e21e 2919 PREREF('*');
79072805 2920 TERM('*');
a687059c 2921 }
79072805
LW
2922 s++;
2923 if (*s == '*') {
a687059c 2924 s++;
79072805 2925 PWop(OP_POW);
a687059c 2926 }
79072805
LW
2927 Mop(OP_MULTIPLY);
2928
378cc40b 2929 case '%':
3280af22 2930 if (PL_expect == XOPERATOR) {
bbce6d69 2931 ++s;
2932 Mop(OP_MODULO);
a687059c 2933 }
3280af22
NIS
2934 PL_tokenbuf[0] = '%';
2935 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2936 if (!PL_tokenbuf[1]) {
2937 if (s == PL_bufend)
bbce6d69 2938 yyerror("Final % should be \\% or %name");
2939 PREREF('%');
a687059c 2940 }
3280af22 2941 PL_pending_ident = '%';
bbce6d69 2942 TERM('%');
a687059c 2943
378cc40b 2944 case '^':
79072805 2945 s++;
a0d0e21e 2946 BOop(OP_BIT_XOR);
79072805 2947 case '[':
3280af22 2948 PL_lex_brackets++;
79072805 2949 /* FALL THROUGH */
378cc40b 2950 case '~':
378cc40b 2951 case ',':
378cc40b
LW
2952 tmp = *s++;
2953 OPERATOR(tmp);
a0d0e21e
LW
2954 case ':':
2955 if (s[1] == ':') {
2956 len = 0;
2957 goto just_a_word;
2958 }
2959 s++;
09bef843
SB
2960 switch (PL_expect) {
2961 OP *attrs;
2962 case XOPERATOR:
2963 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2964 break;
2965 PL_bufptr = s; /* update in case we back off */
2966 goto grabattrs;
2967 case XATTRBLOCK:
2968 PL_expect = XBLOCK;
2969 goto grabattrs;
2970 case XATTRTERM:
2971 PL_expect = XTERMBLOCK;
2972 grabattrs:
2973 s = skipspace(s);
2974 attrs = Nullop;
7e2040f0 2975 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2976 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2977 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2978 if (tmp < 0) tmp = -tmp;
2979 switch (tmp) {
2980 case KEY_or:
2981 case KEY_and:
c963b151 2982 case KEY_err:
f9829d6b
GS
2983 case KEY_for:
2984 case KEY_unless:
2985 case KEY_if:
2986 case KEY_while:
2987 case KEY_until:
2988 goto got_attrs;
2989 default:
2990 break;
2991 }
2992 }
09bef843
SB
2993 if (*d == '(') {
2994 d = scan_str(d,TRUE,TRUE);
2995 if (!d) {
09bef843
SB
2996 /* MUST advance bufptr here to avoid bogus
2997 "at end of line" context messages from yyerror().
2998 */
2999 PL_bufptr = s + len;
3000 yyerror("Unterminated attribute parameter in attribute list");
3001 if (attrs)
3002 op_free(attrs);
3003 return 0; /* EOF indicator */
3004 }
3005 }
3006 if (PL_lex_stuff) {
3007 SV *sv = newSVpvn(s, len);
3008 sv_catsv(sv, PL_lex_stuff);
3009 attrs = append_elem(OP_LIST, attrs,
3010 newSVOP(OP_CONST, 0, sv));
3011 SvREFCNT_dec(PL_lex_stuff);
3012 PL_lex_stuff = Nullsv;
3013 }
3014 else {
d3cea301
SB
3015 /* NOTE: any CV attrs applied here need to be part of
3016 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3017 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3018 CvLVALUE_on(PL_compcv);
3019 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3020 CvLOCKED_on(PL_compcv);
3021 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3022 CvMETHOD_on(PL_compcv);
87ecf892 3023#ifdef USE_ITHREADS
d3cea301
SB
3024 else if (PL_in_my == KEY_our && len == 6 &&
3025 strnEQ(s, "unique", len))
7fb37951 3026 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3027#endif
78f9721b
SM
3028 /* After we've set the flags, it could be argued that
3029 we don't need to do the attributes.pm-based setting
3030 process, and shouldn't bother appending recognized
d3cea301
SB
3031 flags. To experiment with that, uncomment the
3032 following "else". (Note that's already been
3033 uncommented. That keeps the above-applied built-in
3034 attributes from being intercepted (and possibly
3035 rejected) by a package's attribute routines, but is
3036 justified by the performance win for the common case
3037 of applying only built-in attributes.) */
0256094b 3038 else
78f9721b
SM
3039 attrs = append_elem(OP_LIST, attrs,
3040 newSVOP(OP_CONST, 0,
3041 newSVpvn(s, len)));
09bef843
SB
3042 }
3043 s = skipspace(d);
0120eecf 3044 if (*s == ':' && s[1] != ':')
09bef843 3045 s = skipspace(s+1);
0120eecf
GS
3046 else if (s == d)
3047 break; /* require real whitespace or :'s */
09bef843 3048 }
f9829d6b 3049 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3050 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3051 char q = ((*s == '\'') ? '"' : '\'');
3052 /* If here for an expression, and parsed no attrs, back off. */
3053 if (tmp == '=' && !attrs) {
3054 s = PL_bufptr;
3055 break;
3056 }
3057 /* MUST advance bufptr here to avoid bogus "at end of line"
3058 context messages from yyerror().
3059 */
3060 PL_bufptr = s;
3061 if (!*s)
3062 yyerror("Unterminated attribute list");
3063 else
3064 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3065 q, *s, q));
3066 if (attrs)
3067 op_free(attrs);
3068 OPERATOR(':');
3069 }
f9829d6b 3070 got_attrs:
09bef843
SB
3071 if (attrs) {
3072 PL_nextval[PL_nexttoke].opval = attrs;
3073 force_next(THING);
3074 }
3075 TOKEN(COLONATTR);
3076 }
a0d0e21e 3077 OPERATOR(':');
8990e307
LW
3078 case '(':
3079 s++;
3280af22
NIS
3080 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3081 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3082 else
3280af22 3083 PL_expect = XTERM;
a0d0e21e 3084 TOKEN('(');
378cc40b 3085 case ';':
f4dd75d9 3086 CLINE;
378cc40b
LW
3087 tmp = *s++;
3088 OPERATOR(tmp);
3089 case ')':
378cc40b 3090 tmp = *s++;
16d20bd9
AD
3091 s = skipspace(s);
3092 if (*s == '{')
3093 PREBLOCK(tmp);
378cc40b 3094 TERM(tmp);
79072805
LW
3095 case ']':
3096 s++;
3280af22 3097 if (PL_lex_brackets <= 0)
d98d5fff 3098 yyerror("Unmatched right square bracket");
463ee0b2 3099 else
3280af22
NIS
3100 --PL_lex_brackets;
3101 if (PL_lex_state == LEX_INTERPNORMAL) {
3102 if (PL_lex_brackets == 0) {
a0d0e21e 3103 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3104 PL_lex_state = LEX_INTERPEND;
79072805
LW
3105 }
3106 }
4633a7c4 3107 TERM(']');
79072805
LW
3108 case '{':
3109 leftbracket:
79072805 3110 s++;
3280af22 3111 if (PL_lex_brackets > 100) {
8edd5f42 3112 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3113 }
3280af22 3114 switch (PL_expect) {
a0d0e21e 3115 case XTERM:
3280af22 3116 if (PL_lex_formbrack) {
a0d0e21e
LW
3117 s--;
3118 PRETERMBLOCK(DO);
3119 }
3280af22
NIS
3120 if (PL_oldoldbufptr == PL_last_lop)
3121 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3122 else
3280af22 3123 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3124 OPERATOR(HASHBRACK);
a0d0e21e 3125 case XOPERATOR:
bf4acbe4 3126 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3127 s++;
44a8e56a 3128 d = s;
3280af22
NIS
3129 PL_tokenbuf[0] = '\0';
3130 if (d < PL_bufend && *d == '-') {
3131 PL_tokenbuf[0] = '-';
44a8e56a 3132 d++;
bf4acbe4 3133 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3134 d++;
3135 }
7e2040f0 3136 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3137 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3138 FALSE, &len);
bf4acbe4 3139 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3140 d++;
3141 if (*d == '}') {
3280af22 3142 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3143 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3144 if (minus)
3145 force_next('-');
748a9306
LW
3146 }
3147 }
3148 /* FALL THROUGH */
09bef843 3149 case XATTRBLOCK:
748a9306 3150 case XBLOCK:
3280af22
NIS
3151 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3152 PL_expect = XSTATE;
a0d0e21e 3153 break;
09bef843 3154 case XATTRTERM:
a0d0e21e 3155 case XTERMBLOCK:
3280af22
NIS
3156 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3157 PL_expect = XSTATE;
a0d0e21e
LW
3158 break;
3159 default: {
3160 char *t;
3280af22
NIS
3161 if (PL_oldoldbufptr == PL_last_lop)
3162 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3163 else
3280af22 3164 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3165 s = skipspace(s);
8452ff4b
SB
3166 if (*s == '}') {
3167 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3168 PL_expect = XTERM;
3169 /* This hack is to get the ${} in the message. */
3170 PL_bufptr = s+1;
3171 yyerror("syntax error");
3172 break;
3173 }
a0d0e21e 3174 OPERATOR(HASHBRACK);
8452ff4b 3175 }
b8a4b1be
GS
3176 /* This hack serves to disambiguate a pair of curlies
3177 * as being a block or an anon hash. Normally, expectation
3178 * determines that, but in cases where we're not in a
3179 * position to expect anything in particular (like inside
3180 * eval"") we have to resolve the ambiguity. This code
3181 * covers the case where the first term in the curlies is a
3182 * quoted string. Most other cases need to be explicitly
3183 * disambiguated by prepending a `+' before the opening
3184 * curly in order to force resolution as an anon hash.
3185 *
3186 * XXX should probably propagate the outer expectation
3187 * into eval"" to rely less on this hack, but that could
3188 * potentially break current behavior of eval"".
3189 * GSAR 97-07-21
3190 */
3191 t = s;
3192 if (*s == '\'' || *s == '"' || *s == '`') {
3193 /* common case: get past first string, handling escapes */
3280af22 3194 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3195 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3196 t++;
3197 t++;
a0d0e21e 3198 }
b8a4b1be 3199 else if (*s == 'q') {
3280af22 3200 if (++t < PL_bufend
b8a4b1be 3201 && (!isALNUM(*t)
3280af22 3202 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3203 && !isALNUM(*t))))
3204 {
b8a4b1be
GS
3205 char *tmps;
3206 char open, close, term;
3207 I32 brackets = 1;
3208
3280af22 3209 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3210 t++;
3211 term = *t;
3212 open = term;
3213 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3214 term = tmps[5];
3215 close = term;
3216 if (open == close)
3280af22
NIS
3217 for (t++; t < PL_bufend; t++) {
3218 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3219 t++;
6d07e5e9 3220 else if (*t == open)
b8a4b1be
GS
3221 break;
3222 }
3223 else
3280af22
NIS
3224 for (t++; t < PL_bufend; t++) {
3225 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3226 t++;
6d07e5e9 3227 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3228 break;
3229 else if (*t == open)
3230 brackets++;
3231 }
3232 }
3233 t++;
a0d0e21e 3234 }
7e2040f0 3235 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3236 t += UTF8SKIP(t);
7e2040f0 3237 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3238 t += UTF8SKIP(t);
a0d0e21e 3239 }
3280af22 3240 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3241 t++;
b8a4b1be
GS
3242 /* if comma follows first term, call it an anon hash */
3243 /* XXX it could be a comma expression with loop modifiers */
3280af22 3244 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3245 || (*t == '=' && t[1] == '>')))
a0d0e21e 3246 OPERATOR(HASHBRACK);
3280af22 3247 if (PL_expect == XREF)
4e4e412b 3248 PL_expect = XTERM;
a0d0e21e 3249 else {
3280af22
NIS
3250 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3251 PL_expect = XSTATE;
a0d0e21e 3252 }
8990e307 3253 }
a0d0e21e 3254 break;
463ee0b2 3255 }
57843af0 3256 yylval.ival = CopLINE(PL_curcop);
79072805 3257 if (isSPACE(*s) || *s == '#')
3280af22 3258 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3259 TOKEN('{');
378cc40b 3260 case '}':
79072805
LW
3261 rightbracket:
3262 s++;
3280af22 3263 if (PL_lex_brackets <= 0)
d98d5fff 3264 yyerror("Unmatched right curly bracket");
463ee0b2 3265 else
3280af22 3266 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3267 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3268 PL_lex_formbrack = 0;
3269 if (PL_lex_state == LEX_INTERPNORMAL) {
3270 if (PL_lex_brackets == 0) {
9059aa12
LW
3271 if (PL_expect & XFAKEBRACK) {
3272 PL_expect &= XENUMMASK;
3280af22
NIS
3273 PL_lex_state = LEX_INTERPEND;
3274 PL_bufptr = s;
cea2e8a9 3275 return yylex(); /* ignore fake brackets */
79072805 3276 }
fa83b5b6 3277 if (*s == '-' && s[1] == '>')
3280af22 3278 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3279 else if (*s != '[' && *s != '{')
3280af22 3280 PL_lex_state = LEX_INTERPEND;
79072805
LW
3281 }
3282 }
9059aa12
LW
3283 if (PL_expect & XFAKEBRACK) {
3284 PL_expect &= XENUMMASK;
3280af22 3285 PL_bufptr = s;
cea2e8a9 3286 return yylex(); /* ignore fake brackets */
748a9306 3287 }
79072805
LW
3288 force_next('}');
3289 TOKEN(';');
378cc40b
LW
3290 case '&':
3291 s++;
3292 tmp = *s++;
3293 if (tmp == '&')
a0d0e21e 3294 AOPERATOR(ANDAND);
378cc40b 3295 s--;
3280af22 3296 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3297 if (ckWARN(WARN_SEMICOLON)
3298 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3299 {
57843af0 3300 CopLINE_dec(PL_curcop);
9014280d 3301 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3302 CopLINE_inc(PL_curcop);
463ee0b2 3303 }
79072805 3304 BAop(OP_BIT_AND);
463ee0b2 3305 }
79072805 3306
3280af22
NIS
3307 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3308 if (*PL_tokenbuf) {
3309 PL_expect = XOPERATOR;
3310 force_ident(PL_tokenbuf, '&');
463ee0b2 3311 }
79072805
LW
3312 else
3313 PREREF('&');
c07a80fd 3314 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3315 TERM('&');
3316
378cc40b
LW
3317 case '|':
3318 s++;
3319 tmp = *s++;
3320 if (tmp == '|')
a0d0e21e 3321 AOPERATOR(OROR);
378cc40b 3322 s--;
79072805 3323 BOop(OP_BIT_OR);
378cc40b
LW
3324 case '=':
3325 s++;
3326 tmp = *s++;
3327 if (tmp == '=')
79072805
LW
3328 Eop(OP_EQ);
3329 if (tmp == '>')
3330 OPERATOR(',');
378cc40b 3331 if (tmp == '~')
79072805 3332 PMop(OP_MATCH);
599cee73 3333 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3334 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3335 s--;
3280af22
NIS
3336 if (PL_expect == XSTATE && isALPHA(tmp) &&
3337 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3338 {
3280af22
NIS
3339 if (PL_in_eval && !PL_rsfp) {
3340 d = PL_bufend;
a5f75d66
AD
3341 while (s < d) {
3342 if (*s++ == '\n') {
3343 incline(s);
3344 if (strnEQ(s,"=cut",4)) {
3345 s = strchr(s,'\n');
3346 if (s)
3347 s++;
3348 else
3349 s = d;
3350 incline(s);
3351 goto retry;
3352 }
3353 }
3354 }
3355 goto retry;
3356 }
3280af22
NIS
3357 s = PL_bufend;
3358 PL_doextract = TRUE;
a0d0e21e
LW
3359 goto retry;
3360 }
3280af22 3361 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3362 char *t;
51882d45 3363#ifdef PERL_STRICT_CR
bf4acbe4 3364 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3365#else
bf4acbe4 3366 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3367#endif
a0d0e21e
LW
3368 if (*t == '\n' || *t == '#') {
3369 s--;
3280af22 3370 PL_expect = XBLOCK;
a0d0e21e
LW
3371 goto leftbracket;
3372 }
79072805 3373 }
a0d0e21e
LW
3374 yylval.ival = 0;
3375 OPERATOR(ASSIGNOP);
378cc40b
LW
3376 case '!':
3377 s++;
3378 tmp = *s++;
3379 if (tmp == '=')
79072805 3380 Eop(OP_NE);
378cc40b 3381 if (tmp == '~')
79072805 3382 PMop(OP_NOT);
378cc40b
LW
3383 s--;
3384 OPERATOR('!');
3385 case '<':
3280af22 3386 if (PL_expect != XOPERATOR) {
93a17b20 3387 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3388 check_uni();
79072805
LW
3389 if (s[1] == '<')
3390 s = scan_heredoc(s);
3391 else
3392 s = scan_inputsymbol(s);
3393 TERM(sublex_start());
378cc40b
LW
3394 }
3395 s++;
3396 tmp = *s++;
3397 if (tmp == '<')
79072805 3398 SHop(OP_LEFT_SHIFT);
395c3793
LW
3399 if (tmp == '=') {
3400 tmp = *s++;
3401 if (tmp == '>')
79072805 3402 Eop(OP_NCMP);
395c3793 3403 s--;
79072805 3404 Rop(OP_LE);
395c3793 3405 }
378cc40b 3406 s--;
79072805 3407 Rop(OP_LT);
378cc40b
LW
3408 case '>':
3409 s++;
3410 tmp = *s++;
3411 if (tmp == '>')
79072805 3412 SHop(OP_RIGHT_SHIFT);
378cc40b 3413 if (tmp == '=')
79072805 3414 Rop(OP_GE);
378cc40b 3415 s--;
79072805 3416 Rop(OP_GT);
378cc40b
LW
3417
3418 case '$':
bbce6d69 3419 CLINE;
3420
3280af22
NIS
3421 if (PL_expect == XOPERATOR) {
3422 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3423 PL_expect = XTERM;
a0d0e21e 3424 depcom();
bbce6d69 3425 return ','; /* grandfather non-comma-format format */
a0d0e21e 3426 }
8990e307 3427 }
a0d0e21e 3428
7e2040f0 3429 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3430 PL_tokenbuf[0] = '@';
376b8730
SM
3431 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3432 sizeof PL_tokenbuf - 1, FALSE);
3433 if (PL_expect == XOPERATOR)
3434 no_op("Array length", s);
3280af22 3435 if (!PL_tokenbuf[1])
a0d0e21e 3436 PREREF(DOLSHARP);
3280af22
NIS
3437 PL_expect = XOPERATOR;
3438 PL_pending_ident = '#';
463ee0b2 3439 TOKEN(DOLSHARP);
79072805 3440 }
bbce6d69 3441
3280af22 3442 PL_tokenbuf[0] = '$';
376b8730
SM
3443 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3444 sizeof PL_tokenbuf - 1, FALSE);
3445 if (PL_expect == XOPERATOR)
3446 no_op("Scalar", s);
3280af22
NIS
3447 if (!PL_tokenbuf[1]) {
3448 if (s == PL_bufend)
bbce6d69 3449 yyerror("Final $ should be \\$ or $name");
3450 PREREF('$');
8990e307 3451 }
a0d0e21e 3452
bbce6d69 3453 /* This kludge not intended to be bulletproof. */
3280af22 3454 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3455 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3456 newSViv(PL_compiling.cop_arybase));
bbce6d69 3457 yylval.opval->op_private = OPpCONST_ARYBASE;
3458 TERM(THING);
3459 }
3460
ff68c719 3461 d = s;
69d2bceb 3462 tmp = (I32)*s;
3280af22 3463 if (PL_lex_state == LEX_NORMAL)
ff68c719 3464 s = skipspace(s);
3465
3280af22 3466 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3467 char *t;
3468 if (*s == '[') {
3280af22 3469 PL_tokenbuf[0] = '@';
599cee73 3470 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3471 for(t = s + 1;
7e2040f0 3472 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3473 t++) ;
a0d0e21e 3474 if (*t++ == ',') {
3280af22
NIS
3475 PL_bufptr = skipspace(PL_bufptr);
3476 while (t < PL_bufend && *t != ']')
bbce6d69 3477 t++;
9014280d 3478 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3479 "Multidimensional syntax %.*s not supported",
3480 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3481 }
3482 }
bbce6d69 3483 }
3484 else if (*s == '{') {
3280af22 3485 PL_tokenbuf[0] = '%';
599cee73 3486 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3487 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3488 {
3280af22 3489 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3490 STRLEN len;
3491 for (t++; isSPACE(*t); t++) ;
7e2040f0 3492 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3493 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3494 for (; isSPACE(*t); t++) ;
864dbfa3 3495 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3496 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3497 "You need to quote \"%s\"", tmpbuf);
748a9306 3498 }
93a17b20
LW
3499 }
3500 }
2f3197b3 3501 }
bbce6d69 3502
3280af22 3503 PL_expect = XOPERATOR;
69d2bceb 3504 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3505 bool islop = (PL_last_lop == PL_oldoldbufptr);
3506 if (!islop || PL_last_lop_op == OP_GREPSTART)
3507 PL_expect = XOPERATOR;
bbce6d69 3508 else if (strchr("$@\"'`q", *s))
3280af22 3509 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3510 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3511 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3512 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3513 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3514 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3515 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3516 /* binary operators exclude handle interpretations */
3517 switch (tmp) {
3518 case -KEY_x:
3519 case -KEY_eq:
3520 case -KEY_ne:
3521 case -KEY_gt:
3522 case -KEY_lt:
3523 case -KEY_ge:
3524 case -KEY_le:
3525 case -KEY_cmp:
3526 break;
3527 default:
3280af22 3528 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3529 break;
3530 }
3531 }
68dc0745 3532 else {
3533 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3534 if (gv && GvCVu(gv))
3280af22 3535 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3536 }
93a17b20 3537 }
bbce6d69 3538 else if (isDIGIT(*s))
3280af22 3539 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3540 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3541 PL_expect = XTERM; /* e.g. print $fh .3 */
c963b151
BD
3542 else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3543 PL_expect = XTERM; /* e.g. print $fh -1 */
7ce6e6b9
RGS
3544 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3545 PL_expect = XTERM; /* e.g. print $fh /.../
3546 XXX except DORDOR operator */
e0587a03 3547 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3548 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3549 }
3280af22 3550 PL_pending_ident = '$';
79072805 3551 TOKEN('$');
378cc40b
LW
3552
3553 case '@':
3280af22 3554 if (PL_expect == XOPERATOR)
bbce6d69 3555 no_op("Array", s);
3280af22
NIS
3556 PL_tokenbuf[0] = '@';
3557 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3558 if (!PL_tokenbuf[1]) {
3559 if (s == PL_bufend)
bbce6d69 3560 yyerror("Final @ should be \\@ or @name");
3561 PREREF('@');
3562 }
3280af22 3563 if (PL_lex_state == LEX_NORMAL)
ff68c719 3564 s = skipspace(s);
3280af22 3565 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3566 if (*s == '{')
3280af22 3567 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3568
3569 /* Warn about @ where they meant $. */
599cee73 3570 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3571 if (*s == '[' || *s == '{') {
3572 char *t = s + 1;
7e2040f0 3573 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3574 t++;
3575 if (*t == '}' || *t == ']') {
3576 t++;
3280af22 3577 PL_bufptr = skipspace(PL_bufptr);
9014280d 3578 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3579 "Scalar value %.*s better written as $%.*s",
3280af22 3580 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3581 }
93a17b20
LW
3582 }
3583 }
463ee0b2 3584 }
3280af22 3585 PL_pending_ident = '@';
79072805 3586 TERM('@');
378cc40b 3587
c963b151 3588 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
3589 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3590 s += 2;
3591 AOPERATOR(DORDOR);
3592 }
c963b151
BD
3593 case '?': /* may either be conditional or pattern */
3594 if(PL_expect == XOPERATOR) {
3595 tmp = *s++;
3596 if(tmp == '?') {
3597 OPERATOR('?');
3598 }
3599 else {
3600 tmp = *s++;
3601 if(tmp == '/') {
3602 /* A // operator. */
3603 AOPERATOR(DORDOR);
3604 }
3605 else {
3606 s--;
3607 Mop(OP_DIVIDE);
3608 }
3609 }
3610 }
3611 else {
3612 /* Disable warning on "study /blah/" */
3613 if (PL_oldoldbufptr == PL_last_uni
3614 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3615 || memNE(PL_last_uni, "study", 5)
3616 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3617 ))
3618 check_uni();
3619 s = scan_pat(s,OP_MATCH);
3620 TERM(sublex_start());
3621 }
378cc40b
LW
3622
3623 case '.':
51882d45
GS
3624 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3625#ifdef PERL_STRICT_CR
3626 && s[1] == '\n'
3627#else
3628 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3629#endif
3630 && (s == PL_linestart || s[-1] == '\n') )
3631 {
3280af22
NIS
3632 PL_lex_formbrack = 0;
3633 PL_expect = XSTATE;
79072805
LW
3634 goto rightbracket;
3635 }
3280af22 3636 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3637 tmp = *s++;
a687059c
LW
3638 if (*s == tmp) {
3639 s++;
2f3197b3
LW
3640 if (*s == tmp) {
3641 s++;
79072805 3642 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3643 }
3644 else
79072805 3645 yylval.ival = 0;
378cc40b 3646 OPERATOR(DOTDOT);
a687059c 3647 }
3280af22 3648 if (PL_expect != XOPERATOR)
2f3197b3 3649 check_uni();
79072805 3650 Aop(OP_CONCAT);
378cc40b
LW
3651 }
3652 /* FALL THROUGH */
3653 case '0': case '1': case '2': case '3': case '4':
3654 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3655 s = scan_num(s, &yylval);
4e553d73 3656 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3657 "### Saw number in '%s'\n", s);
5f80b19c 3658 } );
3280af22 3659 if (PL_expect == XOPERATOR)
8990e307 3660 no_op("Number",s);
79072805
LW
3661 TERM(THING);
3662
3663 case '\'':
09bef843 3664 s = scan_str(s,FALSE,FALSE);
4e553d73 3665 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3666 "### Saw string before '%s'\n", s);
5f80b19c 3667 } );
3280af22
NIS
3668 if (PL_expect == XOPERATOR) {
3669 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3670 PL_expect = XTERM;
a0d0e21e
LW
3671 depcom();
3672 return ','; /* grandfather non-comma-format format */
3673 }
463ee0b2 3674 else
8990e307 3675 no_op("String",s);
463ee0b2 3676 }
79072805 3677 if (!s)
85e6fe83 3678 missingterm((char*)0);
79072805
LW
3679 yylval.ival = OP_CONST;
3680 TERM(sublex_start());
3681
3682 case '"':
09bef843 3683 s = scan_str(s,FALSE,FALSE);
4e553d73 3684 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3685 "### Saw string before '%s'\n", s);
5f80b19c 3686 } );
3280af22
NIS
3687 if (PL_expect == XOPERATOR) {
3688 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3689 PL_expect = XTERM;
a0d0e21e
LW
3690 depcom();
3691 return ','; /* grandfather non-comma-format format */
3692 }
463ee0b2 3693 else
8990e307 3694 no_op("String",s);
463ee0b2 3695 }
79072805 3696 if (!s)
85e6fe83 3697 missingterm((char*)0);
4633a7c4 3698 yylval.ival = OP_CONST;
3280af22 3699 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3700 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3701 yylval.ival = OP_STRINGIFY;
3702 break;
3703 }
3704 }
79072805
LW
3705 TERM(sublex_start());
3706
3707 case '`':
09bef843 3708 s = scan_str(s,FALSE,FALSE);
4e553d73 3709 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3710 "### Saw backtick string before '%s'\n", s);
5f80b19c 3711 } );
3280af22 3712 if (PL_expect == XOPERATOR)
8990e307 3713 no_op("Backticks",s);
79072805 3714 if (!s)
85e6fe83 3715 missingterm((char*)0);
79072805
LW
3716 yylval.ival = OP_BACKTICK;
3717 set_csh();
3718 TERM(sublex_start());
3719
3720 case '\\':
3721 s++;
599cee73 3722 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
9014280d 3723 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
599cee73 3724 *s, *s);
3280af22 3725 if (PL_expect == XOPERATOR)
8990e307 3726 no_op("Backslash",s);
79072805
LW
3727 OPERATOR(REFGEN);
3728
a7cb1f99 3729 case 'v':
e526c9e6 3730 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3731 char *start = s;
3732 start++;
3733 start++;
dd629d5b 3734 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3735 start++;
3736 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3737 s = scan_num(s, &yylval);
a7cb1f99
GS
3738 TERM(THING);
3739 }
e526c9e6 3740 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
3741 else if (!isALPHA(*start) && (PL_expect == XTERM
3742 || PL_expect == XREF || PL_expect == XSTATE
3743 || PL_expect == XTERMORDORDOR)) {
e526c9e6
GS
3744 char c = *start;
3745 GV *gv;
3746 *start = '\0';
3747 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3748 *start = c;
3749 if (!gv) {
b73d6f50 3750 s = scan_num(s, &yylval);
e526c9e6
GS
3751 TERM(THING);
3752 }
3753 }
a7cb1f99
GS
3754 }
3755 goto keylookup;
79072805 3756 case 'x':
3280af22 3757 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3758 s++;
3759 Mop(OP_REPEAT);
2f3197b3 3760 }
79072805
LW
3761 goto keylookup;
3762
378cc40b 3763 case '_':
79072805
LW
3764 case 'a': case 'A':
3765 case 'b': case 'B':
3766 case 'c': case 'C':
3767 case 'd': case 'D':
3768 case 'e': case 'E':
3769 case 'f': case 'F':
3770 case 'g': case 'G':
3771 case 'h': case 'H':
3772 case 'i': case 'I':
3773 case 'j': case 'J':
3774 case 'k': case 'K':
3775 case 'l': case 'L':
3776 case 'm': case 'M':
3777 case 'n': case 'N':
3778 case 'o': case 'O':
3779 case 'p': case 'P':
3780 case 'q': case 'Q':
3781 case 'r': case 'R':
3782 case 's': case 'S':
3783 case 't': case 'T':
3784 case 'u': case 'U':
a7cb1f99 3785 case 'V':
79072805
LW
3786 case 'w': case 'W':
3787 case 'X':
3788 case 'y': case 'Y':
3789 case 'z': case 'Z':
3790
49dc05e3 3791 keylookup: {
1d239bbb 3792 orig_keyword = 0;
161b471a
NIS
3793 gv = Nullgv;
3794 gvp = 0;
49dc05e3 3795
3280af22
NIS
3796 PL_bufptr = s;
3797 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3798
3799 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3800 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3801 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3802 (PL_tokenbuf[0] == 'q' &&
3803 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3804
3805 /* x::* is just a word, unless x is "CORE" */
3280af22 3806 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3807 goto just_a_word;
3808
3643fb5f 3809 d = s;
3280af22 3810 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3811 d++; /* no comments skipped here, or s### is misparsed */
3812
3813 /* Is this a label? */
3280af22
NIS
3814 if (!tmp && PL_expect == XSTATE
3815 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3816 s = d + 1;
3280af22 3817 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3818 CLINE;
3819 TOKEN(LABEL);
3643fb5f
CS
3820 }
3821
3822 /* Check for keywords */
3280af22 3823 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3824
3825 /* Is this a word before a => operator? */
1c3923b3 3826 if (*d == '=' && d[1] == '>') {
748a9306 3827 CLINE;
3280af22 3828 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 3829 yylval.opval->op_private = OPpCONST_BARE;
0064a8a9 3830 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 3831 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
3832 TERM(WORD);
3833 }
3834
a0d0e21e 3835 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3836 GV *ogv = Nullgv; /* override (winner) */
3837 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3838 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3839 CV *cv;
3280af22 3840 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3841 (cv = GvCVu(gv)))
3842 {
3843 if (GvIMPORTED_CV(gv))
3844 ogv = gv;
3845 else if (! CvMETHOD(cv))
3846 hgv = gv;
3847 }
3848 if (!ogv &&
3280af22
NIS
3849 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3850 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3851 GvCVu(gv) && GvIMPORTED_CV(gv))
3852 {
3853 ogv = gv;
3854 }
3855 }
3856 if (ogv) {
30fe34ed 3857 orig_keyword = tmp;
56f7f34b 3858 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3859 }
3860 else if (gv && !gvp
3861 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3862 && GvCVu(gv)
3280af22 3863 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3864 {
3865 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3866 }
56f7f34b
CS
3867 else { /* no override */
3868 tmp = -tmp;
ac206dc8 3869 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
9014280d 3870 Perl_warner(aTHX_ packWARN(WARN_MISC),
ac206dc8
RGS
3871 "dump() better written as CORE::dump()");
3872 }
56f7f34b
CS
3873 gv = Nullgv;
3874 gvp = 0;
4944e2f7
GS
3875 if (ckWARN(WARN_AMBIGUOUS) && hgv
3876 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
9014280d 3877 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 3878 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3879 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3880 }
a0d0e21e
LW
3881 }
3882
3883 reserved_word:
3884 switch (tmp) {
79072805
LW
3885
3886 default: /* not a keyword */
93a17b20 3887 just_a_word: {
96e4d5b1 3888 SV *sv;
ce29ac45 3889 int pkgname = 0;
3280af22 3890 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3891
3892 /* Get the rest if it looks like a package qualifier */
3893
155aba94 3894 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 3895 STRLEN morelen;
3280af22 3896 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3897 TRUE, &morelen);
3898 if (!morelen)
cea2e8a9 3899 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3900 *s == '\'' ? "'" : "::");
c3e0f903 3901 len += morelen;
ce29ac45 3902 pkgname = 1;
a0d0e21e 3903 }
8990e307 3904
3280af22
NIS
3905 if (PL_expect == XOPERATOR) {
3906 if (PL_bufptr == PL_linestart) {
57843af0 3907 CopLINE_dec(PL_curcop);
9014280d 3908 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3909 CopLINE_inc(PL_curcop);
463ee0b2
LW
3910 }
3911 else
54310121 3912 no_op("Bareword",s);
463ee0b2 3913 }
8990e307 3914
c3e0f903
GS
3915 /* Look for a subroutine with this name in current package,
3916 unless name is "Foo::", in which case Foo is a bearword
3917 (and a package name). */
3918
3919 if (len > 2 &&
3280af22 3920 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3921 {
e476b1b5 3922 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
9014280d 3923 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 3924 "Bareword \"%s\" refers to nonexistent package",
3280af22 3925 PL_tokenbuf);
c3e0f903 3926 len -= 2;
3280af22 3927 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3928 gv = Nullgv;
3929 gvp = 0;
3930 }
3931 else {
3932 len = 0;
3933 if (!gv)
3280af22 3934 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3935 }
3936
3937 /* if we saw a global override before, get the right name */
8990e307 3938
49dc05e3 3939 if (gvp) {
79cb57f6 3940 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3941 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3942 }
3943 else
3280af22 3944 sv = newSVpv(PL_tokenbuf,0);
8990e307 3945
a0d0e21e
LW
3946 /* Presume this is going to be a bareword of some sort. */
3947
3948 CLINE;
49dc05e3 3949 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e 3950 yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
3951 /* UTF-8 package name? */
3952 if (UTF && !IN_BYTES &&
3953 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
3954 SvUTF8_on(sv);
a0d0e21e 3955
c3e0f903
GS
3956 /* And if "Foo::", then that's what it certainly is. */
3957
3958 if (len)
3959 goto safe_bareword;
3960
8990e307
LW
3961 /* See if it's the indirect object for a list operator. */
3962
3280af22
NIS
3963 if (PL_oldoldbufptr &&
3964 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3965 (PL_oldoldbufptr == PL_last_lop
3966 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3967 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3968 (PL_expect == XREF ||
3969 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3970 {
748a9306
LW
3971 bool immediate_paren = *s == '(';
3972
a0d0e21e
LW
3973 /* (Now we can afford to cross potential line boundary.) */
3974 s = skipspace(s);
3975
3976 /* Two barewords in a row may indicate method call. */
3977
7e2040f0 3978 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3979 return tmp;
3980
3981 /* If not a declared subroutine, it's an indirect object. */
3982 /* (But it's an indir obj regardless for sort.) */
3983
7948272d 3984 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 3985 ((!gv || !GvCVu(gv)) &&
a9ef352a 3986 (PL_last_lop_op != OP_MAPSTART &&
f0670693 3987 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 3988 {
3280af22 3989 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 3990 goto bareword;
93a17b20
LW
3991 }
3992 }
8990e307 3993
3280af22 3994 PL_expect = XOPERATOR;
8990e307 3995 s = skipspace(s);
1c3923b3
GS
3996
3997 /* Is this a word before a => operator? */
ce29ac45 3998 if (*s == '=' && s[1] == '>' && !pkgname) {
1c3923b3
GS
3999 CLINE;
4000 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 4001 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 4002 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
4003 TERM(WORD);
4004 }
4005
4006 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 4007 if (*s == '(') {
79072805 4008 CLINE;
96e4d5b1 4009 if (gv && GvCVu(gv)) {
bf4acbe4 4010 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 4011 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 4012 s = d + 1;
4013 goto its_constant;
4014 }
4015 }
3280af22
NIS
4016 PL_nextval[PL_nexttoke].opval = yylval.opval;
4017 PL_expect = XOPERATOR;
93a17b20 4018 force_next(WORD);
c07a80fd 4019 yylval.ival = 0;
463ee0b2 4020 TOKEN('&');
79072805 4021 }
93a17b20 4022
a0d0e21e 4023 /* If followed by var or block, call it a method (unless sub) */
8990e307 4024
8ebc5c01 4025 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
4026 PL_last_lop = PL_oldbufptr;
4027 PL_last_lop_op = OP_METHOD;
93a17b20 4028 PREBLOCK(METHOD);
463ee0b2
LW
4029 }
4030
8990e307
LW
4031 /* If followed by a bareword, see if it looks like indir obj. */
4032
30fe34ed
RGS
4033 if (!orig_keyword
4034 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4035 && (tmp = intuit_method(s,gv)))
a0d0e21e 4036 return tmp;
93a17b20 4037
8990e307
LW
4038 /* Not a method, so call it a subroutine (if defined) */
4039
8ebc5c01 4040 if (gv && GvCVu(gv)) {
46fc3d4c 4041 CV* cv;
0453d815 4042 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
9014280d 4043 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4044 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4045 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4046 /* Check for a constant sub */
46fc3d4c 4047 cv = GvCV(gv);
96e4d5b1 4048 if ((sv = cv_const_sv(cv))) {
4049 its_constant:
4050 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4051 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4052 yylval.opval->op_private = 0;
4053 TOKEN(WORD);
89bfa8cd 4054 }
4055
a5f75d66
AD
4056 /* Resolve to GV now. */
4057 op_free(yylval.opval);
4058 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4059 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4060 PL_last_lop = PL_oldbufptr;
bf848113 4061 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4062 /* Is there a prototype? */
4063 if (SvPOK(cv)) {
4064 STRLEN len;
7a52d87a 4065 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4066 if (!len)
4067 TERM(FUNC0SUB);
7a52d87a 4068 if (strEQ(proto, "$"))
4633a7c4 4069 OPERATOR(UNIOPSUB);
7a52d87a 4070 if (*proto == '&' && *s == '{') {
c99da370
JH
4071 sv_setpv(PL_subname, PL_curstash ?
4072 "__ANON__" : "__ANON__::__ANON__");
4633a7c4
LW
4073 PREBLOCK(LSTOPSUB);
4074 }
a9ef352a 4075 }
3280af22
NIS
4076 PL_nextval[PL_nexttoke].opval = yylval.opval;
4077 PL_expect = XTERM;
8990e307
LW
4078 force_next(WORD);
4079 TOKEN(NOAMP);
4080 }
748a9306 4081
8990e307
LW
4082 /* Call it a bare word */
4083
5603f27d
GS
4084 if (PL_hints & HINT_STRICT_SUBS)
4085 yylval.opval->op_private |= OPpCONST_STRICT;
4086 else {
4087 bareword:
4088 if (ckWARN(WARN_RESERVED)) {
4089 if (lastchar != '-') {
4090 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
238ae712 4091 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
9014280d 4092 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
4093 PL_tokenbuf);
4094 }
748a9306
LW
4095 }
4096 }
c3e0f903
GS
4097
4098 safe_bareword:
f248d071 4099 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
9014280d 4100 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4101 "Operator or semicolon missing before %c%s",
3280af22 4102 lastchar, PL_tokenbuf);
9014280d 4103 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4104 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4105 lastchar, lastchar);
4106 }
93a17b20 4107 TOKEN(WORD);
79072805 4108 }
79072805 4109
68dc0745 4110 case KEY___FILE__:
46fc3d4c 4111 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4112 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4113 TERM(THING);
4114
79072805 4115 case KEY___LINE__:
cf2093f6 4116 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4117 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4118 TERM(THING);
68dc0745 4119
4120 case KEY___PACKAGE__:
4121 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
4122 (PL_curstash
4123 ? newSVsv(PL_curstname)
4124 : &PL_sv_undef));
79072805 4125 TERM(THING);
79072805 4126
e50aee73 4127 case KEY___DATA__:
79072805
LW
4128 case KEY___END__: {
4129 GV *gv;
79072805
LW
4130
4131 /*SUPPRESS 560*/
3280af22 4132 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4133 char *pname = "main";
3280af22
NIS
4134 if (PL_tokenbuf[2] == 'D')
4135 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4136 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4137 GvMULTI_on(gv);
79072805 4138 if (!GvIO(gv))
a0d0e21e 4139 GvIOp(gv) = newIO();
3280af22 4140 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4141#if defined(HAS_FCNTL) && defined(F_SETFD)
4142 {
3280af22 4143 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4144 fcntl(fd,F_SETFD,fd >= 3);
4145 }
79072805 4146#endif
fd049845 4147 /* Mark this internal pseudo-handle as clean */
4148 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4149 if (PL_preprocess)
50952442 4150 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4151 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4152 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4153 else
50952442 4154 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4155#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4156 /* if the script was opened in binmode, we need to revert
53129d29 4157 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4158 * XXX this is a questionable hack at best. */
53129d29
GS
4159 if (PL_bufend-PL_bufptr > 2
4160 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4161 {
4162 Off_t loc = 0;
50952442 4163 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4164 loc = PerlIO_tell(PL_rsfp);
4165 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4166 }
2986a63f
JH
4167#ifdef NETWARE
4168 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4169#else
c39cd008 4170 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 4171#endif /* NETWARE */
1143fce0
JH
4172#ifdef PERLIO_IS_STDIO /* really? */
4173# if defined(__BORLANDC__)
cb359b41
JH
4174 /* XXX see note in do_binmode() */
4175 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4176# endif
4177#endif
c39cd008
GS
4178 if (loc > 0)
4179 PerlIO_seek(PL_rsfp, loc, 0);
4180 }
4181 }
4182#endif
7948272d 4183#ifdef PERLIO_LAYERS
0064a8a9 4184 if (UTF && !IN_BYTES)
7948272d
NIS
4185 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4186#endif
3280af22 4187 PL_rsfp = Nullfp;
79072805
LW
4188 }
4189 goto fake_eof;
e929a76b 4190 }
de3bb511 4191
8990e307 4192 case KEY_AUTOLOAD:
ed6116ce 4193 case KEY_DESTROY:
79072805 4194 case KEY_BEGIN:
7d30b5c4 4195 case KEY_CHECK:
7d07dbc2 4196 case KEY_INIT:
7d30b5c4 4197 case KEY_END:
3280af22
NIS
4198 if (PL_expect == XSTATE) {
4199 s = PL_bufptr;
93a17b20 4200 goto really_sub;
79072805
LW
4201 }
4202 goto just_a_word;
4203
a0d0e21e
LW
4204 case KEY_CORE:
4205 if (*s == ':' && s[1] == ':') {
4206 s += 2;
748a9306 4207 d = s;
3280af22 4208 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4209 if (!(tmp = keyword(PL_tokenbuf, len)))
4210 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4211 if (tmp < 0)
4212 tmp = -tmp;
4213 goto reserved_word;
4214 }
4215 goto just_a_word;
4216
463ee0b2
LW
4217 case KEY_abs:
4218 UNI(OP_ABS);
4219
79072805
LW
4220 case KEY_alarm:
4221 UNI(OP_ALARM);
4222
4223 case KEY_accept:
a0d0e21e 4224 LOP(OP_ACCEPT,XTERM);
79072805 4225
463ee0b2
LW
4226 case KEY_and:
4227 OPERATOR(ANDOP);
4228
79072805 4229 case KEY_atan2:
a0d0e21e 4230 LOP(OP_ATAN2,XTERM);
85e6fe83 4231
79072805 4232 case KEY_bind:
a0d0e21e 4233 LOP(OP_BIND,XTERM);
79072805
LW
4234
4235 case KEY_binmode:
1c1fc3ea 4236 LOP(OP_BINMODE,XTERM);
79072805
LW
4237
4238 case KEY_bless:
a0d0e21e 4239 LOP(OP_BLESS,XTERM);
79072805
LW
4240
4241 case KEY_chop:
4242 UNI(OP_CHOP);
4243
4244 case KEY_continue:
4245 PREBLOCK(CONTINUE);
4246
4247 case KEY_chdir:
85e6fe83 4248 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4249 UNI(OP_CHDIR);
4250
4251 case KEY_close:
4252 UNI(OP_CLOSE);
4253
4254 case KEY_closedir:
4255 UNI(OP_CLOSEDIR);
4256
4257 case KEY_cmp:
4258 Eop(OP_SCMP);
4259
4260 case KEY_caller:
4261 UNI(OP_CALLER);
4262
4263 case KEY_crypt:
4264#ifdef FCRYPT
f4c556ac
GS
4265 if (!PL_cryptseen) {
4266 PL_cryptseen = TRUE;
de3bb511 4267 init_des();
f4c556ac 4268 }
a687059c 4269#endif
a0d0e21e 4270 LOP(OP_CRYPT,XTERM);
79072805
LW
4271
4272 case KEY_chmod:
a0d0e21e 4273 LOP(OP_CHMOD,XTERM);
79072805
LW
4274
4275 case KEY_chown:
a0d0e21e 4276 LOP(OP_CHOWN,XTERM);
79072805
LW
4277
4278 case KEY_connect:
a0d0e21e 4279 LOP(OP_CONNECT,XTERM);
79072805 4280
463ee0b2
LW
4281 case KEY_chr:
4282 UNI(OP_CHR);
4283
79072805
LW
4284 case KEY_cos:
4285 UNI(OP_COS);
4286
4287 case KEY_chroot:
4288 UNI(OP_CHROOT);
4289
4290 case KEY_do:
4291 s = skipspace(s);
4292 if (*s == '{')
a0d0e21e 4293 PRETERMBLOCK(DO);
79072805 4294 if (*s != '\'')
89c5585f 4295 s = force_word(s,WORD,TRUE,TRUE,FALSE);
378cc40b 4296 OPERATOR(DO);
79072805
LW
4297
4298 case KEY_die:
3280af22 4299 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4300 LOP(OP_DIE,XTERM);
79072805
LW
4301
4302 case KEY_defined:
4303 UNI(OP_DEFINED);
4304
4305 case KEY_delete:
a0d0e21e 4306 UNI(OP_DELETE);
79072805
LW
4307
4308 case KEY_dbmopen:
a0d0e21e
LW
4309 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4310 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4311
4312 case KEY_dbmclose:
4313 UNI(OP_DBMCLOSE);
4314
4315 case KEY_dump:
a0d0e21e 4316 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4317 LOOPX(OP_DUMP);
4318
4319 case KEY_else:
4320 PREBLOCK(ELSE);
4321
4322 case KEY_elsif:
57843af0 4323 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4324 OPERATOR(ELSIF);
4325
4326 case KEY_eq:
4327 Eop(OP_SEQ);
4328
a0d0e21e
LW
4329 case KEY_exists:
4330 UNI(OP_EXISTS);
4e553d73 4331
79072805
LW
4332 case KEY_exit:
4333 UNI(OP_EXIT);
4334
4335 case KEY_eval:
79072805 4336 s = skipspace(s);
3280af22 4337 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4338 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4339
4340 case KEY_eof:
4341 UNI(OP_EOF);
4342
c963b151
BD
4343 case KEY_err:
4344 OPERATOR(DOROP);
4345
79072805
LW
4346 case KEY_exp:
4347 UNI(OP_EXP);
4348
4349 case KEY_each:
4350 UNI(OP_EACH);
4351
4352 case KEY_exec:
4353 set_csh();
a0d0e21e 4354 LOP(OP_EXEC,XREF);
79072805
LW
4355
4356 case KEY_endhostent:
4357 FUN0(OP_EHOSTENT);
4358
4359 case KEY_endnetent:
4360 FUN0(OP_ENETENT);
4361
4362 case KEY_endservent:
4363 FUN0(OP_ESERVENT);
4364
4365 case KEY_endprotoent:
4366 FUN0(OP_EPROTOENT);
4367
4368 case KEY_endpwent:
4369 FUN0(OP_EPWENT);
4370
4371 case KEY_endgrent:
4372 FUN0(OP_EGRENT);
4373
4374 case KEY_for:
4375 case KEY_foreach:
57843af0 4376 yylval.ival = CopLINE(PL_curcop);
55497cff 4377 s = skipspace(s);
7e2040f0 4378 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4379 char *p = s;
3280af22 4380 if ((PL_bufend - p) >= 3 &&
55497cff 4381 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4382 p += 2;
77ca0c92
LW
4383 else if ((PL_bufend - p) >= 4 &&
4384 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4385 p += 3;
55497cff 4386 p = skipspace(p);
7e2040f0 4387 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4388 p = scan_ident(p, PL_bufend,
4389 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4390 p = skipspace(p);
4391 }
4392 if (*p != '$')
cea2e8a9 4393 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4394 }
79072805
LW
4395 OPERATOR(FOR);
4396
4397 case KEY_formline:
a0d0e21e 4398 LOP(OP_FORMLINE,XTERM);
79072805
LW
4399
4400 case KEY_fork:
4401 FUN0(OP_FORK);
4402
4403 case KEY_fcntl:
a0d0e21e 4404 LOP(OP_FCNTL,XTERM);
79072805
LW
4405
4406 case KEY_fileno:
4407 UNI(OP_FILENO);
4408
4409 case KEY_flock:
a0d0e21e 4410 LOP(OP_FLOCK,XTERM);
79072805
LW
4411
4412 case KEY_gt:
4413 Rop(OP_SGT);
4414
4415 case KEY_ge:
4416 Rop(OP_SGE);
4417
4418 case KEY_grep:
2c38e13d 4419 LOP(OP_GREPSTART, XREF);
79072805
LW
4420
4421 case KEY_goto:
a0d0e21e 4422 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4423 LOOPX(OP_GOTO);
4424
4425 case KEY_gmtime:
4426 UNI(OP_GMTIME);
4427
4428 case KEY_getc:
6f33ba73 4429 UNIDOR(OP_GETC);
79072805
LW
4430
4431 case KEY_getppid:
4432 FUN0(OP_GETPPID);
4433
4434 case KEY_getpgrp:
4435 UNI(OP_GETPGRP);
4436
4437 case KEY_getpriority:
a0d0e21e 4438 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4439
4440 case KEY_getprotobyname:
4441 UNI(OP_GPBYNAME);
4442
4443 case KEY_getprotobynumber:
a0d0e21e 4444 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4445
4446 case KEY_getprotoent:
4447 FUN0(OP_GPROTOENT);
4448
4449 case KEY_getpwent:
4450 FUN0(OP_GPWENT);
4451
4452 case KEY_getpwnam:
ff68c719 4453 UNI(OP_GPWNAM);
79072805
LW
4454
4455 case KEY_getpwuid:
ff68c719 4456 UNI(OP_GPWUID);
79072805
LW
4457
4458 case KEY_getpeername:
4459 UNI(OP_GETPEERNAME);
4460
4461 case KEY_gethostbyname:
4462 UNI(OP_GHBYNAME);
4463
4464 case KEY_gethostbyaddr:
a0d0e21e 4465 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4466
4467 case KEY_gethostent:
4468 FUN0(OP_GHOSTENT);
4469
4470 case KEY_getnetbyname:
4471 UNI(OP_GNBYNAME);
4472
4473 case KEY_getnetbyaddr:
a0d0e21e 4474 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4475
4476 case KEY_getnetent:
4477 FUN0(OP_GNETENT);
4478
4479 case KEY_getservbyname:
a0d0e21e 4480 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4481
4482 case KEY_getservbyport:
a0d0e21e 4483 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4484
4485 case KEY_getservent:
4486 FUN0(OP_GSERVENT);
4487
4488 case KEY_getsockname:
4489 UNI(OP_GETSOCKNAME);
4490
4491 case KEY_getsockopt:
a0d0e21e 4492 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4493
4494 case KEY_getgrent:
4495 FUN0(OP_GGRENT);
4496
4497 case KEY_getgrnam:
ff68c719 4498 UNI(OP_GGRNAM);
79072805
LW
4499
4500 case KEY_getgrgid:
ff68c719 4501 UNI(OP_GGRGID);
79072805
LW
4502
4503 case KEY_getlogin:
4504 FUN0(OP_GETLOGIN);
4505
93a17b20 4506 case KEY_glob:
a0d0e21e
LW
4507 set_csh();
4508 LOP(OP_GLOB,XTERM);
93a17b20 4509
79072805
LW
4510 case KEY_hex:
4511 UNI(OP_HEX);
4512
4513 case KEY_if:
57843af0 4514 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4515 OPERATOR(IF);
4516
4517 case KEY_index:
a0d0e21e 4518 LOP(OP_INDEX,XTERM);
79072805
LW
4519
4520 case KEY_int:
4521 UNI(OP_INT);
4522
4523 case KEY_ioctl:
a0d0e21e 4524 LOP(OP_IOCTL,XTERM);
79072805
LW
4525
4526 case KEY_join:
a0d0e21e 4527 LOP(OP_JOIN,XTERM);
79072805
LW
4528
4529 case KEY_keys:
4530 UNI(OP_KEYS);
4531
4532 case KEY_kill:
a0d0e21e 4533 LOP(OP_KILL,XTERM);
79072805
LW
4534
4535 case KEY_last:
a0d0e21e 4536 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4537 LOOPX(OP_LAST);
4e553d73 4538
79072805
LW
4539 case KEY_lc:
4540 UNI(OP_LC);
4541
4542 case KEY_lcfirst:
4543 UNI(OP_LCFIRST);
4544
4545 case KEY_local:
09bef843 4546 yylval.ival = 0;
79072805
LW
4547 OPERATOR(LOCAL);
4548
4549 case KEY_length:
4550 UNI(OP_LENGTH);
4551
4552 case KEY_lt:
4553 Rop(OP_SLT);
4554
4555 case KEY_le:
4556 Rop(OP_SLE);
4557
4558 case KEY_localtime:
4559 UNI(OP_LOCALTIME);
4560
4561 case KEY_log:
4562 UNI(OP_LOG);
4563
4564 case KEY_link:
a0d0e21e 4565 LOP(OP_LINK,XTERM);
79072805
LW
4566
4567 case KEY_listen:
a0d0e21e 4568 LOP(OP_LISTEN,XTERM);
79072805 4569
c0329465
MB
4570 case KEY_lock:
4571 UNI(OP_LOCK);
4572
79072805
LW
4573 case KEY_lstat:
4574 UNI(OP_LSTAT);
4575
4576 case KEY_m:
8782bef2 4577 s = scan_pat(s,OP_MATCH);
79072805
LW
4578 TERM(sublex_start());
4579
a0d0e21e 4580 case KEY_map:
2c38e13d 4581 LOP(OP_MAPSTART, XREF);
4e4e412b 4582
79072805 4583 case KEY_mkdir:
a0d0e21e 4584 LOP(OP_MKDIR,XTERM);
79072805
LW
4585
4586 case KEY_msgctl:
a0d0e21e 4587 LOP(OP_MSGCTL,XTERM);
79072805
LW
4588
4589 case KEY_msgget:
a0d0e21e 4590 LOP(OP_MSGGET,XTERM);
79072805
LW
4591
4592 case KEY_msgrcv:
a0d0e21e 4593 LOP(OP_MSGRCV,XTERM);
79072805
LW
4594
4595 case KEY_msgsnd:
a0d0e21e 4596 LOP(OP_MSGSND,XTERM);
79072805 4597
77ca0c92 4598 case KEY_our:
93a17b20 4599 case KEY_my:
77ca0c92 4600 PL_in_my = tmp;
c750a3ec 4601 s = skipspace(s);
7e2040f0 4602 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4603 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4604 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4605 goto really_sub;
def3634b 4606 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4607 if (!PL_in_my_stash) {
c750a3ec 4608 char tmpbuf[1024];
3280af22
NIS
4609 PL_bufptr = s;
4610 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4611 yyerror(tmpbuf);
4612 }
4613 }
09bef843 4614 yylval.ival = 1;
55497cff 4615 OPERATOR(MY);
93a17b20 4616
79072805 4617 case KEY_next:
a0d0e21e 4618 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4619 LOOPX(OP_NEXT);
4620
4621 case KEY_ne:
4622 Eop(OP_SNE);
4623
a0d0e21e 4624 case KEY_no:
3280af22 4625 if (PL_expect != XSTATE)
a0d0e21e
LW
4626 yyerror("\"no\" not allowed in expression");
4627 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 4628 s = force_version(s, FALSE);
a0d0e21e
LW
4629 yylval.ival = 0;
4630 OPERATOR(USE);
4631
4632 case KEY_not:
2d2e263d
LW
4633 if (*s == '(' || (s = skipspace(s), *s == '('))
4634 FUN1(OP_NOT);
4635 else
4636 OPERATOR(NOTOP);
a0d0e21e 4637
79072805 4638 case KEY_open:
93a17b20 4639 s = skipspace(s);
7e2040f0 4640 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4641 char *t;
7e2040f0 4642 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
93a17b20 4643 t = skipspace(d);
66fbe8fb
HS
4644 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4645 /* [perl #16184] */
4646 && !(t[0] == '=' && t[1] == '>')
4647 ) {
9014280d 4648 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 4649 "Precedence problem: open %.*s should be open(%.*s)",
66fbe8fb
HS
4650 d - s, s, d - s, s);
4651 }
93a17b20 4652 }
a0d0e21e 4653 LOP(OP_OPEN,XTERM);
79072805 4654
463ee0b2 4655 case KEY_or:
a0d0e21e 4656 yylval.ival = OP_OR;
463ee0b2
LW
4657 OPERATOR(OROP);
4658
79072805
LW
4659 case KEY_ord:
4660 UNI(OP_ORD);
4661
4662 case KEY_oct:
4663 UNI(OP_OCT);
4664
4665 case KEY_opendir:
a0d0e21e 4666 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4667
4668 case KEY_print:
3280af22 4669 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4670 LOP(OP_PRINT,XREF);
79072805
LW
4671
4672 case KEY_printf:
3280af22 4673 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4674 LOP(OP_PRTF,XREF);
79072805 4675
c07a80fd 4676 case KEY_prototype:
4677 UNI(OP_PROTOTYPE);
4678
79072805 4679 case KEY_push:
a0d0e21e 4680 LOP(OP_PUSH,XTERM);
79072805
LW
4681
4682 case KEY_pop:
6f33ba73 4683 UNIDOR(OP_POP);
79072805 4684
a0d0e21e 4685 case KEY_pos:
6f33ba73 4686 UNIDOR(OP_POS);
4e553d73 4687
79072805 4688 case KEY_pack:
a0d0e21e 4689 LOP(OP_PACK,XTERM);
79072805
LW
4690
4691 case KEY_package:
a0d0e21e 4692 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4693 OPERATOR(PACKAGE);
4694
4695 case KEY_pipe:
a0d0e21e 4696 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4697
4698 case KEY_q:
09bef843 4699 s = scan_str(s,FALSE,FALSE);
79072805 4700 if (!s)
85e6fe83 4701 missingterm((char*)0);
79072805
LW
4702 yylval.ival = OP_CONST;
4703 TERM(sublex_start());
4704
a0d0e21e
LW
4705 case KEY_quotemeta:
4706 UNI(OP_QUOTEMETA);
4707
8990e307 4708 case KEY_qw:
09bef843 4709 s = scan_str(s,FALSE,FALSE);
8990e307 4710 if (!s)
85e6fe83 4711 missingterm((char*)0);
8127e0e3
GS
4712 force_next(')');
4713 if (SvCUR(PL_lex_stuff)) {
4714 OP *words = Nullop;
4715 int warned = 0;
3280af22 4716 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4717 while (len) {
7948272d 4718 SV *sv;
8127e0e3
GS
4719 for (; isSPACE(*d) && len; --len, ++d) ;
4720 if (len) {
4721 char *b = d;
e476b1b5 4722 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4723 for (; !isSPACE(*d) && len; --len, ++d) {
4724 if (*d == ',') {
9014280d 4725 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4726 "Possible attempt to separate words with commas");
4727 ++warned;
4728 }
4729 else if (*d == '#') {
9014280d 4730 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4731 "Possible attempt to put comments in qw() list");
4732 ++warned;
4733 }
4734 }
4735 }
4736 else {
4737 for (; !isSPACE(*d) && len; --len, ++d) ;
4738 }
7948272d
NIS
4739 sv = newSVpvn(b, d-b);
4740 if (DO_UTF8(PL_lex_stuff))
4741 SvUTF8_on(sv);
8127e0e3 4742 words = append_elem(OP_LIST, words,
7948272d 4743 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4744 }
4745 }
8127e0e3
GS
4746 if (words) {
4747 PL_nextval[PL_nexttoke].opval = words;
4748 force_next(THING);
4749 }
55497cff 4750 }
37fd879b 4751 if (PL_lex_stuff) {
8127e0e3 4752 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4753 PL_lex_stuff = Nullsv;
4754 }
3280af22 4755 PL_expect = XTERM;
8127e0e3 4756 TOKEN('(');
8990e307 4757
79072805 4758 case KEY_qq:
09bef843 4759 s = scan_str(s,FALSE,FALSE);
79072805 4760 if (!s)
85e6fe83 4761 missingterm((char*)0);
a0d0e21e 4762 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4763 if (SvIVX(PL_lex_stuff) == '\'')
4764 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4765 TERM(sublex_start());
4766
8782bef2
GB
4767 case KEY_qr:
4768 s = scan_pat(s,OP_QR);
4769 TERM(sublex_start());
4770
79072805 4771 case KEY_qx:
09bef843 4772 s = scan_str(s,FALSE,FALSE);
79072805 4773 if (!s)
85e6fe83 4774 missingterm((char*)0);
79072805
LW
4775 yylval.ival = OP_BACKTICK;
4776 set_csh();
4777 TERM(sublex_start());
4778
4779 case KEY_return:
4780 OLDLOP(OP_RETURN);
4781
4782 case KEY_require:
a7cb1f99 4783 s = skipspace(s);
e759cc13
RGS
4784 if (isDIGIT(*s)) {
4785 s = force_version(s, FALSE);
a7cb1f99 4786 }
e759cc13
RGS
4787 else if (*s != 'v' || !isDIGIT(s[1])
4788 || (s = force_version(s, TRUE), *s == 'v'))
4789 {
a7cb1f99
GS
4790 *PL_tokenbuf = '\0';
4791 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4792 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4793 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4794 else if (*s == '<')
4795 yyerror("<> should be quotes");
4796 }
463ee0b2 4797 UNI(OP_REQUIRE);
79072805
LW
4798
4799 case KEY_reset:
4800 UNI(OP_RESET);
4801
4802 case KEY_redo:
a0d0e21e 4803 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4804 LOOPX(OP_REDO);
4805
4806 case KEY_rename:
a0d0e21e 4807 LOP(OP_RENAME,XTERM);
79072805
LW
4808
4809 case KEY_rand:
4810 UNI(OP_RAND);
4811
4812 case KEY_rmdir:
4813 UNI(OP_RMDIR);
4814
4815 case KEY_rindex:
a0d0e21e 4816 LOP(OP_RINDEX,XTERM);
79072805
LW
4817
4818 case KEY_read:
a0d0e21e 4819 LOP(OP_READ,XTERM);
79072805
LW
4820
4821 case KEY_readdir:
4822 UNI(OP_READDIR);
4823
93a17b20
LW
4824 case KEY_readline:
4825 set_csh();
6f33ba73 4826 UNIDOR(OP_READLINE);
93a17b20
LW
4827
4828 case KEY_readpipe:
4829 set_csh();
4830 UNI(OP_BACKTICK);
4831
79072805
LW
4832 case KEY_rewinddir:
4833 UNI(OP_REWINDDIR);
4834
4835 case KEY_recv:
a0d0e21e 4836 LOP(OP_RECV,XTERM);
79072805
LW
4837
4838 case KEY_reverse:
a0d0e21e 4839 LOP(OP_REVERSE,XTERM);
79072805
LW
4840
4841 case KEY_readlink:
6f33ba73 4842 UNIDOR(OP_READLINK);
79072805
LW
4843
4844 case KEY_ref:
4845 UNI(OP_REF);
4846
4847 case KEY_s:
4848 s = scan_subst(s);
4849 if (yylval.opval)
4850 TERM(sublex_start());
4851 else
4852 TOKEN(1); /* force error */
4853
a0d0e21e
LW
4854 case KEY_chomp:
4855 UNI(OP_CHOMP);
4e553d73 4856
79072805
LW
4857 case KEY_scalar:
4858 UNI(OP_SCALAR);
4859
4860 case KEY_select:
a0d0e21e 4861 LOP(OP_SELECT,XTERM);
79072805
LW
4862
4863 case KEY_seek:
a0d0e21e 4864 LOP(OP_SEEK,XTERM);
79072805
LW
4865
4866 case KEY_semctl:
a0d0e21e 4867 LOP(OP_SEMCTL,XTERM);
79072805
LW
4868
4869 case KEY_semget:
a0d0e21e 4870 LOP(OP_SEMGET,XTERM);
79072805
LW
4871
4872 case KEY_semop:
a0d0e21e 4873 LOP(OP_SEMOP,XTERM);
79072805
LW
4874
4875 case KEY_send:
a0d0e21e 4876 LOP(OP_SEND,XTERM);
79072805
LW
4877
4878 case KEY_setpgrp:
a0d0e21e 4879 LOP(OP_SETPGRP,XTERM);
79072805
LW
4880
4881 case KEY_setpriority:
a0d0e21e 4882 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
4883
4884 case KEY_sethostent:
ff68c719 4885 UNI(OP_SHOSTENT);
79072805
LW
4886
4887 case KEY_setnetent:
ff68c719 4888 UNI(OP_SNETENT);
79072805
LW
4889
4890 case KEY_setservent:
ff68c719 4891 UNI(OP_SSERVENT);
79072805
LW
4892
4893 case KEY_setprotoent:
ff68c719 4894 UNI(OP_SPROTOENT);
79072805
LW
4895
4896 case KEY_setpwent:
4897 FUN0(OP_SPWENT);
4898
4899 case KEY_setgrent:
4900 FUN0(OP_SGRENT);
4901
4902 case KEY_seekdir:
a0d0e21e 4903 LOP(OP_SEEKDIR,XTERM);
79072805
LW
4904
4905 case KEY_setsockopt:
a0d0e21e 4906 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
4907
4908 case KEY_shift:
6f33ba73 4909 UNIDOR(OP_SHIFT);
79072805
LW
4910
4911 case KEY_shmctl:
a0d0e21e 4912 LOP(OP_SHMCTL,XTERM);
79072805
LW
4913
4914 case KEY_shmget:
a0d0e21e 4915 LOP(OP_SHMGET,XTERM);
79072805
LW
4916
4917 case KEY_shmread:
a0d0e21e 4918 LOP(OP_SHMREAD,XTERM);
79072805
LW
4919
4920 case KEY_shmwrite:
a0d0e21e 4921 LOP(OP_SHMWRITE,XTERM);
79072805
LW
4922
4923 case KEY_shutdown:
a0d0e21e 4924 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
4925
4926 case KEY_sin:
4927 UNI(OP_SIN);
4928
4929 case KEY_sleep:
4930 UNI(OP_SLEEP);
4931
4932 case KEY_socket:
a0d0e21e 4933 LOP(OP_SOCKET,XTERM);
79072805
LW
4934
4935 case KEY_socketpair:
a0d0e21e 4936 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
4937
4938 case KEY_sort:
3280af22 4939 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
4940 s = skipspace(s);
4941 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 4942 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 4943 PL_expect = XTERM;
15f0808c 4944 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 4945 LOP(OP_SORT,XREF);
79072805
LW
4946
4947 case KEY_split:
a0d0e21e 4948 LOP(OP_SPLIT,XTERM);
79072805
LW
4949
4950 case KEY_sprintf:
a0d0e21e 4951 LOP(OP_SPRINTF,XTERM);
79072805
LW
4952
4953 case KEY_splice:
a0d0e21e 4954 LOP(OP_SPLICE,XTERM);
79072805
LW
4955
4956 case KEY_sqrt:
4957 UNI(OP_SQRT);
4958
4959 case KEY_srand:
4960 UNI(OP_SRAND);
4961
4962 case KEY_stat:
4963 UNI(OP_STAT);
4964
4965 case KEY_study:
79072805
LW
4966 UNI(OP_STUDY);
4967
4968 case KEY_substr:
a0d0e21e 4969 LOP(OP_SUBSTR,XTERM);
79072805
LW
4970
4971 case KEY_format:
4972 case KEY_sub:
93a17b20 4973 really_sub:
09bef843 4974 {
3280af22 4975 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 4976 SSize_t tboffset = 0;
09bef843 4977 expectation attrful;
d731386a 4978 bool have_name, have_proto, bad_proto;
09bef843
SB
4979 int key = tmp;
4980
4981 s = skipspace(s);
4982
7e2040f0 4983 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
4984 (*s == ':' && s[1] == ':'))
4985 {
4986 PL_expect = XBLOCK;
4987 attrful = XATTRBLOCK;
b1b65b59
JH
4988 /* remember buffer pos'n for later force_word */
4989 tboffset = s - PL_oldbufptr;
09bef843
SB
4990 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4991 if (strchr(tmpbuf, ':'))
4992 sv_setpv(PL_subname, tmpbuf);
4993 else {
4994 sv_setsv(PL_subname,PL_curstname);
4995 sv_catpvn(PL_subname,"::",2);
4996 sv_catpvn(PL_subname,tmpbuf,len);
4997 }
4998 s = skipspace(d);
4999 have_name = TRUE;
5000 }
463ee0b2 5001 else {
09bef843
SB
5002 if (key == KEY_my)
5003 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5004 PL_expect = XTERMBLOCK;
5005 attrful = XATTRTERM;
5006 sv_setpv(PL_subname,"?");
5007 have_name = FALSE;
463ee0b2 5008 }
4633a7c4 5009
09bef843
SB
5010 if (key == KEY_format) {
5011 if (*s == '=')
5012 PL_lex_formbrack = PL_lex_brackets + 1;
5013 if (have_name)
b1b65b59
JH
5014 (void) force_word(PL_oldbufptr + tboffset, WORD,
5015 FALSE, TRUE, TRUE);
09bef843
SB
5016 OPERATOR(FORMAT);
5017 }
79072805 5018
09bef843
SB
5019 /* Look for a prototype */
5020 if (*s == '(') {
5021 char *p;
5022
5023 s = scan_str(s,FALSE,FALSE);
37fd879b 5024 if (!s)
09bef843 5025 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 5026 /* strip spaces and check for bad characters */
09bef843
SB
5027 d = SvPVX(PL_lex_stuff);
5028 tmp = 0;
d731386a 5029 bad_proto = FALSE;
09bef843 5030 for (p = d; *p; ++p) {
d37a9538 5031 if (!isSPACE(*p)) {
09bef843 5032 d[tmp++] = *p;
d37a9538
ST
5033 if (!strchr("$@%*;[]&\\", *p))
5034 bad_proto = TRUE;
5035 }
09bef843
SB
5036 }
5037 d[tmp] = '\0';
420cdfc1 5038 if (bad_proto && ckWARN(WARN_SYNTAX))
9014280d 5039 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
420cdfc1
ST
5040 "Illegal character in prototype for %s : %s",
5041 SvPVX(PL_subname), d);
09bef843
SB
5042 SvCUR(PL_lex_stuff) = tmp;
5043 have_proto = TRUE;
68dc0745 5044
09bef843 5045 s = skipspace(s);
4633a7c4 5046 }
09bef843
SB
5047 else
5048 have_proto = FALSE;
5049
5050 if (*s == ':' && s[1] != ':')
5051 PL_expect = attrful;
904d85c5
RGS
5052 else if (!have_name && *s != '{' && key == KEY_sub)
5053 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
09bef843
SB
5054
5055 if (have_proto) {
b1b65b59
JH
5056 PL_nextval[PL_nexttoke].opval =
5057 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5058 PL_lex_stuff = Nullsv;
5059 force_next(THING);
68dc0745 5060 }
09bef843 5061 if (!have_name) {
c99da370
JH
5062 sv_setpv(PL_subname,
5063 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
09bef843 5064 TOKEN(ANONSUB);
4633a7c4 5065 }
b1b65b59
JH
5066 (void) force_word(PL_oldbufptr + tboffset, WORD,
5067 FALSE, TRUE, TRUE);
09bef843
SB
5068 if (key == KEY_my)
5069 TOKEN(MYSUB);
5070 TOKEN(SUB);
4633a7c4 5071 }
79072805
LW
5072
5073 case KEY_system:
5074 set_csh();
a0d0e21e 5075 LOP(OP_SYSTEM,XREF);
79072805
LW
5076
5077 case KEY_symlink:
a0d0e21e 5078 LOP(OP_SYMLINK,XTERM);
79072805
LW
5079
5080 case KEY_syscall:
a0d0e21e 5081 LOP(OP_SYSCALL,XTERM);
79072805 5082
c07a80fd 5083 case KEY_sysopen:
5084 LOP(OP_SYSOPEN,XTERM);
5085
137443ea 5086 case KEY_sysseek:
5087 LOP(OP_SYSSEEK,XTERM);
5088
79072805 5089 case KEY_sysread:
a0d0e21e 5090 LOP(OP_SYSREAD,XTERM);
79072805
LW
5091
5092 case KEY_syswrite:
a0d0e21e 5093 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5094
5095 case KEY_tr:
5096 s = scan_trans(s);
5097 TERM(sublex_start());
5098
5099 case KEY_tell:
5100 UNI(OP_TELL);
5101
5102 case KEY_telldir:
5103 UNI(OP_TELLDIR);
5104
463ee0b2 5105 case KEY_tie:
a0d0e21e 5106 LOP(OP_TIE,XTERM);
463ee0b2 5107
c07a80fd 5108 case KEY_tied:
5109 UNI(OP_TIED);
5110
79072805
LW
5111 case KEY_time:
5112 FUN0(OP_TIME);
5113
5114 case KEY_times:
5115 FUN0(OP_TMS);
5116
5117 case KEY_truncate:
a0d0e21e 5118 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5119
5120 case KEY_uc:
5121 UNI(OP_UC);
5122
5123 case KEY_ucfirst:
5124 UNI(OP_UCFIRST);
5125
463ee0b2
LW
5126 case KEY_untie:
5127 UNI(OP_UNTIE);
5128
79072805 5129 case KEY_until:
57843af0 5130 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5131 OPERATOR(UNTIL);
5132
5133 case KEY_unless:
57843af0 5134 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5135 OPERATOR(UNLESS);
5136
5137 case KEY_unlink:
a0d0e21e 5138 LOP(OP_UNLINK,XTERM);
79072805
LW
5139
5140 case KEY_undef:
6f33ba73 5141 UNIDOR(OP_UNDEF);
79072805
LW
5142
5143 case KEY_unpack:
a0d0e21e 5144 LOP(OP_UNPACK,XTERM);
79072805
LW
5145
5146 case KEY_utime:
a0d0e21e 5147 LOP(OP_UTIME,XTERM);
79072805
LW
5148
5149 case KEY_umask:
6f33ba73 5150 UNIDOR(OP_UMASK);
79072805
LW
5151
5152 case KEY_unshift:
a0d0e21e
LW
5153 LOP(OP_UNSHIFT,XTERM);
5154
5155 case KEY_use:
3280af22 5156 if (PL_expect != XSTATE)
a0d0e21e 5157 yyerror("\"use\" not allowed in expression");
89bfa8cd 5158 s = skipspace(s);
a7cb1f99 5159 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
e759cc13 5160 s = force_version(s, TRUE);
a7cb1f99 5161 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5162 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5163 force_next(WORD);
5164 }
e759cc13
RGS
5165 else if (*s == 'v') {
5166 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5167 s = force_version(s, FALSE);
5168 }
89bfa8cd 5169 }
5170 else {
5171 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 5172 s = force_version(s, FALSE);
89bfa8cd 5173 }
a0d0e21e
LW
5174 yylval.ival = 1;
5175 OPERATOR(USE);
79072805
LW
5176
5177 case KEY_values:
5178 UNI(OP_VALUES);
5179
5180 case KEY_vec:
a0d0e21e 5181 LOP(OP_VEC,XTERM);
79072805
LW
5182
5183 case KEY_while:
57843af0 5184 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5185 OPERATOR(WHILE);
5186
5187 case KEY_warn:
3280af22 5188 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5189 LOP(OP_WARN,XTERM);
79072805
LW
5190
5191 case KEY_wait:
5192 FUN0(OP_WAIT);
5193
5194 case KEY_waitpid:
a0d0e21e 5195 LOP(OP_WAITPID,XTERM);
79072805
LW
5196
5197 case KEY_wantarray:
5198 FUN0(OP_WANTARRAY);
5199
5200 case KEY_write:
9d116dd7
JH
5201#ifdef EBCDIC
5202 {
df3728a2
JH
5203 char ctl_l[2];
5204 ctl_l[0] = toCTRL('L');
5205 ctl_l[1] = '\0';
9d116dd7
JH
5206 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5207 }
5208#else
5209 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5210#endif
79072805
LW
5211 UNI(OP_ENTERWRITE);
5212
5213 case KEY_x:
3280af22 5214 if (PL_expect == XOPERATOR)
79072805
LW
5215 Mop(OP_REPEAT);
5216 check_uni();
5217 goto just_a_word;
5218
a0d0e21e
LW
5219 case KEY_xor:
5220 yylval.ival = OP_XOR;
5221 OPERATOR(OROP);
5222
79072805
LW
5223 case KEY_y:
5224 s = scan_trans(s);
5225 TERM(sublex_start());
5226 }
49dc05e3 5227 }}
79072805 5228}
bf4acbe4
GS
5229#ifdef __SC__
5230#pragma segment Main
5231#endif
79072805 5232
e930465f
JH
5233static int
5234S_pending_ident(pTHX)
8eceec63
SC
5235{
5236 register char *d;
5237 register I32 tmp;
5238 /* pit holds the identifier we read and pending_ident is reset */
5239 char pit = PL_pending_ident;
5240 PL_pending_ident = 0;
5241
5242 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5243 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5244
5245 /* if we're in a my(), we can't allow dynamics here.
5246 $foo'bar has already been turned into $foo::bar, so
5247 just check for colons.
5248
5249 if it's a legal name, the OP is a PADANY.
5250 */
5251 if (PL_in_my) {
5252 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5253 if (strchr(PL_tokenbuf,':'))
5254 yyerror(Perl_form(aTHX_ "No package name allowed for "
5255 "variable %s in \"our\"",
5256 PL_tokenbuf));
dd2155a4 5257 tmp = allocmy(PL_tokenbuf);
8eceec63
SC
5258 }
5259 else {
5260 if (strchr(PL_tokenbuf,':'))
5261 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5262
5263 yylval.opval = newOP(OP_PADANY, 0);
dd2155a4 5264 yylval.opval->op_targ = allocmy(PL_tokenbuf);
8eceec63
SC
5265 return PRIVATEREF;
5266 }
5267 }
5268
5269 /*
5270 build the ops for accesses to a my() variable.
5271
5272 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5273 then used in a comparison. This catches most, but not
5274 all cases. For instance, it catches
5275 sort { my($a); $a <=> $b }
5276 but not
5277 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5278 (although why you'd do that is anyone's guess).
5279 */
5280
5281 if (!strchr(PL_tokenbuf,':')) {
8eceec63 5282 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
8eceec63 5283 /* might be an "our" variable" */
dd2155a4 5284 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
8eceec63 5285 /* build ops for a bareword */
dd2155a4 5286 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
8eceec63
SC
5287 sv_catpvn(sym, "::", 2);
5288 sv_catpv(sym, PL_tokenbuf+1);
5289 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5290 yylval.opval->op_private = OPpCONST_ENTERED;
5291 gv_fetchpv(SvPVX(sym),
5292 (PL_in_eval
5293 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 5294 : GV_ADDMULTI
8eceec63
SC
5295 ),
5296 ((PL_tokenbuf[0] == '$') ? SVt_PV
5297 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5298 : SVt_PVHV));
5299 return WORD;
5300 }
5301
5302 /* if it's a sort block and they're naming $a or $b */
5303 if (PL_last_lop_op == OP_SORT &&
5304 PL_tokenbuf[0] == '$' &&
5305 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5306 && !PL_tokenbuf[2])
5307 {
5308 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5309 d < PL_bufend && *d != '\n';
5310 d++)
5311 {
5312 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5313 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5314 PL_tokenbuf);
5315 }
5316 }
5317 }
5318
5319 yylval.opval = newOP(OP_PADANY, 0);
5320 yylval.opval->op_targ = tmp;
5321 return PRIVATEREF;
5322 }
5323 }
5324
5325 /*
5326 Whine if they've said @foo in a doublequoted string,
5327 and @foo isn't a variable we can find in the symbol
5328 table.
5329 */
5330 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5331 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5332 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5333 && ckWARN(WARN_AMBIGUOUS))
5334 {
5335 /* Downgraded from fatal to warning 20000522 mjd */
9014280d 5336 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8eceec63
SC
5337 "Possible unintended interpolation of %s in string",
5338 PL_tokenbuf);
5339 }
5340 }
5341
5342 /* build ops for a bareword */
5343 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5344 yylval.opval->op_private = OPpCONST_ENTERED;
5345 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5346 ((PL_tokenbuf[0] == '$') ? SVt_PV
5347 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5348 : SVt_PVHV));
5349 return WORD;
5350}
5351
79072805 5352I32
864dbfa3 5353Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5354{
5355 switch (*d) {
5356 case '_':
5357 if (d[1] == '_') {
a0d0e21e 5358 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5359 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5360 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5361 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5362 if (strEQ(d,"__END__")) return KEY___END__;
5363 }
5364 break;
8990e307
LW
5365 case 'A':
5366 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5367 break;
79072805 5368 case 'a':
463ee0b2
LW
5369 switch (len) {
5370 case 3:
a0d0e21e
LW
5371 if (strEQ(d,"and")) return -KEY_and;
5372 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5373 break;
463ee0b2 5374 case 5:
a0d0e21e
LW
5375 if (strEQ(d,"alarm")) return -KEY_alarm;
5376 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5377 break;
5378 case 6:
a0d0e21e 5379 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5380 break;
5381 }
79072805
LW
5382 break;
5383 case 'B':
5384 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5385 break;
79072805 5386 case 'b':
a0d0e21e
LW
5387 if (strEQ(d,"bless")) return -KEY_bless;
5388 if (strEQ(d,"bind")) return -KEY_bind;
5389 if (strEQ(d,"binmode")) return -KEY_binmode;
5390 break;
5391 case 'C':
5392 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5393 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5394 break;
5395 case 'c':
5396 switch (len) {
5397 case 3:
a0d0e21e
LW
5398 if (strEQ(d,"cmp")) return -KEY_cmp;
5399 if (strEQ(d,"chr")) return -KEY_chr;
5400 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5401 break;
5402 case 4:
77bc9082 5403 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5404 break;
5405 case 5:
a0d0e21e
LW
5406 if (strEQ(d,"close")) return -KEY_close;
5407 if (strEQ(d,"chdir")) return -KEY_chdir;
77bc9082 5408 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5409 if (strEQ(d,"chmod")) return -KEY_chmod;
5410 if (strEQ(d,"chown")) return -KEY_chown;
5411 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5412 break;
5413 case 6:
a0d0e21e
LW
5414 if (strEQ(d,"chroot")) return -KEY_chroot;
5415 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5416 break;
5417 case 7:
a0d0e21e 5418 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5419 break;
5420 case 8:
a0d0e21e
LW
5421 if (strEQ(d,"closedir")) return -KEY_closedir;
5422 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5423 break;
5424 }
5425 break;
ed6116ce
LW
5426 case 'D':
5427 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5428 break;
79072805
LW
5429 case 'd':
5430 switch (len) {
5431 case 2:
5432 if (strEQ(d,"do")) return KEY_do;
5433 break;
5434 case 3:
a0d0e21e 5435 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5436 break;
5437 case 4:
a0d0e21e 5438 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5439 break;
5440 case 6:
5441 if (strEQ(d,"delete")) return KEY_delete;
5442 break;
5443 case 7:
5444 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5445 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5446 break;
5447 case 8:
a0d0e21e 5448 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5449 break;
5450 }
5451 break;
5452 case 'E':
79072805
LW
5453 if (strEQ(d,"END")) return KEY_END;
5454 break;
5455 case 'e':
5456 switch (len) {
5457 case 2:
a0d0e21e 5458 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5459 break;
5460 case 3:
a0d0e21e 5461 if (strEQ(d,"eof")) return -KEY_eof;
c963b151 5462 if (strEQ(d,"err")) return -KEY_err;
a0d0e21e 5463 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5464 break;
5465 case 4:
5466 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5467 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5468 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5469 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5470 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5471 break;
5472 case 5:
5473 if (strEQ(d,"elsif")) return KEY_elsif;
5474 break;
a0d0e21e
LW
5475 case 6:
5476 if (strEQ(d,"exists")) return KEY_exists;
cea2e8a9 5477 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
a0d0e21e 5478 break;
79072805 5479 case 8:
a0d0e21e
LW
5480 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5481 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5482 break;
5483 case 9:
a0d0e21e 5484 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5485 break;
5486 case 10:
a0d0e21e
LW
5487 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5488 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5489 break;
5490 case 11:
a0d0e21e 5491 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5492 break;
a687059c 5493 }
a687059c 5494 break;
79072805
LW
5495 case 'f':
5496 switch (len) {
5497 case 3:
5498 if (strEQ(d,"for")) return KEY_for;
5499 break;
5500 case 4:
a0d0e21e 5501 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5502 break;
5503 case 5:
a0d0e21e
LW
5504 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5505 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5506 break;
5507 case 6:
5508 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5509 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5510 break;
5511 case 7:
5512 if (strEQ(d,"foreach")) return KEY_foreach;
5513 break;
5514 case 8:
a0d0e21e 5515 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5516 break;
378cc40b 5517 }
a687059c 5518 break;
79072805 5519 case 'g':
a687059c
LW
5520 if (strnEQ(d,"get",3)) {
5521 d += 3;
5522 if (*d == 'p') {
79072805
LW
5523 switch (len) {
5524 case 7:
a0d0e21e
LW
5525 if (strEQ(d,"ppid")) return -KEY_getppid;
5526 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5527 break;
5528 case 8:
a0d0e21e
LW
5529 if (strEQ(d,"pwent")) return -KEY_getpwent;
5530 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5531 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5532 break;
5533 case 11:
a0d0e21e
LW
5534 if (strEQ(d,"peername")) return -KEY_getpeername;
5535 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5536 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5537 break;
5538 case 14:
a0d0e21e 5539 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5540 break;
5541 case 16:
a0d0e21e 5542 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5543 break;
5544 }
a687059c
LW
5545 }
5546 else if (*d == 'h') {
a0d0e21e
LW
5547 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5548 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5549 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5550 }
5551 else if (*d == 'n') {
a0d0e21e
LW
5552 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5553 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5554 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5555 }
5556 else if (*d == 's') {
a0d0e21e
LW
5557 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5558 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5559 if (strEQ(d,"servent")) return -KEY_getservent;
5560 if (strEQ(d,"sockname")) return -KEY_getsockname;
5561 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5562 }
5563 else if (*d == 'g') {
a0d0e21e
LW
5564 if (strEQ(d,"grent")) return -KEY_getgrent;
5565 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5566 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5567 }
5568 else if (*d == 'l') {
a0d0e21e 5569 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5570 }
a0d0e21e 5571 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5572 break;
a687059c 5573 }
79072805
LW
5574 switch (len) {
5575 case 2:
a0d0e21e
LW
5576 if (strEQ(d,"gt")) return -KEY_gt;
5577 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5578 break;
5579 case 4:
5580 if (strEQ(d,"grep")) return KEY_grep;
5581 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5582 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5583 break;
5584 case 6:
a0d0e21e 5585 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5586 break;
378cc40b 5587 }
a687059c 5588 break;
79072805 5589 case 'h':
a0d0e21e 5590 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5591 break;
7d07dbc2
MB
5592 case 'I':
5593 if (strEQ(d,"INIT")) return KEY_INIT;
5594 break;
79072805
LW
5595 case 'i':
5596 switch (len) {
5597 case 2:
5598 if (strEQ(d,"if")) return KEY_if;
5599 break;
5600 case 3:
a0d0e21e 5601 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5602 break;
5603 case 5:
a0d0e21e
LW
5604 if (strEQ(d,"index")) return -KEY_index;
5605 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5606 break;
5607 }
a687059c 5608 break;
79072805 5609 case 'j':
a0d0e21e 5610 if (strEQ(d,"join")) return -KEY_join;
a687059c 5611 break;
79072805
LW
5612 case 'k':
5613 if (len == 4) {
3a6a8333 5614 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5615 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5616 }
79072805 5617 break;
79072805
LW
5618 case 'l':
5619 switch (len) {
5620 case 2:
a0d0e21e
LW
5621 if (strEQ(d,"lt")) return -KEY_lt;
5622 if (strEQ(d,"le")) return -KEY_le;
5623 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5624 break;
5625 case 3:
a0d0e21e 5626 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5627 break;
5628 case 4:
5629 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5630 if (strEQ(d,"link")) return -KEY_link;
c0329465 5631 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5632 break;
79072805
LW
5633 case 5:
5634 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5635 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5636 break;
5637 case 6:
a0d0e21e
LW
5638 if (strEQ(d,"length")) return -KEY_length;
5639 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5640 break;
5641 case 7:
a0d0e21e 5642 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5643 break;
5644 case 9:
a0d0e21e 5645 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5646 break;
5647 }
a687059c 5648 break;
79072805
LW
5649 case 'm':
5650 switch (len) {
5651 case 1: return KEY_m;
93a17b20
LW
5652 case 2:
5653 if (strEQ(d,"my")) return KEY_my;
5654 break;
a0d0e21e
LW
5655 case 3:
5656 if (strEQ(d,"map")) return KEY_map;
5657 break;
79072805 5658 case 5:
a0d0e21e 5659 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5660 break;
5661 case 6:
a0d0e21e
LW
5662 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5663 if (strEQ(d,"msgget")) return -KEY_msgget;
5664 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5665 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5666 break;
5667 }
a687059c 5668 break;
79072805
LW
5669 case 'n':
5670 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5671 if (strEQ(d,"ne")) return -KEY_ne;
5672 if (strEQ(d,"not")) return -KEY_not;
5673 if (strEQ(d,"no")) return KEY_no;
a687059c 5674 break;
79072805
LW
5675 case 'o':
5676 switch (len) {
463ee0b2 5677 case 2:
a0d0e21e 5678 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5679 break;
79072805 5680 case 3:
a0d0e21e
LW
5681 if (strEQ(d,"ord")) return -KEY_ord;
5682 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5683 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5684 break;
5685 case 4:
a0d0e21e 5686 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5687 break;
5688 case 7:
a0d0e21e 5689 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5690 break;
fe14fcc3 5691 }
a687059c 5692 break;
79072805
LW
5693 case 'p':
5694 switch (len) {
5695 case 3:
4e553d73 5696 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5697 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5698 break;
5699 case 4:
3a6a8333 5700 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5701 if (strEQ(d,"pack")) return -KEY_pack;
5702 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5703 break;
5704 case 5:
5705 if (strEQ(d,"print")) return KEY_print;
5706 break;
5707 case 6:
5708 if (strEQ(d,"printf")) return KEY_printf;
5709 break;
5710 case 7:
5711 if (strEQ(d,"package")) return KEY_package;
5712 break;
c07a80fd 5713 case 9:
5714 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5715 }
79072805
LW
5716 break;
5717 case 'q':
5718 if (len <= 2) {
5719 if (strEQ(d,"q")) return KEY_q;
8782bef2 5720 if (strEQ(d,"qr")) return KEY_qr;
79072805 5721 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5722 if (strEQ(d,"qw")) return KEY_qw;
79072805 5723 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5724 }
a0d0e21e 5725 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5726 break;
5727 case 'r':
5728 switch (len) {
5729 case 3:
a0d0e21e 5730 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5731 break;
5732 case 4:
a0d0e21e
LW
5733 if (strEQ(d,"read")) return -KEY_read;
5734 if (strEQ(d,"rand")) return -KEY_rand;
5735 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5736 if (strEQ(d,"redo")) return KEY_redo;
5737 break;
5738 case 5:
a0d0e21e
LW
5739 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5740 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5741 break;
5742 case 6:
5743 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5744 if (strEQ(d,"rename")) return -KEY_rename;
5745 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5746 break;
5747 case 7:
ec4ab249 5748 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5749 if (strEQ(d,"reverse")) return -KEY_reverse;
5750 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5751 break;
5752 case 8:
a0d0e21e
LW
5753 if (strEQ(d,"readlink")) return -KEY_readlink;
5754 if (strEQ(d,"readline")) return -KEY_readline;
5755 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5756 break;
5757 case 9:
a0d0e21e 5758 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5759 break;
a687059c 5760 }
79072805
LW
5761 break;
5762 case 's':
a687059c 5763 switch (d[1]) {
79072805 5764 case 0: return KEY_s;
a687059c 5765 case 'c':
79072805 5766 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5767 break;
5768 case 'e':
79072805
LW
5769 switch (len) {
5770 case 4:
a0d0e21e
LW
5771 if (strEQ(d,"seek")) return -KEY_seek;
5772 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5773 break;
5774 case 5:
a0d0e21e 5775 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5776 break;
5777 case 6:
a0d0e21e
LW
5778 if (strEQ(d,"select")) return -KEY_select;
5779 if (strEQ(d,"semctl")) return -KEY_semctl;
5780 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5781 break;
5782 case 7:
a0d0e21e
LW
5783 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5784 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5785 break;
5786 case 8:
a0d0e21e
LW
5787 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5788 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
5789 break;
5790 case 9:
a0d0e21e 5791 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
5792 break;
5793 case 10:
a0d0e21e
LW
5794 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5795 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5796 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
5797 break;
5798 case 11:
a0d0e21e
LW
5799 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5800 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
5801 break;
5802 }
a687059c
LW
5803 break;
5804 case 'h':
79072805
LW
5805 switch (len) {
5806 case 5:
3a6a8333 5807 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
5808 break;
5809 case 6:
a0d0e21e
LW
5810 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5811 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
5812 break;
5813 case 7:
a0d0e21e 5814 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
5815 break;
5816 case 8:
a0d0e21e
LW
5817 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5818 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
5819 break;
5820 }
a687059c
LW
5821 break;
5822 case 'i':
a0d0e21e 5823 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
5824 break;
5825 case 'l':
a0d0e21e 5826 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
5827 break;
5828 case 'o':
79072805 5829 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
5830 if (strEQ(d,"socket")) return -KEY_socket;
5831 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
5832 break;
5833 case 'p':
79072805 5834 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 5835 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 5836 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
5837 break;
5838 case 'q':
a0d0e21e 5839 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
5840 break;
5841 case 'r':
a0d0e21e 5842 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
5843 break;
5844 case 't':
a0d0e21e 5845 if (strEQ(d,"stat")) return -KEY_stat;
79072805 5846 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
5847 break;
5848 case 'u':
a0d0e21e 5849 if (strEQ(d,"substr")) return -KEY_substr;
79072805 5850 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
5851 break;
5852 case 'y':
79072805
LW
5853 switch (len) {
5854 case 6:
a0d0e21e 5855 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
5856 break;
5857 case 7:
a0d0e21e
LW
5858 if (strEQ(d,"symlink")) return -KEY_symlink;
5859 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 5860 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5861 if (strEQ(d,"sysread")) return -KEY_sysread;
5862 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
5863 break;
5864 case 8:
a0d0e21e 5865 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 5866 break;
a687059c 5867 }
a687059c
LW
5868 break;
5869 }
5870 break;
79072805
LW
5871 case 't':
5872 switch (len) {
5873 case 2:
5874 if (strEQ(d,"tr")) return KEY_tr;
5875 break;
463ee0b2
LW
5876 case 3:
5877 if (strEQ(d,"tie")) return KEY_tie;
5878 break;
79072805 5879 case 4:
a0d0e21e 5880 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 5881 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 5882 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
5883 break;
5884 case 5:
a0d0e21e 5885 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
5886 break;
5887 case 7:
a0d0e21e 5888 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
5889 break;
5890 case 8:
a0d0e21e 5891 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 5892 break;
378cc40b 5893 }
a687059c 5894 break;
79072805
LW
5895 case 'u':
5896 switch (len) {
5897 case 2:
a0d0e21e
LW
5898 if (strEQ(d,"uc")) return -KEY_uc;
5899 break;
5900 case 3:
5901 if (strEQ(d,"use")) return KEY_use;
79072805
LW
5902 break;
5903 case 5:
5904 if (strEQ(d,"undef")) return KEY_undef;
5905 if (strEQ(d,"until")) return KEY_until;
463ee0b2 5906 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
5907 if (strEQ(d,"utime")) return -KEY_utime;
5908 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
5909 break;
5910 case 6:
5911 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
5912 if (strEQ(d,"unpack")) return -KEY_unpack;
5913 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
5914 break;
5915 case 7:
3a6a8333 5916 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 5917 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 5918 break;
a687059c
LW
5919 }
5920 break;
79072805 5921 case 'v':
a0d0e21e
LW
5922 if (strEQ(d,"values")) return -KEY_values;
5923 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 5924 break;
79072805
LW
5925 case 'w':
5926 switch (len) {
5927 case 4:
a0d0e21e
LW
5928 if (strEQ(d,"warn")) return -KEY_warn;
5929 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
5930 break;
5931 case 5:
5932 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 5933 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
5934 break;
5935 case 7:
a0d0e21e 5936 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
5937 break;
5938 case 9:
a0d0e21e 5939 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 5940 break;
2f3197b3 5941 }
a687059c 5942 break;
79072805 5943 case 'x':
a0d0e21e
LW
5944 if (len == 1) return -KEY_x;
5945 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 5946 break;
79072805
LW
5947 case 'y':
5948 if (len == 1) return KEY_y;
5949 break;
5950 case 'z':
a687059c
LW
5951 break;
5952 }
79072805 5953 return 0;
a687059c
LW
5954}
5955
76e3520e 5956STATIC void
cea2e8a9 5957S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 5958{
2f3197b3
LW
5959 char *w;
5960
d008e5eb 5961 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
5962 if (ckWARN(WARN_SYNTAX)) {
5963 int level = 1;
5964 for (w = s+2; *w && level; w++) {
5965 if (*w == '(')
5966 ++level;
5967 else if (*w == ')')
5968 --level;
5969 }
5970 if (*w)
5971 for (; *w && isSPACE(*w); w++) ;
5972 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9014280d 5973 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 5974 "%s (...) interpreted as function",name);
d008e5eb 5975 }
2f3197b3 5976 }
3280af22 5977 while (s < PL_bufend && isSPACE(*s))
2f3197b3 5978 s++;
a687059c
LW
5979 if (*s == '(')
5980 s++;
3280af22 5981 while (s < PL_bufend && isSPACE(*s))
a687059c 5982 s++;
7e2040f0 5983 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 5984 w = s++;
7e2040f0 5985 while (isALNUM_lazy_if(s,UTF))
a687059c 5986 s++;
3280af22 5987 while (s < PL_bufend && isSPACE(*s))
a687059c 5988 s++;
e929a76b 5989 if (*s == ',') {
463ee0b2 5990 int kw;
e929a76b 5991 *s = '\0';
864dbfa3 5992 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 5993 *s = ',';
463ee0b2 5994 if (kw)
e929a76b 5995 return;
cea2e8a9 5996 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
5997 }
5998 }
5999}
6000
423cee85
JH
6001/* Either returns sv, or mortalizes sv and returns a new SV*.
6002 Best used as sv=new_constant(..., sv, ...).
6003 If s, pv are NULL, calls subroutine with one argument,
6004 and type is used with error messages only. */
6005
b3ac6de7 6006STATIC SV *
dff6d3cd 6007S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 6008 const char *type)
b3ac6de7 6009{
b3ac6de7 6010 dSP;
3280af22 6011 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 6012 SV *res;
b3ac6de7
IZ
6013 SV **cvp;
6014 SV *cv, *typesv;
f0af216f 6015 const char *why1, *why2, *why3;
4e553d73 6016
f0af216f 6017 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
6018 SV *msg;
6019
f0af216f 6020 why2 = strEQ(key,"charnames")
41ab332f 6021 ? "(possibly a missing \"use charnames ...\")"
f0af216f 6022 : "";
4e553d73 6023 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
6024 (type ? type: "undef"), why2);
6025
6026 /* This is convoluted and evil ("goto considered harmful")
6027 * but I do not understand the intricacies of all the different
6028 * failure modes of %^H in here. The goal here is to make
6029 * the most probable error message user-friendly. --jhi */
6030
6031 goto msgdone;
6032
423cee85 6033 report:
4e553d73 6034 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 6035 (type ? type: "undef"), why1, why2, why3);
41ab332f 6036 msgdone:
423cee85
JH
6037 yyerror(SvPVX(msg));
6038 SvREFCNT_dec(msg);
6039 return sv;
6040 }
b3ac6de7
IZ
6041 cvp = hv_fetch(table, key, strlen(key), FALSE);
6042 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
6043 why1 = "$^H{";
6044 why2 = key;
f0af216f 6045 why3 = "} is not defined";
423cee85 6046 goto report;
b3ac6de7
IZ
6047 }
6048 sv_2mortal(sv); /* Parent created it permanently */
6049 cv = *cvp;
423cee85
JH
6050 if (!pv && s)
6051 pv = sv_2mortal(newSVpvn(s, len));
6052 if (type && pv)
6053 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 6054 else
423cee85 6055 typesv = &PL_sv_undef;
4e553d73 6056
e788e7d3 6057 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
6058 ENTER ;
6059 SAVETMPS;
4e553d73 6060
423cee85 6061 PUSHMARK(SP) ;
a5845cb7 6062 EXTEND(sp, 3);
423cee85
JH
6063 if (pv)
6064 PUSHs(pv);
b3ac6de7 6065 PUSHs(sv);
423cee85
JH
6066 if (pv)
6067 PUSHs(typesv);
b3ac6de7 6068 PUTBACK;
423cee85 6069 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 6070
423cee85 6071 SPAGAIN ;
4e553d73 6072
423cee85 6073 /* Check the eval first */
9b0e499b 6074 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
6075 STRLEN n_a;
6076 sv_catpv(ERRSV, "Propagated");
6077 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 6078 (void)POPs;
423cee85
JH
6079 res = SvREFCNT_inc(sv);
6080 }
6081 else {
6082 res = POPs;
e1f15930 6083 (void)SvREFCNT_inc(res);
423cee85 6084 }
4e553d73 6085
423cee85
JH
6086 PUTBACK ;
6087 FREETMPS ;
6088 LEAVE ;
b3ac6de7 6089 POPSTACK;
4e553d73 6090
b3ac6de7 6091 if (!SvOK(res)) {
423cee85
JH
6092 why1 = "Call to &{$^H{";
6093 why2 = key;
f0af216f 6094 why3 = "}} did not return a defined value";
423cee85
JH
6095 sv = res;
6096 goto report;
9b0e499b 6097 }
423cee85 6098
9b0e499b 6099 return res;
b3ac6de7 6100}
4e553d73 6101
76e3520e 6102STATIC char *
cea2e8a9 6103S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
6104{
6105 register char *d = dest;
8903cb82 6106 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 6107 for (;;) {
8903cb82 6108 if (d >= e)
cea2e8a9 6109 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6110 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6111 *d++ = *s++;
7e2040f0 6112 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6113 *d++ = ':';
6114 *d++ = ':';
6115 s++;
6116 }
c3e0f903 6117 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6118 *d++ = *s++;
6119 *d++ = *s++;
6120 }
fd400ab9 6121 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6122 char *t = s + UTF8SKIP(s);
fd400ab9 6123 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6124 t += UTF8SKIP(t);
6125 if (d + (t - s) > e)
cea2e8a9 6126 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6127 Copy(s, d, t - s, char);
6128 d += t - s;
6129 s = t;
6130 }
463ee0b2
LW
6131 else {
6132 *d = '\0';
6133 *slp = d - dest;
6134 return s;
e929a76b 6135 }
378cc40b
LW
6136 }
6137}
6138
76e3520e 6139STATIC char *
cea2e8a9 6140S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6141{
6142 register char *d;
8903cb82 6143 register char *e;
79072805 6144 char *bracket = 0;
748a9306 6145 char funny = *s++;
378cc40b 6146
a0d0e21e
LW
6147 if (isSPACE(*s))
6148 s = skipspace(s);
378cc40b 6149 d = dest;
8903cb82 6150 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6151 if (isDIGIT(*s)) {
8903cb82 6152 while (isDIGIT(*s)) {
6153 if (d >= e)
cea2e8a9 6154 Perl_croak(aTHX_ ident_too_long);
378cc40b 6155 *d++ = *s++;
8903cb82 6156 }
378cc40b
LW
6157 }
6158 else {
463ee0b2 6159 for (;;) {
8903cb82 6160 if (d >= e)
cea2e8a9 6161 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6162 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6163 *d++ = *s++;
7e2040f0 6164 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6165 *d++ = ':';
6166 *d++ = ':';
6167 s++;
6168 }
a0d0e21e 6169 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6170 *d++ = *s++;
6171 *d++ = *s++;
6172 }
fd400ab9 6173 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6174 char *t = s + UTF8SKIP(s);
fd400ab9 6175 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6176 t += UTF8SKIP(t);
6177 if (d + (t - s) > e)
cea2e8a9 6178 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6179 Copy(s, d, t - s, char);
6180 d += t - s;
6181 s = t;
6182 }
463ee0b2
LW
6183 else
6184 break;
6185 }
378cc40b
LW
6186 }
6187 *d = '\0';
6188 d = dest;
79072805 6189 if (*d) {
3280af22
NIS
6190 if (PL_lex_state != LEX_NORMAL)
6191 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6192 return s;
378cc40b 6193 }
748a9306 6194 if (*s == '$' && s[1] &&
7e2040f0 6195 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6196 {
4810e5ec 6197 return s;
5cd24f17 6198 }
79072805
LW
6199 if (*s == '{') {
6200 bracket = s;
6201 s++;
6202 }
6203 else if (ck_uni)
6204 check_uni();
93a17b20 6205 if (s < send)
79072805
LW
6206 *d = *s++;
6207 d[1] = '\0';
2b92dfce 6208 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6209 *d = toCTRL(*s);
6210 s++;
de3bb511 6211 }
79072805 6212 if (bracket) {
748a9306 6213 if (isSPACE(s[-1])) {
fa83b5b6 6214 while (s < send) {
6215 char ch = *s++;
bf4acbe4 6216 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6217 *d = ch;
6218 break;
6219 }
6220 }
748a9306 6221 }
7e2040f0 6222 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6223 d++;
a0ed51b3
LW
6224 if (UTF) {
6225 e = s;
155aba94 6226 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6227 e += UTF8SKIP(e);
fd400ab9 6228 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6229 e += UTF8SKIP(e);
6230 }
6231 Copy(s, d, e - s, char);
6232 d += e - s;
6233 s = e;
6234 }
6235 else {
2b92dfce 6236 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6237 *d++ = *s++;
2b92dfce 6238 if (d >= e)
cea2e8a9 6239 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6240 }
79072805 6241 *d = '\0';
bf4acbe4 6242 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6243 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6244 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6245 const char *brack = *s == '[' ? "[...]" : "{...}";
9014280d 6246 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 6247 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6248 funny, dest, brack, funny, dest, brack);
6249 }
79072805 6250 bracket++;
a0be28da 6251 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6252 return s;
6253 }
4e553d73
NIS
6254 }
6255 /* Handle extended ${^Foo} variables
2b92dfce
GS
6256 * 1999-02-27 mjd-perl-patch@plover.com */
6257 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6258 && isALNUM(*s))
6259 {
6260 d++;
6261 while (isALNUM(*s) && d < e) {
6262 *d++ = *s++;
6263 }
6264 if (d >= e)
cea2e8a9 6265 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6266 *d = '\0';
79072805
LW
6267 }
6268 if (*s == '}') {
6269 s++;
3280af22
NIS
6270 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6271 PL_lex_state = LEX_INTERPEND;
748a9306
LW
6272 if (funny == '#')
6273 funny = '@';
d008e5eb 6274 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6275 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6276 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6277 {
9014280d 6278 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
6279 "Ambiguous use of %c{%s} resolved to %c%s",
6280 funny, dest, funny, dest);
6281 }
6282 }
79072805
LW
6283 }
6284 else {
6285 s = bracket; /* let the parser handle it */
93a17b20 6286 *dest = '\0';
79072805
LW
6287 }
6288 }
3280af22
NIS
6289 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6290 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6291 return s;
6292}
6293
cea2e8a9 6294void
2b36a5a0 6295Perl_pmflag(pTHX_ U32* pmfl, int ch)
a0d0e21e 6296{
bbce6d69 6297 if (ch == 'i')
a0d0e21e 6298 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6299 else if (ch == 'g')
6300 *pmfl |= PMf_GLOBAL;
c90c0ff4 6301 else if (ch == 'c')
6302 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6303 else if (ch == 'o')
6304 *pmfl |= PMf_KEEP;
6305 else if (ch == 'm')
6306 *pmfl |= PMf_MULTILINE;
6307 else if (ch == 's')
6308 *pmfl |= PMf_SINGLELINE;
6309 else if (ch == 'x')
6310 *pmfl |= PMf_EXTENDED;
6311}
378cc40b 6312
76e3520e 6313STATIC char *
cea2e8a9 6314S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6315{
79072805
LW
6316 PMOP *pm;
6317 char *s;
378cc40b 6318
09bef843 6319 s = scan_str(start,FALSE,FALSE);
37fd879b 6320 if (!s)
cea2e8a9 6321 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6322
8782bef2 6323 pm = (PMOP*)newPMOP(type, 0);
3280af22 6324 if (PL_multi_open == '?')
79072805 6325 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6326 if(type == OP_QR) {
6327 while (*s && strchr("iomsx", *s))
6328 pmflag(&pm->op_pmflags,*s++);
6329 }
6330 else {
6331 while (*s && strchr("iogcmsx", *s))
6332 pmflag(&pm->op_pmflags,*s++);
6333 }
4ac733c9
MJD
6334 /* issue a warning if /c is specified,but /g is not */
6335 if (ckWARN(WARN_REGEXP) &&
6336 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
6337 {
6338 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
6339 }
6340
4633a7c4 6341 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6342
3280af22 6343 PL_lex_op = (OP*)pm;
79072805 6344 yylval.ival = OP_MATCH;
378cc40b
LW
6345 return s;
6346}
6347
76e3520e 6348STATIC char *
cea2e8a9 6349S_scan_subst(pTHX_ char *start)
79072805 6350{
a0d0e21e 6351 register char *s;
79072805 6352 register PMOP *pm;
4fdae800 6353 I32 first_start;
79072805
LW
6354 I32 es = 0;
6355
79072805
LW
6356 yylval.ival = OP_NULL;
6357
09bef843 6358 s = scan_str(start,FALSE,FALSE);
79072805 6359
37fd879b 6360 if (!s)
cea2e8a9 6361 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6362
3280af22 6363 if (s[-1] == PL_multi_open)
79072805
LW
6364 s--;
6365
3280af22 6366 first_start = PL_multi_start;
09bef843 6367 s = scan_str(s,FALSE,FALSE);
79072805 6368 if (!s) {
37fd879b 6369 if (PL_lex_stuff) {
3280af22 6370 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6371 PL_lex_stuff = Nullsv;
6372 }
cea2e8a9 6373 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6374 }
3280af22 6375 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6376
79072805 6377 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6378 while (*s) {
a687059c
LW
6379 if (*s == 'e') {
6380 s++;
2f3197b3 6381 es++;
a687059c 6382 }
b3eb6a9b 6383 else if (strchr("iogcmsx", *s))
a0d0e21e 6384 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6385 else
6386 break;
378cc40b 6387 }
79072805 6388
64e578a2
MJD
6389 /* /c is not meaningful with s/// */
6390 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
4ac733c9 6391 {
64e578a2 6392 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
4ac733c9
MJD
6393 }
6394
79072805
LW
6395 if (es) {
6396 SV *repl;
0244c3a4
GS
6397 PL_sublex_info.super_bufptr = s;
6398 PL_sublex_info.super_bufend = PL_bufend;
6399 PL_multi_end = 0;
79072805 6400 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6401 repl = newSVpvn("",0);
463ee0b2 6402 while (es-- > 0)
a0d0e21e 6403 sv_catpv(repl, es ? "eval " : "do ");
79072805 6404 sv_catpvn(repl, "{ ", 2);
3280af22 6405 sv_catsv(repl, PL_lex_repl);
79072805 6406 sv_catpvn(repl, " };", 2);
25da4f38 6407 SvEVALED_on(repl);
3280af22
NIS
6408 SvREFCNT_dec(PL_lex_repl);
6409 PL_lex_repl = repl;
378cc40b 6410 }
79072805 6411
4633a7c4 6412 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6413 PL_lex_op = (OP*)pm;
79072805 6414 yylval.ival = OP_SUBST;
378cc40b
LW
6415 return s;
6416}
6417
76e3520e 6418STATIC char *
cea2e8a9 6419S_scan_trans(pTHX_ char *start)
378cc40b 6420{
a0d0e21e 6421 register char* s;
11343788 6422 OP *o;
79072805
LW
6423 short *tbl;
6424 I32 squash;
a0ed51b3 6425 I32 del;
79072805
LW
6426 I32 complement;
6427
6428 yylval.ival = OP_NULL;
6429
09bef843 6430 s = scan_str(start,FALSE,FALSE);
37fd879b 6431 if (!s)
cea2e8a9 6432 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6433 if (s[-1] == PL_multi_open)
2f3197b3
LW
6434 s--;
6435
09bef843 6436 s = scan_str(s,FALSE,FALSE);
79072805 6437 if (!s) {
37fd879b 6438 if (PL_lex_stuff) {
3280af22 6439 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6440 PL_lex_stuff = Nullsv;
6441 }
cea2e8a9 6442 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6443 }
79072805 6444
a0ed51b3 6445 complement = del = squash = 0;
6940069f 6446 while (strchr("cds", *s)) {
395c3793 6447 if (*s == 'c')
79072805 6448 complement = OPpTRANS_COMPLEMENT;
395c3793 6449 else if (*s == 'd')
a0ed51b3
LW
6450 del = OPpTRANS_DELETE;
6451 else if (*s == 's')
79072805 6452 squash = OPpTRANS_SQUASH;
395c3793
LW
6453 s++;
6454 }
8973db79
JH
6455
6456 New(803, tbl, complement&&!del?258:256, short);
6457 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7948272d
NIS
6458 o->op_private = del|squash|complement|
6459 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6460 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6461
3280af22 6462 PL_lex_op = o;
79072805
LW
6463 yylval.ival = OP_TRANS;
6464 return s;
6465}
6466
76e3520e 6467STATIC char *
cea2e8a9 6468S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6469{
6470 SV *herewas;
6471 I32 op_type = OP_SCALAR;
6472 I32 len;
6473 SV *tmpstr;
6474 char term;
6475 register char *d;
fc36a67e 6476 register char *e;
4633a7c4 6477 char *peek;
3280af22 6478 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6479
6480 s += 2;
3280af22
NIS
6481 d = PL_tokenbuf;
6482 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6483 if (!outer)
79072805 6484 *d++ = '\n';
bf4acbe4 6485 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6486 if (*peek && strchr("`'\"",*peek)) {
6487 s = peek;
79072805 6488 term = *s++;
3280af22 6489 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6490 d += len;
3280af22 6491 if (s < PL_bufend)
79072805 6492 s++;
79072805
LW
6493 }
6494 else {
6495 if (*s == '\\')
6496 s++, term = '\'';
6497 else
6498 term = '"';
7e2040f0 6499 if (!isALNUM_lazy_if(s,UTF))
12bcd1a6 6500 deprecate_old("bare << to mean <<\"\"");
7e2040f0 6501 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6502 if (d < e)
6503 *d++ = *s;
6504 }
6505 }
3280af22 6506 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6507 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6508 *d++ = '\n';
6509 *d = '\0';
3280af22 6510 len = d - PL_tokenbuf;
6a27c188 6511#ifndef PERL_STRICT_CR
f63a84b2
LW
6512 d = strchr(s, '\r');
6513 if (d) {
6514 char *olds = s;
6515 s = d;
3280af22 6516 while (s < PL_bufend) {
f63a84b2
LW
6517 if (*s == '\r') {
6518 *d++ = '\n';
6519 if (*++s == '\n')
6520 s++;
6521 }
6522 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6523 *d++ = *s++;
6524 s++;
6525 }
6526 else
6527 *d++ = *s++;
6528 }
6529 *d = '\0';
3280af22
NIS
6530 PL_bufend = d;
6531 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6532 s = olds;
6533 }
6534#endif
79072805 6535 d = "\n";
3280af22 6536 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6537 herewas = newSVpvn(s,PL_bufend-s);
79072805 6538 else
79cb57f6 6539 s--, herewas = newSVpvn(s,d-s);
79072805 6540 s += SvCUR(herewas);
748a9306 6541
8d6dde3e 6542 tmpstr = NEWSV(87,79);
748a9306
LW
6543 sv_upgrade(tmpstr, SVt_PVIV);
6544 if (term == '\'') {
79072805 6545 op_type = OP_CONST;
748a9306
LW
6546 SvIVX(tmpstr) = -1;
6547 }
6548 else if (term == '`') {
79072805 6549 op_type = OP_BACKTICK;
748a9306
LW
6550 SvIVX(tmpstr) = '\\';
6551 }
79072805
LW
6552
6553 CLINE;
57843af0 6554 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6555 PL_multi_open = PL_multi_close = '<';
6556 term = *PL_tokenbuf;
0244c3a4
GS
6557 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6558 char *bufptr = PL_sublex_info.super_bufptr;
6559 char *bufend = PL_sublex_info.super_bufend;
6560 char *olds = s - SvCUR(herewas);
6561 s = strchr(bufptr, '\n');
6562 if (!s)
6563 s = bufend;
6564 d = s;
6565 while (s < bufend &&
6566 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6567 if (*s++ == '\n')
57843af0 6568 CopLINE_inc(PL_curcop);
0244c3a4
GS
6569 }
6570 if (s >= bufend) {
eb160463 6571 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
6572 missingterm(PL_tokenbuf);
6573 }
6574 sv_setpvn(herewas,bufptr,d-bufptr+1);
6575 sv_setpvn(tmpstr,d+1,s-d);
6576 s += len - 1;
6577 sv_catpvn(herewas,s,bufend-s);
6578 (void)strcpy(bufptr,SvPVX(herewas));
6579
6580 s = olds;
6581 goto retval;
6582 }
6583 else if (!outer) {
79072805 6584 d = s;
3280af22
NIS
6585 while (s < PL_bufend &&
6586 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6587 if (*s++ == '\n')
57843af0 6588 CopLINE_inc(PL_curcop);
79072805 6589 }
3280af22 6590 if (s >= PL_bufend) {
eb160463 6591 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6592 missingterm(PL_tokenbuf);
79072805
LW
6593 }
6594 sv_setpvn(tmpstr,d+1,s-d);
6595 s += len - 1;
57843af0 6596 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6597
3280af22
NIS
6598 sv_catpvn(herewas,s,PL_bufend-s);
6599 sv_setsv(PL_linestr,herewas);
6600 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6601 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6602 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6603 }
6604 else
6605 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6606 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6607 if (!outer ||
3280af22 6608 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
eb160463 6609 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6610 missingterm(PL_tokenbuf);
79072805 6611 }
57843af0 6612 CopLINE_inc(PL_curcop);
3280af22 6613 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6614 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6615#ifndef PERL_STRICT_CR
3280af22 6616 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6617 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6618 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6619 {
3280af22
NIS
6620 PL_bufend[-2] = '\n';
6621 PL_bufend--;
6622 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6623 }
3280af22
NIS
6624 else if (PL_bufend[-1] == '\r')
6625 PL_bufend[-1] = '\n';
f63a84b2 6626 }
3280af22
NIS
6627 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6628 PL_bufend[-1] = '\n';
f63a84b2 6629#endif
3280af22 6630 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6631 SV *sv = NEWSV(88,0);
6632
93a17b20 6633 sv_upgrade(sv, SVt_PVMG);
3280af22 6634 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
6635 (void)SvIOK_on(sv);
6636 SvIVX(sv) = 0;
57843af0 6637 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6638 }
3280af22
NIS
6639 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6640 s = PL_bufend - 1;
79072805 6641 *s = ' ';
3280af22
NIS
6642 sv_catsv(PL_linestr,herewas);
6643 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805
LW
6644 }
6645 else {
3280af22
NIS
6646 s = PL_bufend;
6647 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6648 }
6649 }
79072805 6650 s++;
0244c3a4 6651retval:
57843af0 6652 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6653 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6654 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6655 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6656 }
8990e307 6657 SvREFCNT_dec(herewas);
0064a8a9 6658 if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7948272d 6659 SvUTF8_on(tmpstr);
3280af22 6660 PL_lex_stuff = tmpstr;
79072805
LW
6661 yylval.ival = op_type;
6662 return s;
6663}
6664
02aa26ce
NT
6665/* scan_inputsymbol
6666 takes: current position in input buffer
6667 returns: new position in input buffer
6668 side-effects: yylval and lex_op are set.
6669
6670 This code handles:
6671
6672 <> read from ARGV
6673 <FH> read from filehandle
6674 <pkg::FH> read from package qualified filehandle
6675 <pkg'FH> read from package qualified filehandle
6676 <$fh> read from filehandle in $fh
6677 <*.h> filename glob
6678
6679*/
6680
76e3520e 6681STATIC char *
cea2e8a9 6682S_scan_inputsymbol(pTHX_ char *start)
79072805 6683{
02aa26ce 6684 register char *s = start; /* current position in buffer */
79072805 6685 register char *d;
fc36a67e 6686 register char *e;
1b420867 6687 char *end;
79072805
LW
6688 I32 len;
6689
3280af22
NIS
6690 d = PL_tokenbuf; /* start of temp holding space */
6691 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6692 end = strchr(s, '\n');
6693 if (!end)
6694 end = PL_bufend;
6695 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6696
6697 /* die if we didn't have space for the contents of the <>,
1b420867 6698 or if it didn't end, or if we see a newline
02aa26ce
NT
6699 */
6700
3280af22 6701 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6702 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6703 if (s >= end)
cea2e8a9 6704 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6705
fc36a67e 6706 s++;
02aa26ce
NT
6707
6708 /* check for <$fh>
6709 Remember, only scalar variables are interpreted as filehandles by
6710 this code. Anything more complex (e.g., <$fh{$num}>) will be
6711 treated as a glob() call.
6712 This code makes use of the fact that except for the $ at the front,
6713 a scalar variable and a filehandle look the same.
6714 */
4633a7c4 6715 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6716
6717 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6718 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6719 d++;
02aa26ce
NT
6720
6721 /* If we've tried to read what we allow filehandles to look like, and
6722 there's still text left, then it must be a glob() and not a getline.
6723 Use scan_str to pull out the stuff between the <> and treat it
6724 as nothing more than a string.
6725 */
6726
3280af22 6727 if (d - PL_tokenbuf != len) {
79072805
LW
6728 yylval.ival = OP_GLOB;
6729 set_csh();
09bef843 6730 s = scan_str(start,FALSE,FALSE);
79072805 6731 if (!s)
cea2e8a9 6732 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6733 return s;
6734 }
395c3793 6735 else {
9b3023bc
RGS
6736 bool readline_overriden = FALSE;
6737 GV *gv_readline = Nullgv;
6738 GV **gvp;
02aa26ce 6739 /* we're in a filehandle read situation */
3280af22 6740 d = PL_tokenbuf;
02aa26ce
NT
6741
6742 /* turn <> into <ARGV> */
79072805
LW
6743 if (!len)
6744 (void)strcpy(d,"ARGV");
02aa26ce 6745
9b3023bc 6746 /* Check whether readline() is overriden */
ba979b31
NIS
6747 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
6748 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 6749 ||
ba979b31 6750 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9b3023bc 6751 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
ba979b31 6752 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
6753 readline_overriden = TRUE;
6754
02aa26ce
NT
6755 /* if <$fh>, create the ops to turn the variable into a
6756 filehandle
6757 */
79072805 6758 if (*d == '$') {
a0d0e21e 6759 I32 tmp;
02aa26ce
NT
6760
6761 /* try to find it in the pad for this block, otherwise find
6762 add symbol table ops
6763 */
11343788 6764 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
dd2155a4
DM
6765 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
6766 SV *sym = sv_2mortal(
6767 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
f558d5af
JH
6768 sv_catpvn(sym, "::", 2);
6769 sv_catpv(sym, d+1);
6770 d = SvPVX(sym);
6771 goto intro_sym;
6772 }
6773 else {
6774 OP *o = newOP(OP_PADSV, 0);
6775 o->op_targ = tmp;
9b3023bc
RGS
6776 PL_lex_op = readline_overriden
6777 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6778 append_elem(OP_LIST, o,
6779 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
6780 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 6781 }
a0d0e21e
LW
6782 }
6783 else {
f558d5af
JH
6784 GV *gv;
6785 ++d;
6786intro_sym:
6787 gv = gv_fetchpv(d,
6788 (PL_in_eval
6789 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 6790 : GV_ADDMULTI),
f558d5af 6791 SVt_PV);
9b3023bc
RGS
6792 PL_lex_op = readline_overriden
6793 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6794 append_elem(OP_LIST,
6795 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
6796 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6797 : (OP*)newUNOP(OP_READLINE, 0,
6798 newUNOP(OP_RV2SV, 0,
6799 newGVOP(OP_GV, 0, gv)));
a0d0e21e 6800 }
7c6fadd6
RGS
6801 if (!readline_overriden)
6802 PL_lex_op->op_flags |= OPf_SPECIAL;
f5284f61 6803 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
6804 yylval.ival = OP_NULL;
6805 }
02aa26ce
NT
6806
6807 /* If it's none of the above, it must be a literal filehandle
6808 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 6809 else {
85e6fe83 6810 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9b3023bc
RGS
6811 PL_lex_op = readline_overriden
6812 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6813 append_elem(OP_LIST,
6814 newGVOP(OP_GV, 0, gv),
6815 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6816 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
6817 yylval.ival = OP_NULL;
6818 }
6819 }
02aa26ce 6820
79072805
LW
6821 return s;
6822}
6823
02aa26ce
NT
6824
6825/* scan_str
6826 takes: start position in buffer
09bef843
SB
6827 keep_quoted preserve \ on the embedded delimiter(s)
6828 keep_delims preserve the delimiters around the string
02aa26ce
NT
6829 returns: position to continue reading from buffer
6830 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6831 updates the read buffer.
6832
6833 This subroutine pulls a string out of the input. It is called for:
6834 q single quotes q(literal text)
6835 ' single quotes 'literal text'
6836 qq double quotes qq(interpolate $here please)
6837 " double quotes "interpolate $here please"
6838 qx backticks qx(/bin/ls -l)
6839 ` backticks `/bin/ls -l`
6840 qw quote words @EXPORT_OK = qw( func() $spam )
6841 m// regexp match m/this/
6842 s/// regexp substitute s/this/that/
6843 tr/// string transliterate tr/this/that/
6844 y/// string transliterate y/this/that/
6845 ($*@) sub prototypes sub foo ($)
09bef843 6846 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
6847 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6848
6849 In most of these cases (all but <>, patterns and transliterate)
6850 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6851 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6852 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6853 calls scan_str().
4e553d73 6854
02aa26ce
NT
6855 It skips whitespace before the string starts, and treats the first
6856 character as the delimiter. If the delimiter is one of ([{< then
6857 the corresponding "close" character )]}> is used as the closing
6858 delimiter. It allows quoting of delimiters, and if the string has
6859 balanced delimiters ([{<>}]) it allows nesting.
6860
37fd879b
HS
6861 On success, the SV with the resulting string is put into lex_stuff or,
6862 if that is already non-NULL, into lex_repl. The second case occurs only
6863 when parsing the RHS of the special constructs s/// and tr/// (y///).
6864 For convenience, the terminating delimiter character is stuffed into
6865 SvIVX of the SV.
02aa26ce
NT
6866*/
6867
76e3520e 6868STATIC char *
09bef843 6869S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 6870{
02aa26ce
NT
6871 SV *sv; /* scalar value: string */
6872 char *tmps; /* temp string, used for delimiter matching */
6873 register char *s = start; /* current position in the buffer */
6874 register char term; /* terminating character */
6875 register char *to; /* current position in the sv's data */
6876 I32 brackets = 1; /* bracket nesting level */
89491803 6877 bool has_utf8 = FALSE; /* is there any utf8 content? */
02aa26ce
NT
6878
6879 /* skip space before the delimiter */
fb73857a 6880 if (isSPACE(*s))
6881 s = skipspace(s);
02aa26ce
NT
6882
6883 /* mark where we are, in case we need to report errors */
79072805 6884 CLINE;
02aa26ce
NT
6885
6886 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 6887 term = *s;
63cd0674 6888 if (!UTF8_IS_INVARIANT((U8)term) && UTF)
89491803 6889 has_utf8 = TRUE;
b1c7b182 6890
02aa26ce 6891 /* mark where we are */
57843af0 6892 PL_multi_start = CopLINE(PL_curcop);
3280af22 6893 PL_multi_open = term;
02aa26ce
NT
6894
6895 /* find corresponding closing delimiter */
93a17b20 6896 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
79072805 6897 term = tmps[5];
3280af22 6898 PL_multi_close = term;
79072805 6899
02aa26ce 6900 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
6901 assuming. 79 is the SV's initial length. What a random number. */
6902 sv = NEWSV(87,79);
ed6116ce
LW
6903 sv_upgrade(sv, SVt_PVIV);
6904 SvIVX(sv) = term;
a0d0e21e 6905 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
6906
6907 /* move past delimiter and try to read a complete string */
09bef843
SB
6908 if (keep_delims)
6909 sv_catpvn(sv, s, 1);
93a17b20
LW
6910 s++;
6911 for (;;) {
02aa26ce 6912 /* extend sv if need be */
3280af22 6913 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 6914 /* set 'to' to the next character in the sv's string */
463ee0b2 6915 to = SvPVX(sv)+SvCUR(sv);
09bef843 6916
02aa26ce 6917 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
6918 if (PL_multi_open == PL_multi_close) {
6919 for (; s < PL_bufend; s++,to++) {
02aa26ce 6920 /* embedded newlines increment the current line number */
3280af22 6921 if (*s == '\n' && !PL_rsfp)
57843af0 6922 CopLINE_inc(PL_curcop);
02aa26ce 6923 /* handle quoted delimiters */
3280af22 6924 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 6925 if (!keep_quoted && s[1] == term)
a0d0e21e 6926 s++;
02aa26ce 6927 /* any other quotes are simply copied straight through */
a0d0e21e
LW
6928 else
6929 *to++ = *s++;
6930 }
02aa26ce
NT
6931 /* terminate when run out of buffer (the for() condition), or
6932 have found the terminator */
93a17b20
LW
6933 else if (*s == term)
6934 break;
63cd0674 6935 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6936 has_utf8 = TRUE;
93a17b20
LW
6937 *to = *s;
6938 }
6939 }
02aa26ce
NT
6940
6941 /* if the terminator isn't the same as the start character (e.g.,
6942 matched brackets), we have to allow more in the quoting, and
6943 be prepared for nested brackets.
6944 */
93a17b20 6945 else {
02aa26ce 6946 /* read until we run out of string, or we find the terminator */
3280af22 6947 for (; s < PL_bufend; s++,to++) {
02aa26ce 6948 /* embedded newlines increment the line count */
3280af22 6949 if (*s == '\n' && !PL_rsfp)
57843af0 6950 CopLINE_inc(PL_curcop);
02aa26ce 6951 /* backslashes can escape the open or closing characters */
3280af22 6952 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
6953 if (!keep_quoted &&
6954 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
6955 s++;
6956 else
6957 *to++ = *s++;
6958 }
02aa26ce 6959 /* allow nested opens and closes */
3280af22 6960 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 6961 break;
3280af22 6962 else if (*s == PL_multi_open)
93a17b20 6963 brackets++;
63cd0674 6964 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6965 has_utf8 = TRUE;
93a17b20
LW
6966 *to = *s;
6967 }
6968 }
02aa26ce 6969 /* terminate the copied string and update the sv's end-of-string */
93a17b20 6970 *to = '\0';
463ee0b2 6971 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 6972
02aa26ce
NT
6973 /*
6974 * this next chunk reads more into the buffer if we're not done yet
6975 */
6976
b1c7b182
GS
6977 if (s < PL_bufend)
6978 break; /* handle case where we are done yet :-) */
79072805 6979
6a27c188 6980#ifndef PERL_STRICT_CR
f63a84b2 6981 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
6982 if ((to[-2] == '\r' && to[-1] == '\n') ||
6983 (to[-2] == '\n' && to[-1] == '\r'))
6984 {
f63a84b2
LW
6985 to[-2] = '\n';
6986 to--;
6987 SvCUR_set(sv, to - SvPVX(sv));
6988 }
6989 else if (to[-1] == '\r')
6990 to[-1] = '\n';
6991 }
6992 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
6993 to[-1] = '\n';
6994#endif
6995
02aa26ce
NT
6996 /* if we're out of file, or a read fails, bail and reset the current
6997 line marker so we can report where the unterminated string began
6998 */
3280af22
NIS
6999 if (!PL_rsfp ||
7000 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 7001 sv_free(sv);
eb160463 7002 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
79072805
LW
7003 return Nullch;
7004 }
02aa26ce 7005 /* we read a line, so increment our line counter */
57843af0 7006 CopLINE_inc(PL_curcop);
a0ed51b3 7007
02aa26ce 7008 /* update debugger info */
3280af22 7009 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
7010 SV *sv = NEWSV(88,0);
7011
93a17b20 7012 sv_upgrade(sv, SVt_PVMG);
3280af22 7013 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
7014 (void)SvIOK_on(sv);
7015 SvIVX(sv) = 0;
57843af0 7016 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 7017 }
a0ed51b3 7018
3280af22
NIS
7019 /* having changed the buffer, we must update PL_bufend */
7020 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 7021 PL_last_lop = PL_last_uni = Nullch;
378cc40b 7022 }
4e553d73 7023
02aa26ce
NT
7024 /* at this point, we have successfully read the delimited string */
7025
09bef843
SB
7026 if (keep_delims)
7027 sv_catpvn(sv, s, 1);
89491803 7028 if (has_utf8)
b1c7b182 7029 SvUTF8_on(sv);
d0063567
DK
7030 else if (PL_encoding)
7031 sv_recode_to_utf8(sv, PL_encoding);
7032
57843af0 7033 PL_multi_end = CopLINE(PL_curcop);
79072805 7034 s++;
02aa26ce
NT
7035
7036 /* if we allocated too much space, give some back */
93a17b20
LW
7037 if (SvCUR(sv) + 5 < SvLEN(sv)) {
7038 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 7039 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 7040 }
02aa26ce
NT
7041
7042 /* decide whether this is the first or second quoted string we've read
7043 for this op
7044 */
4e553d73 7045
3280af22
NIS
7046 if (PL_lex_stuff)
7047 PL_lex_repl = sv;
79072805 7048 else
3280af22 7049 PL_lex_stuff = sv;
378cc40b
LW
7050 return s;
7051}
7052
02aa26ce
NT
7053/*
7054 scan_num
7055 takes: pointer to position in buffer
7056 returns: pointer to new position in buffer
7057 side-effects: builds ops for the constant in yylval.op
7058
7059 Read a number in any of the formats that Perl accepts:
7060
7fd134d9
JH
7061 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
7062 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
7063 0b[01](_?[01])*
7064 0[0-7](_?[0-7])*
7065 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 7066
3280af22 7067 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
7068 thing it reads.
7069
7070 If it reads a number without a decimal point or an exponent, it will
7071 try converting the number to an integer and see if it can do so
7072 without loss of precision.
7073*/
4e553d73 7074
378cc40b 7075char *
b73d6f50 7076Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 7077{
02aa26ce
NT
7078 register char *s = start; /* current position in buffer */
7079 register char *d; /* destination in temp buffer */
7080 register char *e; /* end of temp buffer */
86554af2 7081 NV nv; /* number read, as a double */
a7cb1f99 7082 SV *sv = Nullsv; /* place to put the converted number */
a86a20aa 7083 bool floatit; /* boolean: int or float? */
02aa26ce 7084 char *lastub = 0; /* position of last underbar */
fc36a67e 7085 static char number_too_long[] = "Number too long";
378cc40b 7086
02aa26ce
NT
7087 /* We use the first character to decide what type of number this is */
7088
378cc40b 7089 switch (*s) {
79072805 7090 default:
cea2e8a9 7091 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 7092
02aa26ce 7093 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 7094 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
7095 case '0':
7096 {
02aa26ce
NT
7097 /* variables:
7098 u holds the "number so far"
4f19785b
WSI
7099 shift the power of 2 of the base
7100 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
7101 overflowed was the number more than we can hold?
7102
7103 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
7104 we in octal/hex/binary?" indicator to disallow hex characters
7105 when in octal mode.
02aa26ce 7106 */
9e24b6e2
JH
7107 NV n = 0.0;
7108 UV u = 0;
79072805 7109 I32 shift;
9e24b6e2
JH
7110 bool overflowed = FALSE;
7111 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
7112 static char* bases[5] = { "", "binary", "", "octal",
7113 "hexadecimal" };
7114 static char* Bases[5] = { "", "Binary", "", "Octal",
7115 "Hexadecimal" };
7116 static char *maxima[5] = { "",
7117 "0b11111111111111111111111111111111",
7118 "",
893fe2c2 7119 "037777777777",
9e24b6e2
JH
7120 "0xffffffff" };
7121 char *base, *Base, *max;
378cc40b 7122
02aa26ce 7123 /* check for hex */
378cc40b
LW
7124 if (s[1] == 'x') {
7125 shift = 4;
7126 s += 2;
4f19785b
WSI
7127 } else if (s[1] == 'b') {
7128 shift = 1;
7129 s += 2;
378cc40b 7130 }
02aa26ce 7131 /* check for a decimal in disguise */
b78218b7 7132 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 7133 goto decimal;
02aa26ce 7134 /* so it must be octal */
928753ea 7135 else {
378cc40b 7136 shift = 3;
928753ea
JH
7137 s++;
7138 }
7139
7140 if (*s == '_') {
7141 if (ckWARN(WARN_SYNTAX))
9014280d 7142 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7143 "Misplaced _ in number");
7144 lastub = s++;
7145 }
9e24b6e2
JH
7146
7147 base = bases[shift];
7148 Base = Bases[shift];
7149 max = maxima[shift];
02aa26ce 7150
4f19785b 7151 /* read the rest of the number */
378cc40b 7152 for (;;) {
9e24b6e2 7153 /* x is used in the overflow test,
893fe2c2 7154 b is the digit we're adding on. */
9e24b6e2 7155 UV x, b;
55497cff 7156
378cc40b 7157 switch (*s) {
02aa26ce
NT
7158
7159 /* if we don't mention it, we're done */
378cc40b
LW
7160 default:
7161 goto out;
02aa26ce 7162
928753ea 7163 /* _ are ignored -- but warned about if consecutive */
de3bb511 7164 case '_':
928753ea 7165 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7166 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7167 "Misplaced _ in number");
7168 lastub = s++;
de3bb511 7169 break;
02aa26ce
NT
7170
7171 /* 8 and 9 are not octal */
378cc40b 7172 case '8': case '9':
4f19785b 7173 if (shift == 3)
cea2e8a9 7174 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 7175 /* FALL THROUGH */
02aa26ce
NT
7176
7177 /* octal digits */
4f19785b 7178 case '2': case '3': case '4':
378cc40b 7179 case '5': case '6': case '7':
4f19785b 7180 if (shift == 1)
cea2e8a9 7181 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7182 /* FALL THROUGH */
7183
7184 case '0': case '1':
02aa26ce 7185 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7186 goto digit;
02aa26ce
NT
7187
7188 /* hex digits */
378cc40b
LW
7189 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7190 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7191 /* make sure they said 0x */
378cc40b
LW
7192 if (shift != 4)
7193 goto out;
55497cff 7194 b = (*s++ & 7) + 9;
02aa26ce
NT
7195
7196 /* Prepare to put the digit we have onto the end
7197 of the number so far. We check for overflows.
7198 */
7199
55497cff 7200 digit:
9e24b6e2
JH
7201 if (!overflowed) {
7202 x = u << shift; /* make room for the digit */
7203
7204 if ((x >> shift) != u
7205 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7206 overflowed = TRUE;
7207 n = (NV) u;
767a6a26 7208 if (ckWARN_d(WARN_OVERFLOW))
9014280d 7209 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
9e24b6e2
JH
7210 "Integer overflow in %s number",
7211 base);
7212 } else
7213 u = x | b; /* add the digit to the end */
7214 }
7215 if (overflowed) {
7216 n *= nvshift[shift];
7217 /* If an NV has not enough bits in its
7218 * mantissa to represent an UV this summing of
7219 * small low-order numbers is a waste of time
7220 * (because the NV cannot preserve the
7221 * low-order bits anyway): we could just
7222 * remember when did we overflow and in the
7223 * end just multiply n by the right
7224 * amount. */
7225 n += (NV) b;
55497cff 7226 }
378cc40b
LW
7227 break;
7228 }
7229 }
02aa26ce
NT
7230
7231 /* if we get here, we had success: make a scalar value from
7232 the number.
7233 */
378cc40b 7234 out:
928753ea
JH
7235
7236 /* final misplaced underbar check */
7237 if (s[-1] == '_') {
7238 if (ckWARN(WARN_SYNTAX))
9014280d 7239 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
7240 }
7241
79072805 7242 sv = NEWSV(92,0);
9e24b6e2 7243 if (overflowed) {
767a6a26 7244 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
9014280d 7245 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7246 "%s number > %s non-portable",
7247 Base, max);
7248 sv_setnv(sv, n);
7249 }
7250 else {
15041a67 7251#if UVSIZE > 4
767a6a26 7252 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
9014280d 7253 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7254 "%s number > %s non-portable",
7255 Base, max);
2cc4c2dc 7256#endif
9e24b6e2
JH
7257 sv_setuv(sv, u);
7258 }
2cc4c2dc 7259 if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7260 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7261 }
7262 break;
02aa26ce
NT
7263
7264 /*
7265 handle decimal numbers.
7266 we're also sent here when we read a 0 as the first digit
7267 */
378cc40b
LW
7268 case '1': case '2': case '3': case '4': case '5':
7269 case '6': case '7': case '8': case '9': case '.':
7270 decimal:
3280af22
NIS
7271 d = PL_tokenbuf;
7272 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7273 floatit = FALSE;
02aa26ce
NT
7274
7275 /* read next group of digits and _ and copy into d */
de3bb511 7276 while (isDIGIT(*s) || *s == '_') {
4e553d73 7277 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7278 if -w is on
7279 */
93a17b20 7280 if (*s == '_') {
928753ea 7281 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7282 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7283 "Misplaced _ in number");
7284 lastub = s++;
93a17b20 7285 }
fc36a67e 7286 else {
02aa26ce 7287 /* check for end of fixed-length buffer */
fc36a67e 7288 if (d >= e)
cea2e8a9 7289 Perl_croak(aTHX_ number_too_long);
02aa26ce 7290 /* if we're ok, copy the character */
378cc40b 7291 *d++ = *s++;
fc36a67e 7292 }
378cc40b 7293 }
02aa26ce
NT
7294
7295 /* final misplaced underbar check */
928753ea 7296 if (lastub && s == lastub + 1) {
d008e5eb 7297 if (ckWARN(WARN_SYNTAX))
9014280d 7298 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 7299 }
02aa26ce
NT
7300
7301 /* read a decimal portion if there is one. avoid
7302 3..5 being interpreted as the number 3. followed
7303 by .5
7304 */
2f3197b3 7305 if (*s == '.' && s[1] != '.') {
79072805 7306 floatit = TRUE;
378cc40b 7307 *d++ = *s++;
02aa26ce 7308
928753ea
JH
7309 if (*s == '_') {
7310 if (ckWARN(WARN_SYNTAX))
9014280d 7311 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7312 "Misplaced _ in number");
7313 lastub = s;
7314 }
7315
7316 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7317 */
fc36a67e 7318 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7319 /* fixed length buffer check */
fc36a67e 7320 if (d >= e)
cea2e8a9 7321 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7322 if (*s == '_') {
7323 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7324 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7325 "Misplaced _ in number");
7326 lastub = s;
7327 }
7328 else
fc36a67e 7329 *d++ = *s;
378cc40b 7330 }
928753ea
JH
7331 /* fractional part ending in underbar? */
7332 if (s[-1] == '_') {
7333 if (ckWARN(WARN_SYNTAX))
9014280d 7334 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7335 "Misplaced _ in number");
7336 }
dd629d5b
GS
7337 if (*s == '.' && isDIGIT(s[1])) {
7338 /* oops, it's really a v-string, but without the "v" */
f4758303 7339 s = start;
dd629d5b
GS
7340 goto vstring;
7341 }
378cc40b 7342 }
02aa26ce
NT
7343
7344 /* read exponent part, if present */
7fd134d9 7345 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7346 floatit = TRUE;
7347 s++;
02aa26ce
NT
7348
7349 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7350 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7351
7fd134d9
JH
7352 /* stray preinitial _ */
7353 if (*s == '_') {
7354 if (ckWARN(WARN_SYNTAX))
9014280d 7355 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7356 "Misplaced _ in number");
7357 lastub = s++;
7358 }
7359
02aa26ce 7360 /* allow positive or negative exponent */
378cc40b
LW
7361 if (*s == '+' || *s == '-')
7362 *d++ = *s++;
02aa26ce 7363
7fd134d9
JH
7364 /* stray initial _ */
7365 if (*s == '_') {
7366 if (ckWARN(WARN_SYNTAX))
9014280d 7367 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7368 "Misplaced _ in number");
7369 lastub = s++;
7370 }
7371
7fd134d9
JH
7372 /* read digits of exponent */
7373 while (isDIGIT(*s) || *s == '_') {
7374 if (isDIGIT(*s)) {
7375 if (d >= e)
7376 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7377 *d++ = *s++;
7fd134d9
JH
7378 }
7379 else {
7380 if (ckWARN(WARN_SYNTAX) &&
7381 ((lastub && s == lastub + 1) ||
b3b48e3e 7382 (!isDIGIT(s[1]) && s[1] != '_')))
9014280d 7383 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9 7384 "Misplaced _ in number");
b3b48e3e 7385 lastub = s++;
7fd134d9 7386 }
7fd134d9 7387 }
378cc40b 7388 }
02aa26ce 7389
02aa26ce
NT
7390
7391 /* make an sv from the string */
79072805 7392 sv = NEWSV(92,0);
097ee67d 7393
0b7fceb9 7394 /*
58bb9ec3
NC
7395 We try to do an integer conversion first if no characters
7396 indicating "float" have been found.
0b7fceb9
MU
7397 */
7398
7399 if (!floatit) {
58bb9ec3
NC
7400 UV uv;
7401 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
7402
7403 if (flags == IS_NUMBER_IN_UV) {
7404 if (uv <= IV_MAX)
86554af2 7405 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 7406 else
c239479b 7407 sv_setuv(sv, uv);
58bb9ec3
NC
7408 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
7409 if (uv <= (UV) IV_MIN)
7410 sv_setiv(sv, -(IV)uv);
7411 else
7412 floatit = TRUE;
7413 } else
7414 floatit = TRUE;
7415 }
0b7fceb9 7416 if (floatit) {
58bb9ec3
NC
7417 /* terminate the string */
7418 *d = '\0';
86554af2
JH
7419 nv = Atof(PL_tokenbuf);
7420 sv_setnv(sv, nv);
7421 }
86554af2 7422
b8403495
JH
7423 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7424 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7425 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7426 (floatit ? "float" : "integer"),
7427 sv, Nullsv, NULL);
378cc40b 7428 break;
0b7fceb9 7429
e312add1 7430 /* if it starts with a v, it could be a v-string */
a7cb1f99 7431 case 'v':
dd629d5b 7432vstring:
f4758303 7433 sv = NEWSV(92,5); /* preallocate storage space */
b0f01acb 7434 s = scan_vstring(s,sv);
a7cb1f99 7435 break;
79072805 7436 }
a687059c 7437
02aa26ce
NT
7438 /* make the op for the constant and return */
7439
a86a20aa 7440 if (sv)
b73d6f50 7441 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7442 else
b73d6f50 7443 lvalp->opval = Nullop;
a687059c 7444
378cc40b
LW
7445 return s;
7446}
7447
76e3520e 7448STATIC char *
cea2e8a9 7449S_scan_formline(pTHX_ register char *s)
378cc40b 7450{
79072805 7451 register char *eol;
378cc40b 7452 register char *t;
79cb57f6 7453 SV *stuff = newSVpvn("",0);
79072805 7454 bool needargs = FALSE;
378cc40b 7455
79072805 7456 while (!needargs) {
c2e66d9e 7457 if (*s == '.' || *s == /*{*/'}') {
79072805 7458 /*SUPPRESS 530*/
51882d45 7459#ifdef PERL_STRICT_CR
bf4acbe4 7460 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7461#else
bf4acbe4 7462 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7463#endif
6a65c6a0 7464 if (*t == '\n' || t == PL_bufend)
79072805
LW
7465 break;
7466 }
3280af22 7467 if (PL_in_eval && !PL_rsfp) {
93a17b20 7468 eol = strchr(s,'\n');
0f85fab0 7469 if (!eol++)
3280af22 7470 eol = PL_bufend;
0f85fab0
LW
7471 }
7472 else
3280af22 7473 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7474 if (*s != '#') {
a0d0e21e
LW
7475 for (t = s; t < eol; t++) {
7476 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7477 needargs = FALSE;
7478 goto enough; /* ~~ must be first line in formline */
378cc40b 7479 }
a0d0e21e
LW
7480 if (*t == '@' || *t == '^')
7481 needargs = TRUE;
378cc40b 7482 }
7121b347
MG
7483 if (eol > s) {
7484 sv_catpvn(stuff, s, eol-s);
2dc4c65b 7485#ifndef PERL_STRICT_CR
7121b347
MG
7486 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7487 char *end = SvPVX(stuff) + SvCUR(stuff);
7488 end[-2] = '\n';
7489 end[-1] = '\0';
7490 SvCUR(stuff)--;
7491 }
2dc4c65b 7492#endif
7121b347
MG
7493 }
7494 else
7495 break;
79072805
LW
7496 }
7497 s = eol;
3280af22
NIS
7498 if (PL_rsfp) {
7499 s = filter_gets(PL_linestr, PL_rsfp, 0);
7500 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7501 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7502 PL_last_lop = PL_last_uni = Nullch;
79072805 7503 if (!s) {
3280af22 7504 s = PL_bufptr;
79072805 7505 yyerror("Format not terminated");
378cc40b
LW
7506 break;
7507 }
378cc40b 7508 }
463ee0b2 7509 incline(s);
79072805 7510 }
a0d0e21e
LW
7511 enough:
7512 if (SvCUR(stuff)) {
3280af22 7513 PL_expect = XTERM;
79072805 7514 if (needargs) {
3280af22
NIS
7515 PL_lex_state = LEX_NORMAL;
7516 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7517 force_next(',');
7518 }
a0d0e21e 7519 else
3280af22
NIS
7520 PL_lex_state = LEX_FORMLINE;
7521 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7522 force_next(THING);
3280af22 7523 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7524 force_next(LSTOP);
378cc40b 7525 }
79072805 7526 else {
8990e307 7527 SvREFCNT_dec(stuff);
3280af22
NIS
7528 PL_lex_formbrack = 0;
7529 PL_bufptr = s;
79072805
LW
7530 }
7531 return s;
378cc40b 7532}
a687059c 7533
76e3520e 7534STATIC void
cea2e8a9 7535S_set_csh(pTHX)
a687059c 7536{
ae986130 7537#ifdef CSH
3280af22
NIS
7538 if (!PL_cshlen)
7539 PL_cshlen = strlen(PL_cshname);
ae986130 7540#endif
a687059c 7541}
463ee0b2 7542
ba6d6ac9 7543I32
864dbfa3 7544Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7545{
3280af22
NIS
7546 I32 oldsavestack_ix = PL_savestack_ix;
7547 CV* outsidecv = PL_compcv;
8990e307 7548
3280af22
NIS
7549 if (PL_compcv) {
7550 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7551 }
7766f137 7552 SAVEI32(PL_subline);
3280af22 7553 save_item(PL_subname);
3280af22 7554 SAVESPTR(PL_compcv);
3280af22
NIS
7555
7556 PL_compcv = (CV*)NEWSV(1104,0);
7557 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7558 CvFLAGS(PL_compcv) |= flags;
7559
57843af0 7560 PL_subline = CopLINE(PL_curcop);
dd2155a4 7561 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
3280af22 7562 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
a3985cdc 7563 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 7564
8990e307
LW
7565 return oldsavestack_ix;
7566}
7567
084592ab
CN
7568#ifdef __SC__
7569#pragma segment Perl_yylex
7570#endif
8990e307 7571int
864dbfa3 7572Perl_yywarn(pTHX_ char *s)
8990e307 7573{
faef0170 7574 PL_in_eval |= EVAL_WARNONLY;
748a9306 7575 yyerror(s);
faef0170 7576 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7577 return 0;
8990e307
LW
7578}
7579
7580int
864dbfa3 7581Perl_yyerror(pTHX_ char *s)
463ee0b2 7582{
68dc0745 7583 char *where = NULL;
7584 char *context = NULL;
7585 int contlen = -1;
46fc3d4c 7586 SV *msg;
463ee0b2 7587
3280af22 7588 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7589 where = "at EOF";
3280af22
NIS
7590 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7591 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
f355267c
JH
7592 /*
7593 Only for NetWare:
7594 The code below is removed for NetWare because it abends/crashes on NetWare
7595 when the script has error such as not having the closing quotes like:
7596 if ($var eq "value)
7597 Checking of white spaces is anyway done in NetWare code.
7598 */
7599#ifndef NETWARE
3280af22
NIS
7600 while (isSPACE(*PL_oldoldbufptr))
7601 PL_oldoldbufptr++;
f355267c 7602#endif
3280af22
NIS
7603 context = PL_oldoldbufptr;
7604 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7605 }
3280af22
NIS
7606 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7607 PL_oldbufptr != PL_bufptr) {
f355267c
JH
7608 /*
7609 Only for NetWare:
7610 The code below is removed for NetWare because it abends/crashes on NetWare
7611 when the script has error such as not having the closing quotes like:
7612 if ($var eq "value)
7613 Checking of white spaces is anyway done in NetWare code.
7614 */
7615#ifndef NETWARE
3280af22
NIS
7616 while (isSPACE(*PL_oldbufptr))
7617 PL_oldbufptr++;
f355267c 7618#endif
3280af22
NIS
7619 context = PL_oldbufptr;
7620 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7621 }
7622 else if (yychar > 255)
68dc0745 7623 where = "next token ???";
cdfb297e
GS
7624#ifdef USE_PURE_BISON
7625/* GNU Bison sets the value -2 */
7626 else if (yychar == -2) {
7627#else
463ee0b2 7628 else if ((yychar & 127) == 127) {
cdfb297e 7629#endif
3280af22
NIS
7630 if (PL_lex_state == LEX_NORMAL ||
7631 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7632 where = "at end of line";
3280af22 7633 else if (PL_lex_inpat)
68dc0745 7634 where = "within pattern";
463ee0b2 7635 else
68dc0745 7636 where = "within string";
463ee0b2 7637 }
46fc3d4c 7638 else {
79cb57f6 7639 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7640 if (yychar < 32)
cea2e8a9 7641 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7642 else if (isPRINT_LC(yychar))
cea2e8a9 7643 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7644 else
cea2e8a9 7645 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7646 where = SvPVX(where_sv);
463ee0b2 7647 }
46fc3d4c 7648 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 7649 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 7650 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7651 if (context)
cea2e8a9 7652 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7653 else
cea2e8a9 7654 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7655 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7656 Perl_sv_catpvf(aTHX_ msg,
57def98f 7657 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7658 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7659 PL_multi_end = 0;
a0d0e21e 7660 }
faef0170 7661 if (PL_in_eval & EVAL_WARNONLY)
894356b3 7662 Perl_warn(aTHX_ "%"SVf, msg);
463ee0b2 7663 else
5a844595 7664 qerror(msg);
c7d6bfb2
GS
7665 if (PL_error_count >= 10) {
7666 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7667 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
248c2a4d 7668 ERRSV, OutCopFILE(PL_curcop));
c7d6bfb2
GS
7669 else
7670 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 7671 OutCopFILE(PL_curcop));
c7d6bfb2 7672 }
3280af22
NIS
7673 PL_in_my = 0;
7674 PL_in_my_stash = Nullhv;
463ee0b2
LW
7675 return 0;
7676}
084592ab
CN
7677#ifdef __SC__
7678#pragma segment Main
7679#endif
4e35701f 7680
b250498f 7681STATIC char*
3ae08724 7682S_swallow_bom(pTHX_ U8 *s)
01ec43d0 7683{
b250498f
GS
7684 STRLEN slen;
7685 slen = SvCUR(PL_linestr);
7686 switch (*s) {
4e553d73
NIS
7687 case 0xFF:
7688 if (s[1] == 0xFE) {
01ec43d0 7689 /* UTF-16 little-endian */
3ae08724 7690 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
01ec43d0
GS
7691 Perl_croak(aTHX_ "Unsupported script encoding");
7692#ifndef PERL_NO_UTF16_FILTER
dea0fc0b 7693 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
3ae08724 7694 s += 2;
dea0fc0b
JH
7695 if (PL_bufend > (char*)s) {
7696 U8 *news;
7697 I32 newlen;
7698
7699 filter_add(utf16rev_textfilter, NULL);
7700 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
f72f5f89
JH
7701 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7702 PL_bufend - (char*)s - 1,
dea0fc0b
JH
7703 &newlen);
7704 Copy(news, s, newlen, U8);
7705 SvCUR_set(PL_linestr, newlen);
7706 PL_bufend = SvPVX(PL_linestr) + newlen;
7707 news[newlen++] = '\0';
7708 Safefree(news);
7709 }
b250498f 7710#else
01ec43d0 7711 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7712#endif
01ec43d0
GS
7713 }
7714 break;
78ae23f5 7715 case 0xFE:
3ae08724 7716 if (s[1] == 0xFF) { /* UTF-16 big-endian */
01ec43d0 7717#ifndef PERL_NO_UTF16_FILTER
dea0fc0b
JH
7718 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7719 s += 2;
7720 if (PL_bufend > (char *)s) {
7721 U8 *news;
7722 I32 newlen;
7723
7724 filter_add(utf16_textfilter, NULL);
7725 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7726 PL_bufend = (char*)utf16_to_utf8(s, news,
7727 PL_bufend - (char*)s,
7728 &newlen);
7729 Copy(news, s, newlen, U8);
7730 SvCUR_set(PL_linestr, newlen);
7731 PL_bufend = SvPVX(PL_linestr) + newlen;
7732 news[newlen++] = '\0';
7733 Safefree(news);
7734 }
b250498f 7735#else
01ec43d0 7736 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7737#endif
01ec43d0
GS
7738 }
7739 break;
3ae08724
GS
7740 case 0xEF:
7741 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
dea0fc0b 7742 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
01ec43d0
GS
7743 s += 3; /* UTF-8 */
7744 }
7745 break;
7746 case 0:
7747 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
3ae08724 7748 s[2] == 0xFE && s[3] == 0xFF)
01ec43d0
GS
7749 {
7750 Perl_croak(aTHX_ "Unsupported script encoding");
7751 }
7752 }
b8f84bb2 7753 return (char*)s;
b250498f 7754}
4755096e 7755
4755096e
GS
7756/*
7757 * restore_rsfp
7758 * Restore a source filter.
7759 */
7760
7761static void
acfe0abc 7762restore_rsfp(pTHX_ void *f)
4755096e
GS
7763{
7764 PerlIO *fp = (PerlIO*)f;
7765
7766 if (PL_rsfp == PerlIO_stdin())
7767 PerlIO_clearerr(PL_rsfp);
7768 else if (PL_rsfp && (PL_rsfp != fp))
7769 PerlIO_close(PL_rsfp);
7770 PL_rsfp = fp;
7771}
6e3aabd6
GS
7772
7773#ifndef PERL_NO_UTF16_FILTER
7774static I32
acfe0abc 7775utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7776{
7777 I32 count = FILTER_READ(idx+1, sv, maxlen);
7778 if (count) {
7779 U8* tmps;
7780 U8* tend;
dea0fc0b 7781 I32 newlen;
6e3aabd6 7782 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
f72f5f89
JH
7783 if (!*SvPV_nolen(sv))
7784 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7785 return count;
4e553d73 7786
dea0fc0b 7787 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7788 sv_usepvn(sv, (char*)tmps, tend - tmps);
7789 }
7790 return count;
7791}
7792
7793static I32
acfe0abc 7794utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7795{
7796 I32 count = FILTER_READ(idx+1, sv, maxlen);
7797 if (count) {
7798 U8* tmps;
7799 U8* tend;
dea0fc0b 7800 I32 newlen;
f72f5f89
JH
7801 if (!*SvPV_nolen(sv))
7802 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7803 return count;
7804
6e3aabd6 7805 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
dea0fc0b 7806 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7807 sv_usepvn(sv, (char*)tmps, tend - tmps);
7808 }
7809 return count;
7810}
7811#endif
9f4817db 7812