This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
'A' is not 65 everywhere.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff 65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
IRC
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
IRC
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
6f33ba73 170#define UNI2(f,x) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c 176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
177#define UNI(f) UNI2(f,XTERM)
178#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 179
79072805 180#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 181 REPORT("uni",f) \
3280af22
NIS
182 PL_bufptr = s, \
183 PL_last_uni = PL_oldbufptr, \
79072805
LW
184 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
185
9f68db38 186/* grandfather return to old style */
3280af22 187#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 188
8fa7f367
JH
189#ifdef DEBUGGING
190
2d00ba3b 191STATIC void
61b2116b 192S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 193{
998054bd 194 DEBUG_T({
9c5ffd7c 195 SV* report = newSVpv(thing, 0);
29b291f7
RB
196 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
197 (IV)rv);
998054bd
SC
198
199 if (s - PL_bufptr > 0)
200 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
201 else {
202 if (PL_oldbufptr && *PL_oldbufptr)
203 sv_catpv(report, PL_tokenbuf);
204 }
205 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 206 });
998054bd
SC
207}
208
8fa7f367
JH
209#endif
210
ffb4593c
NT
211/*
212 * S_ao
213 *
c963b151
BD
214 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
215 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
216 */
217
76e3520e 218STATIC int
cea2e8a9 219S_ao(pTHX_ int toketype)
a0d0e21e 220{
3280af22
NIS
221 if (*PL_bufptr == '=') {
222 PL_bufptr++;
a0d0e21e
LW
223 if (toketype == ANDAND)
224 yylval.ival = OP_ANDASSIGN;
225 else if (toketype == OROR)
226 yylval.ival = OP_ORASSIGN;
c963b151
BD
227 else if (toketype == DORDOR)
228 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
229 toketype = ASSIGNOP;
230 }
231 return toketype;
232}
233
ffb4593c
NT
234/*
235 * S_no_op
236 * When Perl expects an operator and finds something else, no_op
237 * prints the warning. It always prints "<something> found where
238 * operator expected. It prints "Missing semicolon on previous line?"
239 * if the surprise occurs at the start of the line. "do you need to
240 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
241 * where the compiler doesn't know if foo is a method call or a function.
242 * It prints "Missing operator before end of line" if there's nothing
243 * after the missing operator, or "... before <...>" if there is something
244 * after the missing operator.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_no_op(pTHX_ char *what, char *s)
463ee0b2 249{
3280af22
NIS
250 char *oldbp = PL_bufptr;
251 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 252
1189a94a
GS
253 if (!s)
254 s = oldbp;
07c798fb 255 else
1189a94a 256 PL_bufptr = s;
cea2e8a9 257 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 258 if (is_first)
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 260 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 261 char *t;
7e2040f0 262 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 263 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 264 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 265 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 266 }
07c798fb
HS
267 else {
268 assert(s >= oldbp);
cea2e8a9 269 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 270 }
3280af22 271 PL_bufptr = oldbp;
8990e307
LW
272}
273
ffb4593c
NT
274/*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
76e3520e 283STATIC void
cea2e8a9 284S_missingterm(pTHX_ char *s)
8990e307
LW
285{
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
d2719217 290 if (nl)
8990e307
LW
291 *nl = '\0';
292 }
9d116dd7
JH
293 else if (
294#ifdef EBCDIC
295 iscntrl(PL_multi_close)
296#else
297 PL_multi_close < 32 || PL_multi_close == 127
298#endif
299 ) {
8990e307 300 *tmpbuf = '^';
3280af22 301 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
eb160463 307 *tmpbuf = (char)PL_multi_close;
8990e307
LW
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 313}
79072805 314
ffb4593c
NT
315/*
316 * Perl_deprecate
ffb4593c
NT
317 */
318
79072805 319void
864dbfa3 320Perl_deprecate(pTHX_ char *s)
a0d0e21e 321{
599cee73 322 if (ckWARN(WARN_DEPRECATED))
9014280d 323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
324}
325
12bcd1a6
PM
326void
327Perl_deprecate_old(pTHX_ char *s)
328{
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340}
341
ffb4593c
NT
342/*
343 * depcom
9cbb5ea2 344 * Deprecate a comma-less variable list.
ffb4593c
NT
345 */
346
76e3520e 347STATIC void
cea2e8a9 348S_depcom(pTHX)
a0d0e21e 349{
12bcd1a6 350 deprecate_old("comma-less variable list");
a0d0e21e
LW
351}
352
ffb4593c 353/*
9cbb5ea2
GS
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
ffb4593c
NT
356 */
357
c39cd008
GS
358#ifdef PERL_CR_FILTER
359static void
360strip_return(SV *sv)
361{
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379}
a868473f 380
76e3520e 381STATIC I32
c39cd008 382S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 383{
c39cd008
GS
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
a868473f
NIS
388}
389#endif
390
ffb4593c
NT
391/*
392 * Perl_lex_start
9cbb5ea2
GS
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
ffb4593c
NT
395 */
396
a0d0e21e 397void
864dbfa3 398Perl_lex_start(pTHX_ SV *line)
79072805 399{
8990e307
LW
400 char *s;
401 STRLEN len;
402
3280af22
NIS
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
3280af22
NIS
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
7766f137 408 SAVEVPTR(PL_lex_inpat);
3280af22 409 SAVEI32(PL_lex_inwhat);
18b09519
GS
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
18b09519 417 }
57843af0 418 SAVECOPLINE(PL_curcop);
3280af22
NIS
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
3280af22
NIS
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
8edd5f42
RGS
427 SAVEGENERICPV(PL_lex_brackstack);
428 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
09bef843 432 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 433 SAVESPTR(PL_lex_repl);
bebdddfc
GS
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
3280af22
NIS
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
3280af22
NIS
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
443 PL_lex_casemods = 0;
444 *PL_lex_casestack = '\0';
445 PL_lex_dojoin = 0;
446 PL_lex_starts = 0;
447 PL_lex_stuff = Nullsv;
448 PL_lex_repl = Nullsv;
449 PL_lex_inpat = 0;
76be56bc 450 PL_nexttoke = 0;
3280af22 451 PL_lex_inwhat = 0;
09bef843 452 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
453 PL_linestr = line;
454 if (SvREADONLY(PL_linestr))
455 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
456 s = SvPV(PL_linestr, len);
6f27f9a7 457 if (!len || s[len-1] != ';') {
3280af22
NIS
458 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
459 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
460 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 461 }
3280af22
NIS
462 SvTEMP_off(PL_linestr);
463 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
464 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 465 PL_last_lop = PL_last_uni = Nullch;
3280af22 466 PL_rsfp = 0;
79072805 467}
a687059c 468
ffb4593c
NT
469/*
470 * Perl_lex_end
9cbb5ea2
GS
471 * Finalizer for lexing operations. Must be called when the parser is
472 * done with the lexer.
ffb4593c
NT
473 */
474
463ee0b2 475void
864dbfa3 476Perl_lex_end(pTHX)
463ee0b2 477{
3280af22 478 PL_doextract = FALSE;
463ee0b2
LW
479}
480
ffb4593c
NT
481/*
482 * S_incline
483 * This subroutine has nothing to do with tilting, whether at windmills
484 * or pinball tables. Its name is short for "increment line". It
57843af0 485 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 486 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
487 * # line 500 "foo.pm"
488 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
489 */
490
76e3520e 491STATIC void
cea2e8a9 492S_incline(pTHX_ char *s)
463ee0b2
LW
493{
494 char *t;
495 char *n;
73659bf1 496 char *e;
463ee0b2 497 char ch;
463ee0b2 498
57843af0 499 CopLINE_inc(PL_curcop);
463ee0b2
LW
500 if (*s++ != '#')
501 return;
bf4acbe4 502 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
503 if (strnEQ(s, "line", 4))
504 s += 4;
505 else
506 return;
084592ab 507 if (SPACE_OR_TAB(*s))
73659bf1 508 s++;
4e553d73 509 else
73659bf1 510 return;
bf4acbe4 511 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
512 if (!isDIGIT(*s))
513 return;
514 n = s;
515 while (isDIGIT(*s))
516 s++;
bf4acbe4 517 while (SPACE_OR_TAB(*s))
463ee0b2 518 s++;
73659bf1 519 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 520 s++;
73659bf1
GS
521 e = t + 1;
522 }
463ee0b2 523 else {
463ee0b2 524 for (t = s; !isSPACE(*t); t++) ;
73659bf1 525 e = t;
463ee0b2 526 }
bf4acbe4 527 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
528 e++;
529 if (*e != '\n' && *e != '\0')
530 return; /* false alarm */
531
463ee0b2
LW
532 ch = *t;
533 *t = '\0';
f4dd75d9 534 if (t - s > 0) {
05ec9bb3 535 CopFILE_free(PL_curcop);
57843af0 536 CopFILE_set(PL_curcop, s);
f4dd75d9 537 }
463ee0b2 538 *t = ch;
57843af0 539 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
540}
541
ffb4593c
NT
542/*
543 * S_skipspace
544 * Called to gobble the appropriate amount and type of whitespace.
545 * Skips comments as well.
546 */
547
76e3520e 548STATIC char *
cea2e8a9 549S_skipspace(pTHX_ register char *s)
a687059c 550{
3280af22 551 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 552 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
553 s++;
554 return s;
555 }
556 for (;;) {
fd049845 557 STRLEN prevlen;
09bef843 558 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 559 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
560 while (s < PL_bufend && isSPACE(*s)) {
561 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
562 incline(s);
563 }
ffb4593c
NT
564
565 /* comment */
3280af22
NIS
566 if (s < PL_bufend && *s == '#') {
567 while (s < PL_bufend && *s != '\n')
463ee0b2 568 s++;
60e6418e 569 if (s < PL_bufend) {
463ee0b2 570 s++;
60e6418e
GS
571 if (PL_in_eval && !PL_rsfp) {
572 incline(s);
573 continue;
574 }
575 }
463ee0b2 576 }
ffb4593c
NT
577
578 /* only continue to recharge the buffer if we're at the end
579 * of the buffer, we're not reading from a source filter, and
580 * we're in normal lexing mode
581 */
09bef843
SB
582 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
583 PL_lex_state == LEX_FORMLINE)
463ee0b2 584 return s;
ffb4593c
NT
585
586 /* try to recharge the buffer */
9cbb5ea2
GS
587 if ((s = filter_gets(PL_linestr, PL_rsfp,
588 (prevlen = SvCUR(PL_linestr)))) == Nullch)
589 {
590 /* end of file. Add on the -p or -n magic */
3280af22
NIS
591 if (PL_minus_n || PL_minus_p) {
592 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
593 ";}continue{print or die qq(-p destination: $!\\n)" :
594 "");
3280af22
NIS
595 sv_catpv(PL_linestr,";}");
596 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
597 }
598 else
3280af22 599 sv_setpv(PL_linestr,";");
ffb4593c
NT
600
601 /* reset variables for next time we lex */
9cbb5ea2
GS
602 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
603 = SvPVX(PL_linestr);
3280af22 604 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 605 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
606
607 /* Close the filehandle. Could be from -P preprocessor,
608 * STDIN, or a regular file. If we were reading code from
609 * STDIN (because the commandline held no -e or filename)
610 * then we don't close it, we reset it so the code can
611 * read from STDIN too.
612 */
613
3280af22
NIS
614 if (PL_preprocess && !PL_in_eval)
615 (void)PerlProc_pclose(PL_rsfp);
616 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
617 PerlIO_clearerr(PL_rsfp);
8990e307 618 else
3280af22
NIS
619 (void)PerlIO_close(PL_rsfp);
620 PL_rsfp = Nullfp;
463ee0b2
LW
621 return s;
622 }
ffb4593c
NT
623
624 /* not at end of file, so we only read another line */
09bef843
SB
625 /* make corresponding updates to old pointers, for yyerror() */
626 oldprevlen = PL_oldbufptr - PL_bufend;
627 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
628 if (PL_last_uni)
629 oldunilen = PL_last_uni - PL_bufend;
630 if (PL_last_lop)
631 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
632 PL_linestart = PL_bufptr = s + prevlen;
633 PL_bufend = s + SvCUR(PL_linestr);
634 s = PL_bufptr;
09bef843
SB
635 PL_oldbufptr = s + oldprevlen;
636 PL_oldoldbufptr = s + oldoldprevlen;
637 if (PL_last_uni)
638 PL_last_uni = s + oldunilen;
639 if (PL_last_lop)
640 PL_last_lop = s + oldloplen;
a0d0e21e 641 incline(s);
ffb4593c
NT
642
643 /* debugger active and we're not compiling the debugger code,
644 * so store the line into the debugger's array of lines
645 */
3280af22 646 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
647 SV *sv = NEWSV(85,0);
648
649 sv_upgrade(sv, SVt_PVMG);
3280af22 650 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
651 (void)SvIOK_on(sv);
652 SvIVX(sv) = 0;
57843af0 653 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 654 }
463ee0b2 655 }
a687059c 656}
378cc40b 657
ffb4593c
NT
658/*
659 * S_check_uni
660 * Check the unary operators to ensure there's no ambiguity in how they're
661 * used. An ambiguous piece of code would be:
662 * rand + 5
663 * This doesn't mean rand() + 5. Because rand() is a unary operator,
664 * the +5 is its argument.
665 */
666
76e3520e 667STATIC void
cea2e8a9 668S_check_uni(pTHX)
ba106d47 669{
2f3197b3 670 char *s;
a0d0e21e 671 char *t;
2f3197b3 672
3280af22 673 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 674 return;
3280af22
NIS
675 while (isSPACE(*PL_last_uni))
676 PL_last_uni++;
7e2040f0 677 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 678 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 679 return;
0453d815 680 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 681 char ch = *s;
0453d815 682 *s = '\0';
9014280d 683 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 684 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
685 PL_last_uni);
686 *s = ch;
687 }
2f3197b3
LW
688}
689
ffb4593c
NT
690/*
691 * LOP : macro to build a list operator. Its behaviour has been replaced
692 * with a subroutine, S_lop() for which LOP is just another name.
693 */
694
a0d0e21e
LW
695#define LOP(f,x) return lop(f,x,s)
696
ffb4593c
NT
697/*
698 * S_lop
699 * Build a list operator (or something that might be one). The rules:
700 * - if we have a next token, then it's a list operator [why?]
701 * - if the next thing is an opening paren, then it's a function
702 * - else it's a list operator
703 */
704
76e3520e 705STATIC I32
a0be28da 706S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 707{
79072805 708 yylval.ival = f;
35c8bce7 709 CLINE;
075953c3 710 REPORT("lop", f)
3280af22
NIS
711 PL_expect = x;
712 PL_bufptr = s;
713 PL_last_lop = PL_oldbufptr;
eb160463 714 PL_last_lop_op = (OPCODE)f;
3280af22 715 if (PL_nexttoke)
a0d0e21e 716 return LSTOP;
79072805
LW
717 if (*s == '(')
718 return FUNC;
719 s = skipspace(s);
720 if (*s == '(')
721 return FUNC;
722 else
723 return LSTOP;
724}
725
ffb4593c
NT
726/*
727 * S_force_next
9cbb5ea2 728 * When the lexer realizes it knows the next token (for instance,
ffb4593c 729 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
730 * to know what token to return the next time the lexer is called. Caller
731 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
732 * handles the token correctly.
ffb4593c
NT
733 */
734
4e553d73 735STATIC void
cea2e8a9 736S_force_next(pTHX_ I32 type)
79072805 737{
3280af22
NIS
738 PL_nexttype[PL_nexttoke] = type;
739 PL_nexttoke++;
740 if (PL_lex_state != LEX_KNOWNEXT) {
741 PL_lex_defer = PL_lex_state;
742 PL_lex_expect = PL_expect;
743 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
744 }
745}
746
ffb4593c
NT
747/*
748 * S_force_word
749 * When the lexer knows the next thing is a word (for instance, it has
750 * just seen -> and it knows that the next char is a word char, then
751 * it calls S_force_word to stick the next word into the PL_next lookahead.
752 *
753 * Arguments:
b1b65b59 754 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
755 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
756 * int check_keyword : if true, Perl checks to make sure the word isn't
757 * a keyword (do this if the word is a label, e.g. goto FOO)
758 * int allow_pack : if true, : characters will also be allowed (require,
759 * use, etc. do this)
9cbb5ea2 760 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
761 */
762
76e3520e 763STATIC char *
cea2e8a9 764S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 765{
463ee0b2
LW
766 register char *s;
767 STRLEN len;
4e553d73 768
463ee0b2
LW
769 start = skipspace(start);
770 s = start;
7e2040f0 771 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 772 (allow_pack && *s == ':') ||
15f0808c 773 (allow_initial_tick && *s == '\'') )
a0d0e21e 774 {
3280af22
NIS
775 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
776 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
777 return start;
778 if (token == METHOD) {
779 s = skipspace(s);
780 if (*s == '(')
3280af22 781 PL_expect = XTERM;
463ee0b2 782 else {
3280af22 783 PL_expect = XOPERATOR;
463ee0b2 784 }
79072805 785 }
3280af22
NIS
786 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
787 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
788 force_next(token);
789 }
790 return s;
791}
792
ffb4593c
NT
793/*
794 * S_force_ident
9cbb5ea2 795 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
796 * text only contains the "foo" portion. The first argument is a pointer
797 * to the "foo", and the second argument is the type symbol to prefix.
798 * Forces the next token to be a "WORD".
9cbb5ea2 799 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
800 */
801
76e3520e 802STATIC void
cea2e8a9 803S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
804{
805 if (s && *s) {
11343788 806 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 807 PL_nextval[PL_nexttoke].opval = o;
79072805 808 force_next(WORD);
748a9306 809 if (kind) {
11343788 810 o->op_private = OPpCONST_ENTERED;
55497cff 811 /* XXX see note in pp_entereval() for why we forgo typo
812 warnings if the symbol must be introduced in an eval.
813 GSAR 96-10-12 */
3280af22 814 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
815 kind == '$' ? SVt_PV :
816 kind == '@' ? SVt_PVAV :
817 kind == '%' ? SVt_PVHV :
818 SVt_PVGV
819 );
748a9306 820 }
79072805
LW
821 }
822}
823
1571675a
GS
824NV
825Perl_str_to_version(pTHX_ SV *sv)
826{
827 NV retval = 0.0;
828 NV nshift = 1.0;
829 STRLEN len;
830 char *start = SvPVx(sv,len);
3aa33fe5 831 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
832 char *end = start + len;
833 while (start < end) {
ba210ebe 834 STRLEN skip;
1571675a
GS
835 UV n;
836 if (utf)
9041c2e3 837 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
838 else {
839 n = *(U8*)start;
840 skip = 1;
841 }
842 retval += ((NV)n)/nshift;
843 start += skip;
844 nshift *= 1000;
845 }
846 return retval;
847}
848
4e553d73 849/*
ffb4593c
NT
850 * S_force_version
851 * Forces the next token to be a version number.
e759cc13
RGS
852 * If the next token appears to be an invalid version number, (e.g. "v2b"),
853 * and if "guessing" is TRUE, then no new token is created (and the caller
854 * must use an alternative parsing method).
ffb4593c
NT
855 */
856
76e3520e 857STATIC char *
e759cc13 858S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 859{
860 OP *version = Nullop;
44dcb63b 861 char *d;
89bfa8cd 862
863 s = skipspace(s);
864
44dcb63b 865 d = s;
dd629d5b 866 if (*d == 'v')
44dcb63b 867 d++;
44dcb63b 868 if (isDIGIT(*d)) {
e759cc13
RGS
869 while (isDIGIT(*d) || *d == '_' || *d == '.')
870 d++;
9f3d182e 871 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 872 SV *ver;
b73d6f50 873 s = scan_num(s, &yylval);
89bfa8cd 874 version = yylval.opval;
dd629d5b
GS
875 ver = cSVOPx(version)->op_sv;
876 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 877 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
878 SvNVX(ver) = str_to_version(ver);
879 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 880 }
89bfa8cd 881 }
e759cc13
RGS
882 else if (guessing)
883 return s;
89bfa8cd 884 }
885
886 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 887 PL_nextval[PL_nexttoke].opval = version;
4e553d73 888 force_next(WORD);
89bfa8cd 889
e759cc13 890 return s;
89bfa8cd 891}
892
ffb4593c
NT
893/*
894 * S_tokeq
895 * Tokenize a quoted string passed in as an SV. It finds the next
896 * chunk, up to end of string or a backslash. It may make a new
897 * SV containing that chunk (if HINT_NEW_STRING is on). It also
898 * turns \\ into \.
899 */
900
76e3520e 901STATIC SV *
cea2e8a9 902S_tokeq(pTHX_ SV *sv)
79072805
LW
903{
904 register char *s;
905 register char *send;
906 register char *d;
b3ac6de7
IZ
907 STRLEN len = 0;
908 SV *pv = sv;
79072805
LW
909
910 if (!SvLEN(sv))
b3ac6de7 911 goto finish;
79072805 912
a0d0e21e 913 s = SvPV_force(sv, len);
21a311ee 914 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 915 goto finish;
463ee0b2 916 send = s + len;
79072805
LW
917 while (s < send && *s != '\\')
918 s++;
919 if (s == send)
b3ac6de7 920 goto finish;
79072805 921 d = s;
be4731d2 922 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 923 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
924 if (SvUTF8(sv))
925 SvUTF8_on(pv);
926 }
79072805
LW
927 while (s < send) {
928 if (*s == '\\') {
a0d0e21e 929 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
930 s++; /* all that, just for this */
931 }
932 *d++ = *s++;
933 }
934 *d = '\0';
463ee0b2 935 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 936 finish:
3280af22 937 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 938 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
939 return sv;
940}
941
ffb4593c
NT
942/*
943 * Now come three functions related to double-quote context,
944 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
945 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
946 * interact with PL_lex_state, and create fake ( ... ) argument lists
947 * to handle functions and concatenation.
948 * They assume that whoever calls them will be setting up a fake
949 * join call, because each subthing puts a ',' after it. This lets
950 * "lower \luPpEr"
951 * become
952 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
953 *
954 * (I'm not sure whether the spurious commas at the end of lcfirst's
955 * arguments and join's arguments are created or not).
956 */
957
958/*
959 * S_sublex_start
960 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
961 *
962 * Pattern matching will set PL_lex_op to the pattern-matching op to
963 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
964 *
965 * OP_CONST and OP_READLINE are easy--just make the new op and return.
966 *
967 * Everything else becomes a FUNC.
968 *
969 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
970 * had an OP_CONST or OP_READLINE). This just sets us up for a
971 * call to S_sublex_push().
972 */
973
76e3520e 974STATIC I32
cea2e8a9 975S_sublex_start(pTHX)
79072805
LW
976{
977 register I32 op_type = yylval.ival;
79072805
LW
978
979 if (op_type == OP_NULL) {
3280af22
NIS
980 yylval.opval = PL_lex_op;
981 PL_lex_op = Nullop;
79072805
LW
982 return THING;
983 }
984 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 985 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
986
987 if (SvTYPE(sv) == SVt_PVIV) {
988 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
989 STRLEN len;
990 char *p;
991 SV *nsv;
992
993 p = SvPV(sv, len);
79cb57f6 994 nsv = newSVpvn(p, len);
01ec43d0
GS
995 if (SvUTF8(sv))
996 SvUTF8_on(nsv);
b3ac6de7
IZ
997 SvREFCNT_dec(sv);
998 sv = nsv;
4e553d73 999 }
b3ac6de7 1000 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1001 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1002 /* Allow <FH> // "foo" */
1003 if (op_type == OP_READLINE)
1004 PL_expect = XTERMORDORDOR;
79072805
LW
1005 return THING;
1006 }
1007
3280af22
NIS
1008 PL_sublex_info.super_state = PL_lex_state;
1009 PL_sublex_info.sub_inwhat = op_type;
1010 PL_sublex_info.sub_op = PL_lex_op;
1011 PL_lex_state = LEX_INTERPPUSH;
55497cff 1012
3280af22
NIS
1013 PL_expect = XTERM;
1014 if (PL_lex_op) {
1015 yylval.opval = PL_lex_op;
1016 PL_lex_op = Nullop;
55497cff 1017 return PMFUNC;
1018 }
1019 else
1020 return FUNC;
1021}
1022
ffb4593c
NT
1023/*
1024 * S_sublex_push
1025 * Create a new scope to save the lexing state. The scope will be
1026 * ended in S_sublex_done. Returns a '(', starting the function arguments
1027 * to the uc, lc, etc. found before.
1028 * Sets PL_lex_state to LEX_INTERPCONCAT.
1029 */
1030
76e3520e 1031STATIC I32
cea2e8a9 1032S_sublex_push(pTHX)
55497cff 1033{
f46d017c 1034 ENTER;
55497cff 1035
3280af22
NIS
1036 PL_lex_state = PL_sublex_info.super_state;
1037 SAVEI32(PL_lex_dojoin);
1038 SAVEI32(PL_lex_brackets);
3280af22
NIS
1039 SAVEI32(PL_lex_casemods);
1040 SAVEI32(PL_lex_starts);
1041 SAVEI32(PL_lex_state);
7766f137 1042 SAVEVPTR(PL_lex_inpat);
3280af22 1043 SAVEI32(PL_lex_inwhat);
57843af0 1044 SAVECOPLINE(PL_curcop);
3280af22 1045 SAVEPPTR(PL_bufptr);
8452ff4b 1046 SAVEPPTR(PL_bufend);
3280af22
NIS
1047 SAVEPPTR(PL_oldbufptr);
1048 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1049 SAVEPPTR(PL_last_lop);
1050 SAVEPPTR(PL_last_uni);
3280af22
NIS
1051 SAVEPPTR(PL_linestart);
1052 SAVESPTR(PL_linestr);
8edd5f42
RGS
1053 SAVEGENERICPV(PL_lex_brackstack);
1054 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1055
1056 PL_linestr = PL_lex_stuff;
1057 PL_lex_stuff = Nullsv;
1058
9cbb5ea2
GS
1059 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1060 = SvPVX(PL_linestr);
3280af22 1061 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1062 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1063 SAVEFREESV(PL_linestr);
1064
1065 PL_lex_dojoin = FALSE;
1066 PL_lex_brackets = 0;
3280af22
NIS
1067 New(899, PL_lex_brackstack, 120, char);
1068 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1069 PL_lex_casemods = 0;
1070 *PL_lex_casestack = '\0';
1071 PL_lex_starts = 0;
1072 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1073 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1074
1075 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1076 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1077 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1078 else
3280af22 1079 PL_lex_inpat = Nullop;
79072805 1080
55497cff 1081 return '(';
79072805
LW
1082}
1083
ffb4593c
NT
1084/*
1085 * S_sublex_done
1086 * Restores lexer state after a S_sublex_push.
1087 */
1088
76e3520e 1089STATIC I32
cea2e8a9 1090S_sublex_done(pTHX)
79072805 1091{
3280af22 1092 if (!PL_lex_starts++) {
9aa983d2
JH
1093 SV *sv = newSVpvn("",0);
1094 if (SvUTF8(PL_linestr))
1095 SvUTF8_on(sv);
3280af22 1096 PL_expect = XOPERATOR;
9aa983d2 1097 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1098 return THING;
1099 }
1100
3280af22
NIS
1101 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1102 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1103 return yylex();
79072805
LW
1104 }
1105
ffb4593c 1106 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1107 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1108 PL_linestr = PL_lex_repl;
1109 PL_lex_inpat = 0;
1110 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1111 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1112 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1113 SAVEFREESV(PL_linestr);
1114 PL_lex_dojoin = FALSE;
1115 PL_lex_brackets = 0;
3280af22
NIS
1116 PL_lex_casemods = 0;
1117 *PL_lex_casestack = '\0';
1118 PL_lex_starts = 0;
25da4f38 1119 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1120 PL_lex_state = LEX_INTERPNORMAL;
1121 PL_lex_starts++;
e9fa98b2
HS
1122 /* we don't clear PL_lex_repl here, so that we can check later
1123 whether this is an evalled subst; that means we rely on the
1124 logic to ensure sublex_done() is called again only via the
1125 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1126 }
e9fa98b2 1127 else {
3280af22 1128 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1129 PL_lex_repl = Nullsv;
1130 }
79072805 1131 return ',';
ffed7fef
LW
1132 }
1133 else {
f46d017c 1134 LEAVE;
3280af22
NIS
1135 PL_bufend = SvPVX(PL_linestr);
1136 PL_bufend += SvCUR(PL_linestr);
1137 PL_expect = XOPERATOR;
09bef843 1138 PL_sublex_info.sub_inwhat = 0;
79072805 1139 return ')';
ffed7fef
LW
1140 }
1141}
1142
02aa26ce
NT
1143/*
1144 scan_const
1145
1146 Extracts a pattern, double-quoted string, or transliteration. This
1147 is terrifying code.
1148
3280af22
NIS
1149 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1150 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1151 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1152
9b599b2a
GS
1153 Returns a pointer to the character scanned up to. Iff this is
1154 advanced from the start pointer supplied (ie if anything was
1155 successfully parsed), will leave an OP for the substring scanned
1156 in yylval. Caller must intuit reason for not parsing further
1157 by looking at the next characters herself.
1158
02aa26ce
NT
1159 In patterns:
1160 backslashes:
1161 double-quoted style: \r and \n
1162 regexp special ones: \D \s
1163 constants: \x3
1164 backrefs: \1 (deprecated in substitution replacements)
1165 case and quoting: \U \Q \E
1166 stops on @ and $, but not for $ as tail anchor
1167
1168 In transliterations:
1169 characters are VERY literal, except for - not at the start or end
1170 of the string, which indicates a range. scan_const expands the
1171 range to the full set of intermediate characters.
1172
1173 In double-quoted strings:
1174 backslashes:
1175 double-quoted style: \r and \n
1176 constants: \x3
1177 backrefs: \1 (deprecated)
1178 case and quoting: \U \Q \E
1179 stops on @ and $
1180
1181 scan_const does *not* construct ops to handle interpolated strings.
1182 It stops processing as soon as it finds an embedded $ or @ variable
1183 and leaves it to the caller to work out what's going on.
1184
da6eedaa 1185 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1186
1187 $ in pattern could be $foo or could be tail anchor. Assumption:
1188 it's a tail anchor if $ is the last thing in the string, or if it's
1189 followed by one of ")| \n\t"
1190
1191 \1 (backreferences) are turned into $1
1192
1193 The structure of the code is
1194 while (there's a character to process) {
1195 handle transliteration ranges
1196 skip regexp comments
1197 skip # initiated comments in //x patterns
1198 check for embedded @foo
1199 check for embedded scalars
1200 if (backslash) {
1201 leave intact backslashes from leave (below)
1202 deprecate \1 in strings and sub replacements
1203 handle string-changing backslashes \l \U \Q \E, etc.
1204 switch (what was escaped) {
1205 handle - in a transliteration (becomes a literal -)
1206 handle \132 octal characters
1207 handle 0x15 hex characters
1208 handle \cV (control V)
1209 handle printf backslashes (\f, \r, \n, etc)
1210 } (end switch)
1211 } (end if backslash)
1212 } (end while character to read)
4e553d73 1213
02aa26ce
NT
1214*/
1215
76e3520e 1216STATIC char *
cea2e8a9 1217S_scan_const(pTHX_ char *start)
79072805 1218{
3280af22 1219 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1220 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1221 register char *s = start; /* start of the constant */
1222 register char *d = SvPVX(sv); /* destination for copies */
1223 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1224 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1225 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1226 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1227 UV uv;
1228
dff6d3cd 1229 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1230 PL_lex_inpat
4a2d328f 1231 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1232 : "";
79072805 1233
2b9d42f0
NIS
1234 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1235 /* If we are doing a trans and we know we want UTF8 set expectation */
1236 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1237 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1238 }
1239
1240
79072805 1241 while (s < send || dorange) {
02aa26ce 1242 /* get transliterations out of the way (they're most literal) */
3280af22 1243 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1244 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1245 if (dorange) {
1ba5c669
JH
1246 I32 i; /* current expanded character */
1247 I32 min; /* first character in range */
1248 I32 max; /* last character in range */
02aa26ce 1249
2b9d42f0 1250 if (has_utf8) {
8973db79
JH
1251 char *c = (char*)utf8_hop((U8*)d, -1);
1252 char *e = d++;
1253 while (e-- > c)
1254 *(e + 1) = *e;
25716404 1255 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1256 /* mark the range as done, and continue */
1257 dorange = FALSE;
1258 didrange = TRUE;
1259 continue;
1260 }
2b9d42f0 1261
02aa26ce 1262 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1263 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1264 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1265 d -= 2; /* eat the first char and the - */
1266
8ada0baa
JH
1267 min = (U8)*d; /* first char in range */
1268 max = (U8)d[1]; /* last char in range */
1269
c2e66d9e 1270 if (min > max) {
01ec43d0 1271 Perl_croak(aTHX_
d1573ac7 1272 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1273 (char)min, (char)max);
c2e66d9e
GS
1274 }
1275
c7f1f016 1276#ifdef EBCDIC
8ada0baa
JH
1277 if ((isLOWER(min) && isLOWER(max)) ||
1278 (isUPPER(min) && isUPPER(max))) {
1279 if (isLOWER(min)) {
1280 for (i = min; i <= max; i++)
1281 if (isLOWER(i))
db42d148 1282 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1283 } else {
1284 for (i = min; i <= max; i++)
1285 if (isUPPER(i))
db42d148 1286 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1287 }
1288 }
1289 else
1290#endif
1291 for (i = min; i <= max; i++)
eb160463 1292 *d++ = (char)i;
02aa26ce
NT
1293
1294 /* mark the range as done, and continue */
79072805 1295 dorange = FALSE;
01ec43d0 1296 didrange = TRUE;
79072805 1297 continue;
4e553d73 1298 }
02aa26ce
NT
1299
1300 /* range begins (ignore - as first or last char) */
79072805 1301 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1302 if (didrange) {
1fafa243 1303 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1304 }
2b9d42f0 1305 if (has_utf8) {
25716404 1306 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1307 s++;
1308 continue;
1309 }
79072805
LW
1310 dorange = TRUE;
1311 s++;
01ec43d0
GS
1312 }
1313 else {
1314 didrange = FALSE;
1315 }
79072805 1316 }
02aa26ce
NT
1317
1318 /* if we get here, we're not doing a transliteration */
1319
0f5d15d6
IZ
1320 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1321 except for the last char, which will be done separately. */
3280af22 1322 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1323 if (s[2] == '#') {
1324 while (s < send && *s != ')')
db42d148 1325 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1326 }
1327 else if (s[2] == '{' /* This should match regcomp.c */
1328 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1329 {
cc6b7395 1330 I32 count = 1;
0f5d15d6 1331 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1332 char c;
1333
d9f97599
GS
1334 while (count && (c = *regparse)) {
1335 if (c == '\\' && regparse[1])
1336 regparse++;
4e553d73 1337 else if (c == '{')
cc6b7395 1338 count++;
4e553d73 1339 else if (c == '}')
cc6b7395 1340 count--;
d9f97599 1341 regparse++;
cc6b7395 1342 }
5bdf89e7
IZ
1343 if (*regparse != ')') {
1344 regparse--; /* Leave one char for continuation. */
cc6b7395 1345 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1346 }
0f5d15d6 1347 while (s < regparse)
db42d148 1348 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1349 }
748a9306 1350 }
02aa26ce
NT
1351
1352 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1353 else if (*s == '#' && PL_lex_inpat &&
1354 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1355 while (s+1 < send && *s != '\n')
db42d148 1356 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1357 }
02aa26ce 1358
5d1d4326 1359 /* check for embedded arrays
da6eedaa 1360 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1361 */
7e2040f0 1362 else if (*s == '@' && s[1]
5d1d4326 1363 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1364 break;
02aa26ce
NT
1365
1366 /* check for embedded scalars. only stop if we're sure it's a
1367 variable.
1368 */
79072805 1369 else if (*s == '$') {
3280af22 1370 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1371 break;
6002328a 1372 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1373 break; /* in regexp, $ might be tail anchor */
1374 }
02aa26ce 1375
2b9d42f0
NIS
1376 /* End of else if chain - OP_TRANS rejoin rest */
1377
02aa26ce 1378 /* backslashes */
79072805
LW
1379 if (*s == '\\' && s+1 < send) {
1380 s++;
02aa26ce
NT
1381
1382 /* some backslashes we leave behind */
c9f97d15 1383 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1384 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1385 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1386 continue;
1387 }
02aa26ce
NT
1388
1389 /* deprecate \1 in strings and substitution replacements */
3280af22 1390 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1391 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1392 {
599cee73 1393 if (ckWARN(WARN_SYNTAX))
9014280d 1394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1395 *--s = '$';
1396 break;
1397 }
02aa26ce
NT
1398
1399 /* string-change backslash escapes */
3280af22 1400 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1401 --s;
1402 break;
1403 }
02aa26ce
NT
1404
1405 /* if we get here, it's either a quoted -, or a digit */
79072805 1406 switch (*s) {
02aa26ce
NT
1407
1408 /* quoted - in transliterations */
79072805 1409 case '-':
3280af22 1410 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1411 *d++ = *s++;
1412 continue;
1413 }
1414 /* FALL THROUGH */
1415 default:
11b8faa4 1416 {
707afd92
MS
1417 if (ckWARN(WARN_MISC) &&
1418 isALNUM(*s) &&
1419 *s != '_')
9014280d 1420 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1421 "Unrecognized escape \\%c passed through",
1422 *s);
1423 /* default action is to copy the quoted character */
f9a63242 1424 goto default_action;
11b8faa4 1425 }
02aa26ce
NT
1426
1427 /* \132 indicates an octal constant */
79072805
LW
1428 case '0': case '1': case '2': case '3':
1429 case '4': case '5': case '6': case '7':
ba210ebe 1430 {
53305cf1
NC
1431 I32 flags = 0;
1432 STRLEN len = 3;
1433 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1434 s += len;
1435 }
012bcf8d 1436 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1437
1438 /* \x24 indicates a hex constant */
79072805 1439 case 'x':
a0ed51b3
LW
1440 ++s;
1441 if (*s == '{') {
1442 char* e = strchr(s, '}');
a4c04bdc
NC
1443 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1444 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1445 STRLEN len;
355860ce 1446
53305cf1 1447 ++s;
adaeee49 1448 if (!e) {
a0ed51b3 1449 yyerror("Missing right brace on \\x{}");
355860ce 1450 continue;
ba210ebe 1451 }
53305cf1
NC
1452 len = e - s;
1453 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1454 s = e + 1;
a0ed51b3
LW
1455 }
1456 else {
ba210ebe 1457 {
53305cf1 1458 STRLEN len = 2;
a4c04bdc 1459 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1460 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1461 s += len;
1462 }
012bcf8d
GS
1463 }
1464
1465 NUM_ESCAPE_INSERT:
1466 /* Insert oct or hex escaped character.
301d3d20 1467 * There will always enough room in sv since such
db42d148 1468 * escapes will be longer than any UTF-8 sequence
301d3d20 1469 * they can end up as. */
ba7cea30 1470
c7f1f016
NIS
1471 /* We need to map to chars to ASCII before doing the tests
1472 to cover EBCDIC
1473 */
c4d5f83a 1474 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1475 if (!has_utf8 && uv > 255) {
301d3d20
JH
1476 /* Might need to recode whatever we have
1477 * accumulated so far if it contains any
1478 * hibit chars.
1479 *
1480 * (Can't we keep track of that and avoid
1481 * this rescan? --jhi)
012bcf8d 1482 */
c7f1f016 1483 int hicount = 0;
63cd0674
NIS
1484 U8 *c;
1485 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1486 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1487 hicount++;
db42d148 1488 }
012bcf8d 1489 }
63cd0674 1490 if (hicount) {
db42d148
NIS
1491 STRLEN offset = d - SvPVX(sv);
1492 U8 *src, *dst;
1493 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1494 src = (U8 *)d - 1;
1495 dst = src+hicount;
1496 d += hicount;
1497 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1498 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1499 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1500 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1501 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1502 }
1503 else {
63cd0674 1504 *dst-- = *src;
012bcf8d 1505 }
c7f1f016 1506 src--;
012bcf8d
GS
1507 }
1508 }
1509 }
1510
9aa983d2 1511 if (has_utf8 || uv > 255) {
9041c2e3 1512 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1513 has_utf8 = TRUE;
f9a63242
JH
1514 if (PL_lex_inwhat == OP_TRANS &&
1515 PL_sublex_info.sub_op) {
1516 PL_sublex_info.sub_op->op_private |=
1517 (PL_lex_repl ? OPpTRANS_FROM_UTF
1518 : OPpTRANS_TO_UTF);
f9a63242 1519 }
012bcf8d 1520 }
a0ed51b3 1521 else {
012bcf8d 1522 *d++ = (char)uv;
a0ed51b3 1523 }
012bcf8d
GS
1524 }
1525 else {
c4d5f83a 1526 *d++ = (char) uv;
a0ed51b3 1527 }
79072805 1528 continue;
02aa26ce 1529
b239daa5 1530 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1531 case 'N':
55eda711 1532 ++s;
423cee85
JH
1533 if (*s == '{') {
1534 char* e = strchr(s, '}');
155aba94 1535 SV *res;
423cee85
JH
1536 STRLEN len;
1537 char *str;
4e553d73 1538
423cee85 1539 if (!e) {
5777a3f7 1540 yyerror("Missing right brace on \\N{}");
423cee85
JH
1541 e = s - 1;
1542 goto cont_scan;
1543 }
dbc0d4f2
JH
1544 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1545 /* \N{U+...} */
1546 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1547 PERL_SCAN_DISALLOW_PREFIX;
1548 s += 3;
1549 len = e - s;
1550 uv = grok_hex(s, &len, &flags, NULL);
1551 s = e + 1;
1552 goto NUM_ESCAPE_INSERT;
1553 }
55eda711
JH
1554 res = newSVpvn(s + 1, e - s - 1);
1555 res = new_constant( Nullch, 0, "charnames",
1556 res, Nullsv, "\\N{...}" );
f9a63242
JH
1557 if (has_utf8)
1558 sv_utf8_upgrade(res);
423cee85 1559 str = SvPV(res,len);
1c47067b
JH
1560#ifdef EBCDIC_NEVER_MIND
1561 /* charnames uses pack U and that has been
1562 * recently changed to do the below uni->native
1563 * mapping, so this would be redundant (and wrong,
1564 * the code point would be doubly converted).
1565 * But leave this in just in case the pack U change
1566 * gets revoked, but the semantics is still
1567 * desireable for charnames. --jhi */
cddc7ef4
JH
1568 {
1569 UV uv = utf8_to_uvchr((U8*)str, 0);
1570
1571 if (uv < 0x100) {
1572 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1573
1574 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1575 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1576 str = SvPV(res, len);
1577 }
1578 }
1579#endif
89491803 1580 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1581 char *ostart = SvPVX(sv);
1582 SvCUR_set(sv, d - ostart);
1583 SvPOK_on(sv);
e4f3eed8 1584 *d = '\0';
f08d6ad9 1585 sv_utf8_upgrade(sv);
d2f449dd 1586 /* this just broke our allocation above... */
eb160463 1587 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1588 d = SvPVX(sv) + SvCUR(sv);
89491803 1589 has_utf8 = TRUE;
f08d6ad9 1590 }
eb160463 1591 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1592 char *odest = SvPVX(sv);
1593
8973db79 1594 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1595 d = SvPVX(sv) + (d - odest);
1596 }
1597 Copy(str, d, len, char);
1598 d += len;
1599 SvREFCNT_dec(res);
1600 cont_scan:
1601 s = e + 1;
1602 }
1603 else
5777a3f7 1604 yyerror("Missing braces on \\N{}");
423cee85
JH
1605 continue;
1606
02aa26ce 1607 /* \c is a control character */
79072805
LW
1608 case 'c':
1609 s++;
961ce445 1610 if (s < send) {
ba210ebe 1611 U8 c = *s++;
c7f1f016
NIS
1612#ifdef EBCDIC
1613 if (isLOWER(c))
1614 c = toUPPER(c);
1615#endif
db42d148 1616 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1617 }
961ce445
RGS
1618 else {
1619 yyerror("Missing control char name in \\c");
1620 }
79072805 1621 continue;
02aa26ce
NT
1622
1623 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1624 case 'b':
db42d148 1625 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1626 break;
1627 case 'n':
db42d148 1628 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1629 break;
1630 case 'r':
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1632 break;
1633 case 'f':
db42d148 1634 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1635 break;
1636 case 't':
db42d148 1637 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1638 break;
34a3fe2a 1639 case 'e':
db42d148 1640 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1641 break;
1642 case 'a':
db42d148 1643 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1644 break;
02aa26ce
NT
1645 } /* end switch */
1646
79072805
LW
1647 s++;
1648 continue;
02aa26ce
NT
1649 } /* end if (backslash) */
1650
f9a63242 1651 default_action:
2b9d42f0
NIS
1652 /* If we started with encoded form, or already know we want it
1653 and then encode the next character */
1654 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1655 STRLEN len = 1;
1656 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1657 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1658 s += len;
1659 if (need > len) {
1660 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1661 STRLEN off = d - SvPVX(sv);
1662 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1663 }
1664 d = (char*)uvchr_to_utf8((U8*)d, uv);
1665 has_utf8 = TRUE;
1666 }
1667 else {
1668 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1669 }
02aa26ce
NT
1670 } /* while loop to process each character */
1671
1672 /* terminate the string and set up the sv */
79072805 1673 *d = '\0';
463ee0b2 1674 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1675 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1676 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1677
79072805 1678 SvPOK_on(sv);
9f4817db 1679 if (PL_encoding && !has_utf8) {
d0063567
DK
1680 sv_recode_to_utf8(sv, PL_encoding);
1681 if (SvUTF8(sv))
1682 has_utf8 = TRUE;
9f4817db 1683 }
2b9d42f0 1684 if (has_utf8) {
7e2040f0 1685 SvUTF8_on(sv);
2b9d42f0 1686 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1687 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1688 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1689 }
1690 }
79072805 1691
02aa26ce 1692 /* shrink the sv if we allocated more than we used */
79072805
LW
1693 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1694 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1695 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1696 }
02aa26ce 1697
9b599b2a 1698 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1699 if (s > PL_bufptr) {
1700 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1701 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1702 sv, Nullsv,
4e553d73 1703 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1704 ? "tr"
3280af22 1705 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1706 ? "s"
1707 : "qq")));
79072805 1708 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1709 } else
8990e307 1710 SvREFCNT_dec(sv);
79072805
LW
1711 return s;
1712}
1713
ffb4593c
NT
1714/* S_intuit_more
1715 * Returns TRUE if there's more to the expression (e.g., a subscript),
1716 * FALSE otherwise.
ffb4593c
NT
1717 *
1718 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1719 *
1720 * ->[ and ->{ return TRUE
1721 * { and [ outside a pattern are always subscripts, so return TRUE
1722 * if we're outside a pattern and it's not { or [, then return FALSE
1723 * if we're in a pattern and the first char is a {
1724 * {4,5} (any digits around the comma) returns FALSE
1725 * if we're in a pattern and the first char is a [
1726 * [] returns FALSE
1727 * [SOMETHING] has a funky algorithm to decide whether it's a
1728 * character class or not. It has to deal with things like
1729 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1730 * anything else returns TRUE
1731 */
1732
9cbb5ea2
GS
1733/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1734
76e3520e 1735STATIC int
cea2e8a9 1736S_intuit_more(pTHX_ register char *s)
79072805 1737{
3280af22 1738 if (PL_lex_brackets)
79072805
LW
1739 return TRUE;
1740 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1741 return TRUE;
1742 if (*s != '{' && *s != '[')
1743 return FALSE;
3280af22 1744 if (!PL_lex_inpat)
79072805
LW
1745 return TRUE;
1746
1747 /* In a pattern, so maybe we have {n,m}. */
1748 if (*s == '{') {
1749 s++;
1750 if (!isDIGIT(*s))
1751 return TRUE;
1752 while (isDIGIT(*s))
1753 s++;
1754 if (*s == ',')
1755 s++;
1756 while (isDIGIT(*s))
1757 s++;
1758 if (*s == '}')
1759 return FALSE;
1760 return TRUE;
1761
1762 }
1763
1764 /* On the other hand, maybe we have a character class */
1765
1766 s++;
1767 if (*s == ']' || *s == '^')
1768 return FALSE;
1769 else {
ffb4593c 1770 /* this is terrifying, and it works */
79072805
LW
1771 int weight = 2; /* let's weigh the evidence */
1772 char seen[256];
f27ffc4a 1773 unsigned char un_char = 255, last_un_char;
93a17b20 1774 char *send = strchr(s,']');
3280af22 1775 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1776
1777 if (!send) /* has to be an expression */
1778 return TRUE;
1779
1780 Zero(seen,256,char);
1781 if (*s == '$')
1782 weight -= 3;
1783 else if (isDIGIT(*s)) {
1784 if (s[1] != ']') {
1785 if (isDIGIT(s[1]) && s[2] == ']')
1786 weight -= 10;
1787 }
1788 else
1789 weight -= 100;
1790 }
1791 for (; s < send; s++) {
1792 last_un_char = un_char;
1793 un_char = (unsigned char)*s;
1794 switch (*s) {
1795 case '@':
1796 case '&':
1797 case '$':
1798 weight -= seen[un_char] * 10;
7e2040f0 1799 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1800 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1801 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1802 weight -= 100;
1803 else
1804 weight -= 10;
1805 }
1806 else if (*s == '$' && s[1] &&
93a17b20
LW
1807 strchr("[#!%*<>()-=",s[1])) {
1808 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1809 weight -= 10;
1810 else
1811 weight -= 1;
1812 }
1813 break;
1814 case '\\':
1815 un_char = 254;
1816 if (s[1]) {
93a17b20 1817 if (strchr("wds]",s[1]))
79072805
LW
1818 weight += 100;
1819 else if (seen['\''] || seen['"'])
1820 weight += 1;
93a17b20 1821 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1822 weight += 40;
1823 else if (isDIGIT(s[1])) {
1824 weight += 40;
1825 while (s[1] && isDIGIT(s[1]))
1826 s++;
1827 }
1828 }
1829 else
1830 weight += 100;
1831 break;
1832 case '-':
1833 if (s[1] == '\\')
1834 weight += 50;
93a17b20 1835 if (strchr("aA01! ",last_un_char))
79072805 1836 weight += 30;
93a17b20 1837 if (strchr("zZ79~",s[1]))
79072805 1838 weight += 30;
f27ffc4a
GS
1839 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1840 weight -= 5; /* cope with negative subscript */
79072805
LW
1841 break;
1842 default:
93a17b20 1843 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1844 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1845 char *d = tmpbuf;
1846 while (isALPHA(*s))
1847 *d++ = *s++;
1848 *d = '\0';
1849 if (keyword(tmpbuf, d - tmpbuf))
1850 weight -= 150;
1851 }
1852 if (un_char == last_un_char + 1)
1853 weight += 5;
1854 weight -= seen[un_char];
1855 break;
1856 }
1857 seen[un_char]++;
1858 }
1859 if (weight >= 0) /* probably a character class */
1860 return FALSE;
1861 }
1862
1863 return TRUE;
1864}
ffed7fef 1865
ffb4593c
NT
1866/*
1867 * S_intuit_method
1868 *
1869 * Does all the checking to disambiguate
1870 * foo bar
1871 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1872 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1873 *
1874 * First argument is the stuff after the first token, e.g. "bar".
1875 *
1876 * Not a method if bar is a filehandle.
1877 * Not a method if foo is a subroutine prototyped to take a filehandle.
1878 * Not a method if it's really "Foo $bar"
1879 * Method if it's "foo $bar"
1880 * Not a method if it's really "print foo $bar"
1881 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1882 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1883 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1884 * =>
1885 */
1886
76e3520e 1887STATIC int
cea2e8a9 1888S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1889{
1890 char *s = start + (*start == '$');
3280af22 1891 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1892 STRLEN len;
1893 GV* indirgv;
1894
1895 if (gv) {
b6c543e3 1896 CV *cv;
a0d0e21e
LW
1897 if (GvIO(gv))
1898 return 0;
b6c543e3
IZ
1899 if ((cv = GvCVu(gv))) {
1900 char *proto = SvPVX(cv);
1901 if (proto) {
1902 if (*proto == ';')
1903 proto++;
1904 if (*proto == '*')
1905 return 0;
1906 }
1907 } else
a0d0e21e
LW
1908 gv = 0;
1909 }
8903cb82 1910 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1911 /* start is the beginning of the possible filehandle/object,
1912 * and s is the end of it
1913 * tmpbuf is a copy of it
1914 */
1915
a0d0e21e 1916 if (*start == '$') {
3280af22 1917 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1918 return 0;
1919 s = skipspace(s);
3280af22
NIS
1920 PL_bufptr = start;
1921 PL_expect = XREF;
a0d0e21e
LW
1922 return *s == '(' ? FUNCMETH : METHOD;
1923 }
1924 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1925 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1926 len -= 2;
1927 tmpbuf[len] = '\0';
1928 goto bare_package;
1929 }
1930 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1931 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1932 return 0;
1933 /* filehandle or package name makes it a method */
89bfa8cd 1934 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1935 s = skipspace(s);
3280af22 1936 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1937 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1938 bare_package:
3280af22 1939 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1940 newSVpvn(tmpbuf,len));
3280af22
NIS
1941 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1942 PL_expect = XTERM;
a0d0e21e 1943 force_next(WORD);
3280af22 1944 PL_bufptr = s;
a0d0e21e
LW
1945 return *s == '(' ? FUNCMETH : METHOD;
1946 }
1947 }
1948 return 0;
1949}
1950
ffb4593c
NT
1951/*
1952 * S_incl_perldb
1953 * Return a string of Perl code to load the debugger. If PERL5DB
1954 * is set, it will return the contents of that, otherwise a
1955 * compile-time require of perl5db.pl.
1956 */
1957
76e3520e 1958STATIC char*
cea2e8a9 1959S_incl_perldb(pTHX)
a0d0e21e 1960{
3280af22 1961 if (PL_perldb) {
76e3520e 1962 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1963
1964 if (pdb)
1965 return pdb;
93189314 1966 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1967 return "BEGIN { require 'perl5db.pl' }";
1968 }
1969 return "";
1970}
1971
1972
16d20bd9 1973/* Encoded script support. filter_add() effectively inserts a
4e553d73 1974 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1975 * Note that the filter function only applies to the current source file
1976 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1977 *
1978 * The datasv parameter (which may be NULL) can be used to pass
1979 * private data to this instance of the filter. The filter function
1980 * can recover the SV using the FILTER_DATA macro and use it to
1981 * store private buffers and state information.
1982 *
1983 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1984 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1985 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1986 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1987 * private use must be set using malloc'd pointers.
1988 */
16d20bd9
AD
1989
1990SV *
864dbfa3 1991Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1992{
f4c556ac
GS
1993 if (!funcp)
1994 return Nullsv;
1995
3280af22
NIS
1996 if (!PL_rsfp_filters)
1997 PL_rsfp_filters = newAV();
16d20bd9 1998 if (!datasv)
8c52afec 1999 datasv = NEWSV(255,0);
16d20bd9 2000 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2001 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2002 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2003 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2004 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2005 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2006 av_unshift(PL_rsfp_filters, 1);
2007 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2008 return(datasv);
2009}
4e553d73 2010
16d20bd9
AD
2011
2012/* Delete most recently added instance of this filter function. */
a0d0e21e 2013void
864dbfa3 2014Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2015{
e0c19803 2016 SV *datasv;
fe5a182c 2017 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2018 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2019 return;
2020 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2021 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2022 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2023 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2024 IoANY(datasv) = (void *)NULL;
3280af22 2025 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2026
16d20bd9
AD
2027 return;
2028 }
2029 /* we need to search for the correct entry and clear it */
cea2e8a9 2030 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2031}
2032
2033
2034/* Invoke the n'th filter function for the current rsfp. */
2035I32
864dbfa3 2036Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2037
2038
8ac85365 2039 /* 0 = read one text line */
a0d0e21e 2040{
16d20bd9
AD
2041 filter_t funcp;
2042 SV *datasv = NULL;
e50aee73 2043
3280af22 2044 if (!PL_rsfp_filters)
16d20bd9 2045 return -1;
3280af22 2046 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2047 /* Provide a default input filter to make life easy. */
2048 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2049 DEBUG_P(PerlIO_printf(Perl_debug_log,
2050 "filter_read %d: from rsfp\n", idx));
4e553d73 2051 if (maxlen) {
16d20bd9
AD
2052 /* Want a block */
2053 int len ;
2054 int old_len = SvCUR(buf_sv) ;
2055
2056 /* ensure buf_sv is large enough */
eb160463 2057 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2058 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2059 if (PerlIO_error(PL_rsfp))
37120919
AD
2060 return -1; /* error */
2061 else
2062 return 0 ; /* end of file */
2063 }
16d20bd9
AD
2064 SvCUR_set(buf_sv, old_len + len) ;
2065 } else {
2066 /* Want a line */
3280af22
NIS
2067 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2068 if (PerlIO_error(PL_rsfp))
37120919
AD
2069 return -1; /* error */
2070 else
2071 return 0 ; /* end of file */
2072 }
16d20bd9
AD
2073 }
2074 return SvCUR(buf_sv);
2075 }
2076 /* Skip this filter slot if filter has been deleted */
3280af22 2077 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2078 DEBUG_P(PerlIO_printf(Perl_debug_log,
2079 "filter_read %d: skipped (filter deleted)\n",
2080 idx));
16d20bd9
AD
2081 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2082 }
2083 /* Get function pointer hidden within datasv */
4755096e 2084 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2085 DEBUG_P(PerlIO_printf(Perl_debug_log,
2086 "filter_read %d: via function %p (%s)\n",
fe5a182c 2087 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2088 /* Call function. The function is expected to */
2089 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2090 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2091 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2092}
2093
76e3520e 2094STATIC char *
cea2e8a9 2095S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2096{
c39cd008 2097#ifdef PERL_CR_FILTER
3280af22 2098 if (!PL_rsfp_filters) {
c39cd008 2099 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2100 }
2101#endif
3280af22 2102 if (PL_rsfp_filters) {
16d20bd9 2103
55497cff 2104 if (!append)
2105 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2106 if (FILTER_READ(0, sv, 0) > 0)
2107 return ( SvPVX(sv) ) ;
2108 else
2109 return Nullch ;
2110 }
9d116dd7 2111 else
fd049845 2112 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2113}
2114
01ec43d0
GS
2115STATIC HV *
2116S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2117{
2118 GV *gv;
2119
01ec43d0 2120 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2121 return PL_curstash;
2122
2123 if (len > 2 &&
2124 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2125 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2126 {
2127 return GvHV(gv); /* Foo:: */
def3634b
GS
2128 }
2129
2130 /* use constant CLASS => 'MyClass' */
2131 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2132 SV *sv;
2133 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2134 pkgname = SvPV_nolen(sv);
2135 }
2136 }
2137
2138 return gv_stashpv(pkgname, FALSE);
2139}
a0d0e21e 2140
748a9306
LW
2141#ifdef DEBUGGING
2142 static char* exp_name[] =
09bef843
SB
2143 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2144 "ATTRTERM", "TERMBLOCK"
2145 };
748a9306 2146#endif
463ee0b2 2147
02aa26ce
NT
2148/*
2149 yylex
2150
2151 Works out what to call the token just pulled out of the input
2152 stream. The yacc parser takes care of taking the ops we return and
2153 stitching them into a tree.
2154
2155 Returns:
2156 PRIVATEREF
2157
2158 Structure:
2159 if read an identifier
2160 if we're in a my declaration
2161 croak if they tried to say my($foo::bar)
2162 build the ops for a my() declaration
2163 if it's an access to a my() variable
2164 are we in a sort block?
2165 croak if my($a); $a <=> $b
2166 build ops for access to a my() variable
2167 if in a dq string, and they've said @foo and we can't find @foo
2168 croak
2169 build ops for a bareword
2170 if we already built the token before, use it.
2171*/
2172
dba4d153 2173#ifdef USE_PURE_BISON
864dbfa3 2174int
dba4d153 2175Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2176{
20141f0e
IRC
2177 int r;
2178
6f202aea 2179 yyactlevel++;
20141f0e
IRC
2180 yylval_pointer[yyactlevel] = lvalp;
2181 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2182 if (yyactlevel >= YYMAXLEVEL)
2183 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2184
dba4d153 2185 r = Perl_yylex(aTHX);
20141f0e 2186
d8ae6756
IRC
2187 if (yyactlevel > 0)
2188 yyactlevel--;
20141f0e
IRC
2189
2190 return r;
2191}
dba4d153 2192#endif
20141f0e 2193
dba4d153
JH
2194#ifdef __SC__
2195#pragma segment Perl_yylex
2196#endif
dba4d153 2197int
dba4d153 2198Perl_yylex(pTHX)
20141f0e 2199{
79072805 2200 register char *s;
378cc40b 2201 register char *d;
79072805 2202 register I32 tmp;
463ee0b2 2203 STRLEN len;
161b471a
NIS
2204 GV *gv = Nullgv;
2205 GV **gvp = 0;
aa7440fb 2206 bool bof = FALSE;
1d239bbb 2207 I32 orig_keyword = 0;
a687059c 2208
02aa26ce 2209 /* check if there's an identifier for us to look at */
ba979b31 2210 if (PL_pending_ident)
e930465f 2211 return S_pending_ident(aTHX);
bbce6d69 2212
02aa26ce
NT
2213 /* no identifier pending identification */
2214
3280af22 2215 switch (PL_lex_state) {
79072805
LW
2216#ifdef COMMENTARY
2217 case LEX_NORMAL: /* Some compilers will produce faster */
2218 case LEX_INTERPNORMAL: /* code if we comment these out. */
2219 break;
2220#endif
2221
09bef843 2222 /* when we've already built the next token, just pull it out of the queue */
79072805 2223 case LEX_KNOWNEXT:
3280af22
NIS
2224 PL_nexttoke--;
2225 yylval = PL_nextval[PL_nexttoke];
2226 if (!PL_nexttoke) {
2227 PL_lex_state = PL_lex_defer;
2228 PL_expect = PL_lex_expect;
2229 PL_lex_defer = LEX_NORMAL;
463ee0b2 2230 }
607df283 2231 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2232 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2233 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2234
3280af22 2235 return(PL_nexttype[PL_nexttoke]);
79072805 2236
02aa26ce 2237 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2238 when we get here, PL_bufptr is at the \
02aa26ce 2239 */
79072805
LW
2240 case LEX_INTERPCASEMOD:
2241#ifdef DEBUGGING
3280af22 2242 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2243 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2244#endif
02aa26ce 2245 /* handle \E or end of string */
3280af22 2246 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2247 char oldmod;
02aa26ce
NT
2248
2249 /* if at a \E */
3280af22
NIS
2250 if (PL_lex_casemods) {
2251 oldmod = PL_lex_casestack[--PL_lex_casemods];
2252 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2253
3280af22
NIS
2254 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2255 PL_bufptr += 2;
2256 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2257 }
79072805
LW
2258 return ')';
2259 }
3280af22
NIS
2260 if (PL_bufptr != PL_bufend)
2261 PL_bufptr += 2;
2262 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2263 return yylex();
79072805
LW
2264 }
2265 else {
607df283 2266 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2267 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2268 s = PL_bufptr + 1;
6e909404
JH
2269 if (s[1] == '\\' && s[2] == 'E') {
2270 PL_bufptr = s + 3;
2271 PL_lex_state = LEX_INTERPCONCAT;
2272 return yylex();
a0d0e21e 2273 }
6e909404
JH
2274 else {
2275 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2276 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2277 if (strchr("LU", *s) &&
2278 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2279 PL_lex_casestack[--PL_lex_casemods] = '\0';
2280 return ')';
2281 }
2282 if (PL_lex_casemods > 10)
2283 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2284 PL_lex_casestack[PL_lex_casemods++] = *s;
2285 PL_lex_casestack[PL_lex_casemods] = '\0';
2286 PL_lex_state = LEX_INTERPCONCAT;
2287 PL_nextval[PL_nexttoke].ival = 0;
2288 force_next('(');
2289 if (*s == 'l')
2290 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2291 else if (*s == 'u')
2292 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2293 else if (*s == 'L')
2294 PL_nextval[PL_nexttoke].ival = OP_LC;
2295 else if (*s == 'U')
2296 PL_nextval[PL_nexttoke].ival = OP_UC;
2297 else if (*s == 'Q')
2298 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2299 else
2300 Perl_croak(aTHX_ "panic: yylex");
2301 PL_bufptr = s + 1;
a0d0e21e 2302 }
79072805 2303 force_next(FUNC);
3280af22
NIS
2304 if (PL_lex_starts) {
2305 s = PL_bufptr;
2306 PL_lex_starts = 0;
79072805
LW
2307 Aop(OP_CONCAT);
2308 }
2309 else
cea2e8a9 2310 return yylex();
79072805
LW
2311 }
2312
55497cff 2313 case LEX_INTERPPUSH:
2314 return sublex_push();
2315
79072805 2316 case LEX_INTERPSTART:
3280af22 2317 if (PL_bufptr == PL_bufend)
79072805 2318 return sublex_done();
607df283 2319 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2320 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2321 PL_expect = XTERM;
2322 PL_lex_dojoin = (*PL_bufptr == '@');
2323 PL_lex_state = LEX_INTERPNORMAL;
2324 if (PL_lex_dojoin) {
2325 PL_nextval[PL_nexttoke].ival = 0;
79072805 2326 force_next(',');
a0d0e21e 2327 force_ident("\"", '$');
3280af22 2328 PL_nextval[PL_nexttoke].ival = 0;
79072805 2329 force_next('$');
3280af22 2330 PL_nextval[PL_nexttoke].ival = 0;
79072805 2331 force_next('(');
3280af22 2332 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2333 force_next(FUNC);
2334 }
3280af22
NIS
2335 if (PL_lex_starts++) {
2336 s = PL_bufptr;
79072805
LW
2337 Aop(OP_CONCAT);
2338 }
cea2e8a9 2339 return yylex();
79072805
LW
2340
2341 case LEX_INTERPENDMAYBE:
3280af22
NIS
2342 if (intuit_more(PL_bufptr)) {
2343 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2344 break;
2345 }
2346 /* FALL THROUGH */
2347
2348 case LEX_INTERPEND:
3280af22
NIS
2349 if (PL_lex_dojoin) {
2350 PL_lex_dojoin = FALSE;
2351 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2352 return ')';
2353 }
43a16006 2354 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2355 && SvEVALED(PL_lex_repl))
43a16006 2356 {
e9fa98b2 2357 if (PL_bufptr != PL_bufend)
cea2e8a9 2358 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2359 PL_lex_repl = Nullsv;
2360 }
79072805
LW
2361 /* FALLTHROUGH */
2362 case LEX_INTERPCONCAT:
2363#ifdef DEBUGGING
3280af22 2364 if (PL_lex_brackets)
cea2e8a9 2365 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2366#endif
3280af22 2367 if (PL_bufptr == PL_bufend)
79072805
LW
2368 return sublex_done();
2369
3280af22
NIS
2370 if (SvIVX(PL_linestr) == '\'') {
2371 SV *sv = newSVsv(PL_linestr);
2372 if (!PL_lex_inpat)
76e3520e 2373 sv = tokeq(sv);
3280af22 2374 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2375 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2376 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2377 s = PL_bufend;
79072805
LW
2378 }
2379 else {
3280af22 2380 s = scan_const(PL_bufptr);
79072805 2381 if (*s == '\\')
3280af22 2382 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2383 else
3280af22 2384 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2385 }
2386
3280af22
NIS
2387 if (s != PL_bufptr) {
2388 PL_nextval[PL_nexttoke] = yylval;
2389 PL_expect = XTERM;
79072805 2390 force_next(THING);
3280af22 2391 if (PL_lex_starts++)
79072805
LW
2392 Aop(OP_CONCAT);
2393 else {
3280af22 2394 PL_bufptr = s;
cea2e8a9 2395 return yylex();
79072805
LW
2396 }
2397 }
2398
cea2e8a9 2399 return yylex();
a0d0e21e 2400 case LEX_FORMLINE:
3280af22
NIS
2401 PL_lex_state = LEX_NORMAL;
2402 s = scan_formline(PL_bufptr);
2403 if (!PL_lex_formbrack)
a0d0e21e
LW
2404 goto rightbracket;
2405 OPERATOR(';');
79072805
LW
2406 }
2407
3280af22
NIS
2408 s = PL_bufptr;
2409 PL_oldoldbufptr = PL_oldbufptr;
2410 PL_oldbufptr = s;
607df283 2411 DEBUG_T( {
bf49b057
GS
2412 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2413 exp_name[PL_expect], s);
5f80b19c 2414 } );
463ee0b2
LW
2415
2416 retry:
378cc40b
LW
2417 switch (*s) {
2418 default:
7e2040f0 2419 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2420 goto keylookup;
cea2e8a9 2421 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2422 case 4:
2423 case 26:
2424 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2425 case 0:
3280af22
NIS
2426 if (!PL_rsfp) {
2427 PL_last_uni = 0;
2428 PL_last_lop = 0;
2429 if (PL_lex_brackets)
d98d5fff 2430 yyerror("Missing right curly or square bracket");
4e553d73 2431 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2432 "### Tokener got EOF\n");
5f80b19c 2433 } );
79072805 2434 TOKEN(0);
463ee0b2 2435 }
3280af22 2436 if (s++ < PL_bufend)
a687059c 2437 goto retry; /* ignore stray nulls */
3280af22
NIS
2438 PL_last_uni = 0;
2439 PL_last_lop = 0;
2440 if (!PL_in_eval && !PL_preambled) {
2441 PL_preambled = TRUE;
2442 sv_setpv(PL_linestr,incl_perldb());
2443 if (SvCUR(PL_linestr))
2444 sv_catpv(PL_linestr,";");
2445 if (PL_preambleav){
2446 while(AvFILLp(PL_preambleav) >= 0) {
2447 SV *tmpsv = av_shift(PL_preambleav);
2448 sv_catsv(PL_linestr, tmpsv);
2449 sv_catpv(PL_linestr, ";");
91b7def8 2450 sv_free(tmpsv);
2451 }
3280af22
NIS
2452 sv_free((SV*)PL_preambleav);
2453 PL_preambleav = NULL;
91b7def8 2454 }
3280af22
NIS
2455 if (PL_minus_n || PL_minus_p) {
2456 sv_catpv(PL_linestr, "LINE: while (<>) {");
2457 if (PL_minus_l)
2458 sv_catpv(PL_linestr,"chomp;");
2459 if (PL_minus_a) {
3280af22
NIS
2460 if (PL_minus_F) {
2461 if (strchr("/'\"", *PL_splitstr)
2462 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2463 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2464 else {
2465 char delim;
2466 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2467 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2468 delim = *s;
75c72d73 2469 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2470 "q" + (delim == '\''), delim);
3280af22 2471 for (s = PL_splitstr; *s; s++) {
54310121 2472 if (*s == '\\')
3280af22
NIS
2473 sv_catpvn(PL_linestr, "\\", 1);
2474 sv_catpvn(PL_linestr, s, 1);
54310121 2475 }
cea2e8a9 2476 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2477 }
2304df62
AD
2478 }
2479 else
75c72d73 2480 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2481 }
79072805 2482 }
3280af22
NIS
2483 sv_catpv(PL_linestr, "\n");
2484 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2485 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2486 PL_last_lop = PL_last_uni = Nullch;
3280af22 2487 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2488 SV *sv = NEWSV(85,0);
2489
2490 sv_upgrade(sv, SVt_PVMG);
3280af22 2491 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2492 (void)SvIOK_on(sv);
2493 SvIVX(sv) = 0;
57843af0 2494 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2495 }
79072805 2496 goto retry;
a687059c 2497 }
e929a76b 2498 do {
aa7440fb 2499 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2500 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2501 fake_eof:
2502 if (PL_rsfp) {
2503 if (PL_preprocess && !PL_in_eval)
2504 (void)PerlProc_pclose(PL_rsfp);
2505 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2506 PerlIO_clearerr(PL_rsfp);
2507 else
2508 (void)PerlIO_close(PL_rsfp);
2509 PL_rsfp = Nullfp;
2510 PL_doextract = FALSE;
2511 }
2512 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2513 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2514 sv_catpv(PL_linestr,";}");
2515 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2516 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2517 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2518 PL_minus_n = PL_minus_p = 0;
2519 goto retry;
2520 }
2521 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2522 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2523 sv_setpv(PL_linestr,"");
2524 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2525 }
2526 /* if it looks like the start of a BOM, check if it in fact is */
2527 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2528#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2529# ifdef __GNU_LIBRARY__
2530# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2531# define FTELL_FOR_PIPE_IS_BROKEN
2532# endif
e3f494f1
JH
2533# else
2534# ifdef __GLIBC__
2535# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2536# define FTELL_FOR_PIPE_IS_BROKEN
2537# endif
2538# endif
226017aa
DD
2539# endif
2540#endif
2541#ifdef FTELL_FOR_PIPE_IS_BROKEN
2542 /* This loses the possibility to detect the bof
2543 * situation on perl -P when the libc5 is being used.
2544 * Workaround? Maybe attach some extra state to PL_rsfp?
2545 */
2546 if (!PL_preprocess)
7e28d3af 2547 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2548#else
eb160463 2549 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2550#endif
7e28d3af 2551 if (bof) {
3280af22 2552 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2553 s = swallow_bom((U8*)s);
e929a76b 2554 }
378cc40b 2555 }
3280af22 2556 if (PL_doextract) {
a0d0e21e
LW
2557 /* Incest with pod. */
2558 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2559 sv_setpv(PL_linestr, "");
2560 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2561 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2562 PL_last_lop = PL_last_uni = Nullch;
3280af22 2563 PL_doextract = FALSE;
a0d0e21e 2564 }
4e553d73 2565 }
463ee0b2 2566 incline(s);
3280af22
NIS
2567 } while (PL_doextract);
2568 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2569 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2570 SV *sv = NEWSV(85,0);
a687059c 2571
93a17b20 2572 sv_upgrade(sv, SVt_PVMG);
3280af22 2573 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2574 (void)SvIOK_on(sv);
2575 SvIVX(sv) = 0;
57843af0 2576 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2577 }
3280af22 2578 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2579 PL_last_lop = PL_last_uni = Nullch;
57843af0 2580 if (CopLINE(PL_curcop) == 1) {
3280af22 2581 while (s < PL_bufend && isSPACE(*s))
79072805 2582 s++;
a0d0e21e 2583 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2584 s++;
44a8e56a 2585 d = Nullch;
3280af22 2586 if (!PL_in_eval) {
44a8e56a 2587 if (*s == '#' && *(s+1) == '!')
2588 d = s + 2;
2589#ifdef ALTERNATE_SHEBANG
2590 else {
2591 static char as[] = ALTERNATE_SHEBANG;
2592 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2593 d = s + (sizeof(as) - 1);
2594 }
2595#endif /* ALTERNATE_SHEBANG */
2596 }
2597 if (d) {
b8378b72 2598 char *ipath;
774d564b 2599 char *ipathend;
b8378b72 2600
774d564b 2601 while (isSPACE(*d))
b8378b72
CS
2602 d++;
2603 ipath = d;
774d564b 2604 while (*d && !isSPACE(*d))
2605 d++;
2606 ipathend = d;
2607
2608#ifdef ARG_ZERO_IS_SCRIPT
2609 if (ipathend > ipath) {
2610 /*
2611 * HP-UX (at least) sets argv[0] to the script name,
2612 * which makes $^X incorrect. And Digital UNIX and Linux,
2613 * at least, set argv[0] to the basename of the Perl
2614 * interpreter. So, having found "#!", we'll set it right.
2615 */
ee2f7564 2616 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2617 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2618 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2619 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2620 SvSETMAGIC(x);
2621 }
556c1dec
JH
2622 else {
2623 STRLEN blen;
2624 STRLEN llen;
2625 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2626 char *lstart = SvPV(x,llen);
2627 if (llen < blen) {
2628 bstart += blen - llen;
2629 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2630 sv_setpvn(x, ipath, ipathend - ipath);
2631 SvSETMAGIC(x);
2632 }
2633 }
2634 }
774d564b 2635 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2636 }
774d564b 2637#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2638
2639 /*
2640 * Look for options.
2641 */
748a9306 2642 d = instr(s,"perl -");
84e30d1a 2643 if (!d) {
748a9306 2644 d = instr(s,"perl");
84e30d1a
GS
2645#if defined(DOSISH)
2646 /* avoid getting into infinite loops when shebang
2647 * line contains "Perl" rather than "perl" */
2648 if (!d) {
2649 for (d = ipathend-4; d >= ipath; --d) {
2650 if ((*d == 'p' || *d == 'P')
2651 && !ibcmp(d, "perl", 4))
2652 {
2653 break;
2654 }
2655 }
2656 if (d < ipath)
2657 d = Nullch;
2658 }
2659#endif
2660 }
44a8e56a 2661#ifdef ALTERNATE_SHEBANG
2662 /*
2663 * If the ALTERNATE_SHEBANG on this system starts with a
2664 * character that can be part of a Perl expression, then if
2665 * we see it but not "perl", we're probably looking at the
2666 * start of Perl code, not a request to hand off to some
2667 * other interpreter. Similarly, if "perl" is there, but
2668 * not in the first 'word' of the line, we assume the line
2669 * contains the start of the Perl program.
44a8e56a 2670 */
2671 if (d && *s != '#') {
774d564b 2672 char *c = ipath;
44a8e56a 2673 while (*c && !strchr("; \t\r\n\f\v#", *c))
2674 c++;
2675 if (c < d)
2676 d = Nullch; /* "perl" not in first word; ignore */
2677 else
2678 *s = '#'; /* Don't try to parse shebang line */
2679 }
774d564b 2680#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2681#ifndef MACOS_TRADITIONAL
748a9306 2682 if (!d &&
44a8e56a 2683 *s == '#' &&
774d564b 2684 ipathend > ipath &&
3280af22 2685 !PL_minus_c &&
748a9306 2686 !instr(s,"indir") &&
3280af22 2687 instr(PL_origargv[0],"perl"))
748a9306 2688 {
9f68db38 2689 char **newargv;
9f68db38 2690
774d564b 2691 *ipathend = '\0';
2692 s = ipathend + 1;
3280af22 2693 while (s < PL_bufend && isSPACE(*s))
9f68db38 2694 s++;
3280af22
NIS
2695 if (s < PL_bufend) {
2696 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2697 newargv[1] = s;
3280af22 2698 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2699 s++;
2700 *s = '\0';
3280af22 2701 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2702 }
2703 else
3280af22 2704 newargv = PL_origargv;
774d564b 2705 newargv[0] = ipath;
b4748376 2706 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2707 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2708 }
bf4acbe4 2709#endif
748a9306 2710 if (d) {
3280af22
NIS
2711 U32 oldpdb = PL_perldb;
2712 bool oldn = PL_minus_n;
2713 bool oldp = PL_minus_p;
748a9306
LW
2714
2715 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2716 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2717
2718 if (*d++ == '-') {
a11ec5a9 2719 bool switches_done = PL_doswitches;
8cc95fdb 2720 do {
2721 if (*d == 'M' || *d == 'm') {
2722 char *m = d;
2723 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2724 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2725 (int)(d - m), m);
2726 }
2727 d = moreswitches(d);
2728 } while (d);
155aba94
GS
2729 if ((PERLDB_LINE && !oldpdb) ||
2730 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2731 /* if we have already added "LINE: while (<>) {",
2732 we must not do it again */
748a9306 2733 {
3280af22
NIS
2734 sv_setpv(PL_linestr, "");
2735 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2736 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2737 PL_last_lop = PL_last_uni = Nullch;
3280af22 2738 PL_preambled = FALSE;
84902520 2739 if (PERLDB_LINE)
3280af22 2740 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2741 goto retry;
2742 }
a11ec5a9
RGS
2743 if (PL_doswitches && !switches_done) {
2744 int argc = PL_origargc;
2745 char **argv = PL_origargv;
2746 do {
2747 argc--,argv++;
2748 } while (argc && argv[0][0] == '-' && argv[0][1]);
2749 init_argv_symbols(argc,argv);
2750 }
a0d0e21e 2751 }
79072805 2752 }
9f68db38 2753 }
79072805 2754 }
3280af22
NIS
2755 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2756 PL_bufptr = s;
2757 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2758 return yylex();
ae986130 2759 }
378cc40b 2760 goto retry;
4fdae800 2761 case '\r':
6a27c188 2762#ifdef PERL_STRICT_CR
cea2e8a9 2763 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2764 Perl_croak(aTHX_
cc507455 2765 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2766#endif
4fdae800 2767 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2768#ifdef MACOS_TRADITIONAL
2769 case '\312':
2770#endif
378cc40b
LW
2771 s++;
2772 goto retry;
378cc40b 2773 case '#':
e929a76b 2774 case '\n':
3280af22 2775 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2776 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2777 /* handle eval qq[#line 1 "foo"\n ...] */
2778 CopLINE_dec(PL_curcop);
2779 incline(s);
2780 }
3280af22 2781 d = PL_bufend;
a687059c 2782 while (s < d && *s != '\n')
378cc40b 2783 s++;
0f85fab0 2784 if (s < d)
378cc40b 2785 s++;
78c267c1 2786 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2787 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2788 incline(s);
3280af22
NIS
2789 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2790 PL_bufptr = s;
2791 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2792 return yylex();
a687059c 2793 }
378cc40b 2794 }
a687059c 2795 else {
378cc40b 2796 *s = '\0';
3280af22 2797 PL_bufend = s;
a687059c 2798 }
378cc40b
LW
2799 goto retry;
2800 case '-':
79072805 2801 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2802 I32 ftst = 0;
2803
378cc40b 2804 s++;
3280af22 2805 PL_bufptr = s;
748a9306
LW
2806 tmp = *s++;
2807
bf4acbe4 2808 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2809 s++;
2810
2811 if (strnEQ(s,"=>",2)) {
3280af22 2812 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2813 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2814 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2815 } );
748a9306
LW
2816 OPERATOR('-'); /* unary minus */
2817 }
3280af22 2818 PL_last_uni = PL_oldbufptr;
748a9306 2819 switch (tmp) {
e5edeb50
JH
2820 case 'r': ftst = OP_FTEREAD; break;
2821 case 'w': ftst = OP_FTEWRITE; break;
2822 case 'x': ftst = OP_FTEEXEC; break;
2823 case 'o': ftst = OP_FTEOWNED; break;
2824 case 'R': ftst = OP_FTRREAD; break;
2825 case 'W': ftst = OP_FTRWRITE; break;
2826 case 'X': ftst = OP_FTREXEC; break;
2827 case 'O': ftst = OP_FTROWNED; break;
2828 case 'e': ftst = OP_FTIS; break;
2829 case 'z': ftst = OP_FTZERO; break;
2830 case 's': ftst = OP_FTSIZE; break;
2831 case 'f': ftst = OP_FTFILE; break;
2832 case 'd': ftst = OP_FTDIR; break;
2833 case 'l': ftst = OP_FTLINK; break;
2834 case 'p': ftst = OP_FTPIPE; break;
2835 case 'S': ftst = OP_FTSOCK; break;
2836 case 'u': ftst = OP_FTSUID; break;
2837 case 'g': ftst = OP_FTSGID; break;
2838 case 'k': ftst = OP_FTSVTX; break;
2839 case 'b': ftst = OP_FTBLK; break;
2840 case 'c': ftst = OP_FTCHR; break;
2841 case 't': ftst = OP_FTTTY; break;
2842 case 'T': ftst = OP_FTTEXT; break;
2843 case 'B': ftst = OP_FTBINARY; break;
2844 case 'M': case 'A': case 'C':
2845 gv_fetchpv("\024",TRUE, SVt_PV);
2846 switch (tmp) {
2847 case 'M': ftst = OP_FTMTIME; break;
2848 case 'A': ftst = OP_FTATIME; break;
2849 case 'C': ftst = OP_FTCTIME; break;
2850 default: break;
2851 }
2852 break;
378cc40b 2853 default:
378cc40b
LW
2854 break;
2855 }
e5edeb50 2856 if (ftst) {
eb160463 2857 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2858 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2859 "### Saw file test %c\n", (int)ftst);
5f80b19c 2860 } );
e5edeb50
JH
2861 FTST(ftst);
2862 }
2863 else {
2864 /* Assume it was a minus followed by a one-letter named
2865 * subroutine call (or a -bareword), then. */
95c31fe3 2866 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2867 "### %c looked like a file test but was not\n",
2868 (int)ftst);
5f80b19c 2869 } );
e5edeb50
JH
2870 s -= 2;
2871 }
378cc40b 2872 }
a687059c
LW
2873 tmp = *s++;
2874 if (*s == tmp) {
2875 s++;
3280af22 2876 if (PL_expect == XOPERATOR)
79072805
LW
2877 TERM(POSTDEC);
2878 else
2879 OPERATOR(PREDEC);
2880 }
2881 else if (*s == '>') {
2882 s++;
2883 s = skipspace(s);
7e2040f0 2884 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2885 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2886 TOKEN(ARROW);
79072805 2887 }
748a9306
LW
2888 else if (*s == '$')
2889 OPERATOR(ARROW);
463ee0b2 2890 else
748a9306 2891 TERM(ARROW);
a687059c 2892 }
3280af22 2893 if (PL_expect == XOPERATOR)
79072805
LW
2894 Aop(OP_SUBTRACT);
2895 else {
3280af22 2896 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2897 check_uni();
79072805 2898 OPERATOR('-'); /* unary minus */
2f3197b3 2899 }
79072805 2900
378cc40b 2901 case '+':
a687059c
LW
2902 tmp = *s++;
2903 if (*s == tmp) {
378cc40b 2904 s++;
3280af22 2905 if (PL_expect == XOPERATOR)
79072805
LW
2906 TERM(POSTINC);
2907 else
2908 OPERATOR(PREINC);
378cc40b 2909 }
3280af22 2910 if (PL_expect == XOPERATOR)
79072805
LW
2911 Aop(OP_ADD);
2912 else {
3280af22 2913 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2914 check_uni();
a687059c 2915 OPERATOR('+');
2f3197b3 2916 }
a687059c 2917
378cc40b 2918 case '*':
3280af22
NIS
2919 if (PL_expect != XOPERATOR) {
2920 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2921 PL_expect = XOPERATOR;
2922 force_ident(PL_tokenbuf, '*');
2923 if (!*PL_tokenbuf)
a0d0e21e 2924 PREREF('*');
79072805 2925 TERM('*');
a687059c 2926 }
79072805
LW
2927 s++;
2928 if (*s == '*') {
a687059c 2929 s++;
79072805 2930 PWop(OP_POW);
a687059c 2931 }
79072805
LW
2932 Mop(OP_MULTIPLY);
2933
378cc40b 2934 case '%':
3280af22 2935 if (PL_expect == XOPERATOR) {
bbce6d69 2936 ++s;
2937 Mop(OP_MODULO);
a687059c 2938 }
3280af22
NIS
2939 PL_tokenbuf[0] = '%';
2940 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2941 if (!PL_tokenbuf[1]) {
2942 if (s == PL_bufend)
bbce6d69 2943 yyerror("Final % should be \\% or %name");
2944 PREREF('%');
a687059c 2945 }
3280af22 2946 PL_pending_ident = '%';
bbce6d69 2947 TERM('%');
a687059c 2948
378cc40b 2949 case '^':
79072805 2950 s++;
a0d0e21e 2951 BOop(OP_BIT_XOR);
79072805 2952 case '[':
3280af22 2953 PL_lex_brackets++;
79072805 2954 /* FALL THROUGH */
378cc40b 2955 case '~':
378cc40b 2956 case ',':
378cc40b
LW
2957 tmp = *s++;
2958 OPERATOR(tmp);
a0d0e21e
LW
2959 case ':':
2960 if (s[1] == ':') {
2961 len = 0;
2962 goto just_a_word;
2963 }
2964 s++;
09bef843
SB
2965 switch (PL_expect) {
2966 OP *attrs;
2967 case XOPERATOR:
2968 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2969 break;
2970 PL_bufptr = s; /* update in case we back off */
2971 goto grabattrs;
2972 case XATTRBLOCK:
2973 PL_expect = XBLOCK;
2974 goto grabattrs;
2975 case XATTRTERM:
2976 PL_expect = XTERMBLOCK;
2977 grabattrs:
2978 s = skipspace(s);
2979 attrs = Nullop;
7e2040f0 2980 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2981 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2982 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2983 if (tmp < 0) tmp = -tmp;
2984 switch (tmp) {
2985 case KEY_or:
2986 case KEY_and:
c963b151 2987 case KEY_err:
f9829d6b
GS
2988 case KEY_for:
2989 case KEY_unless:
2990 case KEY_if:
2991 case KEY_while:
2992 case KEY_until:
2993 goto got_attrs;
2994 default:
2995 break;
2996 }
2997 }
09bef843
SB
2998 if (*d == '(') {
2999 d = scan_str(d,TRUE,TRUE);
3000 if (!d) {
09bef843
SB
3001 /* MUST advance bufptr here to avoid bogus
3002 "at end of line" context messages from yyerror().
3003 */
3004 PL_bufptr = s + len;
3005 yyerror("Unterminated attribute parameter in attribute list");
3006 if (attrs)
3007 op_free(attrs);
3008 return 0; /* EOF indicator */
3009 }
3010 }
3011 if (PL_lex_stuff) {
3012 SV *sv = newSVpvn(s, len);
3013 sv_catsv(sv, PL_lex_stuff);
3014 attrs = append_elem(OP_LIST, attrs,
3015 newSVOP(OP_CONST, 0, sv));
3016 SvREFCNT_dec(PL_lex_stuff);
3017 PL_lex_stuff = Nullsv;
3018 }
3019 else {
d3cea301
SB
3020 /* NOTE: any CV attrs applied here need to be part of
3021 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3022 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3023 CvLVALUE_on(PL_compcv);
3024 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3025 CvLOCKED_on(PL_compcv);
3026 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3027 CvMETHOD_on(PL_compcv);
06492da6
SF
3028 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3029 CvASSERTION_on(PL_compcv);
87ecf892 3030#ifdef USE_ITHREADS
d3cea301
SB
3031 else if (PL_in_my == KEY_our && len == 6 &&
3032 strnEQ(s, "unique", len))
7fb37951 3033 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3034#endif
78f9721b
SM
3035 /* After we've set the flags, it could be argued that
3036 we don't need to do the attributes.pm-based setting
3037 process, and shouldn't bother appending recognized
d3cea301
SB
3038 flags. To experiment with that, uncomment the
3039 following "else". (Note that's already been
3040 uncommented. That keeps the above-applied built-in
3041 attributes from being intercepted (and possibly
3042 rejected) by a package's attribute routines, but is
3043 justified by the performance win for the common case
3044 of applying only built-in attributes.) */
0256094b 3045 else
78f9721b
SM
3046 attrs = append_elem(OP_LIST, attrs,
3047 newSVOP(OP_CONST, 0,
3048 newSVpvn(s, len)));
09bef843
SB
3049 }
3050 s = skipspace(d);
0120eecf 3051 if (*s == ':' && s[1] != ':')
09bef843 3052 s = skipspace(s+1);
0120eecf
GS
3053 else if (s == d)
3054 break; /* require real whitespace or :'s */
09bef843 3055 }
f9829d6b 3056 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3057 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3058 char q = ((*s == '\'') ? '"' : '\'');
3059 /* If here for an expression, and parsed no attrs, back off. */
3060 if (tmp == '=' && !attrs) {
3061 s = PL_bufptr;
3062 break;
3063 }
3064 /* MUST advance bufptr here to avoid bogus "at end of line"
3065 context messages from yyerror().
3066 */
3067 PL_bufptr = s;
3068 if (!*s)
3069 yyerror("Unterminated attribute list");
3070 else
3071 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3072 q, *s, q));
3073 if (attrs)
3074 op_free(attrs);
3075 OPERATOR(':');
3076 }
f9829d6b 3077 got_attrs:
09bef843
SB
3078 if (attrs) {
3079 PL_nextval[PL_nexttoke].opval = attrs;
3080 force_next(THING);
3081 }
3082 TOKEN(COLONATTR);
3083 }
a0d0e21e 3084 OPERATOR(':');
8990e307
LW
3085 case '(':
3086 s++;
3280af22
NIS
3087 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3088 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3089 else
3280af22 3090 PL_expect = XTERM;
a0d0e21e 3091 TOKEN('(');
378cc40b 3092 case ';':
f4dd75d9 3093 CLINE;
378cc40b
LW
3094 tmp = *s++;
3095 OPERATOR(tmp);
3096 case ')':
378cc40b 3097 tmp = *s++;
16d20bd9
AD
3098 s = skipspace(s);
3099 if (*s == '{')
3100 PREBLOCK(tmp);
378cc40b 3101 TERM(tmp);
79072805
LW
3102 case ']':
3103 s++;
3280af22 3104 if (PL_lex_brackets <= 0)
d98d5fff 3105 yyerror("Unmatched right square bracket");
463ee0b2 3106 else
3280af22
NIS
3107 --PL_lex_brackets;
3108 if (PL_lex_state == LEX_INTERPNORMAL) {
3109 if (PL_lex_brackets == 0) {
a0d0e21e 3110 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3111 PL_lex_state = LEX_INTERPEND;
79072805
LW
3112 }
3113 }
4633a7c4 3114 TERM(']');
79072805
LW
3115 case '{':
3116 leftbracket:
79072805 3117 s++;
3280af22 3118 if (PL_lex_brackets > 100) {
8edd5f42 3119 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3120 }
3280af22 3121 switch (PL_expect) {
a0d0e21e 3122 case XTERM:
3280af22 3123 if (PL_lex_formbrack) {
a0d0e21e
LW
3124 s--;
3125 PRETERMBLOCK(DO);
3126 }
3280af22
NIS
3127 if (PL_oldoldbufptr == PL_last_lop)
3128 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3129 else
3280af22 3130 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3131 OPERATOR(HASHBRACK);
a0d0e21e 3132 case XOPERATOR:
bf4acbe4 3133 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3134 s++;
44a8e56a 3135 d = s;
3280af22
NIS
3136 PL_tokenbuf[0] = '\0';
3137 if (d < PL_bufend && *d == '-') {
3138 PL_tokenbuf[0] = '-';
44a8e56a 3139 d++;
bf4acbe4 3140 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3141 d++;
3142 }
7e2040f0 3143 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3144 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3145 FALSE, &len);
bf4acbe4 3146 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3147 d++;
3148 if (*d == '}') {
3280af22 3149 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3150 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3151 if (minus)
3152 force_next('-');
748a9306
LW
3153 }
3154 }
3155 /* FALL THROUGH */
09bef843 3156 case XATTRBLOCK:
748a9306 3157 case XBLOCK:
3280af22
NIS
3158 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3159 PL_expect = XSTATE;
a0d0e21e 3160 break;
09bef843 3161 case XATTRTERM:
a0d0e21e 3162 case XTERMBLOCK:
3280af22
NIS
3163 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3164 PL_expect = XSTATE;
a0d0e21e
LW
3165 break;
3166 default: {
3167 char *t;
3280af22
NIS
3168 if (PL_oldoldbufptr == PL_last_lop)
3169 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3170 else
3280af22 3171 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3172 s = skipspace(s);
8452ff4b
SB
3173 if (*s == '}') {
3174 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3175 PL_expect = XTERM;
3176 /* This hack is to get the ${} in the message. */
3177 PL_bufptr = s+1;
3178 yyerror("syntax error");
3179 break;
3180 }
a0d0e21e 3181 OPERATOR(HASHBRACK);
8452ff4b 3182 }
b8a4b1be
GS
3183 /* This hack serves to disambiguate a pair of curlies
3184 * as being a block or an anon hash. Normally, expectation
3185 * determines that, but in cases where we're not in a
3186 * position to expect anything in particular (like inside
3187 * eval"") we have to resolve the ambiguity. This code
3188 * covers the case where the first term in the curlies is a
3189 * quoted string. Most other cases need to be explicitly
3190 * disambiguated by prepending a `+' before the opening
3191 * curly in order to force resolution as an anon hash.
3192 *
3193 * XXX should probably propagate the outer expectation
3194 * into eval"" to rely less on this hack, but that could
3195 * potentially break current behavior of eval"".
3196 * GSAR 97-07-21
3197 */
3198 t = s;
3199 if (*s == '\'' || *s == '"' || *s == '`') {
3200 /* common case: get past first string, handling escapes */
3280af22 3201 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3202 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3203 t++;
3204 t++;
a0d0e21e 3205 }
b8a4b1be 3206 else if (*s == 'q') {
3280af22 3207 if (++t < PL_bufend
b8a4b1be 3208 && (!isALNUM(*t)
3280af22 3209 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3210 && !isALNUM(*t))))
3211 {
b8a4b1be
GS
3212 char *tmps;
3213 char open, close, term;
3214 I32 brackets = 1;
3215
3280af22 3216 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3217 t++;
3218 term = *t;
3219 open = term;
3220 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3221 term = tmps[5];
3222 close = term;
3223 if (open == close)
3280af22
NIS
3224 for (t++; t < PL_bufend; t++) {
3225 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3226 t++;
6d07e5e9 3227 else if (*t == open)
b8a4b1be
GS
3228 break;
3229 }
3230 else
3280af22
NIS
3231 for (t++; t < PL_bufend; t++) {
3232 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3233 t++;
6d07e5e9 3234 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3235 break;
3236 else if (*t == open)
3237 brackets++;
3238 }
3239 }
3240 t++;
a0d0e21e 3241 }
7e2040f0 3242 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3243 t += UTF8SKIP(t);
7e2040f0 3244 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3245 t += UTF8SKIP(t);
a0d0e21e 3246 }
3280af22 3247 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3248 t++;
b8a4b1be
GS
3249 /* if comma follows first term, call it an anon hash */
3250 /* XXX it could be a comma expression with loop modifiers */
3280af22 3251 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3252 || (*t == '=' && t[1] == '>')))
a0d0e21e 3253 OPERATOR(HASHBRACK);
3280af22 3254 if (PL_expect == XREF)
4e4e412b 3255 PL_expect = XTERM;
a0d0e21e 3256 else {
3280af22
NIS
3257 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3258 PL_expect = XSTATE;
a0d0e21e 3259 }
8990e307 3260 }
a0d0e21e 3261 break;
463ee0b2 3262 }
57843af0 3263 yylval.ival = CopLINE(PL_curcop);
79072805 3264 if (isSPACE(*s) || *s == '#')
3280af22 3265 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3266 TOKEN('{');
378cc40b 3267 case '}':
79072805
LW
3268 rightbracket:
3269 s++;
3280af22 3270 if (PL_lex_brackets <= 0)
d98d5fff 3271 yyerror("Unmatched right curly bracket");
463ee0b2 3272 else
3280af22 3273 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3274 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3275 PL_lex_formbrack = 0;
3276 if (PL_lex_state == LEX_INTERPNORMAL) {
3277 if (PL_lex_brackets == 0) {
9059aa12
LW
3278 if (PL_expect & XFAKEBRACK) {
3279 PL_expect &= XENUMMASK;
3280af22
NIS
3280 PL_lex_state = LEX_INTERPEND;
3281 PL_bufptr = s;
cea2e8a9 3282 return yylex(); /* ignore fake brackets */
79072805 3283 }
fa83b5b6 3284 if (*s == '-' && s[1] == '>')
3280af22 3285 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3286 else if (*s != '[' && *s != '{')
3280af22 3287 PL_lex_state = LEX_INTERPEND;
79072805
LW
3288 }
3289 }
9059aa12
LW
3290 if (PL_expect & XFAKEBRACK) {
3291 PL_expect &= XENUMMASK;
3280af22 3292 PL_bufptr = s;
cea2e8a9 3293 return yylex(); /* ignore fake brackets */
748a9306 3294 }
79072805
LW
3295 force_next('}');
3296 TOKEN(';');
378cc40b
LW
3297 case '&':
3298 s++;
3299 tmp = *s++;
3300 if (tmp == '&')
a0d0e21e 3301 AOPERATOR(ANDAND);
378cc40b 3302 s--;
3280af22 3303 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3304 if (ckWARN(WARN_SEMICOLON)
3305 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3306 {
57843af0 3307 CopLINE_dec(PL_curcop);
9014280d 3308 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3309 CopLINE_inc(PL_curcop);
463ee0b2 3310 }
79072805 3311 BAop(OP_BIT_AND);
463ee0b2 3312 }
79072805 3313
3280af22
NIS
3314 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3315 if (*PL_tokenbuf) {
3316 PL_expect = XOPERATOR;
3317 force_ident(PL_tokenbuf, '&');
463ee0b2 3318 }
79072805
LW
3319 else
3320 PREREF('&');
c07a80fd 3321 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3322 TERM('&');
3323
378cc40b
LW
3324 case '|':
3325 s++;
3326 tmp = *s++;
3327 if (tmp == '|')
a0d0e21e 3328 AOPERATOR(OROR);
378cc40b 3329 s--;
79072805 3330 BOop(OP_BIT_OR);
378cc40b
LW
3331 case '=':
3332 s++;
3333 tmp = *s++;
3334 if (tmp == '=')
79072805
LW
3335 Eop(OP_EQ);
3336 if (tmp == '>')
3337 OPERATOR(',');
378cc40b 3338 if (tmp == '~')
79072805 3339 PMop(OP_MATCH);
599cee73 3340 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3341 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3342 s--;
3280af22
NIS
3343 if (PL_expect == XSTATE && isALPHA(tmp) &&
3344 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3345 {
3280af22
NIS
3346 if (PL_in_eval && !PL_rsfp) {
3347 d = PL_bufend;
a5f75d66
AD
3348 while (s < d) {
3349 if (*s++ == '\n') {
3350 incline(s);
3351 if (strnEQ(s,"=cut",4)) {
3352 s = strchr(s,'\n');
3353 if (s)
3354 s++;
3355 else
3356 s = d;
3357 incline(s);
3358 goto retry;
3359 }
3360 }
3361 }
3362 goto retry;
3363 }
3280af22
NIS
3364 s = PL_bufend;
3365 PL_doextract = TRUE;
a0d0e21e
LW
3366 goto retry;
3367 }
3280af22 3368 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3369 char *t;
51882d45 3370#ifdef PERL_STRICT_CR
bf4acbe4 3371 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3372#else
bf4acbe4 3373 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3374#endif
a0d0e21e
LW
3375 if (*t == '\n' || *t == '#') {
3376 s--;
3280af22 3377 PL_expect = XBLOCK;
a0d0e21e
LW
3378 goto leftbracket;
3379 }
79072805 3380 }
a0d0e21e
LW
3381 yylval.ival = 0;
3382 OPERATOR(ASSIGNOP);
378cc40b
LW
3383 case '!':
3384 s++;
3385 tmp = *s++;
3386 if (tmp == '=')
79072805 3387 Eop(OP_NE);
378cc40b 3388 if (tmp == '~')
79072805 3389 PMop(OP_NOT);
378cc40b
LW
3390 s--;
3391 OPERATOR('!');
3392 case '<':
3280af22 3393 if (PL_expect != XOPERATOR) {
93a17b20 3394 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3395 check_uni();
79072805
LW
3396 if (s[1] == '<')
3397 s = scan_heredoc(s);
3398 else
3399 s = scan_inputsymbol(s);
3400 TERM(sublex_start());
378cc40b
LW
3401 }
3402 s++;
3403 tmp = *s++;
3404 if (tmp == '<')
79072805 3405 SHop(OP_LEFT_SHIFT);
395c3793
LW
3406 if (tmp == '=') {
3407 tmp = *s++;
3408 if (tmp == '>')
79072805 3409 Eop(OP_NCMP);
395c3793 3410 s--;
79072805 3411 Rop(OP_LE);
395c3793 3412 }
378cc40b 3413 s--;
79072805 3414 Rop(OP_LT);
378cc40b
LW
3415 case '>':
3416 s++;
3417 tmp = *s++;
3418 if (tmp == '>')
79072805 3419 SHop(OP_RIGHT_SHIFT);
378cc40b 3420 if (tmp == '=')
79072805 3421 Rop(OP_GE);
378cc40b 3422 s--;
79072805 3423 Rop(OP_GT);
378cc40b
LW
3424
3425 case '$':
bbce6d69 3426 CLINE;
3427
3280af22
NIS
3428 if (PL_expect == XOPERATOR) {
3429 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3430 PL_expect = XTERM;
a0d0e21e 3431 depcom();
bbce6d69 3432 return ','; /* grandfather non-comma-format format */
a0d0e21e 3433 }
8990e307 3434 }
a0d0e21e 3435
7e2040f0 3436 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3437 PL_tokenbuf[0] = '@';
376b8730
SM
3438 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3439 sizeof PL_tokenbuf - 1, FALSE);
3440 if (PL_expect == XOPERATOR)
3441 no_op("Array length", s);
3280af22 3442 if (!PL_tokenbuf[1])
a0d0e21e 3443 PREREF(DOLSHARP);
3280af22
NIS
3444 PL_expect = XOPERATOR;
3445 PL_pending_ident = '#';
463ee0b2 3446 TOKEN(DOLSHARP);
79072805 3447 }
bbce6d69 3448
3280af22 3449 PL_tokenbuf[0] = '$';
376b8730
SM
3450 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3451 sizeof PL_tokenbuf - 1, FALSE);
3452 if (PL_expect == XOPERATOR)
3453 no_op("Scalar", s);
3280af22
NIS
3454 if (!PL_tokenbuf[1]) {
3455 if (s == PL_bufend)
bbce6d69 3456 yyerror("Final $ should be \\$ or $name");
3457 PREREF('$');
8990e307 3458 }
a0d0e21e 3459
bbce6d69 3460 /* This kludge not intended to be bulletproof. */
3280af22 3461 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3462 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3463 newSViv(PL_compiling.cop_arybase));
bbce6d69 3464 yylval.opval->op_private = OPpCONST_ARYBASE;
3465 TERM(THING);
3466 }
3467
ff68c719 3468 d = s;
69d2bceb 3469 tmp = (I32)*s;
3280af22 3470 if (PL_lex_state == LEX_NORMAL)
ff68c719 3471 s = skipspace(s);
3472
3280af22 3473 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3474 char *t;
3475 if (*s == '[') {
3280af22 3476 PL_tokenbuf[0] = '@';
599cee73 3477 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3478 for(t = s + 1;
7e2040f0 3479 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3480 t++) ;
a0d0e21e 3481 if (*t++ == ',') {
3280af22
NIS
3482 PL_bufptr = skipspace(PL_bufptr);
3483 while (t < PL_bufend && *t != ']')
bbce6d69 3484 t++;
9014280d 3485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3486 "Multidimensional syntax %.*s not supported",
3487 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3488 }
3489 }
bbce6d69 3490 }
3491 else if (*s == '{') {
3280af22 3492 PL_tokenbuf[0] = '%';
599cee73 3493 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3494 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3495 {
3280af22 3496 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3497 STRLEN len;
3498 for (t++; isSPACE(*t); t++) ;
7e2040f0 3499 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3500 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3501 for (; isSPACE(*t); t++) ;
864dbfa3 3502 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3503 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3504 "You need to quote \"%s\"", tmpbuf);
748a9306 3505 }
93a17b20
LW
3506 }
3507 }
2f3197b3 3508 }
bbce6d69 3509
3280af22 3510 PL_expect = XOPERATOR;
69d2bceb 3511 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3512 bool islop = (PL_last_lop == PL_oldoldbufptr);
3513 if (!islop || PL_last_lop_op == OP_GREPSTART)
3514 PL_expect = XOPERATOR;
bbce6d69 3515 else if (strchr("$@\"'`q", *s))
3280af22 3516 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3517 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3518 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3519 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3520 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3521 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3522 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3523 /* binary operators exclude handle interpretations */
3524 switch (tmp) {
3525 case -KEY_x:
3526 case -KEY_eq:
3527 case -KEY_ne:
3528 case -KEY_gt:
3529 case -KEY_lt:
3530 case -KEY_ge:
3531 case -KEY_le:
3532 case -KEY_cmp:
3533 break;
3534 default:
3280af22 3535 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3536 break;
3537 }
3538 }
68dc0745 3539 else {
3540 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3541 if (gv && GvCVu(gv))
3280af22 3542 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3543 }
93a17b20 3544 }
bbce6d69 3545 else if (isDIGIT(*s))
3280af22 3546 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3547 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3548 PL_expect = XTERM; /* e.g. print $fh .3 */
c963b151
BD
3549 else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3550 PL_expect = XTERM; /* e.g. print $fh -1 */
7ce6e6b9
RGS
3551 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3552 PL_expect = XTERM; /* e.g. print $fh /.../
3553 XXX except DORDOR operator */
e0587a03 3554 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3555 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3556 }
3280af22 3557 PL_pending_ident = '$';
79072805 3558 TOKEN('$');
378cc40b
LW
3559
3560 case '@':
3280af22 3561 if (PL_expect == XOPERATOR)
bbce6d69 3562 no_op("Array", s);
3280af22
NIS
3563 PL_tokenbuf[0] = '@';
3564 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3565 if (!PL_tokenbuf[1]) {
3566 if (s == PL_bufend)
bbce6d69 3567 yyerror("Final @ should be \\@ or @name");
3568 PREREF('@');
3569 }
3280af22 3570 if (PL_lex_state == LEX_NORMAL)
ff68c719 3571 s = skipspace(s);
3280af22 3572 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3573 if (*s == '{')
3280af22 3574 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3575
3576 /* Warn about @ where they meant $. */
599cee73 3577 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3578 if (*s == '[' || *s == '{') {
3579 char *t = s + 1;
7e2040f0 3580 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3581 t++;
3582 if (*t == '}' || *t == ']') {
3583 t++;
3280af22 3584 PL_bufptr = skipspace(PL_bufptr);
9014280d 3585 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3586 "Scalar value %.*s better written as $%.*s",
3280af22 3587 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3588 }
93a17b20
LW
3589 }
3590 }
463ee0b2 3591 }
3280af22 3592 PL_pending_ident = '@';
79072805 3593 TERM('@');
378cc40b 3594
c963b151 3595 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
3596 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3597 s += 2;
3598 AOPERATOR(DORDOR);
3599 }
c963b151
BD
3600 case '?': /* may either be conditional or pattern */
3601 if(PL_expect == XOPERATOR) {
3602 tmp = *s++;
3603 if(tmp == '?') {
3604 OPERATOR('?');
3605 }
3606 else {
3607 tmp = *s++;
3608 if(tmp == '/') {
3609 /* A // operator. */
3610 AOPERATOR(DORDOR);
3611 }
3612 else {
3613 s--;
3614 Mop(OP_DIVIDE);
3615 }
3616 }
3617 }
3618 else {
3619 /* Disable warning on "study /blah/" */
3620 if (PL_oldoldbufptr == PL_last_uni
3621 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3622 || memNE(PL_last_uni, "study", 5)
3623 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3624 ))
3625 check_uni();
3626 s = scan_pat(s,OP_MATCH);
3627 TERM(sublex_start());
3628 }
378cc40b
LW
3629
3630 case '.':
51882d45
GS
3631 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3632#ifdef PERL_STRICT_CR
3633 && s[1] == '\n'
3634#else
3635 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3636#endif
3637 && (s == PL_linestart || s[-1] == '\n') )
3638 {
3280af22
NIS
3639 PL_lex_formbrack = 0;
3640 PL_expect = XSTATE;
79072805
LW
3641 goto rightbracket;
3642 }
3280af22 3643 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3644 tmp = *s++;
a687059c
LW
3645 if (*s == tmp) {
3646 s++;
2f3197b3
LW
3647 if (*s == tmp) {
3648 s++;
79072805 3649 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3650 }
3651 else
79072805 3652 yylval.ival = 0;
378cc40b 3653 OPERATOR(DOTDOT);
a687059c 3654 }
3280af22 3655 if (PL_expect != XOPERATOR)
2f3197b3 3656 check_uni();
79072805 3657 Aop(OP_CONCAT);
378cc40b
LW
3658 }
3659 /* FALL THROUGH */
3660 case '0': case '1': case '2': case '3': case '4':
3661 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3662 s = scan_num(s, &yylval);
4e553d73 3663 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3664 "### Saw number in '%s'\n", s);
5f80b19c 3665 } );
3280af22 3666 if (PL_expect == XOPERATOR)
8990e307 3667 no_op("Number",s);
79072805
LW
3668 TERM(THING);
3669
3670 case '\'':
09bef843 3671 s = scan_str(s,FALSE,FALSE);
4e553d73 3672 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3673 "### Saw string before '%s'\n", s);
5f80b19c 3674 } );
3280af22
NIS
3675 if (PL_expect == XOPERATOR) {
3676 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3677 PL_expect = XTERM;
a0d0e21e
LW
3678 depcom();
3679 return ','; /* grandfather non-comma-format format */
3680 }
463ee0b2 3681 else
8990e307 3682 no_op("String",s);
463ee0b2 3683 }
79072805 3684 if (!s)
85e6fe83 3685 missingterm((char*)0);
79072805
LW
3686 yylval.ival = OP_CONST;
3687 TERM(sublex_start());
3688
3689 case '"':
09bef843 3690 s = scan_str(s,FALSE,FALSE);
4e553d73 3691 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3692 "### Saw string before '%s'\n", s);
5f80b19c 3693 } );
3280af22
NIS
3694 if (PL_expect == XOPERATOR) {
3695 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3696 PL_expect = XTERM;
a0d0e21e
LW
3697 depcom();
3698 return ','; /* grandfather non-comma-format format */
3699 }
463ee0b2 3700 else
8990e307 3701 no_op("String",s);
463ee0b2 3702 }
79072805 3703 if (!s)
85e6fe83 3704 missingterm((char*)0);
4633a7c4 3705 yylval.ival = OP_CONST;
3280af22 3706 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3707 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3708 yylval.ival = OP_STRINGIFY;
3709 break;
3710 }
3711 }
79072805
LW
3712 TERM(sublex_start());
3713
3714 case '`':
09bef843 3715 s = scan_str(s,FALSE,FALSE);
4e553d73 3716 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3717 "### Saw backtick string before '%s'\n", s);
5f80b19c 3718 } );
3280af22 3719 if (PL_expect == XOPERATOR)
8990e307 3720 no_op("Backticks",s);
79072805 3721 if (!s)
85e6fe83 3722 missingterm((char*)0);
79072805
LW
3723 yylval.ival = OP_BACKTICK;
3724 set_csh();
3725 TERM(sublex_start());
3726
3727 case '\\':
3728 s++;
599cee73 3729 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
9014280d 3730 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
599cee73 3731 *s, *s);
3280af22 3732 if (PL_expect == XOPERATOR)
8990e307 3733 no_op("Backslash",s);
79072805
LW
3734 OPERATOR(REFGEN);
3735
a7cb1f99 3736 case 'v':
e526c9e6 3737 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3738 char *start = s;
3739 start++;
3740 start++;
dd629d5b 3741 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3742 start++;
3743 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3744 s = scan_num(s, &yylval);
a7cb1f99
GS
3745 TERM(THING);
3746 }
e526c9e6 3747 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
3748 else if (!isALPHA(*start) && (PL_expect == XTERM
3749 || PL_expect == XREF || PL_expect == XSTATE
3750 || PL_expect == XTERMORDORDOR)) {
e526c9e6
GS
3751 char c = *start;
3752 GV *gv;
3753 *start = '\0';
3754 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3755 *start = c;
3756 if (!gv) {
b73d6f50 3757 s = scan_num(s, &yylval);
e526c9e6
GS
3758 TERM(THING);
3759 }
3760 }
a7cb1f99
GS
3761 }
3762 goto keylookup;
79072805 3763 case 'x':
3280af22 3764 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3765 s++;
3766 Mop(OP_REPEAT);
2f3197b3 3767 }
79072805
LW
3768 goto keylookup;
3769
378cc40b 3770 case '_':
79072805
LW
3771 case 'a': case 'A':
3772 case 'b': case 'B':
3773 case 'c': case 'C':
3774 case 'd': case 'D':
3775 case 'e': case 'E':
3776 case 'f': case 'F':
3777 case 'g': case 'G':
3778 case 'h': case 'H':
3779 case 'i': case 'I':
3780 case 'j': case 'J':
3781 case 'k': case 'K':
3782 case 'l': case 'L':
3783 case 'm': case 'M':
3784 case 'n': case 'N':
3785 case 'o': case 'O':
3786 case 'p': case 'P':
3787 case 'q': case 'Q':
3788 case 'r': case 'R':
3789 case 's': case 'S':
3790 case 't': case 'T':
3791 case 'u': case 'U':
a7cb1f99 3792 case 'V':
79072805
LW
3793 case 'w': case 'W':
3794 case 'X':
3795 case 'y': case 'Y':
3796 case 'z': case 'Z':
3797
49dc05e3 3798 keylookup: {
1d239bbb 3799 orig_keyword = 0;
161b471a
NIS
3800 gv = Nullgv;
3801 gvp = 0;
49dc05e3 3802
3280af22
NIS
3803 PL_bufptr = s;
3804 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3805
3806 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3807 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3808 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3809 (PL_tokenbuf[0] == 'q' &&
3810 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3811
3812 /* x::* is just a word, unless x is "CORE" */
3280af22 3813 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3814 goto just_a_word;
3815
3643fb5f 3816 d = s;
3280af22 3817 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3818 d++; /* no comments skipped here, or s### is misparsed */
3819
3820 /* Is this a label? */
3280af22
NIS
3821 if (!tmp && PL_expect == XSTATE
3822 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3823 s = d + 1;
3280af22 3824 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3825 CLINE;
3826 TOKEN(LABEL);
3643fb5f
CS
3827 }
3828
3829 /* Check for keywords */
3280af22 3830 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3831
3832 /* Is this a word before a => operator? */
1c3923b3 3833 if (*d == '=' && d[1] == '>') {
748a9306 3834 CLINE;
3280af22 3835 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 3836 yylval.opval->op_private = OPpCONST_BARE;
0064a8a9 3837 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 3838 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
3839 TERM(WORD);
3840 }
3841
a0d0e21e 3842 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3843 GV *ogv = Nullgv; /* override (winner) */
3844 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3845 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3846 CV *cv;
3280af22 3847 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3848 (cv = GvCVu(gv)))
3849 {
3850 if (GvIMPORTED_CV(gv))
3851 ogv = gv;
3852 else if (! CvMETHOD(cv))
3853 hgv = gv;
3854 }
3855 if (!ogv &&
3280af22
NIS
3856 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3857 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3858 GvCVu(gv) && GvIMPORTED_CV(gv))
3859 {
3860 ogv = gv;
3861 }
3862 }
3863 if (ogv) {
30fe34ed 3864 orig_keyword = tmp;
56f7f34b 3865 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3866 }
3867 else if (gv && !gvp
3868 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3869 && GvCVu(gv)
3280af22 3870 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3871 {
3872 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3873 }
56f7f34b
CS
3874 else { /* no override */
3875 tmp = -tmp;
ac206dc8 3876 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
9014280d 3877 Perl_warner(aTHX_ packWARN(WARN_MISC),
ac206dc8
RGS
3878 "dump() better written as CORE::dump()");
3879 }
56f7f34b
CS
3880 gv = Nullgv;
3881 gvp = 0;
4944e2f7
GS
3882 if (ckWARN(WARN_AMBIGUOUS) && hgv
3883 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
9014280d 3884 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 3885 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3886 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3887 }
a0d0e21e
LW
3888 }
3889
3890 reserved_word:
3891 switch (tmp) {
79072805
LW
3892
3893 default: /* not a keyword */
93a17b20 3894 just_a_word: {
96e4d5b1 3895 SV *sv;
ce29ac45 3896 int pkgname = 0;
3280af22 3897 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3898
3899 /* Get the rest if it looks like a package qualifier */
3900
155aba94 3901 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 3902 STRLEN morelen;
3280af22 3903 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3904 TRUE, &morelen);
3905 if (!morelen)
cea2e8a9 3906 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3907 *s == '\'' ? "'" : "::");
c3e0f903 3908 len += morelen;
ce29ac45 3909 pkgname = 1;
a0d0e21e 3910 }
8990e307 3911
3280af22
NIS
3912 if (PL_expect == XOPERATOR) {
3913 if (PL_bufptr == PL_linestart) {
57843af0 3914 CopLINE_dec(PL_curcop);
9014280d 3915 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3916 CopLINE_inc(PL_curcop);
463ee0b2
LW
3917 }
3918 else
54310121 3919 no_op("Bareword",s);
463ee0b2 3920 }
8990e307 3921
c3e0f903
GS
3922 /* Look for a subroutine with this name in current package,
3923 unless name is "Foo::", in which case Foo is a bearword
3924 (and a package name). */
3925
3926 if (len > 2 &&
3280af22 3927 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3928 {
e476b1b5 3929 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
9014280d 3930 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 3931 "Bareword \"%s\" refers to nonexistent package",
3280af22 3932 PL_tokenbuf);
c3e0f903 3933 len -= 2;
3280af22 3934 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3935 gv = Nullgv;
3936 gvp = 0;
3937 }
3938 else {
3939 len = 0;
3940 if (!gv)
3280af22 3941 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3942 }
3943
3944 /* if we saw a global override before, get the right name */
8990e307 3945
49dc05e3 3946 if (gvp) {
79cb57f6 3947 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3948 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3949 }
3950 else
3280af22 3951 sv = newSVpv(PL_tokenbuf,0);
8990e307 3952
a0d0e21e
LW
3953 /* Presume this is going to be a bareword of some sort. */
3954
3955 CLINE;
49dc05e3 3956 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e 3957 yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
3958 /* UTF-8 package name? */
3959 if (UTF && !IN_BYTES &&
3960 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
3961 SvUTF8_on(sv);
a0d0e21e 3962
c3e0f903
GS
3963 /* And if "Foo::", then that's what it certainly is. */
3964
3965 if (len)
3966 goto safe_bareword;
3967
8990e307
LW
3968 /* See if it's the indirect object for a list operator. */
3969
3280af22
NIS
3970 if (PL_oldoldbufptr &&
3971 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3972 (PL_oldoldbufptr == PL_last_lop
3973 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3974 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3975 (PL_expect == XREF ||
3976 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3977 {
748a9306
LW
3978 bool immediate_paren = *s == '(';
3979
a0d0e21e
LW
3980 /* (Now we can afford to cross potential line boundary.) */
3981 s = skipspace(s);
3982
3983 /* Two barewords in a row may indicate method call. */
3984
7e2040f0 3985 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3986 return tmp;
3987
3988 /* If not a declared subroutine, it's an indirect object. */
3989 /* (But it's an indir obj regardless for sort.) */
3990
7948272d 3991 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 3992 ((!gv || !GvCVu(gv)) &&
a9ef352a 3993 (PL_last_lop_op != OP_MAPSTART &&
f0670693 3994 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 3995 {
3280af22 3996 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 3997 goto bareword;
93a17b20
LW
3998 }
3999 }
8990e307 4000
3280af22 4001 PL_expect = XOPERATOR;
8990e307 4002 s = skipspace(s);
1c3923b3
GS
4003
4004 /* Is this a word before a => operator? */
ce29ac45 4005 if (*s == '=' && s[1] == '>' && !pkgname) {
1c3923b3
GS
4006 CLINE;
4007 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 4008 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 4009 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
4010 TERM(WORD);
4011 }
4012
4013 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 4014 if (*s == '(') {
79072805 4015 CLINE;
96e4d5b1 4016 if (gv && GvCVu(gv)) {
bf4acbe4 4017 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 4018 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 4019 s = d + 1;
4020 goto its_constant;
4021 }
4022 }
3280af22
NIS
4023 PL_nextval[PL_nexttoke].opval = yylval.opval;
4024 PL_expect = XOPERATOR;
93a17b20 4025 force_next(WORD);
c07a80fd 4026 yylval.ival = 0;
463ee0b2 4027 TOKEN('&');
79072805 4028 }
93a17b20 4029
a0d0e21e 4030 /* If followed by var or block, call it a method (unless sub) */
8990e307 4031
8ebc5c01 4032 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
4033 PL_last_lop = PL_oldbufptr;
4034 PL_last_lop_op = OP_METHOD;
93a17b20 4035 PREBLOCK(METHOD);
463ee0b2
LW
4036 }
4037
8990e307
LW
4038 /* If followed by a bareword, see if it looks like indir obj. */
4039
30fe34ed
RGS
4040 if (!orig_keyword
4041 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4042 && (tmp = intuit_method(s,gv)))
a0d0e21e 4043 return tmp;
93a17b20 4044
8990e307
LW
4045 /* Not a method, so call it a subroutine (if defined) */
4046
8ebc5c01 4047 if (gv && GvCVu(gv)) {
46fc3d4c 4048 CV* cv;
0453d815 4049 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
9014280d 4050 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4051 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4052 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4053 /* Check for a constant sub */
46fc3d4c 4054 cv = GvCV(gv);
96e4d5b1 4055 if ((sv = cv_const_sv(cv))) {
4056 its_constant:
4057 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4058 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4059 yylval.opval->op_private = 0;
4060 TOKEN(WORD);
89bfa8cd 4061 }
4062
a5f75d66
AD
4063 /* Resolve to GV now. */
4064 op_free(yylval.opval);
4065 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4066 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4067 PL_last_lop = PL_oldbufptr;
bf848113 4068 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4069 /* Is there a prototype? */
4070 if (SvPOK(cv)) {
4071 STRLEN len;
7a52d87a 4072 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4073 if (!len)
4074 TERM(FUNC0SUB);
7a52d87a 4075 if (strEQ(proto, "$"))
4633a7c4 4076 OPERATOR(UNIOPSUB);
7a52d87a 4077 if (*proto == '&' && *s == '{') {
c99da370
JH
4078 sv_setpv(PL_subname, PL_curstash ?
4079 "__ANON__" : "__ANON__::__ANON__");
4633a7c4
LW
4080 PREBLOCK(LSTOPSUB);
4081 }
a9ef352a 4082 }
3280af22
NIS
4083 PL_nextval[PL_nexttoke].opval = yylval.opval;
4084 PL_expect = XTERM;
8990e307
LW
4085 force_next(WORD);
4086 TOKEN(NOAMP);
4087 }
748a9306 4088
8990e307
LW
4089 /* Call it a bare word */
4090
5603f27d
GS
4091 if (PL_hints & HINT_STRICT_SUBS)
4092 yylval.opval->op_private |= OPpCONST_STRICT;
4093 else {
4094 bareword:
4095 if (ckWARN(WARN_RESERVED)) {
4096 if (lastchar != '-') {
4097 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
238ae712 4098 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
9014280d 4099 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
4100 PL_tokenbuf);
4101 }
748a9306
LW
4102 }
4103 }
c3e0f903
GS
4104
4105 safe_bareword:
f248d071 4106 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
9014280d 4107 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4108 "Operator or semicolon missing before %c%s",
3280af22 4109 lastchar, PL_tokenbuf);
9014280d 4110 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4111 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4112 lastchar, lastchar);
4113 }
93a17b20 4114 TOKEN(WORD);
79072805 4115 }
79072805 4116
68dc0745 4117 case KEY___FILE__:
46fc3d4c 4118 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4119 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4120 TERM(THING);
4121
79072805 4122 case KEY___LINE__:
cf2093f6 4123 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4124 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4125 TERM(THING);
68dc0745 4126
4127 case KEY___PACKAGE__:
4128 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
4129 (PL_curstash
4130 ? newSVsv(PL_curstname)
4131 : &PL_sv_undef));
79072805 4132 TERM(THING);
79072805 4133
e50aee73 4134 case KEY___DATA__:
79072805
LW
4135 case KEY___END__: {
4136 GV *gv;
79072805
LW
4137
4138 /*SUPPRESS 560*/
3280af22 4139 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4140 char *pname = "main";
3280af22
NIS
4141 if (PL_tokenbuf[2] == 'D')
4142 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4143 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4144 GvMULTI_on(gv);
79072805 4145 if (!GvIO(gv))
a0d0e21e 4146 GvIOp(gv) = newIO();
3280af22 4147 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4148#if defined(HAS_FCNTL) && defined(F_SETFD)
4149 {
3280af22 4150 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4151 fcntl(fd,F_SETFD,fd >= 3);
4152 }
79072805 4153#endif
fd049845 4154 /* Mark this internal pseudo-handle as clean */
4155 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4156 if (PL_preprocess)
50952442 4157 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4158 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4159 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4160 else
50952442 4161 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4162#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4163 /* if the script was opened in binmode, we need to revert
53129d29 4164 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4165 * XXX this is a questionable hack at best. */
53129d29
GS
4166 if (PL_bufend-PL_bufptr > 2
4167 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4168 {
4169 Off_t loc = 0;
50952442 4170 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4171 loc = PerlIO_tell(PL_rsfp);
4172 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4173 }
2986a63f
JH
4174#ifdef NETWARE
4175 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4176#else
c39cd008 4177 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 4178#endif /* NETWARE */
1143fce0
JH
4179#ifdef PERLIO_IS_STDIO /* really? */
4180# if defined(__BORLANDC__)
cb359b41
JH
4181 /* XXX see note in do_binmode() */
4182 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4183# endif
4184#endif
c39cd008
GS
4185 if (loc > 0)
4186 PerlIO_seek(PL_rsfp, loc, 0);
4187 }
4188 }
4189#endif
7948272d 4190#ifdef PERLIO_LAYERS
0064a8a9 4191 if (UTF && !IN_BYTES)
7948272d
NIS
4192 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4193#endif
3280af22 4194 PL_rsfp = Nullfp;
79072805
LW
4195 }
4196 goto fake_eof;
e929a76b 4197 }
de3bb511 4198
8990e307 4199 case KEY_AUTOLOAD:
ed6116ce 4200 case KEY_DESTROY:
79072805 4201 case KEY_BEGIN:
7d30b5c4 4202 case KEY_CHECK:
7d07dbc2 4203 case KEY_INIT:
7d30b5c4 4204 case KEY_END:
3280af22
NIS
4205 if (PL_expect == XSTATE) {
4206 s = PL_bufptr;
93a17b20 4207 goto really_sub;
79072805
LW
4208 }
4209 goto just_a_word;
4210
a0d0e21e
LW
4211 case KEY_CORE:
4212 if (*s == ':' && s[1] == ':') {
4213 s += 2;
748a9306 4214 d = s;
3280af22 4215 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4216 if (!(tmp = keyword(PL_tokenbuf, len)))
4217 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4218 if (tmp < 0)
4219 tmp = -tmp;
4220 goto reserved_word;
4221 }
4222 goto just_a_word;
4223
463ee0b2
LW
4224 case KEY_abs:
4225 UNI(OP_ABS);
4226
79072805
LW
4227 case KEY_alarm:
4228 UNI(OP_ALARM);
4229
4230 case KEY_accept:
a0d0e21e 4231 LOP(OP_ACCEPT,XTERM);
79072805 4232
463ee0b2
LW
4233 case KEY_and:
4234 OPERATOR(ANDOP);
4235
79072805 4236 case KEY_atan2:
a0d0e21e 4237 LOP(OP_ATAN2,XTERM);
85e6fe83 4238
79072805 4239 case KEY_bind:
a0d0e21e 4240 LOP(OP_BIND,XTERM);
79072805
LW
4241
4242 case KEY_binmode:
1c1fc3ea 4243 LOP(OP_BINMODE,XTERM);
79072805
LW
4244
4245 case KEY_bless:
a0d0e21e 4246 LOP(OP_BLESS,XTERM);
79072805
LW
4247
4248 case KEY_chop:
4249 UNI(OP_CHOP);
4250
4251 case KEY_continue:
4252 PREBLOCK(CONTINUE);
4253
4254 case KEY_chdir:
85e6fe83 4255 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4256 UNI(OP_CHDIR);
4257
4258 case KEY_close:
4259 UNI(OP_CLOSE);
4260
4261 case KEY_closedir:
4262 UNI(OP_CLOSEDIR);
4263
4264 case KEY_cmp:
4265 Eop(OP_SCMP);
4266
4267 case KEY_caller:
4268 UNI(OP_CALLER);
4269
4270 case KEY_crypt:
4271#ifdef FCRYPT
f4c556ac
GS
4272 if (!PL_cryptseen) {
4273 PL_cryptseen = TRUE;
de3bb511 4274 init_des();
f4c556ac 4275 }
a687059c 4276#endif
a0d0e21e 4277 LOP(OP_CRYPT,XTERM);
79072805
LW
4278
4279 case KEY_chmod:
a0d0e21e 4280 LOP(OP_CHMOD,XTERM);
79072805
LW
4281
4282 case KEY_chown:
a0d0e21e 4283 LOP(OP_CHOWN,XTERM);
79072805
LW
4284
4285 case KEY_connect:
a0d0e21e 4286 LOP(OP_CONNECT,XTERM);
79072805 4287
463ee0b2
LW
4288 case KEY_chr:
4289 UNI(OP_CHR);
4290
79072805
LW
4291 case KEY_cos:
4292 UNI(OP_COS);
4293
4294 case KEY_chroot:
4295 UNI(OP_CHROOT);
4296
4297 case KEY_do:
4298 s = skipspace(s);
4299 if (*s == '{')
a0d0e21e 4300 PRETERMBLOCK(DO);
79072805 4301 if (*s != '\'')
89c5585f 4302 s = force_word(s,WORD,TRUE,TRUE,FALSE);
378cc40b 4303 OPERATOR(DO);
79072805
LW
4304
4305 case KEY_die:
3280af22 4306 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4307 LOP(OP_DIE,XTERM);
79072805
LW
4308
4309 case KEY_defined:
4310 UNI(OP_DEFINED);
4311
4312 case KEY_delete:
a0d0e21e 4313 UNI(OP_DELETE);
79072805
LW
4314
4315 case KEY_dbmopen:
a0d0e21e
LW
4316 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4317 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4318
4319 case KEY_dbmclose:
4320 UNI(OP_DBMCLOSE);
4321
4322 case KEY_dump:
a0d0e21e 4323 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4324 LOOPX(OP_DUMP);
4325
4326 case KEY_else:
4327 PREBLOCK(ELSE);
4328
4329 case KEY_elsif:
57843af0 4330 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4331 OPERATOR(ELSIF);
4332
4333 case KEY_eq:
4334 Eop(OP_SEQ);
4335
a0d0e21e
LW
4336 case KEY_exists:
4337 UNI(OP_EXISTS);
4e553d73 4338
79072805
LW
4339 case KEY_exit:
4340 UNI(OP_EXIT);
4341
4342 case KEY_eval:
79072805 4343 s = skipspace(s);
3280af22 4344 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4345 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4346
4347 case KEY_eof:
4348 UNI(OP_EOF);
4349
c963b151
BD
4350 case KEY_err:
4351 OPERATOR(DOROP);
4352
79072805
LW
4353 case KEY_exp:
4354 UNI(OP_EXP);
4355
4356 case KEY_each:
4357 UNI(OP_EACH);
4358
4359 case KEY_exec:
4360 set_csh();
a0d0e21e 4361 LOP(OP_EXEC,XREF);
79072805
LW
4362
4363 case KEY_endhostent:
4364 FUN0(OP_EHOSTENT);
4365
4366 case KEY_endnetent:
4367 FUN0(OP_ENETENT);
4368
4369 case KEY_endservent:
4370 FUN0(OP_ESERVENT);
4371
4372 case KEY_endprotoent:
4373 FUN0(OP_EPROTOENT);
4374
4375 case KEY_endpwent:
4376 FUN0(OP_EPWENT);
4377
4378 case KEY_endgrent:
4379 FUN0(OP_EGRENT);
4380
4381 case KEY_for:
4382 case KEY_foreach:
57843af0 4383 yylval.ival = CopLINE(PL_curcop);
55497cff 4384 s = skipspace(s);
7e2040f0 4385 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4386 char *p = s;
3280af22 4387 if ((PL_bufend - p) >= 3 &&
55497cff 4388 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4389 p += 2;
77ca0c92
LW
4390 else if ((PL_bufend - p) >= 4 &&
4391 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4392 p += 3;
55497cff 4393 p = skipspace(p);
7e2040f0 4394 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4395 p = scan_ident(p, PL_bufend,
4396 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4397 p = skipspace(p);
4398 }
4399 if (*p != '$')
cea2e8a9 4400 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4401 }
79072805
LW
4402 OPERATOR(FOR);
4403
4404 case KEY_formline:
a0d0e21e 4405 LOP(OP_FORMLINE,XTERM);
79072805
LW
4406
4407 case KEY_fork:
4408 FUN0(OP_FORK);
4409
4410 case KEY_fcntl:
a0d0e21e 4411 LOP(OP_FCNTL,XTERM);
79072805
LW
4412
4413 case KEY_fileno:
4414 UNI(OP_FILENO);
4415
4416 case KEY_flock:
a0d0e21e 4417 LOP(OP_FLOCK,XTERM);
79072805
LW
4418
4419 case KEY_gt:
4420 Rop(OP_SGT);
4421
4422 case KEY_ge:
4423 Rop(OP_SGE);
4424
4425 case KEY_grep:
2c38e13d 4426 LOP(OP_GREPSTART, XREF);
79072805
LW
4427
4428 case KEY_goto:
a0d0e21e 4429 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4430 LOOPX(OP_GOTO);
4431
4432 case KEY_gmtime:
4433 UNI(OP_GMTIME);
4434
4435 case KEY_getc:
6f33ba73 4436 UNIDOR(OP_GETC);
79072805
LW
4437
4438 case KEY_getppid:
4439 FUN0(OP_GETPPID);
4440
4441 case KEY_getpgrp:
4442 UNI(OP_GETPGRP);
4443
4444 case KEY_getpriority:
a0d0e21e 4445 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4446
4447 case KEY_getprotobyname:
4448 UNI(OP_GPBYNAME);
4449
4450 case KEY_getprotobynumber:
a0d0e21e 4451 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4452
4453 case KEY_getprotoent:
4454 FUN0(OP_GPROTOENT);
4455
4456 case KEY_getpwent:
4457 FUN0(OP_GPWENT);
4458
4459 case KEY_getpwnam:
ff68c719 4460 UNI(OP_GPWNAM);
79072805
LW
4461
4462 case KEY_getpwuid:
ff68c719 4463 UNI(OP_GPWUID);
79072805
LW
4464
4465 case KEY_getpeername:
4466 UNI(OP_GETPEERNAME);
4467
4468 case KEY_gethostbyname:
4469 UNI(OP_GHBYNAME);
4470
4471 case KEY_gethostbyaddr:
a0d0e21e 4472 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4473
4474 case KEY_gethostent:
4475 FUN0(OP_GHOSTENT);
4476
4477 case KEY_getnetbyname:
4478 UNI(OP_GNBYNAME);
4479
4480 case KEY_getnetbyaddr:
a0d0e21e 4481 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4482
4483 case KEY_getnetent:
4484 FUN0(OP_GNETENT);
4485
4486 case KEY_getservbyname:
a0d0e21e 4487 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4488
4489 case KEY_getservbyport:
a0d0e21e 4490 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4491
4492 case KEY_getservent:
4493 FUN0(OP_GSERVENT);
4494
4495 case KEY_getsockname:
4496 UNI(OP_GETSOCKNAME);
4497
4498 case KEY_getsockopt:
a0d0e21e 4499 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4500
4501 case KEY_getgrent:
4502 FUN0(OP_GGRENT);
4503
4504 case KEY_getgrnam:
ff68c719 4505 UNI(OP_GGRNAM);
79072805
LW
4506
4507 case KEY_getgrgid:
ff68c719 4508 UNI(OP_GGRGID);
79072805
LW
4509
4510 case KEY_getlogin:
4511 FUN0(OP_GETLOGIN);
4512
93a17b20 4513 case KEY_glob:
a0d0e21e
LW
4514 set_csh();
4515 LOP(OP_GLOB,XTERM);
93a17b20 4516
79072805
LW
4517 case KEY_hex:
4518 UNI(OP_HEX);
4519
4520 case KEY_if:
57843af0 4521 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4522 OPERATOR(IF);
4523
4524 case KEY_index:
a0d0e21e 4525 LOP(OP_INDEX,XTERM);
79072805
LW
4526
4527 case KEY_int:
4528 UNI(OP_INT);
4529
4530 case KEY_ioctl:
a0d0e21e 4531 LOP(OP_IOCTL,XTERM);
79072805
LW
4532
4533 case KEY_join:
a0d0e21e 4534 LOP(OP_JOIN,XTERM);
79072805
LW
4535
4536 case KEY_keys:
4537 UNI(OP_KEYS);
4538
4539 case KEY_kill:
a0d0e21e 4540 LOP(OP_KILL,XTERM);
79072805
LW
4541
4542 case KEY_last:
a0d0e21e 4543 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4544 LOOPX(OP_LAST);
4e553d73 4545
79072805
LW
4546 case KEY_lc:
4547 UNI(OP_LC);
4548
4549 case KEY_lcfirst:
4550 UNI(OP_LCFIRST);
4551
4552 case KEY_local:
09bef843 4553 yylval.ival = 0;
79072805
LW
4554 OPERATOR(LOCAL);
4555
4556 case KEY_length:
4557 UNI(OP_LENGTH);
4558
4559 case KEY_lt:
4560 Rop(OP_SLT);
4561
4562 case KEY_le:
4563 Rop(OP_SLE);
4564
4565 case KEY_localtime:
4566 UNI(OP_LOCALTIME);
4567
4568 case KEY_log:
4569 UNI(OP_LOG);
4570
4571 case KEY_link:
a0d0e21e 4572 LOP(OP_LINK,XTERM);
79072805
LW
4573
4574 case KEY_listen:
a0d0e21e 4575 LOP(OP_LISTEN,XTERM);
79072805 4576
c0329465
MB
4577 case KEY_lock:
4578 UNI(OP_LOCK);
4579
79072805
LW
4580 case KEY_lstat:
4581 UNI(OP_LSTAT);
4582
4583 case KEY_m:
8782bef2 4584 s = scan_pat(s,OP_MATCH);
79072805
LW
4585 TERM(sublex_start());
4586
a0d0e21e 4587 case KEY_map:
2c38e13d 4588 LOP(OP_MAPSTART, XREF);
4e4e412b 4589
79072805 4590 case KEY_mkdir:
a0d0e21e 4591 LOP(OP_MKDIR,XTERM);
79072805
LW
4592
4593 case KEY_msgctl:
a0d0e21e 4594 LOP(OP_MSGCTL,XTERM);
79072805
LW
4595
4596 case KEY_msgget:
a0d0e21e 4597 LOP(OP_MSGGET,XTERM);
79072805
LW
4598
4599 case KEY_msgrcv:
a0d0e21e 4600 LOP(OP_MSGRCV,XTERM);
79072805
LW
4601
4602 case KEY_msgsnd:
a0d0e21e 4603 LOP(OP_MSGSND,XTERM);
79072805 4604
77ca0c92 4605 case KEY_our:
93a17b20 4606 case KEY_my:
77ca0c92 4607 PL_in_my = tmp;
c750a3ec 4608 s = skipspace(s);
7e2040f0 4609 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4610 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4611 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4612 goto really_sub;
def3634b 4613 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4614 if (!PL_in_my_stash) {
c750a3ec 4615 char tmpbuf[1024];
3280af22
NIS
4616 PL_bufptr = s;
4617 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4618 yyerror(tmpbuf);
4619 }
4620 }
09bef843 4621 yylval.ival = 1;
55497cff 4622 OPERATOR(MY);
93a17b20 4623
79072805 4624 case KEY_next:
a0d0e21e 4625 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4626 LOOPX(OP_NEXT);
4627
4628 case KEY_ne:
4629 Eop(OP_SNE);
4630
a0d0e21e 4631 case KEY_no:
3280af22 4632 if (PL_expect != XSTATE)
a0d0e21e
LW
4633 yyerror("\"no\" not allowed in expression");
4634 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 4635 s = force_version(s, FALSE);
a0d0e21e
LW
4636 yylval.ival = 0;
4637 OPERATOR(USE);
4638
4639 case KEY_not:
2d2e263d
LW
4640 if (*s == '(' || (s = skipspace(s), *s == '('))
4641 FUN1(OP_NOT);
4642 else
4643 OPERATOR(NOTOP);
a0d0e21e 4644
79072805 4645 case KEY_open:
93a17b20 4646 s = skipspace(s);
7e2040f0 4647 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4648 char *t;
7e2040f0 4649 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
93a17b20 4650 t = skipspace(d);
66fbe8fb
HS
4651 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4652 /* [perl #16184] */
4653 && !(t[0] == '=' && t[1] == '>')
4654 ) {
9014280d 4655 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 4656 "Precedence problem: open %.*s should be open(%.*s)",
66fbe8fb
HS
4657 d - s, s, d - s, s);
4658 }
93a17b20 4659 }
a0d0e21e 4660 LOP(OP_OPEN,XTERM);
79072805 4661
463ee0b2 4662 case KEY_or:
a0d0e21e 4663 yylval.ival = OP_OR;
463ee0b2
LW
4664 OPERATOR(OROP);
4665
79072805
LW
4666 case KEY_ord:
4667 UNI(OP_ORD);
4668
4669 case KEY_oct:
4670 UNI(OP_OCT);
4671
4672 case KEY_opendir:
a0d0e21e 4673 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4674
4675 case KEY_print:
3280af22 4676 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4677 LOP(OP_PRINT,XREF);
79072805
LW
4678
4679 case KEY_printf:
3280af22 4680 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4681 LOP(OP_PRTF,XREF);
79072805 4682
c07a80fd 4683 case KEY_prototype:
4684 UNI(OP_PROTOTYPE);
4685
79072805 4686 case KEY_push:
a0d0e21e 4687 LOP(OP_PUSH,XTERM);
79072805
LW
4688
4689 case KEY_pop:
6f33ba73 4690 UNIDOR(OP_POP);
79072805 4691
a0d0e21e 4692 case KEY_pos:
6f33ba73 4693 UNIDOR(OP_POS);
4e553d73 4694
79072805 4695 case KEY_pack:
a0d0e21e 4696 LOP(OP_PACK,XTERM);
79072805
LW
4697
4698 case KEY_package:
a0d0e21e 4699 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4700 OPERATOR(PACKAGE);
4701
4702 case KEY_pipe:
a0d0e21e 4703 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4704
4705 case KEY_q:
09bef843 4706 s = scan_str(s,FALSE,FALSE);
79072805 4707 if (!s)
85e6fe83 4708 missingterm((char*)0);
79072805
LW
4709 yylval.ival = OP_CONST;
4710 TERM(sublex_start());
4711
a0d0e21e
LW
4712 case KEY_quotemeta:
4713 UNI(OP_QUOTEMETA);
4714
8990e307 4715 case KEY_qw:
09bef843 4716 s = scan_str(s,FALSE,FALSE);
8990e307 4717 if (!s)
85e6fe83 4718 missingterm((char*)0);
8127e0e3
GS
4719 force_next(')');
4720 if (SvCUR(PL_lex_stuff)) {
4721 OP *words = Nullop;
4722 int warned = 0;
3280af22 4723 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4724 while (len) {
7948272d 4725 SV *sv;
8127e0e3
GS
4726 for (; isSPACE(*d) && len; --len, ++d) ;
4727 if (len) {
4728 char *b = d;
e476b1b5 4729 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4730 for (; !isSPACE(*d) && len; --len, ++d) {
4731 if (*d == ',') {
9014280d 4732 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4733 "Possible attempt to separate words with commas");
4734 ++warned;
4735 }
4736 else if (*d == '#') {
9014280d 4737 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4738 "Possible attempt to put comments in qw() list");
4739 ++warned;
4740 }
4741 }
4742 }
4743 else {
4744 for (; !isSPACE(*d) && len; --len, ++d) ;
4745 }
7948272d
NIS
4746 sv = newSVpvn(b, d-b);
4747 if (DO_UTF8(PL_lex_stuff))
4748 SvUTF8_on(sv);
8127e0e3 4749 words = append_elem(OP_LIST, words,
7948272d 4750 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4751 }
4752 }
8127e0e3
GS
4753 if (words) {
4754 PL_nextval[PL_nexttoke].opval = words;
4755 force_next(THING);
4756 }
55497cff 4757 }
37fd879b 4758 if (PL_lex_stuff) {
8127e0e3 4759 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4760 PL_lex_stuff = Nullsv;
4761 }
3280af22 4762 PL_expect = XTERM;
8127e0e3 4763 TOKEN('(');
8990e307 4764
79072805 4765 case KEY_qq:
09bef843 4766 s = scan_str(s,FALSE,FALSE);
79072805 4767 if (!s)
85e6fe83 4768 missingterm((char*)0);
a0d0e21e 4769 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4770 if (SvIVX(PL_lex_stuff) == '\'')
4771 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4772 TERM(sublex_start());
4773
8782bef2
GB
4774 case KEY_qr:
4775 s = scan_pat(s,OP_QR);
4776 TERM(sublex_start());
4777
79072805 4778 case KEY_qx:
09bef843 4779 s = scan_str(s,FALSE,FALSE);
79072805 4780 if (!s)
85e6fe83 4781 missingterm((char*)0);
79072805
LW
4782 yylval.ival = OP_BACKTICK;
4783 set_csh();
4784 TERM(sublex_start());
4785
4786 case KEY_return:
4787 OLDLOP(OP_RETURN);
4788
4789 case KEY_require:
a7cb1f99 4790 s = skipspace(s);
e759cc13
RGS
4791 if (isDIGIT(*s)) {
4792 s = force_version(s, FALSE);
a7cb1f99 4793 }
e759cc13
RGS
4794 else if (*s != 'v' || !isDIGIT(s[1])
4795 || (s = force_version(s, TRUE), *s == 'v'))
4796 {
a7cb1f99
GS
4797 *PL_tokenbuf = '\0';
4798 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4799 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4800 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4801 else if (*s == '<')
4802 yyerror("<> should be quotes");
4803 }
463ee0b2 4804 UNI(OP_REQUIRE);
79072805
LW
4805
4806 case KEY_reset:
4807 UNI(OP_RESET);
4808
4809 case KEY_redo:
a0d0e21e 4810 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4811 LOOPX(OP_REDO);
4812
4813 case KEY_rename:
a0d0e21e 4814 LOP(OP_RENAME,XTERM);
79072805
LW
4815
4816 case KEY_rand:
4817 UNI(OP_RAND);
4818
4819 case KEY_rmdir:
4820 UNI(OP_RMDIR);
4821
4822 case KEY_rindex:
a0d0e21e 4823 LOP(OP_RINDEX,XTERM);
79072805
LW
4824
4825 case KEY_read:
a0d0e21e 4826 LOP(OP_READ,XTERM);
79072805
LW
4827
4828 case KEY_readdir:
4829 UNI(OP_READDIR);
4830
93a17b20
LW
4831 case KEY_readline:
4832 set_csh();
6f33ba73 4833 UNIDOR(OP_READLINE);
93a17b20
LW
4834
4835 case KEY_readpipe:
4836 set_csh();
4837 UNI(OP_BACKTICK);
4838
79072805
LW
4839 case KEY_rewinddir:
4840 UNI(OP_REWINDDIR);
4841
4842 case KEY_recv:
a0d0e21e 4843 LOP(OP_RECV,XTERM);
79072805
LW
4844
4845 case KEY_reverse:
a0d0e21e 4846 LOP(OP_REVERSE,XTERM);
79072805
LW
4847
4848 case KEY_readlink:
6f33ba73 4849 UNIDOR(OP_READLINK);
79072805
LW
4850
4851 case KEY_ref:
4852 UNI(OP_REF);
4853
4854 case KEY_s:
4855 s = scan_subst(s);
4856 if (yylval.opval)
4857 TERM(sublex_start());
4858 else
4859 TOKEN(1); /* force error */
4860
a0d0e21e
LW
4861 case KEY_chomp:
4862 UNI(OP_CHOMP);
4e553d73 4863
79072805
LW
4864 case KEY_scalar:
4865 UNI(OP_SCALAR);
4866
4867 case KEY_select:
a0d0e21e 4868 LOP(OP_SELECT,XTERM);
79072805
LW
4869
4870 case KEY_seek:
a0d0e21e 4871 LOP(OP_SEEK,XTERM);
79072805
LW
4872
4873 case KEY_semctl:
a0d0e21e 4874 LOP(OP_SEMCTL,XTERM);
79072805
LW
4875
4876 case KEY_semget:
a0d0e21e 4877 LOP(OP_SEMGET,XTERM);
79072805
LW
4878
4879 case KEY_semop:
a0d0e21e 4880 LOP(OP_SEMOP,XTERM);
79072805
LW
4881
4882 case KEY_send:
a0d0e21e 4883 LOP(OP_SEND,XTERM);
79072805
LW
4884
4885 case KEY_setpgrp:
a0d0e21e 4886 LOP(OP_SETPGRP,XTERM);
79072805
LW
4887
4888 case KEY_setpriority:
a0d0e21e 4889 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
4890
4891 case KEY_sethostent:
ff68c719 4892 UNI(OP_SHOSTENT);
79072805
LW
4893
4894 case KEY_setnetent:
ff68c719 4895 UNI(OP_SNETENT);
79072805
LW
4896
4897 case KEY_setservent:
ff68c719 4898 UNI(OP_SSERVENT);
79072805
LW
4899
4900 case KEY_setprotoent:
ff68c719 4901 UNI(OP_SPROTOENT);
79072805
LW
4902
4903 case KEY_setpwent:
4904 FUN0(OP_SPWENT);
4905
4906 case KEY_setgrent:
4907 FUN0(OP_SGRENT);
4908
4909 case KEY_seekdir:
a0d0e21e 4910 LOP(OP_SEEKDIR,XTERM);
79072805
LW
4911
4912 case KEY_setsockopt:
a0d0e21e 4913 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
4914
4915 case KEY_shift:
6f33ba73 4916 UNIDOR(OP_SHIFT);
79072805
LW
4917
4918 case KEY_shmctl:
a0d0e21e 4919 LOP(OP_SHMCTL,XTERM);
79072805
LW
4920
4921 case KEY_shmget:
a0d0e21e 4922 LOP(OP_SHMGET,XTERM);
79072805
LW
4923
4924 case KEY_shmread:
a0d0e21e 4925 LOP(OP_SHMREAD,XTERM);
79072805
LW
4926
4927 case KEY_shmwrite:
a0d0e21e 4928 LOP(OP_SHMWRITE,XTERM);
79072805
LW
4929
4930 case KEY_shutdown:
a0d0e21e 4931 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
4932
4933 case KEY_sin:
4934 UNI(OP_SIN);
4935
4936 case KEY_sleep:
4937 UNI(OP_SLEEP);
4938
4939 case KEY_socket:
a0d0e21e 4940 LOP(OP_SOCKET,XTERM);
79072805
LW
4941
4942 case KEY_socketpair:
a0d0e21e 4943 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
4944
4945 case KEY_sort:
3280af22 4946 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
4947 s = skipspace(s);
4948 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 4949 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 4950 PL_expect = XTERM;
15f0808c 4951 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 4952 LOP(OP_SORT,XREF);
79072805
LW
4953
4954 case KEY_split:
a0d0e21e 4955 LOP(OP_SPLIT,XTERM);
79072805
LW
4956
4957 case KEY_sprintf:
a0d0e21e 4958 LOP(OP_SPRINTF,XTERM);
79072805
LW
4959
4960 case KEY_splice:
a0d0e21e 4961 LOP(OP_SPLICE,XTERM);
79072805
LW
4962
4963 case KEY_sqrt:
4964 UNI(OP_SQRT);
4965
4966 case KEY_srand:
4967 UNI(OP_SRAND);
4968
4969 case KEY_stat:
4970 UNI(OP_STAT);
4971
4972 case KEY_study:
79072805
LW
4973 UNI(OP_STUDY);
4974
4975 case KEY_substr:
a0d0e21e 4976 LOP(OP_SUBSTR,XTERM);
79072805
LW
4977
4978 case KEY_format:
4979 case KEY_sub:
93a17b20 4980 really_sub:
09bef843 4981 {
3280af22 4982 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 4983 SSize_t tboffset = 0;
09bef843 4984 expectation attrful;
d731386a 4985 bool have_name, have_proto, bad_proto;
09bef843
SB
4986 int key = tmp;
4987
4988 s = skipspace(s);
4989
7e2040f0 4990 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
4991 (*s == ':' && s[1] == ':'))
4992 {
4993 PL_expect = XBLOCK;
4994 attrful = XATTRBLOCK;
b1b65b59
JH
4995 /* remember buffer pos'n for later force_word */
4996 tboffset = s - PL_oldbufptr;
09bef843
SB
4997 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4998 if (strchr(tmpbuf, ':'))
4999 sv_setpv(PL_subname, tmpbuf);
5000 else {
5001 sv_setsv(PL_subname,PL_curstname);
5002 sv_catpvn(PL_subname,"::",2);
5003 sv_catpvn(PL_subname,tmpbuf,len);
5004 }
5005 s = skipspace(d);
5006 have_name = TRUE;
5007 }
463ee0b2 5008 else {
09bef843
SB
5009 if (key == KEY_my)
5010 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5011 PL_expect = XTERMBLOCK;
5012 attrful = XATTRTERM;
5013 sv_setpv(PL_subname,"?");
5014 have_name = FALSE;
463ee0b2 5015 }
4633a7c4 5016
09bef843
SB
5017 if (key == KEY_format) {
5018 if (*s == '=')
5019 PL_lex_formbrack = PL_lex_brackets + 1;
5020 if (have_name)
b1b65b59
JH
5021 (void) force_word(PL_oldbufptr + tboffset, WORD,
5022 FALSE, TRUE, TRUE);
09bef843
SB
5023 OPERATOR(FORMAT);
5024 }
79072805 5025
09bef843
SB
5026 /* Look for a prototype */
5027 if (*s == '(') {
5028 char *p;
5029
5030 s = scan_str(s,FALSE,FALSE);
37fd879b 5031 if (!s)
09bef843 5032 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 5033 /* strip spaces and check for bad characters */
09bef843
SB
5034 d = SvPVX(PL_lex_stuff);
5035 tmp = 0;
d731386a 5036 bad_proto = FALSE;
09bef843 5037 for (p = d; *p; ++p) {
d37a9538 5038 if (!isSPACE(*p)) {
09bef843 5039 d[tmp++] = *p;
d37a9538
ST
5040 if (!strchr("$@%*;[]&\\", *p))
5041 bad_proto = TRUE;
5042 }
09bef843
SB
5043 }
5044 d[tmp] = '\0';
420cdfc1 5045 if (bad_proto && ckWARN(WARN_SYNTAX))
9014280d 5046 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
35c1215d
NC
5047 "Illegal character in prototype for %"SVf" : %s",
5048 PL_subname, d);
09bef843
SB
5049 SvCUR(PL_lex_stuff) = tmp;
5050 have_proto = TRUE;
68dc0745 5051
09bef843 5052 s = skipspace(s);
4633a7c4 5053 }
09bef843
SB
5054 else
5055 have_proto = FALSE;
5056
5057 if (*s == ':' && s[1] != ':')
5058 PL_expect = attrful;
904d85c5
RGS
5059 else if (!have_name && *s != '{' && key == KEY_sub)
5060 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
09bef843
SB
5061
5062 if (have_proto) {
b1b65b59
JH
5063 PL_nextval[PL_nexttoke].opval =
5064 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5065 PL_lex_stuff = Nullsv;
5066 force_next(THING);
68dc0745 5067 }
09bef843 5068 if (!have_name) {
c99da370
JH
5069 sv_setpv(PL_subname,
5070 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
09bef843 5071 TOKEN(ANONSUB);
4633a7c4 5072 }
b1b65b59
JH
5073 (void) force_word(PL_oldbufptr + tboffset, WORD,
5074 FALSE, TRUE, TRUE);
09bef843
SB
5075 if (key == KEY_my)
5076 TOKEN(MYSUB);
5077 TOKEN(SUB);
4633a7c4 5078 }
79072805
LW
5079
5080 case KEY_system:
5081 set_csh();
a0d0e21e 5082 LOP(OP_SYSTEM,XREF);
79072805
LW
5083
5084 case KEY_symlink:
a0d0e21e 5085 LOP(OP_SYMLINK,XTERM);
79072805
LW
5086
5087 case KEY_syscall:
a0d0e21e 5088 LOP(OP_SYSCALL,XTERM);
79072805 5089
c07a80fd 5090 case KEY_sysopen:
5091 LOP(OP_SYSOPEN,XTERM);
5092
137443ea 5093 case KEY_sysseek:
5094 LOP(OP_SYSSEEK,XTERM);
5095
79072805 5096 case KEY_sysread:
a0d0e21e 5097 LOP(OP_SYSREAD,XTERM);
79072805
LW
5098
5099 case KEY_syswrite:
a0d0e21e 5100 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5101
5102 case KEY_tr:
5103 s = scan_trans(s);
5104 TERM(sublex_start());
5105
5106 case KEY_tell:
5107 UNI(OP_TELL);
5108
5109 case KEY_telldir:
5110 UNI(OP_TELLDIR);
5111
463ee0b2 5112 case KEY_tie:
a0d0e21e 5113 LOP(OP_TIE,XTERM);
463ee0b2 5114
c07a80fd 5115 case KEY_tied:
5116 UNI(OP_TIED);
5117
79072805
LW
5118 case KEY_time:
5119 FUN0(OP_TIME);
5120
5121 case KEY_times:
5122 FUN0(OP_TMS);
5123
5124 case KEY_truncate:
a0d0e21e 5125 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5126
5127 case KEY_uc:
5128 UNI(OP_UC);
5129
5130 case KEY_ucfirst:
5131 UNI(OP_UCFIRST);
5132
463ee0b2
LW
5133 case KEY_untie:
5134 UNI(OP_UNTIE);
5135
79072805 5136 case KEY_until:
57843af0 5137 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5138 OPERATOR(UNTIL);
5139
5140 case KEY_unless:
57843af0 5141 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5142 OPERATOR(UNLESS);
5143
5144 case KEY_unlink:
a0d0e21e 5145 LOP(OP_UNLINK,XTERM);
79072805
LW
5146
5147 case KEY_undef:
6f33ba73 5148 UNIDOR(OP_UNDEF);
79072805
LW
5149
5150 case KEY_unpack:
a0d0e21e 5151 LOP(OP_UNPACK,XTERM);
79072805
LW
5152
5153 case KEY_utime:
a0d0e21e 5154 LOP(OP_UTIME,XTERM);
79072805
LW
5155
5156 case KEY_umask:
6f33ba73 5157 UNIDOR(OP_UMASK);
79072805
LW
5158
5159 case KEY_unshift:
a0d0e21e
LW
5160 LOP(OP_UNSHIFT,XTERM);
5161
5162 case KEY_use:
3280af22 5163 if (PL_expect != XSTATE)
a0d0e21e 5164 yyerror("\"use\" not allowed in expression");
89bfa8cd 5165 s = skipspace(s);
a7cb1f99 5166 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
e759cc13 5167 s = force_version(s, TRUE);
a7cb1f99 5168 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5169 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5170 force_next(WORD);
5171 }
e759cc13
RGS
5172 else if (*s == 'v') {
5173 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5174 s = force_version(s, FALSE);
5175 }
89bfa8cd 5176 }
5177 else {
5178 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 5179 s = force_version(s, FALSE);
89bfa8cd 5180 }
a0d0e21e
LW
5181 yylval.ival = 1;
5182 OPERATOR(USE);
79072805
LW
5183
5184 case KEY_values:
5185 UNI(OP_VALUES);
5186
5187 case KEY_vec:
a0d0e21e 5188 LOP(OP_VEC,XTERM);
79072805
LW
5189
5190 case KEY_while:
57843af0 5191 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5192 OPERATOR(WHILE);
5193
5194 case KEY_warn:
3280af22 5195 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5196 LOP(OP_WARN,XTERM);
79072805
LW
5197
5198 case KEY_wait:
5199 FUN0(OP_WAIT);
5200
5201 case KEY_waitpid:
a0d0e21e 5202 LOP(OP_WAITPID,XTERM);
79072805
LW
5203
5204 case KEY_wantarray:
5205 FUN0(OP_WANTARRAY);
5206
5207 case KEY_write:
9d116dd7
JH
5208#ifdef EBCDIC
5209 {
df3728a2
JH
5210 char ctl_l[2];
5211 ctl_l[0] = toCTRL('L');
5212 ctl_l[1] = '\0';
9d116dd7
JH
5213 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5214 }
5215#else
5216 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5217#endif
79072805
LW
5218 UNI(OP_ENTERWRITE);
5219
5220 case KEY_x:
3280af22 5221 if (PL_expect == XOPERATOR)
79072805
LW
5222 Mop(OP_REPEAT);
5223 check_uni();
5224 goto just_a_word;
5225
a0d0e21e
LW
5226 case KEY_xor:
5227 yylval.ival = OP_XOR;
5228 OPERATOR(OROP);
5229
79072805
LW
5230 case KEY_y:
5231 s = scan_trans(s);
5232 TERM(sublex_start());
5233 }
49dc05e3 5234 }}
79072805 5235}
bf4acbe4
GS
5236#ifdef __SC__
5237#pragma segment Main
5238#endif
79072805 5239
e930465f
JH
5240static int
5241S_pending_ident(pTHX)
8eceec63
SC
5242{
5243 register char *d;
a55b55d8 5244 register I32 tmp = 0;
8eceec63
SC
5245 /* pit holds the identifier we read and pending_ident is reset */
5246 char pit = PL_pending_ident;
5247 PL_pending_ident = 0;
5248
5249 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5250 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5251
5252 /* if we're in a my(), we can't allow dynamics here.
5253 $foo'bar has already been turned into $foo::bar, so
5254 just check for colons.
5255
5256 if it's a legal name, the OP is a PADANY.
5257 */
5258 if (PL_in_my) {
5259 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5260 if (strchr(PL_tokenbuf,':'))
5261 yyerror(Perl_form(aTHX_ "No package name allowed for "
5262 "variable %s in \"our\"",
5263 PL_tokenbuf));
dd2155a4 5264 tmp = allocmy(PL_tokenbuf);
8eceec63
SC
5265 }
5266 else {
5267 if (strchr(PL_tokenbuf,':'))
5268 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5269
5270 yylval.opval = newOP(OP_PADANY, 0);
dd2155a4 5271 yylval.opval->op_targ = allocmy(PL_tokenbuf);
8eceec63
SC
5272 return PRIVATEREF;
5273 }
5274 }
5275
5276 /*
5277 build the ops for accesses to a my() variable.
5278
5279 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5280 then used in a comparison. This catches most, but not
5281 all cases. For instance, it catches
5282 sort { my($a); $a <=> $b }
5283 but not
5284 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5285 (although why you'd do that is anyone's guess).
5286 */
5287
5288 if (!strchr(PL_tokenbuf,':')) {
8716503d
DM
5289 if (!PL_in_my)
5290 tmp = pad_findmy(PL_tokenbuf);
5291 if (tmp != NOT_IN_PAD) {
8eceec63 5292 /* might be an "our" variable" */
dd2155a4 5293 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
8eceec63 5294 /* build ops for a bareword */
dd2155a4 5295 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
8eceec63
SC
5296 sv_catpvn(sym, "::", 2);
5297 sv_catpv(sym, PL_tokenbuf+1);
5298 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5299 yylval.opval->op_private = OPpCONST_ENTERED;
5300 gv_fetchpv(SvPVX(sym),
5301 (PL_in_eval
5302 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 5303 : GV_ADDMULTI
8eceec63
SC
5304 ),
5305 ((PL_tokenbuf[0] == '$') ? SVt_PV
5306 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5307 : SVt_PVHV));
5308 return WORD;
5309 }
5310
5311 /* if it's a sort block and they're naming $a or $b */
5312 if (PL_last_lop_op == OP_SORT &&
5313 PL_tokenbuf[0] == '$' &&
5314 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5315 && !PL_tokenbuf[2])
5316 {
5317 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5318 d < PL_bufend && *d != '\n';
5319 d++)
5320 {
5321 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5322 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5323 PL_tokenbuf);
5324 }
5325 }
5326 }
5327
5328 yylval.opval = newOP(OP_PADANY, 0);
5329 yylval.opval->op_targ = tmp;
5330 return PRIVATEREF;
5331 }
5332 }
5333
5334 /*
5335 Whine if they've said @foo in a doublequoted string,
5336 and @foo isn't a variable we can find in the symbol
5337 table.
5338 */
5339 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5340 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5341 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5342 && ckWARN(WARN_AMBIGUOUS))
5343 {
5344 /* Downgraded from fatal to warning 20000522 mjd */
9014280d 5345 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8eceec63
SC
5346 "Possible unintended interpolation of %s in string",
5347 PL_tokenbuf);
5348 }
5349 }
5350
5351 /* build ops for a bareword */
5352 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5353 yylval.opval->op_private = OPpCONST_ENTERED;
5354 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5355 ((PL_tokenbuf[0] == '$') ? SVt_PV
5356 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5357 : SVt_PVHV));
5358 return WORD;
5359}
5360
79072805 5361I32
864dbfa3 5362Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5363{
5364 switch (*d) {
5365 case '_':
5366 if (d[1] == '_') {
a0d0e21e 5367 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5368 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5369 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5370 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5371 if (strEQ(d,"__END__")) return KEY___END__;
5372 }
5373 break;
8990e307
LW
5374 case 'A':
5375 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5376 break;
79072805 5377 case 'a':
463ee0b2
LW
5378 switch (len) {
5379 case 3:
a0d0e21e
LW
5380 if (strEQ(d,"and")) return -KEY_and;
5381 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5382 break;
463ee0b2 5383 case 5:
a0d0e21e
LW
5384 if (strEQ(d,"alarm")) return -KEY_alarm;
5385 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5386 break;
5387 case 6:
a0d0e21e 5388 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5389 break;
5390 }
79072805
LW
5391 break;
5392 case 'B':
5393 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5394 break;
79072805 5395 case 'b':
a0d0e21e
LW
5396 if (strEQ(d,"bless")) return -KEY_bless;
5397 if (strEQ(d,"bind")) return -KEY_bind;
5398 if (strEQ(d,"binmode")) return -KEY_binmode;
5399 break;
5400 case 'C':
5401 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5402 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5403 break;
5404 case 'c':
5405 switch (len) {
5406 case 3:
a0d0e21e
LW
5407 if (strEQ(d,"cmp")) return -KEY_cmp;
5408 if (strEQ(d,"chr")) return -KEY_chr;
5409 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5410 break;
5411 case 4:
77bc9082 5412 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5413 break;
5414 case 5:
a0d0e21e
LW
5415 if (strEQ(d,"close")) return -KEY_close;
5416 if (strEQ(d,"chdir")) return -KEY_chdir;
77bc9082 5417 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5418 if (strEQ(d,"chmod")) return -KEY_chmod;
5419 if (strEQ(d,"chown")) return -KEY_chown;
5420 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5421 break;
5422 case 6:
a0d0e21e
LW
5423 if (strEQ(d,"chroot")) return -KEY_chroot;
5424 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5425 break;
5426 case 7:
a0d0e21e 5427 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5428 break;
5429 case 8:
a0d0e21e
LW
5430 if (strEQ(d,"closedir")) return -KEY_closedir;
5431 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5432 break;
5433 }
5434 break;
ed6116ce
LW
5435 case 'D':
5436 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5437 break;
79072805
LW
5438 case 'd':
5439 switch (len) {
5440 case 2:
5441 if (strEQ(d,"do")) return KEY_do;
5442 break;
5443 case 3:
a0d0e21e 5444 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5445 break;
5446 case 4:
a0d0e21e 5447 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5448 break;
5449 case 6:
5450 if (strEQ(d,"delete")) return KEY_delete;
5451 break;
5452 case 7:
5453 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5454 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5455 break;
5456 case 8:
a0d0e21e 5457 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5458 break;
5459 }
5460 break;
5461 case 'E':
79072805
LW
5462 if (strEQ(d,"END")) return KEY_END;
5463 break;
5464 case 'e':
5465 switch (len) {
5466 case 2:
a0d0e21e 5467 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5468 break;
5469 case 3:
a0d0e21e 5470 if (strEQ(d,"eof")) return -KEY_eof;
c963b151 5471 if (strEQ(d,"err")) return -KEY_err;
a0d0e21e 5472 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5473 break;
5474 case 4:
5475 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5476 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5477 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5478 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5479 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5480 break;
5481 case 5:
5482 if (strEQ(d,"elsif")) return KEY_elsif;
5483 break;
a0d0e21e
LW
5484 case 6:
5485 if (strEQ(d,"exists")) return KEY_exists;
cea2e8a9 5486 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
a0d0e21e 5487 break;
79072805 5488 case 8:
a0d0e21e
LW
5489 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5490 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5491 break;
5492 case 9:
a0d0e21e 5493 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5494 break;
5495 case 10:
a0d0e21e
LW
5496 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5497 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5498 break;
5499 case 11:
a0d0e21e 5500 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5501 break;
a687059c 5502 }
a687059c 5503 break;
79072805
LW
5504 case 'f':
5505 switch (len) {
5506 case 3:
5507 if (strEQ(d,"for")) return KEY_for;
5508 break;
5509 case 4:
a0d0e21e 5510 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5511 break;
5512 case 5:
a0d0e21e
LW
5513 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5514 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5515 break;
5516 case 6:
5517 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5518 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5519 break;
5520 case 7:
5521 if (strEQ(d,"foreach")) return KEY_foreach;
5522 break;
5523 case 8:
a0d0e21e 5524 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5525 break;
378cc40b 5526 }
a687059c 5527 break;
79072805 5528 case 'g':
a687059c
LW
5529 if (strnEQ(d,"get",3)) {
5530 d += 3;
5531 if (*d == 'p') {
79072805
LW
5532 switch (len) {
5533 case 7:
a0d0e21e
LW
5534 if (strEQ(d,"ppid")) return -KEY_getppid;
5535 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5536 break;
5537 case 8:
a0d0e21e
LW
5538 if (strEQ(d,"pwent")) return -KEY_getpwent;
5539 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5540 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5541 break;
5542 case 11:
a0d0e21e
LW
5543 if (strEQ(d,"peername")) return -KEY_getpeername;
5544 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5545 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5546 break;
5547 case 14:
a0d0e21e 5548 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5549 break;
5550 case 16:
a0d0e21e 5551 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5552 break;
5553 }
a687059c
LW
5554 }
5555 else if (*d == 'h') {
a0d0e21e
LW
5556 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5557 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5558 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5559 }
5560 else if (*d == 'n') {
a0d0e21e
LW
5561 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5562 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5563 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5564 }
5565 else if (*d == 's') {
a0d0e21e
LW
5566 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5567 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5568 if (strEQ(d,"servent")) return -KEY_getservent;
5569 if (strEQ(d,"sockname")) return -KEY_getsockname;
5570 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5571 }
5572 else if (*d == 'g') {
a0d0e21e
LW
5573 if (strEQ(d,"grent")) return -KEY_getgrent;
5574 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5575 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5576 }
5577 else if (*d == 'l') {
a0d0e21e 5578 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5579 }
a0d0e21e 5580 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5581 break;
a687059c 5582 }
79072805
LW
5583 switch (len) {
5584 case 2:
a0d0e21e
LW
5585 if (strEQ(d,"gt")) return -KEY_gt;
5586 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5587 break;
5588 case 4:
5589 if (strEQ(d,"grep")) return KEY_grep;
5590 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5591 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5592 break;
5593 case 6:
a0d0e21e 5594 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5595 break;
378cc40b 5596 }
a687059c 5597 break;
79072805 5598 case 'h':
a0d0e21e 5599 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5600 break;
7d07dbc2
MB
5601 case 'I':
5602 if (strEQ(d,"INIT")) return KEY_INIT;
5603 break;
79072805
LW
5604 case 'i':
5605 switch (len) {
5606 case 2:
5607 if (strEQ(d,"if")) return KEY_if;
5608 break;
5609 case 3:
a0d0e21e 5610 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5611 break;
5612 case 5:
a0d0e21e
LW
5613 if (strEQ(d,"index")) return -KEY_index;
5614 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5615 break;
5616 }
a687059c 5617 break;
79072805 5618 case 'j':
a0d0e21e 5619 if (strEQ(d,"join")) return -KEY_join;
a687059c 5620 break;
79072805
LW
5621 case 'k':
5622 if (len == 4) {
3a6a8333 5623 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5624 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5625 }
79072805 5626 break;
79072805
LW
5627 case 'l':
5628 switch (len) {
5629 case 2:
a0d0e21e
LW
5630 if (strEQ(d,"lt")) return -KEY_lt;
5631 if (strEQ(d,"le")) return -KEY_le;
5632 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5633 break;
5634 case 3:
a0d0e21e 5635 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5636 break;
5637 case 4:
5638 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5639 if (strEQ(d,"link")) return -KEY_link;
c0329465 5640 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5641 break;
79072805
LW
5642 case 5:
5643 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5644 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5645 break;
5646 case 6:
a0d0e21e
LW
5647 if (strEQ(d,"length")) return -KEY_length;
5648 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5649 break;
5650 case 7:
a0d0e21e 5651 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5652 break;
5653 case 9:
a0d0e21e 5654 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5655 break;
5656 }
a687059c 5657 break;
79072805
LW
5658 case 'm':
5659 switch (len) {
5660 case 1: return KEY_m;
93a17b20
LW
5661 case 2:
5662 if (strEQ(d,"my")) return KEY_my;
5663 break;
a0d0e21e
LW
5664 case 3:
5665 if (strEQ(d,"map")) return KEY_map;
5666 break;
79072805 5667 case 5:
a0d0e21e 5668 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5669 break;
5670 case 6:
a0d0e21e
LW
5671 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5672 if (strEQ(d,"msgget")) return -KEY_msgget;
5673 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5674 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5675 break;
5676 }
a687059c 5677 break;
79072805
LW
5678 case 'n':
5679 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5680 if (strEQ(d,"ne")) return -KEY_ne;
5681 if (strEQ(d,"not")) return -KEY_not;
5682 if (strEQ(d,"no")) return KEY_no;
a687059c 5683 break;
79072805
LW
5684 case 'o':
5685 switch (len) {
463ee0b2 5686 case 2:
a0d0e21e 5687 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5688 break;
79072805 5689 case 3:
a0d0e21e
LW
5690 if (strEQ(d,"ord")) return -KEY_ord;
5691 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5692 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5693 break;
5694 case 4:
a0d0e21e 5695 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5696 break;
5697 case 7:
a0d0e21e 5698 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5699 break;
fe14fcc3 5700 }
a687059c 5701 break;
79072805
LW
5702 case 'p':
5703 switch (len) {
5704 case 3:
4e553d73 5705 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5706 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5707 break;
5708 case 4:
3a6a8333 5709 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5710 if (strEQ(d,"pack")) return -KEY_pack;
5711 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5712 break;
5713 case 5:
5714 if (strEQ(d,"print")) return KEY_print;
5715 break;
5716 case 6:
5717 if (strEQ(d,"printf")) return KEY_printf;
5718 break;
5719 case 7:
5720 if (strEQ(d,"package")) return KEY_package;
5721 break;
c07a80fd 5722 case 9:
5723 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5724 }
79072805
LW
5725 break;
5726 case 'q':
5727 if (len <= 2) {
5728 if (strEQ(d,"q")) return KEY_q;
8782bef2 5729 if (strEQ(d,"qr")) return KEY_qr;
79072805 5730 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5731 if (strEQ(d,"qw")) return KEY_qw;
79072805 5732 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5733 }
a0d0e21e 5734 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5735 break;
5736 case 'r':
5737 switch (len) {
5738 case 3:
a0d0e21e 5739 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5740 break;
5741 case 4:
a0d0e21e
LW
5742 if (strEQ(d,"read")) return -KEY_read;
5743 if (strEQ(d,"rand")) return -KEY_rand;
5744 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5745 if (strEQ(d,"redo")) return KEY_redo;
5746 break;
5747 case 5:
a0d0e21e
LW
5748 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5749 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5750 break;
5751 case 6:
5752 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5753 if (strEQ(d,"rename")) return -KEY_rename;
5754 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5755 break;
5756 case 7:
ec4ab249 5757 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5758 if (strEQ(d,"reverse")) return -KEY_reverse;
5759 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5760 break;
5761 case 8:
a0d0e21e
LW
5762 if (strEQ(d,"readlink")) return -KEY_readlink;
5763 if (strEQ(d,"readline")) return -KEY_readline;
5764 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5765 break;
5766 case 9:
a0d0e21e 5767 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5768 break;
a687059c 5769 }
79072805
LW
5770 break;
5771 case 's':
a687059c 5772 switch (d[1]) {
79072805 5773 case 0: return KEY_s;
a687059c 5774 case 'c':
79072805 5775 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5776 break;
5777 case 'e':
79072805
LW
5778 switch (len) {
5779 case 4:
a0d0e21e
LW
5780 if (strEQ(d,"seek")) return -KEY_seek;
5781 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5782 break;
5783 case 5:
a0d0e21e 5784 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5785 break;
5786 case 6:
a0d0e21e
LW
5787 if (strEQ(d,"select")) return -KEY_select;
5788 if (strEQ(d,"semctl")) return -KEY_semctl;
5789 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5790 break;
5791 case 7:
a0d0e21e
LW
5792 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5793 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5794 break;
5795 case 8:
a0d0e21e
LW
5796 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5797 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
5798 break;
5799 case 9:
a0d0e21e 5800 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
5801 break;
5802 case 10:
a0d0e21e
LW
5803 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5804 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5805 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
5806 break;
5807 case 11:
a0d0e21e
LW
5808 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5809 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
5810 break;
5811 }
a687059c
LW
5812 break;
5813 case 'h':
79072805
LW
5814 switch (len) {
5815 case 5:
3a6a8333 5816 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
5817 break;
5818 case 6:
a0d0e21e
LW
5819 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5820 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
5821 break;
5822 case 7:
a0d0e21e 5823 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
5824 break;
5825 case 8:
a0d0e21e
LW
5826 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5827 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
5828 break;
5829 }
a687059c
LW
5830 break;
5831 case 'i':
a0d0e21e 5832 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
5833 break;
5834 case 'l':
a0d0e21e 5835 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
5836 break;
5837 case 'o':
79072805 5838 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
5839 if (strEQ(d,"socket")) return -KEY_socket;
5840 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
5841 break;
5842 case 'p':
79072805 5843 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 5844 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 5845 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
5846 break;
5847 case 'q':
a0d0e21e 5848 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
5849 break;
5850 case 'r':
a0d0e21e 5851 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
5852 break;
5853 case 't':
a0d0e21e 5854 if (strEQ(d,"stat")) return -KEY_stat;
79072805 5855 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
5856 break;
5857 case 'u':
a0d0e21e 5858 if (strEQ(d,"substr")) return -KEY_substr;
79072805 5859 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
5860 break;
5861 case 'y':
79072805
LW
5862 switch (len) {
5863 case 6:
a0d0e21e 5864 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
5865 break;
5866 case 7:
a0d0e21e
LW
5867 if (strEQ(d,"symlink")) return -KEY_symlink;
5868 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 5869 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5870 if (strEQ(d,"sysread")) return -KEY_sysread;
5871 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
5872 break;
5873 case 8:
a0d0e21e 5874 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 5875 break;
a687059c 5876 }
a687059c
LW
5877 break;
5878 }
5879 break;
79072805
LW
5880 case 't':
5881 switch (len) {
5882 case 2:
5883 if (strEQ(d,"tr")) return KEY_tr;
5884 break;
463ee0b2
LW
5885 case 3:
5886 if (strEQ(d,"tie")) return KEY_tie;
5887 break;
79072805 5888 case 4:
a0d0e21e 5889 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 5890 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 5891 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
5892 break;
5893 case 5:
a0d0e21e 5894 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
5895 break;
5896 case 7:
a0d0e21e 5897 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
5898 break;
5899 case 8:
a0d0e21e 5900 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 5901 break;
378cc40b 5902 }
a687059c 5903 break;
79072805
LW
5904 case 'u':
5905 switch (len) {
5906 case 2:
a0d0e21e
LW
5907 if (strEQ(d,"uc")) return -KEY_uc;
5908 break;
5909 case 3:
5910 if (strEQ(d,"use")) return KEY_use;
79072805
LW
5911 break;
5912 case 5:
5913 if (strEQ(d,"undef")) return KEY_undef;
5914 if (strEQ(d,"until")) return KEY_until;
463ee0b2 5915 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
5916 if (strEQ(d,"utime")) return -KEY_utime;
5917 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
5918 break;
5919 case 6:
5920 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
5921 if (strEQ(d,"unpack")) return -KEY_unpack;
5922 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
5923 break;
5924 case 7:
3a6a8333 5925 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 5926 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 5927 break;
a687059c
LW
5928 }
5929 break;
79072805 5930 case 'v':
a0d0e21e
LW
5931 if (strEQ(d,"values")) return -KEY_values;
5932 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 5933 break;
79072805
LW
5934 case 'w':
5935 switch (len) {
5936 case 4:
a0d0e21e
LW
5937 if (strEQ(d,"warn")) return -KEY_warn;
5938 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
5939 break;
5940 case 5:
5941 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 5942 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
5943 break;
5944 case 7:
a0d0e21e 5945 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
5946 break;
5947 case 9:
a0d0e21e 5948 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 5949 break;
2f3197b3 5950 }
a687059c 5951 break;
79072805 5952 case 'x':
a0d0e21e
LW
5953 if (len == 1) return -KEY_x;
5954 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 5955 break;
79072805
LW
5956 case 'y':
5957 if (len == 1) return KEY_y;
5958 break;
5959 case 'z':
a687059c
LW
5960 break;
5961 }
79072805 5962 return 0;
a687059c
LW
5963}
5964
76e3520e 5965STATIC void
cea2e8a9 5966S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 5967{
2f3197b3
LW
5968 char *w;
5969
d008e5eb 5970 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
5971 if (ckWARN(WARN_SYNTAX)) {
5972 int level = 1;
5973 for (w = s+2; *w && level; w++) {
5974 if (*w == '(')
5975 ++level;
5976 else if (*w == ')')
5977 --level;
5978 }
5979 if (*w)
5980 for (; *w && isSPACE(*w); w++) ;
5981 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9014280d 5982 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 5983 "%s (...) interpreted as function",name);
d008e5eb 5984 }
2f3197b3 5985 }
3280af22 5986 while (s < PL_bufend && isSPACE(*s))
2f3197b3 5987 s++;
a687059c
LW
5988 if (*s == '(')
5989 s++;
3280af22 5990 while (s < PL_bufend && isSPACE(*s))
a687059c 5991 s++;
7e2040f0 5992 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 5993 w = s++;
7e2040f0 5994 while (isALNUM_lazy_if(s,UTF))
a687059c 5995 s++;
3280af22 5996 while (s < PL_bufend && isSPACE(*s))
a687059c 5997 s++;
e929a76b 5998 if (*s == ',') {
463ee0b2 5999 int kw;
e929a76b 6000 *s = '\0';
864dbfa3 6001 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 6002 *s = ',';
463ee0b2 6003 if (kw)
e929a76b 6004 return;
cea2e8a9 6005 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
6006 }
6007 }
6008}
6009
423cee85
JH
6010/* Either returns sv, or mortalizes sv and returns a new SV*.
6011 Best used as sv=new_constant(..., sv, ...).
6012 If s, pv are NULL, calls subroutine with one argument,
6013 and type is used with error messages only. */
6014
b3ac6de7 6015STATIC SV *
dff6d3cd 6016S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 6017 const char *type)
b3ac6de7 6018{
b3ac6de7 6019 dSP;
3280af22 6020 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 6021 SV *res;
b3ac6de7
IZ
6022 SV **cvp;
6023 SV *cv, *typesv;
f0af216f 6024 const char *why1, *why2, *why3;
4e553d73 6025
f0af216f 6026 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
6027 SV *msg;
6028
f0af216f 6029 why2 = strEQ(key,"charnames")
41ab332f 6030 ? "(possibly a missing \"use charnames ...\")"
f0af216f 6031 : "";
4e553d73 6032 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
6033 (type ? type: "undef"), why2);
6034
6035 /* This is convoluted and evil ("goto considered harmful")
6036 * but I do not understand the intricacies of all the different
6037 * failure modes of %^H in here. The goal here is to make
6038 * the most probable error message user-friendly. --jhi */
6039
6040 goto msgdone;
6041
423cee85 6042 report:
4e553d73 6043 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 6044 (type ? type: "undef"), why1, why2, why3);
41ab332f 6045 msgdone:
423cee85
JH
6046 yyerror(SvPVX(msg));
6047 SvREFCNT_dec(msg);
6048 return sv;
6049 }
b3ac6de7
IZ
6050 cvp = hv_fetch(table, key, strlen(key), FALSE);
6051 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
6052 why1 = "$^H{";
6053 why2 = key;
f0af216f 6054 why3 = "} is not defined";
423cee85 6055 goto report;
b3ac6de7
IZ
6056 }
6057 sv_2mortal(sv); /* Parent created it permanently */
6058 cv = *cvp;
423cee85
JH
6059 if (!pv && s)
6060 pv = sv_2mortal(newSVpvn(s, len));
6061 if (type && pv)
6062 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 6063 else
423cee85 6064 typesv = &PL_sv_undef;
4e553d73 6065
e788e7d3 6066 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
6067 ENTER ;
6068 SAVETMPS;
4e553d73 6069
423cee85 6070 PUSHMARK(SP) ;
a5845cb7 6071 EXTEND(sp, 3);
423cee85
JH
6072 if (pv)
6073 PUSHs(pv);
b3ac6de7 6074 PUSHs(sv);
423cee85
JH
6075 if (pv)
6076 PUSHs(typesv);
b3ac6de7 6077 PUTBACK;
423cee85 6078 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 6079
423cee85 6080 SPAGAIN ;
4e553d73 6081
423cee85 6082 /* Check the eval first */
9b0e499b 6083 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
6084 STRLEN n_a;
6085 sv_catpv(ERRSV, "Propagated");
6086 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 6087 (void)POPs;
423cee85
JH
6088 res = SvREFCNT_inc(sv);
6089 }
6090 else {
6091 res = POPs;
e1f15930 6092 (void)SvREFCNT_inc(res);
423cee85 6093 }
4e553d73 6094
423cee85
JH
6095 PUTBACK ;
6096 FREETMPS ;
6097 LEAVE ;
b3ac6de7 6098 POPSTACK;
4e553d73 6099
b3ac6de7 6100 if (!SvOK(res)) {
423cee85
JH
6101 why1 = "Call to &{$^H{";
6102 why2 = key;
f0af216f 6103 why3 = "}} did not return a defined value";
423cee85
JH
6104 sv = res;
6105 goto report;
9b0e499b 6106 }
423cee85 6107
9b0e499b 6108 return res;
b3ac6de7 6109}
4e553d73 6110
76e3520e 6111STATIC char *
cea2e8a9 6112S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
6113{
6114 register char *d = dest;
8903cb82 6115 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 6116 for (;;) {
8903cb82 6117 if (d >= e)
cea2e8a9 6118 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6119 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6120 *d++ = *s++;
7e2040f0 6121 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6122 *d++ = ':';
6123 *d++ = ':';
6124 s++;
6125 }
c3e0f903 6126 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6127 *d++ = *s++;
6128 *d++ = *s++;
6129 }
fd400ab9 6130 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6131 char *t = s + UTF8SKIP(s);
fd400ab9 6132 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6133 t += UTF8SKIP(t);
6134 if (d + (t - s) > e)
cea2e8a9 6135 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6136 Copy(s, d, t - s, char);
6137 d += t - s;
6138 s = t;
6139 }
463ee0b2
LW
6140 else {
6141 *d = '\0';
6142 *slp = d - dest;
6143 return s;
e929a76b 6144 }
378cc40b
LW
6145 }
6146}
6147
76e3520e 6148STATIC char *
cea2e8a9 6149S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6150{
6151 register char *d;
8903cb82 6152 register char *e;
79072805 6153 char *bracket = 0;
748a9306 6154 char funny = *s++;
378cc40b 6155
a0d0e21e
LW
6156 if (isSPACE(*s))
6157 s = skipspace(s);
378cc40b 6158 d = dest;
8903cb82 6159 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6160 if (isDIGIT(*s)) {
8903cb82 6161 while (isDIGIT(*s)) {
6162 if (d >= e)
cea2e8a9 6163 Perl_croak(aTHX_ ident_too_long);
378cc40b 6164 *d++ = *s++;
8903cb82 6165 }
378cc40b
LW
6166 }
6167 else {
463ee0b2 6168 for (;;) {
8903cb82 6169 if (d >= e)
cea2e8a9 6170 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6171 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6172 *d++ = *s++;
7e2040f0 6173 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6174 *d++ = ':';
6175 *d++ = ':';
6176 s++;
6177 }
a0d0e21e 6178 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6179 *d++ = *s++;
6180 *d++ = *s++;
6181 }
fd400ab9 6182 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6183 char *t = s + UTF8SKIP(s);
fd400ab9 6184 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6185 t += UTF8SKIP(t);
6186 if (d + (t - s) > e)
cea2e8a9 6187 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6188 Copy(s, d, t - s, char);
6189 d += t - s;
6190 s = t;
6191 }
463ee0b2
LW
6192 else
6193 break;
6194 }
378cc40b
LW
6195 }
6196 *d = '\0';
6197 d = dest;
79072805 6198 if (*d) {
3280af22
NIS
6199 if (PL_lex_state != LEX_NORMAL)
6200 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6201 return s;
378cc40b 6202 }
748a9306 6203 if (*s == '$' && s[1] &&
7e2040f0 6204 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6205 {
4810e5ec 6206 return s;
5cd24f17 6207 }
79072805
LW
6208 if (*s == '{') {
6209 bracket = s;
6210 s++;
6211 }
6212 else if (ck_uni)
6213 check_uni();
93a17b20 6214 if (s < send)
79072805
LW
6215 *d = *s++;
6216 d[1] = '\0';
2b92dfce 6217 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6218 *d = toCTRL(*s);
6219 s++;
de3bb511 6220 }
79072805 6221 if (bracket) {
748a9306 6222 if (isSPACE(s[-1])) {
fa83b5b6 6223 while (s < send) {
6224 char ch = *s++;
bf4acbe4 6225 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6226 *d = ch;
6227 break;
6228 }
6229 }
748a9306 6230 }
7e2040f0 6231 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6232 d++;
a0ed51b3
LW
6233 if (UTF) {
6234 e = s;
155aba94 6235 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6236 e += UTF8SKIP(e);
fd400ab9 6237 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6238 e += UTF8SKIP(e);
6239 }
6240 Copy(s, d, e - s, char);
6241 d += e - s;
6242 s = e;
6243 }
6244 else {
2b92dfce 6245 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6246 *d++ = *s++;
2b92dfce 6247 if (d >= e)
cea2e8a9 6248 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6249 }
79072805 6250 *d = '\0';
bf4acbe4 6251 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6252 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6253 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6254 const char *brack = *s == '[' ? "[...]" : "{...}";
9014280d 6255 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 6256 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6257 funny, dest, brack, funny, dest, brack);
6258 }
79072805 6259 bracket++;
a0be28da 6260 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6261 return s;
6262 }
4e553d73
NIS
6263 }
6264 /* Handle extended ${^Foo} variables
2b92dfce
GS
6265 * 1999-02-27 mjd-perl-patch@plover.com */
6266 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6267 && isALNUM(*s))
6268 {
6269 d++;
6270 while (isALNUM(*s) && d < e) {
6271 *d++ = *s++;
6272 }
6273 if (d >= e)
cea2e8a9 6274 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6275 *d = '\0';
79072805
LW
6276 }
6277 if (*s == '}') {
6278 s++;
3280af22
NIS
6279 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6280 PL_lex_state = LEX_INTERPEND;
748a9306
LW
6281 if (funny == '#')
6282 funny = '@';
d008e5eb 6283 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6284 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6285 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6286 {
9014280d 6287 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
6288 "Ambiguous use of %c{%s} resolved to %c%s",
6289 funny, dest, funny, dest);
6290 }
6291 }
437fd210
AE
6292 if (PL_lex_inwhat == OP_STRINGIFY)
6293 PL_expect = XREF;
79072805
LW
6294 }
6295 else {
6296 s = bracket; /* let the parser handle it */
93a17b20 6297 *dest = '\0';
79072805
LW
6298 }
6299 }
3280af22
NIS
6300 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6301 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6302 return s;
6303}
6304
cea2e8a9 6305void
2b36a5a0 6306Perl_pmflag(pTHX_ U32* pmfl, int ch)
a0d0e21e 6307{
bbce6d69 6308 if (ch == 'i')
a0d0e21e 6309 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6310 else if (ch == 'g')
6311 *pmfl |= PMf_GLOBAL;
c90c0ff4 6312 else if (ch == 'c')
6313 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6314 else if (ch == 'o')
6315 *pmfl |= PMf_KEEP;
6316 else if (ch == 'm')
6317 *pmfl |= PMf_MULTILINE;
6318 else if (ch == 's')
6319 *pmfl |= PMf_SINGLELINE;
6320 else if (ch == 'x')
6321 *pmfl |= PMf_EXTENDED;
6322}
378cc40b 6323
76e3520e 6324STATIC char *
cea2e8a9 6325S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6326{
79072805
LW
6327 PMOP *pm;
6328 char *s;
378cc40b 6329
09bef843 6330 s = scan_str(start,FALSE,FALSE);
37fd879b 6331 if (!s)
cea2e8a9 6332 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6333
8782bef2 6334 pm = (PMOP*)newPMOP(type, 0);
3280af22 6335 if (PL_multi_open == '?')
79072805 6336 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6337 if(type == OP_QR) {
6338 while (*s && strchr("iomsx", *s))
6339 pmflag(&pm->op_pmflags,*s++);
6340 }
6341 else {
6342 while (*s && strchr("iogcmsx", *s))
6343 pmflag(&pm->op_pmflags,*s++);
6344 }
4ac733c9
MJD
6345 /* issue a warning if /c is specified,but /g is not */
6346 if (ckWARN(WARN_REGEXP) &&
6347 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
6348 {
6349 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
6350 }
6351
4633a7c4 6352 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6353
3280af22 6354 PL_lex_op = (OP*)pm;
79072805 6355 yylval.ival = OP_MATCH;
378cc40b
LW
6356 return s;
6357}
6358
76e3520e 6359STATIC char *
cea2e8a9 6360S_scan_subst(pTHX_ char *start)
79072805 6361{
a0d0e21e 6362 register char *s;
79072805 6363 register PMOP *pm;
4fdae800 6364 I32 first_start;
79072805
LW
6365 I32 es = 0;
6366
79072805
LW
6367 yylval.ival = OP_NULL;
6368
09bef843 6369 s = scan_str(start,FALSE,FALSE);
79072805 6370
37fd879b 6371 if (!s)
cea2e8a9 6372 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6373
3280af22 6374 if (s[-1] == PL_multi_open)
79072805
LW
6375 s--;
6376
3280af22 6377 first_start = PL_multi_start;
09bef843 6378 s = scan_str(s,FALSE,FALSE);
79072805 6379 if (!s) {
37fd879b 6380 if (PL_lex_stuff) {
3280af22 6381 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6382 PL_lex_stuff = Nullsv;
6383 }
cea2e8a9 6384 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6385 }
3280af22 6386 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6387
79072805 6388 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6389 while (*s) {
a687059c
LW
6390 if (*s == 'e') {
6391 s++;
2f3197b3 6392 es++;
a687059c 6393 }
b3eb6a9b 6394 else if (strchr("iogcmsx", *s))
a0d0e21e 6395 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6396 else
6397 break;
378cc40b 6398 }
79072805 6399
64e578a2
MJD
6400 /* /c is not meaningful with s/// */
6401 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
4ac733c9 6402 {
64e578a2 6403 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
4ac733c9
MJD
6404 }
6405
79072805
LW
6406 if (es) {
6407 SV *repl;
0244c3a4
GS
6408 PL_sublex_info.super_bufptr = s;
6409 PL_sublex_info.super_bufend = PL_bufend;
6410 PL_multi_end = 0;
79072805 6411 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6412 repl = newSVpvn("",0);
463ee0b2 6413 while (es-- > 0)
a0d0e21e 6414 sv_catpv(repl, es ? "eval " : "do ");
79072805 6415 sv_catpvn(repl, "{ ", 2);
3280af22 6416 sv_catsv(repl, PL_lex_repl);
79072805 6417 sv_catpvn(repl, " };", 2);
25da4f38 6418 SvEVALED_on(repl);
3280af22
NIS
6419 SvREFCNT_dec(PL_lex_repl);
6420 PL_lex_repl = repl;
378cc40b 6421 }
79072805 6422
4633a7c4 6423 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6424 PL_lex_op = (OP*)pm;
79072805 6425 yylval.ival = OP_SUBST;
378cc40b
LW
6426 return s;
6427}
6428
76e3520e 6429STATIC char *
cea2e8a9 6430S_scan_trans(pTHX_ char *start)
378cc40b 6431{
a0d0e21e 6432 register char* s;
11343788 6433 OP *o;
79072805
LW
6434 short *tbl;
6435 I32 squash;
a0ed51b3 6436 I32 del;
79072805
LW
6437 I32 complement;
6438
6439 yylval.ival = OP_NULL;
6440
09bef843 6441 s = scan_str(start,FALSE,FALSE);
37fd879b 6442 if (!s)
cea2e8a9 6443 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6444 if (s[-1] == PL_multi_open)
2f3197b3
LW
6445 s--;
6446
09bef843 6447 s = scan_str(s,FALSE,FALSE);
79072805 6448 if (!s) {
37fd879b 6449 if (PL_lex_stuff) {
3280af22 6450 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6451 PL_lex_stuff = Nullsv;
6452 }
cea2e8a9 6453 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6454 }
79072805 6455
a0ed51b3 6456 complement = del = squash = 0;
6940069f 6457 while (strchr("cds", *s)) {
395c3793 6458 if (*s == 'c')
79072805 6459 complement = OPpTRANS_COMPLEMENT;
395c3793 6460 else if (*s == 'd')
a0ed51b3
LW
6461 del = OPpTRANS_DELETE;
6462 else if (*s == 's')
79072805 6463 squash = OPpTRANS_SQUASH;
395c3793
LW
6464 s++;
6465 }
8973db79
JH
6466
6467 New(803, tbl, complement&&!del?258:256, short);
6468 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7948272d
NIS
6469 o->op_private = del|squash|complement|
6470 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6471 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6472
3280af22 6473 PL_lex_op = o;
79072805
LW
6474 yylval.ival = OP_TRANS;
6475 return s;
6476}
6477
76e3520e 6478STATIC char *
cea2e8a9 6479S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6480{
6481 SV *herewas;
6482 I32 op_type = OP_SCALAR;
6483 I32 len;
6484 SV *tmpstr;
6485 char term;
6486 register char *d;
fc36a67e 6487 register char *e;
4633a7c4 6488 char *peek;
3280af22 6489 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6490
6491 s += 2;
3280af22
NIS
6492 d = PL_tokenbuf;
6493 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6494 if (!outer)
79072805 6495 *d++ = '\n';
bf4acbe4 6496 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6497 if (*peek && strchr("`'\"",*peek)) {
6498 s = peek;
79072805 6499 term = *s++;
3280af22 6500 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6501 d += len;
3280af22 6502 if (s < PL_bufend)
79072805 6503 s++;
79072805
LW
6504 }
6505 else {
6506 if (*s == '\\')
6507 s++, term = '\'';
6508 else
6509 term = '"';
7e2040f0 6510 if (!isALNUM_lazy_if(s,UTF))
12bcd1a6 6511 deprecate_old("bare << to mean <<\"\"");
7e2040f0 6512 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6513 if (d < e)
6514 *d++ = *s;
6515 }
6516 }
3280af22 6517 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6518 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6519 *d++ = '\n';
6520 *d = '\0';
3280af22 6521 len = d - PL_tokenbuf;
6a27c188 6522#ifndef PERL_STRICT_CR
f63a84b2
LW
6523 d = strchr(s, '\r');
6524 if (d) {
6525 char *olds = s;
6526 s = d;
3280af22 6527 while (s < PL_bufend) {
f63a84b2
LW
6528 if (*s == '\r') {
6529 *d++ = '\n';
6530 if (*++s == '\n')
6531 s++;
6532 }
6533 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6534 *d++ = *s++;
6535 s++;
6536 }
6537 else
6538 *d++ = *s++;
6539 }
6540 *d = '\0';
3280af22
NIS
6541 PL_bufend = d;
6542 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6543 s = olds;
6544 }
6545#endif
79072805 6546 d = "\n";
3280af22 6547 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6548 herewas = newSVpvn(s,PL_bufend-s);
79072805 6549 else
79cb57f6 6550 s--, herewas = newSVpvn(s,d-s);
79072805 6551 s += SvCUR(herewas);
748a9306 6552
8d6dde3e 6553 tmpstr = NEWSV(87,79);
748a9306
LW
6554 sv_upgrade(tmpstr, SVt_PVIV);
6555 if (term == '\'') {
79072805 6556 op_type = OP_CONST;
748a9306
LW
6557 SvIVX(tmpstr) = -1;
6558 }
6559 else if (term == '`') {
79072805 6560 op_type = OP_BACKTICK;
748a9306
LW
6561 SvIVX(tmpstr) = '\\';
6562 }
79072805
LW
6563
6564 CLINE;
57843af0 6565 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6566 PL_multi_open = PL_multi_close = '<';
6567 term = *PL_tokenbuf;
0244c3a4
GS
6568 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6569 char *bufptr = PL_sublex_info.super_bufptr;
6570 char *bufend = PL_sublex_info.super_bufend;
6571 char *olds = s - SvCUR(herewas);
6572 s = strchr(bufptr, '\n');
6573 if (!s)
6574 s = bufend;
6575 d = s;
6576 while (s < bufend &&
6577 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6578 if (*s++ == '\n')
57843af0 6579 CopLINE_inc(PL_curcop);
0244c3a4
GS
6580 }
6581 if (s >= bufend) {
eb160463 6582 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
6583 missingterm(PL_tokenbuf);
6584 }
6585 sv_setpvn(herewas,bufptr,d-bufptr+1);
6586 sv_setpvn(tmpstr,d+1,s-d);
6587 s += len - 1;
6588 sv_catpvn(herewas,s,bufend-s);
6589 (void)strcpy(bufptr,SvPVX(herewas));
6590
6591 s = olds;
6592 goto retval;
6593 }
6594 else if (!outer) {
79072805 6595 d = s;
3280af22
NIS
6596 while (s < PL_bufend &&
6597 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6598 if (*s++ == '\n')
57843af0 6599 CopLINE_inc(PL_curcop);
79072805 6600 }
3280af22 6601 if (s >= PL_bufend) {
eb160463 6602 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6603 missingterm(PL_tokenbuf);
79072805
LW
6604 }
6605 sv_setpvn(tmpstr,d+1,s-d);
6606 s += len - 1;
57843af0 6607 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6608
3280af22
NIS
6609 sv_catpvn(herewas,s,PL_bufend-s);
6610 sv_setsv(PL_linestr,herewas);
6611 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6612 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6613 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6614 }
6615 else
6616 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6617 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6618 if (!outer ||
3280af22 6619 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
eb160463 6620 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6621 missingterm(PL_tokenbuf);
79072805 6622 }
57843af0 6623 CopLINE_inc(PL_curcop);
3280af22 6624 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6625 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6626#ifndef PERL_STRICT_CR
3280af22 6627 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6628 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6629 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6630 {
3280af22
NIS
6631 PL_bufend[-2] = '\n';
6632 PL_bufend--;
6633 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6634 }
3280af22
NIS
6635 else if (PL_bufend[-1] == '\r')
6636 PL_bufend[-1] = '\n';
f63a84b2 6637 }
3280af22
NIS
6638 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6639 PL_bufend[-1] = '\n';
f63a84b2 6640#endif
3280af22 6641 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6642 SV *sv = NEWSV(88,0);
6643
93a17b20 6644 sv_upgrade(sv, SVt_PVMG);
3280af22 6645 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
6646 (void)SvIOK_on(sv);
6647 SvIVX(sv) = 0;
57843af0 6648 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6649 }
3280af22
NIS
6650 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6651 s = PL_bufend - 1;
79072805 6652 *s = ' ';
3280af22
NIS
6653 sv_catsv(PL_linestr,herewas);
6654 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805
LW
6655 }
6656 else {
3280af22
NIS
6657 s = PL_bufend;
6658 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6659 }
6660 }
79072805 6661 s++;
0244c3a4 6662retval:
57843af0 6663 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6664 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6665 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6666 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6667 }
8990e307 6668 SvREFCNT_dec(herewas);
0064a8a9 6669 if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7948272d 6670 SvUTF8_on(tmpstr);
3280af22 6671 PL_lex_stuff = tmpstr;
79072805
LW
6672 yylval.ival = op_type;
6673 return s;
6674}
6675
02aa26ce
NT
6676/* scan_inputsymbol
6677 takes: current position in input buffer
6678 returns: new position in input buffer
6679 side-effects: yylval and lex_op are set.
6680
6681 This code handles:
6682
6683 <> read from ARGV
6684 <FH> read from filehandle
6685 <pkg::FH> read from package qualified filehandle
6686 <pkg'FH> read from package qualified filehandle
6687 <$fh> read from filehandle in $fh
6688 <*.h> filename glob
6689
6690*/
6691
76e3520e 6692STATIC char *
cea2e8a9 6693S_scan_inputsymbol(pTHX_ char *start)
79072805 6694{
02aa26ce 6695 register char *s = start; /* current position in buffer */
79072805 6696 register char *d;
fc36a67e 6697 register char *e;
1b420867 6698 char *end;
79072805
LW
6699 I32 len;
6700
3280af22
NIS
6701 d = PL_tokenbuf; /* start of temp holding space */
6702 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6703 end = strchr(s, '\n');
6704 if (!end)
6705 end = PL_bufend;
6706 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6707
6708 /* die if we didn't have space for the contents of the <>,
1b420867 6709 or if it didn't end, or if we see a newline
02aa26ce
NT
6710 */
6711
3280af22 6712 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6713 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6714 if (s >= end)
cea2e8a9 6715 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6716
fc36a67e 6717 s++;
02aa26ce
NT
6718
6719 /* check for <$fh>
6720 Remember, only scalar variables are interpreted as filehandles by
6721 this code. Anything more complex (e.g., <$fh{$num}>) will be
6722 treated as a glob() call.
6723 This code makes use of the fact that except for the $ at the front,
6724 a scalar variable and a filehandle look the same.
6725 */
4633a7c4 6726 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6727
6728 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6729 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6730 d++;
02aa26ce
NT
6731
6732 /* If we've tried to read what we allow filehandles to look like, and
6733 there's still text left, then it must be a glob() and not a getline.
6734 Use scan_str to pull out the stuff between the <> and treat it
6735 as nothing more than a string.
6736 */
6737
3280af22 6738 if (d - PL_tokenbuf != len) {
79072805
LW
6739 yylval.ival = OP_GLOB;
6740 set_csh();
09bef843 6741 s = scan_str(start,FALSE,FALSE);
79072805 6742 if (!s)
cea2e8a9 6743 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6744 return s;
6745 }
395c3793 6746 else {
9b3023bc
RGS
6747 bool readline_overriden = FALSE;
6748 GV *gv_readline = Nullgv;
6749 GV **gvp;
02aa26ce 6750 /* we're in a filehandle read situation */
3280af22 6751 d = PL_tokenbuf;
02aa26ce
NT
6752
6753 /* turn <> into <ARGV> */
79072805
LW
6754 if (!len)
6755 (void)strcpy(d,"ARGV");
02aa26ce 6756
9b3023bc 6757 /* Check whether readline() is overriden */
ba979b31
NIS
6758 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
6759 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 6760 ||
ba979b31 6761 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9b3023bc 6762 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
ba979b31 6763 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
6764 readline_overriden = TRUE;
6765
02aa26ce
NT
6766 /* if <$fh>, create the ops to turn the variable into a
6767 filehandle
6768 */
79072805 6769 if (*d == '$') {
a0d0e21e 6770 I32 tmp;
02aa26ce
NT
6771
6772 /* try to find it in the pad for this block, otherwise find
6773 add symbol table ops
6774 */
11343788 6775 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
dd2155a4
DM
6776 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
6777 SV *sym = sv_2mortal(
6778 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
f558d5af
JH
6779 sv_catpvn(sym, "::", 2);
6780 sv_catpv(sym, d+1);
6781 d = SvPVX(sym);
6782 goto intro_sym;
6783 }
6784 else {
6785 OP *o = newOP(OP_PADSV, 0);
6786 o->op_targ = tmp;
9b3023bc
RGS
6787 PL_lex_op = readline_overriden
6788 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6789 append_elem(OP_LIST, o,
6790 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
6791 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 6792 }
a0d0e21e
LW
6793 }
6794 else {
f558d5af
JH
6795 GV *gv;
6796 ++d;
6797intro_sym:
6798 gv = gv_fetchpv(d,
6799 (PL_in_eval
6800 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 6801 : GV_ADDMULTI),
f558d5af 6802 SVt_PV);
9b3023bc
RGS
6803 PL_lex_op = readline_overriden
6804 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6805 append_elem(OP_LIST,
6806 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
6807 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6808 : (OP*)newUNOP(OP_READLINE, 0,
6809 newUNOP(OP_RV2SV, 0,
6810 newGVOP(OP_GV, 0, gv)));
a0d0e21e 6811 }
7c6fadd6
RGS
6812 if (!readline_overriden)
6813 PL_lex_op->op_flags |= OPf_SPECIAL;
f5284f61 6814 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
6815 yylval.ival = OP_NULL;
6816 }
02aa26ce
NT
6817
6818 /* If it's none of the above, it must be a literal filehandle
6819 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 6820 else {
85e6fe83 6821 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9b3023bc
RGS
6822 PL_lex_op = readline_overriden
6823 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6824 append_elem(OP_LIST,
6825 newGVOP(OP_GV, 0, gv),
6826 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6827 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
6828 yylval.ival = OP_NULL;
6829 }
6830 }
02aa26ce 6831
79072805
LW
6832 return s;
6833}
6834
02aa26ce
NT
6835
6836/* scan_str
6837 takes: start position in buffer
09bef843
SB
6838 keep_quoted preserve \ on the embedded delimiter(s)
6839 keep_delims preserve the delimiters around the string
02aa26ce
NT
6840 returns: position to continue reading from buffer
6841 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6842 updates the read buffer.
6843
6844 This subroutine pulls a string out of the input. It is called for:
6845 q single quotes q(literal text)
6846 ' single quotes 'literal text'
6847 qq double quotes qq(interpolate $here please)
6848 " double quotes "interpolate $here please"
6849 qx backticks qx(/bin/ls -l)
6850 ` backticks `/bin/ls -l`
6851 qw quote words @EXPORT_OK = qw( func() $spam )
6852 m// regexp match m/this/
6853 s/// regexp substitute s/this/that/
6854 tr/// string transliterate tr/this/that/
6855 y/// string transliterate y/this/that/
6856 ($*@) sub prototypes sub foo ($)
09bef843 6857 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
6858 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6859
6860 In most of these cases (all but <>, patterns and transliterate)
6861 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6862 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6863 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6864 calls scan_str().
4e553d73 6865
02aa26ce
NT
6866 It skips whitespace before the string starts, and treats the first
6867 character as the delimiter. If the delimiter is one of ([{< then
6868 the corresponding "close" character )]}> is used as the closing
6869 delimiter. It allows quoting of delimiters, and if the string has
6870 balanced delimiters ([{<>}]) it allows nesting.
6871
37fd879b
HS
6872 On success, the SV with the resulting string is put into lex_stuff or,
6873 if that is already non-NULL, into lex_repl. The second case occurs only
6874 when parsing the RHS of the special constructs s/// and tr/// (y///).
6875 For convenience, the terminating delimiter character is stuffed into
6876 SvIVX of the SV.
02aa26ce
NT
6877*/
6878
76e3520e 6879STATIC char *
09bef843 6880S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 6881{
02aa26ce
NT
6882 SV *sv; /* scalar value: string */
6883 char *tmps; /* temp string, used for delimiter matching */
6884 register char *s = start; /* current position in the buffer */
6885 register char term; /* terminating character */
6886 register char *to; /* current position in the sv's data */
6887 I32 brackets = 1; /* bracket nesting level */
89491803 6888 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e
IH
6889 I32 termcode; /* terminating char. code */
6890 U8 termstr[UTF8_MAXLEN]; /* terminating string */
6891 STRLEN termlen; /* length of terminating string */
6892 char *last = NULL; /* last position for nesting bracket */
02aa26ce
NT
6893
6894 /* skip space before the delimiter */
fb73857a 6895 if (isSPACE(*s))
6896 s = skipspace(s);
02aa26ce
NT
6897
6898 /* mark where we are, in case we need to report errors */
79072805 6899 CLINE;
02aa26ce
NT
6900
6901 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 6902 term = *s;
220e2d4e
IH
6903 if (!UTF) {
6904 termcode = termstr[0] = term;
6905 termlen = 1;
6906 }
6907 else {
f3b9ce0f 6908 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
6909 Copy(s, termstr, termlen, U8);
6910 if (!UTF8_IS_INVARIANT(term))
6911 has_utf8 = TRUE;
6912 }
b1c7b182 6913
02aa26ce 6914 /* mark where we are */
57843af0 6915 PL_multi_start = CopLINE(PL_curcop);
3280af22 6916 PL_multi_open = term;
02aa26ce
NT
6917
6918 /* find corresponding closing delimiter */
93a17b20 6919 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
6920 termcode = termstr[0] = term = tmps[5];
6921
3280af22 6922 PL_multi_close = term;
79072805 6923
02aa26ce 6924 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
6925 assuming. 79 is the SV's initial length. What a random number. */
6926 sv = NEWSV(87,79);
ed6116ce 6927 sv_upgrade(sv, SVt_PVIV);
220e2d4e 6928 SvIVX(sv) = termcode;
a0d0e21e 6929 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
6930
6931 /* move past delimiter and try to read a complete string */
09bef843 6932 if (keep_delims)
220e2d4e
IH
6933 sv_catpvn(sv, s, termlen);
6934 s += termlen;
93a17b20 6935 for (;;) {
220e2d4e
IH
6936 if (PL_encoding && !UTF) {
6937 bool cont = TRUE;
6938
6939 while (cont) {
6940 int offset = s - SvPVX(PL_linestr);
6941 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 6942 &offset, (char*)termstr, termlen);
220e2d4e
IH
6943 char *ns = SvPVX(PL_linestr) + offset;
6944 char *svlast = SvEND(sv) - 1;
6945
6946 for (; s < ns; s++) {
6947 if (*s == '\n' && !PL_rsfp)
6948 CopLINE_inc(PL_curcop);
6949 }
6950 if (!found)
6951 goto read_more_line;
6952 else {
6953 /* handle quoted delimiters */
6954 if (*(svlast-1) == '\\') {
6955 char *t;
6956 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
6957 t--;
6958 if ((svlast-1 - t) % 2) {
6959 if (!keep_quoted) {
6960 *(svlast-1) = term;
6961 *svlast = '\0';
6962 SvCUR_set(sv, SvCUR(sv) - 1);
6963 }
6964 continue;
6965 }
6966 }
6967 if (PL_multi_open == PL_multi_close) {
6968 cont = FALSE;
6969 }
6970 else {
6971 char *t, *w;
6972 if (!last)
6973 last = SvPVX(sv);
6974 for (w = t = last; t < svlast; w++, t++) {
6975 /* At here, all closes are "was quoted" one,
6976 so we don't check PL_multi_close. */
6977 if (*t == '\\') {
6978 if (!keep_quoted && *(t+1) == PL_multi_open)
6979 t++;
6980 else
6981 *w++ = *t++;
6982 }
6983 else if (*t == PL_multi_open)
6984 brackets++;
6985
6986 *w = *t;
6987 }
6988 if (w < t) {
6989 *w++ = term;
6990 *w = '\0';
6991 SvCUR_set(sv, w - SvPVX(sv));
6992 }
6993 last = w;
6994 if (--brackets <= 0)
6995 cont = FALSE;
6996 }
6997 }
6998 }
6999 if (!keep_delims) {
7000 SvCUR_set(sv, SvCUR(sv) - 1);
7001 *SvEND(sv) = '\0';
7002 }
7003 break;
7004 }
7005
02aa26ce 7006 /* extend sv if need be */
3280af22 7007 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 7008 /* set 'to' to the next character in the sv's string */
463ee0b2 7009 to = SvPVX(sv)+SvCUR(sv);
09bef843 7010
02aa26ce 7011 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
7012 if (PL_multi_open == PL_multi_close) {
7013 for (; s < PL_bufend; s++,to++) {
02aa26ce 7014 /* embedded newlines increment the current line number */
3280af22 7015 if (*s == '\n' && !PL_rsfp)
57843af0 7016 CopLINE_inc(PL_curcop);
02aa26ce 7017 /* handle quoted delimiters */
3280af22 7018 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 7019 if (!keep_quoted && s[1] == term)
a0d0e21e 7020 s++;
02aa26ce 7021 /* any other quotes are simply copied straight through */
a0d0e21e
LW
7022 else
7023 *to++ = *s++;
7024 }
02aa26ce
NT
7025 /* terminate when run out of buffer (the for() condition), or
7026 have found the terminator */
220e2d4e
IH
7027 else if (*s == term) {
7028 if (termlen == 1)
7029 break;
f3b9ce0f 7030 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
7031 break;
7032 }
63cd0674 7033 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 7034 has_utf8 = TRUE;
93a17b20
LW
7035 *to = *s;
7036 }
7037 }
02aa26ce
NT
7038
7039 /* if the terminator isn't the same as the start character (e.g.,
7040 matched brackets), we have to allow more in the quoting, and
7041 be prepared for nested brackets.
7042 */
93a17b20 7043 else {
02aa26ce 7044 /* read until we run out of string, or we find the terminator */
3280af22 7045 for (; s < PL_bufend; s++,to++) {
02aa26ce 7046 /* embedded newlines increment the line count */
3280af22 7047 if (*s == '\n' && !PL_rsfp)
57843af0 7048 CopLINE_inc(PL_curcop);
02aa26ce 7049 /* backslashes can escape the open or closing characters */
3280af22 7050 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
7051 if (!keep_quoted &&
7052 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
7053 s++;
7054 else
7055 *to++ = *s++;
7056 }
02aa26ce 7057 /* allow nested opens and closes */
3280af22 7058 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 7059 break;
3280af22 7060 else if (*s == PL_multi_open)
93a17b20 7061 brackets++;
63cd0674 7062 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 7063 has_utf8 = TRUE;
93a17b20
LW
7064 *to = *s;
7065 }
7066 }
02aa26ce 7067 /* terminate the copied string and update the sv's end-of-string */
93a17b20 7068 *to = '\0';
463ee0b2 7069 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 7070
02aa26ce
NT
7071 /*
7072 * this next chunk reads more into the buffer if we're not done yet
7073 */
7074
b1c7b182
GS
7075 if (s < PL_bufend)
7076 break; /* handle case where we are done yet :-) */
79072805 7077
6a27c188 7078#ifndef PERL_STRICT_CR
f63a84b2 7079 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
7080 if ((to[-2] == '\r' && to[-1] == '\n') ||
7081 (to[-2] == '\n' && to[-1] == '\r'))
7082 {
f63a84b2
LW
7083 to[-2] = '\n';
7084 to--;
7085 SvCUR_set(sv, to - SvPVX(sv));
7086 }
7087 else if (to[-1] == '\r')
7088 to[-1] = '\n';
7089 }
7090 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
7091 to[-1] = '\n';
7092#endif
7093
220e2d4e 7094 read_more_line:
02aa26ce
NT
7095 /* if we're out of file, or a read fails, bail and reset the current
7096 line marker so we can report where the unterminated string began
7097 */
3280af22
NIS
7098 if (!PL_rsfp ||
7099 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 7100 sv_free(sv);
eb160463 7101 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
79072805
LW
7102 return Nullch;
7103 }
02aa26ce 7104 /* we read a line, so increment our line counter */
57843af0 7105 CopLINE_inc(PL_curcop);
a0ed51b3 7106
02aa26ce 7107 /* update debugger info */
3280af22 7108 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
7109 SV *sv = NEWSV(88,0);
7110
93a17b20 7111 sv_upgrade(sv, SVt_PVMG);
3280af22 7112 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
7113 (void)SvIOK_on(sv);
7114 SvIVX(sv) = 0;
57843af0 7115 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 7116 }
a0ed51b3 7117
3280af22
NIS
7118 /* having changed the buffer, we must update PL_bufend */
7119 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 7120 PL_last_lop = PL_last_uni = Nullch;
378cc40b 7121 }
4e553d73 7122
02aa26ce
NT
7123 /* at this point, we have successfully read the delimited string */
7124
220e2d4e
IH
7125 if (!PL_encoding || UTF) {
7126 if (keep_delims)
7127 sv_catpvn(sv, s, termlen);
7128 s += termlen;
7129 }
7130 if (has_utf8 || PL_encoding)
b1c7b182 7131 SvUTF8_on(sv);
d0063567 7132
57843af0 7133 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
7134
7135 /* if we allocated too much space, give some back */
93a17b20
LW
7136 if (SvCUR(sv) + 5 < SvLEN(sv)) {
7137 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 7138 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 7139 }
02aa26ce
NT
7140
7141 /* decide whether this is the first or second quoted string we've read
7142 for this op
7143 */
4e553d73 7144
3280af22
NIS
7145 if (PL_lex_stuff)
7146 PL_lex_repl = sv;
79072805 7147 else
3280af22 7148 PL_lex_stuff = sv;
378cc40b
LW
7149 return s;
7150}
7151
02aa26ce
NT
7152/*
7153 scan_num
7154 takes: pointer to position in buffer
7155 returns: pointer to new position in buffer
7156 side-effects: builds ops for the constant in yylval.op
7157
7158 Read a number in any of the formats that Perl accepts:
7159
7fd134d9
JH
7160 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
7161 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
7162 0b[01](_?[01])*
7163 0[0-7](_?[0-7])*
7164 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 7165
3280af22 7166 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
7167 thing it reads.
7168
7169 If it reads a number without a decimal point or an exponent, it will
7170 try converting the number to an integer and see if it can do so
7171 without loss of precision.
7172*/
4e553d73 7173
378cc40b 7174char *
b73d6f50 7175Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 7176{
02aa26ce
NT
7177 register char *s = start; /* current position in buffer */
7178 register char *d; /* destination in temp buffer */
7179 register char *e; /* end of temp buffer */
86554af2 7180 NV nv; /* number read, as a double */
a7cb1f99 7181 SV *sv = Nullsv; /* place to put the converted number */
a86a20aa 7182 bool floatit; /* boolean: int or float? */
02aa26ce 7183 char *lastub = 0; /* position of last underbar */
fc36a67e 7184 static char number_too_long[] = "Number too long";
378cc40b 7185
02aa26ce
NT
7186 /* We use the first character to decide what type of number this is */
7187
378cc40b 7188 switch (*s) {
79072805 7189 default:
cea2e8a9 7190 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 7191
02aa26ce 7192 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 7193 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
7194 case '0':
7195 {
02aa26ce
NT
7196 /* variables:
7197 u holds the "number so far"
4f19785b
WSI
7198 shift the power of 2 of the base
7199 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
7200 overflowed was the number more than we can hold?
7201
7202 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
7203 we in octal/hex/binary?" indicator to disallow hex characters
7204 when in octal mode.
02aa26ce 7205 */
9e24b6e2
JH
7206 NV n = 0.0;
7207 UV u = 0;
79072805 7208 I32 shift;
9e24b6e2
JH
7209 bool overflowed = FALSE;
7210 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
7211 static char* bases[5] = { "", "binary", "", "octal",
7212 "hexadecimal" };
7213 static char* Bases[5] = { "", "Binary", "", "Octal",
7214 "Hexadecimal" };
7215 static char *maxima[5] = { "",
7216 "0b11111111111111111111111111111111",
7217 "",
893fe2c2 7218 "037777777777",
9e24b6e2
JH
7219 "0xffffffff" };
7220 char *base, *Base, *max;
378cc40b 7221
02aa26ce 7222 /* check for hex */
378cc40b
LW
7223 if (s[1] == 'x') {
7224 shift = 4;
7225 s += 2;
4f19785b
WSI
7226 } else if (s[1] == 'b') {
7227 shift = 1;
7228 s += 2;
378cc40b 7229 }
02aa26ce 7230 /* check for a decimal in disguise */
b78218b7 7231 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 7232 goto decimal;
02aa26ce 7233 /* so it must be octal */
928753ea 7234 else {
378cc40b 7235 shift = 3;
928753ea
JH
7236 s++;
7237 }
7238
7239 if (*s == '_') {
7240 if (ckWARN(WARN_SYNTAX))
9014280d 7241 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7242 "Misplaced _ in number");
7243 lastub = s++;
7244 }
9e24b6e2
JH
7245
7246 base = bases[shift];
7247 Base = Bases[shift];
7248 max = maxima[shift];
02aa26ce 7249
4f19785b 7250 /* read the rest of the number */
378cc40b 7251 for (;;) {
9e24b6e2 7252 /* x is used in the overflow test,
893fe2c2 7253 b is the digit we're adding on. */
9e24b6e2 7254 UV x, b;
55497cff 7255
378cc40b 7256 switch (*s) {
02aa26ce
NT
7257
7258 /* if we don't mention it, we're done */
378cc40b
LW
7259 default:
7260 goto out;
02aa26ce 7261
928753ea 7262 /* _ are ignored -- but warned about if consecutive */
de3bb511 7263 case '_':
928753ea 7264 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7265 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7266 "Misplaced _ in number");
7267 lastub = s++;
de3bb511 7268 break;
02aa26ce
NT
7269
7270 /* 8 and 9 are not octal */
378cc40b 7271 case '8': case '9':
4f19785b 7272 if (shift == 3)
cea2e8a9 7273 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 7274 /* FALL THROUGH */
02aa26ce
NT
7275
7276 /* octal digits */
4f19785b 7277 case '2': case '3': case '4':
378cc40b 7278 case '5': case '6': case '7':
4f19785b 7279 if (shift == 1)
cea2e8a9 7280 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7281 /* FALL THROUGH */
7282
7283 case '0': case '1':
02aa26ce 7284 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7285 goto digit;
02aa26ce
NT
7286
7287 /* hex digits */
378cc40b
LW
7288 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7289 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7290 /* make sure they said 0x */
378cc40b
LW
7291 if (shift != 4)
7292 goto out;
55497cff 7293 b = (*s++ & 7) + 9;
02aa26ce
NT
7294
7295 /* Prepare to put the digit we have onto the end
7296 of the number so far. We check for overflows.
7297 */
7298
55497cff 7299 digit:
9e24b6e2
JH
7300 if (!overflowed) {
7301 x = u << shift; /* make room for the digit */
7302
7303 if ((x >> shift) != u
7304 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7305 overflowed = TRUE;
7306 n = (NV) u;
767a6a26 7307 if (ckWARN_d(WARN_OVERFLOW))
9014280d 7308 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
9e24b6e2
JH
7309 "Integer overflow in %s number",
7310 base);
7311 } else
7312 u = x | b; /* add the digit to the end */
7313 }
7314 if (overflowed) {
7315 n *= nvshift[shift];
7316 /* If an NV has not enough bits in its
7317 * mantissa to represent an UV this summing of
7318 * small low-order numbers is a waste of time
7319 * (because the NV cannot preserve the
7320 * low-order bits anyway): we could just
7321 * remember when did we overflow and in the
7322 * end just multiply n by the right
7323 * amount. */
7324 n += (NV) b;
55497cff 7325 }
378cc40b
LW
7326 break;
7327 }
7328 }
02aa26ce
NT
7329
7330 /* if we get here, we had success: make a scalar value from
7331 the number.
7332 */
378cc40b 7333 out:
928753ea
JH
7334
7335 /* final misplaced underbar check */
7336 if (s[-1] == '_') {
7337 if (ckWARN(WARN_SYNTAX))
9014280d 7338 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
7339 }
7340
79072805 7341 sv = NEWSV(92,0);
9e24b6e2 7342 if (overflowed) {
767a6a26 7343 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
9014280d 7344 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7345 "%s number > %s non-portable",
7346 Base, max);
7347 sv_setnv(sv, n);
7348 }
7349 else {
15041a67 7350#if UVSIZE > 4
767a6a26 7351 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
9014280d 7352 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7353 "%s number > %s non-portable",
7354 Base, max);
2cc4c2dc 7355#endif
9e24b6e2
JH
7356 sv_setuv(sv, u);
7357 }
2cc4c2dc 7358 if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7359 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7360 }
7361 break;
02aa26ce
NT
7362
7363 /*
7364 handle decimal numbers.
7365 we're also sent here when we read a 0 as the first digit
7366 */
378cc40b
LW
7367 case '1': case '2': case '3': case '4': case '5':
7368 case '6': case '7': case '8': case '9': case '.':
7369 decimal:
3280af22
NIS
7370 d = PL_tokenbuf;
7371 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7372 floatit = FALSE;
02aa26ce
NT
7373
7374 /* read next group of digits and _ and copy into d */
de3bb511 7375 while (isDIGIT(*s) || *s == '_') {
4e553d73 7376 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7377 if -w is on
7378 */
93a17b20 7379 if (*s == '_') {
928753ea 7380 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7381 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7382 "Misplaced _ in number");
7383 lastub = s++;
93a17b20 7384 }
fc36a67e 7385 else {
02aa26ce 7386 /* check for end of fixed-length buffer */
fc36a67e 7387 if (d >= e)
cea2e8a9 7388 Perl_croak(aTHX_ number_too_long);
02aa26ce 7389 /* if we're ok, copy the character */
378cc40b 7390 *d++ = *s++;
fc36a67e 7391 }
378cc40b 7392 }
02aa26ce
NT
7393
7394 /* final misplaced underbar check */
928753ea 7395 if (lastub && s == lastub + 1) {
d008e5eb 7396 if (ckWARN(WARN_SYNTAX))
9014280d 7397 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 7398 }
02aa26ce
NT
7399
7400 /* read a decimal portion if there is one. avoid
7401 3..5 being interpreted as the number 3. followed
7402 by .5
7403 */
2f3197b3 7404 if (*s == '.' && s[1] != '.') {
79072805 7405 floatit = TRUE;
378cc40b 7406 *d++ = *s++;
02aa26ce 7407
928753ea
JH
7408 if (*s == '_') {
7409 if (ckWARN(WARN_SYNTAX))
9014280d 7410 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7411 "Misplaced _ in number");
7412 lastub = s;
7413 }
7414
7415 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7416 */
fc36a67e 7417 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7418 /* fixed length buffer check */
fc36a67e 7419 if (d >= e)
cea2e8a9 7420 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7421 if (*s == '_') {
7422 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7423 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7424 "Misplaced _ in number");
7425 lastub = s;
7426 }
7427 else
fc36a67e 7428 *d++ = *s;
378cc40b 7429 }
928753ea
JH
7430 /* fractional part ending in underbar? */
7431 if (s[-1] == '_') {
7432 if (ckWARN(WARN_SYNTAX))
9014280d 7433 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7434 "Misplaced _ in number");
7435 }
dd629d5b
GS
7436 if (*s == '.' && isDIGIT(s[1])) {
7437 /* oops, it's really a v-string, but without the "v" */
f4758303 7438 s = start;
dd629d5b
GS
7439 goto vstring;
7440 }
378cc40b 7441 }
02aa26ce
NT
7442
7443 /* read exponent part, if present */
7fd134d9 7444 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7445 floatit = TRUE;
7446 s++;
02aa26ce
NT
7447
7448 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7449 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7450
7fd134d9
JH
7451 /* stray preinitial _ */
7452 if (*s == '_') {
7453 if (ckWARN(WARN_SYNTAX))
9014280d 7454 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7455 "Misplaced _ in number");
7456 lastub = s++;
7457 }
7458
02aa26ce 7459 /* allow positive or negative exponent */
378cc40b
LW
7460 if (*s == '+' || *s == '-')
7461 *d++ = *s++;
02aa26ce 7462
7fd134d9
JH
7463 /* stray initial _ */
7464 if (*s == '_') {
7465 if (ckWARN(WARN_SYNTAX))
9014280d 7466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7467 "Misplaced _ in number");
7468 lastub = s++;
7469 }
7470
7fd134d9
JH
7471 /* read digits of exponent */
7472 while (isDIGIT(*s) || *s == '_') {
7473 if (isDIGIT(*s)) {
7474 if (d >= e)
7475 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7476 *d++ = *s++;
7fd134d9
JH
7477 }
7478 else {
7479 if (ckWARN(WARN_SYNTAX) &&
7480 ((lastub && s == lastub + 1) ||
b3b48e3e 7481 (!isDIGIT(s[1]) && s[1] != '_')))
9014280d 7482 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9 7483 "Misplaced _ in number");
b3b48e3e 7484 lastub = s++;
7fd134d9 7485 }
7fd134d9 7486 }
378cc40b 7487 }
02aa26ce 7488
02aa26ce
NT
7489
7490 /* make an sv from the string */
79072805 7491 sv = NEWSV(92,0);
097ee67d 7492
0b7fceb9 7493 /*
58bb9ec3
NC
7494 We try to do an integer conversion first if no characters
7495 indicating "float" have been found.
0b7fceb9
MU
7496 */
7497
7498 if (!floatit) {
58bb9ec3
NC
7499 UV uv;
7500 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
7501
7502 if (flags == IS_NUMBER_IN_UV) {
7503 if (uv <= IV_MAX)
86554af2 7504 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 7505 else
c239479b 7506 sv_setuv(sv, uv);
58bb9ec3
NC
7507 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
7508 if (uv <= (UV) IV_MIN)
7509 sv_setiv(sv, -(IV)uv);
7510 else
7511 floatit = TRUE;
7512 } else
7513 floatit = TRUE;
7514 }
0b7fceb9 7515 if (floatit) {
58bb9ec3
NC
7516 /* terminate the string */
7517 *d = '\0';
86554af2
JH
7518 nv = Atof(PL_tokenbuf);
7519 sv_setnv(sv, nv);
7520 }
86554af2 7521
b8403495
JH
7522 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7523 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7524 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7525 (floatit ? "float" : "integer"),
7526 sv, Nullsv, NULL);
378cc40b 7527 break;
0b7fceb9 7528
e312add1 7529 /* if it starts with a v, it could be a v-string */
a7cb1f99 7530 case 'v':
dd629d5b 7531vstring:
f4758303 7532 sv = NEWSV(92,5); /* preallocate storage space */
b0f01acb 7533 s = scan_vstring(s,sv);
a7cb1f99 7534 break;
79072805 7535 }
a687059c 7536
02aa26ce
NT
7537 /* make the op for the constant and return */
7538
a86a20aa 7539 if (sv)
b73d6f50 7540 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7541 else
b73d6f50 7542 lvalp->opval = Nullop;
a687059c 7543
378cc40b
LW
7544 return s;
7545}
7546
76e3520e 7547STATIC char *
cea2e8a9 7548S_scan_formline(pTHX_ register char *s)
378cc40b 7549{
79072805 7550 register char *eol;
378cc40b 7551 register char *t;
79cb57f6 7552 SV *stuff = newSVpvn("",0);
79072805 7553 bool needargs = FALSE;
378cc40b 7554
79072805 7555 while (!needargs) {
c2e66d9e 7556 if (*s == '.' || *s == /*{*/'}') {
79072805 7557 /*SUPPRESS 530*/
51882d45 7558#ifdef PERL_STRICT_CR
bf4acbe4 7559 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7560#else
bf4acbe4 7561 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7562#endif
6a65c6a0 7563 if (*t == '\n' || t == PL_bufend)
79072805
LW
7564 break;
7565 }
3280af22 7566 if (PL_in_eval && !PL_rsfp) {
93a17b20 7567 eol = strchr(s,'\n');
0f85fab0 7568 if (!eol++)
3280af22 7569 eol = PL_bufend;
0f85fab0
LW
7570 }
7571 else
3280af22 7572 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7573 if (*s != '#') {
a0d0e21e
LW
7574 for (t = s; t < eol; t++) {
7575 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7576 needargs = FALSE;
7577 goto enough; /* ~~ must be first line in formline */
378cc40b 7578 }
a0d0e21e
LW
7579 if (*t == '@' || *t == '^')
7580 needargs = TRUE;
378cc40b 7581 }
7121b347
MG
7582 if (eol > s) {
7583 sv_catpvn(stuff, s, eol-s);
2dc4c65b 7584#ifndef PERL_STRICT_CR
7121b347
MG
7585 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7586 char *end = SvPVX(stuff) + SvCUR(stuff);
7587 end[-2] = '\n';
7588 end[-1] = '\0';
7589 SvCUR(stuff)--;
7590 }
2dc4c65b 7591#endif
7121b347
MG
7592 }
7593 else
7594 break;
79072805
LW
7595 }
7596 s = eol;
3280af22
NIS
7597 if (PL_rsfp) {
7598 s = filter_gets(PL_linestr, PL_rsfp, 0);
7599 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7600 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7601 PL_last_lop = PL_last_uni = Nullch;
79072805 7602 if (!s) {
3280af22 7603 s = PL_bufptr;
79072805 7604 yyerror("Format not terminated");
378cc40b
LW
7605 break;
7606 }
378cc40b 7607 }
463ee0b2 7608 incline(s);
79072805 7609 }
a0d0e21e
LW
7610 enough:
7611 if (SvCUR(stuff)) {
3280af22 7612 PL_expect = XTERM;
79072805 7613 if (needargs) {
3280af22
NIS
7614 PL_lex_state = LEX_NORMAL;
7615 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7616 force_next(',');
7617 }
a0d0e21e 7618 else
3280af22
NIS
7619 PL_lex_state = LEX_FORMLINE;
7620 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7621 force_next(THING);
3280af22 7622 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7623 force_next(LSTOP);
378cc40b 7624 }
79072805 7625 else {
8990e307 7626 SvREFCNT_dec(stuff);
3280af22
NIS
7627 PL_lex_formbrack = 0;
7628 PL_bufptr = s;
79072805
LW
7629 }
7630 return s;
378cc40b 7631}
a687059c 7632
76e3520e 7633STATIC void
cea2e8a9 7634S_set_csh(pTHX)
a687059c 7635{
ae986130 7636#ifdef CSH
3280af22
NIS
7637 if (!PL_cshlen)
7638 PL_cshlen = strlen(PL_cshname);
ae986130 7639#endif
a687059c 7640}
463ee0b2 7641
ba6d6ac9 7642I32
864dbfa3 7643Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7644{
3280af22
NIS
7645 I32 oldsavestack_ix = PL_savestack_ix;
7646 CV* outsidecv = PL_compcv;
8990e307 7647
3280af22
NIS
7648 if (PL_compcv) {
7649 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7650 }
7766f137 7651 SAVEI32(PL_subline);
3280af22 7652 save_item(PL_subname);
3280af22 7653 SAVESPTR(PL_compcv);
3280af22
NIS
7654
7655 PL_compcv = (CV*)NEWSV(1104,0);
7656 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7657 CvFLAGS(PL_compcv) |= flags;
7658
57843af0 7659 PL_subline = CopLINE(PL_curcop);
dd2155a4 7660 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
3280af22 7661 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
a3985cdc 7662 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 7663
8990e307
LW
7664 return oldsavestack_ix;
7665}
7666
084592ab
CN
7667#ifdef __SC__
7668#pragma segment Perl_yylex
7669#endif
8990e307 7670int
864dbfa3 7671Perl_yywarn(pTHX_ char *s)
8990e307 7672{
faef0170 7673 PL_in_eval |= EVAL_WARNONLY;
748a9306 7674 yyerror(s);
faef0170 7675 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7676 return 0;
8990e307
LW
7677}
7678
7679int
864dbfa3 7680Perl_yyerror(pTHX_ char *s)
463ee0b2 7681{
68dc0745 7682 char *where = NULL;
7683 char *context = NULL;
7684 int contlen = -1;
46fc3d4c 7685 SV *msg;
463ee0b2 7686
3280af22 7687 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7688 where = "at EOF";
3280af22
NIS
7689 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7690 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
f355267c
JH
7691 /*
7692 Only for NetWare:
7693 The code below is removed for NetWare because it abends/crashes on NetWare
7694 when the script has error such as not having the closing quotes like:
7695 if ($var eq "value)
7696 Checking of white spaces is anyway done in NetWare code.
7697 */
7698#ifndef NETWARE
3280af22
NIS
7699 while (isSPACE(*PL_oldoldbufptr))
7700 PL_oldoldbufptr++;
f355267c 7701#endif
3280af22
NIS
7702 context = PL_oldoldbufptr;
7703 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7704 }
3280af22
NIS
7705 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7706 PL_oldbufptr != PL_bufptr) {
f355267c
JH
7707 /*
7708 Only for NetWare:
7709 The code below is removed for NetWare because it abends/crashes on NetWare
7710 when the script has error such as not having the closing quotes like:
7711 if ($var eq "value)
7712 Checking of white spaces is anyway done in NetWare code.
7713 */
7714#ifndef NETWARE
3280af22
NIS
7715 while (isSPACE(*PL_oldbufptr))
7716 PL_oldbufptr++;
f355267c 7717#endif
3280af22
NIS
7718 context = PL_oldbufptr;
7719 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7720 }
7721 else if (yychar > 255)
68dc0745 7722 where = "next token ???";
cdfb297e
GS
7723#ifdef USE_PURE_BISON
7724/* GNU Bison sets the value -2 */
7725 else if (yychar == -2) {
7726#else
463ee0b2 7727 else if ((yychar & 127) == 127) {
cdfb297e 7728#endif
3280af22
NIS
7729 if (PL_lex_state == LEX_NORMAL ||
7730 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7731 where = "at end of line";
3280af22 7732 else if (PL_lex_inpat)
68dc0745 7733 where = "within pattern";
463ee0b2 7734 else
68dc0745 7735 where = "within string";
463ee0b2 7736 }
46fc3d4c 7737 else {
79cb57f6 7738 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7739 if (yychar < 32)
cea2e8a9 7740 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7741 else if (isPRINT_LC(yychar))
cea2e8a9 7742 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7743 else
cea2e8a9 7744 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7745 where = SvPVX(where_sv);
463ee0b2 7746 }
46fc3d4c 7747 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 7748 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 7749 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7750 if (context)
cea2e8a9 7751 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7752 else
cea2e8a9 7753 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7754 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7755 Perl_sv_catpvf(aTHX_ msg,
57def98f 7756 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7757 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7758 PL_multi_end = 0;
a0d0e21e 7759 }
faef0170 7760 if (PL_in_eval & EVAL_WARNONLY)
894356b3 7761 Perl_warn(aTHX_ "%"SVf, msg);
463ee0b2 7762 else
5a844595 7763 qerror(msg);
c7d6bfb2
GS
7764 if (PL_error_count >= 10) {
7765 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7766 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
248c2a4d 7767 ERRSV, OutCopFILE(PL_curcop));
c7d6bfb2
GS
7768 else
7769 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 7770 OutCopFILE(PL_curcop));
c7d6bfb2 7771 }
3280af22
NIS
7772 PL_in_my = 0;
7773 PL_in_my_stash = Nullhv;
463ee0b2
LW
7774 return 0;
7775}
084592ab
CN
7776#ifdef __SC__
7777#pragma segment Main
7778#endif
4e35701f 7779
b250498f 7780STATIC char*
3ae08724 7781S_swallow_bom(pTHX_ U8 *s)
01ec43d0 7782{
b250498f
GS
7783 STRLEN slen;
7784 slen = SvCUR(PL_linestr);
7785 switch (*s) {
4e553d73
NIS
7786 case 0xFF:
7787 if (s[1] == 0xFE) {
01ec43d0 7788 /* UTF-16 little-endian */
3ae08724 7789 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
01ec43d0
GS
7790 Perl_croak(aTHX_ "Unsupported script encoding");
7791#ifndef PERL_NO_UTF16_FILTER
dea0fc0b 7792 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
3ae08724 7793 s += 2;
dea0fc0b
JH
7794 if (PL_bufend > (char*)s) {
7795 U8 *news;
7796 I32 newlen;
7797
7798 filter_add(utf16rev_textfilter, NULL);
7799 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
f72f5f89
JH
7800 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7801 PL_bufend - (char*)s - 1,
dea0fc0b
JH
7802 &newlen);
7803 Copy(news, s, newlen, U8);
7804 SvCUR_set(PL_linestr, newlen);
7805 PL_bufend = SvPVX(PL_linestr) + newlen;
7806 news[newlen++] = '\0';
7807 Safefree(news);
7808 }
b250498f 7809#else
01ec43d0 7810 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7811#endif
01ec43d0
GS
7812 }
7813 break;
78ae23f5 7814 case 0xFE:
3ae08724 7815 if (s[1] == 0xFF) { /* UTF-16 big-endian */
01ec43d0 7816#ifndef PERL_NO_UTF16_FILTER
dea0fc0b
JH
7817 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7818 s += 2;
7819 if (PL_bufend > (char *)s) {
7820 U8 *news;
7821 I32 newlen;
7822
7823 filter_add(utf16_textfilter, NULL);
7824 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7825 PL_bufend = (char*)utf16_to_utf8(s, news,
7826 PL_bufend - (char*)s,
7827 &newlen);
7828 Copy(news, s, newlen, U8);
7829 SvCUR_set(PL_linestr, newlen);
7830 PL_bufend = SvPVX(PL_linestr) + newlen;
7831 news[newlen++] = '\0';
7832 Safefree(news);
7833 }
b250498f 7834#else
01ec43d0 7835 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7836#endif
01ec43d0
GS
7837 }
7838 break;
3ae08724
GS
7839 case 0xEF:
7840 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
dea0fc0b 7841 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
01ec43d0
GS
7842 s += 3; /* UTF-8 */
7843 }
7844 break;
7845 case 0:
7846 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
3ae08724 7847 s[2] == 0xFE && s[3] == 0xFF)
01ec43d0
GS
7848 {
7849 Perl_croak(aTHX_ "Unsupported script encoding");
7850 }
7851 }
b8f84bb2 7852 return (char*)s;
b250498f 7853}
4755096e 7854
4755096e
GS
7855/*
7856 * restore_rsfp
7857 * Restore a source filter.
7858 */
7859
7860static void
acfe0abc 7861restore_rsfp(pTHX_ void *f)
4755096e
GS
7862{
7863 PerlIO *fp = (PerlIO*)f;
7864
7865 if (PL_rsfp == PerlIO_stdin())
7866 PerlIO_clearerr(PL_rsfp);
7867 else if (PL_rsfp && (PL_rsfp != fp))
7868 PerlIO_close(PL_rsfp);
7869 PL_rsfp = fp;
7870}
6e3aabd6
GS
7871
7872#ifndef PERL_NO_UTF16_FILTER
7873static I32
acfe0abc 7874utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7875{
7876 I32 count = FILTER_READ(idx+1, sv, maxlen);
7877 if (count) {
7878 U8* tmps;
7879 U8* tend;
dea0fc0b 7880 I32 newlen;
6e3aabd6 7881 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
f72f5f89
JH
7882 if (!*SvPV_nolen(sv))
7883 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7884 return count;
4e553d73 7885
dea0fc0b 7886 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7887 sv_usepvn(sv, (char*)tmps, tend - tmps);
7888 }
7889 return count;
7890}
7891
7892static I32
acfe0abc 7893utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7894{
7895 I32 count = FILTER_READ(idx+1, sv, maxlen);
7896 if (count) {
7897 U8* tmps;
7898 U8* tend;
dea0fc0b 7899 I32 newlen;
f72f5f89
JH
7900 if (!*SvPV_nolen(sv))
7901 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7902 return count;
7903
6e3aabd6 7904 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
dea0fc0b 7905 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7906 sv_usepvn(sv, (char*)tmps, tend - tmps);
7907 }
7908 return count;
7909}
7910#endif
9f4817db 7911