This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Document the hint constants and where they're used.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
4ac733c9 29static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 30static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
55497cff 65#define LEX_NORMAL 10
66#define LEX_INTERPNORMAL 9
67#define LEX_INTERPCASEMOD 8
68#define LEX_INTERPPUSH 7
69#define LEX_INTERPSTART 6
70#define LEX_INTERPEND 5
71#define LEX_INTERPENDMAYBE 4
72#define LEX_INTERPCONCAT 3
73#define LEX_INTERPCONST 2
74#define LEX_FORMLINE 1
75#define LEX_KNOWNEXT 0
79072805 76
79072805
LW
77#ifdef ff_next
78#undef ff_next
d48672a2
LW
79#endif
80
a1a0e61e 81#ifdef USE_PURE_BISON
dba4d153
JH
82# ifndef YYMAXLEVEL
83# define YYMAXLEVEL 100
84# endif
20141f0e
IRC
85YYSTYPE* yylval_pointer[YYMAXLEVEL];
86int* yychar_pointer[YYMAXLEVEL];
6f202aea 87int yyactlevel = -1;
22c35a8c
GS
88# undef yylval
89# undef yychar
20141f0e
IRC
90# define yylval (*yylval_pointer[yyactlevel])
91# define yychar (*yychar_pointer[yyactlevel])
92# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 93# undef yylex
dba4d153 94# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
95#endif
96
79072805 97#include "keywords.h"
fe14fcc3 98
ffb4593c
NT
99/* CLINE is a macro that ensures PL_copline has a sane value */
100
ae986130
LW
101#ifdef CLINE
102#undef CLINE
103#endif
57843af0 104#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 105
ffb4593c
NT
106/*
107 * Convenience functions to return different tokens and prime the
9cbb5ea2 108 * lexer for the next token. They all take an argument.
ffb4593c
NT
109 *
110 * TOKEN : generic token (used for '(', DOLSHARP, etc)
111 * OPERATOR : generic operator
112 * AOPERATOR : assignment operator
113 * PREBLOCK : beginning the block after an if, while, foreach, ...
114 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
115 * PREREF : *EXPR where EXPR is not a simple identifier
116 * TERM : expression term
117 * LOOPX : loop exiting command (goto, last, dump, etc)
118 * FTST : file test operator
119 * FUN0 : zero-argument function
2d2e263d 120 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
121 * BOop : bitwise or or xor
122 * BAop : bitwise and
123 * SHop : shift operator
124 * PWop : power operator
9cbb5ea2 125 * PMop : pattern-matching operator
ffb4593c
NT
126 * Aop : addition-level operator
127 * Mop : multiplication-level operator
128 * Eop : equality-testing operator
e5edeb50 129 * Rop : relational operator <= != gt
ffb4593c
NT
130 *
131 * Also see LOP and lop() below.
132 */
133
075953c3
JH
134/* Note that REPORT() and REPORT2() will be expressions that supply
135 * their own trailing comma, not suitable for statements as such. */
998054bd 136#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
137# define REPORT(x,retval) tokereport(x,s,(int)retval),
138# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 139#else
075953c3
JH
140# define REPORT(x,retval)
141# define REPORT2(x,retval)
998054bd
SC
142#endif
143
075953c3
JH
144#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
145#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
146#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
147#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
148#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
149#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
150#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
151#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
6f33ba73 152#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERMORDORDOR,PL_bufptr = s,(int)UNIOP)
075953c3
JH
153#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
154#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
155#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
156#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
157#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
158#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
159#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
160#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
161#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
162#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
163#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
6f33ba73 170#define UNI2(f,x) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c 176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
6f33ba73
RGS
177#define UNI(f) UNI2(f,XTERM)
178#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 179
79072805 180#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 181 REPORT("uni",f) \
3280af22
NIS
182 PL_bufptr = s, \
183 PL_last_uni = PL_oldbufptr, \
79072805
LW
184 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
185
9f68db38 186/* grandfather return to old style */
3280af22 187#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 188
8fa7f367
JH
189#ifdef DEBUGGING
190
2d00ba3b 191STATIC void
61b2116b 192S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 193{
998054bd 194 DEBUG_T({
9c5ffd7c 195 SV* report = newSVpv(thing, 0);
29b291f7
RB
196 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
197 (IV)rv);
998054bd
SC
198
199 if (s - PL_bufptr > 0)
200 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
201 else {
202 if (PL_oldbufptr && *PL_oldbufptr)
203 sv_catpv(report, PL_tokenbuf);
204 }
205 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 206 });
998054bd
SC
207}
208
8fa7f367
JH
209#endif
210
ffb4593c
NT
211/*
212 * S_ao
213 *
c963b151
BD
214 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
215 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
216 */
217
76e3520e 218STATIC int
cea2e8a9 219S_ao(pTHX_ int toketype)
a0d0e21e 220{
3280af22
NIS
221 if (*PL_bufptr == '=') {
222 PL_bufptr++;
a0d0e21e
LW
223 if (toketype == ANDAND)
224 yylval.ival = OP_ANDASSIGN;
225 else if (toketype == OROR)
226 yylval.ival = OP_ORASSIGN;
c963b151
BD
227 else if (toketype == DORDOR)
228 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
229 toketype = ASSIGNOP;
230 }
231 return toketype;
232}
233
ffb4593c
NT
234/*
235 * S_no_op
236 * When Perl expects an operator and finds something else, no_op
237 * prints the warning. It always prints "<something> found where
238 * operator expected. It prints "Missing semicolon on previous line?"
239 * if the surprise occurs at the start of the line. "do you need to
240 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
241 * where the compiler doesn't know if foo is a method call or a function.
242 * It prints "Missing operator before end of line" if there's nothing
243 * after the missing operator, or "... before <...>" if there is something
244 * after the missing operator.
245 */
246
76e3520e 247STATIC void
cea2e8a9 248S_no_op(pTHX_ char *what, char *s)
463ee0b2 249{
3280af22
NIS
250 char *oldbp = PL_bufptr;
251 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 252
1189a94a
GS
253 if (!s)
254 s = oldbp;
07c798fb 255 else
1189a94a 256 PL_bufptr = s;
cea2e8a9 257 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 258 if (is_first)
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 260 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 261 char *t;
7e2040f0 262 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 263 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 264 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 265 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 266 }
07c798fb
HS
267 else {
268 assert(s >= oldbp);
cea2e8a9 269 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 270 }
3280af22 271 PL_bufptr = oldbp;
8990e307
LW
272}
273
ffb4593c
NT
274/*
275 * S_missingterm
276 * Complain about missing quote/regexp/heredoc terminator.
277 * If it's called with (char *)NULL then it cauterizes the line buffer.
278 * If we're in a delimited string and the delimiter is a control
279 * character, it's reformatted into a two-char sequence like ^C.
280 * This is fatal.
281 */
282
76e3520e 283STATIC void
cea2e8a9 284S_missingterm(pTHX_ char *s)
8990e307
LW
285{
286 char tmpbuf[3];
287 char q;
288 if (s) {
289 char *nl = strrchr(s,'\n');
d2719217 290 if (nl)
8990e307
LW
291 *nl = '\0';
292 }
9d116dd7
JH
293 else if (
294#ifdef EBCDIC
295 iscntrl(PL_multi_close)
296#else
297 PL_multi_close < 32 || PL_multi_close == 127
298#endif
299 ) {
8990e307 300 *tmpbuf = '^';
3280af22 301 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
302 s = "\\n";
303 tmpbuf[2] = '\0';
304 s = tmpbuf;
305 }
306 else {
eb160463 307 *tmpbuf = (char)PL_multi_close;
8990e307
LW
308 tmpbuf[1] = '\0';
309 s = tmpbuf;
310 }
311 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 312 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 313}
79072805 314
ffb4593c
NT
315/*
316 * Perl_deprecate
ffb4593c
NT
317 */
318
79072805 319void
864dbfa3 320Perl_deprecate(pTHX_ char *s)
a0d0e21e 321{
599cee73 322 if (ckWARN(WARN_DEPRECATED))
9014280d 323 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
324}
325
12bcd1a6
PM
326void
327Perl_deprecate_old(pTHX_ char *s)
328{
329 /* This function should NOT be called for any new deprecated warnings */
330 /* Use Perl_deprecate instead */
331 /* */
332 /* It is here to maintain backward compatibility with the pre-5.8 */
333 /* warnings category hierarchy. The "deprecated" category used to */
334 /* live under the "syntax" category. It is now a top-level category */
335 /* in its own right. */
336
337 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
338 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
339 "Use of %s is deprecated", s);
340}
341
ffb4593c
NT
342/*
343 * depcom
9cbb5ea2 344 * Deprecate a comma-less variable list.
ffb4593c
NT
345 */
346
76e3520e 347STATIC void
cea2e8a9 348S_depcom(pTHX)
a0d0e21e 349{
12bcd1a6 350 deprecate_old("comma-less variable list");
a0d0e21e
LW
351}
352
ffb4593c 353/*
9cbb5ea2
GS
354 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
355 * utf16-to-utf8-reversed.
ffb4593c
NT
356 */
357
c39cd008
GS
358#ifdef PERL_CR_FILTER
359static void
360strip_return(SV *sv)
361{
362 register char *s = SvPVX(sv);
363 register char *e = s + SvCUR(sv);
364 /* outer loop optimized to do nothing if there are no CR-LFs */
365 while (s < e) {
366 if (*s++ == '\r' && *s == '\n') {
367 /* hit a CR-LF, need to copy the rest */
368 register char *d = s - 1;
369 *d++ = *s++;
370 while (s < e) {
371 if (*s == '\r' && s[1] == '\n')
372 s++;
373 *d++ = *s++;
374 }
375 SvCUR(sv) -= s - d;
376 return;
377 }
378 }
379}
a868473f 380
76e3520e 381STATIC I32
c39cd008 382S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 383{
c39cd008
GS
384 I32 count = FILTER_READ(idx+1, sv, maxlen);
385 if (count > 0 && !maxlen)
386 strip_return(sv);
387 return count;
a868473f
NIS
388}
389#endif
390
ffb4593c
NT
391/*
392 * Perl_lex_start
9cbb5ea2
GS
393 * Initialize variables. Uses the Perl save_stack to save its state (for
394 * recursive calls to the parser).
ffb4593c
NT
395 */
396
a0d0e21e 397void
864dbfa3 398Perl_lex_start(pTHX_ SV *line)
79072805 399{
8990e307
LW
400 char *s;
401 STRLEN len;
402
3280af22
NIS
403 SAVEI32(PL_lex_dojoin);
404 SAVEI32(PL_lex_brackets);
3280af22
NIS
405 SAVEI32(PL_lex_casemods);
406 SAVEI32(PL_lex_starts);
407 SAVEI32(PL_lex_state);
7766f137 408 SAVEVPTR(PL_lex_inpat);
3280af22 409 SAVEI32(PL_lex_inwhat);
18b09519
GS
410 if (PL_lex_state == LEX_KNOWNEXT) {
411 I32 toke = PL_nexttoke;
412 while (--toke >= 0) {
413 SAVEI32(PL_nexttype[toke]);
414 SAVEVPTR(PL_nextval[toke]);
415 }
416 SAVEI32(PL_nexttoke);
18b09519 417 }
57843af0 418 SAVECOPLINE(PL_curcop);
3280af22
NIS
419 SAVEPPTR(PL_bufptr);
420 SAVEPPTR(PL_bufend);
421 SAVEPPTR(PL_oldbufptr);
422 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
423 SAVEPPTR(PL_last_lop);
424 SAVEPPTR(PL_last_uni);
3280af22
NIS
425 SAVEPPTR(PL_linestart);
426 SAVESPTR(PL_linestr);
427 SAVEPPTR(PL_lex_brackstack);
428 SAVEPPTR(PL_lex_casestack);
c76ac1ee 429 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
430 SAVESPTR(PL_lex_stuff);
431 SAVEI32(PL_lex_defer);
09bef843 432 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 433 SAVESPTR(PL_lex_repl);
bebdddfc
GS
434 SAVEINT(PL_expect);
435 SAVEINT(PL_lex_expect);
3280af22
NIS
436
437 PL_lex_state = LEX_NORMAL;
438 PL_lex_defer = 0;
439 PL_expect = XSTATE;
440 PL_lex_brackets = 0;
3280af22
NIS
441 New(899, PL_lex_brackstack, 120, char);
442 New(899, PL_lex_casestack, 12, char);
443 SAVEFREEPV(PL_lex_brackstack);
444 SAVEFREEPV(PL_lex_casestack);
445 PL_lex_casemods = 0;
446 *PL_lex_casestack = '\0';
447 PL_lex_dojoin = 0;
448 PL_lex_starts = 0;
449 PL_lex_stuff = Nullsv;
450 PL_lex_repl = Nullsv;
451 PL_lex_inpat = 0;
76be56bc 452 PL_nexttoke = 0;
3280af22 453 PL_lex_inwhat = 0;
09bef843 454 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
455 PL_linestr = line;
456 if (SvREADONLY(PL_linestr))
457 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
458 s = SvPV(PL_linestr, len);
6f27f9a7 459 if (!len || s[len-1] != ';') {
3280af22
NIS
460 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
461 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
462 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 463 }
3280af22
NIS
464 SvTEMP_off(PL_linestr);
465 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
466 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 467 PL_last_lop = PL_last_uni = Nullch;
3280af22 468 PL_rsfp = 0;
79072805 469}
a687059c 470
ffb4593c
NT
471/*
472 * Perl_lex_end
9cbb5ea2
GS
473 * Finalizer for lexing operations. Must be called when the parser is
474 * done with the lexer.
ffb4593c
NT
475 */
476
463ee0b2 477void
864dbfa3 478Perl_lex_end(pTHX)
463ee0b2 479{
3280af22 480 PL_doextract = FALSE;
463ee0b2
LW
481}
482
ffb4593c
NT
483/*
484 * S_incline
485 * This subroutine has nothing to do with tilting, whether at windmills
486 * or pinball tables. Its name is short for "increment line". It
57843af0 487 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 488 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
489 * # line 500 "foo.pm"
490 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
491 */
492
76e3520e 493STATIC void
cea2e8a9 494S_incline(pTHX_ char *s)
463ee0b2
LW
495{
496 char *t;
497 char *n;
73659bf1 498 char *e;
463ee0b2 499 char ch;
463ee0b2 500
57843af0 501 CopLINE_inc(PL_curcop);
463ee0b2
LW
502 if (*s++ != '#')
503 return;
bf4acbe4 504 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
505 if (strnEQ(s, "line", 4))
506 s += 4;
507 else
508 return;
084592ab 509 if (SPACE_OR_TAB(*s))
73659bf1 510 s++;
4e553d73 511 else
73659bf1 512 return;
bf4acbe4 513 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
514 if (!isDIGIT(*s))
515 return;
516 n = s;
517 while (isDIGIT(*s))
518 s++;
bf4acbe4 519 while (SPACE_OR_TAB(*s))
463ee0b2 520 s++;
73659bf1 521 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 522 s++;
73659bf1
GS
523 e = t + 1;
524 }
463ee0b2 525 else {
463ee0b2 526 for (t = s; !isSPACE(*t); t++) ;
73659bf1 527 e = t;
463ee0b2 528 }
bf4acbe4 529 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
530 e++;
531 if (*e != '\n' && *e != '\0')
532 return; /* false alarm */
533
463ee0b2
LW
534 ch = *t;
535 *t = '\0';
f4dd75d9 536 if (t - s > 0) {
05ec9bb3 537 CopFILE_free(PL_curcop);
57843af0 538 CopFILE_set(PL_curcop, s);
f4dd75d9 539 }
463ee0b2 540 *t = ch;
57843af0 541 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
542}
543
ffb4593c
NT
544/*
545 * S_skipspace
546 * Called to gobble the appropriate amount and type of whitespace.
547 * Skips comments as well.
548 */
549
76e3520e 550STATIC char *
cea2e8a9 551S_skipspace(pTHX_ register char *s)
a687059c 552{
3280af22 553 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 554 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
555 s++;
556 return s;
557 }
558 for (;;) {
fd049845 559 STRLEN prevlen;
09bef843 560 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 561 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
562 while (s < PL_bufend && isSPACE(*s)) {
563 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
564 incline(s);
565 }
ffb4593c
NT
566
567 /* comment */
3280af22
NIS
568 if (s < PL_bufend && *s == '#') {
569 while (s < PL_bufend && *s != '\n')
463ee0b2 570 s++;
60e6418e 571 if (s < PL_bufend) {
463ee0b2 572 s++;
60e6418e
GS
573 if (PL_in_eval && !PL_rsfp) {
574 incline(s);
575 continue;
576 }
577 }
463ee0b2 578 }
ffb4593c
NT
579
580 /* only continue to recharge the buffer if we're at the end
581 * of the buffer, we're not reading from a source filter, and
582 * we're in normal lexing mode
583 */
09bef843
SB
584 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
585 PL_lex_state == LEX_FORMLINE)
463ee0b2 586 return s;
ffb4593c
NT
587
588 /* try to recharge the buffer */
9cbb5ea2
GS
589 if ((s = filter_gets(PL_linestr, PL_rsfp,
590 (prevlen = SvCUR(PL_linestr)))) == Nullch)
591 {
592 /* end of file. Add on the -p or -n magic */
3280af22
NIS
593 if (PL_minus_n || PL_minus_p) {
594 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
595 ";}continue{print or die qq(-p destination: $!\\n)" :
596 "");
3280af22
NIS
597 sv_catpv(PL_linestr,";}");
598 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
599 }
600 else
3280af22 601 sv_setpv(PL_linestr,";");
ffb4593c
NT
602
603 /* reset variables for next time we lex */
9cbb5ea2
GS
604 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
605 = SvPVX(PL_linestr);
3280af22 606 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 607 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
608
609 /* Close the filehandle. Could be from -P preprocessor,
610 * STDIN, or a regular file. If we were reading code from
611 * STDIN (because the commandline held no -e or filename)
612 * then we don't close it, we reset it so the code can
613 * read from STDIN too.
614 */
615
3280af22
NIS
616 if (PL_preprocess && !PL_in_eval)
617 (void)PerlProc_pclose(PL_rsfp);
618 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
619 PerlIO_clearerr(PL_rsfp);
8990e307 620 else
3280af22
NIS
621 (void)PerlIO_close(PL_rsfp);
622 PL_rsfp = Nullfp;
463ee0b2
LW
623 return s;
624 }
ffb4593c
NT
625
626 /* not at end of file, so we only read another line */
09bef843
SB
627 /* make corresponding updates to old pointers, for yyerror() */
628 oldprevlen = PL_oldbufptr - PL_bufend;
629 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
630 if (PL_last_uni)
631 oldunilen = PL_last_uni - PL_bufend;
632 if (PL_last_lop)
633 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
634 PL_linestart = PL_bufptr = s + prevlen;
635 PL_bufend = s + SvCUR(PL_linestr);
636 s = PL_bufptr;
09bef843
SB
637 PL_oldbufptr = s + oldprevlen;
638 PL_oldoldbufptr = s + oldoldprevlen;
639 if (PL_last_uni)
640 PL_last_uni = s + oldunilen;
641 if (PL_last_lop)
642 PL_last_lop = s + oldloplen;
a0d0e21e 643 incline(s);
ffb4593c
NT
644
645 /* debugger active and we're not compiling the debugger code,
646 * so store the line into the debugger's array of lines
647 */
3280af22 648 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
649 SV *sv = NEWSV(85,0);
650
651 sv_upgrade(sv, SVt_PVMG);
3280af22 652 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
653 (void)SvIOK_on(sv);
654 SvIVX(sv) = 0;
57843af0 655 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 656 }
463ee0b2 657 }
a687059c 658}
378cc40b 659
ffb4593c
NT
660/*
661 * S_check_uni
662 * Check the unary operators to ensure there's no ambiguity in how they're
663 * used. An ambiguous piece of code would be:
664 * rand + 5
665 * This doesn't mean rand() + 5. Because rand() is a unary operator,
666 * the +5 is its argument.
667 */
668
76e3520e 669STATIC void
cea2e8a9 670S_check_uni(pTHX)
ba106d47 671{
2f3197b3 672 char *s;
a0d0e21e 673 char *t;
2f3197b3 674
3280af22 675 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 676 return;
3280af22
NIS
677 while (isSPACE(*PL_last_uni))
678 PL_last_uni++;
7e2040f0 679 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 680 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 681 return;
0453d815 682 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 683 char ch = *s;
0453d815 684 *s = '\0';
9014280d 685 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
4e553d73 686 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
687 PL_last_uni);
688 *s = ch;
689 }
2f3197b3
LW
690}
691
ffb4593c
NT
692/*
693 * LOP : macro to build a list operator. Its behaviour has been replaced
694 * with a subroutine, S_lop() for which LOP is just another name.
695 */
696
a0d0e21e
LW
697#define LOP(f,x) return lop(f,x,s)
698
ffb4593c
NT
699/*
700 * S_lop
701 * Build a list operator (or something that might be one). The rules:
702 * - if we have a next token, then it's a list operator [why?]
703 * - if the next thing is an opening paren, then it's a function
704 * - else it's a list operator
705 */
706
76e3520e 707STATIC I32
a0be28da 708S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 709{
79072805 710 yylval.ival = f;
35c8bce7 711 CLINE;
075953c3 712 REPORT("lop", f)
3280af22
NIS
713 PL_expect = x;
714 PL_bufptr = s;
715 PL_last_lop = PL_oldbufptr;
eb160463 716 PL_last_lop_op = (OPCODE)f;
3280af22 717 if (PL_nexttoke)
a0d0e21e 718 return LSTOP;
79072805
LW
719 if (*s == '(')
720 return FUNC;
721 s = skipspace(s);
722 if (*s == '(')
723 return FUNC;
724 else
725 return LSTOP;
726}
727
ffb4593c
NT
728/*
729 * S_force_next
9cbb5ea2 730 * When the lexer realizes it knows the next token (for instance,
ffb4593c 731 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
732 * to know what token to return the next time the lexer is called. Caller
733 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
734 * handles the token correctly.
ffb4593c
NT
735 */
736
4e553d73 737STATIC void
cea2e8a9 738S_force_next(pTHX_ I32 type)
79072805 739{
3280af22
NIS
740 PL_nexttype[PL_nexttoke] = type;
741 PL_nexttoke++;
742 if (PL_lex_state != LEX_KNOWNEXT) {
743 PL_lex_defer = PL_lex_state;
744 PL_lex_expect = PL_expect;
745 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
746 }
747}
748
ffb4593c
NT
749/*
750 * S_force_word
751 * When the lexer knows the next thing is a word (for instance, it has
752 * just seen -> and it knows that the next char is a word char, then
753 * it calls S_force_word to stick the next word into the PL_next lookahead.
754 *
755 * Arguments:
b1b65b59 756 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
757 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
758 * int check_keyword : if true, Perl checks to make sure the word isn't
759 * a keyword (do this if the word is a label, e.g. goto FOO)
760 * int allow_pack : if true, : characters will also be allowed (require,
761 * use, etc. do this)
9cbb5ea2 762 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
763 */
764
76e3520e 765STATIC char *
cea2e8a9 766S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 767{
463ee0b2
LW
768 register char *s;
769 STRLEN len;
4e553d73 770
463ee0b2
LW
771 start = skipspace(start);
772 s = start;
7e2040f0 773 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 774 (allow_pack && *s == ':') ||
15f0808c 775 (allow_initial_tick && *s == '\'') )
a0d0e21e 776 {
3280af22
NIS
777 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
778 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
779 return start;
780 if (token == METHOD) {
781 s = skipspace(s);
782 if (*s == '(')
3280af22 783 PL_expect = XTERM;
463ee0b2 784 else {
3280af22 785 PL_expect = XOPERATOR;
463ee0b2 786 }
79072805 787 }
3280af22
NIS
788 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
789 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
790 force_next(token);
791 }
792 return s;
793}
794
ffb4593c
NT
795/*
796 * S_force_ident
9cbb5ea2 797 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
798 * text only contains the "foo" portion. The first argument is a pointer
799 * to the "foo", and the second argument is the type symbol to prefix.
800 * Forces the next token to be a "WORD".
9cbb5ea2 801 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
802 */
803
76e3520e 804STATIC void
cea2e8a9 805S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
806{
807 if (s && *s) {
11343788 808 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 809 PL_nextval[PL_nexttoke].opval = o;
79072805 810 force_next(WORD);
748a9306 811 if (kind) {
11343788 812 o->op_private = OPpCONST_ENTERED;
55497cff 813 /* XXX see note in pp_entereval() for why we forgo typo
814 warnings if the symbol must be introduced in an eval.
815 GSAR 96-10-12 */
3280af22 816 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
817 kind == '$' ? SVt_PV :
818 kind == '@' ? SVt_PVAV :
819 kind == '%' ? SVt_PVHV :
820 SVt_PVGV
821 );
748a9306 822 }
79072805
LW
823 }
824}
825
1571675a
GS
826NV
827Perl_str_to_version(pTHX_ SV *sv)
828{
829 NV retval = 0.0;
830 NV nshift = 1.0;
831 STRLEN len;
832 char *start = SvPVx(sv,len);
3aa33fe5 833 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
834 char *end = start + len;
835 while (start < end) {
ba210ebe 836 STRLEN skip;
1571675a
GS
837 UV n;
838 if (utf)
9041c2e3 839 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
840 else {
841 n = *(U8*)start;
842 skip = 1;
843 }
844 retval += ((NV)n)/nshift;
845 start += skip;
846 nshift *= 1000;
847 }
848 return retval;
849}
850
4e553d73 851/*
ffb4593c
NT
852 * S_force_version
853 * Forces the next token to be a version number.
e759cc13
RGS
854 * If the next token appears to be an invalid version number, (e.g. "v2b"),
855 * and if "guessing" is TRUE, then no new token is created (and the caller
856 * must use an alternative parsing method).
ffb4593c
NT
857 */
858
76e3520e 859STATIC char *
e759cc13 860S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 861{
862 OP *version = Nullop;
44dcb63b 863 char *d;
89bfa8cd 864
865 s = skipspace(s);
866
44dcb63b 867 d = s;
dd629d5b 868 if (*d == 'v')
44dcb63b 869 d++;
44dcb63b 870 if (isDIGIT(*d)) {
e759cc13
RGS
871 while (isDIGIT(*d) || *d == '_' || *d == '.')
872 d++;
9f3d182e 873 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 874 SV *ver;
b73d6f50 875 s = scan_num(s, &yylval);
89bfa8cd 876 version = yylval.opval;
dd629d5b
GS
877 ver = cSVOPx(version)->op_sv;
878 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 879 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
880 SvNVX(ver) = str_to_version(ver);
881 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 882 }
89bfa8cd 883 }
e759cc13
RGS
884 else if (guessing)
885 return s;
89bfa8cd 886 }
887
888 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 889 PL_nextval[PL_nexttoke].opval = version;
4e553d73 890 force_next(WORD);
89bfa8cd 891
e759cc13 892 return s;
89bfa8cd 893}
894
ffb4593c
NT
895/*
896 * S_tokeq
897 * Tokenize a quoted string passed in as an SV. It finds the next
898 * chunk, up to end of string or a backslash. It may make a new
899 * SV containing that chunk (if HINT_NEW_STRING is on). It also
900 * turns \\ into \.
901 */
902
76e3520e 903STATIC SV *
cea2e8a9 904S_tokeq(pTHX_ SV *sv)
79072805
LW
905{
906 register char *s;
907 register char *send;
908 register char *d;
b3ac6de7
IZ
909 STRLEN len = 0;
910 SV *pv = sv;
79072805
LW
911
912 if (!SvLEN(sv))
b3ac6de7 913 goto finish;
79072805 914
a0d0e21e 915 s = SvPV_force(sv, len);
21a311ee 916 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 917 goto finish;
463ee0b2 918 send = s + len;
79072805
LW
919 while (s < send && *s != '\\')
920 s++;
921 if (s == send)
b3ac6de7 922 goto finish;
79072805 923 d = s;
be4731d2 924 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 925 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
926 if (SvUTF8(sv))
927 SvUTF8_on(pv);
928 }
79072805
LW
929 while (s < send) {
930 if (*s == '\\') {
a0d0e21e 931 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
932 s++; /* all that, just for this */
933 }
934 *d++ = *s++;
935 }
936 *d = '\0';
463ee0b2 937 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 938 finish:
3280af22 939 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 940 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
941 return sv;
942}
943
ffb4593c
NT
944/*
945 * Now come three functions related to double-quote context,
946 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
947 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
948 * interact with PL_lex_state, and create fake ( ... ) argument lists
949 * to handle functions and concatenation.
950 * They assume that whoever calls them will be setting up a fake
951 * join call, because each subthing puts a ',' after it. This lets
952 * "lower \luPpEr"
953 * become
954 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
955 *
956 * (I'm not sure whether the spurious commas at the end of lcfirst's
957 * arguments and join's arguments are created or not).
958 */
959
960/*
961 * S_sublex_start
962 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
963 *
964 * Pattern matching will set PL_lex_op to the pattern-matching op to
965 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
966 *
967 * OP_CONST and OP_READLINE are easy--just make the new op and return.
968 *
969 * Everything else becomes a FUNC.
970 *
971 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
972 * had an OP_CONST or OP_READLINE). This just sets us up for a
973 * call to S_sublex_push().
974 */
975
76e3520e 976STATIC I32
cea2e8a9 977S_sublex_start(pTHX)
79072805
LW
978{
979 register I32 op_type = yylval.ival;
79072805
LW
980
981 if (op_type == OP_NULL) {
3280af22
NIS
982 yylval.opval = PL_lex_op;
983 PL_lex_op = Nullop;
79072805
LW
984 return THING;
985 }
986 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 987 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
988
989 if (SvTYPE(sv) == SVt_PVIV) {
990 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
991 STRLEN len;
992 char *p;
993 SV *nsv;
994
995 p = SvPV(sv, len);
79cb57f6 996 nsv = newSVpvn(p, len);
01ec43d0
GS
997 if (SvUTF8(sv))
998 SvUTF8_on(nsv);
b3ac6de7
IZ
999 SvREFCNT_dec(sv);
1000 sv = nsv;
4e553d73 1001 }
b3ac6de7 1002 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1003 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1004 /* Allow <FH> // "foo" */
1005 if (op_type == OP_READLINE)
1006 PL_expect = XTERMORDORDOR;
79072805
LW
1007 return THING;
1008 }
1009
3280af22
NIS
1010 PL_sublex_info.super_state = PL_lex_state;
1011 PL_sublex_info.sub_inwhat = op_type;
1012 PL_sublex_info.sub_op = PL_lex_op;
1013 PL_lex_state = LEX_INTERPPUSH;
55497cff 1014
3280af22
NIS
1015 PL_expect = XTERM;
1016 if (PL_lex_op) {
1017 yylval.opval = PL_lex_op;
1018 PL_lex_op = Nullop;
55497cff 1019 return PMFUNC;
1020 }
1021 else
1022 return FUNC;
1023}
1024
ffb4593c
NT
1025/*
1026 * S_sublex_push
1027 * Create a new scope to save the lexing state. The scope will be
1028 * ended in S_sublex_done. Returns a '(', starting the function arguments
1029 * to the uc, lc, etc. found before.
1030 * Sets PL_lex_state to LEX_INTERPCONCAT.
1031 */
1032
76e3520e 1033STATIC I32
cea2e8a9 1034S_sublex_push(pTHX)
55497cff 1035{
f46d017c 1036 ENTER;
55497cff 1037
3280af22
NIS
1038 PL_lex_state = PL_sublex_info.super_state;
1039 SAVEI32(PL_lex_dojoin);
1040 SAVEI32(PL_lex_brackets);
3280af22
NIS
1041 SAVEI32(PL_lex_casemods);
1042 SAVEI32(PL_lex_starts);
1043 SAVEI32(PL_lex_state);
7766f137 1044 SAVEVPTR(PL_lex_inpat);
3280af22 1045 SAVEI32(PL_lex_inwhat);
57843af0 1046 SAVECOPLINE(PL_curcop);
3280af22 1047 SAVEPPTR(PL_bufptr);
8452ff4b 1048 SAVEPPTR(PL_bufend);
3280af22
NIS
1049 SAVEPPTR(PL_oldbufptr);
1050 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1051 SAVEPPTR(PL_last_lop);
1052 SAVEPPTR(PL_last_uni);
3280af22
NIS
1053 SAVEPPTR(PL_linestart);
1054 SAVESPTR(PL_linestr);
1055 SAVEPPTR(PL_lex_brackstack);
1056 SAVEPPTR(PL_lex_casestack);
1057
1058 PL_linestr = PL_lex_stuff;
1059 PL_lex_stuff = Nullsv;
1060
9cbb5ea2
GS
1061 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1062 = SvPVX(PL_linestr);
3280af22 1063 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1064 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1065 SAVEFREESV(PL_linestr);
1066
1067 PL_lex_dojoin = FALSE;
1068 PL_lex_brackets = 0;
3280af22
NIS
1069 New(899, PL_lex_brackstack, 120, char);
1070 New(899, PL_lex_casestack, 12, char);
1071 SAVEFREEPV(PL_lex_brackstack);
1072 SAVEFREEPV(PL_lex_casestack);
1073 PL_lex_casemods = 0;
1074 *PL_lex_casestack = '\0';
1075 PL_lex_starts = 0;
1076 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1077 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1078
1079 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1080 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1081 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1082 else
3280af22 1083 PL_lex_inpat = Nullop;
79072805 1084
55497cff 1085 return '(';
79072805
LW
1086}
1087
ffb4593c
NT
1088/*
1089 * S_sublex_done
1090 * Restores lexer state after a S_sublex_push.
1091 */
1092
76e3520e 1093STATIC I32
cea2e8a9 1094S_sublex_done(pTHX)
79072805 1095{
3280af22 1096 if (!PL_lex_starts++) {
9aa983d2
JH
1097 SV *sv = newSVpvn("",0);
1098 if (SvUTF8(PL_linestr))
1099 SvUTF8_on(sv);
3280af22 1100 PL_expect = XOPERATOR;
9aa983d2 1101 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1102 return THING;
1103 }
1104
3280af22
NIS
1105 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1106 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1107 return yylex();
79072805
LW
1108 }
1109
ffb4593c 1110 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1111 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1112 PL_linestr = PL_lex_repl;
1113 PL_lex_inpat = 0;
1114 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1115 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1116 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1117 SAVEFREESV(PL_linestr);
1118 PL_lex_dojoin = FALSE;
1119 PL_lex_brackets = 0;
3280af22
NIS
1120 PL_lex_casemods = 0;
1121 *PL_lex_casestack = '\0';
1122 PL_lex_starts = 0;
25da4f38 1123 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1124 PL_lex_state = LEX_INTERPNORMAL;
1125 PL_lex_starts++;
e9fa98b2
HS
1126 /* we don't clear PL_lex_repl here, so that we can check later
1127 whether this is an evalled subst; that means we rely on the
1128 logic to ensure sublex_done() is called again only via the
1129 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1130 }
e9fa98b2 1131 else {
3280af22 1132 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1133 PL_lex_repl = Nullsv;
1134 }
79072805 1135 return ',';
ffed7fef
LW
1136 }
1137 else {
f46d017c 1138 LEAVE;
3280af22
NIS
1139 PL_bufend = SvPVX(PL_linestr);
1140 PL_bufend += SvCUR(PL_linestr);
1141 PL_expect = XOPERATOR;
09bef843 1142 PL_sublex_info.sub_inwhat = 0;
79072805 1143 return ')';
ffed7fef
LW
1144 }
1145}
1146
02aa26ce
NT
1147/*
1148 scan_const
1149
1150 Extracts a pattern, double-quoted string, or transliteration. This
1151 is terrifying code.
1152
3280af22
NIS
1153 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1154 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1155 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1156
9b599b2a
GS
1157 Returns a pointer to the character scanned up to. Iff this is
1158 advanced from the start pointer supplied (ie if anything was
1159 successfully parsed), will leave an OP for the substring scanned
1160 in yylval. Caller must intuit reason for not parsing further
1161 by looking at the next characters herself.
1162
02aa26ce
NT
1163 In patterns:
1164 backslashes:
1165 double-quoted style: \r and \n
1166 regexp special ones: \D \s
1167 constants: \x3
1168 backrefs: \1 (deprecated in substitution replacements)
1169 case and quoting: \U \Q \E
1170 stops on @ and $, but not for $ as tail anchor
1171
1172 In transliterations:
1173 characters are VERY literal, except for - not at the start or end
1174 of the string, which indicates a range. scan_const expands the
1175 range to the full set of intermediate characters.
1176
1177 In double-quoted strings:
1178 backslashes:
1179 double-quoted style: \r and \n
1180 constants: \x3
1181 backrefs: \1 (deprecated)
1182 case and quoting: \U \Q \E
1183 stops on @ and $
1184
1185 scan_const does *not* construct ops to handle interpolated strings.
1186 It stops processing as soon as it finds an embedded $ or @ variable
1187 and leaves it to the caller to work out what's going on.
1188
da6eedaa 1189 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1190
1191 $ in pattern could be $foo or could be tail anchor. Assumption:
1192 it's a tail anchor if $ is the last thing in the string, or if it's
1193 followed by one of ")| \n\t"
1194
1195 \1 (backreferences) are turned into $1
1196
1197 The structure of the code is
1198 while (there's a character to process) {
1199 handle transliteration ranges
1200 skip regexp comments
1201 skip # initiated comments in //x patterns
1202 check for embedded @foo
1203 check for embedded scalars
1204 if (backslash) {
1205 leave intact backslashes from leave (below)
1206 deprecate \1 in strings and sub replacements
1207 handle string-changing backslashes \l \U \Q \E, etc.
1208 switch (what was escaped) {
1209 handle - in a transliteration (becomes a literal -)
1210 handle \132 octal characters
1211 handle 0x15 hex characters
1212 handle \cV (control V)
1213 handle printf backslashes (\f, \r, \n, etc)
1214 } (end switch)
1215 } (end if backslash)
1216 } (end while character to read)
4e553d73 1217
02aa26ce
NT
1218*/
1219
76e3520e 1220STATIC char *
cea2e8a9 1221S_scan_const(pTHX_ char *start)
79072805 1222{
3280af22 1223 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1224 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1225 register char *s = start; /* start of the constant */
1226 register char *d = SvPVX(sv); /* destination for copies */
1227 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1228 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1229 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1230 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1231 UV uv;
1232
dff6d3cd 1233 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1234 PL_lex_inpat
4a2d328f 1235 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1236 : "";
79072805 1237
2b9d42f0
NIS
1238 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1239 /* If we are doing a trans and we know we want UTF8 set expectation */
1240 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1241 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1242 }
1243
1244
79072805 1245 while (s < send || dorange) {
02aa26ce 1246 /* get transliterations out of the way (they're most literal) */
3280af22 1247 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1248 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1249 if (dorange) {
1ba5c669
JH
1250 I32 i; /* current expanded character */
1251 I32 min; /* first character in range */
1252 I32 max; /* last character in range */
02aa26ce 1253
2b9d42f0 1254 if (has_utf8) {
8973db79
JH
1255 char *c = (char*)utf8_hop((U8*)d, -1);
1256 char *e = d++;
1257 while (e-- > c)
1258 *(e + 1) = *e;
25716404 1259 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1260 /* mark the range as done, and continue */
1261 dorange = FALSE;
1262 didrange = TRUE;
1263 continue;
1264 }
2b9d42f0 1265
02aa26ce 1266 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1267 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1268 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1269 d -= 2; /* eat the first char and the - */
1270
8ada0baa
JH
1271 min = (U8)*d; /* first char in range */
1272 max = (U8)d[1]; /* last char in range */
1273
c2e66d9e 1274 if (min > max) {
01ec43d0 1275 Perl_croak(aTHX_
d1573ac7 1276 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1277 (char)min, (char)max);
c2e66d9e
GS
1278 }
1279
c7f1f016 1280#ifdef EBCDIC
8ada0baa
JH
1281 if ((isLOWER(min) && isLOWER(max)) ||
1282 (isUPPER(min) && isUPPER(max))) {
1283 if (isLOWER(min)) {
1284 for (i = min; i <= max; i++)
1285 if (isLOWER(i))
db42d148 1286 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1287 } else {
1288 for (i = min; i <= max; i++)
1289 if (isUPPER(i))
db42d148 1290 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1291 }
1292 }
1293 else
1294#endif
1295 for (i = min; i <= max; i++)
eb160463 1296 *d++ = (char)i;
02aa26ce
NT
1297
1298 /* mark the range as done, and continue */
79072805 1299 dorange = FALSE;
01ec43d0 1300 didrange = TRUE;
79072805 1301 continue;
4e553d73 1302 }
02aa26ce
NT
1303
1304 /* range begins (ignore - as first or last char) */
79072805 1305 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1306 if (didrange) {
1fafa243 1307 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1308 }
2b9d42f0 1309 if (has_utf8) {
25716404 1310 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1311 s++;
1312 continue;
1313 }
79072805
LW
1314 dorange = TRUE;
1315 s++;
01ec43d0
GS
1316 }
1317 else {
1318 didrange = FALSE;
1319 }
79072805 1320 }
02aa26ce
NT
1321
1322 /* if we get here, we're not doing a transliteration */
1323
0f5d15d6
IZ
1324 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1325 except for the last char, which will be done separately. */
3280af22 1326 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1327 if (s[2] == '#') {
1328 while (s < send && *s != ')')
db42d148 1329 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1330 }
1331 else if (s[2] == '{' /* This should match regcomp.c */
1332 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1333 {
cc6b7395 1334 I32 count = 1;
0f5d15d6 1335 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1336 char c;
1337
d9f97599
GS
1338 while (count && (c = *regparse)) {
1339 if (c == '\\' && regparse[1])
1340 regparse++;
4e553d73 1341 else if (c == '{')
cc6b7395 1342 count++;
4e553d73 1343 else if (c == '}')
cc6b7395 1344 count--;
d9f97599 1345 regparse++;
cc6b7395 1346 }
5bdf89e7
IZ
1347 if (*regparse != ')') {
1348 regparse--; /* Leave one char for continuation. */
cc6b7395 1349 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1350 }
0f5d15d6 1351 while (s < regparse)
db42d148 1352 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1353 }
748a9306 1354 }
02aa26ce
NT
1355
1356 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1357 else if (*s == '#' && PL_lex_inpat &&
1358 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1359 while (s+1 < send && *s != '\n')
db42d148 1360 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1361 }
02aa26ce 1362
5d1d4326 1363 /* check for embedded arrays
da6eedaa 1364 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1365 */
7e2040f0 1366 else if (*s == '@' && s[1]
5d1d4326 1367 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1368 break;
02aa26ce
NT
1369
1370 /* check for embedded scalars. only stop if we're sure it's a
1371 variable.
1372 */
79072805 1373 else if (*s == '$') {
3280af22 1374 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1375 break;
6002328a 1376 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1377 break; /* in regexp, $ might be tail anchor */
1378 }
02aa26ce 1379
2b9d42f0
NIS
1380 /* End of else if chain - OP_TRANS rejoin rest */
1381
02aa26ce 1382 /* backslashes */
79072805
LW
1383 if (*s == '\\' && s+1 < send) {
1384 s++;
02aa26ce
NT
1385
1386 /* some backslashes we leave behind */
c9f97d15 1387 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1388 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1389 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1390 continue;
1391 }
02aa26ce
NT
1392
1393 /* deprecate \1 in strings and substitution replacements */
3280af22 1394 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1395 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1396 {
599cee73 1397 if (ckWARN(WARN_SYNTAX))
9014280d 1398 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1399 *--s = '$';
1400 break;
1401 }
02aa26ce
NT
1402
1403 /* string-change backslash escapes */
3280af22 1404 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1405 --s;
1406 break;
1407 }
02aa26ce
NT
1408
1409 /* if we get here, it's either a quoted -, or a digit */
79072805 1410 switch (*s) {
02aa26ce
NT
1411
1412 /* quoted - in transliterations */
79072805 1413 case '-':
3280af22 1414 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1415 *d++ = *s++;
1416 continue;
1417 }
1418 /* FALL THROUGH */
1419 default:
11b8faa4 1420 {
707afd92
MS
1421 if (ckWARN(WARN_MISC) &&
1422 isALNUM(*s) &&
1423 *s != '_')
9014280d 1424 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1425 "Unrecognized escape \\%c passed through",
1426 *s);
1427 /* default action is to copy the quoted character */
f9a63242 1428 goto default_action;
11b8faa4 1429 }
02aa26ce
NT
1430
1431 /* \132 indicates an octal constant */
79072805
LW
1432 case '0': case '1': case '2': case '3':
1433 case '4': case '5': case '6': case '7':
ba210ebe 1434 {
53305cf1
NC
1435 I32 flags = 0;
1436 STRLEN len = 3;
1437 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1438 s += len;
1439 }
012bcf8d 1440 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1441
1442 /* \x24 indicates a hex constant */
79072805 1443 case 'x':
a0ed51b3
LW
1444 ++s;
1445 if (*s == '{') {
1446 char* e = strchr(s, '}');
a4c04bdc
NC
1447 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1448 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1449 STRLEN len;
355860ce 1450
53305cf1 1451 ++s;
adaeee49 1452 if (!e) {
a0ed51b3 1453 yyerror("Missing right brace on \\x{}");
355860ce 1454 continue;
ba210ebe 1455 }
53305cf1
NC
1456 len = e - s;
1457 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1458 s = e + 1;
a0ed51b3
LW
1459 }
1460 else {
ba210ebe 1461 {
53305cf1 1462 STRLEN len = 2;
a4c04bdc 1463 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1464 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1465 s += len;
1466 }
012bcf8d
GS
1467 }
1468
1469 NUM_ESCAPE_INSERT:
1470 /* Insert oct or hex escaped character.
301d3d20 1471 * There will always enough room in sv since such
db42d148 1472 * escapes will be longer than any UTF-8 sequence
301d3d20 1473 * they can end up as. */
ba7cea30 1474
c7f1f016
NIS
1475 /* We need to map to chars to ASCII before doing the tests
1476 to cover EBCDIC
1477 */
c4d5f83a 1478 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1479 if (!has_utf8 && uv > 255) {
301d3d20
JH
1480 /* Might need to recode whatever we have
1481 * accumulated so far if it contains any
1482 * hibit chars.
1483 *
1484 * (Can't we keep track of that and avoid
1485 * this rescan? --jhi)
012bcf8d 1486 */
c7f1f016 1487 int hicount = 0;
63cd0674
NIS
1488 U8 *c;
1489 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1490 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1491 hicount++;
db42d148 1492 }
012bcf8d 1493 }
63cd0674 1494 if (hicount) {
db42d148
NIS
1495 STRLEN offset = d - SvPVX(sv);
1496 U8 *src, *dst;
1497 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1498 src = (U8 *)d - 1;
1499 dst = src+hicount;
1500 d += hicount;
1501 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1502 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1503 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1504 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1505 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1506 }
1507 else {
63cd0674 1508 *dst-- = *src;
012bcf8d 1509 }
c7f1f016 1510 src--;
012bcf8d
GS
1511 }
1512 }
1513 }
1514
9aa983d2 1515 if (has_utf8 || uv > 255) {
9041c2e3 1516 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1517 has_utf8 = TRUE;
f9a63242
JH
1518 if (PL_lex_inwhat == OP_TRANS &&
1519 PL_sublex_info.sub_op) {
1520 PL_sublex_info.sub_op->op_private |=
1521 (PL_lex_repl ? OPpTRANS_FROM_UTF
1522 : OPpTRANS_TO_UTF);
f9a63242 1523 }
012bcf8d 1524 }
a0ed51b3 1525 else {
012bcf8d 1526 *d++ = (char)uv;
a0ed51b3 1527 }
012bcf8d
GS
1528 }
1529 else {
c4d5f83a 1530 *d++ = (char) uv;
a0ed51b3 1531 }
79072805 1532 continue;
02aa26ce 1533
b239daa5 1534 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1535 case 'N':
55eda711 1536 ++s;
423cee85
JH
1537 if (*s == '{') {
1538 char* e = strchr(s, '}');
155aba94 1539 SV *res;
423cee85
JH
1540 STRLEN len;
1541 char *str;
4e553d73 1542
423cee85 1543 if (!e) {
5777a3f7 1544 yyerror("Missing right brace on \\N{}");
423cee85
JH
1545 e = s - 1;
1546 goto cont_scan;
1547 }
dbc0d4f2
JH
1548 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1549 /* \N{U+...} */
1550 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1551 PERL_SCAN_DISALLOW_PREFIX;
1552 s += 3;
1553 len = e - s;
1554 uv = grok_hex(s, &len, &flags, NULL);
1555 s = e + 1;
1556 goto NUM_ESCAPE_INSERT;
1557 }
55eda711
JH
1558 res = newSVpvn(s + 1, e - s - 1);
1559 res = new_constant( Nullch, 0, "charnames",
1560 res, Nullsv, "\\N{...}" );
f9a63242
JH
1561 if (has_utf8)
1562 sv_utf8_upgrade(res);
423cee85 1563 str = SvPV(res,len);
1c47067b
JH
1564#ifdef EBCDIC_NEVER_MIND
1565 /* charnames uses pack U and that has been
1566 * recently changed to do the below uni->native
1567 * mapping, so this would be redundant (and wrong,
1568 * the code point would be doubly converted).
1569 * But leave this in just in case the pack U change
1570 * gets revoked, but the semantics is still
1571 * desireable for charnames. --jhi */
cddc7ef4
JH
1572 {
1573 UV uv = utf8_to_uvchr((U8*)str, 0);
1574
1575 if (uv < 0x100) {
1576 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1577
1578 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1579 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1580 str = SvPV(res, len);
1581 }
1582 }
1583#endif
89491803 1584 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1585 char *ostart = SvPVX(sv);
1586 SvCUR_set(sv, d - ostart);
1587 SvPOK_on(sv);
e4f3eed8 1588 *d = '\0';
f08d6ad9 1589 sv_utf8_upgrade(sv);
d2f449dd 1590 /* this just broke our allocation above... */
eb160463 1591 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1592 d = SvPVX(sv) + SvCUR(sv);
89491803 1593 has_utf8 = TRUE;
f08d6ad9 1594 }
eb160463 1595 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1596 char *odest = SvPVX(sv);
1597
8973db79 1598 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1599 d = SvPVX(sv) + (d - odest);
1600 }
1601 Copy(str, d, len, char);
1602 d += len;
1603 SvREFCNT_dec(res);
1604 cont_scan:
1605 s = e + 1;
1606 }
1607 else
5777a3f7 1608 yyerror("Missing braces on \\N{}");
423cee85
JH
1609 continue;
1610
02aa26ce 1611 /* \c is a control character */
79072805
LW
1612 case 'c':
1613 s++;
ba210ebe
JH
1614 {
1615 U8 c = *s++;
c7f1f016
NIS
1616#ifdef EBCDIC
1617 if (isLOWER(c))
1618 c = toUPPER(c);
1619#endif
db42d148 1620 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1621 }
79072805 1622 continue;
02aa26ce
NT
1623
1624 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1625 case 'b':
db42d148 1626 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1627 break;
1628 case 'n':
db42d148 1629 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1630 break;
1631 case 'r':
db42d148 1632 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1633 break;
1634 case 'f':
db42d148 1635 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1636 break;
1637 case 't':
db42d148 1638 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1639 break;
34a3fe2a 1640 case 'e':
db42d148 1641 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1642 break;
1643 case 'a':
db42d148 1644 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1645 break;
02aa26ce
NT
1646 } /* end switch */
1647
79072805
LW
1648 s++;
1649 continue;
02aa26ce
NT
1650 } /* end if (backslash) */
1651
f9a63242 1652 default_action:
2b9d42f0
NIS
1653 /* If we started with encoded form, or already know we want it
1654 and then encode the next character */
1655 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1656 STRLEN len = 1;
1657 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1658 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1659 s += len;
1660 if (need > len) {
1661 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1662 STRLEN off = d - SvPVX(sv);
1663 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1664 }
1665 d = (char*)uvchr_to_utf8((U8*)d, uv);
1666 has_utf8 = TRUE;
1667 }
1668 else {
1669 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1670 }
02aa26ce
NT
1671 } /* while loop to process each character */
1672
1673 /* terminate the string and set up the sv */
79072805 1674 *d = '\0';
463ee0b2 1675 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1676 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1677 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1678
79072805 1679 SvPOK_on(sv);
9f4817db 1680 if (PL_encoding && !has_utf8) {
d0063567
DK
1681 sv_recode_to_utf8(sv, PL_encoding);
1682 if (SvUTF8(sv))
1683 has_utf8 = TRUE;
9f4817db 1684 }
2b9d42f0 1685 if (has_utf8) {
7e2040f0 1686 SvUTF8_on(sv);
2b9d42f0 1687 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1688 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1689 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1690 }
1691 }
79072805 1692
02aa26ce 1693 /* shrink the sv if we allocated more than we used */
79072805
LW
1694 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1695 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1696 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1697 }
02aa26ce 1698
9b599b2a 1699 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1700 if (s > PL_bufptr) {
1701 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1702 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1703 sv, Nullsv,
4e553d73 1704 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1705 ? "tr"
3280af22 1706 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1707 ? "s"
1708 : "qq")));
79072805 1709 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1710 } else
8990e307 1711 SvREFCNT_dec(sv);
79072805
LW
1712 return s;
1713}
1714
ffb4593c
NT
1715/* S_intuit_more
1716 * Returns TRUE if there's more to the expression (e.g., a subscript),
1717 * FALSE otherwise.
ffb4593c
NT
1718 *
1719 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1720 *
1721 * ->[ and ->{ return TRUE
1722 * { and [ outside a pattern are always subscripts, so return TRUE
1723 * if we're outside a pattern and it's not { or [, then return FALSE
1724 * if we're in a pattern and the first char is a {
1725 * {4,5} (any digits around the comma) returns FALSE
1726 * if we're in a pattern and the first char is a [
1727 * [] returns FALSE
1728 * [SOMETHING] has a funky algorithm to decide whether it's a
1729 * character class or not. It has to deal with things like
1730 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1731 * anything else returns TRUE
1732 */
1733
9cbb5ea2
GS
1734/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1735
76e3520e 1736STATIC int
cea2e8a9 1737S_intuit_more(pTHX_ register char *s)
79072805 1738{
3280af22 1739 if (PL_lex_brackets)
79072805
LW
1740 return TRUE;
1741 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1742 return TRUE;
1743 if (*s != '{' && *s != '[')
1744 return FALSE;
3280af22 1745 if (!PL_lex_inpat)
79072805
LW
1746 return TRUE;
1747
1748 /* In a pattern, so maybe we have {n,m}. */
1749 if (*s == '{') {
1750 s++;
1751 if (!isDIGIT(*s))
1752 return TRUE;
1753 while (isDIGIT(*s))
1754 s++;
1755 if (*s == ',')
1756 s++;
1757 while (isDIGIT(*s))
1758 s++;
1759 if (*s == '}')
1760 return FALSE;
1761 return TRUE;
1762
1763 }
1764
1765 /* On the other hand, maybe we have a character class */
1766
1767 s++;
1768 if (*s == ']' || *s == '^')
1769 return FALSE;
1770 else {
ffb4593c 1771 /* this is terrifying, and it works */
79072805
LW
1772 int weight = 2; /* let's weigh the evidence */
1773 char seen[256];
f27ffc4a 1774 unsigned char un_char = 255, last_un_char;
93a17b20 1775 char *send = strchr(s,']');
3280af22 1776 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1777
1778 if (!send) /* has to be an expression */
1779 return TRUE;
1780
1781 Zero(seen,256,char);
1782 if (*s == '$')
1783 weight -= 3;
1784 else if (isDIGIT(*s)) {
1785 if (s[1] != ']') {
1786 if (isDIGIT(s[1]) && s[2] == ']')
1787 weight -= 10;
1788 }
1789 else
1790 weight -= 100;
1791 }
1792 for (; s < send; s++) {
1793 last_un_char = un_char;
1794 un_char = (unsigned char)*s;
1795 switch (*s) {
1796 case '@':
1797 case '&':
1798 case '$':
1799 weight -= seen[un_char] * 10;
7e2040f0 1800 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1801 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1802 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1803 weight -= 100;
1804 else
1805 weight -= 10;
1806 }
1807 else if (*s == '$' && s[1] &&
93a17b20
LW
1808 strchr("[#!%*<>()-=",s[1])) {
1809 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1810 weight -= 10;
1811 else
1812 weight -= 1;
1813 }
1814 break;
1815 case '\\':
1816 un_char = 254;
1817 if (s[1]) {
93a17b20 1818 if (strchr("wds]",s[1]))
79072805
LW
1819 weight += 100;
1820 else if (seen['\''] || seen['"'])
1821 weight += 1;
93a17b20 1822 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1823 weight += 40;
1824 else if (isDIGIT(s[1])) {
1825 weight += 40;
1826 while (s[1] && isDIGIT(s[1]))
1827 s++;
1828 }
1829 }
1830 else
1831 weight += 100;
1832 break;
1833 case '-':
1834 if (s[1] == '\\')
1835 weight += 50;
93a17b20 1836 if (strchr("aA01! ",last_un_char))
79072805 1837 weight += 30;
93a17b20 1838 if (strchr("zZ79~",s[1]))
79072805 1839 weight += 30;
f27ffc4a
GS
1840 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1841 weight -= 5; /* cope with negative subscript */
79072805
LW
1842 break;
1843 default:
93a17b20 1844 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1845 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1846 char *d = tmpbuf;
1847 while (isALPHA(*s))
1848 *d++ = *s++;
1849 *d = '\0';
1850 if (keyword(tmpbuf, d - tmpbuf))
1851 weight -= 150;
1852 }
1853 if (un_char == last_un_char + 1)
1854 weight += 5;
1855 weight -= seen[un_char];
1856 break;
1857 }
1858 seen[un_char]++;
1859 }
1860 if (weight >= 0) /* probably a character class */
1861 return FALSE;
1862 }
1863
1864 return TRUE;
1865}
ffed7fef 1866
ffb4593c
NT
1867/*
1868 * S_intuit_method
1869 *
1870 * Does all the checking to disambiguate
1871 * foo bar
1872 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1873 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1874 *
1875 * First argument is the stuff after the first token, e.g. "bar".
1876 *
1877 * Not a method if bar is a filehandle.
1878 * Not a method if foo is a subroutine prototyped to take a filehandle.
1879 * Not a method if it's really "Foo $bar"
1880 * Method if it's "foo $bar"
1881 * Not a method if it's really "print foo $bar"
1882 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 1883 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 1884 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1885 * =>
1886 */
1887
76e3520e 1888STATIC int
cea2e8a9 1889S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1890{
1891 char *s = start + (*start == '$');
3280af22 1892 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1893 STRLEN len;
1894 GV* indirgv;
1895
1896 if (gv) {
b6c543e3 1897 CV *cv;
a0d0e21e
LW
1898 if (GvIO(gv))
1899 return 0;
b6c543e3
IZ
1900 if ((cv = GvCVu(gv))) {
1901 char *proto = SvPVX(cv);
1902 if (proto) {
1903 if (*proto == ';')
1904 proto++;
1905 if (*proto == '*')
1906 return 0;
1907 }
1908 } else
a0d0e21e
LW
1909 gv = 0;
1910 }
8903cb82 1911 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1912 /* start is the beginning of the possible filehandle/object,
1913 * and s is the end of it
1914 * tmpbuf is a copy of it
1915 */
1916
a0d0e21e 1917 if (*start == '$') {
3280af22 1918 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1919 return 0;
1920 s = skipspace(s);
3280af22
NIS
1921 PL_bufptr = start;
1922 PL_expect = XREF;
a0d0e21e
LW
1923 return *s == '(' ? FUNCMETH : METHOD;
1924 }
1925 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1926 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1927 len -= 2;
1928 tmpbuf[len] = '\0';
1929 goto bare_package;
1930 }
1931 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1932 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1933 return 0;
1934 /* filehandle or package name makes it a method */
89bfa8cd 1935 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1936 s = skipspace(s);
3280af22 1937 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1938 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1939 bare_package:
3280af22 1940 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1941 newSVpvn(tmpbuf,len));
3280af22
NIS
1942 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1943 PL_expect = XTERM;
a0d0e21e 1944 force_next(WORD);
3280af22 1945 PL_bufptr = s;
a0d0e21e
LW
1946 return *s == '(' ? FUNCMETH : METHOD;
1947 }
1948 }
1949 return 0;
1950}
1951
ffb4593c
NT
1952/*
1953 * S_incl_perldb
1954 * Return a string of Perl code to load the debugger. If PERL5DB
1955 * is set, it will return the contents of that, otherwise a
1956 * compile-time require of perl5db.pl.
1957 */
1958
76e3520e 1959STATIC char*
cea2e8a9 1960S_incl_perldb(pTHX)
a0d0e21e 1961{
3280af22 1962 if (PL_perldb) {
76e3520e 1963 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1964
1965 if (pdb)
1966 return pdb;
93189314 1967 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
1968 return "BEGIN { require 'perl5db.pl' }";
1969 }
1970 return "";
1971}
1972
1973
16d20bd9 1974/* Encoded script support. filter_add() effectively inserts a
4e553d73 1975 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1976 * Note that the filter function only applies to the current source file
1977 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1978 *
1979 * The datasv parameter (which may be NULL) can be used to pass
1980 * private data to this instance of the filter. The filter function
1981 * can recover the SV using the FILTER_DATA macro and use it to
1982 * store private buffers and state information.
1983 *
1984 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1985 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1986 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1987 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1988 * private use must be set using malloc'd pointers.
1989 */
16d20bd9
AD
1990
1991SV *
864dbfa3 1992Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1993{
f4c556ac
GS
1994 if (!funcp)
1995 return Nullsv;
1996
3280af22
NIS
1997 if (!PL_rsfp_filters)
1998 PL_rsfp_filters = newAV();
16d20bd9 1999 if (!datasv)
8c52afec 2000 datasv = NEWSV(255,0);
16d20bd9 2001 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2002 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2003 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2004 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2005 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2006 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2007 av_unshift(PL_rsfp_filters, 1);
2008 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2009 return(datasv);
2010}
4e553d73 2011
16d20bd9
AD
2012
2013/* Delete most recently added instance of this filter function. */
a0d0e21e 2014void
864dbfa3 2015Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2016{
e0c19803 2017 SV *datasv;
fe5a182c 2018 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2019 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2020 return;
2021 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2022 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2023 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2024 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2025 IoANY(datasv) = (void *)NULL;
3280af22 2026 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2027
16d20bd9
AD
2028 return;
2029 }
2030 /* we need to search for the correct entry and clear it */
cea2e8a9 2031 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2032}
2033
2034
2035/* Invoke the n'th filter function for the current rsfp. */
2036I32
864dbfa3 2037Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2038
2039
8ac85365 2040 /* 0 = read one text line */
a0d0e21e 2041{
16d20bd9
AD
2042 filter_t funcp;
2043 SV *datasv = NULL;
e50aee73 2044
3280af22 2045 if (!PL_rsfp_filters)
16d20bd9 2046 return -1;
3280af22 2047 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2048 /* Provide a default input filter to make life easy. */
2049 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2050 DEBUG_P(PerlIO_printf(Perl_debug_log,
2051 "filter_read %d: from rsfp\n", idx));
4e553d73 2052 if (maxlen) {
16d20bd9
AD
2053 /* Want a block */
2054 int len ;
2055 int old_len = SvCUR(buf_sv) ;
2056
2057 /* ensure buf_sv is large enough */
eb160463 2058 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2059 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2060 if (PerlIO_error(PL_rsfp))
37120919
AD
2061 return -1; /* error */
2062 else
2063 return 0 ; /* end of file */
2064 }
16d20bd9
AD
2065 SvCUR_set(buf_sv, old_len + len) ;
2066 } else {
2067 /* Want a line */
3280af22
NIS
2068 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2069 if (PerlIO_error(PL_rsfp))
37120919
AD
2070 return -1; /* error */
2071 else
2072 return 0 ; /* end of file */
2073 }
16d20bd9
AD
2074 }
2075 return SvCUR(buf_sv);
2076 }
2077 /* Skip this filter slot if filter has been deleted */
3280af22 2078 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2079 DEBUG_P(PerlIO_printf(Perl_debug_log,
2080 "filter_read %d: skipped (filter deleted)\n",
2081 idx));
16d20bd9
AD
2082 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2083 }
2084 /* Get function pointer hidden within datasv */
4755096e 2085 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2086 DEBUG_P(PerlIO_printf(Perl_debug_log,
2087 "filter_read %d: via function %p (%s)\n",
fe5a182c 2088 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2089 /* Call function. The function is expected to */
2090 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2091 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2092 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2093}
2094
76e3520e 2095STATIC char *
cea2e8a9 2096S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2097{
c39cd008 2098#ifdef PERL_CR_FILTER
3280af22 2099 if (!PL_rsfp_filters) {
c39cd008 2100 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2101 }
2102#endif
3280af22 2103 if (PL_rsfp_filters) {
16d20bd9 2104
55497cff 2105 if (!append)
2106 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2107 if (FILTER_READ(0, sv, 0) > 0)
2108 return ( SvPVX(sv) ) ;
2109 else
2110 return Nullch ;
2111 }
9d116dd7 2112 else
fd049845 2113 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2114}
2115
01ec43d0
GS
2116STATIC HV *
2117S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2118{
2119 GV *gv;
2120
01ec43d0 2121 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2122 return PL_curstash;
2123
2124 if (len > 2 &&
2125 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2126 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2127 {
2128 return GvHV(gv); /* Foo:: */
def3634b
GS
2129 }
2130
2131 /* use constant CLASS => 'MyClass' */
2132 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2133 SV *sv;
2134 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2135 pkgname = SvPV_nolen(sv);
2136 }
2137 }
2138
2139 return gv_stashpv(pkgname, FALSE);
2140}
a0d0e21e 2141
748a9306
LW
2142#ifdef DEBUGGING
2143 static char* exp_name[] =
09bef843
SB
2144 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2145 "ATTRTERM", "TERMBLOCK"
2146 };
748a9306 2147#endif
463ee0b2 2148
02aa26ce
NT
2149/*
2150 yylex
2151
2152 Works out what to call the token just pulled out of the input
2153 stream. The yacc parser takes care of taking the ops we return and
2154 stitching them into a tree.
2155
2156 Returns:
2157 PRIVATEREF
2158
2159 Structure:
2160 if read an identifier
2161 if we're in a my declaration
2162 croak if they tried to say my($foo::bar)
2163 build the ops for a my() declaration
2164 if it's an access to a my() variable
2165 are we in a sort block?
2166 croak if my($a); $a <=> $b
2167 build ops for access to a my() variable
2168 if in a dq string, and they've said @foo and we can't find @foo
2169 croak
2170 build ops for a bareword
2171 if we already built the token before, use it.
2172*/
2173
dba4d153 2174#ifdef USE_PURE_BISON
864dbfa3 2175int
dba4d153 2176Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2177{
20141f0e
IRC
2178 int r;
2179
6f202aea 2180 yyactlevel++;
20141f0e
IRC
2181 yylval_pointer[yyactlevel] = lvalp;
2182 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2183 if (yyactlevel >= YYMAXLEVEL)
2184 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2185
dba4d153 2186 r = Perl_yylex(aTHX);
20141f0e 2187
d8ae6756
IRC
2188 if (yyactlevel > 0)
2189 yyactlevel--;
20141f0e
IRC
2190
2191 return r;
2192}
dba4d153 2193#endif
20141f0e 2194
dba4d153
JH
2195#ifdef __SC__
2196#pragma segment Perl_yylex
2197#endif
dba4d153 2198int
dba4d153 2199Perl_yylex(pTHX)
20141f0e 2200{
79072805 2201 register char *s;
378cc40b 2202 register char *d;
79072805 2203 register I32 tmp;
463ee0b2 2204 STRLEN len;
161b471a
NIS
2205 GV *gv = Nullgv;
2206 GV **gvp = 0;
aa7440fb 2207 bool bof = FALSE;
a687059c 2208
02aa26ce 2209 /* check if there's an identifier for us to look at */
ba979b31 2210 if (PL_pending_ident)
e930465f 2211 return S_pending_ident(aTHX);
bbce6d69 2212
02aa26ce
NT
2213 /* no identifier pending identification */
2214
3280af22 2215 switch (PL_lex_state) {
79072805
LW
2216#ifdef COMMENTARY
2217 case LEX_NORMAL: /* Some compilers will produce faster */
2218 case LEX_INTERPNORMAL: /* code if we comment these out. */
2219 break;
2220#endif
2221
09bef843 2222 /* when we've already built the next token, just pull it out of the queue */
79072805 2223 case LEX_KNOWNEXT:
3280af22
NIS
2224 PL_nexttoke--;
2225 yylval = PL_nextval[PL_nexttoke];
2226 if (!PL_nexttoke) {
2227 PL_lex_state = PL_lex_defer;
2228 PL_expect = PL_lex_expect;
2229 PL_lex_defer = LEX_NORMAL;
463ee0b2 2230 }
607df283 2231 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2232 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2233 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2234
3280af22 2235 return(PL_nexttype[PL_nexttoke]);
79072805 2236
02aa26ce 2237 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2238 when we get here, PL_bufptr is at the \
02aa26ce 2239 */
79072805
LW
2240 case LEX_INTERPCASEMOD:
2241#ifdef DEBUGGING
3280af22 2242 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2243 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2244#endif
02aa26ce 2245 /* handle \E or end of string */
3280af22 2246 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2247 char oldmod;
02aa26ce
NT
2248
2249 /* if at a \E */
3280af22
NIS
2250 if (PL_lex_casemods) {
2251 oldmod = PL_lex_casestack[--PL_lex_casemods];
2252 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2253
3280af22
NIS
2254 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2255 PL_bufptr += 2;
2256 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2257 }
79072805
LW
2258 return ')';
2259 }
3280af22
NIS
2260 if (PL_bufptr != PL_bufend)
2261 PL_bufptr += 2;
2262 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2263 return yylex();
79072805
LW
2264 }
2265 else {
607df283 2266 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2267 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2268 s = PL_bufptr + 1;
79072805 2269 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
eb160463 2270 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
a0d0e21e 2271 if (strchr("LU", *s) &&
3280af22 2272 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2273 {
3280af22 2274 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2275 return ')';
2276 }
3280af22
NIS
2277 if (PL_lex_casemods > 10) {
2278 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2279 if (newlb != PL_lex_casestack) {
a0d0e21e 2280 SAVEFREEPV(newlb);
3280af22 2281 PL_lex_casestack = newlb;
a0d0e21e
LW
2282 }
2283 }
3280af22
NIS
2284 PL_lex_casestack[PL_lex_casemods++] = *s;
2285 PL_lex_casestack[PL_lex_casemods] = '\0';
2286 PL_lex_state = LEX_INTERPCONCAT;
2287 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2288 force_next('(');
2289 if (*s == 'l')
3280af22 2290 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2291 else if (*s == 'u')
3280af22 2292 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2293 else if (*s == 'L')
3280af22 2294 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2295 else if (*s == 'U')
3280af22 2296 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2297 else if (*s == 'Q')
3280af22 2298 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2299 else
cea2e8a9 2300 Perl_croak(aTHX_ "panic: yylex");
3280af22 2301 PL_bufptr = s + 1;
79072805 2302 force_next(FUNC);
3280af22
NIS
2303 if (PL_lex_starts) {
2304 s = PL_bufptr;
2305 PL_lex_starts = 0;
79072805
LW
2306 Aop(OP_CONCAT);
2307 }
2308 else
cea2e8a9 2309 return yylex();
79072805
LW
2310 }
2311
55497cff 2312 case LEX_INTERPPUSH:
2313 return sublex_push();
2314
79072805 2315 case LEX_INTERPSTART:
3280af22 2316 if (PL_bufptr == PL_bufend)
79072805 2317 return sublex_done();
607df283 2318 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2319 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2320 PL_expect = XTERM;
2321 PL_lex_dojoin = (*PL_bufptr == '@');
2322 PL_lex_state = LEX_INTERPNORMAL;
2323 if (PL_lex_dojoin) {
2324 PL_nextval[PL_nexttoke].ival = 0;
79072805 2325 force_next(',');
a0d0e21e 2326 force_ident("\"", '$');
3280af22 2327 PL_nextval[PL_nexttoke].ival = 0;
79072805 2328 force_next('$');
3280af22 2329 PL_nextval[PL_nexttoke].ival = 0;
79072805 2330 force_next('(');
3280af22 2331 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2332 force_next(FUNC);
2333 }
3280af22
NIS
2334 if (PL_lex_starts++) {
2335 s = PL_bufptr;
79072805
LW
2336 Aop(OP_CONCAT);
2337 }
cea2e8a9 2338 return yylex();
79072805
LW
2339
2340 case LEX_INTERPENDMAYBE:
3280af22
NIS
2341 if (intuit_more(PL_bufptr)) {
2342 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2343 break;
2344 }
2345 /* FALL THROUGH */
2346
2347 case LEX_INTERPEND:
3280af22
NIS
2348 if (PL_lex_dojoin) {
2349 PL_lex_dojoin = FALSE;
2350 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2351 return ')';
2352 }
43a16006 2353 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2354 && SvEVALED(PL_lex_repl))
43a16006 2355 {
e9fa98b2 2356 if (PL_bufptr != PL_bufend)
cea2e8a9 2357 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2358 PL_lex_repl = Nullsv;
2359 }
79072805
LW
2360 /* FALLTHROUGH */
2361 case LEX_INTERPCONCAT:
2362#ifdef DEBUGGING
3280af22 2363 if (PL_lex_brackets)
cea2e8a9 2364 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2365#endif
3280af22 2366 if (PL_bufptr == PL_bufend)
79072805
LW
2367 return sublex_done();
2368
3280af22
NIS
2369 if (SvIVX(PL_linestr) == '\'') {
2370 SV *sv = newSVsv(PL_linestr);
2371 if (!PL_lex_inpat)
76e3520e 2372 sv = tokeq(sv);
3280af22 2373 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2374 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2375 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2376 s = PL_bufend;
79072805
LW
2377 }
2378 else {
3280af22 2379 s = scan_const(PL_bufptr);
79072805 2380 if (*s == '\\')
3280af22 2381 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2382 else
3280af22 2383 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2384 }
2385
3280af22
NIS
2386 if (s != PL_bufptr) {
2387 PL_nextval[PL_nexttoke] = yylval;
2388 PL_expect = XTERM;
79072805 2389 force_next(THING);
3280af22 2390 if (PL_lex_starts++)
79072805
LW
2391 Aop(OP_CONCAT);
2392 else {
3280af22 2393 PL_bufptr = s;
cea2e8a9 2394 return yylex();
79072805
LW
2395 }
2396 }
2397
cea2e8a9 2398 return yylex();
a0d0e21e 2399 case LEX_FORMLINE:
3280af22
NIS
2400 PL_lex_state = LEX_NORMAL;
2401 s = scan_formline(PL_bufptr);
2402 if (!PL_lex_formbrack)
a0d0e21e
LW
2403 goto rightbracket;
2404 OPERATOR(';');
79072805
LW
2405 }
2406
3280af22
NIS
2407 s = PL_bufptr;
2408 PL_oldoldbufptr = PL_oldbufptr;
2409 PL_oldbufptr = s;
607df283 2410 DEBUG_T( {
bf49b057
GS
2411 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2412 exp_name[PL_expect], s);
5f80b19c 2413 } );
463ee0b2
LW
2414
2415 retry:
378cc40b
LW
2416 switch (*s) {
2417 default:
7e2040f0 2418 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2419 goto keylookup;
cea2e8a9 2420 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2421 case 4:
2422 case 26:
2423 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2424 case 0:
3280af22
NIS
2425 if (!PL_rsfp) {
2426 PL_last_uni = 0;
2427 PL_last_lop = 0;
2428 if (PL_lex_brackets)
d98d5fff 2429 yyerror("Missing right curly or square bracket");
4e553d73 2430 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2431 "### Tokener got EOF\n");
5f80b19c 2432 } );
79072805 2433 TOKEN(0);
463ee0b2 2434 }
3280af22 2435 if (s++ < PL_bufend)
a687059c 2436 goto retry; /* ignore stray nulls */
3280af22
NIS
2437 PL_last_uni = 0;
2438 PL_last_lop = 0;
2439 if (!PL_in_eval && !PL_preambled) {
2440 PL_preambled = TRUE;
2441 sv_setpv(PL_linestr,incl_perldb());
2442 if (SvCUR(PL_linestr))
2443 sv_catpv(PL_linestr,";");
2444 if (PL_preambleav){
2445 while(AvFILLp(PL_preambleav) >= 0) {
2446 SV *tmpsv = av_shift(PL_preambleav);
2447 sv_catsv(PL_linestr, tmpsv);
2448 sv_catpv(PL_linestr, ";");
91b7def8 2449 sv_free(tmpsv);
2450 }
3280af22
NIS
2451 sv_free((SV*)PL_preambleav);
2452 PL_preambleav = NULL;
91b7def8 2453 }
3280af22
NIS
2454 if (PL_minus_n || PL_minus_p) {
2455 sv_catpv(PL_linestr, "LINE: while (<>) {");
2456 if (PL_minus_l)
2457 sv_catpv(PL_linestr,"chomp;");
2458 if (PL_minus_a) {
3280af22
NIS
2459 if (PL_minus_F) {
2460 if (strchr("/'\"", *PL_splitstr)
2461 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2462 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2463 else {
2464 char delim;
2465 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2466 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2467 delim = *s;
75c72d73 2468 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2469 "q" + (delim == '\''), delim);
3280af22 2470 for (s = PL_splitstr; *s; s++) {
54310121 2471 if (*s == '\\')
3280af22
NIS
2472 sv_catpvn(PL_linestr, "\\", 1);
2473 sv_catpvn(PL_linestr, s, 1);
54310121 2474 }
cea2e8a9 2475 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2476 }
2304df62
AD
2477 }
2478 else
75c72d73 2479 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2480 }
79072805 2481 }
3280af22
NIS
2482 sv_catpv(PL_linestr, "\n");
2483 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2484 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2485 PL_last_lop = PL_last_uni = Nullch;
3280af22 2486 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2487 SV *sv = NEWSV(85,0);
2488
2489 sv_upgrade(sv, SVt_PVMG);
3280af22 2490 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2491 (void)SvIOK_on(sv);
2492 SvIVX(sv) = 0;
57843af0 2493 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2494 }
79072805 2495 goto retry;
a687059c 2496 }
e929a76b 2497 do {
aa7440fb 2498 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2499 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2500 fake_eof:
2501 if (PL_rsfp) {
2502 if (PL_preprocess && !PL_in_eval)
2503 (void)PerlProc_pclose(PL_rsfp);
2504 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2505 PerlIO_clearerr(PL_rsfp);
2506 else
2507 (void)PerlIO_close(PL_rsfp);
2508 PL_rsfp = Nullfp;
2509 PL_doextract = FALSE;
2510 }
2511 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2512 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2513 sv_catpv(PL_linestr,";}");
2514 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2515 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2516 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2517 PL_minus_n = PL_minus_p = 0;
2518 goto retry;
2519 }
2520 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2521 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2522 sv_setpv(PL_linestr,"");
2523 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2524 }
2525 /* if it looks like the start of a BOM, check if it in fact is */
2526 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2527#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2528# ifdef __GNU_LIBRARY__
2529# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2530# define FTELL_FOR_PIPE_IS_BROKEN
2531# endif
e3f494f1
JH
2532# else
2533# ifdef __GLIBC__
2534# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2535# define FTELL_FOR_PIPE_IS_BROKEN
2536# endif
2537# endif
226017aa
DD
2538# endif
2539#endif
2540#ifdef FTELL_FOR_PIPE_IS_BROKEN
2541 /* This loses the possibility to detect the bof
2542 * situation on perl -P when the libc5 is being used.
2543 * Workaround? Maybe attach some extra state to PL_rsfp?
2544 */
2545 if (!PL_preprocess)
7e28d3af 2546 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2547#else
eb160463 2548 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2549#endif
7e28d3af 2550 if (bof) {
3280af22 2551 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2552 s = swallow_bom((U8*)s);
e929a76b 2553 }
378cc40b 2554 }
3280af22 2555 if (PL_doextract) {
a0d0e21e
LW
2556 /* Incest with pod. */
2557 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2558 sv_setpv(PL_linestr, "");
2559 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2560 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2561 PL_last_lop = PL_last_uni = Nullch;
3280af22 2562 PL_doextract = FALSE;
a0d0e21e 2563 }
4e553d73 2564 }
463ee0b2 2565 incline(s);
3280af22
NIS
2566 } while (PL_doextract);
2567 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2568 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2569 SV *sv = NEWSV(85,0);
a687059c 2570
93a17b20 2571 sv_upgrade(sv, SVt_PVMG);
3280af22 2572 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2573 (void)SvIOK_on(sv);
2574 SvIVX(sv) = 0;
57843af0 2575 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2576 }
3280af22 2577 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2578 PL_last_lop = PL_last_uni = Nullch;
57843af0 2579 if (CopLINE(PL_curcop) == 1) {
3280af22 2580 while (s < PL_bufend && isSPACE(*s))
79072805 2581 s++;
a0d0e21e 2582 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2583 s++;
44a8e56a 2584 d = Nullch;
3280af22 2585 if (!PL_in_eval) {
44a8e56a 2586 if (*s == '#' && *(s+1) == '!')
2587 d = s + 2;
2588#ifdef ALTERNATE_SHEBANG
2589 else {
2590 static char as[] = ALTERNATE_SHEBANG;
2591 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2592 d = s + (sizeof(as) - 1);
2593 }
2594#endif /* ALTERNATE_SHEBANG */
2595 }
2596 if (d) {
b8378b72 2597 char *ipath;
774d564b 2598 char *ipathend;
b8378b72 2599
774d564b 2600 while (isSPACE(*d))
b8378b72
CS
2601 d++;
2602 ipath = d;
774d564b 2603 while (*d && !isSPACE(*d))
2604 d++;
2605 ipathend = d;
2606
2607#ifdef ARG_ZERO_IS_SCRIPT
2608 if (ipathend > ipath) {
2609 /*
2610 * HP-UX (at least) sets argv[0] to the script name,
2611 * which makes $^X incorrect. And Digital UNIX and Linux,
2612 * at least, set argv[0] to the basename of the Perl
2613 * interpreter. So, having found "#!", we'll set it right.
2614 */
ee2f7564 2615 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2616 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2617 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2618 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2619 SvSETMAGIC(x);
2620 }
556c1dec
JH
2621 else {
2622 STRLEN blen;
2623 STRLEN llen;
2624 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2625 char *lstart = SvPV(x,llen);
2626 if (llen < blen) {
2627 bstart += blen - llen;
2628 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2629 sv_setpvn(x, ipath, ipathend - ipath);
2630 SvSETMAGIC(x);
2631 }
2632 }
2633 }
774d564b 2634 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2635 }
774d564b 2636#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2637
2638 /*
2639 * Look for options.
2640 */
748a9306 2641 d = instr(s,"perl -");
84e30d1a 2642 if (!d) {
748a9306 2643 d = instr(s,"perl");
84e30d1a
GS
2644#if defined(DOSISH)
2645 /* avoid getting into infinite loops when shebang
2646 * line contains "Perl" rather than "perl" */
2647 if (!d) {
2648 for (d = ipathend-4; d >= ipath; --d) {
2649 if ((*d == 'p' || *d == 'P')
2650 && !ibcmp(d, "perl", 4))
2651 {
2652 break;
2653 }
2654 }
2655 if (d < ipath)
2656 d = Nullch;
2657 }
2658#endif
2659 }
44a8e56a 2660#ifdef ALTERNATE_SHEBANG
2661 /*
2662 * If the ALTERNATE_SHEBANG on this system starts with a
2663 * character that can be part of a Perl expression, then if
2664 * we see it but not "perl", we're probably looking at the
2665 * start of Perl code, not a request to hand off to some
2666 * other interpreter. Similarly, if "perl" is there, but
2667 * not in the first 'word' of the line, we assume the line
2668 * contains the start of the Perl program.
44a8e56a 2669 */
2670 if (d && *s != '#') {
774d564b 2671 char *c = ipath;
44a8e56a 2672 while (*c && !strchr("; \t\r\n\f\v#", *c))
2673 c++;
2674 if (c < d)
2675 d = Nullch; /* "perl" not in first word; ignore */
2676 else
2677 *s = '#'; /* Don't try to parse shebang line */
2678 }
774d564b 2679#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2680#ifndef MACOS_TRADITIONAL
748a9306 2681 if (!d &&
44a8e56a 2682 *s == '#' &&
774d564b 2683 ipathend > ipath &&
3280af22 2684 !PL_minus_c &&
748a9306 2685 !instr(s,"indir") &&
3280af22 2686 instr(PL_origargv[0],"perl"))
748a9306 2687 {
9f68db38 2688 char **newargv;
9f68db38 2689
774d564b 2690 *ipathend = '\0';
2691 s = ipathend + 1;
3280af22 2692 while (s < PL_bufend && isSPACE(*s))
9f68db38 2693 s++;
3280af22
NIS
2694 if (s < PL_bufend) {
2695 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2696 newargv[1] = s;
3280af22 2697 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2698 s++;
2699 *s = '\0';
3280af22 2700 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2701 }
2702 else
3280af22 2703 newargv = PL_origargv;
774d564b 2704 newargv[0] = ipath;
b4748376 2705 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2706 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2707 }
bf4acbe4 2708#endif
748a9306 2709 if (d) {
3280af22
NIS
2710 U32 oldpdb = PL_perldb;
2711 bool oldn = PL_minus_n;
2712 bool oldp = PL_minus_p;
748a9306
LW
2713
2714 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2715 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2716
2717 if (*d++ == '-') {
a11ec5a9 2718 bool switches_done = PL_doswitches;
8cc95fdb 2719 do {
2720 if (*d == 'M' || *d == 'm') {
2721 char *m = d;
2722 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2723 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2724 (int)(d - m), m);
2725 }
2726 d = moreswitches(d);
2727 } while (d);
155aba94
GS
2728 if ((PERLDB_LINE && !oldpdb) ||
2729 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2730 /* if we have already added "LINE: while (<>) {",
2731 we must not do it again */
748a9306 2732 {
3280af22
NIS
2733 sv_setpv(PL_linestr, "");
2734 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2735 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2736 PL_last_lop = PL_last_uni = Nullch;
3280af22 2737 PL_preambled = FALSE;
84902520 2738 if (PERLDB_LINE)
3280af22 2739 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2740 goto retry;
2741 }
a11ec5a9
RGS
2742 if (PL_doswitches && !switches_done) {
2743 int argc = PL_origargc;
2744 char **argv = PL_origargv;
2745 do {
2746 argc--,argv++;
2747 } while (argc && argv[0][0] == '-' && argv[0][1]);
2748 init_argv_symbols(argc,argv);
2749 }
a0d0e21e 2750 }
79072805 2751 }
9f68db38 2752 }
79072805 2753 }
3280af22
NIS
2754 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2755 PL_bufptr = s;
2756 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2757 return yylex();
ae986130 2758 }
378cc40b 2759 goto retry;
4fdae800 2760 case '\r':
6a27c188 2761#ifdef PERL_STRICT_CR
cea2e8a9 2762 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2763 Perl_croak(aTHX_
cc507455 2764 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2765#endif
4fdae800 2766 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2767#ifdef MACOS_TRADITIONAL
2768 case '\312':
2769#endif
378cc40b
LW
2770 s++;
2771 goto retry;
378cc40b 2772 case '#':
e929a76b 2773 case '\n':
3280af22 2774 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2775 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2776 /* handle eval qq[#line 1 "foo"\n ...] */
2777 CopLINE_dec(PL_curcop);
2778 incline(s);
2779 }
3280af22 2780 d = PL_bufend;
a687059c 2781 while (s < d && *s != '\n')
378cc40b 2782 s++;
0f85fab0 2783 if (s < d)
378cc40b 2784 s++;
78c267c1 2785 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2786 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2787 incline(s);
3280af22
NIS
2788 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2789 PL_bufptr = s;
2790 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2791 return yylex();
a687059c 2792 }
378cc40b 2793 }
a687059c 2794 else {
378cc40b 2795 *s = '\0';
3280af22 2796 PL_bufend = s;
a687059c 2797 }
378cc40b
LW
2798 goto retry;
2799 case '-':
79072805 2800 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2801 I32 ftst = 0;
2802
378cc40b 2803 s++;
3280af22 2804 PL_bufptr = s;
748a9306
LW
2805 tmp = *s++;
2806
bf4acbe4 2807 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2808 s++;
2809
2810 if (strnEQ(s,"=>",2)) {
3280af22 2811 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2812 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2813 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2814 } );
748a9306
LW
2815 OPERATOR('-'); /* unary minus */
2816 }
3280af22 2817 PL_last_uni = PL_oldbufptr;
748a9306 2818 switch (tmp) {
e5edeb50
JH
2819 case 'r': ftst = OP_FTEREAD; break;
2820 case 'w': ftst = OP_FTEWRITE; break;
2821 case 'x': ftst = OP_FTEEXEC; break;
2822 case 'o': ftst = OP_FTEOWNED; break;
2823 case 'R': ftst = OP_FTRREAD; break;
2824 case 'W': ftst = OP_FTRWRITE; break;
2825 case 'X': ftst = OP_FTREXEC; break;
2826 case 'O': ftst = OP_FTROWNED; break;
2827 case 'e': ftst = OP_FTIS; break;
2828 case 'z': ftst = OP_FTZERO; break;
2829 case 's': ftst = OP_FTSIZE; break;
2830 case 'f': ftst = OP_FTFILE; break;
2831 case 'd': ftst = OP_FTDIR; break;
2832 case 'l': ftst = OP_FTLINK; break;
2833 case 'p': ftst = OP_FTPIPE; break;
2834 case 'S': ftst = OP_FTSOCK; break;
2835 case 'u': ftst = OP_FTSUID; break;
2836 case 'g': ftst = OP_FTSGID; break;
2837 case 'k': ftst = OP_FTSVTX; break;
2838 case 'b': ftst = OP_FTBLK; break;
2839 case 'c': ftst = OP_FTCHR; break;
2840 case 't': ftst = OP_FTTTY; break;
2841 case 'T': ftst = OP_FTTEXT; break;
2842 case 'B': ftst = OP_FTBINARY; break;
2843 case 'M': case 'A': case 'C':
2844 gv_fetchpv("\024",TRUE, SVt_PV);
2845 switch (tmp) {
2846 case 'M': ftst = OP_FTMTIME; break;
2847 case 'A': ftst = OP_FTATIME; break;
2848 case 'C': ftst = OP_FTCTIME; break;
2849 default: break;
2850 }
2851 break;
378cc40b 2852 default:
378cc40b
LW
2853 break;
2854 }
e5edeb50 2855 if (ftst) {
eb160463 2856 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2858 "### Saw file test %c\n", (int)ftst);
5f80b19c 2859 } );
e5edeb50
JH
2860 FTST(ftst);
2861 }
2862 else {
2863 /* Assume it was a minus followed by a one-letter named
2864 * subroutine call (or a -bareword), then. */
95c31fe3 2865 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2866 "### %c looked like a file test but was not\n",
2867 (int)ftst);
5f80b19c 2868 } );
e5edeb50
JH
2869 s -= 2;
2870 }
378cc40b 2871 }
a687059c
LW
2872 tmp = *s++;
2873 if (*s == tmp) {
2874 s++;
3280af22 2875 if (PL_expect == XOPERATOR)
79072805
LW
2876 TERM(POSTDEC);
2877 else
2878 OPERATOR(PREDEC);
2879 }
2880 else if (*s == '>') {
2881 s++;
2882 s = skipspace(s);
7e2040f0 2883 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2884 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2885 TOKEN(ARROW);
79072805 2886 }
748a9306
LW
2887 else if (*s == '$')
2888 OPERATOR(ARROW);
463ee0b2 2889 else
748a9306 2890 TERM(ARROW);
a687059c 2891 }
3280af22 2892 if (PL_expect == XOPERATOR)
79072805
LW
2893 Aop(OP_SUBTRACT);
2894 else {
3280af22 2895 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2896 check_uni();
79072805 2897 OPERATOR('-'); /* unary minus */
2f3197b3 2898 }
79072805 2899
378cc40b 2900 case '+':
a687059c
LW
2901 tmp = *s++;
2902 if (*s == tmp) {
378cc40b 2903 s++;
3280af22 2904 if (PL_expect == XOPERATOR)
79072805
LW
2905 TERM(POSTINC);
2906 else
2907 OPERATOR(PREINC);
378cc40b 2908 }
3280af22 2909 if (PL_expect == XOPERATOR)
79072805
LW
2910 Aop(OP_ADD);
2911 else {
3280af22 2912 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2913 check_uni();
a687059c 2914 OPERATOR('+');
2f3197b3 2915 }
a687059c 2916
378cc40b 2917 case '*':
3280af22
NIS
2918 if (PL_expect != XOPERATOR) {
2919 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2920 PL_expect = XOPERATOR;
2921 force_ident(PL_tokenbuf, '*');
2922 if (!*PL_tokenbuf)
a0d0e21e 2923 PREREF('*');
79072805 2924 TERM('*');
a687059c 2925 }
79072805
LW
2926 s++;
2927 if (*s == '*') {
a687059c 2928 s++;
79072805 2929 PWop(OP_POW);
a687059c 2930 }
79072805
LW
2931 Mop(OP_MULTIPLY);
2932
378cc40b 2933 case '%':
3280af22 2934 if (PL_expect == XOPERATOR) {
bbce6d69 2935 ++s;
2936 Mop(OP_MODULO);
a687059c 2937 }
3280af22
NIS
2938 PL_tokenbuf[0] = '%';
2939 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2940 if (!PL_tokenbuf[1]) {
2941 if (s == PL_bufend)
bbce6d69 2942 yyerror("Final % should be \\% or %name");
2943 PREREF('%');
a687059c 2944 }
3280af22 2945 PL_pending_ident = '%';
bbce6d69 2946 TERM('%');
a687059c 2947
378cc40b 2948 case '^':
79072805 2949 s++;
a0d0e21e 2950 BOop(OP_BIT_XOR);
79072805 2951 case '[':
3280af22 2952 PL_lex_brackets++;
79072805 2953 /* FALL THROUGH */
378cc40b 2954 case '~':
378cc40b 2955 case ',':
378cc40b
LW
2956 tmp = *s++;
2957 OPERATOR(tmp);
a0d0e21e
LW
2958 case ':':
2959 if (s[1] == ':') {
2960 len = 0;
2961 goto just_a_word;
2962 }
2963 s++;
09bef843
SB
2964 switch (PL_expect) {
2965 OP *attrs;
2966 case XOPERATOR:
2967 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2968 break;
2969 PL_bufptr = s; /* update in case we back off */
2970 goto grabattrs;
2971 case XATTRBLOCK:
2972 PL_expect = XBLOCK;
2973 goto grabattrs;
2974 case XATTRTERM:
2975 PL_expect = XTERMBLOCK;
2976 grabattrs:
2977 s = skipspace(s);
2978 attrs = Nullop;
7e2040f0 2979 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2980 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2981 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2982 if (tmp < 0) tmp = -tmp;
2983 switch (tmp) {
2984 case KEY_or:
2985 case KEY_and:
c963b151 2986 case KEY_err:
f9829d6b
GS
2987 case KEY_for:
2988 case KEY_unless:
2989 case KEY_if:
2990 case KEY_while:
2991 case KEY_until:
2992 goto got_attrs;
2993 default:
2994 break;
2995 }
2996 }
09bef843
SB
2997 if (*d == '(') {
2998 d = scan_str(d,TRUE,TRUE);
2999 if (!d) {
09bef843
SB
3000 /* MUST advance bufptr here to avoid bogus
3001 "at end of line" context messages from yyerror().
3002 */
3003 PL_bufptr = s + len;
3004 yyerror("Unterminated attribute parameter in attribute list");
3005 if (attrs)
3006 op_free(attrs);
3007 return 0; /* EOF indicator */
3008 }
3009 }
3010 if (PL_lex_stuff) {
3011 SV *sv = newSVpvn(s, len);
3012 sv_catsv(sv, PL_lex_stuff);
3013 attrs = append_elem(OP_LIST, attrs,
3014 newSVOP(OP_CONST, 0, sv));
3015 SvREFCNT_dec(PL_lex_stuff);
3016 PL_lex_stuff = Nullsv;
3017 }
3018 else {
d3cea301
SB
3019 /* NOTE: any CV attrs applied here need to be part of
3020 the CVf_BUILTIN_ATTRS define in cv.h! */
78f9721b
SM
3021 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3022 CvLVALUE_on(PL_compcv);
3023 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3024 CvLOCKED_on(PL_compcv);
3025 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3026 CvMETHOD_on(PL_compcv);
87ecf892 3027#ifdef USE_ITHREADS
d3cea301
SB
3028 else if (PL_in_my == KEY_our && len == 6 &&
3029 strnEQ(s, "unique", len))
7fb37951 3030 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3031#endif
78f9721b
SM
3032 /* After we've set the flags, it could be argued that
3033 we don't need to do the attributes.pm-based setting
3034 process, and shouldn't bother appending recognized
d3cea301
SB
3035 flags. To experiment with that, uncomment the
3036 following "else". (Note that's already been
3037 uncommented. That keeps the above-applied built-in
3038 attributes from being intercepted (and possibly
3039 rejected) by a package's attribute routines, but is
3040 justified by the performance win for the common case
3041 of applying only built-in attributes.) */
0256094b 3042 else
78f9721b
SM
3043 attrs = append_elem(OP_LIST, attrs,
3044 newSVOP(OP_CONST, 0,
3045 newSVpvn(s, len)));
09bef843
SB
3046 }
3047 s = skipspace(d);
0120eecf 3048 if (*s == ':' && s[1] != ':')
09bef843 3049 s = skipspace(s+1);
0120eecf
GS
3050 else if (s == d)
3051 break; /* require real whitespace or :'s */
09bef843 3052 }
f9829d6b 3053 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3054 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3055 char q = ((*s == '\'') ? '"' : '\'');
3056 /* If here for an expression, and parsed no attrs, back off. */
3057 if (tmp == '=' && !attrs) {
3058 s = PL_bufptr;
3059 break;
3060 }
3061 /* MUST advance bufptr here to avoid bogus "at end of line"
3062 context messages from yyerror().
3063 */
3064 PL_bufptr = s;
3065 if (!*s)
3066 yyerror("Unterminated attribute list");
3067 else
3068 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3069 q, *s, q));
3070 if (attrs)
3071 op_free(attrs);
3072 OPERATOR(':');
3073 }
f9829d6b 3074 got_attrs:
09bef843
SB
3075 if (attrs) {
3076 PL_nextval[PL_nexttoke].opval = attrs;
3077 force_next(THING);
3078 }
3079 TOKEN(COLONATTR);
3080 }
a0d0e21e 3081 OPERATOR(':');
8990e307
LW
3082 case '(':
3083 s++;
3280af22
NIS
3084 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3085 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3086 else
3280af22 3087 PL_expect = XTERM;
a0d0e21e 3088 TOKEN('(');
378cc40b 3089 case ';':
f4dd75d9 3090 CLINE;
378cc40b
LW
3091 tmp = *s++;
3092 OPERATOR(tmp);
3093 case ')':
378cc40b 3094 tmp = *s++;
16d20bd9
AD
3095 s = skipspace(s);
3096 if (*s == '{')
3097 PREBLOCK(tmp);
378cc40b 3098 TERM(tmp);
79072805
LW
3099 case ']':
3100 s++;
3280af22 3101 if (PL_lex_brackets <= 0)
d98d5fff 3102 yyerror("Unmatched right square bracket");
463ee0b2 3103 else
3280af22
NIS
3104 --PL_lex_brackets;
3105 if (PL_lex_state == LEX_INTERPNORMAL) {
3106 if (PL_lex_brackets == 0) {
a0d0e21e 3107 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3108 PL_lex_state = LEX_INTERPEND;
79072805
LW
3109 }
3110 }
4633a7c4 3111 TERM(']');
79072805
LW
3112 case '{':
3113 leftbracket:
79072805 3114 s++;
3280af22
NIS
3115 if (PL_lex_brackets > 100) {
3116 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3117 if (newlb != PL_lex_brackstack) {
8990e307 3118 SAVEFREEPV(newlb);
3280af22 3119 PL_lex_brackstack = newlb;
8990e307
LW
3120 }
3121 }
3280af22 3122 switch (PL_expect) {
a0d0e21e 3123 case XTERM:
3280af22 3124 if (PL_lex_formbrack) {
a0d0e21e
LW
3125 s--;
3126 PRETERMBLOCK(DO);
3127 }
3280af22
NIS
3128 if (PL_oldoldbufptr == PL_last_lop)
3129 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3130 else
3280af22 3131 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3132 OPERATOR(HASHBRACK);
a0d0e21e 3133 case XOPERATOR:
bf4acbe4 3134 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3135 s++;
44a8e56a 3136 d = s;
3280af22
NIS
3137 PL_tokenbuf[0] = '\0';
3138 if (d < PL_bufend && *d == '-') {
3139 PL_tokenbuf[0] = '-';
44a8e56a 3140 d++;
bf4acbe4 3141 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3142 d++;
3143 }
7e2040f0 3144 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3145 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3146 FALSE, &len);
bf4acbe4 3147 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3148 d++;
3149 if (*d == '}') {
3280af22 3150 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3151 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3152 if (minus)
3153 force_next('-');
748a9306
LW
3154 }
3155 }
3156 /* FALL THROUGH */
09bef843 3157 case XATTRBLOCK:
748a9306 3158 case XBLOCK:
3280af22
NIS
3159 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3160 PL_expect = XSTATE;
a0d0e21e 3161 break;
09bef843 3162 case XATTRTERM:
a0d0e21e 3163 case XTERMBLOCK:
3280af22
NIS
3164 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3165 PL_expect = XSTATE;
a0d0e21e
LW
3166 break;
3167 default: {
3168 char *t;
3280af22
NIS
3169 if (PL_oldoldbufptr == PL_last_lop)
3170 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3171 else
3280af22 3172 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3173 s = skipspace(s);
8452ff4b
SB
3174 if (*s == '}') {
3175 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3176 PL_expect = XTERM;
3177 /* This hack is to get the ${} in the message. */
3178 PL_bufptr = s+1;
3179 yyerror("syntax error");
3180 break;
3181 }
a0d0e21e 3182 OPERATOR(HASHBRACK);
8452ff4b 3183 }
b8a4b1be
GS
3184 /* This hack serves to disambiguate a pair of curlies
3185 * as being a block or an anon hash. Normally, expectation
3186 * determines that, but in cases where we're not in a
3187 * position to expect anything in particular (like inside
3188 * eval"") we have to resolve the ambiguity. This code
3189 * covers the case where the first term in the curlies is a
3190 * quoted string. Most other cases need to be explicitly
3191 * disambiguated by prepending a `+' before the opening
3192 * curly in order to force resolution as an anon hash.
3193 *
3194 * XXX should probably propagate the outer expectation
3195 * into eval"" to rely less on this hack, but that could
3196 * potentially break current behavior of eval"".
3197 * GSAR 97-07-21
3198 */
3199 t = s;
3200 if (*s == '\'' || *s == '"' || *s == '`') {
3201 /* common case: get past first string, handling escapes */
3280af22 3202 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3203 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3204 t++;
3205 t++;
a0d0e21e 3206 }
b8a4b1be 3207 else if (*s == 'q') {
3280af22 3208 if (++t < PL_bufend
b8a4b1be 3209 && (!isALNUM(*t)
3280af22 3210 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3211 && !isALNUM(*t))))
3212 {
b8a4b1be
GS
3213 char *tmps;
3214 char open, close, term;
3215 I32 brackets = 1;
3216
3280af22 3217 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3218 t++;
3219 term = *t;
3220 open = term;
3221 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3222 term = tmps[5];
3223 close = term;
3224 if (open == close)
3280af22
NIS
3225 for (t++; t < PL_bufend; t++) {
3226 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3227 t++;
6d07e5e9 3228 else if (*t == open)
b8a4b1be
GS
3229 break;
3230 }
3231 else
3280af22
NIS
3232 for (t++; t < PL_bufend; t++) {
3233 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3234 t++;
6d07e5e9 3235 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3236 break;
3237 else if (*t == open)
3238 brackets++;
3239 }
3240 }
3241 t++;
a0d0e21e 3242 }
7e2040f0 3243 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3244 t += UTF8SKIP(t);
7e2040f0 3245 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3246 t += UTF8SKIP(t);
a0d0e21e 3247 }
3280af22 3248 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3249 t++;
b8a4b1be
GS
3250 /* if comma follows first term, call it an anon hash */
3251 /* XXX it could be a comma expression with loop modifiers */
3280af22 3252 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3253 || (*t == '=' && t[1] == '>')))
a0d0e21e 3254 OPERATOR(HASHBRACK);
3280af22 3255 if (PL_expect == XREF)
4e4e412b 3256 PL_expect = XTERM;
a0d0e21e 3257 else {
3280af22
NIS
3258 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3259 PL_expect = XSTATE;
a0d0e21e 3260 }
8990e307 3261 }
a0d0e21e 3262 break;
463ee0b2 3263 }
57843af0 3264 yylval.ival = CopLINE(PL_curcop);
79072805 3265 if (isSPACE(*s) || *s == '#')
3280af22 3266 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3267 TOKEN('{');
378cc40b 3268 case '}':
79072805
LW
3269 rightbracket:
3270 s++;
3280af22 3271 if (PL_lex_brackets <= 0)
d98d5fff 3272 yyerror("Unmatched right curly bracket");
463ee0b2 3273 else
3280af22 3274 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3275 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3276 PL_lex_formbrack = 0;
3277 if (PL_lex_state == LEX_INTERPNORMAL) {
3278 if (PL_lex_brackets == 0) {
9059aa12
LW
3279 if (PL_expect & XFAKEBRACK) {
3280 PL_expect &= XENUMMASK;
3280af22
NIS
3281 PL_lex_state = LEX_INTERPEND;
3282 PL_bufptr = s;
cea2e8a9 3283 return yylex(); /* ignore fake brackets */
79072805 3284 }
fa83b5b6 3285 if (*s == '-' && s[1] == '>')
3280af22 3286 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3287 else if (*s != '[' && *s != '{')
3280af22 3288 PL_lex_state = LEX_INTERPEND;
79072805
LW
3289 }
3290 }
9059aa12
LW
3291 if (PL_expect & XFAKEBRACK) {
3292 PL_expect &= XENUMMASK;
3280af22 3293 PL_bufptr = s;
cea2e8a9 3294 return yylex(); /* ignore fake brackets */
748a9306 3295 }
79072805
LW
3296 force_next('}');
3297 TOKEN(';');
378cc40b
LW
3298 case '&':
3299 s++;
3300 tmp = *s++;
3301 if (tmp == '&')
a0d0e21e 3302 AOPERATOR(ANDAND);
378cc40b 3303 s--;
3280af22 3304 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3305 if (ckWARN(WARN_SEMICOLON)
3306 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3307 {
57843af0 3308 CopLINE_dec(PL_curcop);
9014280d 3309 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3310 CopLINE_inc(PL_curcop);
463ee0b2 3311 }
79072805 3312 BAop(OP_BIT_AND);
463ee0b2 3313 }
79072805 3314
3280af22
NIS
3315 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3316 if (*PL_tokenbuf) {
3317 PL_expect = XOPERATOR;
3318 force_ident(PL_tokenbuf, '&');
463ee0b2 3319 }
79072805
LW
3320 else
3321 PREREF('&');
c07a80fd 3322 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3323 TERM('&');
3324
378cc40b
LW
3325 case '|':
3326 s++;
3327 tmp = *s++;
3328 if (tmp == '|')
a0d0e21e 3329 AOPERATOR(OROR);
378cc40b 3330 s--;
79072805 3331 BOop(OP_BIT_OR);
378cc40b
LW
3332 case '=':
3333 s++;
3334 tmp = *s++;
3335 if (tmp == '=')
79072805
LW
3336 Eop(OP_EQ);
3337 if (tmp == '>')
3338 OPERATOR(',');
378cc40b 3339 if (tmp == '~')
79072805 3340 PMop(OP_MATCH);
599cee73 3341 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3342 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3343 s--;
3280af22
NIS
3344 if (PL_expect == XSTATE && isALPHA(tmp) &&
3345 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3346 {
3280af22
NIS
3347 if (PL_in_eval && !PL_rsfp) {
3348 d = PL_bufend;
a5f75d66
AD
3349 while (s < d) {
3350 if (*s++ == '\n') {
3351 incline(s);
3352 if (strnEQ(s,"=cut",4)) {
3353 s = strchr(s,'\n');
3354 if (s)
3355 s++;
3356 else
3357 s = d;
3358 incline(s);
3359 goto retry;
3360 }
3361 }
3362 }
3363 goto retry;
3364 }
3280af22
NIS
3365 s = PL_bufend;
3366 PL_doextract = TRUE;
a0d0e21e
LW
3367 goto retry;
3368 }
3280af22 3369 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3370 char *t;
51882d45 3371#ifdef PERL_STRICT_CR
bf4acbe4 3372 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3373#else
bf4acbe4 3374 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3375#endif
a0d0e21e
LW
3376 if (*t == '\n' || *t == '#') {
3377 s--;
3280af22 3378 PL_expect = XBLOCK;
a0d0e21e
LW
3379 goto leftbracket;
3380 }
79072805 3381 }
a0d0e21e
LW
3382 yylval.ival = 0;
3383 OPERATOR(ASSIGNOP);
378cc40b
LW
3384 case '!':
3385 s++;
3386 tmp = *s++;
3387 if (tmp == '=')
79072805 3388 Eop(OP_NE);
378cc40b 3389 if (tmp == '~')
79072805 3390 PMop(OP_NOT);
378cc40b
LW
3391 s--;
3392 OPERATOR('!');
3393 case '<':
3280af22 3394 if (PL_expect != XOPERATOR) {
93a17b20 3395 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3396 check_uni();
79072805
LW
3397 if (s[1] == '<')
3398 s = scan_heredoc(s);
3399 else
3400 s = scan_inputsymbol(s);
3401 TERM(sublex_start());
378cc40b
LW
3402 }
3403 s++;
3404 tmp = *s++;
3405 if (tmp == '<')
79072805 3406 SHop(OP_LEFT_SHIFT);
395c3793
LW
3407 if (tmp == '=') {
3408 tmp = *s++;
3409 if (tmp == '>')
79072805 3410 Eop(OP_NCMP);
395c3793 3411 s--;
79072805 3412 Rop(OP_LE);
395c3793 3413 }
378cc40b 3414 s--;
79072805 3415 Rop(OP_LT);
378cc40b
LW
3416 case '>':
3417 s++;
3418 tmp = *s++;
3419 if (tmp == '>')
79072805 3420 SHop(OP_RIGHT_SHIFT);
378cc40b 3421 if (tmp == '=')
79072805 3422 Rop(OP_GE);
378cc40b 3423 s--;
79072805 3424 Rop(OP_GT);
378cc40b
LW
3425
3426 case '$':
bbce6d69 3427 CLINE;
3428
3280af22
NIS
3429 if (PL_expect == XOPERATOR) {
3430 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3431 PL_expect = XTERM;
a0d0e21e 3432 depcom();
bbce6d69 3433 return ','; /* grandfather non-comma-format format */
a0d0e21e 3434 }
8990e307 3435 }
a0d0e21e 3436
7e2040f0 3437 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3438 PL_tokenbuf[0] = '@';
376b8730
SM
3439 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3440 sizeof PL_tokenbuf - 1, FALSE);
3441 if (PL_expect == XOPERATOR)
3442 no_op("Array length", s);
3280af22 3443 if (!PL_tokenbuf[1])
a0d0e21e 3444 PREREF(DOLSHARP);
3280af22
NIS
3445 PL_expect = XOPERATOR;
3446 PL_pending_ident = '#';
463ee0b2 3447 TOKEN(DOLSHARP);
79072805 3448 }
bbce6d69 3449
3280af22 3450 PL_tokenbuf[0] = '$';
376b8730
SM
3451 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3452 sizeof PL_tokenbuf - 1, FALSE);
3453 if (PL_expect == XOPERATOR)
3454 no_op("Scalar", s);
3280af22
NIS
3455 if (!PL_tokenbuf[1]) {
3456 if (s == PL_bufend)
bbce6d69 3457 yyerror("Final $ should be \\$ or $name");
3458 PREREF('$');
8990e307 3459 }
a0d0e21e 3460
bbce6d69 3461 /* This kludge not intended to be bulletproof. */
3280af22 3462 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3463 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3464 newSViv(PL_compiling.cop_arybase));
bbce6d69 3465 yylval.opval->op_private = OPpCONST_ARYBASE;
3466 TERM(THING);
3467 }
3468
ff68c719 3469 d = s;
69d2bceb 3470 tmp = (I32)*s;
3280af22 3471 if (PL_lex_state == LEX_NORMAL)
ff68c719 3472 s = skipspace(s);
3473
3280af22 3474 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3475 char *t;
3476 if (*s == '[') {
3280af22 3477 PL_tokenbuf[0] = '@';
599cee73 3478 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3479 for(t = s + 1;
7e2040f0 3480 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3481 t++) ;
a0d0e21e 3482 if (*t++ == ',') {
3280af22
NIS
3483 PL_bufptr = skipspace(PL_bufptr);
3484 while (t < PL_bufend && *t != ']')
bbce6d69 3485 t++;
9014280d 3486 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3487 "Multidimensional syntax %.*s not supported",
3488 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3489 }
3490 }
bbce6d69 3491 }
3492 else if (*s == '{') {
3280af22 3493 PL_tokenbuf[0] = '%';
599cee73 3494 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3495 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3496 {
3280af22 3497 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3498 STRLEN len;
3499 for (t++; isSPACE(*t); t++) ;
7e2040f0 3500 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3501 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3502 for (; isSPACE(*t); t++) ;
864dbfa3 3503 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3504 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3505 "You need to quote \"%s\"", tmpbuf);
748a9306 3506 }
93a17b20
LW
3507 }
3508 }
2f3197b3 3509 }
bbce6d69 3510
3280af22 3511 PL_expect = XOPERATOR;
69d2bceb 3512 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3513 bool islop = (PL_last_lop == PL_oldoldbufptr);
3514 if (!islop || PL_last_lop_op == OP_GREPSTART)
3515 PL_expect = XOPERATOR;
bbce6d69 3516 else if (strchr("$@\"'`q", *s))
3280af22 3517 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3518 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3519 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3520 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3521 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3522 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3523 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3524 /* binary operators exclude handle interpretations */
3525 switch (tmp) {
3526 case -KEY_x:
3527 case -KEY_eq:
3528 case -KEY_ne:
3529 case -KEY_gt:
3530 case -KEY_lt:
3531 case -KEY_ge:
3532 case -KEY_le:
3533 case -KEY_cmp:
3534 break;
3535 default:
3280af22 3536 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3537 break;
3538 }
3539 }
68dc0745 3540 else {
3541 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3542 if (gv && GvCVu(gv))
3280af22 3543 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3544 }
93a17b20 3545 }
bbce6d69 3546 else if (isDIGIT(*s))
3280af22 3547 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3548 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3549 PL_expect = XTERM; /* e.g. print $fh .3 */
c963b151
BD
3550 else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3551 PL_expect = XTERM; /* e.g. print $fh -1 */
7ce6e6b9
RGS
3552 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3553 PL_expect = XTERM; /* e.g. print $fh /.../
3554 XXX except DORDOR operator */
e0587a03 3555 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3556 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3557 }
3280af22 3558 PL_pending_ident = '$';
79072805 3559 TOKEN('$');
378cc40b
LW
3560
3561 case '@':
3280af22 3562 if (PL_expect == XOPERATOR)
bbce6d69 3563 no_op("Array", s);
3280af22
NIS
3564 PL_tokenbuf[0] = '@';
3565 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3566 if (!PL_tokenbuf[1]) {
3567 if (s == PL_bufend)
bbce6d69 3568 yyerror("Final @ should be \\@ or @name");
3569 PREREF('@');
3570 }
3280af22 3571 if (PL_lex_state == LEX_NORMAL)
ff68c719 3572 s = skipspace(s);
3280af22 3573 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3574 if (*s == '{')
3280af22 3575 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3576
3577 /* Warn about @ where they meant $. */
599cee73 3578 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3579 if (*s == '[' || *s == '{') {
3580 char *t = s + 1;
7e2040f0 3581 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3582 t++;
3583 if (*t == '}' || *t == ']') {
3584 t++;
3280af22 3585 PL_bufptr = skipspace(PL_bufptr);
9014280d 3586 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3587 "Scalar value %.*s better written as $%.*s",
3280af22 3588 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3589 }
93a17b20
LW
3590 }
3591 }
463ee0b2 3592 }
3280af22 3593 PL_pending_ident = '@';
79072805 3594 TERM('@');
378cc40b 3595
c963b151 3596 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
3597 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3598 s += 2;
3599 AOPERATOR(DORDOR);
3600 }
c963b151
BD
3601 case '?': /* may either be conditional or pattern */
3602 if(PL_expect == XOPERATOR) {
3603 tmp = *s++;
3604 if(tmp == '?') {
3605 OPERATOR('?');
3606 }
3607 else {
3608 tmp = *s++;
3609 if(tmp == '/') {
3610 /* A // operator. */
3611 AOPERATOR(DORDOR);
3612 }
3613 else {
3614 s--;
3615 Mop(OP_DIVIDE);
3616 }
3617 }
3618 }
3619 else {
3620 /* Disable warning on "study /blah/" */
3621 if (PL_oldoldbufptr == PL_last_uni
3622 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3623 || memNE(PL_last_uni, "study", 5)
3624 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3625 ))
3626 check_uni();
3627 s = scan_pat(s,OP_MATCH);
3628 TERM(sublex_start());
3629 }
378cc40b
LW
3630
3631 case '.':
51882d45
GS
3632 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3633#ifdef PERL_STRICT_CR
3634 && s[1] == '\n'
3635#else
3636 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3637#endif
3638 && (s == PL_linestart || s[-1] == '\n') )
3639 {
3280af22
NIS
3640 PL_lex_formbrack = 0;
3641 PL_expect = XSTATE;
79072805
LW
3642 goto rightbracket;
3643 }
3280af22 3644 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3645 tmp = *s++;
a687059c
LW
3646 if (*s == tmp) {
3647 s++;
2f3197b3
LW
3648 if (*s == tmp) {
3649 s++;
79072805 3650 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3651 }
3652 else
79072805 3653 yylval.ival = 0;
378cc40b 3654 OPERATOR(DOTDOT);
a687059c 3655 }
3280af22 3656 if (PL_expect != XOPERATOR)
2f3197b3 3657 check_uni();
79072805 3658 Aop(OP_CONCAT);
378cc40b
LW
3659 }
3660 /* FALL THROUGH */
3661 case '0': case '1': case '2': case '3': case '4':
3662 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3663 s = scan_num(s, &yylval);
4e553d73 3664 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3665 "### Saw number in '%s'\n", s);
5f80b19c 3666 } );
3280af22 3667 if (PL_expect == XOPERATOR)
8990e307 3668 no_op("Number",s);
79072805
LW
3669 TERM(THING);
3670
3671 case '\'':
09bef843 3672 s = scan_str(s,FALSE,FALSE);
4e553d73 3673 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3674 "### Saw string before '%s'\n", s);
5f80b19c 3675 } );
3280af22
NIS
3676 if (PL_expect == XOPERATOR) {
3677 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3678 PL_expect = XTERM;
a0d0e21e
LW
3679 depcom();
3680 return ','; /* grandfather non-comma-format format */
3681 }
463ee0b2 3682 else
8990e307 3683 no_op("String",s);
463ee0b2 3684 }
79072805 3685 if (!s)
85e6fe83 3686 missingterm((char*)0);
79072805
LW
3687 yylval.ival = OP_CONST;
3688 TERM(sublex_start());
3689
3690 case '"':
09bef843 3691 s = scan_str(s,FALSE,FALSE);
4e553d73 3692 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3693 "### Saw string before '%s'\n", s);
5f80b19c 3694 } );
3280af22
NIS
3695 if (PL_expect == XOPERATOR) {
3696 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3697 PL_expect = XTERM;
a0d0e21e
LW
3698 depcom();
3699 return ','; /* grandfather non-comma-format format */
3700 }
463ee0b2 3701 else
8990e307 3702 no_op("String",s);
463ee0b2 3703 }
79072805 3704 if (!s)
85e6fe83 3705 missingterm((char*)0);
4633a7c4 3706 yylval.ival = OP_CONST;
3280af22 3707 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3708 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3709 yylval.ival = OP_STRINGIFY;
3710 break;
3711 }
3712 }
79072805
LW
3713 TERM(sublex_start());
3714
3715 case '`':
09bef843 3716 s = scan_str(s,FALSE,FALSE);
4e553d73 3717 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3718 "### Saw backtick string before '%s'\n", s);
5f80b19c 3719 } );
3280af22 3720 if (PL_expect == XOPERATOR)
8990e307 3721 no_op("Backticks",s);
79072805 3722 if (!s)
85e6fe83 3723 missingterm((char*)0);
79072805
LW
3724 yylval.ival = OP_BACKTICK;
3725 set_csh();
3726 TERM(sublex_start());
3727
3728 case '\\':
3729 s++;
599cee73 3730 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
9014280d 3731 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
599cee73 3732 *s, *s);
3280af22 3733 if (PL_expect == XOPERATOR)
8990e307 3734 no_op("Backslash",s);
79072805
LW
3735 OPERATOR(REFGEN);
3736
a7cb1f99 3737 case 'v':
e526c9e6 3738 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3739 char *start = s;
3740 start++;
3741 start++;
dd629d5b 3742 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3743 start++;
3744 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3745 s = scan_num(s, &yylval);
a7cb1f99
GS
3746 TERM(THING);
3747 }
e526c9e6 3748 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
3749 else if (!isALPHA(*start) && (PL_expect == XTERM
3750 || PL_expect == XREF || PL_expect == XSTATE
3751 || PL_expect == XTERMORDORDOR)) {
e526c9e6
GS
3752 char c = *start;
3753 GV *gv;
3754 *start = '\0';
3755 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3756 *start = c;
3757 if (!gv) {
b73d6f50 3758 s = scan_num(s, &yylval);
e526c9e6
GS
3759 TERM(THING);
3760 }
3761 }
a7cb1f99
GS
3762 }
3763 goto keylookup;
79072805 3764 case 'x':
3280af22 3765 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3766 s++;
3767 Mop(OP_REPEAT);
2f3197b3 3768 }
79072805
LW
3769 goto keylookup;
3770
378cc40b 3771 case '_':
79072805
LW
3772 case 'a': case 'A':
3773 case 'b': case 'B':
3774 case 'c': case 'C':
3775 case 'd': case 'D':
3776 case 'e': case 'E':
3777 case 'f': case 'F':
3778 case 'g': case 'G':
3779 case 'h': case 'H':
3780 case 'i': case 'I':
3781 case 'j': case 'J':
3782 case 'k': case 'K':
3783 case 'l': case 'L':
3784 case 'm': case 'M':
3785 case 'n': case 'N':
3786 case 'o': case 'O':
3787 case 'p': case 'P':
3788 case 'q': case 'Q':
3789 case 'r': case 'R':
3790 case 's': case 'S':
3791 case 't': case 'T':
3792 case 'u': case 'U':
a7cb1f99 3793 case 'V':
79072805
LW
3794 case 'w': case 'W':
3795 case 'X':
3796 case 'y': case 'Y':
3797 case 'z': case 'Z':
3798
49dc05e3 3799 keylookup: {
30fe34ed 3800 I32 orig_keyword = 0;
161b471a
NIS
3801 gv = Nullgv;
3802 gvp = 0;
49dc05e3 3803
3280af22
NIS
3804 PL_bufptr = s;
3805 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3806
3807 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3808 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3809 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3810 (PL_tokenbuf[0] == 'q' &&
3811 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3812
3813 /* x::* is just a word, unless x is "CORE" */
3280af22 3814 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3815 goto just_a_word;
3816
3643fb5f 3817 d = s;
3280af22 3818 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3819 d++; /* no comments skipped here, or s### is misparsed */
3820
3821 /* Is this a label? */
3280af22
NIS
3822 if (!tmp && PL_expect == XSTATE
3823 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3824 s = d + 1;
3280af22 3825 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3826 CLINE;
3827 TOKEN(LABEL);
3643fb5f
CS
3828 }
3829
3830 /* Check for keywords */
3280af22 3831 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3832
3833 /* Is this a word before a => operator? */
1c3923b3 3834 if (*d == '=' && d[1] == '>') {
748a9306 3835 CLINE;
3280af22 3836 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 3837 yylval.opval->op_private = OPpCONST_BARE;
0064a8a9 3838 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 3839 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
3840 TERM(WORD);
3841 }
3842
a0d0e21e 3843 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3844 GV *ogv = Nullgv; /* override (winner) */
3845 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3846 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3847 CV *cv;
3280af22 3848 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3849 (cv = GvCVu(gv)))
3850 {
3851 if (GvIMPORTED_CV(gv))
3852 ogv = gv;
3853 else if (! CvMETHOD(cv))
3854 hgv = gv;
3855 }
3856 if (!ogv &&
3280af22
NIS
3857 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3858 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3859 GvCVu(gv) && GvIMPORTED_CV(gv))
3860 {
3861 ogv = gv;
3862 }
3863 }
3864 if (ogv) {
30fe34ed 3865 orig_keyword = tmp;
56f7f34b 3866 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3867 }
3868 else if (gv && !gvp
3869 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3870 && GvCVu(gv)
3280af22 3871 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3872 {
3873 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3874 }
56f7f34b
CS
3875 else { /* no override */
3876 tmp = -tmp;
ac206dc8 3877 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
9014280d 3878 Perl_warner(aTHX_ packWARN(WARN_MISC),
ac206dc8
RGS
3879 "dump() better written as CORE::dump()");
3880 }
56f7f34b
CS
3881 gv = Nullgv;
3882 gvp = 0;
4944e2f7
GS
3883 if (ckWARN(WARN_AMBIGUOUS) && hgv
3884 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
9014280d 3885 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 3886 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3887 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3888 }
a0d0e21e
LW
3889 }
3890
3891 reserved_word:
3892 switch (tmp) {
79072805
LW
3893
3894 default: /* not a keyword */
93a17b20 3895 just_a_word: {
96e4d5b1 3896 SV *sv;
ce29ac45 3897 int pkgname = 0;
3280af22 3898 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3899
3900 /* Get the rest if it looks like a package qualifier */
3901
155aba94 3902 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 3903 STRLEN morelen;
3280af22 3904 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3905 TRUE, &morelen);
3906 if (!morelen)
cea2e8a9 3907 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3908 *s == '\'' ? "'" : "::");
c3e0f903 3909 len += morelen;
ce29ac45 3910 pkgname = 1;
a0d0e21e 3911 }
8990e307 3912
3280af22
NIS
3913 if (PL_expect == XOPERATOR) {
3914 if (PL_bufptr == PL_linestart) {
57843af0 3915 CopLINE_dec(PL_curcop);
9014280d 3916 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3917 CopLINE_inc(PL_curcop);
463ee0b2
LW
3918 }
3919 else
54310121 3920 no_op("Bareword",s);
463ee0b2 3921 }
8990e307 3922
c3e0f903
GS
3923 /* Look for a subroutine with this name in current package,
3924 unless name is "Foo::", in which case Foo is a bearword
3925 (and a package name). */
3926
3927 if (len > 2 &&
3280af22 3928 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3929 {
e476b1b5 3930 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
9014280d 3931 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 3932 "Bareword \"%s\" refers to nonexistent package",
3280af22 3933 PL_tokenbuf);
c3e0f903 3934 len -= 2;
3280af22 3935 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3936 gv = Nullgv;
3937 gvp = 0;
3938 }
3939 else {
3940 len = 0;
3941 if (!gv)
3280af22 3942 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3943 }
3944
3945 /* if we saw a global override before, get the right name */
8990e307 3946
49dc05e3 3947 if (gvp) {
79cb57f6 3948 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3949 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3950 }
3951 else
3280af22 3952 sv = newSVpv(PL_tokenbuf,0);
8990e307 3953
a0d0e21e
LW
3954 /* Presume this is going to be a bareword of some sort. */
3955
3956 CLINE;
49dc05e3 3957 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e 3958 yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
3959 /* UTF-8 package name? */
3960 if (UTF && !IN_BYTES &&
3961 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
3962 SvUTF8_on(sv);
a0d0e21e 3963
c3e0f903
GS
3964 /* And if "Foo::", then that's what it certainly is. */
3965
3966 if (len)
3967 goto safe_bareword;
3968
8990e307
LW
3969 /* See if it's the indirect object for a list operator. */
3970
3280af22
NIS
3971 if (PL_oldoldbufptr &&
3972 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3973 (PL_oldoldbufptr == PL_last_lop
3974 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3975 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3976 (PL_expect == XREF ||
3977 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3978 {
748a9306
LW
3979 bool immediate_paren = *s == '(';
3980
a0d0e21e
LW
3981 /* (Now we can afford to cross potential line boundary.) */
3982 s = skipspace(s);
3983
3984 /* Two barewords in a row may indicate method call. */
3985
7e2040f0 3986 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3987 return tmp;
3988
3989 /* If not a declared subroutine, it's an indirect object. */
3990 /* (But it's an indir obj regardless for sort.) */
3991
7948272d 3992 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 3993 ((!gv || !GvCVu(gv)) &&
a9ef352a 3994 (PL_last_lop_op != OP_MAPSTART &&
f0670693 3995 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 3996 {
3280af22 3997 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 3998 goto bareword;
93a17b20
LW
3999 }
4000 }
8990e307 4001
3280af22 4002 PL_expect = XOPERATOR;
8990e307 4003 s = skipspace(s);
1c3923b3
GS
4004
4005 /* Is this a word before a => operator? */
ce29ac45 4006 if (*s == '=' && s[1] == '>' && !pkgname) {
1c3923b3
GS
4007 CLINE;
4008 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 4009 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 4010 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
4011 TERM(WORD);
4012 }
4013
4014 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 4015 if (*s == '(') {
79072805 4016 CLINE;
96e4d5b1 4017 if (gv && GvCVu(gv)) {
bf4acbe4 4018 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 4019 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 4020 s = d + 1;
4021 goto its_constant;
4022 }
4023 }
3280af22
NIS
4024 PL_nextval[PL_nexttoke].opval = yylval.opval;
4025 PL_expect = XOPERATOR;
93a17b20 4026 force_next(WORD);
c07a80fd 4027 yylval.ival = 0;
463ee0b2 4028 TOKEN('&');
79072805 4029 }
93a17b20 4030
a0d0e21e 4031 /* If followed by var or block, call it a method (unless sub) */
8990e307 4032
8ebc5c01 4033 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
4034 PL_last_lop = PL_oldbufptr;
4035 PL_last_lop_op = OP_METHOD;
93a17b20 4036 PREBLOCK(METHOD);
463ee0b2
LW
4037 }
4038
8990e307
LW
4039 /* If followed by a bareword, see if it looks like indir obj. */
4040
30fe34ed
RGS
4041 if (!orig_keyword
4042 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4043 && (tmp = intuit_method(s,gv)))
a0d0e21e 4044 return tmp;
93a17b20 4045
8990e307
LW
4046 /* Not a method, so call it a subroutine (if defined) */
4047
8ebc5c01 4048 if (gv && GvCVu(gv)) {
46fc3d4c 4049 CV* cv;
0453d815 4050 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
9014280d 4051 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4052 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4053 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4054 /* Check for a constant sub */
46fc3d4c 4055 cv = GvCV(gv);
96e4d5b1 4056 if ((sv = cv_const_sv(cv))) {
4057 its_constant:
4058 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4059 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4060 yylval.opval->op_private = 0;
4061 TOKEN(WORD);
89bfa8cd 4062 }
4063
a5f75d66
AD
4064 /* Resolve to GV now. */
4065 op_free(yylval.opval);
4066 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4067 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4068 PL_last_lop = PL_oldbufptr;
bf848113 4069 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4070 /* Is there a prototype? */
4071 if (SvPOK(cv)) {
4072 STRLEN len;
7a52d87a 4073 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4074 if (!len)
4075 TERM(FUNC0SUB);
7a52d87a 4076 if (strEQ(proto, "$"))
4633a7c4 4077 OPERATOR(UNIOPSUB);
7a52d87a 4078 if (*proto == '&' && *s == '{') {
c99da370
JH
4079 sv_setpv(PL_subname, PL_curstash ?
4080 "__ANON__" : "__ANON__::__ANON__");
4633a7c4
LW
4081 PREBLOCK(LSTOPSUB);
4082 }
a9ef352a 4083 }
3280af22
NIS
4084 PL_nextval[PL_nexttoke].opval = yylval.opval;
4085 PL_expect = XTERM;
8990e307
LW
4086 force_next(WORD);
4087 TOKEN(NOAMP);
4088 }
748a9306 4089
8990e307
LW
4090 /* Call it a bare word */
4091
5603f27d
GS
4092 if (PL_hints & HINT_STRICT_SUBS)
4093 yylval.opval->op_private |= OPpCONST_STRICT;
4094 else {
4095 bareword:
4096 if (ckWARN(WARN_RESERVED)) {
4097 if (lastchar != '-') {
4098 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
238ae712 4099 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
9014280d 4100 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
4101 PL_tokenbuf);
4102 }
748a9306
LW
4103 }
4104 }
c3e0f903
GS
4105
4106 safe_bareword:
f248d071 4107 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
9014280d 4108 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4109 "Operator or semicolon missing before %c%s",
3280af22 4110 lastchar, PL_tokenbuf);
9014280d 4111 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4112 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4113 lastchar, lastchar);
4114 }
93a17b20 4115 TOKEN(WORD);
79072805 4116 }
79072805 4117
68dc0745 4118 case KEY___FILE__:
46fc3d4c 4119 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4120 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4121 TERM(THING);
4122
79072805 4123 case KEY___LINE__:
cf2093f6 4124 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4125 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4126 TERM(THING);
68dc0745 4127
4128 case KEY___PACKAGE__:
4129 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
4130 (PL_curstash
4131 ? newSVsv(PL_curstname)
4132 : &PL_sv_undef));
79072805 4133 TERM(THING);
79072805 4134
e50aee73 4135 case KEY___DATA__:
79072805
LW
4136 case KEY___END__: {
4137 GV *gv;
79072805
LW
4138
4139 /*SUPPRESS 560*/
3280af22 4140 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4141 char *pname = "main";
3280af22
NIS
4142 if (PL_tokenbuf[2] == 'D')
4143 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4144 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4145 GvMULTI_on(gv);
79072805 4146 if (!GvIO(gv))
a0d0e21e 4147 GvIOp(gv) = newIO();
3280af22 4148 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4149#if defined(HAS_FCNTL) && defined(F_SETFD)
4150 {
3280af22 4151 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4152 fcntl(fd,F_SETFD,fd >= 3);
4153 }
79072805 4154#endif
fd049845 4155 /* Mark this internal pseudo-handle as clean */
4156 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4157 if (PL_preprocess)
50952442 4158 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4159 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4160 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4161 else
50952442 4162 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4163#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4164 /* if the script was opened in binmode, we need to revert
53129d29 4165 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4166 * XXX this is a questionable hack at best. */
53129d29
GS
4167 if (PL_bufend-PL_bufptr > 2
4168 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4169 {
4170 Off_t loc = 0;
50952442 4171 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4172 loc = PerlIO_tell(PL_rsfp);
4173 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4174 }
2986a63f
JH
4175#ifdef NETWARE
4176 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4177#else
c39cd008 4178 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 4179#endif /* NETWARE */
1143fce0
JH
4180#ifdef PERLIO_IS_STDIO /* really? */
4181# if defined(__BORLANDC__)
cb359b41
JH
4182 /* XXX see note in do_binmode() */
4183 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4184# endif
4185#endif
c39cd008
GS
4186 if (loc > 0)
4187 PerlIO_seek(PL_rsfp, loc, 0);
4188 }
4189 }
4190#endif
7948272d 4191#ifdef PERLIO_LAYERS
0064a8a9 4192 if (UTF && !IN_BYTES)
7948272d
NIS
4193 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4194#endif
3280af22 4195 PL_rsfp = Nullfp;
79072805
LW
4196 }
4197 goto fake_eof;
e929a76b 4198 }
de3bb511 4199
8990e307 4200 case KEY_AUTOLOAD:
ed6116ce 4201 case KEY_DESTROY:
79072805 4202 case KEY_BEGIN:
7d30b5c4 4203 case KEY_CHECK:
7d07dbc2 4204 case KEY_INIT:
7d30b5c4 4205 case KEY_END:
3280af22
NIS
4206 if (PL_expect == XSTATE) {
4207 s = PL_bufptr;
93a17b20 4208 goto really_sub;
79072805
LW
4209 }
4210 goto just_a_word;
4211
a0d0e21e
LW
4212 case KEY_CORE:
4213 if (*s == ':' && s[1] == ':') {
4214 s += 2;
748a9306 4215 d = s;
3280af22 4216 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4217 if (!(tmp = keyword(PL_tokenbuf, len)))
4218 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4219 if (tmp < 0)
4220 tmp = -tmp;
4221 goto reserved_word;
4222 }
4223 goto just_a_word;
4224
463ee0b2
LW
4225 case KEY_abs:
4226 UNI(OP_ABS);
4227
79072805
LW
4228 case KEY_alarm:
4229 UNI(OP_ALARM);
4230
4231 case KEY_accept:
a0d0e21e 4232 LOP(OP_ACCEPT,XTERM);
79072805 4233
463ee0b2
LW
4234 case KEY_and:
4235 OPERATOR(ANDOP);
4236
79072805 4237 case KEY_atan2:
a0d0e21e 4238 LOP(OP_ATAN2,XTERM);
85e6fe83 4239
79072805 4240 case KEY_bind:
a0d0e21e 4241 LOP(OP_BIND,XTERM);
79072805
LW
4242
4243 case KEY_binmode:
1c1fc3ea 4244 LOP(OP_BINMODE,XTERM);
79072805
LW
4245
4246 case KEY_bless:
a0d0e21e 4247 LOP(OP_BLESS,XTERM);
79072805
LW
4248
4249 case KEY_chop:
4250 UNI(OP_CHOP);
4251
4252 case KEY_continue:
4253 PREBLOCK(CONTINUE);
4254
4255 case KEY_chdir:
85e6fe83 4256 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4257 UNI(OP_CHDIR);
4258
4259 case KEY_close:
4260 UNI(OP_CLOSE);
4261
4262 case KEY_closedir:
4263 UNI(OP_CLOSEDIR);
4264
4265 case KEY_cmp:
4266 Eop(OP_SCMP);
4267
4268 case KEY_caller:
4269 UNI(OP_CALLER);
4270
4271 case KEY_crypt:
4272#ifdef FCRYPT
f4c556ac
GS
4273 if (!PL_cryptseen) {
4274 PL_cryptseen = TRUE;
de3bb511 4275 init_des();
f4c556ac 4276 }
a687059c 4277#endif
a0d0e21e 4278 LOP(OP_CRYPT,XTERM);
79072805
LW
4279
4280 case KEY_chmod:
a0d0e21e 4281 LOP(OP_CHMOD,XTERM);
79072805
LW
4282
4283 case KEY_chown:
a0d0e21e 4284 LOP(OP_CHOWN,XTERM);
79072805
LW
4285
4286 case KEY_connect:
a0d0e21e 4287 LOP(OP_CONNECT,XTERM);
79072805 4288
463ee0b2
LW
4289 case KEY_chr:
4290 UNI(OP_CHR);
4291
79072805
LW
4292 case KEY_cos:
4293 UNI(OP_COS);
4294
4295 case KEY_chroot:
4296 UNI(OP_CHROOT);
4297
4298 case KEY_do:
4299 s = skipspace(s);
4300 if (*s == '{')
a0d0e21e 4301 PRETERMBLOCK(DO);
79072805 4302 if (*s != '\'')
89c5585f 4303 s = force_word(s,WORD,TRUE,TRUE,FALSE);
378cc40b 4304 OPERATOR(DO);
79072805
LW
4305
4306 case KEY_die:
3280af22 4307 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4308 LOP(OP_DIE,XTERM);
79072805
LW
4309
4310 case KEY_defined:
4311 UNI(OP_DEFINED);
4312
4313 case KEY_delete:
a0d0e21e 4314 UNI(OP_DELETE);
79072805
LW
4315
4316 case KEY_dbmopen:
a0d0e21e
LW
4317 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4318 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4319
4320 case KEY_dbmclose:
4321 UNI(OP_DBMCLOSE);
4322
4323 case KEY_dump:
a0d0e21e 4324 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4325 LOOPX(OP_DUMP);
4326
4327 case KEY_else:
4328 PREBLOCK(ELSE);
4329
4330 case KEY_elsif:
57843af0 4331 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4332 OPERATOR(ELSIF);
4333
4334 case KEY_eq:
4335 Eop(OP_SEQ);
4336
a0d0e21e
LW
4337 case KEY_exists:
4338 UNI(OP_EXISTS);
4e553d73 4339
79072805
LW
4340 case KEY_exit:
4341 UNI(OP_EXIT);
4342
4343 case KEY_eval:
79072805 4344 s = skipspace(s);
3280af22 4345 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4346 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4347
4348 case KEY_eof:
4349 UNI(OP_EOF);
4350
c963b151
BD
4351 case KEY_err:
4352 OPERATOR(DOROP);
4353
79072805
LW
4354 case KEY_exp:
4355 UNI(OP_EXP);
4356
4357 case KEY_each:
4358 UNI(OP_EACH);
4359
4360 case KEY_exec:
4361 set_csh();
a0d0e21e 4362 LOP(OP_EXEC,XREF);
79072805
LW
4363
4364 case KEY_endhostent:
4365 FUN0(OP_EHOSTENT);
4366
4367 case KEY_endnetent:
4368 FUN0(OP_ENETENT);
4369
4370 case KEY_endservent:
4371 FUN0(OP_ESERVENT);
4372
4373 case KEY_endprotoent:
4374 FUN0(OP_EPROTOENT);
4375
4376 case KEY_endpwent:
4377 FUN0(OP_EPWENT);
4378
4379 case KEY_endgrent:
4380 FUN0(OP_EGRENT);
4381
4382 case KEY_for:
4383 case KEY_foreach:
57843af0 4384 yylval.ival = CopLINE(PL_curcop);
55497cff 4385 s = skipspace(s);
7e2040f0 4386 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4387 char *p = s;
3280af22 4388 if ((PL_bufend - p) >= 3 &&
55497cff 4389 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4390 p += 2;
77ca0c92
LW
4391 else if ((PL_bufend - p) >= 4 &&
4392 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4393 p += 3;
55497cff 4394 p = skipspace(p);
7e2040f0 4395 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4396 p = scan_ident(p, PL_bufend,
4397 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4398 p = skipspace(p);
4399 }
4400 if (*p != '$')
cea2e8a9 4401 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4402 }
79072805
LW
4403 OPERATOR(FOR);
4404
4405 case KEY_formline:
a0d0e21e 4406 LOP(OP_FORMLINE,XTERM);
79072805
LW
4407
4408 case KEY_fork:
4409 FUN0(OP_FORK);
4410
4411 case KEY_fcntl:
a0d0e21e 4412 LOP(OP_FCNTL,XTERM);
79072805
LW
4413
4414 case KEY_fileno:
4415 UNI(OP_FILENO);
4416
4417 case KEY_flock:
a0d0e21e 4418 LOP(OP_FLOCK,XTERM);
79072805
LW
4419
4420 case KEY_gt:
4421 Rop(OP_SGT);
4422
4423 case KEY_ge:
4424 Rop(OP_SGE);
4425
4426 case KEY_grep:
2c38e13d 4427 LOP(OP_GREPSTART, XREF);
79072805
LW
4428
4429 case KEY_goto:
a0d0e21e 4430 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4431 LOOPX(OP_GOTO);
4432
4433 case KEY_gmtime:
4434 UNI(OP_GMTIME);
4435
4436 case KEY_getc:
6f33ba73 4437 UNIDOR(OP_GETC);
79072805
LW
4438
4439 case KEY_getppid:
4440 FUN0(OP_GETPPID);
4441
4442 case KEY_getpgrp:
4443 UNI(OP_GETPGRP);
4444
4445 case KEY_getpriority:
a0d0e21e 4446 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4447
4448 case KEY_getprotobyname:
4449 UNI(OP_GPBYNAME);
4450
4451 case KEY_getprotobynumber:
a0d0e21e 4452 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4453
4454 case KEY_getprotoent:
4455 FUN0(OP_GPROTOENT);
4456
4457 case KEY_getpwent:
4458 FUN0(OP_GPWENT);
4459
4460 case KEY_getpwnam:
ff68c719 4461 UNI(OP_GPWNAM);
79072805
LW
4462
4463 case KEY_getpwuid:
ff68c719 4464 UNI(OP_GPWUID);
79072805
LW
4465
4466 case KEY_getpeername:
4467 UNI(OP_GETPEERNAME);
4468
4469 case KEY_gethostbyname:
4470 UNI(OP_GHBYNAME);
4471
4472 case KEY_gethostbyaddr:
a0d0e21e 4473 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4474
4475 case KEY_gethostent:
4476 FUN0(OP_GHOSTENT);
4477
4478 case KEY_getnetbyname:
4479 UNI(OP_GNBYNAME);
4480
4481 case KEY_getnetbyaddr:
a0d0e21e 4482 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4483
4484 case KEY_getnetent:
4485 FUN0(OP_GNETENT);
4486
4487 case KEY_getservbyname:
a0d0e21e 4488 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4489
4490 case KEY_getservbyport:
a0d0e21e 4491 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4492
4493 case KEY_getservent:
4494 FUN0(OP_GSERVENT);
4495
4496 case KEY_getsockname:
4497 UNI(OP_GETSOCKNAME);
4498
4499 case KEY_getsockopt:
a0d0e21e 4500 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4501
4502 case KEY_getgrent:
4503 FUN0(OP_GGRENT);
4504
4505 case KEY_getgrnam:
ff68c719 4506 UNI(OP_GGRNAM);
79072805
LW
4507
4508 case KEY_getgrgid:
ff68c719 4509 UNI(OP_GGRGID);
79072805
LW
4510
4511 case KEY_getlogin:
4512 FUN0(OP_GETLOGIN);
4513
93a17b20 4514 case KEY_glob:
a0d0e21e
LW
4515 set_csh();
4516 LOP(OP_GLOB,XTERM);
93a17b20 4517
79072805
LW
4518 case KEY_hex:
4519 UNI(OP_HEX);
4520
4521 case KEY_if:
57843af0 4522 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4523 OPERATOR(IF);
4524
4525 case KEY_index:
a0d0e21e 4526 LOP(OP_INDEX,XTERM);
79072805
LW
4527
4528 case KEY_int:
4529 UNI(OP_INT);
4530
4531 case KEY_ioctl:
a0d0e21e 4532 LOP(OP_IOCTL,XTERM);
79072805
LW
4533
4534 case KEY_join:
a0d0e21e 4535 LOP(OP_JOIN,XTERM);
79072805
LW
4536
4537 case KEY_keys:
4538 UNI(OP_KEYS);
4539
4540 case KEY_kill:
a0d0e21e 4541 LOP(OP_KILL,XTERM);
79072805
LW
4542
4543 case KEY_last:
a0d0e21e 4544 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4545 LOOPX(OP_LAST);
4e553d73 4546
79072805
LW
4547 case KEY_lc:
4548 UNI(OP_LC);
4549
4550 case KEY_lcfirst:
4551 UNI(OP_LCFIRST);
4552
4553 case KEY_local:
09bef843 4554 yylval.ival = 0;
79072805
LW
4555 OPERATOR(LOCAL);
4556
4557 case KEY_length:
4558 UNI(OP_LENGTH);
4559
4560 case KEY_lt:
4561 Rop(OP_SLT);
4562
4563 case KEY_le:
4564 Rop(OP_SLE);
4565
4566 case KEY_localtime:
4567 UNI(OP_LOCALTIME);
4568
4569 case KEY_log:
4570 UNI(OP_LOG);
4571
4572 case KEY_link:
a0d0e21e 4573 LOP(OP_LINK,XTERM);
79072805
LW
4574
4575 case KEY_listen:
a0d0e21e 4576 LOP(OP_LISTEN,XTERM);
79072805 4577
c0329465
MB
4578 case KEY_lock:
4579 UNI(OP_LOCK);
4580
79072805
LW
4581 case KEY_lstat:
4582 UNI(OP_LSTAT);
4583
4584 case KEY_m:
8782bef2 4585 s = scan_pat(s,OP_MATCH);
79072805
LW
4586 TERM(sublex_start());
4587
a0d0e21e 4588 case KEY_map:
2c38e13d 4589 LOP(OP_MAPSTART, XREF);
4e4e412b 4590
79072805 4591 case KEY_mkdir:
a0d0e21e 4592 LOP(OP_MKDIR,XTERM);
79072805
LW
4593
4594 case KEY_msgctl:
a0d0e21e 4595 LOP(OP_MSGCTL,XTERM);
79072805
LW
4596
4597 case KEY_msgget:
a0d0e21e 4598 LOP(OP_MSGGET,XTERM);
79072805
LW
4599
4600 case KEY_msgrcv:
a0d0e21e 4601 LOP(OP_MSGRCV,XTERM);
79072805
LW
4602
4603 case KEY_msgsnd:
a0d0e21e 4604 LOP(OP_MSGSND,XTERM);
79072805 4605
77ca0c92 4606 case KEY_our:
93a17b20 4607 case KEY_my:
77ca0c92 4608 PL_in_my = tmp;
c750a3ec 4609 s = skipspace(s);
7e2040f0 4610 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4611 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4612 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4613 goto really_sub;
def3634b 4614 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4615 if (!PL_in_my_stash) {
c750a3ec 4616 char tmpbuf[1024];
3280af22
NIS
4617 PL_bufptr = s;
4618 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4619 yyerror(tmpbuf);
4620 }
4621 }
09bef843 4622 yylval.ival = 1;
55497cff 4623 OPERATOR(MY);
93a17b20 4624
79072805 4625 case KEY_next:
a0d0e21e 4626 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4627 LOOPX(OP_NEXT);
4628
4629 case KEY_ne:
4630 Eop(OP_SNE);
4631
a0d0e21e 4632 case KEY_no:
3280af22 4633 if (PL_expect != XSTATE)
a0d0e21e
LW
4634 yyerror("\"no\" not allowed in expression");
4635 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 4636 s = force_version(s, FALSE);
a0d0e21e
LW
4637 yylval.ival = 0;
4638 OPERATOR(USE);
4639
4640 case KEY_not:
2d2e263d
LW
4641 if (*s == '(' || (s = skipspace(s), *s == '('))
4642 FUN1(OP_NOT);
4643 else
4644 OPERATOR(NOTOP);
a0d0e21e 4645
79072805 4646 case KEY_open:
93a17b20 4647 s = skipspace(s);
7e2040f0 4648 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4649 char *t;
7e2040f0 4650 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
93a17b20 4651 t = skipspace(d);
66fbe8fb
HS
4652 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
4653 /* [perl #16184] */
4654 && !(t[0] == '=' && t[1] == '>')
4655 ) {
9014280d 4656 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 4657 "Precedence problem: open %.*s should be open(%.*s)",
66fbe8fb
HS
4658 d - s, s, d - s, s);
4659 }
93a17b20 4660 }
a0d0e21e 4661 LOP(OP_OPEN,XTERM);
79072805 4662
463ee0b2 4663 case KEY_or:
a0d0e21e 4664 yylval.ival = OP_OR;
463ee0b2
LW
4665 OPERATOR(OROP);
4666
79072805
LW
4667 case KEY_ord:
4668 UNI(OP_ORD);
4669
4670 case KEY_oct:
4671 UNI(OP_OCT);
4672
4673 case KEY_opendir:
a0d0e21e 4674 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4675
4676 case KEY_print:
3280af22 4677 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4678 LOP(OP_PRINT,XREF);
79072805
LW
4679
4680 case KEY_printf:
3280af22 4681 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4682 LOP(OP_PRTF,XREF);
79072805 4683
c07a80fd 4684 case KEY_prototype:
4685 UNI(OP_PROTOTYPE);
4686
79072805 4687 case KEY_push:
a0d0e21e 4688 LOP(OP_PUSH,XTERM);
79072805
LW
4689
4690 case KEY_pop:
6f33ba73 4691 UNIDOR(OP_POP);
79072805 4692
a0d0e21e 4693 case KEY_pos:
6f33ba73 4694 UNIDOR(OP_POS);
4e553d73 4695
79072805 4696 case KEY_pack:
a0d0e21e 4697 LOP(OP_PACK,XTERM);
79072805
LW
4698
4699 case KEY_package:
a0d0e21e 4700 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4701 OPERATOR(PACKAGE);
4702
4703 case KEY_pipe:
a0d0e21e 4704 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4705
4706 case KEY_q:
09bef843 4707 s = scan_str(s,FALSE,FALSE);
79072805 4708 if (!s)
85e6fe83 4709 missingterm((char*)0);
79072805
LW
4710 yylval.ival = OP_CONST;
4711 TERM(sublex_start());
4712
a0d0e21e
LW
4713 case KEY_quotemeta:
4714 UNI(OP_QUOTEMETA);
4715
8990e307 4716 case KEY_qw:
09bef843 4717 s = scan_str(s,FALSE,FALSE);
8990e307 4718 if (!s)
85e6fe83 4719 missingterm((char*)0);
8127e0e3
GS
4720 force_next(')');
4721 if (SvCUR(PL_lex_stuff)) {
4722 OP *words = Nullop;
4723 int warned = 0;
3280af22 4724 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4725 while (len) {
7948272d 4726 SV *sv;
8127e0e3
GS
4727 for (; isSPACE(*d) && len; --len, ++d) ;
4728 if (len) {
4729 char *b = d;
e476b1b5 4730 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4731 for (; !isSPACE(*d) && len; --len, ++d) {
4732 if (*d == ',') {
9014280d 4733 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4734 "Possible attempt to separate words with commas");
4735 ++warned;
4736 }
4737 else if (*d == '#') {
9014280d 4738 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4739 "Possible attempt to put comments in qw() list");
4740 ++warned;
4741 }
4742 }
4743 }
4744 else {
4745 for (; !isSPACE(*d) && len; --len, ++d) ;
4746 }
7948272d
NIS
4747 sv = newSVpvn(b, d-b);
4748 if (DO_UTF8(PL_lex_stuff))
4749 SvUTF8_on(sv);
8127e0e3 4750 words = append_elem(OP_LIST, words,
7948272d 4751 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4752 }
4753 }
8127e0e3
GS
4754 if (words) {
4755 PL_nextval[PL_nexttoke].opval = words;
4756 force_next(THING);
4757 }
55497cff 4758 }
37fd879b 4759 if (PL_lex_stuff) {
8127e0e3 4760 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4761 PL_lex_stuff = Nullsv;
4762 }
3280af22 4763 PL_expect = XTERM;
8127e0e3 4764 TOKEN('(');
8990e307 4765
79072805 4766 case KEY_qq:
09bef843 4767 s = scan_str(s,FALSE,FALSE);
79072805 4768 if (!s)
85e6fe83 4769 missingterm((char*)0);
a0d0e21e 4770 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4771 if (SvIVX(PL_lex_stuff) == '\'')
4772 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4773 TERM(sublex_start());
4774
8782bef2
GB
4775 case KEY_qr:
4776 s = scan_pat(s,OP_QR);
4777 TERM(sublex_start());
4778
79072805 4779 case KEY_qx:
09bef843 4780 s = scan_str(s,FALSE,FALSE);
79072805 4781 if (!s)
85e6fe83 4782 missingterm((char*)0);
79072805
LW
4783 yylval.ival = OP_BACKTICK;
4784 set_csh();
4785 TERM(sublex_start());
4786
4787 case KEY_return:
4788 OLDLOP(OP_RETURN);
4789
4790 case KEY_require:
a7cb1f99 4791 s = skipspace(s);
e759cc13
RGS
4792 if (isDIGIT(*s)) {
4793 s = force_version(s, FALSE);
a7cb1f99 4794 }
e759cc13
RGS
4795 else if (*s != 'v' || !isDIGIT(s[1])
4796 || (s = force_version(s, TRUE), *s == 'v'))
4797 {
a7cb1f99
GS
4798 *PL_tokenbuf = '\0';
4799 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4800 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4801 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4802 else if (*s == '<')
4803 yyerror("<> should be quotes");
4804 }
463ee0b2 4805 UNI(OP_REQUIRE);
79072805
LW
4806
4807 case KEY_reset:
4808 UNI(OP_RESET);
4809
4810 case KEY_redo:
a0d0e21e 4811 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4812 LOOPX(OP_REDO);
4813
4814 case KEY_rename:
a0d0e21e 4815 LOP(OP_RENAME,XTERM);
79072805
LW
4816
4817 case KEY_rand:
4818 UNI(OP_RAND);
4819
4820 case KEY_rmdir:
4821 UNI(OP_RMDIR);
4822
4823 case KEY_rindex:
a0d0e21e 4824 LOP(OP_RINDEX,XTERM);
79072805
LW
4825
4826 case KEY_read:
a0d0e21e 4827 LOP(OP_READ,XTERM);
79072805
LW
4828
4829 case KEY_readdir:
4830 UNI(OP_READDIR);
4831
93a17b20
LW
4832 case KEY_readline:
4833 set_csh();
6f33ba73 4834 UNIDOR(OP_READLINE);
93a17b20
LW
4835
4836 case KEY_readpipe:
4837 set_csh();
4838 UNI(OP_BACKTICK);
4839
79072805
LW
4840 case KEY_rewinddir:
4841 UNI(OP_REWINDDIR);
4842
4843 case KEY_recv:
a0d0e21e 4844 LOP(OP_RECV,XTERM);
79072805
LW
4845
4846 case KEY_reverse:
a0d0e21e 4847 LOP(OP_REVERSE,XTERM);
79072805
LW
4848
4849 case KEY_readlink:
6f33ba73 4850 UNIDOR(OP_READLINK);
79072805
LW
4851
4852 case KEY_ref:
4853 UNI(OP_REF);
4854
4855 case KEY_s:
4856 s = scan_subst(s);
4857 if (yylval.opval)
4858 TERM(sublex_start());
4859 else
4860 TOKEN(1); /* force error */
4861
a0d0e21e
LW
4862 case KEY_chomp:
4863 UNI(OP_CHOMP);
4e553d73 4864
79072805
LW
4865 case KEY_scalar:
4866 UNI(OP_SCALAR);
4867
4868 case KEY_select:
a0d0e21e 4869 LOP(OP_SELECT,XTERM);
79072805
LW
4870
4871 case KEY_seek:
a0d0e21e 4872 LOP(OP_SEEK,XTERM);
79072805
LW
4873
4874 case KEY_semctl:
a0d0e21e 4875 LOP(OP_SEMCTL,XTERM);
79072805
LW
4876
4877 case KEY_semget:
a0d0e21e 4878 LOP(OP_SEMGET,XTERM);
79072805
LW
4879
4880 case KEY_semop:
a0d0e21e 4881 LOP(OP_SEMOP,XTERM);
79072805
LW
4882
4883 case KEY_send:
a0d0e21e 4884 LOP(OP_SEND,XTERM);
79072805
LW
4885
4886 case KEY_setpgrp:
a0d0e21e 4887 LOP(OP_SETPGRP,XTERM);
79072805
LW
4888
4889 case KEY_setpriority:
a0d0e21e 4890 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
4891
4892 case KEY_sethostent:
ff68c719 4893 UNI(OP_SHOSTENT);
79072805
LW
4894
4895 case KEY_setnetent:
ff68c719 4896 UNI(OP_SNETENT);
79072805
LW
4897
4898 case KEY_setservent:
ff68c719 4899 UNI(OP_SSERVENT);
79072805
LW
4900
4901 case KEY_setprotoent:
ff68c719 4902 UNI(OP_SPROTOENT);
79072805
LW
4903
4904 case KEY_setpwent:
4905 FUN0(OP_SPWENT);
4906
4907 case KEY_setgrent:
4908 FUN0(OP_SGRENT);
4909
4910 case KEY_seekdir:
a0d0e21e 4911 LOP(OP_SEEKDIR,XTERM);
79072805
LW
4912
4913 case KEY_setsockopt:
a0d0e21e 4914 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
4915
4916 case KEY_shift:
6f33ba73 4917 UNIDOR(OP_SHIFT);
79072805
LW
4918
4919 case KEY_shmctl:
a0d0e21e 4920 LOP(OP_SHMCTL,XTERM);
79072805
LW
4921
4922 case KEY_shmget:
a0d0e21e 4923 LOP(OP_SHMGET,XTERM);
79072805
LW
4924
4925 case KEY_shmread:
a0d0e21e 4926 LOP(OP_SHMREAD,XTERM);
79072805
LW
4927
4928 case KEY_shmwrite:
a0d0e21e 4929 LOP(OP_SHMWRITE,XTERM);
79072805
LW
4930
4931 case KEY_shutdown:
a0d0e21e 4932 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
4933
4934 case KEY_sin:
4935 UNI(OP_SIN);
4936
4937 case KEY_sleep:
4938 UNI(OP_SLEEP);
4939
4940 case KEY_socket:
a0d0e21e 4941 LOP(OP_SOCKET,XTERM);
79072805
LW
4942
4943 case KEY_socketpair:
a0d0e21e 4944 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
4945
4946 case KEY_sort:
3280af22 4947 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
4948 s = skipspace(s);
4949 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 4950 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 4951 PL_expect = XTERM;
15f0808c 4952 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 4953 LOP(OP_SORT,XREF);
79072805
LW
4954
4955 case KEY_split:
a0d0e21e 4956 LOP(OP_SPLIT,XTERM);
79072805
LW
4957
4958 case KEY_sprintf:
a0d0e21e 4959 LOP(OP_SPRINTF,XTERM);
79072805
LW
4960
4961 case KEY_splice:
a0d0e21e 4962 LOP(OP_SPLICE,XTERM);
79072805
LW
4963
4964 case KEY_sqrt:
4965 UNI(OP_SQRT);
4966
4967 case KEY_srand:
4968 UNI(OP_SRAND);
4969
4970 case KEY_stat:
4971 UNI(OP_STAT);
4972
4973 case KEY_study:
79072805
LW
4974 UNI(OP_STUDY);
4975
4976 case KEY_substr:
a0d0e21e 4977 LOP(OP_SUBSTR,XTERM);
79072805
LW
4978
4979 case KEY_format:
4980 case KEY_sub:
93a17b20 4981 really_sub:
09bef843 4982 {
3280af22 4983 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 4984 SSize_t tboffset = 0;
09bef843 4985 expectation attrful;
d731386a 4986 bool have_name, have_proto, bad_proto;
09bef843
SB
4987 int key = tmp;
4988
4989 s = skipspace(s);
4990
7e2040f0 4991 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
4992 (*s == ':' && s[1] == ':'))
4993 {
4994 PL_expect = XBLOCK;
4995 attrful = XATTRBLOCK;
b1b65b59
JH
4996 /* remember buffer pos'n for later force_word */
4997 tboffset = s - PL_oldbufptr;
09bef843
SB
4998 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4999 if (strchr(tmpbuf, ':'))
5000 sv_setpv(PL_subname, tmpbuf);
5001 else {
5002 sv_setsv(PL_subname,PL_curstname);
5003 sv_catpvn(PL_subname,"::",2);
5004 sv_catpvn(PL_subname,tmpbuf,len);
5005 }
5006 s = skipspace(d);
5007 have_name = TRUE;
5008 }
463ee0b2 5009 else {
09bef843
SB
5010 if (key == KEY_my)
5011 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5012 PL_expect = XTERMBLOCK;
5013 attrful = XATTRTERM;
5014 sv_setpv(PL_subname,"?");
5015 have_name = FALSE;
463ee0b2 5016 }
4633a7c4 5017
09bef843
SB
5018 if (key == KEY_format) {
5019 if (*s == '=')
5020 PL_lex_formbrack = PL_lex_brackets + 1;
5021 if (have_name)
b1b65b59
JH
5022 (void) force_word(PL_oldbufptr + tboffset, WORD,
5023 FALSE, TRUE, TRUE);
09bef843
SB
5024 OPERATOR(FORMAT);
5025 }
79072805 5026
09bef843
SB
5027 /* Look for a prototype */
5028 if (*s == '(') {
5029 char *p;
5030
5031 s = scan_str(s,FALSE,FALSE);
37fd879b 5032 if (!s)
09bef843 5033 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 5034 /* strip spaces and check for bad characters */
09bef843
SB
5035 d = SvPVX(PL_lex_stuff);
5036 tmp = 0;
d731386a 5037 bad_proto = FALSE;
09bef843 5038 for (p = d; *p; ++p) {
d37a9538 5039 if (!isSPACE(*p)) {
09bef843 5040 d[tmp++] = *p;
d37a9538
ST
5041 if (!strchr("$@%*;[]&\\", *p))
5042 bad_proto = TRUE;
5043 }
09bef843
SB
5044 }
5045 d[tmp] = '\0';
420cdfc1 5046 if (bad_proto && ckWARN(WARN_SYNTAX))
9014280d 5047 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
420cdfc1
ST
5048 "Illegal character in prototype for %s : %s",
5049 SvPVX(PL_subname), d);
09bef843
SB
5050 SvCUR(PL_lex_stuff) = tmp;
5051 have_proto = TRUE;
68dc0745 5052
09bef843 5053 s = skipspace(s);
4633a7c4 5054 }
09bef843
SB
5055 else
5056 have_proto = FALSE;
5057
5058 if (*s == ':' && s[1] != ':')
5059 PL_expect = attrful;
904d85c5
RGS
5060 else if (!have_name && *s != '{' && key == KEY_sub)
5061 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
09bef843
SB
5062
5063 if (have_proto) {
b1b65b59
JH
5064 PL_nextval[PL_nexttoke].opval =
5065 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5066 PL_lex_stuff = Nullsv;
5067 force_next(THING);
68dc0745 5068 }
09bef843 5069 if (!have_name) {
c99da370
JH
5070 sv_setpv(PL_subname,
5071 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
09bef843 5072 TOKEN(ANONSUB);
4633a7c4 5073 }
b1b65b59
JH
5074 (void) force_word(PL_oldbufptr + tboffset, WORD,
5075 FALSE, TRUE, TRUE);
09bef843
SB
5076 if (key == KEY_my)
5077 TOKEN(MYSUB);
5078 TOKEN(SUB);
4633a7c4 5079 }
79072805
LW
5080
5081 case KEY_system:
5082 set_csh();
a0d0e21e 5083 LOP(OP_SYSTEM,XREF);
79072805
LW
5084
5085 case KEY_symlink:
a0d0e21e 5086 LOP(OP_SYMLINK,XTERM);
79072805
LW
5087
5088 case KEY_syscall:
a0d0e21e 5089 LOP(OP_SYSCALL,XTERM);
79072805 5090
c07a80fd 5091 case KEY_sysopen:
5092 LOP(OP_SYSOPEN,XTERM);
5093
137443ea 5094 case KEY_sysseek:
5095 LOP(OP_SYSSEEK,XTERM);
5096
79072805 5097 case KEY_sysread:
a0d0e21e 5098 LOP(OP_SYSREAD,XTERM);
79072805
LW
5099
5100 case KEY_syswrite:
a0d0e21e 5101 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5102
5103 case KEY_tr:
5104 s = scan_trans(s);
5105 TERM(sublex_start());
5106
5107 case KEY_tell:
5108 UNI(OP_TELL);
5109
5110 case KEY_telldir:
5111 UNI(OP_TELLDIR);
5112
463ee0b2 5113 case KEY_tie:
a0d0e21e 5114 LOP(OP_TIE,XTERM);
463ee0b2 5115
c07a80fd 5116 case KEY_tied:
5117 UNI(OP_TIED);
5118
79072805
LW
5119 case KEY_time:
5120 FUN0(OP_TIME);
5121
5122 case KEY_times:
5123 FUN0(OP_TMS);
5124
5125 case KEY_truncate:
a0d0e21e 5126 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5127
5128 case KEY_uc:
5129 UNI(OP_UC);
5130
5131 case KEY_ucfirst:
5132 UNI(OP_UCFIRST);
5133
463ee0b2
LW
5134 case KEY_untie:
5135 UNI(OP_UNTIE);
5136
79072805 5137 case KEY_until:
57843af0 5138 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5139 OPERATOR(UNTIL);
5140
5141 case KEY_unless:
57843af0 5142 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5143 OPERATOR(UNLESS);
5144
5145 case KEY_unlink:
a0d0e21e 5146 LOP(OP_UNLINK,XTERM);
79072805
LW
5147
5148 case KEY_undef:
6f33ba73 5149 UNIDOR(OP_UNDEF);
79072805
LW
5150
5151 case KEY_unpack:
a0d0e21e 5152 LOP(OP_UNPACK,XTERM);
79072805
LW
5153
5154 case KEY_utime:
a0d0e21e 5155 LOP(OP_UTIME,XTERM);
79072805
LW
5156
5157 case KEY_umask:
6f33ba73 5158 UNIDOR(OP_UMASK);
79072805
LW
5159
5160 case KEY_unshift:
a0d0e21e
LW
5161 LOP(OP_UNSHIFT,XTERM);
5162
5163 case KEY_use:
3280af22 5164 if (PL_expect != XSTATE)
a0d0e21e 5165 yyerror("\"use\" not allowed in expression");
89bfa8cd 5166 s = skipspace(s);
a7cb1f99 5167 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
e759cc13 5168 s = force_version(s, TRUE);
a7cb1f99 5169 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5170 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5171 force_next(WORD);
5172 }
e759cc13
RGS
5173 else if (*s == 'v') {
5174 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5175 s = force_version(s, FALSE);
5176 }
89bfa8cd 5177 }
5178 else {
5179 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 5180 s = force_version(s, FALSE);
89bfa8cd 5181 }
a0d0e21e
LW
5182 yylval.ival = 1;
5183 OPERATOR(USE);
79072805
LW
5184
5185 case KEY_values:
5186 UNI(OP_VALUES);
5187
5188 case KEY_vec:
a0d0e21e 5189 LOP(OP_VEC,XTERM);
79072805
LW
5190
5191 case KEY_while:
57843af0 5192 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5193 OPERATOR(WHILE);
5194
5195 case KEY_warn:
3280af22 5196 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5197 LOP(OP_WARN,XTERM);
79072805
LW
5198
5199 case KEY_wait:
5200 FUN0(OP_WAIT);
5201
5202 case KEY_waitpid:
a0d0e21e 5203 LOP(OP_WAITPID,XTERM);
79072805
LW
5204
5205 case KEY_wantarray:
5206 FUN0(OP_WANTARRAY);
5207
5208 case KEY_write:
9d116dd7
JH
5209#ifdef EBCDIC
5210 {
df3728a2
JH
5211 char ctl_l[2];
5212 ctl_l[0] = toCTRL('L');
5213 ctl_l[1] = '\0';
9d116dd7
JH
5214 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5215 }
5216#else
5217 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5218#endif
79072805
LW
5219 UNI(OP_ENTERWRITE);
5220
5221 case KEY_x:
3280af22 5222 if (PL_expect == XOPERATOR)
79072805
LW
5223 Mop(OP_REPEAT);
5224 check_uni();
5225 goto just_a_word;
5226
a0d0e21e
LW
5227 case KEY_xor:
5228 yylval.ival = OP_XOR;
5229 OPERATOR(OROP);
5230
79072805
LW
5231 case KEY_y:
5232 s = scan_trans(s);
5233 TERM(sublex_start());
5234 }
49dc05e3 5235 }}
79072805 5236}
bf4acbe4
GS
5237#ifdef __SC__
5238#pragma segment Main
5239#endif
79072805 5240
e930465f
JH
5241static int
5242S_pending_ident(pTHX)
8eceec63
SC
5243{
5244 register char *d;
5245 register I32 tmp;
5246 /* pit holds the identifier we read and pending_ident is reset */
5247 char pit = PL_pending_ident;
5248 PL_pending_ident = 0;
5249
5250 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5251 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5252
5253 /* if we're in a my(), we can't allow dynamics here.
5254 $foo'bar has already been turned into $foo::bar, so
5255 just check for colons.
5256
5257 if it's a legal name, the OP is a PADANY.
5258 */
5259 if (PL_in_my) {
5260 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5261 if (strchr(PL_tokenbuf,':'))
5262 yyerror(Perl_form(aTHX_ "No package name allowed for "
5263 "variable %s in \"our\"",
5264 PL_tokenbuf));
dd2155a4 5265 tmp = allocmy(PL_tokenbuf);
8eceec63
SC
5266 }
5267 else {
5268 if (strchr(PL_tokenbuf,':'))
5269 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5270
5271 yylval.opval = newOP(OP_PADANY, 0);
dd2155a4 5272 yylval.opval->op_targ = allocmy(PL_tokenbuf);
8eceec63
SC
5273 return PRIVATEREF;
5274 }
5275 }
5276
5277 /*
5278 build the ops for accesses to a my() variable.
5279
5280 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5281 then used in a comparison. This catches most, but not
5282 all cases. For instance, it catches
5283 sort { my($a); $a <=> $b }
5284 but not
5285 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5286 (although why you'd do that is anyone's guess).
5287 */
5288
5289 if (!strchr(PL_tokenbuf,':')) {
8eceec63 5290 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
8eceec63 5291 /* might be an "our" variable" */
dd2155a4 5292 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
8eceec63 5293 /* build ops for a bareword */
dd2155a4 5294 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
8eceec63
SC
5295 sv_catpvn(sym, "::", 2);
5296 sv_catpv(sym, PL_tokenbuf+1);
5297 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5298 yylval.opval->op_private = OPpCONST_ENTERED;
5299 gv_fetchpv(SvPVX(sym),
5300 (PL_in_eval
5301 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 5302 : GV_ADDMULTI
8eceec63
SC
5303 ),
5304 ((PL_tokenbuf[0] == '$') ? SVt_PV
5305 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5306 : SVt_PVHV));
5307 return WORD;
5308 }
5309
5310 /* if it's a sort block and they're naming $a or $b */
5311 if (PL_last_lop_op == OP_SORT &&
5312 PL_tokenbuf[0] == '$' &&
5313 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5314 && !PL_tokenbuf[2])
5315 {
5316 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5317 d < PL_bufend && *d != '\n';
5318 d++)
5319 {
5320 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5321 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5322 PL_tokenbuf);
5323 }
5324 }
5325 }
5326
5327 yylval.opval = newOP(OP_PADANY, 0);
5328 yylval.opval->op_targ = tmp;
5329 return PRIVATEREF;
5330 }
5331 }
5332
5333 /*
5334 Whine if they've said @foo in a doublequoted string,
5335 and @foo isn't a variable we can find in the symbol
5336 table.
5337 */
5338 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5339 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5340 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5341 && ckWARN(WARN_AMBIGUOUS))
5342 {
5343 /* Downgraded from fatal to warning 20000522 mjd */
9014280d 5344 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8eceec63
SC
5345 "Possible unintended interpolation of %s in string",
5346 PL_tokenbuf);
5347 }
5348 }
5349
5350 /* build ops for a bareword */
5351 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5352 yylval.opval->op_private = OPpCONST_ENTERED;
5353 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5354 ((PL_tokenbuf[0] == '$') ? SVt_PV
5355 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5356 : SVt_PVHV));
5357 return WORD;
5358}
5359
79072805 5360I32
864dbfa3 5361Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5362{
5363 switch (*d) {
5364 case '_':
5365 if (d[1] == '_') {
a0d0e21e 5366 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5367 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5368 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5369 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5370 if (strEQ(d,"__END__")) return KEY___END__;
5371 }
5372 break;
8990e307
LW
5373 case 'A':
5374 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5375 break;
79072805 5376 case 'a':
463ee0b2
LW
5377 switch (len) {
5378 case 3:
a0d0e21e
LW
5379 if (strEQ(d,"and")) return -KEY_and;
5380 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5381 break;
463ee0b2 5382 case 5:
a0d0e21e
LW
5383 if (strEQ(d,"alarm")) return -KEY_alarm;
5384 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5385 break;
5386 case 6:
a0d0e21e 5387 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5388 break;
5389 }
79072805
LW
5390 break;
5391 case 'B':
5392 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5393 break;
79072805 5394 case 'b':
a0d0e21e
LW
5395 if (strEQ(d,"bless")) return -KEY_bless;
5396 if (strEQ(d,"bind")) return -KEY_bind;
5397 if (strEQ(d,"binmode")) return -KEY_binmode;
5398 break;
5399 case 'C':
5400 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5401 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5402 break;
5403 case 'c':
5404 switch (len) {
5405 case 3:
a0d0e21e
LW
5406 if (strEQ(d,"cmp")) return -KEY_cmp;
5407 if (strEQ(d,"chr")) return -KEY_chr;
5408 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5409 break;
5410 case 4:
77bc9082 5411 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5412 break;
5413 case 5:
a0d0e21e
LW
5414 if (strEQ(d,"close")) return -KEY_close;
5415 if (strEQ(d,"chdir")) return -KEY_chdir;
77bc9082 5416 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5417 if (strEQ(d,"chmod")) return -KEY_chmod;
5418 if (strEQ(d,"chown")) return -KEY_chown;
5419 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5420 break;
5421 case 6:
a0d0e21e
LW
5422 if (strEQ(d,"chroot")) return -KEY_chroot;
5423 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5424 break;
5425 case 7:
a0d0e21e 5426 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5427 break;
5428 case 8:
a0d0e21e
LW
5429 if (strEQ(d,"closedir")) return -KEY_closedir;
5430 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5431 break;
5432 }
5433 break;
ed6116ce
LW
5434 case 'D':
5435 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5436 break;
79072805
LW
5437 case 'd':
5438 switch (len) {
5439 case 2:
5440 if (strEQ(d,"do")) return KEY_do;
5441 break;
5442 case 3:
a0d0e21e 5443 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5444 break;
5445 case 4:
a0d0e21e 5446 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5447 break;
5448 case 6:
5449 if (strEQ(d,"delete")) return KEY_delete;
5450 break;
5451 case 7:
5452 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5453 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5454 break;
5455 case 8:
a0d0e21e 5456 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5457 break;
5458 }
5459 break;
5460 case 'E':
79072805
LW
5461 if (strEQ(d,"END")) return KEY_END;
5462 break;
5463 case 'e':
5464 switch (len) {
5465 case 2:
a0d0e21e 5466 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5467 break;
5468 case 3:
a0d0e21e 5469 if (strEQ(d,"eof")) return -KEY_eof;
c963b151 5470 if (strEQ(d,"err")) return -KEY_err;
a0d0e21e 5471 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5472 break;
5473 case 4:
5474 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5475 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5476 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5477 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5478 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5479 break;
5480 case 5:
5481 if (strEQ(d,"elsif")) return KEY_elsif;
5482 break;
a0d0e21e
LW
5483 case 6:
5484 if (strEQ(d,"exists")) return KEY_exists;
cea2e8a9 5485 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
a0d0e21e 5486 break;
79072805 5487 case 8:
a0d0e21e
LW
5488 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5489 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5490 break;
5491 case 9:
a0d0e21e 5492 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5493 break;
5494 case 10:
a0d0e21e
LW
5495 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5496 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5497 break;
5498 case 11:
a0d0e21e 5499 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5500 break;
a687059c 5501 }
a687059c 5502 break;
79072805
LW
5503 case 'f':
5504 switch (len) {
5505 case 3:
5506 if (strEQ(d,"for")) return KEY_for;
5507 break;
5508 case 4:
a0d0e21e 5509 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5510 break;
5511 case 5:
a0d0e21e
LW
5512 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5513 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5514 break;
5515 case 6:
5516 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5517 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5518 break;
5519 case 7:
5520 if (strEQ(d,"foreach")) return KEY_foreach;
5521 break;
5522 case 8:
a0d0e21e 5523 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5524 break;
378cc40b 5525 }
a687059c 5526 break;
79072805 5527 case 'g':
a687059c
LW
5528 if (strnEQ(d,"get",3)) {
5529 d += 3;
5530 if (*d == 'p') {
79072805
LW
5531 switch (len) {
5532 case 7:
a0d0e21e
LW
5533 if (strEQ(d,"ppid")) return -KEY_getppid;
5534 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5535 break;
5536 case 8:
a0d0e21e
LW
5537 if (strEQ(d,"pwent")) return -KEY_getpwent;
5538 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5539 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5540 break;
5541 case 11:
a0d0e21e
LW
5542 if (strEQ(d,"peername")) return -KEY_getpeername;
5543 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5544 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5545 break;
5546 case 14:
a0d0e21e 5547 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5548 break;
5549 case 16:
a0d0e21e 5550 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5551 break;
5552 }
a687059c
LW
5553 }
5554 else if (*d == 'h') {
a0d0e21e
LW
5555 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5556 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5557 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5558 }
5559 else if (*d == 'n') {
a0d0e21e
LW
5560 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5561 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5562 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5563 }
5564 else if (*d == 's') {
a0d0e21e
LW
5565 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5566 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5567 if (strEQ(d,"servent")) return -KEY_getservent;
5568 if (strEQ(d,"sockname")) return -KEY_getsockname;
5569 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5570 }
5571 else if (*d == 'g') {
a0d0e21e
LW
5572 if (strEQ(d,"grent")) return -KEY_getgrent;
5573 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5574 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5575 }
5576 else if (*d == 'l') {
a0d0e21e 5577 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5578 }
a0d0e21e 5579 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5580 break;
a687059c 5581 }
79072805
LW
5582 switch (len) {
5583 case 2:
a0d0e21e
LW
5584 if (strEQ(d,"gt")) return -KEY_gt;
5585 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5586 break;
5587 case 4:
5588 if (strEQ(d,"grep")) return KEY_grep;
5589 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5590 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5591 break;
5592 case 6:
a0d0e21e 5593 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5594 break;
378cc40b 5595 }
a687059c 5596 break;
79072805 5597 case 'h':
a0d0e21e 5598 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5599 break;
7d07dbc2
MB
5600 case 'I':
5601 if (strEQ(d,"INIT")) return KEY_INIT;
5602 break;
79072805
LW
5603 case 'i':
5604 switch (len) {
5605 case 2:
5606 if (strEQ(d,"if")) return KEY_if;
5607 break;
5608 case 3:
a0d0e21e 5609 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5610 break;
5611 case 5:
a0d0e21e
LW
5612 if (strEQ(d,"index")) return -KEY_index;
5613 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5614 break;
5615 }
a687059c 5616 break;
79072805 5617 case 'j':
a0d0e21e 5618 if (strEQ(d,"join")) return -KEY_join;
a687059c 5619 break;
79072805
LW
5620 case 'k':
5621 if (len == 4) {
3a6a8333 5622 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5623 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5624 }
79072805 5625 break;
79072805
LW
5626 case 'l':
5627 switch (len) {
5628 case 2:
a0d0e21e
LW
5629 if (strEQ(d,"lt")) return -KEY_lt;
5630 if (strEQ(d,"le")) return -KEY_le;
5631 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5632 break;
5633 case 3:
a0d0e21e 5634 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5635 break;
5636 case 4:
5637 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5638 if (strEQ(d,"link")) return -KEY_link;
c0329465 5639 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5640 break;
79072805
LW
5641 case 5:
5642 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5643 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5644 break;
5645 case 6:
a0d0e21e
LW
5646 if (strEQ(d,"length")) return -KEY_length;
5647 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5648 break;
5649 case 7:
a0d0e21e 5650 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5651 break;
5652 case 9:
a0d0e21e 5653 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5654 break;
5655 }
a687059c 5656 break;
79072805
LW
5657 case 'm':
5658 switch (len) {
5659 case 1: return KEY_m;
93a17b20
LW
5660 case 2:
5661 if (strEQ(d,"my")) return KEY_my;
5662 break;
a0d0e21e
LW
5663 case 3:
5664 if (strEQ(d,"map")) return KEY_map;
5665 break;
79072805 5666 case 5:
a0d0e21e 5667 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5668 break;
5669 case 6:
a0d0e21e
LW
5670 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5671 if (strEQ(d,"msgget")) return -KEY_msgget;
5672 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5673 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5674 break;
5675 }
a687059c 5676 break;
79072805
LW
5677 case 'n':
5678 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5679 if (strEQ(d,"ne")) return -KEY_ne;
5680 if (strEQ(d,"not")) return -KEY_not;
5681 if (strEQ(d,"no")) return KEY_no;
a687059c 5682 break;
79072805
LW
5683 case 'o':
5684 switch (len) {
463ee0b2 5685 case 2:
a0d0e21e 5686 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5687 break;
79072805 5688 case 3:
a0d0e21e
LW
5689 if (strEQ(d,"ord")) return -KEY_ord;
5690 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5691 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5692 break;
5693 case 4:
a0d0e21e 5694 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5695 break;
5696 case 7:
a0d0e21e 5697 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5698 break;
fe14fcc3 5699 }
a687059c 5700 break;
79072805
LW
5701 case 'p':
5702 switch (len) {
5703 case 3:
4e553d73 5704 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5705 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5706 break;
5707 case 4:
3a6a8333 5708 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5709 if (strEQ(d,"pack")) return -KEY_pack;
5710 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5711 break;
5712 case 5:
5713 if (strEQ(d,"print")) return KEY_print;
5714 break;
5715 case 6:
5716 if (strEQ(d,"printf")) return KEY_printf;
5717 break;
5718 case 7:
5719 if (strEQ(d,"package")) return KEY_package;
5720 break;
c07a80fd 5721 case 9:
5722 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5723 }
79072805
LW
5724 break;
5725 case 'q':
5726 if (len <= 2) {
5727 if (strEQ(d,"q")) return KEY_q;
8782bef2 5728 if (strEQ(d,"qr")) return KEY_qr;
79072805 5729 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5730 if (strEQ(d,"qw")) return KEY_qw;
79072805 5731 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5732 }
a0d0e21e 5733 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5734 break;
5735 case 'r':
5736 switch (len) {
5737 case 3:
a0d0e21e 5738 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5739 break;
5740 case 4:
a0d0e21e
LW
5741 if (strEQ(d,"read")) return -KEY_read;
5742 if (strEQ(d,"rand")) return -KEY_rand;
5743 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5744 if (strEQ(d,"redo")) return KEY_redo;
5745 break;
5746 case 5:
a0d0e21e
LW
5747 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5748 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5749 break;
5750 case 6:
5751 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5752 if (strEQ(d,"rename")) return -KEY_rename;
5753 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5754 break;
5755 case 7:
ec4ab249 5756 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5757 if (strEQ(d,"reverse")) return -KEY_reverse;
5758 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5759 break;
5760 case 8:
a0d0e21e
LW
5761 if (strEQ(d,"readlink")) return -KEY_readlink;
5762 if (strEQ(d,"readline")) return -KEY_readline;
5763 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5764 break;
5765 case 9:
a0d0e21e 5766 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5767 break;
a687059c 5768 }
79072805
LW
5769 break;
5770 case 's':
a687059c 5771 switch (d[1]) {
79072805 5772 case 0: return KEY_s;
a687059c 5773 case 'c':
79072805 5774 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5775 break;
5776 case 'e':
79072805
LW
5777 switch (len) {
5778 case 4:
a0d0e21e
LW
5779 if (strEQ(d,"seek")) return -KEY_seek;
5780 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5781 break;
5782 case 5:
a0d0e21e 5783 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5784 break;
5785 case 6:
a0d0e21e
LW
5786 if (strEQ(d,"select")) return -KEY_select;
5787 if (strEQ(d,"semctl")) return -KEY_semctl;
5788 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5789 break;
5790 case 7:
a0d0e21e
LW
5791 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5792 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5793 break;
5794 case 8:
a0d0e21e
LW
5795 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5796 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
5797 break;
5798 case 9:
a0d0e21e 5799 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
5800 break;
5801 case 10:
a0d0e21e
LW
5802 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5803 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5804 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
5805 break;
5806 case 11:
a0d0e21e
LW
5807 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5808 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
5809 break;
5810 }
a687059c
LW
5811 break;
5812 case 'h':
79072805
LW
5813 switch (len) {
5814 case 5:
3a6a8333 5815 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
5816 break;
5817 case 6:
a0d0e21e
LW
5818 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5819 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
5820 break;
5821 case 7:
a0d0e21e 5822 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
5823 break;
5824 case 8:
a0d0e21e
LW
5825 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5826 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
5827 break;
5828 }
a687059c
LW
5829 break;
5830 case 'i':
a0d0e21e 5831 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
5832 break;
5833 case 'l':
a0d0e21e 5834 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
5835 break;
5836 case 'o':
79072805 5837 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
5838 if (strEQ(d,"socket")) return -KEY_socket;
5839 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
5840 break;
5841 case 'p':
79072805 5842 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 5843 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 5844 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
5845 break;
5846 case 'q':
a0d0e21e 5847 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
5848 break;
5849 case 'r':
a0d0e21e 5850 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
5851 break;
5852 case 't':
a0d0e21e 5853 if (strEQ(d,"stat")) return -KEY_stat;
79072805 5854 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
5855 break;
5856 case 'u':
a0d0e21e 5857 if (strEQ(d,"substr")) return -KEY_substr;
79072805 5858 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
5859 break;
5860 case 'y':
79072805
LW
5861 switch (len) {
5862 case 6:
a0d0e21e 5863 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
5864 break;
5865 case 7:
a0d0e21e
LW
5866 if (strEQ(d,"symlink")) return -KEY_symlink;
5867 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 5868 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5869 if (strEQ(d,"sysread")) return -KEY_sysread;
5870 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
5871 break;
5872 case 8:
a0d0e21e 5873 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 5874 break;
a687059c 5875 }
a687059c
LW
5876 break;
5877 }
5878 break;
79072805
LW
5879 case 't':
5880 switch (len) {
5881 case 2:
5882 if (strEQ(d,"tr")) return KEY_tr;
5883 break;
463ee0b2
LW
5884 case 3:
5885 if (strEQ(d,"tie")) return KEY_tie;
5886 break;
79072805 5887 case 4:
a0d0e21e 5888 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 5889 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 5890 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
5891 break;
5892 case 5:
a0d0e21e 5893 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
5894 break;
5895 case 7:
a0d0e21e 5896 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
5897 break;
5898 case 8:
a0d0e21e 5899 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 5900 break;
378cc40b 5901 }
a687059c 5902 break;
79072805
LW
5903 case 'u':
5904 switch (len) {
5905 case 2:
a0d0e21e
LW
5906 if (strEQ(d,"uc")) return -KEY_uc;
5907 break;
5908 case 3:
5909 if (strEQ(d,"use")) return KEY_use;
79072805
LW
5910 break;
5911 case 5:
5912 if (strEQ(d,"undef")) return KEY_undef;
5913 if (strEQ(d,"until")) return KEY_until;
463ee0b2 5914 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
5915 if (strEQ(d,"utime")) return -KEY_utime;
5916 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
5917 break;
5918 case 6:
5919 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
5920 if (strEQ(d,"unpack")) return -KEY_unpack;
5921 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
5922 break;
5923 case 7:
3a6a8333 5924 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 5925 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 5926 break;
a687059c
LW
5927 }
5928 break;
79072805 5929 case 'v':
a0d0e21e
LW
5930 if (strEQ(d,"values")) return -KEY_values;
5931 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 5932 break;
79072805
LW
5933 case 'w':
5934 switch (len) {
5935 case 4:
a0d0e21e
LW
5936 if (strEQ(d,"warn")) return -KEY_warn;
5937 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
5938 break;
5939 case 5:
5940 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 5941 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
5942 break;
5943 case 7:
a0d0e21e 5944 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
5945 break;
5946 case 9:
a0d0e21e 5947 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 5948 break;
2f3197b3 5949 }
a687059c 5950 break;
79072805 5951 case 'x':
a0d0e21e
LW
5952 if (len == 1) return -KEY_x;
5953 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 5954 break;
79072805
LW
5955 case 'y':
5956 if (len == 1) return KEY_y;
5957 break;
5958 case 'z':
a687059c
LW
5959 break;
5960 }
79072805 5961 return 0;
a687059c
LW
5962}
5963
76e3520e 5964STATIC void
cea2e8a9 5965S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 5966{
2f3197b3
LW
5967 char *w;
5968
d008e5eb 5969 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
5970 if (ckWARN(WARN_SYNTAX)) {
5971 int level = 1;
5972 for (w = s+2; *w && level; w++) {
5973 if (*w == '(')
5974 ++level;
5975 else if (*w == ')')
5976 --level;
5977 }
5978 if (*w)
5979 for (; *w && isSPACE(*w); w++) ;
5980 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9014280d 5981 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 5982 "%s (...) interpreted as function",name);
d008e5eb 5983 }
2f3197b3 5984 }
3280af22 5985 while (s < PL_bufend && isSPACE(*s))
2f3197b3 5986 s++;
a687059c
LW
5987 if (*s == '(')
5988 s++;
3280af22 5989 while (s < PL_bufend && isSPACE(*s))
a687059c 5990 s++;
7e2040f0 5991 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 5992 w = s++;
7e2040f0 5993 while (isALNUM_lazy_if(s,UTF))
a687059c 5994 s++;
3280af22 5995 while (s < PL_bufend && isSPACE(*s))
a687059c 5996 s++;
e929a76b 5997 if (*s == ',') {
463ee0b2 5998 int kw;
e929a76b 5999 *s = '\0';
864dbfa3 6000 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 6001 *s = ',';
463ee0b2 6002 if (kw)
e929a76b 6003 return;
cea2e8a9 6004 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
6005 }
6006 }
6007}
6008
423cee85
JH
6009/* Either returns sv, or mortalizes sv and returns a new SV*.
6010 Best used as sv=new_constant(..., sv, ...).
6011 If s, pv are NULL, calls subroutine with one argument,
6012 and type is used with error messages only. */
6013
b3ac6de7 6014STATIC SV *
dff6d3cd 6015S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 6016 const char *type)
b3ac6de7 6017{
b3ac6de7 6018 dSP;
3280af22 6019 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 6020 SV *res;
b3ac6de7
IZ
6021 SV **cvp;
6022 SV *cv, *typesv;
f0af216f 6023 const char *why1, *why2, *why3;
4e553d73 6024
f0af216f 6025 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
6026 SV *msg;
6027
f0af216f 6028 why2 = strEQ(key,"charnames")
41ab332f 6029 ? "(possibly a missing \"use charnames ...\")"
f0af216f 6030 : "";
4e553d73 6031 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
6032 (type ? type: "undef"), why2);
6033
6034 /* This is convoluted and evil ("goto considered harmful")
6035 * but I do not understand the intricacies of all the different
6036 * failure modes of %^H in here. The goal here is to make
6037 * the most probable error message user-friendly. --jhi */
6038
6039 goto msgdone;
6040
423cee85 6041 report:
4e553d73 6042 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 6043 (type ? type: "undef"), why1, why2, why3);
41ab332f 6044 msgdone:
423cee85
JH
6045 yyerror(SvPVX(msg));
6046 SvREFCNT_dec(msg);
6047 return sv;
6048 }
b3ac6de7
IZ
6049 cvp = hv_fetch(table, key, strlen(key), FALSE);
6050 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
6051 why1 = "$^H{";
6052 why2 = key;
f0af216f 6053 why3 = "} is not defined";
423cee85 6054 goto report;
b3ac6de7
IZ
6055 }
6056 sv_2mortal(sv); /* Parent created it permanently */
6057 cv = *cvp;
423cee85
JH
6058 if (!pv && s)
6059 pv = sv_2mortal(newSVpvn(s, len));
6060 if (type && pv)
6061 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 6062 else
423cee85 6063 typesv = &PL_sv_undef;
4e553d73 6064
e788e7d3 6065 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
6066 ENTER ;
6067 SAVETMPS;
4e553d73 6068
423cee85 6069 PUSHMARK(SP) ;
a5845cb7 6070 EXTEND(sp, 3);
423cee85
JH
6071 if (pv)
6072 PUSHs(pv);
b3ac6de7 6073 PUSHs(sv);
423cee85
JH
6074 if (pv)
6075 PUSHs(typesv);
b3ac6de7 6076 PUTBACK;
423cee85 6077 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 6078
423cee85 6079 SPAGAIN ;
4e553d73 6080
423cee85 6081 /* Check the eval first */
9b0e499b 6082 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
6083 STRLEN n_a;
6084 sv_catpv(ERRSV, "Propagated");
6085 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 6086 (void)POPs;
423cee85
JH
6087 res = SvREFCNT_inc(sv);
6088 }
6089 else {
6090 res = POPs;
e1f15930 6091 (void)SvREFCNT_inc(res);
423cee85 6092 }
4e553d73 6093
423cee85
JH
6094 PUTBACK ;
6095 FREETMPS ;
6096 LEAVE ;
b3ac6de7 6097 POPSTACK;
4e553d73 6098
b3ac6de7 6099 if (!SvOK(res)) {
423cee85
JH
6100 why1 = "Call to &{$^H{";
6101 why2 = key;
f0af216f 6102 why3 = "}} did not return a defined value";
423cee85
JH
6103 sv = res;
6104 goto report;
9b0e499b 6105 }
423cee85 6106
9b0e499b 6107 return res;
b3ac6de7 6108}
4e553d73 6109
76e3520e 6110STATIC char *
cea2e8a9 6111S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
6112{
6113 register char *d = dest;
8903cb82 6114 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 6115 for (;;) {
8903cb82 6116 if (d >= e)
cea2e8a9 6117 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6118 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6119 *d++ = *s++;
7e2040f0 6120 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6121 *d++ = ':';
6122 *d++ = ':';
6123 s++;
6124 }
c3e0f903 6125 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6126 *d++ = *s++;
6127 *d++ = *s++;
6128 }
fd400ab9 6129 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6130 char *t = s + UTF8SKIP(s);
fd400ab9 6131 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6132 t += UTF8SKIP(t);
6133 if (d + (t - s) > e)
cea2e8a9 6134 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6135 Copy(s, d, t - s, char);
6136 d += t - s;
6137 s = t;
6138 }
463ee0b2
LW
6139 else {
6140 *d = '\0';
6141 *slp = d - dest;
6142 return s;
e929a76b 6143 }
378cc40b
LW
6144 }
6145}
6146
76e3520e 6147STATIC char *
cea2e8a9 6148S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6149{
6150 register char *d;
8903cb82 6151 register char *e;
79072805 6152 char *bracket = 0;
748a9306 6153 char funny = *s++;
378cc40b 6154
a0d0e21e
LW
6155 if (isSPACE(*s))
6156 s = skipspace(s);
378cc40b 6157 d = dest;
8903cb82 6158 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6159 if (isDIGIT(*s)) {
8903cb82 6160 while (isDIGIT(*s)) {
6161 if (d >= e)
cea2e8a9 6162 Perl_croak(aTHX_ ident_too_long);
378cc40b 6163 *d++ = *s++;
8903cb82 6164 }
378cc40b
LW
6165 }
6166 else {
463ee0b2 6167 for (;;) {
8903cb82 6168 if (d >= e)
cea2e8a9 6169 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6170 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6171 *d++ = *s++;
7e2040f0 6172 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6173 *d++ = ':';
6174 *d++ = ':';
6175 s++;
6176 }
a0d0e21e 6177 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6178 *d++ = *s++;
6179 *d++ = *s++;
6180 }
fd400ab9 6181 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6182 char *t = s + UTF8SKIP(s);
fd400ab9 6183 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6184 t += UTF8SKIP(t);
6185 if (d + (t - s) > e)
cea2e8a9 6186 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6187 Copy(s, d, t - s, char);
6188 d += t - s;
6189 s = t;
6190 }
463ee0b2
LW
6191 else
6192 break;
6193 }
378cc40b
LW
6194 }
6195 *d = '\0';
6196 d = dest;
79072805 6197 if (*d) {
3280af22
NIS
6198 if (PL_lex_state != LEX_NORMAL)
6199 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6200 return s;
378cc40b 6201 }
748a9306 6202 if (*s == '$' && s[1] &&
7e2040f0 6203 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6204 {
4810e5ec 6205 return s;
5cd24f17 6206 }
79072805
LW
6207 if (*s == '{') {
6208 bracket = s;
6209 s++;
6210 }
6211 else if (ck_uni)
6212 check_uni();
93a17b20 6213 if (s < send)
79072805
LW
6214 *d = *s++;
6215 d[1] = '\0';
2b92dfce 6216 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6217 *d = toCTRL(*s);
6218 s++;
de3bb511 6219 }
79072805 6220 if (bracket) {
748a9306 6221 if (isSPACE(s[-1])) {
fa83b5b6 6222 while (s < send) {
6223 char ch = *s++;
bf4acbe4 6224 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6225 *d = ch;
6226 break;
6227 }
6228 }
748a9306 6229 }
7e2040f0 6230 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6231 d++;
a0ed51b3
LW
6232 if (UTF) {
6233 e = s;
155aba94 6234 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6235 e += UTF8SKIP(e);
fd400ab9 6236 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6237 e += UTF8SKIP(e);
6238 }
6239 Copy(s, d, e - s, char);
6240 d += e - s;
6241 s = e;
6242 }
6243 else {
2b92dfce 6244 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6245 *d++ = *s++;
2b92dfce 6246 if (d >= e)
cea2e8a9 6247 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6248 }
79072805 6249 *d = '\0';
bf4acbe4 6250 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6251 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6252 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6253 const char *brack = *s == '[' ? "[...]" : "{...}";
9014280d 6254 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 6255 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6256 funny, dest, brack, funny, dest, brack);
6257 }
79072805 6258 bracket++;
a0be28da 6259 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6260 return s;
6261 }
4e553d73
NIS
6262 }
6263 /* Handle extended ${^Foo} variables
2b92dfce
GS
6264 * 1999-02-27 mjd-perl-patch@plover.com */
6265 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6266 && isALNUM(*s))
6267 {
6268 d++;
6269 while (isALNUM(*s) && d < e) {
6270 *d++ = *s++;
6271 }
6272 if (d >= e)
cea2e8a9 6273 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6274 *d = '\0';
79072805
LW
6275 }
6276 if (*s == '}') {
6277 s++;
3280af22
NIS
6278 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6279 PL_lex_state = LEX_INTERPEND;
748a9306
LW
6280 if (funny == '#')
6281 funny = '@';
d008e5eb 6282 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6283 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6284 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6285 {
9014280d 6286 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
6287 "Ambiguous use of %c{%s} resolved to %c%s",
6288 funny, dest, funny, dest);
6289 }
6290 }
79072805
LW
6291 }
6292 else {
6293 s = bracket; /* let the parser handle it */
93a17b20 6294 *dest = '\0';
79072805
LW
6295 }
6296 }
3280af22
NIS
6297 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6298 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6299 return s;
6300}
6301
cea2e8a9 6302void
2b36a5a0 6303Perl_pmflag(pTHX_ U32* pmfl, int ch)
a0d0e21e 6304{
bbce6d69 6305 if (ch == 'i')
a0d0e21e 6306 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6307 else if (ch == 'g')
6308 *pmfl |= PMf_GLOBAL;
c90c0ff4 6309 else if (ch == 'c')
6310 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6311 else if (ch == 'o')
6312 *pmfl |= PMf_KEEP;
6313 else if (ch == 'm')
6314 *pmfl |= PMf_MULTILINE;
6315 else if (ch == 's')
6316 *pmfl |= PMf_SINGLELINE;
6317 else if (ch == 'x')
6318 *pmfl |= PMf_EXTENDED;
6319}
378cc40b 6320
76e3520e 6321STATIC char *
cea2e8a9 6322S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6323{
79072805
LW
6324 PMOP *pm;
6325 char *s;
378cc40b 6326
09bef843 6327 s = scan_str(start,FALSE,FALSE);
37fd879b 6328 if (!s)
cea2e8a9 6329 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6330
8782bef2 6331 pm = (PMOP*)newPMOP(type, 0);
3280af22 6332 if (PL_multi_open == '?')
79072805 6333 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6334 if(type == OP_QR) {
6335 while (*s && strchr("iomsx", *s))
6336 pmflag(&pm->op_pmflags,*s++);
6337 }
6338 else {
6339 while (*s && strchr("iogcmsx", *s))
6340 pmflag(&pm->op_pmflags,*s++);
6341 }
4ac733c9
MJD
6342 /* issue a warning if /c is specified,but /g is not */
6343 if (ckWARN(WARN_REGEXP) &&
6344 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
6345 {
6346 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
6347 }
6348
4633a7c4 6349 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6350
3280af22 6351 PL_lex_op = (OP*)pm;
79072805 6352 yylval.ival = OP_MATCH;
378cc40b
LW
6353 return s;
6354}
6355
76e3520e 6356STATIC char *
cea2e8a9 6357S_scan_subst(pTHX_ char *start)
79072805 6358{
a0d0e21e 6359 register char *s;
79072805 6360 register PMOP *pm;
4fdae800 6361 I32 first_start;
79072805
LW
6362 I32 es = 0;
6363
79072805
LW
6364 yylval.ival = OP_NULL;
6365
09bef843 6366 s = scan_str(start,FALSE,FALSE);
79072805 6367
37fd879b 6368 if (!s)
cea2e8a9 6369 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6370
3280af22 6371 if (s[-1] == PL_multi_open)
79072805
LW
6372 s--;
6373
3280af22 6374 first_start = PL_multi_start;
09bef843 6375 s = scan_str(s,FALSE,FALSE);
79072805 6376 if (!s) {
37fd879b 6377 if (PL_lex_stuff) {
3280af22 6378 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6379 PL_lex_stuff = Nullsv;
6380 }
cea2e8a9 6381 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6382 }
3280af22 6383 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6384
79072805 6385 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6386 while (*s) {
a687059c
LW
6387 if (*s == 'e') {
6388 s++;
2f3197b3 6389 es++;
a687059c 6390 }
b3eb6a9b 6391 else if (strchr("iogcmsx", *s))
a0d0e21e 6392 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6393 else
6394 break;
378cc40b 6395 }
79072805 6396
64e578a2
MJD
6397 /* /c is not meaningful with s/// */
6398 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
4ac733c9 6399 {
64e578a2 6400 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
4ac733c9
MJD
6401 }
6402
79072805
LW
6403 if (es) {
6404 SV *repl;
0244c3a4
GS
6405 PL_sublex_info.super_bufptr = s;
6406 PL_sublex_info.super_bufend = PL_bufend;
6407 PL_multi_end = 0;
79072805 6408 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6409 repl = newSVpvn("",0);
463ee0b2 6410 while (es-- > 0)
a0d0e21e 6411 sv_catpv(repl, es ? "eval " : "do ");
79072805 6412 sv_catpvn(repl, "{ ", 2);
3280af22 6413 sv_catsv(repl, PL_lex_repl);
79072805 6414 sv_catpvn(repl, " };", 2);
25da4f38 6415 SvEVALED_on(repl);
3280af22
NIS
6416 SvREFCNT_dec(PL_lex_repl);
6417 PL_lex_repl = repl;
378cc40b 6418 }
79072805 6419
4633a7c4 6420 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6421 PL_lex_op = (OP*)pm;
79072805 6422 yylval.ival = OP_SUBST;
378cc40b
LW
6423 return s;
6424}
6425
76e3520e 6426STATIC char *
cea2e8a9 6427S_scan_trans(pTHX_ char *start)
378cc40b 6428{
a0d0e21e 6429 register char* s;
11343788 6430 OP *o;
79072805
LW
6431 short *tbl;
6432 I32 squash;
a0ed51b3 6433 I32 del;
79072805
LW
6434 I32 complement;
6435
6436 yylval.ival = OP_NULL;
6437
09bef843 6438 s = scan_str(start,FALSE,FALSE);
37fd879b 6439 if (!s)
cea2e8a9 6440 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6441 if (s[-1] == PL_multi_open)
2f3197b3
LW
6442 s--;
6443
09bef843 6444 s = scan_str(s,FALSE,FALSE);
79072805 6445 if (!s) {
37fd879b 6446 if (PL_lex_stuff) {
3280af22 6447 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6448 PL_lex_stuff = Nullsv;
6449 }
cea2e8a9 6450 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6451 }
79072805 6452
a0ed51b3 6453 complement = del = squash = 0;
6940069f 6454 while (strchr("cds", *s)) {
395c3793 6455 if (*s == 'c')
79072805 6456 complement = OPpTRANS_COMPLEMENT;
395c3793 6457 else if (*s == 'd')
a0ed51b3
LW
6458 del = OPpTRANS_DELETE;
6459 else if (*s == 's')
79072805 6460 squash = OPpTRANS_SQUASH;
395c3793
LW
6461 s++;
6462 }
8973db79
JH
6463
6464 New(803, tbl, complement&&!del?258:256, short);
6465 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7948272d
NIS
6466 o->op_private = del|squash|complement|
6467 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6468 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6469
3280af22 6470 PL_lex_op = o;
79072805
LW
6471 yylval.ival = OP_TRANS;
6472 return s;
6473}
6474
76e3520e 6475STATIC char *
cea2e8a9 6476S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6477{
6478 SV *herewas;
6479 I32 op_type = OP_SCALAR;
6480 I32 len;
6481 SV *tmpstr;
6482 char term;
6483 register char *d;
fc36a67e 6484 register char *e;
4633a7c4 6485 char *peek;
3280af22 6486 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6487
6488 s += 2;
3280af22
NIS
6489 d = PL_tokenbuf;
6490 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6491 if (!outer)
79072805 6492 *d++ = '\n';
bf4acbe4 6493 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6494 if (*peek && strchr("`'\"",*peek)) {
6495 s = peek;
79072805 6496 term = *s++;
3280af22 6497 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6498 d += len;
3280af22 6499 if (s < PL_bufend)
79072805 6500 s++;
79072805
LW
6501 }
6502 else {
6503 if (*s == '\\')
6504 s++, term = '\'';
6505 else
6506 term = '"';
7e2040f0 6507 if (!isALNUM_lazy_if(s,UTF))
12bcd1a6 6508 deprecate_old("bare << to mean <<\"\"");
7e2040f0 6509 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6510 if (d < e)
6511 *d++ = *s;
6512 }
6513 }
3280af22 6514 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6515 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6516 *d++ = '\n';
6517 *d = '\0';
3280af22 6518 len = d - PL_tokenbuf;
6a27c188 6519#ifndef PERL_STRICT_CR
f63a84b2
LW
6520 d = strchr(s, '\r');
6521 if (d) {
6522 char *olds = s;
6523 s = d;
3280af22 6524 while (s < PL_bufend) {
f63a84b2
LW
6525 if (*s == '\r') {
6526 *d++ = '\n';
6527 if (*++s == '\n')
6528 s++;
6529 }
6530 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6531 *d++ = *s++;
6532 s++;
6533 }
6534 else
6535 *d++ = *s++;
6536 }
6537 *d = '\0';
3280af22
NIS
6538 PL_bufend = d;
6539 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6540 s = olds;
6541 }
6542#endif
79072805 6543 d = "\n";
3280af22 6544 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6545 herewas = newSVpvn(s,PL_bufend-s);
79072805 6546 else
79cb57f6 6547 s--, herewas = newSVpvn(s,d-s);
79072805 6548 s += SvCUR(herewas);
748a9306 6549
8d6dde3e 6550 tmpstr = NEWSV(87,79);
748a9306
LW
6551 sv_upgrade(tmpstr, SVt_PVIV);
6552 if (term == '\'') {
79072805 6553 op_type = OP_CONST;
748a9306
LW
6554 SvIVX(tmpstr) = -1;
6555 }
6556 else if (term == '`') {
79072805 6557 op_type = OP_BACKTICK;
748a9306
LW
6558 SvIVX(tmpstr) = '\\';
6559 }
79072805
LW
6560
6561 CLINE;
57843af0 6562 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6563 PL_multi_open = PL_multi_close = '<';
6564 term = *PL_tokenbuf;
0244c3a4
GS
6565 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6566 char *bufptr = PL_sublex_info.super_bufptr;
6567 char *bufend = PL_sublex_info.super_bufend;
6568 char *olds = s - SvCUR(herewas);
6569 s = strchr(bufptr, '\n');
6570 if (!s)
6571 s = bufend;
6572 d = s;
6573 while (s < bufend &&
6574 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6575 if (*s++ == '\n')
57843af0 6576 CopLINE_inc(PL_curcop);
0244c3a4
GS
6577 }
6578 if (s >= bufend) {
eb160463 6579 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
6580 missingterm(PL_tokenbuf);
6581 }
6582 sv_setpvn(herewas,bufptr,d-bufptr+1);
6583 sv_setpvn(tmpstr,d+1,s-d);
6584 s += len - 1;
6585 sv_catpvn(herewas,s,bufend-s);
6586 (void)strcpy(bufptr,SvPVX(herewas));
6587
6588 s = olds;
6589 goto retval;
6590 }
6591 else if (!outer) {
79072805 6592 d = s;
3280af22
NIS
6593 while (s < PL_bufend &&
6594 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6595 if (*s++ == '\n')
57843af0 6596 CopLINE_inc(PL_curcop);
79072805 6597 }
3280af22 6598 if (s >= PL_bufend) {
eb160463 6599 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6600 missingterm(PL_tokenbuf);
79072805
LW
6601 }
6602 sv_setpvn(tmpstr,d+1,s-d);
6603 s += len - 1;
57843af0 6604 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6605
3280af22
NIS
6606 sv_catpvn(herewas,s,PL_bufend-s);
6607 sv_setsv(PL_linestr,herewas);
6608 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6609 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6610 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6611 }
6612 else
6613 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6614 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6615 if (!outer ||
3280af22 6616 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
eb160463 6617 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6618 missingterm(PL_tokenbuf);
79072805 6619 }
57843af0 6620 CopLINE_inc(PL_curcop);
3280af22 6621 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6622 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6623#ifndef PERL_STRICT_CR
3280af22 6624 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6625 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6626 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6627 {
3280af22
NIS
6628 PL_bufend[-2] = '\n';
6629 PL_bufend--;
6630 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6631 }
3280af22
NIS
6632 else if (PL_bufend[-1] == '\r')
6633 PL_bufend[-1] = '\n';
f63a84b2 6634 }
3280af22
NIS
6635 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6636 PL_bufend[-1] = '\n';
f63a84b2 6637#endif
3280af22 6638 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6639 SV *sv = NEWSV(88,0);
6640
93a17b20 6641 sv_upgrade(sv, SVt_PVMG);
3280af22 6642 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
6643 (void)SvIOK_on(sv);
6644 SvIVX(sv) = 0;
57843af0 6645 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6646 }
3280af22
NIS
6647 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6648 s = PL_bufend - 1;
79072805 6649 *s = ' ';
3280af22
NIS
6650 sv_catsv(PL_linestr,herewas);
6651 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805
LW
6652 }
6653 else {
3280af22
NIS
6654 s = PL_bufend;
6655 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6656 }
6657 }
79072805 6658 s++;
0244c3a4 6659retval:
57843af0 6660 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6661 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6662 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6663 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6664 }
8990e307 6665 SvREFCNT_dec(herewas);
0064a8a9 6666 if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7948272d 6667 SvUTF8_on(tmpstr);
3280af22 6668 PL_lex_stuff = tmpstr;
79072805
LW
6669 yylval.ival = op_type;
6670 return s;
6671}
6672
02aa26ce
NT
6673/* scan_inputsymbol
6674 takes: current position in input buffer
6675 returns: new position in input buffer
6676 side-effects: yylval and lex_op are set.
6677
6678 This code handles:
6679
6680 <> read from ARGV
6681 <FH> read from filehandle
6682 <pkg::FH> read from package qualified filehandle
6683 <pkg'FH> read from package qualified filehandle
6684 <$fh> read from filehandle in $fh
6685 <*.h> filename glob
6686
6687*/
6688
76e3520e 6689STATIC char *
cea2e8a9 6690S_scan_inputsymbol(pTHX_ char *start)
79072805 6691{
02aa26ce 6692 register char *s = start; /* current position in buffer */
79072805 6693 register char *d;
fc36a67e 6694 register char *e;
1b420867 6695 char *end;
79072805
LW
6696 I32 len;
6697
3280af22
NIS
6698 d = PL_tokenbuf; /* start of temp holding space */
6699 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6700 end = strchr(s, '\n');
6701 if (!end)
6702 end = PL_bufend;
6703 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6704
6705 /* die if we didn't have space for the contents of the <>,
1b420867 6706 or if it didn't end, or if we see a newline
02aa26ce
NT
6707 */
6708
3280af22 6709 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6710 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6711 if (s >= end)
cea2e8a9 6712 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6713
fc36a67e 6714 s++;
02aa26ce
NT
6715
6716 /* check for <$fh>
6717 Remember, only scalar variables are interpreted as filehandles by
6718 this code. Anything more complex (e.g., <$fh{$num}>) will be
6719 treated as a glob() call.
6720 This code makes use of the fact that except for the $ at the front,
6721 a scalar variable and a filehandle look the same.
6722 */
4633a7c4 6723 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6724
6725 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6726 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6727 d++;
02aa26ce
NT
6728
6729 /* If we've tried to read what we allow filehandles to look like, and
6730 there's still text left, then it must be a glob() and not a getline.
6731 Use scan_str to pull out the stuff between the <> and treat it
6732 as nothing more than a string.
6733 */
6734
3280af22 6735 if (d - PL_tokenbuf != len) {
79072805
LW
6736 yylval.ival = OP_GLOB;
6737 set_csh();
09bef843 6738 s = scan_str(start,FALSE,FALSE);
79072805 6739 if (!s)
cea2e8a9 6740 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6741 return s;
6742 }
395c3793 6743 else {
9b3023bc
RGS
6744 bool readline_overriden = FALSE;
6745 GV *gv_readline = Nullgv;
6746 GV **gvp;
02aa26ce 6747 /* we're in a filehandle read situation */
3280af22 6748 d = PL_tokenbuf;
02aa26ce
NT
6749
6750 /* turn <> into <ARGV> */
79072805
LW
6751 if (!len)
6752 (void)strcpy(d,"ARGV");
02aa26ce 6753
9b3023bc 6754 /* Check whether readline() is overriden */
ba979b31
NIS
6755 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
6756 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 6757 ||
ba979b31 6758 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9b3023bc 6759 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
ba979b31 6760 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
6761 readline_overriden = TRUE;
6762
02aa26ce
NT
6763 /* if <$fh>, create the ops to turn the variable into a
6764 filehandle
6765 */
79072805 6766 if (*d == '$') {
a0d0e21e 6767 I32 tmp;
02aa26ce
NT
6768
6769 /* try to find it in the pad for this block, otherwise find
6770 add symbol table ops
6771 */
11343788 6772 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
dd2155a4
DM
6773 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
6774 SV *sym = sv_2mortal(
6775 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
f558d5af
JH
6776 sv_catpvn(sym, "::", 2);
6777 sv_catpv(sym, d+1);
6778 d = SvPVX(sym);
6779 goto intro_sym;
6780 }
6781 else {
6782 OP *o = newOP(OP_PADSV, 0);
6783 o->op_targ = tmp;
9b3023bc
RGS
6784 PL_lex_op = readline_overriden
6785 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6786 append_elem(OP_LIST, o,
6787 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
6788 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 6789 }
a0d0e21e
LW
6790 }
6791 else {
f558d5af
JH
6792 GV *gv;
6793 ++d;
6794intro_sym:
6795 gv = gv_fetchpv(d,
6796 (PL_in_eval
6797 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 6798 : GV_ADDMULTI),
f558d5af 6799 SVt_PV);
9b3023bc
RGS
6800 PL_lex_op = readline_overriden
6801 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6802 append_elem(OP_LIST,
6803 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
6804 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6805 : (OP*)newUNOP(OP_READLINE, 0,
6806 newUNOP(OP_RV2SV, 0,
6807 newGVOP(OP_GV, 0, gv)));
a0d0e21e 6808 }
7c6fadd6
RGS
6809 if (!readline_overriden)
6810 PL_lex_op->op_flags |= OPf_SPECIAL;
f5284f61 6811 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
6812 yylval.ival = OP_NULL;
6813 }
02aa26ce
NT
6814
6815 /* If it's none of the above, it must be a literal filehandle
6816 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 6817 else {
85e6fe83 6818 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9b3023bc
RGS
6819 PL_lex_op = readline_overriden
6820 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6821 append_elem(OP_LIST,
6822 newGVOP(OP_GV, 0, gv),
6823 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6824 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
6825 yylval.ival = OP_NULL;
6826 }
6827 }
02aa26ce 6828
79072805
LW
6829 return s;
6830}
6831
02aa26ce
NT
6832
6833/* scan_str
6834 takes: start position in buffer
09bef843
SB
6835 keep_quoted preserve \ on the embedded delimiter(s)
6836 keep_delims preserve the delimiters around the string
02aa26ce
NT
6837 returns: position to continue reading from buffer
6838 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6839 updates the read buffer.
6840
6841 This subroutine pulls a string out of the input. It is called for:
6842 q single quotes q(literal text)
6843 ' single quotes 'literal text'
6844 qq double quotes qq(interpolate $here please)
6845 " double quotes "interpolate $here please"
6846 qx backticks qx(/bin/ls -l)
6847 ` backticks `/bin/ls -l`
6848 qw quote words @EXPORT_OK = qw( func() $spam )
6849 m// regexp match m/this/
6850 s/// regexp substitute s/this/that/
6851 tr/// string transliterate tr/this/that/
6852 y/// string transliterate y/this/that/
6853 ($*@) sub prototypes sub foo ($)
09bef843 6854 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
6855 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6856
6857 In most of these cases (all but <>, patterns and transliterate)
6858 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6859 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6860 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6861 calls scan_str().
4e553d73 6862
02aa26ce
NT
6863 It skips whitespace before the string starts, and treats the first
6864 character as the delimiter. If the delimiter is one of ([{< then
6865 the corresponding "close" character )]}> is used as the closing
6866 delimiter. It allows quoting of delimiters, and if the string has
6867 balanced delimiters ([{<>}]) it allows nesting.
6868
37fd879b
HS
6869 On success, the SV with the resulting string is put into lex_stuff or,
6870 if that is already non-NULL, into lex_repl. The second case occurs only
6871 when parsing the RHS of the special constructs s/// and tr/// (y///).
6872 For convenience, the terminating delimiter character is stuffed into
6873 SvIVX of the SV.
02aa26ce
NT
6874*/
6875
76e3520e 6876STATIC char *
09bef843 6877S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 6878{
02aa26ce
NT
6879 SV *sv; /* scalar value: string */
6880 char *tmps; /* temp string, used for delimiter matching */
6881 register char *s = start; /* current position in the buffer */
6882 register char term; /* terminating character */
6883 register char *to; /* current position in the sv's data */
6884 I32 brackets = 1; /* bracket nesting level */
89491803 6885 bool has_utf8 = FALSE; /* is there any utf8 content? */
02aa26ce
NT
6886
6887 /* skip space before the delimiter */
fb73857a 6888 if (isSPACE(*s))
6889 s = skipspace(s);
02aa26ce
NT
6890
6891 /* mark where we are, in case we need to report errors */
79072805 6892 CLINE;
02aa26ce
NT
6893
6894 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 6895 term = *s;
63cd0674 6896 if (!UTF8_IS_INVARIANT((U8)term) && UTF)
89491803 6897 has_utf8 = TRUE;
b1c7b182 6898
02aa26ce 6899 /* mark where we are */
57843af0 6900 PL_multi_start = CopLINE(PL_curcop);
3280af22 6901 PL_multi_open = term;
02aa26ce
NT
6902
6903 /* find corresponding closing delimiter */
93a17b20 6904 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
79072805 6905 term = tmps[5];
3280af22 6906 PL_multi_close = term;
79072805 6907
02aa26ce 6908 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
6909 assuming. 79 is the SV's initial length. What a random number. */
6910 sv = NEWSV(87,79);
ed6116ce
LW
6911 sv_upgrade(sv, SVt_PVIV);
6912 SvIVX(sv) = term;
a0d0e21e 6913 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
6914
6915 /* move past delimiter and try to read a complete string */
09bef843
SB
6916 if (keep_delims)
6917 sv_catpvn(sv, s, 1);
93a17b20
LW
6918 s++;
6919 for (;;) {
02aa26ce 6920 /* extend sv if need be */
3280af22 6921 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 6922 /* set 'to' to the next character in the sv's string */
463ee0b2 6923 to = SvPVX(sv)+SvCUR(sv);
09bef843 6924
02aa26ce 6925 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
6926 if (PL_multi_open == PL_multi_close) {
6927 for (; s < PL_bufend; s++,to++) {
02aa26ce 6928 /* embedded newlines increment the current line number */
3280af22 6929 if (*s == '\n' && !PL_rsfp)
57843af0 6930 CopLINE_inc(PL_curcop);
02aa26ce 6931 /* handle quoted delimiters */
3280af22 6932 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 6933 if (!keep_quoted && s[1] == term)
a0d0e21e 6934 s++;
02aa26ce 6935 /* any other quotes are simply copied straight through */
a0d0e21e
LW
6936 else
6937 *to++ = *s++;
6938 }
02aa26ce
NT
6939 /* terminate when run out of buffer (the for() condition), or
6940 have found the terminator */
93a17b20
LW
6941 else if (*s == term)
6942 break;
63cd0674 6943 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6944 has_utf8 = TRUE;
93a17b20
LW
6945 *to = *s;
6946 }
6947 }
02aa26ce
NT
6948
6949 /* if the terminator isn't the same as the start character (e.g.,
6950 matched brackets), we have to allow more in the quoting, and
6951 be prepared for nested brackets.
6952 */
93a17b20 6953 else {
02aa26ce 6954 /* read until we run out of string, or we find the terminator */
3280af22 6955 for (; s < PL_bufend; s++,to++) {
02aa26ce 6956 /* embedded newlines increment the line count */
3280af22 6957 if (*s == '\n' && !PL_rsfp)
57843af0 6958 CopLINE_inc(PL_curcop);
02aa26ce 6959 /* backslashes can escape the open or closing characters */
3280af22 6960 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
6961 if (!keep_quoted &&
6962 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
6963 s++;
6964 else
6965 *to++ = *s++;
6966 }
02aa26ce 6967 /* allow nested opens and closes */
3280af22 6968 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 6969 break;
3280af22 6970 else if (*s == PL_multi_open)
93a17b20 6971 brackets++;
63cd0674 6972 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6973 has_utf8 = TRUE;
93a17b20
LW
6974 *to = *s;
6975 }
6976 }
02aa26ce 6977 /* terminate the copied string and update the sv's end-of-string */
93a17b20 6978 *to = '\0';
463ee0b2 6979 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 6980
02aa26ce
NT
6981 /*
6982 * this next chunk reads more into the buffer if we're not done yet
6983 */
6984
b1c7b182
GS
6985 if (s < PL_bufend)
6986 break; /* handle case where we are done yet :-) */
79072805 6987
6a27c188 6988#ifndef PERL_STRICT_CR
f63a84b2 6989 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
6990 if ((to[-2] == '\r' && to[-1] == '\n') ||
6991 (to[-2] == '\n' && to[-1] == '\r'))
6992 {
f63a84b2
LW
6993 to[-2] = '\n';
6994 to--;
6995 SvCUR_set(sv, to - SvPVX(sv));
6996 }
6997 else if (to[-1] == '\r')
6998 to[-1] = '\n';
6999 }
7000 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
7001 to[-1] = '\n';
7002#endif
7003
02aa26ce
NT
7004 /* if we're out of file, or a read fails, bail and reset the current
7005 line marker so we can report where the unterminated string began
7006 */
3280af22
NIS
7007 if (!PL_rsfp ||
7008 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 7009 sv_free(sv);
eb160463 7010 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
79072805
LW
7011 return Nullch;
7012 }
02aa26ce 7013 /* we read a line, so increment our line counter */
57843af0 7014 CopLINE_inc(PL_curcop);
a0ed51b3 7015
02aa26ce 7016 /* update debugger info */
3280af22 7017 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
7018 SV *sv = NEWSV(88,0);
7019
93a17b20 7020 sv_upgrade(sv, SVt_PVMG);
3280af22 7021 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
7022 (void)SvIOK_on(sv);
7023 SvIVX(sv) = 0;
57843af0 7024 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 7025 }
a0ed51b3 7026
3280af22
NIS
7027 /* having changed the buffer, we must update PL_bufend */
7028 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 7029 PL_last_lop = PL_last_uni = Nullch;
378cc40b 7030 }
4e553d73 7031
02aa26ce
NT
7032 /* at this point, we have successfully read the delimited string */
7033
09bef843
SB
7034 if (keep_delims)
7035 sv_catpvn(sv, s, 1);
89491803 7036 if (has_utf8)
b1c7b182 7037 SvUTF8_on(sv);
d0063567
DK
7038 else if (PL_encoding)
7039 sv_recode_to_utf8(sv, PL_encoding);
7040
57843af0 7041 PL_multi_end = CopLINE(PL_curcop);
79072805 7042 s++;
02aa26ce
NT
7043
7044 /* if we allocated too much space, give some back */
93a17b20
LW
7045 if (SvCUR(sv) + 5 < SvLEN(sv)) {
7046 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 7047 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 7048 }
02aa26ce
NT
7049
7050 /* decide whether this is the first or second quoted string we've read
7051 for this op
7052 */
4e553d73 7053
3280af22
NIS
7054 if (PL_lex_stuff)
7055 PL_lex_repl = sv;
79072805 7056 else
3280af22 7057 PL_lex_stuff = sv;
378cc40b
LW
7058 return s;
7059}
7060
02aa26ce
NT
7061/*
7062 scan_num
7063 takes: pointer to position in buffer
7064 returns: pointer to new position in buffer
7065 side-effects: builds ops for the constant in yylval.op
7066
7067 Read a number in any of the formats that Perl accepts:
7068
7fd134d9
JH
7069 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
7070 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
7071 0b[01](_?[01])*
7072 0[0-7](_?[0-7])*
7073 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 7074
3280af22 7075 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
7076 thing it reads.
7077
7078 If it reads a number without a decimal point or an exponent, it will
7079 try converting the number to an integer and see if it can do so
7080 without loss of precision.
7081*/
4e553d73 7082
378cc40b 7083char *
b73d6f50 7084Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 7085{
02aa26ce
NT
7086 register char *s = start; /* current position in buffer */
7087 register char *d; /* destination in temp buffer */
7088 register char *e; /* end of temp buffer */
86554af2 7089 NV nv; /* number read, as a double */
a7cb1f99 7090 SV *sv = Nullsv; /* place to put the converted number */
a86a20aa 7091 bool floatit; /* boolean: int or float? */
02aa26ce 7092 char *lastub = 0; /* position of last underbar */
fc36a67e 7093 static char number_too_long[] = "Number too long";
378cc40b 7094
02aa26ce
NT
7095 /* We use the first character to decide what type of number this is */
7096
378cc40b 7097 switch (*s) {
79072805 7098 default:
cea2e8a9 7099 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 7100
02aa26ce 7101 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 7102 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
7103 case '0':
7104 {
02aa26ce
NT
7105 /* variables:
7106 u holds the "number so far"
4f19785b
WSI
7107 shift the power of 2 of the base
7108 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
7109 overflowed was the number more than we can hold?
7110
7111 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
7112 we in octal/hex/binary?" indicator to disallow hex characters
7113 when in octal mode.
02aa26ce 7114 */
9e24b6e2
JH
7115 NV n = 0.0;
7116 UV u = 0;
79072805 7117 I32 shift;
9e24b6e2
JH
7118 bool overflowed = FALSE;
7119 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
7120 static char* bases[5] = { "", "binary", "", "octal",
7121 "hexadecimal" };
7122 static char* Bases[5] = { "", "Binary", "", "Octal",
7123 "Hexadecimal" };
7124 static char *maxima[5] = { "",
7125 "0b11111111111111111111111111111111",
7126 "",
893fe2c2 7127 "037777777777",
9e24b6e2
JH
7128 "0xffffffff" };
7129 char *base, *Base, *max;
378cc40b 7130
02aa26ce 7131 /* check for hex */
378cc40b
LW
7132 if (s[1] == 'x') {
7133 shift = 4;
7134 s += 2;
4f19785b
WSI
7135 } else if (s[1] == 'b') {
7136 shift = 1;
7137 s += 2;
378cc40b 7138 }
02aa26ce 7139 /* check for a decimal in disguise */
b78218b7 7140 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 7141 goto decimal;
02aa26ce 7142 /* so it must be octal */
928753ea 7143 else {
378cc40b 7144 shift = 3;
928753ea
JH
7145 s++;
7146 }
7147
7148 if (*s == '_') {
7149 if (ckWARN(WARN_SYNTAX))
9014280d 7150 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7151 "Misplaced _ in number");
7152 lastub = s++;
7153 }
9e24b6e2
JH
7154
7155 base = bases[shift];
7156 Base = Bases[shift];
7157 max = maxima[shift];
02aa26ce 7158
4f19785b 7159 /* read the rest of the number */
378cc40b 7160 for (;;) {
9e24b6e2 7161 /* x is used in the overflow test,
893fe2c2 7162 b is the digit we're adding on. */
9e24b6e2 7163 UV x, b;
55497cff 7164
378cc40b 7165 switch (*s) {
02aa26ce
NT
7166
7167 /* if we don't mention it, we're done */
378cc40b
LW
7168 default:
7169 goto out;
02aa26ce 7170
928753ea 7171 /* _ are ignored -- but warned about if consecutive */
de3bb511 7172 case '_':
928753ea 7173 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7174 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7175 "Misplaced _ in number");
7176 lastub = s++;
de3bb511 7177 break;
02aa26ce
NT
7178
7179 /* 8 and 9 are not octal */
378cc40b 7180 case '8': case '9':
4f19785b 7181 if (shift == 3)
cea2e8a9 7182 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 7183 /* FALL THROUGH */
02aa26ce
NT
7184
7185 /* octal digits */
4f19785b 7186 case '2': case '3': case '4':
378cc40b 7187 case '5': case '6': case '7':
4f19785b 7188 if (shift == 1)
cea2e8a9 7189 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7190 /* FALL THROUGH */
7191
7192 case '0': case '1':
02aa26ce 7193 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7194 goto digit;
02aa26ce
NT
7195
7196 /* hex digits */
378cc40b
LW
7197 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7198 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7199 /* make sure they said 0x */
378cc40b
LW
7200 if (shift != 4)
7201 goto out;
55497cff 7202 b = (*s++ & 7) + 9;
02aa26ce
NT
7203
7204 /* Prepare to put the digit we have onto the end
7205 of the number so far. We check for overflows.
7206 */
7207
55497cff 7208 digit:
9e24b6e2
JH
7209 if (!overflowed) {
7210 x = u << shift; /* make room for the digit */
7211
7212 if ((x >> shift) != u
7213 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7214 overflowed = TRUE;
7215 n = (NV) u;
767a6a26 7216 if (ckWARN_d(WARN_OVERFLOW))
9014280d 7217 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
9e24b6e2
JH
7218 "Integer overflow in %s number",
7219 base);
7220 } else
7221 u = x | b; /* add the digit to the end */
7222 }
7223 if (overflowed) {
7224 n *= nvshift[shift];
7225 /* If an NV has not enough bits in its
7226 * mantissa to represent an UV this summing of
7227 * small low-order numbers is a waste of time
7228 * (because the NV cannot preserve the
7229 * low-order bits anyway): we could just
7230 * remember when did we overflow and in the
7231 * end just multiply n by the right
7232 * amount. */
7233 n += (NV) b;
55497cff 7234 }
378cc40b
LW
7235 break;
7236 }
7237 }
02aa26ce
NT
7238
7239 /* if we get here, we had success: make a scalar value from
7240 the number.
7241 */
378cc40b 7242 out:
928753ea
JH
7243
7244 /* final misplaced underbar check */
7245 if (s[-1] == '_') {
7246 if (ckWARN(WARN_SYNTAX))
9014280d 7247 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
7248 }
7249
79072805 7250 sv = NEWSV(92,0);
9e24b6e2 7251 if (overflowed) {
767a6a26 7252 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
9014280d 7253 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7254 "%s number > %s non-portable",
7255 Base, max);
7256 sv_setnv(sv, n);
7257 }
7258 else {
15041a67 7259#if UVSIZE > 4
767a6a26 7260 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
9014280d 7261 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7262 "%s number > %s non-portable",
7263 Base, max);
2cc4c2dc 7264#endif
9e24b6e2
JH
7265 sv_setuv(sv, u);
7266 }
2cc4c2dc 7267 if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7268 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7269 }
7270 break;
02aa26ce
NT
7271
7272 /*
7273 handle decimal numbers.
7274 we're also sent here when we read a 0 as the first digit
7275 */
378cc40b
LW
7276 case '1': case '2': case '3': case '4': case '5':
7277 case '6': case '7': case '8': case '9': case '.':
7278 decimal:
3280af22
NIS
7279 d = PL_tokenbuf;
7280 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7281 floatit = FALSE;
02aa26ce
NT
7282
7283 /* read next group of digits and _ and copy into d */
de3bb511 7284 while (isDIGIT(*s) || *s == '_') {
4e553d73 7285 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7286 if -w is on
7287 */
93a17b20 7288 if (*s == '_') {
928753ea 7289 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7290 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7291 "Misplaced _ in number");
7292 lastub = s++;
93a17b20 7293 }
fc36a67e 7294 else {
02aa26ce 7295 /* check for end of fixed-length buffer */
fc36a67e 7296 if (d >= e)
cea2e8a9 7297 Perl_croak(aTHX_ number_too_long);
02aa26ce 7298 /* if we're ok, copy the character */
378cc40b 7299 *d++ = *s++;
fc36a67e 7300 }
378cc40b 7301 }
02aa26ce
NT
7302
7303 /* final misplaced underbar check */
928753ea 7304 if (lastub && s == lastub + 1) {
d008e5eb 7305 if (ckWARN(WARN_SYNTAX))
9014280d 7306 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 7307 }
02aa26ce
NT
7308
7309 /* read a decimal portion if there is one. avoid
7310 3..5 being interpreted as the number 3. followed
7311 by .5
7312 */
2f3197b3 7313 if (*s == '.' && s[1] != '.') {
79072805 7314 floatit = TRUE;
378cc40b 7315 *d++ = *s++;
02aa26ce 7316
928753ea
JH
7317 if (*s == '_') {
7318 if (ckWARN(WARN_SYNTAX))
9014280d 7319 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7320 "Misplaced _ in number");
7321 lastub = s;
7322 }
7323
7324 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7325 */
fc36a67e 7326 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7327 /* fixed length buffer check */
fc36a67e 7328 if (d >= e)
cea2e8a9 7329 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7330 if (*s == '_') {
7331 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7332 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7333 "Misplaced _ in number");
7334 lastub = s;
7335 }
7336 else
fc36a67e 7337 *d++ = *s;
378cc40b 7338 }
928753ea
JH
7339 /* fractional part ending in underbar? */
7340 if (s[-1] == '_') {
7341 if (ckWARN(WARN_SYNTAX))
9014280d 7342 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7343 "Misplaced _ in number");
7344 }
dd629d5b
GS
7345 if (*s == '.' && isDIGIT(s[1])) {
7346 /* oops, it's really a v-string, but without the "v" */
f4758303 7347 s = start;
dd629d5b
GS
7348 goto vstring;
7349 }
378cc40b 7350 }
02aa26ce
NT
7351
7352 /* read exponent part, if present */
7fd134d9 7353 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7354 floatit = TRUE;
7355 s++;
02aa26ce
NT
7356
7357 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7358 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7359
7fd134d9
JH
7360 /* stray preinitial _ */
7361 if (*s == '_') {
7362 if (ckWARN(WARN_SYNTAX))
9014280d 7363 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7364 "Misplaced _ in number");
7365 lastub = s++;
7366 }
7367
02aa26ce 7368 /* allow positive or negative exponent */
378cc40b
LW
7369 if (*s == '+' || *s == '-')
7370 *d++ = *s++;
02aa26ce 7371
7fd134d9
JH
7372 /* stray initial _ */
7373 if (*s == '_') {
7374 if (ckWARN(WARN_SYNTAX))
9014280d 7375 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7376 "Misplaced _ in number");
7377 lastub = s++;
7378 }
7379
7fd134d9
JH
7380 /* read digits of exponent */
7381 while (isDIGIT(*s) || *s == '_') {
7382 if (isDIGIT(*s)) {
7383 if (d >= e)
7384 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7385 *d++ = *s++;
7fd134d9
JH
7386 }
7387 else {
7388 if (ckWARN(WARN_SYNTAX) &&
7389 ((lastub && s == lastub + 1) ||
b3b48e3e 7390 (!isDIGIT(s[1]) && s[1] != '_')))
9014280d 7391 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9 7392 "Misplaced _ in number");
b3b48e3e 7393 lastub = s++;
7fd134d9 7394 }
7fd134d9 7395 }
378cc40b 7396 }
02aa26ce 7397
02aa26ce
NT
7398
7399 /* make an sv from the string */
79072805 7400 sv = NEWSV(92,0);
097ee67d 7401
0b7fceb9 7402 /*
58bb9ec3
NC
7403 We try to do an integer conversion first if no characters
7404 indicating "float" have been found.
0b7fceb9
MU
7405 */
7406
7407 if (!floatit) {
58bb9ec3
NC
7408 UV uv;
7409 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
7410
7411 if (flags == IS_NUMBER_IN_UV) {
7412 if (uv <= IV_MAX)
86554af2 7413 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 7414 else
c239479b 7415 sv_setuv(sv, uv);
58bb9ec3
NC
7416 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
7417 if (uv <= (UV) IV_MIN)
7418 sv_setiv(sv, -(IV)uv);
7419 else
7420 floatit = TRUE;
7421 } else
7422 floatit = TRUE;
7423 }
0b7fceb9 7424 if (floatit) {
58bb9ec3
NC
7425 /* terminate the string */
7426 *d = '\0';
86554af2
JH
7427 nv = Atof(PL_tokenbuf);
7428 sv_setnv(sv, nv);
7429 }
86554af2 7430
b8403495
JH
7431 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7432 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7433 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7434 (floatit ? "float" : "integer"),
7435 sv, Nullsv, NULL);
378cc40b 7436 break;
0b7fceb9 7437
e312add1 7438 /* if it starts with a v, it could be a v-string */
a7cb1f99 7439 case 'v':
dd629d5b 7440vstring:
f4758303 7441 sv = NEWSV(92,5); /* preallocate storage space */
b0f01acb 7442 s = scan_vstring(s,sv);
a7cb1f99 7443 break;
79072805 7444 }
a687059c 7445
02aa26ce
NT
7446 /* make the op for the constant and return */
7447
a86a20aa 7448 if (sv)
b73d6f50 7449 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7450 else
b73d6f50 7451 lvalp->opval = Nullop;
a687059c 7452
378cc40b
LW
7453 return s;
7454}
7455
76e3520e 7456STATIC char *
cea2e8a9 7457S_scan_formline(pTHX_ register char *s)
378cc40b 7458{
79072805 7459 register char *eol;
378cc40b 7460 register char *t;
79cb57f6 7461 SV *stuff = newSVpvn("",0);
79072805 7462 bool needargs = FALSE;
378cc40b 7463
79072805 7464 while (!needargs) {
c2e66d9e 7465 if (*s == '.' || *s == /*{*/'}') {
79072805 7466 /*SUPPRESS 530*/
51882d45 7467#ifdef PERL_STRICT_CR
bf4acbe4 7468 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7469#else
bf4acbe4 7470 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7471#endif
6a65c6a0 7472 if (*t == '\n' || t == PL_bufend)
79072805
LW
7473 break;
7474 }
3280af22 7475 if (PL_in_eval && !PL_rsfp) {
93a17b20 7476 eol = strchr(s,'\n');
0f85fab0 7477 if (!eol++)
3280af22 7478 eol = PL_bufend;
0f85fab0
LW
7479 }
7480 else
3280af22 7481 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7482 if (*s != '#') {
a0d0e21e
LW
7483 for (t = s; t < eol; t++) {
7484 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7485 needargs = FALSE;
7486 goto enough; /* ~~ must be first line in formline */
378cc40b 7487 }
a0d0e21e
LW
7488 if (*t == '@' || *t == '^')
7489 needargs = TRUE;
378cc40b 7490 }
7121b347
MG
7491 if (eol > s) {
7492 sv_catpvn(stuff, s, eol-s);
2dc4c65b 7493#ifndef PERL_STRICT_CR
7121b347
MG
7494 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7495 char *end = SvPVX(stuff) + SvCUR(stuff);
7496 end[-2] = '\n';
7497 end[-1] = '\0';
7498 SvCUR(stuff)--;
7499 }
2dc4c65b 7500#endif
7121b347
MG
7501 }
7502 else
7503 break;
79072805
LW
7504 }
7505 s = eol;
3280af22
NIS
7506 if (PL_rsfp) {
7507 s = filter_gets(PL_linestr, PL_rsfp, 0);
7508 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7509 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7510 PL_last_lop = PL_last_uni = Nullch;
79072805 7511 if (!s) {
3280af22 7512 s = PL_bufptr;
79072805 7513 yyerror("Format not terminated");
378cc40b
LW
7514 break;
7515 }
378cc40b 7516 }
463ee0b2 7517 incline(s);
79072805 7518 }
a0d0e21e
LW
7519 enough:
7520 if (SvCUR(stuff)) {
3280af22 7521 PL_expect = XTERM;
79072805 7522 if (needargs) {
3280af22
NIS
7523 PL_lex_state = LEX_NORMAL;
7524 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7525 force_next(',');
7526 }
a0d0e21e 7527 else
3280af22
NIS
7528 PL_lex_state = LEX_FORMLINE;
7529 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7530 force_next(THING);
3280af22 7531 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7532 force_next(LSTOP);
378cc40b 7533 }
79072805 7534 else {
8990e307 7535 SvREFCNT_dec(stuff);
3280af22
NIS
7536 PL_lex_formbrack = 0;
7537 PL_bufptr = s;
79072805
LW
7538 }
7539 return s;
378cc40b 7540}
a687059c 7541
76e3520e 7542STATIC void
cea2e8a9 7543S_set_csh(pTHX)
a687059c 7544{
ae986130 7545#ifdef CSH
3280af22
NIS
7546 if (!PL_cshlen)
7547 PL_cshlen = strlen(PL_cshname);
ae986130 7548#endif
a687059c 7549}
463ee0b2 7550
ba6d6ac9 7551I32
864dbfa3 7552Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7553{
3280af22
NIS
7554 I32 oldsavestack_ix = PL_savestack_ix;
7555 CV* outsidecv = PL_compcv;
8990e307 7556
3280af22
NIS
7557 if (PL_compcv) {
7558 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7559 }
7766f137 7560 SAVEI32(PL_subline);
3280af22 7561 save_item(PL_subname);
3280af22 7562 SAVESPTR(PL_compcv);
3280af22
NIS
7563
7564 PL_compcv = (CV*)NEWSV(1104,0);
7565 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7566 CvFLAGS(PL_compcv) |= flags;
7567
57843af0 7568 PL_subline = CopLINE(PL_curcop);
dd2155a4 7569 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
3280af22 7570 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
748a9306 7571
8990e307
LW
7572 return oldsavestack_ix;
7573}
7574
084592ab
CN
7575#ifdef __SC__
7576#pragma segment Perl_yylex
7577#endif
8990e307 7578int
864dbfa3 7579Perl_yywarn(pTHX_ char *s)
8990e307 7580{
faef0170 7581 PL_in_eval |= EVAL_WARNONLY;
748a9306 7582 yyerror(s);
faef0170 7583 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7584 return 0;
8990e307
LW
7585}
7586
7587int
864dbfa3 7588Perl_yyerror(pTHX_ char *s)
463ee0b2 7589{
68dc0745 7590 char *where = NULL;
7591 char *context = NULL;
7592 int contlen = -1;
46fc3d4c 7593 SV *msg;
463ee0b2 7594
3280af22 7595 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7596 where = "at EOF";
3280af22
NIS
7597 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7598 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
f355267c
JH
7599 /*
7600 Only for NetWare:
7601 The code below is removed for NetWare because it abends/crashes on NetWare
7602 when the script has error such as not having the closing quotes like:
7603 if ($var eq "value)
7604 Checking of white spaces is anyway done in NetWare code.
7605 */
7606#ifndef NETWARE
3280af22
NIS
7607 while (isSPACE(*PL_oldoldbufptr))
7608 PL_oldoldbufptr++;
f355267c 7609#endif
3280af22
NIS
7610 context = PL_oldoldbufptr;
7611 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7612 }
3280af22
NIS
7613 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7614 PL_oldbufptr != PL_bufptr) {
f355267c
JH
7615 /*
7616 Only for NetWare:
7617 The code below is removed for NetWare because it abends/crashes on NetWare
7618 when the script has error such as not having the closing quotes like:
7619 if ($var eq "value)
7620 Checking of white spaces is anyway done in NetWare code.
7621 */
7622#ifndef NETWARE
3280af22
NIS
7623 while (isSPACE(*PL_oldbufptr))
7624 PL_oldbufptr++;
f355267c 7625#endif
3280af22
NIS
7626 context = PL_oldbufptr;
7627 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7628 }
7629 else if (yychar > 255)
68dc0745 7630 where = "next token ???";
cdfb297e
GS
7631#ifdef USE_PURE_BISON
7632/* GNU Bison sets the value -2 */
7633 else if (yychar == -2) {
7634#else
463ee0b2 7635 else if ((yychar & 127) == 127) {
cdfb297e 7636#endif
3280af22
NIS
7637 if (PL_lex_state == LEX_NORMAL ||
7638 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7639 where = "at end of line";
3280af22 7640 else if (PL_lex_inpat)
68dc0745 7641 where = "within pattern";
463ee0b2 7642 else
68dc0745 7643 where = "within string";
463ee0b2 7644 }
46fc3d4c 7645 else {
79cb57f6 7646 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7647 if (yychar < 32)
cea2e8a9 7648 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7649 else if (isPRINT_LC(yychar))
cea2e8a9 7650 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7651 else
cea2e8a9 7652 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7653 where = SvPVX(where_sv);
463ee0b2 7654 }
46fc3d4c 7655 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 7656 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 7657 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7658 if (context)
cea2e8a9 7659 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7660 else
cea2e8a9 7661 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7662 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7663 Perl_sv_catpvf(aTHX_ msg,
57def98f 7664 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7665 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7666 PL_multi_end = 0;
a0d0e21e 7667 }
faef0170 7668 if (PL_in_eval & EVAL_WARNONLY)
894356b3 7669 Perl_warn(aTHX_ "%"SVf, msg);
463ee0b2 7670 else
5a844595 7671 qerror(msg);
c7d6bfb2
GS
7672 if (PL_error_count >= 10) {
7673 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7674 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
248c2a4d 7675 ERRSV, OutCopFILE(PL_curcop));
c7d6bfb2
GS
7676 else
7677 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 7678 OutCopFILE(PL_curcop));
c7d6bfb2 7679 }
3280af22
NIS
7680 PL_in_my = 0;
7681 PL_in_my_stash = Nullhv;
463ee0b2
LW
7682 return 0;
7683}
084592ab
CN
7684#ifdef __SC__
7685#pragma segment Main
7686#endif
4e35701f 7687
b250498f 7688STATIC char*
3ae08724 7689S_swallow_bom(pTHX_ U8 *s)
01ec43d0 7690{
b250498f
GS
7691 STRLEN slen;
7692 slen = SvCUR(PL_linestr);
7693 switch (*s) {
4e553d73
NIS
7694 case 0xFF:
7695 if (s[1] == 0xFE) {
01ec43d0 7696 /* UTF-16 little-endian */
3ae08724 7697 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
01ec43d0
GS
7698 Perl_croak(aTHX_ "Unsupported script encoding");
7699#ifndef PERL_NO_UTF16_FILTER
dea0fc0b 7700 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
3ae08724 7701 s += 2;
dea0fc0b
JH
7702 if (PL_bufend > (char*)s) {
7703 U8 *news;
7704 I32 newlen;
7705
7706 filter_add(utf16rev_textfilter, NULL);
7707 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
f72f5f89
JH
7708 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7709 PL_bufend - (char*)s - 1,
dea0fc0b
JH
7710 &newlen);
7711 Copy(news, s, newlen, U8);
7712 SvCUR_set(PL_linestr, newlen);
7713 PL_bufend = SvPVX(PL_linestr) + newlen;
7714 news[newlen++] = '\0';
7715 Safefree(news);
7716 }
b250498f 7717#else
01ec43d0 7718 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7719#endif
01ec43d0
GS
7720 }
7721 break;
78ae23f5 7722 case 0xFE:
3ae08724 7723 if (s[1] == 0xFF) { /* UTF-16 big-endian */
01ec43d0 7724#ifndef PERL_NO_UTF16_FILTER
dea0fc0b
JH
7725 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7726 s += 2;
7727 if (PL_bufend > (char *)s) {
7728 U8 *news;
7729 I32 newlen;
7730
7731 filter_add(utf16_textfilter, NULL);
7732 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7733 PL_bufend = (char*)utf16_to_utf8(s, news,
7734 PL_bufend - (char*)s,
7735 &newlen);
7736 Copy(news, s, newlen, U8);
7737 SvCUR_set(PL_linestr, newlen);
7738 PL_bufend = SvPVX(PL_linestr) + newlen;
7739 news[newlen++] = '\0';
7740 Safefree(news);
7741 }
b250498f 7742#else
01ec43d0 7743 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7744#endif
01ec43d0
GS
7745 }
7746 break;
3ae08724
GS
7747 case 0xEF:
7748 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
dea0fc0b 7749 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
01ec43d0
GS
7750 s += 3; /* UTF-8 */
7751 }
7752 break;
7753 case 0:
7754 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
3ae08724 7755 s[2] == 0xFE && s[3] == 0xFF)
01ec43d0
GS
7756 {
7757 Perl_croak(aTHX_ "Unsupported script encoding");
7758 }
7759 }
b8f84bb2 7760 return (char*)s;
b250498f 7761}
4755096e 7762
4755096e
GS
7763/*
7764 * restore_rsfp
7765 * Restore a source filter.
7766 */
7767
7768static void
acfe0abc 7769restore_rsfp(pTHX_ void *f)
4755096e
GS
7770{
7771 PerlIO *fp = (PerlIO*)f;
7772
7773 if (PL_rsfp == PerlIO_stdin())
7774 PerlIO_clearerr(PL_rsfp);
7775 else if (PL_rsfp && (PL_rsfp != fp))
7776 PerlIO_close(PL_rsfp);
7777 PL_rsfp = fp;
7778}
6e3aabd6
GS
7779
7780#ifndef PERL_NO_UTF16_FILTER
7781static I32
acfe0abc 7782utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7783{
7784 I32 count = FILTER_READ(idx+1, sv, maxlen);
7785 if (count) {
7786 U8* tmps;
7787 U8* tend;
dea0fc0b 7788 I32 newlen;
6e3aabd6 7789 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
f72f5f89
JH
7790 if (!*SvPV_nolen(sv))
7791 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7792 return count;
4e553d73 7793
dea0fc0b 7794 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7795 sv_usepvn(sv, (char*)tmps, tend - tmps);
7796 }
7797 return count;
7798}
7799
7800static I32
acfe0abc 7801utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7802{
7803 I32 count = FILTER_READ(idx+1, sv, maxlen);
7804 if (count) {
7805 U8* tmps;
7806 U8* tend;
dea0fc0b 7807 I32 newlen;
f72f5f89
JH
7808 if (!*SvPV_nolen(sv))
7809 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7810 return count;
7811
6e3aabd6 7812 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
dea0fc0b 7813 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7814 sv_usepvn(sv, (char*)tmps, tend - tmps);
7815 }
7816 return count;
7817}
7818#endif
9f4817db 7819