This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
NetWare change from Ananth Kesari.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
be3c0a43 3 * Copyright (c) 1991-2002, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
acfe0abc 30static void restore_rsfp(pTHX_ void *f);
6e3aabd6 31#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
32static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
39e02b42
JH
39#ifdef USE_UTF8_SCRIPTS
40# define UTF (!IN_BYTES)
2b9d42f0 41#else
39e02b42
JH
42# ifdef EBCDIC /* For now 'use utf8' does not affect tokenizer on EBCDIC */
43# define UTF (PL_linestr && DO_UTF8(PL_linestr))
44# else
45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
46# endif
2b9d42f0 47#endif
a0ed51b3 48
61f0cdd9 49/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
50 * 1999-02-27 mjd-perl-patch@plover.com */
51#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
52
bf4acbe4
GS
53/* On MacOS, respect nonbreaking spaces */
54#ifdef MACOS_TRADITIONAL
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
56#else
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
58#endif
59
ffb4593c
NT
60/* LEX_* are values for PL_lex_state, the state of the lexer.
61 * They are arranged oddly so that the guard on the switch statement
79072805
LW
62 * can get by with a single comparison (if the compiler is smart enough).
63 */
64
fb73857a 65/* #define LEX_NOTPARSING 11 is done in perl.h. */
66
55497cff 67#define LEX_NORMAL 10
68#define LEX_INTERPNORMAL 9
69#define LEX_INTERPCASEMOD 8
70#define LEX_INTERPPUSH 7
71#define LEX_INTERPSTART 6
72#define LEX_INTERPEND 5
73#define LEX_INTERPENDMAYBE 4
74#define LEX_INTERPCONCAT 3
75#define LEX_INTERPCONST 2
76#define LEX_FORMLINE 1
77#define LEX_KNOWNEXT 0
79072805 78
79072805
LW
79#ifdef ff_next
80#undef ff_next
d48672a2
LW
81#endif
82
a1a0e61e 83#ifdef USE_PURE_BISON
dba4d153
JH
84# ifndef YYMAXLEVEL
85# define YYMAXLEVEL 100
86# endif
20141f0e
IRC
87YYSTYPE* yylval_pointer[YYMAXLEVEL];
88int* yychar_pointer[YYMAXLEVEL];
6f202aea 89int yyactlevel = -1;
22c35a8c
GS
90# undef yylval
91# undef yychar
20141f0e
IRC
92# define yylval (*yylval_pointer[yyactlevel])
93# define yychar (*yychar_pointer[yyactlevel])
94# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 95# undef yylex
dba4d153 96# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
97#endif
98
79072805 99#include "keywords.h"
fe14fcc3 100
ffb4593c
NT
101/* CLINE is a macro that ensures PL_copline has a sane value */
102
ae986130
LW
103#ifdef CLINE
104#undef CLINE
105#endif
57843af0 106#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 107
ffb4593c
NT
108/*
109 * Convenience functions to return different tokens and prime the
9cbb5ea2 110 * lexer for the next token. They all take an argument.
ffb4593c
NT
111 *
112 * TOKEN : generic token (used for '(', DOLSHARP, etc)
113 * OPERATOR : generic operator
114 * AOPERATOR : assignment operator
115 * PREBLOCK : beginning the block after an if, while, foreach, ...
116 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
117 * PREREF : *EXPR where EXPR is not a simple identifier
118 * TERM : expression term
119 * LOOPX : loop exiting command (goto, last, dump, etc)
120 * FTST : file test operator
121 * FUN0 : zero-argument function
2d2e263d 122 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
123 * BOop : bitwise or or xor
124 * BAop : bitwise and
125 * SHop : shift operator
126 * PWop : power operator
9cbb5ea2 127 * PMop : pattern-matching operator
ffb4593c
NT
128 * Aop : addition-level operator
129 * Mop : multiplication-level operator
130 * Eop : equality-testing operator
e5edeb50 131 * Rop : relational operator <= != gt
ffb4593c
NT
132 *
133 * Also see LOP and lop() below.
134 */
135
075953c3
JH
136/* Note that REPORT() and REPORT2() will be expressions that supply
137 * their own trailing comma, not suitable for statements as such. */
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
139# define REPORT(x,retval) tokereport(x,s,(int)retval),
140# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 141#else
075953c3
JH
142# define REPORT(x,retval)
143# define REPORT2(x,retval)
998054bd
SC
144#endif
145
075953c3
JH
146#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
147#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
148#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
149#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
150#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
151#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
152#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
153#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
154#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
155#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
156#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
157#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
158#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
159#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
160#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
161#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
162#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
163#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
164#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
165#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 166
a687059c
LW
167/* This bit of chicanery makes a unary function followed by
168 * a parenthesis into a function with one argument, highest precedence.
169 */
2f3197b3 170#define UNI(f) return(yylval.ival = f, \
075953c3 171 REPORT("uni",f) \
3280af22
NIS
172 PL_expect = XTERM, \
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
a687059c
LW
176 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
177
79072805 178#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 179 REPORT("uni",f) \
3280af22
NIS
180 PL_bufptr = s, \
181 PL_last_uni = PL_oldbufptr, \
79072805
LW
182 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
183
9f68db38 184/* grandfather return to old style */
3280af22 185#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 186
8fa7f367
JH
187#ifdef DEBUGGING
188
2d00ba3b 189STATIC void
61b2116b 190S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 191{
998054bd 192 DEBUG_T({
9c5ffd7c 193 SV* report = newSVpv(thing, 0);
29b291f7
RB
194 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
195 (IV)rv);
998054bd
SC
196
197 if (s - PL_bufptr > 0)
198 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
199 else {
200 if (PL_oldbufptr && *PL_oldbufptr)
201 sv_catpv(report, PL_tokenbuf);
202 }
203 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 204 });
998054bd
SC
205}
206
8fa7f367
JH
207#endif
208
ffb4593c
NT
209/*
210 * S_ao
211 *
212 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
213 * into an OP_ANDASSIGN or OP_ORASSIGN
214 */
215
76e3520e 216STATIC int
cea2e8a9 217S_ao(pTHX_ int toketype)
a0d0e21e 218{
3280af22
NIS
219 if (*PL_bufptr == '=') {
220 PL_bufptr++;
a0d0e21e
LW
221 if (toketype == ANDAND)
222 yylval.ival = OP_ANDASSIGN;
223 else if (toketype == OROR)
224 yylval.ival = OP_ORASSIGN;
225 toketype = ASSIGNOP;
226 }
227 return toketype;
228}
229
ffb4593c
NT
230/*
231 * S_no_op
232 * When Perl expects an operator and finds something else, no_op
233 * prints the warning. It always prints "<something> found where
234 * operator expected. It prints "Missing semicolon on previous line?"
235 * if the surprise occurs at the start of the line. "do you need to
236 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
237 * where the compiler doesn't know if foo is a method call or a function.
238 * It prints "Missing operator before end of line" if there's nothing
239 * after the missing operator, or "... before <...>" if there is something
240 * after the missing operator.
241 */
242
76e3520e 243STATIC void
cea2e8a9 244S_no_op(pTHX_ char *what, char *s)
463ee0b2 245{
3280af22
NIS
246 char *oldbp = PL_bufptr;
247 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 248
1189a94a
GS
249 if (!s)
250 s = oldbp;
07c798fb 251 else
1189a94a 252 PL_bufptr = s;
cea2e8a9 253 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 254 if (is_first)
cea2e8a9 255 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 256 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 257 char *t;
7e2040f0 258 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 259 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 260 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 261 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 262 }
07c798fb
HS
263 else {
264 assert(s >= oldbp);
cea2e8a9 265 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 266 }
3280af22 267 PL_bufptr = oldbp;
8990e307
LW
268}
269
ffb4593c
NT
270/*
271 * S_missingterm
272 * Complain about missing quote/regexp/heredoc terminator.
273 * If it's called with (char *)NULL then it cauterizes the line buffer.
274 * If we're in a delimited string and the delimiter is a control
275 * character, it's reformatted into a two-char sequence like ^C.
276 * This is fatal.
277 */
278
76e3520e 279STATIC void
cea2e8a9 280S_missingterm(pTHX_ char *s)
8990e307
LW
281{
282 char tmpbuf[3];
283 char q;
284 if (s) {
285 char *nl = strrchr(s,'\n');
d2719217 286 if (nl)
8990e307
LW
287 *nl = '\0';
288 }
9d116dd7
JH
289 else if (
290#ifdef EBCDIC
291 iscntrl(PL_multi_close)
292#else
293 PL_multi_close < 32 || PL_multi_close == 127
294#endif
295 ) {
8990e307 296 *tmpbuf = '^';
3280af22 297 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
298 s = "\\n";
299 tmpbuf[2] = '\0';
300 s = tmpbuf;
301 }
302 else {
3280af22 303 *tmpbuf = PL_multi_close;
8990e307
LW
304 tmpbuf[1] = '\0';
305 s = tmpbuf;
306 }
307 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 308 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 309}
79072805 310
ffb4593c
NT
311/*
312 * Perl_deprecate
ffb4593c
NT
313 */
314
79072805 315void
864dbfa3 316Perl_deprecate(pTHX_ char *s)
a0d0e21e 317{
599cee73 318 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 319 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
320}
321
ffb4593c
NT
322/*
323 * depcom
9cbb5ea2 324 * Deprecate a comma-less variable list.
ffb4593c
NT
325 */
326
76e3520e 327STATIC void
cea2e8a9 328S_depcom(pTHX)
a0d0e21e
LW
329{
330 deprecate("comma-less variable list");
331}
332
ffb4593c 333/*
9cbb5ea2
GS
334 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
335 * utf16-to-utf8-reversed.
ffb4593c
NT
336 */
337
c39cd008
GS
338#ifdef PERL_CR_FILTER
339static void
340strip_return(SV *sv)
341{
342 register char *s = SvPVX(sv);
343 register char *e = s + SvCUR(sv);
344 /* outer loop optimized to do nothing if there are no CR-LFs */
345 while (s < e) {
346 if (*s++ == '\r' && *s == '\n') {
347 /* hit a CR-LF, need to copy the rest */
348 register char *d = s - 1;
349 *d++ = *s++;
350 while (s < e) {
351 if (*s == '\r' && s[1] == '\n')
352 s++;
353 *d++ = *s++;
354 }
355 SvCUR(sv) -= s - d;
356 return;
357 }
358 }
359}
a868473f 360
76e3520e 361STATIC I32
c39cd008 362S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 363{
c39cd008
GS
364 I32 count = FILTER_READ(idx+1, sv, maxlen);
365 if (count > 0 && !maxlen)
366 strip_return(sv);
367 return count;
a868473f
NIS
368}
369#endif
370
ffb4593c
NT
371/*
372 * Perl_lex_start
9cbb5ea2
GS
373 * Initialize variables. Uses the Perl save_stack to save its state (for
374 * recursive calls to the parser).
ffb4593c
NT
375 */
376
a0d0e21e 377void
864dbfa3 378Perl_lex_start(pTHX_ SV *line)
79072805 379{
8990e307
LW
380 char *s;
381 STRLEN len;
382
3280af22
NIS
383 SAVEI32(PL_lex_dojoin);
384 SAVEI32(PL_lex_brackets);
3280af22
NIS
385 SAVEI32(PL_lex_casemods);
386 SAVEI32(PL_lex_starts);
387 SAVEI32(PL_lex_state);
7766f137 388 SAVEVPTR(PL_lex_inpat);
3280af22 389 SAVEI32(PL_lex_inwhat);
18b09519
GS
390 if (PL_lex_state == LEX_KNOWNEXT) {
391 I32 toke = PL_nexttoke;
392 while (--toke >= 0) {
393 SAVEI32(PL_nexttype[toke]);
394 SAVEVPTR(PL_nextval[toke]);
395 }
396 SAVEI32(PL_nexttoke);
18b09519 397 }
57843af0 398 SAVECOPLINE(PL_curcop);
3280af22
NIS
399 SAVEPPTR(PL_bufptr);
400 SAVEPPTR(PL_bufend);
401 SAVEPPTR(PL_oldbufptr);
402 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
403 SAVEPPTR(PL_last_lop);
404 SAVEPPTR(PL_last_uni);
3280af22
NIS
405 SAVEPPTR(PL_linestart);
406 SAVESPTR(PL_linestr);
407 SAVEPPTR(PL_lex_brackstack);
408 SAVEPPTR(PL_lex_casestack);
c76ac1ee 409 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
410 SAVESPTR(PL_lex_stuff);
411 SAVEI32(PL_lex_defer);
09bef843 412 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 413 SAVESPTR(PL_lex_repl);
bebdddfc
GS
414 SAVEINT(PL_expect);
415 SAVEINT(PL_lex_expect);
3280af22
NIS
416
417 PL_lex_state = LEX_NORMAL;
418 PL_lex_defer = 0;
419 PL_expect = XSTATE;
420 PL_lex_brackets = 0;
3280af22
NIS
421 New(899, PL_lex_brackstack, 120, char);
422 New(899, PL_lex_casestack, 12, char);
423 SAVEFREEPV(PL_lex_brackstack);
424 SAVEFREEPV(PL_lex_casestack);
425 PL_lex_casemods = 0;
426 *PL_lex_casestack = '\0';
427 PL_lex_dojoin = 0;
428 PL_lex_starts = 0;
429 PL_lex_stuff = Nullsv;
430 PL_lex_repl = Nullsv;
431 PL_lex_inpat = 0;
76be56bc 432 PL_nexttoke = 0;
3280af22 433 PL_lex_inwhat = 0;
09bef843 434 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
435 PL_linestr = line;
436 if (SvREADONLY(PL_linestr))
437 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
438 s = SvPV(PL_linestr, len);
8990e307 439 if (len && s[len-1] != ';') {
3280af22
NIS
440 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
441 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
442 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 443 }
3280af22
NIS
444 SvTEMP_off(PL_linestr);
445 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
446 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 447 PL_last_lop = PL_last_uni = Nullch;
3280af22 448 PL_rsfp = 0;
79072805 449}
a687059c 450
ffb4593c
NT
451/*
452 * Perl_lex_end
9cbb5ea2
GS
453 * Finalizer for lexing operations. Must be called when the parser is
454 * done with the lexer.
ffb4593c
NT
455 */
456
463ee0b2 457void
864dbfa3 458Perl_lex_end(pTHX)
463ee0b2 459{
3280af22 460 PL_doextract = FALSE;
463ee0b2
LW
461}
462
ffb4593c
NT
463/*
464 * S_incline
465 * This subroutine has nothing to do with tilting, whether at windmills
466 * or pinball tables. Its name is short for "increment line". It
57843af0 467 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 468 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
469 * # line 500 "foo.pm"
470 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
471 */
472
76e3520e 473STATIC void
cea2e8a9 474S_incline(pTHX_ char *s)
463ee0b2
LW
475{
476 char *t;
477 char *n;
73659bf1 478 char *e;
463ee0b2 479 char ch;
463ee0b2 480
57843af0 481 CopLINE_inc(PL_curcop);
463ee0b2
LW
482 if (*s++ != '#')
483 return;
bf4acbe4 484 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
485 if (strnEQ(s, "line", 4))
486 s += 4;
487 else
488 return;
084592ab 489 if (SPACE_OR_TAB(*s))
73659bf1 490 s++;
4e553d73 491 else
73659bf1 492 return;
bf4acbe4 493 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
494 if (!isDIGIT(*s))
495 return;
496 n = s;
497 while (isDIGIT(*s))
498 s++;
bf4acbe4 499 while (SPACE_OR_TAB(*s))
463ee0b2 500 s++;
73659bf1 501 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 502 s++;
73659bf1
GS
503 e = t + 1;
504 }
463ee0b2 505 else {
463ee0b2 506 for (t = s; !isSPACE(*t); t++) ;
73659bf1 507 e = t;
463ee0b2 508 }
bf4acbe4 509 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
510 e++;
511 if (*e != '\n' && *e != '\0')
512 return; /* false alarm */
513
463ee0b2
LW
514 ch = *t;
515 *t = '\0';
f4dd75d9 516 if (t - s > 0) {
05ec9bb3 517 CopFILE_free(PL_curcop);
57843af0 518 CopFILE_set(PL_curcop, s);
f4dd75d9 519 }
463ee0b2 520 *t = ch;
57843af0 521 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
522}
523
ffb4593c
NT
524/*
525 * S_skipspace
526 * Called to gobble the appropriate amount and type of whitespace.
527 * Skips comments as well.
528 */
529
76e3520e 530STATIC char *
cea2e8a9 531S_skipspace(pTHX_ register char *s)
a687059c 532{
3280af22 533 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 534 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
535 s++;
536 return s;
537 }
538 for (;;) {
fd049845 539 STRLEN prevlen;
09bef843 540 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 541 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
542 while (s < PL_bufend && isSPACE(*s)) {
543 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
544 incline(s);
545 }
ffb4593c
NT
546
547 /* comment */
3280af22
NIS
548 if (s < PL_bufend && *s == '#') {
549 while (s < PL_bufend && *s != '\n')
463ee0b2 550 s++;
60e6418e 551 if (s < PL_bufend) {
463ee0b2 552 s++;
60e6418e
GS
553 if (PL_in_eval && !PL_rsfp) {
554 incline(s);
555 continue;
556 }
557 }
463ee0b2 558 }
ffb4593c
NT
559
560 /* only continue to recharge the buffer if we're at the end
561 * of the buffer, we're not reading from a source filter, and
562 * we're in normal lexing mode
563 */
09bef843
SB
564 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
565 PL_lex_state == LEX_FORMLINE)
463ee0b2 566 return s;
ffb4593c
NT
567
568 /* try to recharge the buffer */
9cbb5ea2
GS
569 if ((s = filter_gets(PL_linestr, PL_rsfp,
570 (prevlen = SvCUR(PL_linestr)))) == Nullch)
571 {
572 /* end of file. Add on the -p or -n magic */
3280af22
NIS
573 if (PL_minus_n || PL_minus_p) {
574 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
575 ";}continue{print or die qq(-p destination: $!\\n)" :
576 "");
3280af22
NIS
577 sv_catpv(PL_linestr,";}");
578 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
579 }
580 else
3280af22 581 sv_setpv(PL_linestr,";");
ffb4593c
NT
582
583 /* reset variables for next time we lex */
9cbb5ea2
GS
584 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
585 = SvPVX(PL_linestr);
3280af22 586 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 587 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
588
589 /* Close the filehandle. Could be from -P preprocessor,
590 * STDIN, or a regular file. If we were reading code from
591 * STDIN (because the commandline held no -e or filename)
592 * then we don't close it, we reset it so the code can
593 * read from STDIN too.
594 */
595
3280af22
NIS
596 if (PL_preprocess && !PL_in_eval)
597 (void)PerlProc_pclose(PL_rsfp);
598 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
599 PerlIO_clearerr(PL_rsfp);
8990e307 600 else
3280af22
NIS
601 (void)PerlIO_close(PL_rsfp);
602 PL_rsfp = Nullfp;
463ee0b2
LW
603 return s;
604 }
ffb4593c
NT
605
606 /* not at end of file, so we only read another line */
09bef843
SB
607 /* make corresponding updates to old pointers, for yyerror() */
608 oldprevlen = PL_oldbufptr - PL_bufend;
609 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
610 if (PL_last_uni)
611 oldunilen = PL_last_uni - PL_bufend;
612 if (PL_last_lop)
613 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
614 PL_linestart = PL_bufptr = s + prevlen;
615 PL_bufend = s + SvCUR(PL_linestr);
616 s = PL_bufptr;
09bef843
SB
617 PL_oldbufptr = s + oldprevlen;
618 PL_oldoldbufptr = s + oldoldprevlen;
619 if (PL_last_uni)
620 PL_last_uni = s + oldunilen;
621 if (PL_last_lop)
622 PL_last_lop = s + oldloplen;
a0d0e21e 623 incline(s);
ffb4593c
NT
624
625 /* debugger active and we're not compiling the debugger code,
626 * so store the line into the debugger's array of lines
627 */
3280af22 628 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
629 SV *sv = NEWSV(85,0);
630
631 sv_upgrade(sv, SVt_PVMG);
3280af22 632 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
633 (void)SvIOK_on(sv);
634 SvIVX(sv) = 0;
57843af0 635 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 636 }
463ee0b2 637 }
a687059c 638}
378cc40b 639
ffb4593c
NT
640/*
641 * S_check_uni
642 * Check the unary operators to ensure there's no ambiguity in how they're
643 * used. An ambiguous piece of code would be:
644 * rand + 5
645 * This doesn't mean rand() + 5. Because rand() is a unary operator,
646 * the +5 is its argument.
647 */
648
76e3520e 649STATIC void
cea2e8a9 650S_check_uni(pTHX)
ba106d47 651{
2f3197b3 652 char *s;
a0d0e21e 653 char *t;
2f3197b3 654
3280af22 655 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 656 return;
3280af22
NIS
657 while (isSPACE(*PL_last_uni))
658 PL_last_uni++;
7e2040f0 659 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 660 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 661 return;
0453d815 662 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 663 char ch = *s;
0453d815 664 *s = '\0';
4e553d73
NIS
665 Perl_warner(aTHX_ WARN_AMBIGUOUS,
666 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
667 PL_last_uni);
668 *s = ch;
669 }
2f3197b3
LW
670}
671
ffb4593c
NT
672/* workaround to replace the UNI() macro with a function. Only the
673 * hints/uts.sh file mentions this. Other comments elsewhere in the
674 * source indicate Microport Unix might need it too.
675 */
676
ffed7fef
LW
677#ifdef CRIPPLED_CC
678
679#undef UNI
ffed7fef 680#define UNI(f) return uni(f,s)
ffed7fef 681
76e3520e 682STATIC int
cea2e8a9 683S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
684{
685 yylval.ival = f;
3280af22
NIS
686 PL_expect = XTERM;
687 PL_bufptr = s;
8f872242
NIS
688 PL_last_uni = PL_oldbufptr;
689 PL_last_lop_op = f;
ffed7fef
LW
690 if (*s == '(')
691 return FUNC1;
692 s = skipspace(s);
693 if (*s == '(')
694 return FUNC1;
695 else
696 return UNIOP;
697}
698
a0d0e21e
LW
699#endif /* CRIPPLED_CC */
700
ffb4593c
NT
701/*
702 * LOP : macro to build a list operator. Its behaviour has been replaced
703 * with a subroutine, S_lop() for which LOP is just another name.
704 */
705
a0d0e21e
LW
706#define LOP(f,x) return lop(f,x,s)
707
ffb4593c
NT
708/*
709 * S_lop
710 * Build a list operator (or something that might be one). The rules:
711 * - if we have a next token, then it's a list operator [why?]
712 * - if the next thing is an opening paren, then it's a function
713 * - else it's a list operator
714 */
715
76e3520e 716STATIC I32
a0be28da 717S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 718{
79072805 719 yylval.ival = f;
35c8bce7 720 CLINE;
075953c3 721 REPORT("lop", f)
3280af22
NIS
722 PL_expect = x;
723 PL_bufptr = s;
724 PL_last_lop = PL_oldbufptr;
725 PL_last_lop_op = f;
726 if (PL_nexttoke)
a0d0e21e 727 return LSTOP;
79072805
LW
728 if (*s == '(')
729 return FUNC;
730 s = skipspace(s);
731 if (*s == '(')
732 return FUNC;
733 else
734 return LSTOP;
735}
736
ffb4593c
NT
737/*
738 * S_force_next
9cbb5ea2 739 * When the lexer realizes it knows the next token (for instance,
ffb4593c 740 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
741 * to know what token to return the next time the lexer is called. Caller
742 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
743 * handles the token correctly.
ffb4593c
NT
744 */
745
4e553d73 746STATIC void
cea2e8a9 747S_force_next(pTHX_ I32 type)
79072805 748{
3280af22
NIS
749 PL_nexttype[PL_nexttoke] = type;
750 PL_nexttoke++;
751 if (PL_lex_state != LEX_KNOWNEXT) {
752 PL_lex_defer = PL_lex_state;
753 PL_lex_expect = PL_expect;
754 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
755 }
756}
757
ffb4593c
NT
758/*
759 * S_force_word
760 * When the lexer knows the next thing is a word (for instance, it has
761 * just seen -> and it knows that the next char is a word char, then
762 * it calls S_force_word to stick the next word into the PL_next lookahead.
763 *
764 * Arguments:
b1b65b59 765 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
766 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
767 * int check_keyword : if true, Perl checks to make sure the word isn't
768 * a keyword (do this if the word is a label, e.g. goto FOO)
769 * int allow_pack : if true, : characters will also be allowed (require,
770 * use, etc. do this)
9cbb5ea2 771 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
772 */
773
76e3520e 774STATIC char *
cea2e8a9 775S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 776{
463ee0b2
LW
777 register char *s;
778 STRLEN len;
4e553d73 779
463ee0b2
LW
780 start = skipspace(start);
781 s = start;
7e2040f0 782 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 783 (allow_pack && *s == ':') ||
15f0808c 784 (allow_initial_tick && *s == '\'') )
a0d0e21e 785 {
3280af22
NIS
786 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
787 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
788 return start;
789 if (token == METHOD) {
790 s = skipspace(s);
791 if (*s == '(')
3280af22 792 PL_expect = XTERM;
463ee0b2 793 else {
3280af22 794 PL_expect = XOPERATOR;
463ee0b2 795 }
79072805 796 }
3280af22
NIS
797 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
798 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
799 force_next(token);
800 }
801 return s;
802}
803
ffb4593c
NT
804/*
805 * S_force_ident
9cbb5ea2 806 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
807 * text only contains the "foo" portion. The first argument is a pointer
808 * to the "foo", and the second argument is the type symbol to prefix.
809 * Forces the next token to be a "WORD".
9cbb5ea2 810 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
811 */
812
76e3520e 813STATIC void
cea2e8a9 814S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
815{
816 if (s && *s) {
11343788 817 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 818 PL_nextval[PL_nexttoke].opval = o;
79072805 819 force_next(WORD);
748a9306 820 if (kind) {
11343788 821 o->op_private = OPpCONST_ENTERED;
55497cff 822 /* XXX see note in pp_entereval() for why we forgo typo
823 warnings if the symbol must be introduced in an eval.
824 GSAR 96-10-12 */
3280af22 825 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
826 kind == '$' ? SVt_PV :
827 kind == '@' ? SVt_PVAV :
828 kind == '%' ? SVt_PVHV :
829 SVt_PVGV
830 );
748a9306 831 }
79072805
LW
832 }
833}
834
1571675a
GS
835NV
836Perl_str_to_version(pTHX_ SV *sv)
837{
838 NV retval = 0.0;
839 NV nshift = 1.0;
840 STRLEN len;
841 char *start = SvPVx(sv,len);
3aa33fe5 842 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
843 char *end = start + len;
844 while (start < end) {
ba210ebe 845 STRLEN skip;
1571675a
GS
846 UV n;
847 if (utf)
9041c2e3 848 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
849 else {
850 n = *(U8*)start;
851 skip = 1;
852 }
853 retval += ((NV)n)/nshift;
854 start += skip;
855 nshift *= 1000;
856 }
857 return retval;
858}
859
4e553d73 860/*
ffb4593c
NT
861 * S_force_version
862 * Forces the next token to be a version number.
e759cc13
RGS
863 * If the next token appears to be an invalid version number, (e.g. "v2b"),
864 * and if "guessing" is TRUE, then no new token is created (and the caller
865 * must use an alternative parsing method).
ffb4593c
NT
866 */
867
76e3520e 868STATIC char *
e759cc13 869S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 870{
871 OP *version = Nullop;
44dcb63b 872 char *d;
89bfa8cd 873
874 s = skipspace(s);
875
44dcb63b 876 d = s;
dd629d5b 877 if (*d == 'v')
44dcb63b 878 d++;
44dcb63b 879 if (isDIGIT(*d)) {
e759cc13
RGS
880 while (isDIGIT(*d) || *d == '_' || *d == '.')
881 d++;
9f3d182e 882 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 883 SV *ver;
b73d6f50 884 s = scan_num(s, &yylval);
89bfa8cd 885 version = yylval.opval;
dd629d5b
GS
886 ver = cSVOPx(version)->op_sv;
887 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 888 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
889 SvNVX(ver) = str_to_version(ver);
890 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 891 }
89bfa8cd 892 }
e759cc13
RGS
893 else if (guessing)
894 return s;
89bfa8cd 895 }
896
897 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 898 PL_nextval[PL_nexttoke].opval = version;
4e553d73 899 force_next(WORD);
89bfa8cd 900
e759cc13 901 return s;
89bfa8cd 902}
903
ffb4593c
NT
904/*
905 * S_tokeq
906 * Tokenize a quoted string passed in as an SV. It finds the next
907 * chunk, up to end of string or a backslash. It may make a new
908 * SV containing that chunk (if HINT_NEW_STRING is on). It also
909 * turns \\ into \.
910 */
911
76e3520e 912STATIC SV *
cea2e8a9 913S_tokeq(pTHX_ SV *sv)
79072805
LW
914{
915 register char *s;
916 register char *send;
917 register char *d;
b3ac6de7
IZ
918 STRLEN len = 0;
919 SV *pv = sv;
79072805
LW
920
921 if (!SvLEN(sv))
b3ac6de7 922 goto finish;
79072805 923
a0d0e21e 924 s = SvPV_force(sv, len);
21a311ee 925 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 926 goto finish;
463ee0b2 927 send = s + len;
79072805
LW
928 while (s < send && *s != '\\')
929 s++;
930 if (s == send)
b3ac6de7 931 goto finish;
79072805 932 d = s;
be4731d2 933 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 934 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
935 if (SvUTF8(sv))
936 SvUTF8_on(pv);
937 }
79072805
LW
938 while (s < send) {
939 if (*s == '\\') {
a0d0e21e 940 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
941 s++; /* all that, just for this */
942 }
943 *d++ = *s++;
944 }
945 *d = '\0';
463ee0b2 946 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 947 finish:
3280af22 948 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 949 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
950 return sv;
951}
952
ffb4593c
NT
953/*
954 * Now come three functions related to double-quote context,
955 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
956 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
957 * interact with PL_lex_state, and create fake ( ... ) argument lists
958 * to handle functions and concatenation.
959 * They assume that whoever calls them will be setting up a fake
960 * join call, because each subthing puts a ',' after it. This lets
961 * "lower \luPpEr"
962 * become
963 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
964 *
965 * (I'm not sure whether the spurious commas at the end of lcfirst's
966 * arguments and join's arguments are created or not).
967 */
968
969/*
970 * S_sublex_start
971 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
972 *
973 * Pattern matching will set PL_lex_op to the pattern-matching op to
974 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
975 *
976 * OP_CONST and OP_READLINE are easy--just make the new op and return.
977 *
978 * Everything else becomes a FUNC.
979 *
980 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
981 * had an OP_CONST or OP_READLINE). This just sets us up for a
982 * call to S_sublex_push().
983 */
984
76e3520e 985STATIC I32
cea2e8a9 986S_sublex_start(pTHX)
79072805
LW
987{
988 register I32 op_type = yylval.ival;
79072805
LW
989
990 if (op_type == OP_NULL) {
3280af22
NIS
991 yylval.opval = PL_lex_op;
992 PL_lex_op = Nullop;
79072805
LW
993 return THING;
994 }
995 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 996 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
997
998 if (SvTYPE(sv) == SVt_PVIV) {
999 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1000 STRLEN len;
1001 char *p;
1002 SV *nsv;
1003
1004 p = SvPV(sv, len);
79cb57f6 1005 nsv = newSVpvn(p, len);
01ec43d0
GS
1006 if (SvUTF8(sv))
1007 SvUTF8_on(nsv);
b3ac6de7
IZ
1008 SvREFCNT_dec(sv);
1009 sv = nsv;
4e553d73 1010 }
b3ac6de7 1011 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1012 PL_lex_stuff = Nullsv;
79072805
LW
1013 return THING;
1014 }
1015
3280af22
NIS
1016 PL_sublex_info.super_state = PL_lex_state;
1017 PL_sublex_info.sub_inwhat = op_type;
1018 PL_sublex_info.sub_op = PL_lex_op;
1019 PL_lex_state = LEX_INTERPPUSH;
55497cff 1020
3280af22
NIS
1021 PL_expect = XTERM;
1022 if (PL_lex_op) {
1023 yylval.opval = PL_lex_op;
1024 PL_lex_op = Nullop;
55497cff 1025 return PMFUNC;
1026 }
1027 else
1028 return FUNC;
1029}
1030
ffb4593c
NT
1031/*
1032 * S_sublex_push
1033 * Create a new scope to save the lexing state. The scope will be
1034 * ended in S_sublex_done. Returns a '(', starting the function arguments
1035 * to the uc, lc, etc. found before.
1036 * Sets PL_lex_state to LEX_INTERPCONCAT.
1037 */
1038
76e3520e 1039STATIC I32
cea2e8a9 1040S_sublex_push(pTHX)
55497cff 1041{
f46d017c 1042 ENTER;
55497cff 1043
3280af22
NIS
1044 PL_lex_state = PL_sublex_info.super_state;
1045 SAVEI32(PL_lex_dojoin);
1046 SAVEI32(PL_lex_brackets);
3280af22
NIS
1047 SAVEI32(PL_lex_casemods);
1048 SAVEI32(PL_lex_starts);
1049 SAVEI32(PL_lex_state);
7766f137 1050 SAVEVPTR(PL_lex_inpat);
3280af22 1051 SAVEI32(PL_lex_inwhat);
57843af0 1052 SAVECOPLINE(PL_curcop);
3280af22 1053 SAVEPPTR(PL_bufptr);
8452ff4b 1054 SAVEPPTR(PL_bufend);
3280af22
NIS
1055 SAVEPPTR(PL_oldbufptr);
1056 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1057 SAVEPPTR(PL_last_lop);
1058 SAVEPPTR(PL_last_uni);
3280af22
NIS
1059 SAVEPPTR(PL_linestart);
1060 SAVESPTR(PL_linestr);
1061 SAVEPPTR(PL_lex_brackstack);
1062 SAVEPPTR(PL_lex_casestack);
1063
1064 PL_linestr = PL_lex_stuff;
1065 PL_lex_stuff = Nullsv;
1066
9cbb5ea2
GS
1067 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1068 = SvPVX(PL_linestr);
3280af22 1069 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1070 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1071 SAVEFREESV(PL_linestr);
1072
1073 PL_lex_dojoin = FALSE;
1074 PL_lex_brackets = 0;
3280af22
NIS
1075 New(899, PL_lex_brackstack, 120, char);
1076 New(899, PL_lex_casestack, 12, char);
1077 SAVEFREEPV(PL_lex_brackstack);
1078 SAVEFREEPV(PL_lex_casestack);
1079 PL_lex_casemods = 0;
1080 *PL_lex_casestack = '\0';
1081 PL_lex_starts = 0;
1082 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1083 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1084
1085 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1086 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1087 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1088 else
3280af22 1089 PL_lex_inpat = Nullop;
79072805 1090
55497cff 1091 return '(';
79072805
LW
1092}
1093
ffb4593c
NT
1094/*
1095 * S_sublex_done
1096 * Restores lexer state after a S_sublex_push.
1097 */
1098
76e3520e 1099STATIC I32
cea2e8a9 1100S_sublex_done(pTHX)
79072805 1101{
3280af22 1102 if (!PL_lex_starts++) {
9aa983d2
JH
1103 SV *sv = newSVpvn("",0);
1104 if (SvUTF8(PL_linestr))
1105 SvUTF8_on(sv);
3280af22 1106 PL_expect = XOPERATOR;
9aa983d2 1107 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1108 return THING;
1109 }
1110
3280af22
NIS
1111 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1112 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1113 return yylex();
79072805
LW
1114 }
1115
ffb4593c 1116 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1117 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1118 PL_linestr = PL_lex_repl;
1119 PL_lex_inpat = 0;
1120 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1121 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1122 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1123 SAVEFREESV(PL_linestr);
1124 PL_lex_dojoin = FALSE;
1125 PL_lex_brackets = 0;
3280af22
NIS
1126 PL_lex_casemods = 0;
1127 *PL_lex_casestack = '\0';
1128 PL_lex_starts = 0;
25da4f38 1129 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1130 PL_lex_state = LEX_INTERPNORMAL;
1131 PL_lex_starts++;
e9fa98b2
HS
1132 /* we don't clear PL_lex_repl here, so that we can check later
1133 whether this is an evalled subst; that means we rely on the
1134 logic to ensure sublex_done() is called again only via the
1135 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1136 }
e9fa98b2 1137 else {
3280af22 1138 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1139 PL_lex_repl = Nullsv;
1140 }
79072805 1141 return ',';
ffed7fef
LW
1142 }
1143 else {
f46d017c 1144 LEAVE;
3280af22
NIS
1145 PL_bufend = SvPVX(PL_linestr);
1146 PL_bufend += SvCUR(PL_linestr);
1147 PL_expect = XOPERATOR;
09bef843 1148 PL_sublex_info.sub_inwhat = 0;
79072805 1149 return ')';
ffed7fef
LW
1150 }
1151}
1152
02aa26ce
NT
1153/*
1154 scan_const
1155
1156 Extracts a pattern, double-quoted string, or transliteration. This
1157 is terrifying code.
1158
3280af22
NIS
1159 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1160 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1161 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1162
9b599b2a
GS
1163 Returns a pointer to the character scanned up to. Iff this is
1164 advanced from the start pointer supplied (ie if anything was
1165 successfully parsed), will leave an OP for the substring scanned
1166 in yylval. Caller must intuit reason for not parsing further
1167 by looking at the next characters herself.
1168
02aa26ce
NT
1169 In patterns:
1170 backslashes:
1171 double-quoted style: \r and \n
1172 regexp special ones: \D \s
1173 constants: \x3
1174 backrefs: \1 (deprecated in substitution replacements)
1175 case and quoting: \U \Q \E
1176 stops on @ and $, but not for $ as tail anchor
1177
1178 In transliterations:
1179 characters are VERY literal, except for - not at the start or end
1180 of the string, which indicates a range. scan_const expands the
1181 range to the full set of intermediate characters.
1182
1183 In double-quoted strings:
1184 backslashes:
1185 double-quoted style: \r and \n
1186 constants: \x3
1187 backrefs: \1 (deprecated)
1188 case and quoting: \U \Q \E
1189 stops on @ and $
1190
1191 scan_const does *not* construct ops to handle interpolated strings.
1192 It stops processing as soon as it finds an embedded $ or @ variable
1193 and leaves it to the caller to work out what's going on.
1194
1195 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1196
1197 $ in pattern could be $foo or could be tail anchor. Assumption:
1198 it's a tail anchor if $ is the last thing in the string, or if it's
1199 followed by one of ")| \n\t"
1200
1201 \1 (backreferences) are turned into $1
1202
1203 The structure of the code is
1204 while (there's a character to process) {
1205 handle transliteration ranges
1206 skip regexp comments
1207 skip # initiated comments in //x patterns
1208 check for embedded @foo
1209 check for embedded scalars
1210 if (backslash) {
1211 leave intact backslashes from leave (below)
1212 deprecate \1 in strings and sub replacements
1213 handle string-changing backslashes \l \U \Q \E, etc.
1214 switch (what was escaped) {
1215 handle - in a transliteration (becomes a literal -)
1216 handle \132 octal characters
1217 handle 0x15 hex characters
1218 handle \cV (control V)
1219 handle printf backslashes (\f, \r, \n, etc)
1220 } (end switch)
1221 } (end if backslash)
1222 } (end while character to read)
4e553d73 1223
02aa26ce
NT
1224*/
1225
76e3520e 1226STATIC char *
cea2e8a9 1227S_scan_const(pTHX_ char *start)
79072805 1228{
3280af22 1229 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1230 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1231 register char *s = start; /* start of the constant */
1232 register char *d = SvPVX(sv); /* destination for copies */
1233 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1234 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1235 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1236 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1237 UV uv;
1238
dff6d3cd 1239 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1240 PL_lex_inpat
4a2d328f 1241 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1242 : "";
79072805 1243
2b9d42f0
NIS
1244 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1245 /* If we are doing a trans and we know we want UTF8 set expectation */
1246 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1247 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1248 }
1249
1250
79072805 1251 while (s < send || dorange) {
02aa26ce 1252 /* get transliterations out of the way (they're most literal) */
3280af22 1253 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1254 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1255 if (dorange) {
1ba5c669
JH
1256 I32 i; /* current expanded character */
1257 I32 min; /* first character in range */
1258 I32 max; /* last character in range */
02aa26ce 1259
2b9d42f0 1260 if (has_utf8) {
8973db79
JH
1261 char *c = (char*)utf8_hop((U8*)d, -1);
1262 char *e = d++;
1263 while (e-- > c)
1264 *(e + 1) = *e;
25716404 1265 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1266 /* mark the range as done, and continue */
1267 dorange = FALSE;
1268 didrange = TRUE;
1269 continue;
1270 }
2b9d42f0 1271
02aa26ce 1272 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1273 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1274 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1275 d -= 2; /* eat the first char and the - */
1276
8ada0baa
JH
1277 min = (U8)*d; /* first char in range */
1278 max = (U8)d[1]; /* last char in range */
1279
c2e66d9e 1280 if (min > max) {
01ec43d0 1281 Perl_croak(aTHX_
1ba5c669
JH
1282 "Invalid [] range \"%c-%c\" in transliteration operator",
1283 (char)min, (char)max);
c2e66d9e
GS
1284 }
1285
c7f1f016 1286#ifdef EBCDIC
8ada0baa
JH
1287 if ((isLOWER(min) && isLOWER(max)) ||
1288 (isUPPER(min) && isUPPER(max))) {
1289 if (isLOWER(min)) {
1290 for (i = min; i <= max; i++)
1291 if (isLOWER(i))
db42d148 1292 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1293 } else {
1294 for (i = min; i <= max; i++)
1295 if (isUPPER(i))
db42d148 1296 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1297 }
1298 }
1299 else
1300#endif
1301 for (i = min; i <= max; i++)
1302 *d++ = i;
02aa26ce
NT
1303
1304 /* mark the range as done, and continue */
79072805 1305 dorange = FALSE;
01ec43d0 1306 didrange = TRUE;
79072805 1307 continue;
4e553d73 1308 }
02aa26ce
NT
1309
1310 /* range begins (ignore - as first or last char) */
79072805 1311 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1312 if (didrange) {
1fafa243 1313 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1314 }
2b9d42f0 1315 if (has_utf8) {
25716404 1316 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1317 s++;
1318 continue;
1319 }
79072805
LW
1320 dorange = TRUE;
1321 s++;
01ec43d0
GS
1322 }
1323 else {
1324 didrange = FALSE;
1325 }
79072805 1326 }
02aa26ce
NT
1327
1328 /* if we get here, we're not doing a transliteration */
1329
0f5d15d6
IZ
1330 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1331 except for the last char, which will be done separately. */
3280af22 1332 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1333 if (s[2] == '#') {
1334 while (s < send && *s != ')')
db42d148 1335 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1336 }
1337 else if (s[2] == '{' /* This should match regcomp.c */
1338 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1339 {
cc6b7395 1340 I32 count = 1;
0f5d15d6 1341 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1342 char c;
1343
d9f97599
GS
1344 while (count && (c = *regparse)) {
1345 if (c == '\\' && regparse[1])
1346 regparse++;
4e553d73 1347 else if (c == '{')
cc6b7395 1348 count++;
4e553d73 1349 else if (c == '}')
cc6b7395 1350 count--;
d9f97599 1351 regparse++;
cc6b7395 1352 }
5bdf89e7
IZ
1353 if (*regparse != ')') {
1354 regparse--; /* Leave one char for continuation. */
cc6b7395 1355 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1356 }
0f5d15d6 1357 while (s < regparse)
db42d148 1358 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1359 }
748a9306 1360 }
02aa26ce
NT
1361
1362 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1363 else if (*s == '#' && PL_lex_inpat &&
1364 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1365 while (s+1 < send && *s != '\n')
db42d148 1366 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1367 }
02aa26ce 1368
5d1d4326
JH
1369 /* check for embedded arrays
1370 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1371 */
7e2040f0 1372 else if (*s == '@' && s[1]
5d1d4326 1373 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1374 break;
02aa26ce
NT
1375
1376 /* check for embedded scalars. only stop if we're sure it's a
1377 variable.
1378 */
79072805 1379 else if (*s == '$') {
3280af22 1380 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1381 break;
6002328a 1382 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1383 break; /* in regexp, $ might be tail anchor */
1384 }
02aa26ce 1385
2b9d42f0
NIS
1386 /* End of else if chain - OP_TRANS rejoin rest */
1387
02aa26ce 1388 /* backslashes */
79072805
LW
1389 if (*s == '\\' && s+1 < send) {
1390 s++;
02aa26ce
NT
1391
1392 /* some backslashes we leave behind */
c9f97d15 1393 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1394 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1395 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1396 continue;
1397 }
02aa26ce
NT
1398
1399 /* deprecate \1 in strings and substitution replacements */
3280af22 1400 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1401 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1402 {
599cee73 1403 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1404 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1405 *--s = '$';
1406 break;
1407 }
02aa26ce
NT
1408
1409 /* string-change backslash escapes */
3280af22 1410 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1411 --s;
1412 break;
1413 }
02aa26ce
NT
1414
1415 /* if we get here, it's either a quoted -, or a digit */
79072805 1416 switch (*s) {
02aa26ce
NT
1417
1418 /* quoted - in transliterations */
79072805 1419 case '-':
3280af22 1420 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1421 *d++ = *s++;
1422 continue;
1423 }
1424 /* FALL THROUGH */
1425 default:
11b8faa4 1426 {
707afd92
MS
1427 if (ckWARN(WARN_MISC) &&
1428 isALNUM(*s) &&
1429 *s != '_')
4e553d73 1430 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1431 "Unrecognized escape \\%c passed through",
1432 *s);
1433 /* default action is to copy the quoted character */
f9a63242 1434 goto default_action;
11b8faa4 1435 }
02aa26ce
NT
1436
1437 /* \132 indicates an octal constant */
79072805
LW
1438 case '0': case '1': case '2': case '3':
1439 case '4': case '5': case '6': case '7':
ba210ebe 1440 {
53305cf1
NC
1441 I32 flags = 0;
1442 STRLEN len = 3;
1443 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1444 s += len;
1445 }
012bcf8d 1446 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1447
1448 /* \x24 indicates a hex constant */
79072805 1449 case 'x':
a0ed51b3
LW
1450 ++s;
1451 if (*s == '{') {
1452 char* e = strchr(s, '}');
a4c04bdc
NC
1453 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1454 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1455 STRLEN len;
355860ce 1456
53305cf1 1457 ++s;
adaeee49 1458 if (!e) {
a0ed51b3 1459 yyerror("Missing right brace on \\x{}");
355860ce 1460 continue;
ba210ebe 1461 }
53305cf1
NC
1462 len = e - s;
1463 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1464 s = e + 1;
a0ed51b3
LW
1465 }
1466 else {
ba210ebe 1467 {
53305cf1 1468 STRLEN len = 2;
a4c04bdc 1469 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1470 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1471 s += len;
1472 }
012bcf8d
GS
1473 }
1474
1475 NUM_ESCAPE_INSERT:
1476 /* Insert oct or hex escaped character.
301d3d20 1477 * There will always enough room in sv since such
db42d148 1478 * escapes will be longer than any UTF-8 sequence
301d3d20 1479 * they can end up as. */
ba7cea30 1480
c7f1f016
NIS
1481 /* We need to map to chars to ASCII before doing the tests
1482 to cover EBCDIC
1483 */
c4d5f83a 1484 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1485 if (!has_utf8 && uv > 255) {
301d3d20
JH
1486 /* Might need to recode whatever we have
1487 * accumulated so far if it contains any
1488 * hibit chars.
1489 *
1490 * (Can't we keep track of that and avoid
1491 * this rescan? --jhi)
012bcf8d 1492 */
c7f1f016 1493 int hicount = 0;
63cd0674
NIS
1494 U8 *c;
1495 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1496 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1497 hicount++;
db42d148 1498 }
012bcf8d 1499 }
63cd0674 1500 if (hicount) {
db42d148
NIS
1501 STRLEN offset = d - SvPVX(sv);
1502 U8 *src, *dst;
1503 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1504 src = (U8 *)d - 1;
1505 dst = src+hicount;
1506 d += hicount;
1507 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1508 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1509 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1510 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1511 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1512 }
1513 else {
63cd0674 1514 *dst-- = *src;
012bcf8d 1515 }
c7f1f016 1516 src--;
012bcf8d
GS
1517 }
1518 }
1519 }
1520
9aa983d2 1521 if (has_utf8 || uv > 255) {
9041c2e3 1522 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1523 has_utf8 = TRUE;
f9a63242
JH
1524 if (PL_lex_inwhat == OP_TRANS &&
1525 PL_sublex_info.sub_op) {
1526 PL_sublex_info.sub_op->op_private |=
1527 (PL_lex_repl ? OPpTRANS_FROM_UTF
1528 : OPpTRANS_TO_UTF);
f9a63242 1529 }
012bcf8d 1530 }
a0ed51b3 1531 else {
012bcf8d 1532 *d++ = (char)uv;
a0ed51b3 1533 }
012bcf8d
GS
1534 }
1535 else {
c4d5f83a 1536 *d++ = (char) uv;
a0ed51b3 1537 }
79072805 1538 continue;
02aa26ce 1539
b239daa5 1540 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1541 case 'N':
55eda711 1542 ++s;
423cee85
JH
1543 if (*s == '{') {
1544 char* e = strchr(s, '}');
155aba94 1545 SV *res;
423cee85
JH
1546 STRLEN len;
1547 char *str;
4e553d73 1548
423cee85 1549 if (!e) {
5777a3f7 1550 yyerror("Missing right brace on \\N{}");
423cee85
JH
1551 e = s - 1;
1552 goto cont_scan;
1553 }
55eda711
JH
1554 res = newSVpvn(s + 1, e - s - 1);
1555 res = new_constant( Nullch, 0, "charnames",
1556 res, Nullsv, "\\N{...}" );
f9a63242
JH
1557 if (has_utf8)
1558 sv_utf8_upgrade(res);
423cee85 1559 str = SvPV(res,len);
1c47067b
JH
1560#ifdef EBCDIC_NEVER_MIND
1561 /* charnames uses pack U and that has been
1562 * recently changed to do the below uni->native
1563 * mapping, so this would be redundant (and wrong,
1564 * the code point would be doubly converted).
1565 * But leave this in just in case the pack U change
1566 * gets revoked, but the semantics is still
1567 * desireable for charnames. --jhi */
cddc7ef4
JH
1568 {
1569 UV uv = utf8_to_uvchr((U8*)str, 0);
1570
1571 if (uv < 0x100) {
1572 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1573
1574 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1575 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1576 str = SvPV(res, len);
1577 }
1578 }
1579#endif
89491803 1580 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1581 char *ostart = SvPVX(sv);
1582 SvCUR_set(sv, d - ostart);
1583 SvPOK_on(sv);
e4f3eed8 1584 *d = '\0';
f08d6ad9 1585 sv_utf8_upgrade(sv);
d2f449dd
SB
1586 /* this just broke our allocation above... */
1587 SvGROW(sv, send - start);
f08d6ad9 1588 d = SvPVX(sv) + SvCUR(sv);
89491803 1589 has_utf8 = TRUE;
f08d6ad9 1590 }
b239daa5 1591 if (len > e - s + 4) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1592 char *odest = SvPVX(sv);
1593
8973db79 1594 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1595 d = SvPVX(sv) + (d - odest);
1596 }
1597 Copy(str, d, len, char);
1598 d += len;
1599 SvREFCNT_dec(res);
1600 cont_scan:
1601 s = e + 1;
1602 }
1603 else
5777a3f7 1604 yyerror("Missing braces on \\N{}");
423cee85
JH
1605 continue;
1606
02aa26ce 1607 /* \c is a control character */
79072805
LW
1608 case 'c':
1609 s++;
ba210ebe
JH
1610 {
1611 U8 c = *s++;
c7f1f016
NIS
1612#ifdef EBCDIC
1613 if (isLOWER(c))
1614 c = toUPPER(c);
1615#endif
db42d148 1616 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1617 }
79072805 1618 continue;
02aa26ce
NT
1619
1620 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1621 case 'b':
db42d148 1622 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1623 break;
1624 case 'n':
db42d148 1625 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1626 break;
1627 case 'r':
db42d148 1628 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1629 break;
1630 case 'f':
db42d148 1631 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1632 break;
1633 case 't':
db42d148 1634 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1635 break;
34a3fe2a 1636 case 'e':
db42d148 1637 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1638 break;
1639 case 'a':
db42d148 1640 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1641 break;
02aa26ce
NT
1642 } /* end switch */
1643
79072805
LW
1644 s++;
1645 continue;
02aa26ce
NT
1646 } /* end if (backslash) */
1647
f9a63242 1648 default_action:
2b9d42f0
NIS
1649 /* If we started with encoded form, or already know we want it
1650 and then encode the next character */
1651 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1652 STRLEN len = 1;
1653 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1654 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1655 s += len;
1656 if (need > len) {
1657 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1658 STRLEN off = d - SvPVX(sv);
1659 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1660 }
1661 d = (char*)uvchr_to_utf8((U8*)d, uv);
1662 has_utf8 = TRUE;
1663 }
1664 else {
1665 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1666 }
02aa26ce
NT
1667 } /* while loop to process each character */
1668
1669 /* terminate the string and set up the sv */
79072805 1670 *d = '\0';
463ee0b2 1671 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1672 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1673 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1674
79072805 1675 SvPOK_on(sv);
9f4817db 1676 if (PL_encoding && !has_utf8) {
5d170f3a 1677 Perl_sv_recode_to_utf8(aTHX_ sv, PL_encoding);
9f4817db
JH
1678 has_utf8 = TRUE;
1679 }
2b9d42f0 1680 if (has_utf8) {
7e2040f0 1681 SvUTF8_on(sv);
2b9d42f0
NIS
1682 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1683 PL_sublex_info.sub_op->op_private |=
1684 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1685 }
1686 }
79072805 1687
02aa26ce 1688 /* shrink the sv if we allocated more than we used */
79072805
LW
1689 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1690 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1691 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1692 }
02aa26ce 1693
9b599b2a 1694 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1695 if (s > PL_bufptr) {
1696 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1697 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1698 sv, Nullsv,
4e553d73 1699 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1700 ? "tr"
3280af22 1701 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1702 ? "s"
1703 : "qq")));
79072805 1704 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1705 } else
8990e307 1706 SvREFCNT_dec(sv);
79072805
LW
1707 return s;
1708}
1709
ffb4593c
NT
1710/* S_intuit_more
1711 * Returns TRUE if there's more to the expression (e.g., a subscript),
1712 * FALSE otherwise.
ffb4593c
NT
1713 *
1714 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1715 *
1716 * ->[ and ->{ return TRUE
1717 * { and [ outside a pattern are always subscripts, so return TRUE
1718 * if we're outside a pattern and it's not { or [, then return FALSE
1719 * if we're in a pattern and the first char is a {
1720 * {4,5} (any digits around the comma) returns FALSE
1721 * if we're in a pattern and the first char is a [
1722 * [] returns FALSE
1723 * [SOMETHING] has a funky algorithm to decide whether it's a
1724 * character class or not. It has to deal with things like
1725 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1726 * anything else returns TRUE
1727 */
1728
9cbb5ea2
GS
1729/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1730
76e3520e 1731STATIC int
cea2e8a9 1732S_intuit_more(pTHX_ register char *s)
79072805 1733{
3280af22 1734 if (PL_lex_brackets)
79072805
LW
1735 return TRUE;
1736 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1737 return TRUE;
1738 if (*s != '{' && *s != '[')
1739 return FALSE;
3280af22 1740 if (!PL_lex_inpat)
79072805
LW
1741 return TRUE;
1742
1743 /* In a pattern, so maybe we have {n,m}. */
1744 if (*s == '{') {
1745 s++;
1746 if (!isDIGIT(*s))
1747 return TRUE;
1748 while (isDIGIT(*s))
1749 s++;
1750 if (*s == ',')
1751 s++;
1752 while (isDIGIT(*s))
1753 s++;
1754 if (*s == '}')
1755 return FALSE;
1756 return TRUE;
1757
1758 }
1759
1760 /* On the other hand, maybe we have a character class */
1761
1762 s++;
1763 if (*s == ']' || *s == '^')
1764 return FALSE;
1765 else {
ffb4593c 1766 /* this is terrifying, and it works */
79072805
LW
1767 int weight = 2; /* let's weigh the evidence */
1768 char seen[256];
f27ffc4a 1769 unsigned char un_char = 255, last_un_char;
93a17b20 1770 char *send = strchr(s,']');
3280af22 1771 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1772
1773 if (!send) /* has to be an expression */
1774 return TRUE;
1775
1776 Zero(seen,256,char);
1777 if (*s == '$')
1778 weight -= 3;
1779 else if (isDIGIT(*s)) {
1780 if (s[1] != ']') {
1781 if (isDIGIT(s[1]) && s[2] == ']')
1782 weight -= 10;
1783 }
1784 else
1785 weight -= 100;
1786 }
1787 for (; s < send; s++) {
1788 last_un_char = un_char;
1789 un_char = (unsigned char)*s;
1790 switch (*s) {
1791 case '@':
1792 case '&':
1793 case '$':
1794 weight -= seen[un_char] * 10;
7e2040f0 1795 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1796 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1797 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1798 weight -= 100;
1799 else
1800 weight -= 10;
1801 }
1802 else if (*s == '$' && s[1] &&
93a17b20
LW
1803 strchr("[#!%*<>()-=",s[1])) {
1804 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1805 weight -= 10;
1806 else
1807 weight -= 1;
1808 }
1809 break;
1810 case '\\':
1811 un_char = 254;
1812 if (s[1]) {
93a17b20 1813 if (strchr("wds]",s[1]))
79072805
LW
1814 weight += 100;
1815 else if (seen['\''] || seen['"'])
1816 weight += 1;
93a17b20 1817 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1818 weight += 40;
1819 else if (isDIGIT(s[1])) {
1820 weight += 40;
1821 while (s[1] && isDIGIT(s[1]))
1822 s++;
1823 }
1824 }
1825 else
1826 weight += 100;
1827 break;
1828 case '-':
1829 if (s[1] == '\\')
1830 weight += 50;
93a17b20 1831 if (strchr("aA01! ",last_un_char))
79072805 1832 weight += 30;
93a17b20 1833 if (strchr("zZ79~",s[1]))
79072805 1834 weight += 30;
f27ffc4a
GS
1835 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1836 weight -= 5; /* cope with negative subscript */
79072805
LW
1837 break;
1838 default:
93a17b20 1839 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1840 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1841 char *d = tmpbuf;
1842 while (isALPHA(*s))
1843 *d++ = *s++;
1844 *d = '\0';
1845 if (keyword(tmpbuf, d - tmpbuf))
1846 weight -= 150;
1847 }
1848 if (un_char == last_un_char + 1)
1849 weight += 5;
1850 weight -= seen[un_char];
1851 break;
1852 }
1853 seen[un_char]++;
1854 }
1855 if (weight >= 0) /* probably a character class */
1856 return FALSE;
1857 }
1858
1859 return TRUE;
1860}
ffed7fef 1861
ffb4593c
NT
1862/*
1863 * S_intuit_method
1864 *
1865 * Does all the checking to disambiguate
1866 * foo bar
1867 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1868 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1869 *
1870 * First argument is the stuff after the first token, e.g. "bar".
1871 *
1872 * Not a method if bar is a filehandle.
1873 * Not a method if foo is a subroutine prototyped to take a filehandle.
1874 * Not a method if it's really "Foo $bar"
1875 * Method if it's "foo $bar"
1876 * Not a method if it's really "print foo $bar"
1877 * Method if it's really "foo package::" (interpreted as package->foo)
1878 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1879 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1880 * =>
1881 */
1882
76e3520e 1883STATIC int
cea2e8a9 1884S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1885{
1886 char *s = start + (*start == '$');
3280af22 1887 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1888 STRLEN len;
1889 GV* indirgv;
1890
1891 if (gv) {
b6c543e3 1892 CV *cv;
a0d0e21e
LW
1893 if (GvIO(gv))
1894 return 0;
b6c543e3
IZ
1895 if ((cv = GvCVu(gv))) {
1896 char *proto = SvPVX(cv);
1897 if (proto) {
1898 if (*proto == ';')
1899 proto++;
1900 if (*proto == '*')
1901 return 0;
1902 }
1903 } else
a0d0e21e
LW
1904 gv = 0;
1905 }
8903cb82 1906 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1907 /* start is the beginning of the possible filehandle/object,
1908 * and s is the end of it
1909 * tmpbuf is a copy of it
1910 */
1911
a0d0e21e 1912 if (*start == '$') {
3280af22 1913 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1914 return 0;
1915 s = skipspace(s);
3280af22
NIS
1916 PL_bufptr = start;
1917 PL_expect = XREF;
a0d0e21e
LW
1918 return *s == '(' ? FUNCMETH : METHOD;
1919 }
1920 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1921 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1922 len -= 2;
1923 tmpbuf[len] = '\0';
1924 goto bare_package;
1925 }
1926 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1927 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1928 return 0;
1929 /* filehandle or package name makes it a method */
89bfa8cd 1930 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1931 s = skipspace(s);
3280af22 1932 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1933 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1934 bare_package:
3280af22 1935 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1936 newSVpvn(tmpbuf,len));
3280af22
NIS
1937 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1938 PL_expect = XTERM;
a0d0e21e 1939 force_next(WORD);
3280af22 1940 PL_bufptr = s;
a0d0e21e
LW
1941 return *s == '(' ? FUNCMETH : METHOD;
1942 }
1943 }
1944 return 0;
1945}
1946
ffb4593c
NT
1947/*
1948 * S_incl_perldb
1949 * Return a string of Perl code to load the debugger. If PERL5DB
1950 * is set, it will return the contents of that, otherwise a
1951 * compile-time require of perl5db.pl.
1952 */
1953
76e3520e 1954STATIC char*
cea2e8a9 1955S_incl_perldb(pTHX)
a0d0e21e 1956{
3280af22 1957 if (PL_perldb) {
76e3520e 1958 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1959
1960 if (pdb)
1961 return pdb;
91487cfc 1962 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1963 return "BEGIN { require 'perl5db.pl' }";
1964 }
1965 return "";
1966}
1967
1968
16d20bd9 1969/* Encoded script support. filter_add() effectively inserts a
4e553d73 1970 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1971 * Note that the filter function only applies to the current source file
1972 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1973 *
1974 * The datasv parameter (which may be NULL) can be used to pass
1975 * private data to this instance of the filter. The filter function
1976 * can recover the SV using the FILTER_DATA macro and use it to
1977 * store private buffers and state information.
1978 *
1979 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1980 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1981 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1982 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1983 * private use must be set using malloc'd pointers.
1984 */
16d20bd9
AD
1985
1986SV *
864dbfa3 1987Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1988{
f4c556ac
GS
1989 if (!funcp)
1990 return Nullsv;
1991
3280af22
NIS
1992 if (!PL_rsfp_filters)
1993 PL_rsfp_filters = newAV();
16d20bd9 1994 if (!datasv)
8c52afec 1995 datasv = NEWSV(255,0);
16d20bd9 1996 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1997 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1998 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1999 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2000 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2001 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2002 av_unshift(PL_rsfp_filters, 1);
2003 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2004 return(datasv);
2005}
4e553d73 2006
16d20bd9
AD
2007
2008/* Delete most recently added instance of this filter function. */
a0d0e21e 2009void
864dbfa3 2010Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2011{
e0c19803 2012 SV *datasv;
fe5a182c 2013 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2014 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2015 return;
2016 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2017 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2018 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2019 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2020 IoANY(datasv) = (void *)NULL;
3280af22 2021 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2022
16d20bd9
AD
2023 return;
2024 }
2025 /* we need to search for the correct entry and clear it */
cea2e8a9 2026 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2027}
2028
2029
2030/* Invoke the n'th filter function for the current rsfp. */
2031I32
864dbfa3 2032Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
2033
2034
8ac85365 2035 /* 0 = read one text line */
a0d0e21e 2036{
16d20bd9
AD
2037 filter_t funcp;
2038 SV *datasv = NULL;
e50aee73 2039
3280af22 2040 if (!PL_rsfp_filters)
16d20bd9 2041 return -1;
3280af22 2042 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2043 /* Provide a default input filter to make life easy. */
2044 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2045 DEBUG_P(PerlIO_printf(Perl_debug_log,
2046 "filter_read %d: from rsfp\n", idx));
4e553d73 2047 if (maxlen) {
16d20bd9
AD
2048 /* Want a block */
2049 int len ;
2050 int old_len = SvCUR(buf_sv) ;
2051
2052 /* ensure buf_sv is large enough */
2053 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2054 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2055 if (PerlIO_error(PL_rsfp))
37120919
AD
2056 return -1; /* error */
2057 else
2058 return 0 ; /* end of file */
2059 }
16d20bd9
AD
2060 SvCUR_set(buf_sv, old_len + len) ;
2061 } else {
2062 /* Want a line */
3280af22
NIS
2063 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2064 if (PerlIO_error(PL_rsfp))
37120919
AD
2065 return -1; /* error */
2066 else
2067 return 0 ; /* end of file */
2068 }
16d20bd9
AD
2069 }
2070 return SvCUR(buf_sv);
2071 }
2072 /* Skip this filter slot if filter has been deleted */
3280af22 2073 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2074 DEBUG_P(PerlIO_printf(Perl_debug_log,
2075 "filter_read %d: skipped (filter deleted)\n",
2076 idx));
16d20bd9
AD
2077 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2078 }
2079 /* Get function pointer hidden within datasv */
4755096e 2080 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2081 DEBUG_P(PerlIO_printf(Perl_debug_log,
2082 "filter_read %d: via function %p (%s)\n",
fe5a182c 2083 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2084 /* Call function. The function is expected to */
2085 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2086 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2087 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2088}
2089
76e3520e 2090STATIC char *
cea2e8a9 2091S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2092{
c39cd008 2093#ifdef PERL_CR_FILTER
3280af22 2094 if (!PL_rsfp_filters) {
c39cd008 2095 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2096 }
2097#endif
3280af22 2098 if (PL_rsfp_filters) {
16d20bd9 2099
55497cff 2100 if (!append)
2101 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2102 if (FILTER_READ(0, sv, 0) > 0)
2103 return ( SvPVX(sv) ) ;
2104 else
2105 return Nullch ;
2106 }
9d116dd7 2107 else
fd049845 2108 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2109}
2110
01ec43d0
GS
2111STATIC HV *
2112S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2113{
2114 GV *gv;
2115
01ec43d0 2116 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2117 return PL_curstash;
2118
2119 if (len > 2 &&
2120 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2121 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2122 {
2123 return GvHV(gv); /* Foo:: */
def3634b
GS
2124 }
2125
2126 /* use constant CLASS => 'MyClass' */
2127 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2128 SV *sv;
2129 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2130 pkgname = SvPV_nolen(sv);
2131 }
2132 }
2133
2134 return gv_stashpv(pkgname, FALSE);
2135}
a0d0e21e 2136
748a9306
LW
2137#ifdef DEBUGGING
2138 static char* exp_name[] =
09bef843
SB
2139 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2140 "ATTRTERM", "TERMBLOCK"
2141 };
748a9306 2142#endif
463ee0b2 2143
02aa26ce
NT
2144/*
2145 yylex
2146
2147 Works out what to call the token just pulled out of the input
2148 stream. The yacc parser takes care of taking the ops we return and
2149 stitching them into a tree.
2150
2151 Returns:
2152 PRIVATEREF
2153
2154 Structure:
2155 if read an identifier
2156 if we're in a my declaration
2157 croak if they tried to say my($foo::bar)
2158 build the ops for a my() declaration
2159 if it's an access to a my() variable
2160 are we in a sort block?
2161 croak if my($a); $a <=> $b
2162 build ops for access to a my() variable
2163 if in a dq string, and they've said @foo and we can't find @foo
2164 croak
2165 build ops for a bareword
2166 if we already built the token before, use it.
2167*/
2168
dba4d153 2169#ifdef USE_PURE_BISON
864dbfa3 2170int
dba4d153 2171Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2172{
20141f0e
IRC
2173 int r;
2174
6f202aea 2175 yyactlevel++;
20141f0e
IRC
2176 yylval_pointer[yyactlevel] = lvalp;
2177 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2178 if (yyactlevel >= YYMAXLEVEL)
2179 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2180
dba4d153 2181 r = Perl_yylex(aTHX);
20141f0e 2182
d8ae6756
IRC
2183 if (yyactlevel > 0)
2184 yyactlevel--;
20141f0e
IRC
2185
2186 return r;
2187}
dba4d153 2188#endif
20141f0e 2189
dba4d153
JH
2190#ifdef __SC__
2191#pragma segment Perl_yylex
2192#endif
dba4d153 2193int
dba4d153 2194Perl_yylex(pTHX)
20141f0e 2195{
79072805 2196 register char *s;
378cc40b 2197 register char *d;
79072805 2198 register I32 tmp;
463ee0b2 2199 STRLEN len;
161b471a
NIS
2200 GV *gv = Nullgv;
2201 GV **gvp = 0;
aa7440fb 2202 bool bof = FALSE;
a687059c 2203
02aa26ce 2204 /* check if there's an identifier for us to look at */
ba979b31 2205 if (PL_pending_ident)
e930465f 2206 return S_pending_ident(aTHX);
bbce6d69 2207
02aa26ce
NT
2208 /* no identifier pending identification */
2209
3280af22 2210 switch (PL_lex_state) {
79072805
LW
2211#ifdef COMMENTARY
2212 case LEX_NORMAL: /* Some compilers will produce faster */
2213 case LEX_INTERPNORMAL: /* code if we comment these out. */
2214 break;
2215#endif
2216
09bef843 2217 /* when we've already built the next token, just pull it out of the queue */
79072805 2218 case LEX_KNOWNEXT:
3280af22
NIS
2219 PL_nexttoke--;
2220 yylval = PL_nextval[PL_nexttoke];
2221 if (!PL_nexttoke) {
2222 PL_lex_state = PL_lex_defer;
2223 PL_expect = PL_lex_expect;
2224 PL_lex_defer = LEX_NORMAL;
463ee0b2 2225 }
607df283 2226 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2227 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2228 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2229
3280af22 2230 return(PL_nexttype[PL_nexttoke]);
79072805 2231
02aa26ce 2232 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2233 when we get here, PL_bufptr is at the \
02aa26ce 2234 */
79072805
LW
2235 case LEX_INTERPCASEMOD:
2236#ifdef DEBUGGING
3280af22 2237 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2238 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2239#endif
02aa26ce 2240 /* handle \E or end of string */
3280af22 2241 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2242 char oldmod;
02aa26ce
NT
2243
2244 /* if at a \E */
3280af22
NIS
2245 if (PL_lex_casemods) {
2246 oldmod = PL_lex_casestack[--PL_lex_casemods];
2247 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2248
3280af22
NIS
2249 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2250 PL_bufptr += 2;
2251 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2252 }
79072805
LW
2253 return ')';
2254 }
3280af22
NIS
2255 if (PL_bufptr != PL_bufend)
2256 PL_bufptr += 2;
2257 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2258 return yylex();
79072805
LW
2259 }
2260 else {
607df283 2261 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2262 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2263 s = PL_bufptr + 1;
79072805
LW
2264 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2265 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2266 if (strchr("LU", *s) &&
3280af22 2267 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2268 {
3280af22 2269 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2270 return ')';
2271 }
3280af22
NIS
2272 if (PL_lex_casemods > 10) {
2273 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2274 if (newlb != PL_lex_casestack) {
a0d0e21e 2275 SAVEFREEPV(newlb);
3280af22 2276 PL_lex_casestack = newlb;
a0d0e21e
LW
2277 }
2278 }
3280af22
NIS
2279 PL_lex_casestack[PL_lex_casemods++] = *s;
2280 PL_lex_casestack[PL_lex_casemods] = '\0';
2281 PL_lex_state = LEX_INTERPCONCAT;
2282 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2283 force_next('(');
2284 if (*s == 'l')
3280af22 2285 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2286 else if (*s == 'u')
3280af22 2287 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2288 else if (*s == 'L')
3280af22 2289 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2290 else if (*s == 'U')
3280af22 2291 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2292 else if (*s == 'Q')
3280af22 2293 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2294 else
cea2e8a9 2295 Perl_croak(aTHX_ "panic: yylex");
3280af22 2296 PL_bufptr = s + 1;
79072805 2297 force_next(FUNC);
3280af22
NIS
2298 if (PL_lex_starts) {
2299 s = PL_bufptr;
2300 PL_lex_starts = 0;
79072805
LW
2301 Aop(OP_CONCAT);
2302 }
2303 else
cea2e8a9 2304 return yylex();
79072805
LW
2305 }
2306
55497cff 2307 case LEX_INTERPPUSH:
2308 return sublex_push();
2309
79072805 2310 case LEX_INTERPSTART:
3280af22 2311 if (PL_bufptr == PL_bufend)
79072805 2312 return sublex_done();
607df283 2313 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2314 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2315 PL_expect = XTERM;
2316 PL_lex_dojoin = (*PL_bufptr == '@');
2317 PL_lex_state = LEX_INTERPNORMAL;
2318 if (PL_lex_dojoin) {
2319 PL_nextval[PL_nexttoke].ival = 0;
79072805 2320 force_next(',');
4d1ff10f 2321#ifdef USE_5005THREADS
533c011a
NIS
2322 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2323 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2324 force_next(PRIVATEREF);
2325#else
a0d0e21e 2326 force_ident("\"", '$');
4d1ff10f 2327#endif /* USE_5005THREADS */
3280af22 2328 PL_nextval[PL_nexttoke].ival = 0;
79072805 2329 force_next('$');
3280af22 2330 PL_nextval[PL_nexttoke].ival = 0;
79072805 2331 force_next('(');
3280af22 2332 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2333 force_next(FUNC);
2334 }
3280af22
NIS
2335 if (PL_lex_starts++) {
2336 s = PL_bufptr;
79072805
LW
2337 Aop(OP_CONCAT);
2338 }
cea2e8a9 2339 return yylex();
79072805
LW
2340
2341 case LEX_INTERPENDMAYBE:
3280af22
NIS
2342 if (intuit_more(PL_bufptr)) {
2343 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2344 break;
2345 }
2346 /* FALL THROUGH */
2347
2348 case LEX_INTERPEND:
3280af22
NIS
2349 if (PL_lex_dojoin) {
2350 PL_lex_dojoin = FALSE;
2351 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2352 return ')';
2353 }
43a16006 2354 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2355 && SvEVALED(PL_lex_repl))
43a16006 2356 {
e9fa98b2 2357 if (PL_bufptr != PL_bufend)
cea2e8a9 2358 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2359 PL_lex_repl = Nullsv;
2360 }
79072805
LW
2361 /* FALLTHROUGH */
2362 case LEX_INTERPCONCAT:
2363#ifdef DEBUGGING
3280af22 2364 if (PL_lex_brackets)
cea2e8a9 2365 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2366#endif
3280af22 2367 if (PL_bufptr == PL_bufend)
79072805
LW
2368 return sublex_done();
2369
3280af22
NIS
2370 if (SvIVX(PL_linestr) == '\'') {
2371 SV *sv = newSVsv(PL_linestr);
2372 if (!PL_lex_inpat)
76e3520e 2373 sv = tokeq(sv);
3280af22 2374 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2375 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2376 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2377 s = PL_bufend;
79072805
LW
2378 }
2379 else {
3280af22 2380 s = scan_const(PL_bufptr);
79072805 2381 if (*s == '\\')
3280af22 2382 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2383 else
3280af22 2384 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2385 }
2386
3280af22
NIS
2387 if (s != PL_bufptr) {
2388 PL_nextval[PL_nexttoke] = yylval;
2389 PL_expect = XTERM;
79072805 2390 force_next(THING);
3280af22 2391 if (PL_lex_starts++)
79072805
LW
2392 Aop(OP_CONCAT);
2393 else {
3280af22 2394 PL_bufptr = s;
cea2e8a9 2395 return yylex();
79072805
LW
2396 }
2397 }
2398
cea2e8a9 2399 return yylex();
a0d0e21e 2400 case LEX_FORMLINE:
3280af22
NIS
2401 PL_lex_state = LEX_NORMAL;
2402 s = scan_formline(PL_bufptr);
2403 if (!PL_lex_formbrack)
a0d0e21e
LW
2404 goto rightbracket;
2405 OPERATOR(';');
79072805
LW
2406 }
2407
3280af22
NIS
2408 s = PL_bufptr;
2409 PL_oldoldbufptr = PL_oldbufptr;
2410 PL_oldbufptr = s;
607df283 2411 DEBUG_T( {
bf49b057
GS
2412 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2413 exp_name[PL_expect], s);
5f80b19c 2414 } );
463ee0b2
LW
2415
2416 retry:
378cc40b
LW
2417 switch (*s) {
2418 default:
7e2040f0 2419 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2420 goto keylookup;
cea2e8a9 2421 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2422 case 4:
2423 case 26:
2424 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2425 case 0:
3280af22
NIS
2426 if (!PL_rsfp) {
2427 PL_last_uni = 0;
2428 PL_last_lop = 0;
2429 if (PL_lex_brackets)
d98d5fff 2430 yyerror("Missing right curly or square bracket");
4e553d73 2431 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2432 "### Tokener got EOF\n");
5f80b19c 2433 } );
79072805 2434 TOKEN(0);
463ee0b2 2435 }
3280af22 2436 if (s++ < PL_bufend)
a687059c 2437 goto retry; /* ignore stray nulls */
3280af22
NIS
2438 PL_last_uni = 0;
2439 PL_last_lop = 0;
2440 if (!PL_in_eval && !PL_preambled) {
2441 PL_preambled = TRUE;
2442 sv_setpv(PL_linestr,incl_perldb());
2443 if (SvCUR(PL_linestr))
2444 sv_catpv(PL_linestr,";");
2445 if (PL_preambleav){
2446 while(AvFILLp(PL_preambleav) >= 0) {
2447 SV *tmpsv = av_shift(PL_preambleav);
2448 sv_catsv(PL_linestr, tmpsv);
2449 sv_catpv(PL_linestr, ";");
91b7def8 2450 sv_free(tmpsv);
2451 }
3280af22
NIS
2452 sv_free((SV*)PL_preambleav);
2453 PL_preambleav = NULL;
91b7def8 2454 }
3280af22
NIS
2455 if (PL_minus_n || PL_minus_p) {
2456 sv_catpv(PL_linestr, "LINE: while (<>) {");
2457 if (PL_minus_l)
2458 sv_catpv(PL_linestr,"chomp;");
2459 if (PL_minus_a) {
3280af22
NIS
2460 if (PL_minus_F) {
2461 if (strchr("/'\"", *PL_splitstr)
2462 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2463 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2464 else {
2465 char delim;
2466 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2467 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2468 delim = *s;
75c72d73 2469 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2470 "q" + (delim == '\''), delim);
3280af22 2471 for (s = PL_splitstr; *s; s++) {
54310121 2472 if (*s == '\\')
3280af22
NIS
2473 sv_catpvn(PL_linestr, "\\", 1);
2474 sv_catpvn(PL_linestr, s, 1);
54310121 2475 }
cea2e8a9 2476 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2477 }
2304df62
AD
2478 }
2479 else
75c72d73 2480 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2481 }
79072805 2482 }
3280af22
NIS
2483 sv_catpv(PL_linestr, "\n");
2484 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2485 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2486 PL_last_lop = PL_last_uni = Nullch;
3280af22 2487 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2488 SV *sv = NEWSV(85,0);
2489
2490 sv_upgrade(sv, SVt_PVMG);
3280af22 2491 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2492 (void)SvIOK_on(sv);
2493 SvIVX(sv) = 0;
57843af0 2494 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2495 }
79072805 2496 goto retry;
a687059c 2497 }
e929a76b 2498 do {
aa7440fb 2499 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2500 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2501 fake_eof:
2502 if (PL_rsfp) {
2503 if (PL_preprocess && !PL_in_eval)
2504 (void)PerlProc_pclose(PL_rsfp);
2505 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2506 PerlIO_clearerr(PL_rsfp);
2507 else
2508 (void)PerlIO_close(PL_rsfp);
2509 PL_rsfp = Nullfp;
2510 PL_doextract = FALSE;
2511 }
2512 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2513 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2514 sv_catpv(PL_linestr,";}");
2515 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2516 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2517 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2518 PL_minus_n = PL_minus_p = 0;
2519 goto retry;
2520 }
2521 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2522 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2523 sv_setpv(PL_linestr,"");
2524 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2525 }
2526 /* if it looks like the start of a BOM, check if it in fact is */
2527 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2528#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2529# ifdef __GNU_LIBRARY__
2530# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2531# define FTELL_FOR_PIPE_IS_BROKEN
2532# endif
e3f494f1
JH
2533# else
2534# ifdef __GLIBC__
2535# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2536# define FTELL_FOR_PIPE_IS_BROKEN
2537# endif
2538# endif
226017aa
DD
2539# endif
2540#endif
2541#ifdef FTELL_FOR_PIPE_IS_BROKEN
2542 /* This loses the possibility to detect the bof
2543 * situation on perl -P when the libc5 is being used.
2544 * Workaround? Maybe attach some extra state to PL_rsfp?
2545 */
2546 if (!PL_preprocess)
7e28d3af 2547 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2548#else
7e28d3af 2549 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2550#endif
7e28d3af 2551 if (bof) {
3280af22 2552 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2553 s = swallow_bom((U8*)s);
e929a76b 2554 }
378cc40b 2555 }
3280af22 2556 if (PL_doextract) {
a0d0e21e 2557 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2558 PL_doextract = FALSE;
a0d0e21e
LW
2559
2560 /* Incest with pod. */
2561 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2562 sv_setpv(PL_linestr, "");
2563 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2564 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2565 PL_last_lop = PL_last_uni = Nullch;
3280af22 2566 PL_doextract = FALSE;
a0d0e21e 2567 }
4e553d73 2568 }
463ee0b2 2569 incline(s);
3280af22
NIS
2570 } while (PL_doextract);
2571 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2572 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2573 SV *sv = NEWSV(85,0);
a687059c 2574
93a17b20 2575 sv_upgrade(sv, SVt_PVMG);
3280af22 2576 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2577 (void)SvIOK_on(sv);
2578 SvIVX(sv) = 0;
57843af0 2579 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2580 }
3280af22 2581 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2582 PL_last_lop = PL_last_uni = Nullch;
57843af0 2583 if (CopLINE(PL_curcop) == 1) {
3280af22 2584 while (s < PL_bufend && isSPACE(*s))
79072805 2585 s++;
a0d0e21e 2586 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2587 s++;
44a8e56a 2588 d = Nullch;
3280af22 2589 if (!PL_in_eval) {
44a8e56a 2590 if (*s == '#' && *(s+1) == '!')
2591 d = s + 2;
2592#ifdef ALTERNATE_SHEBANG
2593 else {
2594 static char as[] = ALTERNATE_SHEBANG;
2595 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2596 d = s + (sizeof(as) - 1);
2597 }
2598#endif /* ALTERNATE_SHEBANG */
2599 }
2600 if (d) {
b8378b72 2601 char *ipath;
774d564b 2602 char *ipathend;
b8378b72 2603
774d564b 2604 while (isSPACE(*d))
b8378b72
CS
2605 d++;
2606 ipath = d;
774d564b 2607 while (*d && !isSPACE(*d))
2608 d++;
2609 ipathend = d;
2610
2611#ifdef ARG_ZERO_IS_SCRIPT
2612 if (ipathend > ipath) {
2613 /*
2614 * HP-UX (at least) sets argv[0] to the script name,
2615 * which makes $^X incorrect. And Digital UNIX and Linux,
2616 * at least, set argv[0] to the basename of the Perl
2617 * interpreter. So, having found "#!", we'll set it right.
2618 */
ee2f7564 2619 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2620 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2621 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2622 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2623 SvSETMAGIC(x);
2624 }
774d564b 2625 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2626 }
774d564b 2627#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2628
2629 /*
2630 * Look for options.
2631 */
748a9306 2632 d = instr(s,"perl -");
84e30d1a 2633 if (!d) {
748a9306 2634 d = instr(s,"perl");
84e30d1a
GS
2635#if defined(DOSISH)
2636 /* avoid getting into infinite loops when shebang
2637 * line contains "Perl" rather than "perl" */
2638 if (!d) {
2639 for (d = ipathend-4; d >= ipath; --d) {
2640 if ((*d == 'p' || *d == 'P')
2641 && !ibcmp(d, "perl", 4))
2642 {
2643 break;
2644 }
2645 }
2646 if (d < ipath)
2647 d = Nullch;
2648 }
2649#endif
2650 }
44a8e56a 2651#ifdef ALTERNATE_SHEBANG
2652 /*
2653 * If the ALTERNATE_SHEBANG on this system starts with a
2654 * character that can be part of a Perl expression, then if
2655 * we see it but not "perl", we're probably looking at the
2656 * start of Perl code, not a request to hand off to some
2657 * other interpreter. Similarly, if "perl" is there, but
2658 * not in the first 'word' of the line, we assume the line
2659 * contains the start of the Perl program.
44a8e56a 2660 */
2661 if (d && *s != '#') {
774d564b 2662 char *c = ipath;
44a8e56a 2663 while (*c && !strchr("; \t\r\n\f\v#", *c))
2664 c++;
2665 if (c < d)
2666 d = Nullch; /* "perl" not in first word; ignore */
2667 else
2668 *s = '#'; /* Don't try to parse shebang line */
2669 }
774d564b 2670#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2671#ifndef MACOS_TRADITIONAL
748a9306 2672 if (!d &&
44a8e56a 2673 *s == '#' &&
774d564b 2674 ipathend > ipath &&
3280af22 2675 !PL_minus_c &&
748a9306 2676 !instr(s,"indir") &&
3280af22 2677 instr(PL_origargv[0],"perl"))
748a9306 2678 {
9f68db38 2679 char **newargv;
9f68db38 2680
774d564b 2681 *ipathend = '\0';
2682 s = ipathend + 1;
3280af22 2683 while (s < PL_bufend && isSPACE(*s))
9f68db38 2684 s++;
3280af22
NIS
2685 if (s < PL_bufend) {
2686 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2687 newargv[1] = s;
3280af22 2688 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2689 s++;
2690 *s = '\0';
3280af22 2691 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2692 }
2693 else
3280af22 2694 newargv = PL_origargv;
774d564b 2695 newargv[0] = ipath;
b4748376 2696 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2697 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2698 }
bf4acbe4 2699#endif
748a9306 2700 if (d) {
3280af22
NIS
2701 U32 oldpdb = PL_perldb;
2702 bool oldn = PL_minus_n;
2703 bool oldp = PL_minus_p;
748a9306
LW
2704
2705 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2706 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2707
2708 if (*d++ == '-') {
a11ec5a9 2709 bool switches_done = PL_doswitches;
8cc95fdb 2710 do {
2711 if (*d == 'M' || *d == 'm') {
2712 char *m = d;
2713 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2714 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2715 (int)(d - m), m);
2716 }
2717 d = moreswitches(d);
2718 } while (d);
155aba94
GS
2719 if ((PERLDB_LINE && !oldpdb) ||
2720 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2721 /* if we have already added "LINE: while (<>) {",
2722 we must not do it again */
748a9306 2723 {
3280af22
NIS
2724 sv_setpv(PL_linestr, "");
2725 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2726 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2727 PL_last_lop = PL_last_uni = Nullch;
3280af22 2728 PL_preambled = FALSE;
84902520 2729 if (PERLDB_LINE)
3280af22 2730 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2731 goto retry;
2732 }
a11ec5a9
RGS
2733 if (PL_doswitches && !switches_done) {
2734 int argc = PL_origargc;
2735 char **argv = PL_origargv;
2736 do {
2737 argc--,argv++;
2738 } while (argc && argv[0][0] == '-' && argv[0][1]);
2739 init_argv_symbols(argc,argv);
2740 }
a0d0e21e 2741 }
79072805 2742 }
9f68db38 2743 }
79072805 2744 }
3280af22
NIS
2745 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2746 PL_bufptr = s;
2747 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2748 return yylex();
ae986130 2749 }
378cc40b 2750 goto retry;
4fdae800 2751 case '\r':
6a27c188 2752#ifdef PERL_STRICT_CR
cea2e8a9 2753 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2754 Perl_croak(aTHX_
cc507455 2755 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2756#endif
4fdae800 2757 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2758#ifdef MACOS_TRADITIONAL
2759 case '\312':
2760#endif
378cc40b
LW
2761 s++;
2762 goto retry;
378cc40b 2763 case '#':
e929a76b 2764 case '\n':
3280af22 2765 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2766 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2767 /* handle eval qq[#line 1 "foo"\n ...] */
2768 CopLINE_dec(PL_curcop);
2769 incline(s);
2770 }
3280af22 2771 d = PL_bufend;
a687059c 2772 while (s < d && *s != '\n')
378cc40b 2773 s++;
0f85fab0 2774 if (s < d)
378cc40b 2775 s++;
78c267c1 2776 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2777 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2778 incline(s);
3280af22
NIS
2779 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2780 PL_bufptr = s;
2781 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2782 return yylex();
a687059c 2783 }
378cc40b 2784 }
a687059c 2785 else {
378cc40b 2786 *s = '\0';
3280af22 2787 PL_bufend = s;
a687059c 2788 }
378cc40b
LW
2789 goto retry;
2790 case '-':
79072805 2791 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2792 I32 ftst = 0;
2793
378cc40b 2794 s++;
3280af22 2795 PL_bufptr = s;
748a9306
LW
2796 tmp = *s++;
2797
bf4acbe4 2798 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2799 s++;
2800
2801 if (strnEQ(s,"=>",2)) {
3280af22 2802 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2803 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2804 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2805 } );
748a9306
LW
2806 OPERATOR('-'); /* unary minus */
2807 }
3280af22 2808 PL_last_uni = PL_oldbufptr;
748a9306 2809 switch (tmp) {
e5edeb50
JH
2810 case 'r': ftst = OP_FTEREAD; break;
2811 case 'w': ftst = OP_FTEWRITE; break;
2812 case 'x': ftst = OP_FTEEXEC; break;
2813 case 'o': ftst = OP_FTEOWNED; break;
2814 case 'R': ftst = OP_FTRREAD; break;
2815 case 'W': ftst = OP_FTRWRITE; break;
2816 case 'X': ftst = OP_FTREXEC; break;
2817 case 'O': ftst = OP_FTROWNED; break;
2818 case 'e': ftst = OP_FTIS; break;
2819 case 'z': ftst = OP_FTZERO; break;
2820 case 's': ftst = OP_FTSIZE; break;
2821 case 'f': ftst = OP_FTFILE; break;
2822 case 'd': ftst = OP_FTDIR; break;
2823 case 'l': ftst = OP_FTLINK; break;
2824 case 'p': ftst = OP_FTPIPE; break;
2825 case 'S': ftst = OP_FTSOCK; break;
2826 case 'u': ftst = OP_FTSUID; break;
2827 case 'g': ftst = OP_FTSGID; break;
2828 case 'k': ftst = OP_FTSVTX; break;
2829 case 'b': ftst = OP_FTBLK; break;
2830 case 'c': ftst = OP_FTCHR; break;
2831 case 't': ftst = OP_FTTTY; break;
2832 case 'T': ftst = OP_FTTEXT; break;
2833 case 'B': ftst = OP_FTBINARY; break;
2834 case 'M': case 'A': case 'C':
2835 gv_fetchpv("\024",TRUE, SVt_PV);
2836 switch (tmp) {
2837 case 'M': ftst = OP_FTMTIME; break;
2838 case 'A': ftst = OP_FTATIME; break;
2839 case 'C': ftst = OP_FTCTIME; break;
2840 default: break;
2841 }
2842 break;
378cc40b 2843 default:
378cc40b
LW
2844 break;
2845 }
e5edeb50
JH
2846 if (ftst) {
2847 PL_last_lop_op = ftst;
4e553d73 2848 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2849 "### Saw file test %c\n", (int)ftst);
5f80b19c 2850 } );
e5edeb50
JH
2851 FTST(ftst);
2852 }
2853 else {
2854 /* Assume it was a minus followed by a one-letter named
2855 * subroutine call (or a -bareword), then. */
95c31fe3 2856 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2857 "### %c looked like a file test but was not\n",
2858 (int)ftst);
5f80b19c 2859 } );
e5edeb50
JH
2860 s -= 2;
2861 }
378cc40b 2862 }
a687059c
LW
2863 tmp = *s++;
2864 if (*s == tmp) {
2865 s++;
3280af22 2866 if (PL_expect == XOPERATOR)
79072805
LW
2867 TERM(POSTDEC);
2868 else
2869 OPERATOR(PREDEC);
2870 }
2871 else if (*s == '>') {
2872 s++;
2873 s = skipspace(s);
7e2040f0 2874 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2875 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2876 TOKEN(ARROW);
79072805 2877 }
748a9306
LW
2878 else if (*s == '$')
2879 OPERATOR(ARROW);
463ee0b2 2880 else
748a9306 2881 TERM(ARROW);
a687059c 2882 }
3280af22 2883 if (PL_expect == XOPERATOR)
79072805
LW
2884 Aop(OP_SUBTRACT);
2885 else {
3280af22 2886 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2887 check_uni();
79072805 2888 OPERATOR('-'); /* unary minus */
2f3197b3 2889 }
79072805 2890
378cc40b 2891 case '+':
a687059c
LW
2892 tmp = *s++;
2893 if (*s == tmp) {
378cc40b 2894 s++;
3280af22 2895 if (PL_expect == XOPERATOR)
79072805
LW
2896 TERM(POSTINC);
2897 else
2898 OPERATOR(PREINC);
378cc40b 2899 }
3280af22 2900 if (PL_expect == XOPERATOR)
79072805
LW
2901 Aop(OP_ADD);
2902 else {
3280af22 2903 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2904 check_uni();
a687059c 2905 OPERATOR('+');
2f3197b3 2906 }
a687059c 2907
378cc40b 2908 case '*':
3280af22
NIS
2909 if (PL_expect != XOPERATOR) {
2910 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2911 PL_expect = XOPERATOR;
2912 force_ident(PL_tokenbuf, '*');
2913 if (!*PL_tokenbuf)
a0d0e21e 2914 PREREF('*');
79072805 2915 TERM('*');
a687059c 2916 }
79072805
LW
2917 s++;
2918 if (*s == '*') {
a687059c 2919 s++;
79072805 2920 PWop(OP_POW);
a687059c 2921 }
79072805
LW
2922 Mop(OP_MULTIPLY);
2923
378cc40b 2924 case '%':
3280af22 2925 if (PL_expect == XOPERATOR) {
bbce6d69 2926 ++s;
2927 Mop(OP_MODULO);
a687059c 2928 }
3280af22
NIS
2929 PL_tokenbuf[0] = '%';
2930 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
2931 if (!PL_tokenbuf[1]) {
2932 if (s == PL_bufend)
bbce6d69 2933 yyerror("Final % should be \\% or %name");
2934 PREREF('%');
a687059c 2935 }
3280af22 2936 PL_pending_ident = '%';
bbce6d69 2937 TERM('%');
a687059c 2938
378cc40b 2939 case '^':
79072805 2940 s++;
a0d0e21e 2941 BOop(OP_BIT_XOR);
79072805 2942 case '[':
3280af22 2943 PL_lex_brackets++;
79072805 2944 /* FALL THROUGH */
378cc40b 2945 case '~':
378cc40b 2946 case ',':
378cc40b
LW
2947 tmp = *s++;
2948 OPERATOR(tmp);
a0d0e21e
LW
2949 case ':':
2950 if (s[1] == ':') {
2951 len = 0;
2952 goto just_a_word;
2953 }
2954 s++;
09bef843
SB
2955 switch (PL_expect) {
2956 OP *attrs;
2957 case XOPERATOR:
2958 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
2959 break;
2960 PL_bufptr = s; /* update in case we back off */
2961 goto grabattrs;
2962 case XATTRBLOCK:
2963 PL_expect = XBLOCK;
2964 goto grabattrs;
2965 case XATTRTERM:
2966 PL_expect = XTERMBLOCK;
2967 grabattrs:
2968 s = skipspace(s);
2969 attrs = Nullop;
7e2040f0 2970 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 2971 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
2972 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
2973 if (tmp < 0) tmp = -tmp;
2974 switch (tmp) {
2975 case KEY_or:
2976 case KEY_and:
2977 case KEY_for:
2978 case KEY_unless:
2979 case KEY_if:
2980 case KEY_while:
2981 case KEY_until:
2982 goto got_attrs;
2983 default:
2984 break;
2985 }
2986 }
09bef843
SB
2987 if (*d == '(') {
2988 d = scan_str(d,TRUE,TRUE);
2989 if (!d) {
09bef843
SB
2990 /* MUST advance bufptr here to avoid bogus
2991 "at end of line" context messages from yyerror().
2992 */
2993 PL_bufptr = s + len;
2994 yyerror("Unterminated attribute parameter in attribute list");
2995 if (attrs)
2996 op_free(attrs);
2997 return 0; /* EOF indicator */
2998 }
2999 }
3000 if (PL_lex_stuff) {
3001 SV *sv = newSVpvn(s, len);
3002 sv_catsv(sv, PL_lex_stuff);
3003 attrs = append_elem(OP_LIST, attrs,
3004 newSVOP(OP_CONST, 0, sv));
3005 SvREFCNT_dec(PL_lex_stuff);
3006 PL_lex_stuff = Nullsv;
3007 }
3008 else {
78f9721b
SM
3009 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3010 CvLVALUE_on(PL_compcv);
3011 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3012 CvLOCKED_on(PL_compcv);
3013 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3014 CvMETHOD_on(PL_compcv);
87ecf892 3015#ifdef USE_ITHREADS
c8a3bf85 3016 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "unique", len))
7fb37951 3017 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
87ecf892 3018#endif
78f9721b
SM
3019 /* After we've set the flags, it could be argued that
3020 we don't need to do the attributes.pm-based setting
3021 process, and shouldn't bother appending recognized
3022 flags. To experiment with that, uncomment the
3023 following "else": */
0256094b 3024 else
78f9721b
SM
3025 attrs = append_elem(OP_LIST, attrs,
3026 newSVOP(OP_CONST, 0,
3027 newSVpvn(s, len)));
09bef843
SB
3028 }
3029 s = skipspace(d);
0120eecf 3030 if (*s == ':' && s[1] != ':')
09bef843 3031 s = skipspace(s+1);
0120eecf
GS
3032 else if (s == d)
3033 break; /* require real whitespace or :'s */
09bef843 3034 }
f9829d6b
GS
3035 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3036 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3037 char q = ((*s == '\'') ? '"' : '\'');
3038 /* If here for an expression, and parsed no attrs, back off. */
3039 if (tmp == '=' && !attrs) {
3040 s = PL_bufptr;
3041 break;
3042 }
3043 /* MUST advance bufptr here to avoid bogus "at end of line"
3044 context messages from yyerror().
3045 */
3046 PL_bufptr = s;
3047 if (!*s)
3048 yyerror("Unterminated attribute list");
3049 else
3050 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3051 q, *s, q));
3052 if (attrs)
3053 op_free(attrs);
3054 OPERATOR(':');
3055 }
f9829d6b 3056 got_attrs:
09bef843
SB
3057 if (attrs) {
3058 PL_nextval[PL_nexttoke].opval = attrs;
3059 force_next(THING);
3060 }
3061 TOKEN(COLONATTR);
3062 }
a0d0e21e 3063 OPERATOR(':');
8990e307
LW
3064 case '(':
3065 s++;
3280af22
NIS
3066 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3067 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3068 else
3280af22 3069 PL_expect = XTERM;
a0d0e21e 3070 TOKEN('(');
378cc40b 3071 case ';':
f4dd75d9 3072 CLINE;
378cc40b
LW
3073 tmp = *s++;
3074 OPERATOR(tmp);
3075 case ')':
378cc40b 3076 tmp = *s++;
16d20bd9
AD
3077 s = skipspace(s);
3078 if (*s == '{')
3079 PREBLOCK(tmp);
378cc40b 3080 TERM(tmp);
79072805
LW
3081 case ']':
3082 s++;
3280af22 3083 if (PL_lex_brackets <= 0)
d98d5fff 3084 yyerror("Unmatched right square bracket");
463ee0b2 3085 else
3280af22
NIS
3086 --PL_lex_brackets;
3087 if (PL_lex_state == LEX_INTERPNORMAL) {
3088 if (PL_lex_brackets == 0) {
a0d0e21e 3089 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3090 PL_lex_state = LEX_INTERPEND;
79072805
LW
3091 }
3092 }
4633a7c4 3093 TERM(']');
79072805
LW
3094 case '{':
3095 leftbracket:
79072805 3096 s++;
3280af22
NIS
3097 if (PL_lex_brackets > 100) {
3098 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3099 if (newlb != PL_lex_brackstack) {
8990e307 3100 SAVEFREEPV(newlb);
3280af22 3101 PL_lex_brackstack = newlb;
8990e307
LW
3102 }
3103 }
3280af22 3104 switch (PL_expect) {
a0d0e21e 3105 case XTERM:
3280af22 3106 if (PL_lex_formbrack) {
a0d0e21e
LW
3107 s--;
3108 PRETERMBLOCK(DO);
3109 }
3280af22
NIS
3110 if (PL_oldoldbufptr == PL_last_lop)
3111 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3112 else
3280af22 3113 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3114 OPERATOR(HASHBRACK);
a0d0e21e 3115 case XOPERATOR:
bf4acbe4 3116 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3117 s++;
44a8e56a 3118 d = s;
3280af22
NIS
3119 PL_tokenbuf[0] = '\0';
3120 if (d < PL_bufend && *d == '-') {
3121 PL_tokenbuf[0] = '-';
44a8e56a 3122 d++;
bf4acbe4 3123 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3124 d++;
3125 }
7e2040f0 3126 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3127 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3128 FALSE, &len);
bf4acbe4 3129 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3130 d++;
3131 if (*d == '}') {
3280af22 3132 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3133 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3134 if (minus)
3135 force_next('-');
748a9306
LW
3136 }
3137 }
3138 /* FALL THROUGH */
09bef843 3139 case XATTRBLOCK:
748a9306 3140 case XBLOCK:
3280af22
NIS
3141 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3142 PL_expect = XSTATE;
a0d0e21e 3143 break;
09bef843 3144 case XATTRTERM:
a0d0e21e 3145 case XTERMBLOCK:
3280af22
NIS
3146 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3147 PL_expect = XSTATE;
a0d0e21e
LW
3148 break;
3149 default: {
3150 char *t;
3280af22
NIS
3151 if (PL_oldoldbufptr == PL_last_lop)
3152 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3153 else
3280af22 3154 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3155 s = skipspace(s);
8452ff4b
SB
3156 if (*s == '}') {
3157 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3158 PL_expect = XTERM;
3159 /* This hack is to get the ${} in the message. */
3160 PL_bufptr = s+1;
3161 yyerror("syntax error");
3162 break;
3163 }
a0d0e21e 3164 OPERATOR(HASHBRACK);
8452ff4b 3165 }
b8a4b1be
GS
3166 /* This hack serves to disambiguate a pair of curlies
3167 * as being a block or an anon hash. Normally, expectation
3168 * determines that, but in cases where we're not in a
3169 * position to expect anything in particular (like inside
3170 * eval"") we have to resolve the ambiguity. This code
3171 * covers the case where the first term in the curlies is a
3172 * quoted string. Most other cases need to be explicitly
3173 * disambiguated by prepending a `+' before the opening
3174 * curly in order to force resolution as an anon hash.
3175 *
3176 * XXX should probably propagate the outer expectation
3177 * into eval"" to rely less on this hack, but that could
3178 * potentially break current behavior of eval"".
3179 * GSAR 97-07-21
3180 */
3181 t = s;
3182 if (*s == '\'' || *s == '"' || *s == '`') {
3183 /* common case: get past first string, handling escapes */
3280af22 3184 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3185 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3186 t++;
3187 t++;
a0d0e21e 3188 }
b8a4b1be 3189 else if (*s == 'q') {
3280af22 3190 if (++t < PL_bufend
b8a4b1be 3191 && (!isALNUM(*t)
3280af22 3192 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3193 && !isALNUM(*t))))
3194 {
b8a4b1be
GS
3195 char *tmps;
3196 char open, close, term;
3197 I32 brackets = 1;
3198
3280af22 3199 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3200 t++;
3201 term = *t;
3202 open = term;
3203 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3204 term = tmps[5];
3205 close = term;
3206 if (open == close)
3280af22
NIS
3207 for (t++; t < PL_bufend; t++) {
3208 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3209 t++;
6d07e5e9 3210 else if (*t == open)
b8a4b1be
GS
3211 break;
3212 }
3213 else
3280af22
NIS
3214 for (t++; t < PL_bufend; t++) {
3215 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3216 t++;
6d07e5e9 3217 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3218 break;
3219 else if (*t == open)
3220 brackets++;
3221 }
3222 }
3223 t++;
a0d0e21e 3224 }
7e2040f0 3225 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3226 t += UTF8SKIP(t);
7e2040f0 3227 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3228 t += UTF8SKIP(t);
a0d0e21e 3229 }
3280af22 3230 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3231 t++;
b8a4b1be
GS
3232 /* if comma follows first term, call it an anon hash */
3233 /* XXX it could be a comma expression with loop modifiers */
3280af22 3234 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3235 || (*t == '=' && t[1] == '>')))
a0d0e21e 3236 OPERATOR(HASHBRACK);
3280af22 3237 if (PL_expect == XREF)
4e4e412b 3238 PL_expect = XTERM;
a0d0e21e 3239 else {
3280af22
NIS
3240 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3241 PL_expect = XSTATE;
a0d0e21e 3242 }
8990e307 3243 }
a0d0e21e 3244 break;
463ee0b2 3245 }
57843af0 3246 yylval.ival = CopLINE(PL_curcop);
79072805 3247 if (isSPACE(*s) || *s == '#')
3280af22 3248 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3249 TOKEN('{');
378cc40b 3250 case '}':
79072805
LW
3251 rightbracket:
3252 s++;
3280af22 3253 if (PL_lex_brackets <= 0)
d98d5fff 3254 yyerror("Unmatched right curly bracket");
463ee0b2 3255 else
3280af22 3256 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3257 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3258 PL_lex_formbrack = 0;
3259 if (PL_lex_state == LEX_INTERPNORMAL) {
3260 if (PL_lex_brackets == 0) {
9059aa12
LW
3261 if (PL_expect & XFAKEBRACK) {
3262 PL_expect &= XENUMMASK;
3280af22
NIS
3263 PL_lex_state = LEX_INTERPEND;
3264 PL_bufptr = s;
cea2e8a9 3265 return yylex(); /* ignore fake brackets */
79072805 3266 }
fa83b5b6 3267 if (*s == '-' && s[1] == '>')
3280af22 3268 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3269 else if (*s != '[' && *s != '{')
3280af22 3270 PL_lex_state = LEX_INTERPEND;
79072805
LW
3271 }
3272 }
9059aa12
LW
3273 if (PL_expect & XFAKEBRACK) {
3274 PL_expect &= XENUMMASK;
3280af22 3275 PL_bufptr = s;
cea2e8a9 3276 return yylex(); /* ignore fake brackets */
748a9306 3277 }
79072805
LW
3278 force_next('}');
3279 TOKEN(';');
378cc40b
LW
3280 case '&':
3281 s++;
3282 tmp = *s++;
3283 if (tmp == '&')
a0d0e21e 3284 AOPERATOR(ANDAND);
378cc40b 3285 s--;
3280af22 3286 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3287 if (ckWARN(WARN_SEMICOLON)
3288 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3289 {
57843af0 3290 CopLINE_dec(PL_curcop);
cea2e8a9 3291 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3292 CopLINE_inc(PL_curcop);
463ee0b2 3293 }
79072805 3294 BAop(OP_BIT_AND);
463ee0b2 3295 }
79072805 3296
3280af22
NIS
3297 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3298 if (*PL_tokenbuf) {
3299 PL_expect = XOPERATOR;
3300 force_ident(PL_tokenbuf, '&');
463ee0b2 3301 }
79072805
LW
3302 else
3303 PREREF('&');
c07a80fd 3304 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3305 TERM('&');
3306
378cc40b
LW
3307 case '|':
3308 s++;
3309 tmp = *s++;
3310 if (tmp == '|')
a0d0e21e 3311 AOPERATOR(OROR);
378cc40b 3312 s--;
79072805 3313 BOop(OP_BIT_OR);
378cc40b
LW
3314 case '=':
3315 s++;
3316 tmp = *s++;
3317 if (tmp == '=')
79072805
LW
3318 Eop(OP_EQ);
3319 if (tmp == '>')
3320 OPERATOR(',');
378cc40b 3321 if (tmp == '~')
79072805 3322 PMop(OP_MATCH);
599cee73 3323 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3324 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3325 s--;
3280af22
NIS
3326 if (PL_expect == XSTATE && isALPHA(tmp) &&
3327 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3328 {
3280af22
NIS
3329 if (PL_in_eval && !PL_rsfp) {
3330 d = PL_bufend;
a5f75d66
AD
3331 while (s < d) {
3332 if (*s++ == '\n') {
3333 incline(s);
3334 if (strnEQ(s,"=cut",4)) {
3335 s = strchr(s,'\n');
3336 if (s)
3337 s++;
3338 else
3339 s = d;
3340 incline(s);
3341 goto retry;
3342 }
3343 }
3344 }
3345 goto retry;
3346 }
3280af22
NIS
3347 s = PL_bufend;
3348 PL_doextract = TRUE;
a0d0e21e
LW
3349 goto retry;
3350 }
3280af22 3351 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3352 char *t;
51882d45 3353#ifdef PERL_STRICT_CR
bf4acbe4 3354 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3355#else
bf4acbe4 3356 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3357#endif
a0d0e21e
LW
3358 if (*t == '\n' || *t == '#') {
3359 s--;
3280af22 3360 PL_expect = XBLOCK;
a0d0e21e
LW
3361 goto leftbracket;
3362 }
79072805 3363 }
a0d0e21e
LW
3364 yylval.ival = 0;
3365 OPERATOR(ASSIGNOP);
378cc40b
LW
3366 case '!':
3367 s++;
3368 tmp = *s++;
3369 if (tmp == '=')
79072805 3370 Eop(OP_NE);
378cc40b 3371 if (tmp == '~')
79072805 3372 PMop(OP_NOT);
378cc40b
LW
3373 s--;
3374 OPERATOR('!');
3375 case '<':
3280af22 3376 if (PL_expect != XOPERATOR) {
93a17b20 3377 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3378 check_uni();
79072805
LW
3379 if (s[1] == '<')
3380 s = scan_heredoc(s);
3381 else
3382 s = scan_inputsymbol(s);
3383 TERM(sublex_start());
378cc40b
LW
3384 }
3385 s++;
3386 tmp = *s++;
3387 if (tmp == '<')
79072805 3388 SHop(OP_LEFT_SHIFT);
395c3793
LW
3389 if (tmp == '=') {
3390 tmp = *s++;
3391 if (tmp == '>')
79072805 3392 Eop(OP_NCMP);
395c3793 3393 s--;
79072805 3394 Rop(OP_LE);
395c3793 3395 }
378cc40b 3396 s--;
79072805 3397 Rop(OP_LT);
378cc40b
LW
3398 case '>':
3399 s++;
3400 tmp = *s++;
3401 if (tmp == '>')
79072805 3402 SHop(OP_RIGHT_SHIFT);
378cc40b 3403 if (tmp == '=')
79072805 3404 Rop(OP_GE);
378cc40b 3405 s--;
79072805 3406 Rop(OP_GT);
378cc40b
LW
3407
3408 case '$':
bbce6d69 3409 CLINE;
3410
3280af22
NIS
3411 if (PL_expect == XOPERATOR) {
3412 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3413 PL_expect = XTERM;
a0d0e21e 3414 depcom();
bbce6d69 3415 return ','; /* grandfather non-comma-format format */
a0d0e21e 3416 }
8990e307 3417 }
a0d0e21e 3418
7e2040f0 3419 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3420 PL_tokenbuf[0] = '@';
376b8730
SM
3421 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3422 sizeof PL_tokenbuf - 1, FALSE);
3423 if (PL_expect == XOPERATOR)
3424 no_op("Array length", s);
3280af22 3425 if (!PL_tokenbuf[1])
a0d0e21e 3426 PREREF(DOLSHARP);
3280af22
NIS
3427 PL_expect = XOPERATOR;
3428 PL_pending_ident = '#';
463ee0b2 3429 TOKEN(DOLSHARP);
79072805 3430 }
bbce6d69 3431
3280af22 3432 PL_tokenbuf[0] = '$';
376b8730
SM
3433 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3434 sizeof PL_tokenbuf - 1, FALSE);
3435 if (PL_expect == XOPERATOR)
3436 no_op("Scalar", s);
3280af22
NIS
3437 if (!PL_tokenbuf[1]) {
3438 if (s == PL_bufend)
bbce6d69 3439 yyerror("Final $ should be \\$ or $name");
3440 PREREF('$');
8990e307 3441 }
a0d0e21e 3442
bbce6d69 3443 /* This kludge not intended to be bulletproof. */
3280af22 3444 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3445 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3446 newSViv(PL_compiling.cop_arybase));
bbce6d69 3447 yylval.opval->op_private = OPpCONST_ARYBASE;
3448 TERM(THING);
3449 }
3450
ff68c719 3451 d = s;
69d2bceb 3452 tmp = (I32)*s;
3280af22 3453 if (PL_lex_state == LEX_NORMAL)
ff68c719 3454 s = skipspace(s);
3455
3280af22 3456 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3457 char *t;
3458 if (*s == '[') {
3280af22 3459 PL_tokenbuf[0] = '@';
599cee73 3460 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3461 for(t = s + 1;
7e2040f0 3462 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3463 t++) ;
a0d0e21e 3464 if (*t++ == ',') {
3280af22
NIS
3465 PL_bufptr = skipspace(PL_bufptr);
3466 while (t < PL_bufend && *t != ']')
bbce6d69 3467 t++;
cea2e8a9 3468 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3469 "Multidimensional syntax %.*s not supported",
3470 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3471 }
3472 }
bbce6d69 3473 }
3474 else if (*s == '{') {
3280af22 3475 PL_tokenbuf[0] = '%';
599cee73 3476 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3477 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3478 {
3280af22 3479 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3480 STRLEN len;
3481 for (t++; isSPACE(*t); t++) ;
7e2040f0 3482 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3483 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3484 for (; isSPACE(*t); t++) ;
864dbfa3 3485 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3486 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3487 "You need to quote \"%s\"", tmpbuf);
748a9306 3488 }
93a17b20
LW
3489 }
3490 }
2f3197b3 3491 }
bbce6d69 3492
3280af22 3493 PL_expect = XOPERATOR;
69d2bceb 3494 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3495 bool islop = (PL_last_lop == PL_oldoldbufptr);
3496 if (!islop || PL_last_lop_op == OP_GREPSTART)
3497 PL_expect = XOPERATOR;
bbce6d69 3498 else if (strchr("$@\"'`q", *s))
3280af22 3499 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3500 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3501 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3502 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3503 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3504 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3505 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3506 /* binary operators exclude handle interpretations */
3507 switch (tmp) {
3508 case -KEY_x:
3509 case -KEY_eq:
3510 case -KEY_ne:
3511 case -KEY_gt:
3512 case -KEY_lt:
3513 case -KEY_ge:
3514 case -KEY_le:
3515 case -KEY_cmp:
3516 break;
3517 default:
3280af22 3518 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3519 break;
3520 }
3521 }
68dc0745 3522 else {
3523 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3524 if (gv && GvCVu(gv))
3280af22 3525 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3526 }
93a17b20 3527 }
bbce6d69 3528 else if (isDIGIT(*s))
3280af22 3529 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3530 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3531 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3532 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3533 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3534 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3535 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3536 }
3280af22 3537 PL_pending_ident = '$';
79072805 3538 TOKEN('$');
378cc40b
LW
3539
3540 case '@':
3280af22 3541 if (PL_expect == XOPERATOR)
bbce6d69 3542 no_op("Array", s);
3280af22
NIS
3543 PL_tokenbuf[0] = '@';
3544 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3545 if (!PL_tokenbuf[1]) {
3546 if (s == PL_bufend)
bbce6d69 3547 yyerror("Final @ should be \\@ or @name");
3548 PREREF('@');
3549 }
3280af22 3550 if (PL_lex_state == LEX_NORMAL)
ff68c719 3551 s = skipspace(s);
3280af22 3552 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3553 if (*s == '{')
3280af22 3554 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3555
3556 /* Warn about @ where they meant $. */
599cee73 3557 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3558 if (*s == '[' || *s == '{') {
3559 char *t = s + 1;
7e2040f0 3560 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3561 t++;
3562 if (*t == '}' || *t == ']') {
3563 t++;
3280af22 3564 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3565 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3566 "Scalar value %.*s better written as $%.*s",
3280af22 3567 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3568 }
93a17b20
LW
3569 }
3570 }
463ee0b2 3571 }
3280af22 3572 PL_pending_ident = '@';
79072805 3573 TERM('@');
378cc40b
LW
3574
3575 case '/': /* may either be division or pattern */
3576 case '?': /* may either be conditional or pattern */
3280af22 3577 if (PL_expect != XOPERATOR) {
c277df42 3578 /* Disable warning on "study /blah/" */
4e553d73
NIS
3579 if (PL_oldoldbufptr == PL_last_uni
3580 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3581 || memNE(PL_last_uni, "study", 5)
3582 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3583 check_uni();
8782bef2 3584 s = scan_pat(s,OP_MATCH);
79072805 3585 TERM(sublex_start());
378cc40b
LW
3586 }
3587 tmp = *s++;
a687059c 3588 if (tmp == '/')
79072805 3589 Mop(OP_DIVIDE);
378cc40b
LW
3590 OPERATOR(tmp);
3591
3592 case '.':
51882d45
GS
3593 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3594#ifdef PERL_STRICT_CR
3595 && s[1] == '\n'
3596#else
3597 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3598#endif
3599 && (s == PL_linestart || s[-1] == '\n') )
3600 {
3280af22
NIS
3601 PL_lex_formbrack = 0;
3602 PL_expect = XSTATE;
79072805
LW
3603 goto rightbracket;
3604 }
3280af22 3605 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3606 tmp = *s++;
a687059c
LW
3607 if (*s == tmp) {
3608 s++;
2f3197b3
LW
3609 if (*s == tmp) {
3610 s++;
79072805 3611 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3612 }
3613 else
79072805 3614 yylval.ival = 0;
378cc40b 3615 OPERATOR(DOTDOT);
a687059c 3616 }
3280af22 3617 if (PL_expect != XOPERATOR)
2f3197b3 3618 check_uni();
79072805 3619 Aop(OP_CONCAT);
378cc40b
LW
3620 }
3621 /* FALL THROUGH */
3622 case '0': case '1': case '2': case '3': case '4':
3623 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3624 s = scan_num(s, &yylval);
4e553d73 3625 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3626 "### Saw number in '%s'\n", s);
5f80b19c 3627 } );
3280af22 3628 if (PL_expect == XOPERATOR)
8990e307 3629 no_op("Number",s);
79072805
LW
3630 TERM(THING);
3631
3632 case '\'':
09bef843 3633 s = scan_str(s,FALSE,FALSE);
4e553d73 3634 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3635 "### Saw string before '%s'\n", s);
5f80b19c 3636 } );
3280af22
NIS
3637 if (PL_expect == XOPERATOR) {
3638 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3639 PL_expect = XTERM;
a0d0e21e
LW
3640 depcom();
3641 return ','; /* grandfather non-comma-format format */
3642 }
463ee0b2 3643 else
8990e307 3644 no_op("String",s);
463ee0b2 3645 }
79072805 3646 if (!s)
85e6fe83 3647 missingterm((char*)0);
79072805
LW
3648 yylval.ival = OP_CONST;
3649 TERM(sublex_start());
3650
3651 case '"':
09bef843 3652 s = scan_str(s,FALSE,FALSE);
4e553d73 3653 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3654 "### Saw string before '%s'\n", s);
5f80b19c 3655 } );
3280af22
NIS
3656 if (PL_expect == XOPERATOR) {
3657 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3658 PL_expect = XTERM;
a0d0e21e
LW
3659 depcom();
3660 return ','; /* grandfather non-comma-format format */
3661 }
463ee0b2 3662 else
8990e307 3663 no_op("String",s);
463ee0b2 3664 }
79072805 3665 if (!s)
85e6fe83 3666 missingterm((char*)0);
4633a7c4 3667 yylval.ival = OP_CONST;
3280af22 3668 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3669 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3670 yylval.ival = OP_STRINGIFY;
3671 break;
3672 }
3673 }
79072805
LW
3674 TERM(sublex_start());
3675
3676 case '`':
09bef843 3677 s = scan_str(s,FALSE,FALSE);
4e553d73 3678 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3679 "### Saw backtick string before '%s'\n", s);
5f80b19c 3680 } );
3280af22 3681 if (PL_expect == XOPERATOR)
8990e307 3682 no_op("Backticks",s);
79072805 3683 if (!s)
85e6fe83 3684 missingterm((char*)0);
79072805
LW
3685 yylval.ival = OP_BACKTICK;
3686 set_csh();
3687 TERM(sublex_start());
3688
3689 case '\\':
3690 s++;
599cee73 3691 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3692 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3693 *s, *s);
3280af22 3694 if (PL_expect == XOPERATOR)
8990e307 3695 no_op("Backslash",s);
79072805
LW
3696 OPERATOR(REFGEN);
3697
a7cb1f99 3698 case 'v':
e526c9e6 3699 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3700 char *start = s;
3701 start++;
3702 start++;
dd629d5b 3703 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3704 start++;
3705 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3706 s = scan_num(s, &yylval);
a7cb1f99
GS
3707 TERM(THING);
3708 }
e526c9e6 3709 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
62ec34bd 3710 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) {
e526c9e6
GS
3711 char c = *start;
3712 GV *gv;
3713 *start = '\0';
3714 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3715 *start = c;
3716 if (!gv) {
b73d6f50 3717 s = scan_num(s, &yylval);
e526c9e6
GS
3718 TERM(THING);
3719 }
3720 }
a7cb1f99
GS
3721 }
3722 goto keylookup;
79072805 3723 case 'x':
3280af22 3724 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3725 s++;
3726 Mop(OP_REPEAT);
2f3197b3 3727 }
79072805
LW
3728 goto keylookup;
3729
378cc40b 3730 case '_':
79072805
LW
3731 case 'a': case 'A':
3732 case 'b': case 'B':
3733 case 'c': case 'C':
3734 case 'd': case 'D':
3735 case 'e': case 'E':
3736 case 'f': case 'F':
3737 case 'g': case 'G':
3738 case 'h': case 'H':
3739 case 'i': case 'I':
3740 case 'j': case 'J':
3741 case 'k': case 'K':
3742 case 'l': case 'L':
3743 case 'm': case 'M':
3744 case 'n': case 'N':
3745 case 'o': case 'O':
3746 case 'p': case 'P':
3747 case 'q': case 'Q':
3748 case 'r': case 'R':
3749 case 's': case 'S':
3750 case 't': case 'T':
3751 case 'u': case 'U':
a7cb1f99 3752 case 'V':
79072805
LW
3753 case 'w': case 'W':
3754 case 'X':
3755 case 'y': case 'Y':
3756 case 'z': case 'Z':
3757
49dc05e3 3758 keylookup: {
161b471a
NIS
3759 gv = Nullgv;
3760 gvp = 0;
49dc05e3 3761
3280af22
NIS
3762 PL_bufptr = s;
3763 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3764
3765 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3766 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3767 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3768 (PL_tokenbuf[0] == 'q' &&
3769 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3770
3771 /* x::* is just a word, unless x is "CORE" */
3280af22 3772 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3773 goto just_a_word;
3774
3643fb5f 3775 d = s;
3280af22 3776 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3777 d++; /* no comments skipped here, or s### is misparsed */
3778
3779 /* Is this a label? */
3280af22
NIS
3780 if (!tmp && PL_expect == XSTATE
3781 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3782 s = d + 1;
3280af22 3783 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3784 CLINE;
3785 TOKEN(LABEL);
3643fb5f
CS
3786 }
3787
3788 /* Check for keywords */
3280af22 3789 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3790
3791 /* Is this a word before a => operator? */
1c3923b3 3792 if (*d == '=' && d[1] == '>') {
748a9306 3793 CLINE;
3280af22 3794 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 3795 yylval.opval->op_private = OPpCONST_BARE;
0064a8a9 3796 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 3797 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
3798 TERM(WORD);
3799 }
3800
a0d0e21e 3801 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3802 GV *ogv = Nullgv; /* override (winner) */
3803 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3804 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3805 CV *cv;
3280af22 3806 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3807 (cv = GvCVu(gv)))
3808 {
3809 if (GvIMPORTED_CV(gv))
3810 ogv = gv;
3811 else if (! CvMETHOD(cv))
3812 hgv = gv;
3813 }
3814 if (!ogv &&
3280af22
NIS
3815 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3816 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3817 GvCVu(gv) && GvIMPORTED_CV(gv))
3818 {
3819 ogv = gv;
3820 }
3821 }
3822 if (ogv) {
3823 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3824 }
3825 else if (gv && !gvp
3826 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3827 && GvCVu(gv)
3280af22 3828 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3829 {
3830 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3831 }
56f7f34b
CS
3832 else { /* no override */
3833 tmp = -tmp;
ac206dc8
RGS
3834 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
3835 Perl_warner(aTHX_ WARN_MISC,
3836 "dump() better written as CORE::dump()");
3837 }
56f7f34b
CS
3838 gv = Nullgv;
3839 gvp = 0;
4944e2f7
GS
3840 if (ckWARN(WARN_AMBIGUOUS) && hgv
3841 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3842 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3843 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3844 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3845 }
a0d0e21e
LW
3846 }
3847
3848 reserved_word:
3849 switch (tmp) {
79072805
LW
3850
3851 default: /* not a keyword */
93a17b20 3852 just_a_word: {
96e4d5b1 3853 SV *sv;
ce29ac45 3854 int pkgname = 0;
3280af22 3855 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3856
3857 /* Get the rest if it looks like a package qualifier */
3858
155aba94 3859 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 3860 STRLEN morelen;
3280af22 3861 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3862 TRUE, &morelen);
3863 if (!morelen)
cea2e8a9 3864 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3865 *s == '\'' ? "'" : "::");
c3e0f903 3866 len += morelen;
ce29ac45 3867 pkgname = 1;
a0d0e21e 3868 }
8990e307 3869
3280af22
NIS
3870 if (PL_expect == XOPERATOR) {
3871 if (PL_bufptr == PL_linestart) {
57843af0 3872 CopLINE_dec(PL_curcop);
cea2e8a9 3873 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3874 CopLINE_inc(PL_curcop);
463ee0b2
LW
3875 }
3876 else
54310121 3877 no_op("Bareword",s);
463ee0b2 3878 }
8990e307 3879
c3e0f903
GS
3880 /* Look for a subroutine with this name in current package,
3881 unless name is "Foo::", in which case Foo is a bearword
3882 (and a package name). */
3883
3884 if (len > 2 &&
3280af22 3885 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3886 {
e476b1b5 3887 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4e553d73 3888 Perl_warner(aTHX_ WARN_BAREWORD,
599cee73 3889 "Bareword \"%s\" refers to nonexistent package",
3280af22 3890 PL_tokenbuf);
c3e0f903 3891 len -= 2;
3280af22 3892 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3893 gv = Nullgv;
3894 gvp = 0;
3895 }
3896 else {
3897 len = 0;
3898 if (!gv)
3280af22 3899 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3900 }
3901
3902 /* if we saw a global override before, get the right name */
8990e307 3903
49dc05e3 3904 if (gvp) {
79cb57f6 3905 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3906 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3907 }
3908 else
3280af22 3909 sv = newSVpv(PL_tokenbuf,0);
8990e307 3910
a0d0e21e
LW
3911 /* Presume this is going to be a bareword of some sort. */
3912
3913 CLINE;
49dc05e3 3914 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3915 yylval.opval->op_private = OPpCONST_BARE;
3916
c3e0f903
GS
3917 /* And if "Foo::", then that's what it certainly is. */
3918
3919 if (len)
3920 goto safe_bareword;
3921
8990e307
LW
3922 /* See if it's the indirect object for a list operator. */
3923
3280af22
NIS
3924 if (PL_oldoldbufptr &&
3925 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3926 (PL_oldoldbufptr == PL_last_lop
3927 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3928 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3929 (PL_expect == XREF ||
3930 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3931 {
748a9306
LW
3932 bool immediate_paren = *s == '(';
3933
a0d0e21e
LW
3934 /* (Now we can afford to cross potential line boundary.) */
3935 s = skipspace(s);
3936
3937 /* Two barewords in a row may indicate method call. */
3938
7e2040f0 3939 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
3940 return tmp;
3941
3942 /* If not a declared subroutine, it's an indirect object. */
3943 /* (But it's an indir obj regardless for sort.) */
3944
7948272d 3945 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 3946 ((!gv || !GvCVu(gv)) &&
a9ef352a 3947 (PL_last_lop_op != OP_MAPSTART &&
f0670693 3948 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 3949 {
3280af22 3950 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 3951 goto bareword;
93a17b20
LW
3952 }
3953 }
8990e307 3954
3280af22 3955 PL_expect = XOPERATOR;
8990e307 3956 s = skipspace(s);
1c3923b3
GS
3957
3958 /* Is this a word before a => operator? */
ce29ac45 3959 if (*s == '=' && s[1] == '>' && !pkgname) {
1c3923b3
GS
3960 CLINE;
3961 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 3962 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 3963 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
3964 TERM(WORD);
3965 }
3966
3967 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 3968 if (*s == '(') {
79072805 3969 CLINE;
96e4d5b1 3970 if (gv && GvCVu(gv)) {
bf4acbe4 3971 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 3972 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 3973 s = d + 1;
3974 goto its_constant;
3975 }
3976 }
3280af22
NIS
3977 PL_nextval[PL_nexttoke].opval = yylval.opval;
3978 PL_expect = XOPERATOR;
93a17b20 3979 force_next(WORD);
c07a80fd 3980 yylval.ival = 0;
463ee0b2 3981 TOKEN('&');
79072805 3982 }
93a17b20 3983
a0d0e21e 3984 /* If followed by var or block, call it a method (unless sub) */
8990e307 3985
8ebc5c01 3986 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
3987 PL_last_lop = PL_oldbufptr;
3988 PL_last_lop_op = OP_METHOD;
93a17b20 3989 PREBLOCK(METHOD);
463ee0b2
LW
3990 }
3991
8990e307
LW
3992 /* If followed by a bareword, see if it looks like indir obj. */
3993
7e2040f0 3994 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv)))
a0d0e21e 3995 return tmp;
93a17b20 3996
8990e307
LW
3997 /* Not a method, so call it a subroutine (if defined) */
3998
8ebc5c01 3999 if (gv && GvCVu(gv)) {
46fc3d4c 4000 CV* cv;
0453d815
PM
4001 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4002 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4003 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4004 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4005 /* Check for a constant sub */
46fc3d4c 4006 cv = GvCV(gv);
96e4d5b1 4007 if ((sv = cv_const_sv(cv))) {
4008 its_constant:
4009 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4010 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4011 yylval.opval->op_private = 0;
4012 TOKEN(WORD);
89bfa8cd 4013 }
4014
a5f75d66
AD
4015 /* Resolve to GV now. */
4016 op_free(yylval.opval);
4017 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4018 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4019 PL_last_lop = PL_oldbufptr;
bf848113 4020 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4021 /* Is there a prototype? */
4022 if (SvPOK(cv)) {
4023 STRLEN len;
7a52d87a 4024 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4025 if (!len)
4026 TERM(FUNC0SUB);
7a52d87a 4027 if (strEQ(proto, "$"))
4633a7c4 4028 OPERATOR(UNIOPSUB);
7a52d87a 4029 if (*proto == '&' && *s == '{') {
3280af22 4030 sv_setpv(PL_subname,"__ANON__");
4633a7c4
LW
4031 PREBLOCK(LSTOPSUB);
4032 }
a9ef352a 4033 }
3280af22
NIS
4034 PL_nextval[PL_nexttoke].opval = yylval.opval;
4035 PL_expect = XTERM;
8990e307
LW
4036 force_next(WORD);
4037 TOKEN(NOAMP);
4038 }
748a9306 4039
8990e307
LW
4040 /* Call it a bare word */
4041
5603f27d
GS
4042 if (PL_hints & HINT_STRICT_SUBS)
4043 yylval.opval->op_private |= OPpCONST_STRICT;
4044 else {
4045 bareword:
4046 if (ckWARN(WARN_RESERVED)) {
4047 if (lastchar != '-') {
4048 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
9bbb1ada 4049 if (!*d && strNE(PL_tokenbuf,"main"))
cea2e8a9 4050 Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved,
5603f27d
GS
4051 PL_tokenbuf);
4052 }
748a9306
LW
4053 }
4054 }
c3e0f903
GS
4055
4056 safe_bareword:
f248d071 4057 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
0453d815
PM
4058 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4059 "Operator or semicolon missing before %c%s",
3280af22 4060 lastchar, PL_tokenbuf);
0453d815
PM
4061 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4062 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4063 lastchar, lastchar);
4064 }
93a17b20 4065 TOKEN(WORD);
79072805 4066 }
79072805 4067
68dc0745 4068 case KEY___FILE__:
46fc3d4c 4069 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4070 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4071 TERM(THING);
4072
79072805 4073 case KEY___LINE__:
cf2093f6 4074 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4075 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4076 TERM(THING);
68dc0745 4077
4078 case KEY___PACKAGE__:
4079 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
4080 (PL_curstash
4081 ? newSVsv(PL_curstname)
4082 : &PL_sv_undef));
79072805 4083 TERM(THING);
79072805 4084
e50aee73 4085 case KEY___DATA__:
79072805
LW
4086 case KEY___END__: {
4087 GV *gv;
79072805
LW
4088
4089 /*SUPPRESS 560*/
3280af22 4090 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4091 char *pname = "main";
3280af22
NIS
4092 if (PL_tokenbuf[2] == 'D')
4093 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4094 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4095 GvMULTI_on(gv);
79072805 4096 if (!GvIO(gv))
a0d0e21e 4097 GvIOp(gv) = newIO();
3280af22 4098 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4099#if defined(HAS_FCNTL) && defined(F_SETFD)
4100 {
3280af22 4101 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4102 fcntl(fd,F_SETFD,fd >= 3);
4103 }
79072805 4104#endif
fd049845 4105 /* Mark this internal pseudo-handle as clean */
4106 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4107 if (PL_preprocess)
50952442 4108 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4109 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4110 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4111 else
50952442 4112 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4113#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4114 /* if the script was opened in binmode, we need to revert
53129d29 4115 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4116 * XXX this is a questionable hack at best. */
53129d29
GS
4117 if (PL_bufend-PL_bufptr > 2
4118 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4119 {
4120 Off_t loc = 0;
50952442 4121 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4122 loc = PerlIO_tell(PL_rsfp);
4123 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4124 }
2986a63f
JH
4125#ifdef NETWARE
4126 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4127#else
c39cd008 4128 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 4129#endif /* NETWARE */
1143fce0
JH
4130#ifdef PERLIO_IS_STDIO /* really? */
4131# if defined(__BORLANDC__)
cb359b41
JH
4132 /* XXX see note in do_binmode() */
4133 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4134# endif
4135#endif
c39cd008
GS
4136 if (loc > 0)
4137 PerlIO_seek(PL_rsfp, loc, 0);
4138 }
4139 }
4140#endif
7948272d 4141#ifdef PERLIO_LAYERS
0064a8a9 4142 if (UTF && !IN_BYTES)
7948272d
NIS
4143 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4144#endif
3280af22 4145 PL_rsfp = Nullfp;
79072805
LW
4146 }
4147 goto fake_eof;
e929a76b 4148 }
de3bb511 4149
8990e307 4150 case KEY_AUTOLOAD:
ed6116ce 4151 case KEY_DESTROY:
79072805 4152 case KEY_BEGIN:
7d30b5c4 4153 case KEY_CHECK:
7d07dbc2 4154 case KEY_INIT:
7d30b5c4 4155 case KEY_END:
3280af22
NIS
4156 if (PL_expect == XSTATE) {
4157 s = PL_bufptr;
93a17b20 4158 goto really_sub;
79072805
LW
4159 }
4160 goto just_a_word;
4161
a0d0e21e
LW
4162 case KEY_CORE:
4163 if (*s == ':' && s[1] == ':') {
4164 s += 2;
748a9306 4165 d = s;
3280af22 4166 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4167 if (!(tmp = keyword(PL_tokenbuf, len)))
4168 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4169 if (tmp < 0)
4170 tmp = -tmp;
4171 goto reserved_word;
4172 }
4173 goto just_a_word;
4174
463ee0b2
LW
4175 case KEY_abs:
4176 UNI(OP_ABS);
4177
79072805
LW
4178 case KEY_alarm:
4179 UNI(OP_ALARM);
4180
4181 case KEY_accept:
a0d0e21e 4182 LOP(OP_ACCEPT,XTERM);
79072805 4183
463ee0b2
LW
4184 case KEY_and:
4185 OPERATOR(ANDOP);
4186
79072805 4187 case KEY_atan2:
a0d0e21e 4188 LOP(OP_ATAN2,XTERM);
85e6fe83 4189
79072805 4190 case KEY_bind:
a0d0e21e 4191 LOP(OP_BIND,XTERM);
79072805
LW
4192
4193 case KEY_binmode:
1c1fc3ea 4194 LOP(OP_BINMODE,XTERM);
79072805
LW
4195
4196 case KEY_bless:
a0d0e21e 4197 LOP(OP_BLESS,XTERM);
79072805
LW
4198
4199 case KEY_chop:
4200 UNI(OP_CHOP);
4201
4202 case KEY_continue:
4203 PREBLOCK(CONTINUE);
4204
4205 case KEY_chdir:
85e6fe83 4206 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4207 UNI(OP_CHDIR);
4208
4209 case KEY_close:
4210 UNI(OP_CLOSE);
4211
4212 case KEY_closedir:
4213 UNI(OP_CLOSEDIR);
4214
4215 case KEY_cmp:
4216 Eop(OP_SCMP);
4217
4218 case KEY_caller:
4219 UNI(OP_CALLER);
4220
4221 case KEY_crypt:
4222#ifdef FCRYPT
f4c556ac
GS
4223 if (!PL_cryptseen) {
4224 PL_cryptseen = TRUE;
de3bb511 4225 init_des();
f4c556ac 4226 }
a687059c 4227#endif
a0d0e21e 4228 LOP(OP_CRYPT,XTERM);
79072805
LW
4229
4230 case KEY_chmod:
a0d0e21e 4231 LOP(OP_CHMOD,XTERM);
79072805
LW
4232
4233 case KEY_chown:
a0d0e21e 4234 LOP(OP_CHOWN,XTERM);
79072805
LW
4235
4236 case KEY_connect:
a0d0e21e 4237 LOP(OP_CONNECT,XTERM);
79072805 4238
463ee0b2
LW
4239 case KEY_chr:
4240 UNI(OP_CHR);
4241
79072805
LW
4242 case KEY_cos:
4243 UNI(OP_COS);
4244
4245 case KEY_chroot:
4246 UNI(OP_CHROOT);
4247
4248 case KEY_do:
4249 s = skipspace(s);
4250 if (*s == '{')
a0d0e21e 4251 PRETERMBLOCK(DO);
79072805 4252 if (*s != '\'')
89c5585f 4253 s = force_word(s,WORD,TRUE,TRUE,FALSE);
378cc40b 4254 OPERATOR(DO);
79072805
LW
4255
4256 case KEY_die:
3280af22 4257 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4258 LOP(OP_DIE,XTERM);
79072805
LW
4259
4260 case KEY_defined:
4261 UNI(OP_DEFINED);
4262
4263 case KEY_delete:
a0d0e21e 4264 UNI(OP_DELETE);
79072805
LW
4265
4266 case KEY_dbmopen:
a0d0e21e
LW
4267 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4268 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4269
4270 case KEY_dbmclose:
4271 UNI(OP_DBMCLOSE);
4272
4273 case KEY_dump:
a0d0e21e 4274 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4275 LOOPX(OP_DUMP);
4276
4277 case KEY_else:
4278 PREBLOCK(ELSE);
4279
4280 case KEY_elsif:
57843af0 4281 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4282 OPERATOR(ELSIF);
4283
4284 case KEY_eq:
4285 Eop(OP_SEQ);
4286
a0d0e21e
LW
4287 case KEY_exists:
4288 UNI(OP_EXISTS);
4e553d73 4289
79072805
LW
4290 case KEY_exit:
4291 UNI(OP_EXIT);
4292
4293 case KEY_eval:
79072805 4294 s = skipspace(s);
3280af22 4295 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4296 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4297
4298 case KEY_eof:
4299 UNI(OP_EOF);
4300
4301 case KEY_exp:
4302 UNI(OP_EXP);
4303
4304 case KEY_each:
4305 UNI(OP_EACH);
4306
4307 case KEY_exec:
4308 set_csh();
a0d0e21e 4309 LOP(OP_EXEC,XREF);
79072805
LW
4310
4311 case KEY_endhostent:
4312 FUN0(OP_EHOSTENT);
4313
4314 case KEY_endnetent:
4315 FUN0(OP_ENETENT);
4316
4317 case KEY_endservent:
4318 FUN0(OP_ESERVENT);
4319
4320 case KEY_endprotoent:
4321 FUN0(OP_EPROTOENT);
4322
4323 case KEY_endpwent:
4324 FUN0(OP_EPWENT);
4325
4326 case KEY_endgrent:
4327 FUN0(OP_EGRENT);
4328
4329 case KEY_for:
4330 case KEY_foreach:
57843af0 4331 yylval.ival = CopLINE(PL_curcop);
55497cff 4332 s = skipspace(s);
7e2040f0 4333 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4334 char *p = s;
3280af22 4335 if ((PL_bufend - p) >= 3 &&
55497cff 4336 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4337 p += 2;
77ca0c92
LW
4338 else if ((PL_bufend - p) >= 4 &&
4339 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4340 p += 3;
55497cff 4341 p = skipspace(p);
7e2040f0 4342 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4343 p = scan_ident(p, PL_bufend,
4344 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4345 p = skipspace(p);
4346 }
4347 if (*p != '$')
cea2e8a9 4348 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4349 }
79072805
LW
4350 OPERATOR(FOR);
4351
4352 case KEY_formline:
a0d0e21e 4353 LOP(OP_FORMLINE,XTERM);
79072805
LW
4354
4355 case KEY_fork:
4356 FUN0(OP_FORK);
4357
4358 case KEY_fcntl:
a0d0e21e 4359 LOP(OP_FCNTL,XTERM);
79072805
LW
4360
4361 case KEY_fileno:
4362 UNI(OP_FILENO);
4363
4364 case KEY_flock:
a0d0e21e 4365 LOP(OP_FLOCK,XTERM);
79072805
LW
4366
4367 case KEY_gt:
4368 Rop(OP_SGT);
4369
4370 case KEY_ge:
4371 Rop(OP_SGE);
4372
4373 case KEY_grep:
2c38e13d 4374 LOP(OP_GREPSTART, XREF);
79072805
LW
4375
4376 case KEY_goto:
a0d0e21e 4377 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4378 LOOPX(OP_GOTO);
4379
4380 case KEY_gmtime:
4381 UNI(OP_GMTIME);
4382
4383 case KEY_getc:
4384 UNI(OP_GETC);
4385
4386 case KEY_getppid:
4387 FUN0(OP_GETPPID);
4388
4389 case KEY_getpgrp:
4390 UNI(OP_GETPGRP);
4391
4392 case KEY_getpriority:
a0d0e21e 4393 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4394
4395 case KEY_getprotobyname:
4396 UNI(OP_GPBYNAME);
4397
4398 case KEY_getprotobynumber:
a0d0e21e 4399 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4400
4401 case KEY_getprotoent:
4402 FUN0(OP_GPROTOENT);
4403
4404 case KEY_getpwent:
4405 FUN0(OP_GPWENT);
4406
4407 case KEY_getpwnam:
ff68c719 4408 UNI(OP_GPWNAM);
79072805
LW
4409
4410 case KEY_getpwuid:
ff68c719 4411 UNI(OP_GPWUID);
79072805
LW
4412
4413 case KEY_getpeername:
4414 UNI(OP_GETPEERNAME);
4415
4416 case KEY_gethostbyname:
4417 UNI(OP_GHBYNAME);
4418
4419 case KEY_gethostbyaddr:
a0d0e21e 4420 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4421
4422 case KEY_gethostent:
4423 FUN0(OP_GHOSTENT);
4424
4425 case KEY_getnetbyname:
4426 UNI(OP_GNBYNAME);
4427
4428 case KEY_getnetbyaddr:
a0d0e21e 4429 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4430
4431 case KEY_getnetent:
4432 FUN0(OP_GNETENT);
4433
4434 case KEY_getservbyname:
a0d0e21e 4435 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4436
4437 case KEY_getservbyport:
a0d0e21e 4438 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4439
4440 case KEY_getservent:
4441 FUN0(OP_GSERVENT);
4442
4443 case KEY_getsockname:
4444 UNI(OP_GETSOCKNAME);
4445
4446 case KEY_getsockopt:
a0d0e21e 4447 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4448
4449 case KEY_getgrent:
4450 FUN0(OP_GGRENT);
4451
4452 case KEY_getgrnam:
ff68c719 4453 UNI(OP_GGRNAM);
79072805
LW
4454
4455 case KEY_getgrgid:
ff68c719 4456 UNI(OP_GGRGID);
79072805
LW
4457
4458 case KEY_getlogin:
4459 FUN0(OP_GETLOGIN);
4460
93a17b20 4461 case KEY_glob:
a0d0e21e
LW
4462 set_csh();
4463 LOP(OP_GLOB,XTERM);
93a17b20 4464
79072805
LW
4465 case KEY_hex:
4466 UNI(OP_HEX);
4467
4468 case KEY_if:
57843af0 4469 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4470 OPERATOR(IF);
4471
4472 case KEY_index:
a0d0e21e 4473 LOP(OP_INDEX,XTERM);
79072805
LW
4474
4475 case KEY_int:
4476 UNI(OP_INT);
4477
4478 case KEY_ioctl:
a0d0e21e 4479 LOP(OP_IOCTL,XTERM);
79072805
LW
4480
4481 case KEY_join:
a0d0e21e 4482 LOP(OP_JOIN,XTERM);
79072805
LW
4483
4484 case KEY_keys:
4485 UNI(OP_KEYS);
4486
4487 case KEY_kill:
a0d0e21e 4488 LOP(OP_KILL,XTERM);
79072805
LW
4489
4490 case KEY_last:
a0d0e21e 4491 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4492 LOOPX(OP_LAST);
4e553d73 4493
79072805
LW
4494 case KEY_lc:
4495 UNI(OP_LC);
4496
4497 case KEY_lcfirst:
4498 UNI(OP_LCFIRST);
4499
4500 case KEY_local:
09bef843 4501 yylval.ival = 0;
79072805
LW
4502 OPERATOR(LOCAL);
4503
4504 case KEY_length:
4505 UNI(OP_LENGTH);
4506
4507 case KEY_lt:
4508 Rop(OP_SLT);
4509
4510 case KEY_le:
4511 Rop(OP_SLE);
4512
4513 case KEY_localtime:
4514 UNI(OP_LOCALTIME);
4515
4516 case KEY_log:
4517 UNI(OP_LOG);
4518
4519 case KEY_link:
a0d0e21e 4520 LOP(OP_LINK,XTERM);
79072805
LW
4521
4522 case KEY_listen:
a0d0e21e 4523 LOP(OP_LISTEN,XTERM);
79072805 4524
c0329465
MB
4525 case KEY_lock:
4526 UNI(OP_LOCK);
4527
79072805
LW
4528 case KEY_lstat:
4529 UNI(OP_LSTAT);
4530
4531 case KEY_m:
8782bef2 4532 s = scan_pat(s,OP_MATCH);
79072805
LW
4533 TERM(sublex_start());
4534
a0d0e21e 4535 case KEY_map:
2c38e13d 4536 LOP(OP_MAPSTART, XREF);
4e4e412b 4537
79072805 4538 case KEY_mkdir:
a0d0e21e 4539 LOP(OP_MKDIR,XTERM);
79072805
LW
4540
4541 case KEY_msgctl:
a0d0e21e 4542 LOP(OP_MSGCTL,XTERM);
79072805
LW
4543
4544 case KEY_msgget:
a0d0e21e 4545 LOP(OP_MSGGET,XTERM);
79072805
LW
4546
4547 case KEY_msgrcv:
a0d0e21e 4548 LOP(OP_MSGRCV,XTERM);
79072805
LW
4549
4550 case KEY_msgsnd:
a0d0e21e 4551 LOP(OP_MSGSND,XTERM);
79072805 4552
77ca0c92 4553 case KEY_our:
93a17b20 4554 case KEY_my:
77ca0c92 4555 PL_in_my = tmp;
c750a3ec 4556 s = skipspace(s);
7e2040f0 4557 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4558 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4559 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4560 goto really_sub;
def3634b 4561 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4562 if (!PL_in_my_stash) {
c750a3ec 4563 char tmpbuf[1024];
3280af22
NIS
4564 PL_bufptr = s;
4565 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4566 yyerror(tmpbuf);
4567 }
4568 }
09bef843 4569 yylval.ival = 1;
55497cff 4570 OPERATOR(MY);
93a17b20 4571
79072805 4572 case KEY_next:
a0d0e21e 4573 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4574 LOOPX(OP_NEXT);
4575
4576 case KEY_ne:
4577 Eop(OP_SNE);
4578
a0d0e21e 4579 case KEY_no:
3280af22 4580 if (PL_expect != XSTATE)
a0d0e21e
LW
4581 yyerror("\"no\" not allowed in expression");
4582 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 4583 s = force_version(s, FALSE);
a0d0e21e
LW
4584 yylval.ival = 0;
4585 OPERATOR(USE);
4586
4587 case KEY_not:
2d2e263d
LW
4588 if (*s == '(' || (s = skipspace(s), *s == '('))
4589 FUN1(OP_NOT);
4590 else
4591 OPERATOR(NOTOP);
a0d0e21e 4592
79072805 4593 case KEY_open:
93a17b20 4594 s = skipspace(s);
7e2040f0 4595 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4596 char *t;
7e2040f0 4597 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
93a17b20 4598 t = skipspace(d);
e476b1b5
GS
4599 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE))
4600 Perl_warner(aTHX_ WARN_PRECEDENCE,
0453d815
PM
4601 "Precedence problem: open %.*s should be open(%.*s)",
4602 d-s,s, d-s,s);
93a17b20 4603 }
a0d0e21e 4604 LOP(OP_OPEN,XTERM);
79072805 4605
463ee0b2 4606 case KEY_or:
a0d0e21e 4607 yylval.ival = OP_OR;
463ee0b2
LW
4608 OPERATOR(OROP);
4609
79072805
LW
4610 case KEY_ord:
4611 UNI(OP_ORD);
4612
4613 case KEY_oct:
4614 UNI(OP_OCT);
4615
4616 case KEY_opendir:
a0d0e21e 4617 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4618
4619 case KEY_print:
3280af22 4620 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4621 LOP(OP_PRINT,XREF);
79072805
LW
4622
4623 case KEY_printf:
3280af22 4624 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4625 LOP(OP_PRTF,XREF);
79072805 4626
c07a80fd 4627 case KEY_prototype:
4628 UNI(OP_PROTOTYPE);
4629
79072805 4630 case KEY_push:
a0d0e21e 4631 LOP(OP_PUSH,XTERM);
79072805
LW
4632
4633 case KEY_pop:
4634 UNI(OP_POP);
4635
a0d0e21e
LW
4636 case KEY_pos:
4637 UNI(OP_POS);
4e553d73 4638
79072805 4639 case KEY_pack:
a0d0e21e 4640 LOP(OP_PACK,XTERM);
79072805
LW
4641
4642 case KEY_package:
a0d0e21e 4643 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4644 OPERATOR(PACKAGE);
4645
4646 case KEY_pipe:
a0d0e21e 4647 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4648
4649 case KEY_q:
09bef843 4650 s = scan_str(s,FALSE,FALSE);
79072805 4651 if (!s)
85e6fe83 4652 missingterm((char*)0);
79072805
LW
4653 yylval.ival = OP_CONST;
4654 TERM(sublex_start());
4655
a0d0e21e
LW
4656 case KEY_quotemeta:
4657 UNI(OP_QUOTEMETA);
4658
8990e307 4659 case KEY_qw:
09bef843 4660 s = scan_str(s,FALSE,FALSE);
8990e307 4661 if (!s)
85e6fe83 4662 missingterm((char*)0);
8127e0e3
GS
4663 force_next(')');
4664 if (SvCUR(PL_lex_stuff)) {
4665 OP *words = Nullop;
4666 int warned = 0;
3280af22 4667 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4668 while (len) {
7948272d 4669 SV *sv;
8127e0e3
GS
4670 for (; isSPACE(*d) && len; --len, ++d) ;
4671 if (len) {
4672 char *b = d;
e476b1b5 4673 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4674 for (; !isSPACE(*d) && len; --len, ++d) {
4675 if (*d == ',') {
e476b1b5 4676 Perl_warner(aTHX_ WARN_QW,
8127e0e3
GS
4677 "Possible attempt to separate words with commas");
4678 ++warned;
4679 }
4680 else if (*d == '#') {
e476b1b5 4681 Perl_warner(aTHX_ WARN_QW,
8127e0e3
GS
4682 "Possible attempt to put comments in qw() list");
4683 ++warned;
4684 }
4685 }
4686 }
4687 else {
4688 for (; !isSPACE(*d) && len; --len, ++d) ;
4689 }
7948272d
NIS
4690 sv = newSVpvn(b, d-b);
4691 if (DO_UTF8(PL_lex_stuff))
4692 SvUTF8_on(sv);
8127e0e3 4693 words = append_elem(OP_LIST, words,
7948272d 4694 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4695 }
4696 }
8127e0e3
GS
4697 if (words) {
4698 PL_nextval[PL_nexttoke].opval = words;
4699 force_next(THING);
4700 }
55497cff 4701 }
37fd879b 4702 if (PL_lex_stuff) {
8127e0e3 4703 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4704 PL_lex_stuff = Nullsv;
4705 }
3280af22 4706 PL_expect = XTERM;
8127e0e3 4707 TOKEN('(');
8990e307 4708
79072805 4709 case KEY_qq:
09bef843 4710 s = scan_str(s,FALSE,FALSE);
79072805 4711 if (!s)
85e6fe83 4712 missingterm((char*)0);
a0d0e21e 4713 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4714 if (SvIVX(PL_lex_stuff) == '\'')
4715 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4716 TERM(sublex_start());
4717
8782bef2
GB
4718 case KEY_qr:
4719 s = scan_pat(s,OP_QR);
4720 TERM(sublex_start());
4721
79072805 4722 case KEY_qx:
09bef843 4723 s = scan_str(s,FALSE,FALSE);
79072805 4724 if (!s)
85e6fe83 4725 missingterm((char*)0);
79072805
LW
4726 yylval.ival = OP_BACKTICK;
4727 set_csh();
4728 TERM(sublex_start());
4729
4730 case KEY_return:
4731 OLDLOP(OP_RETURN);
4732
4733 case KEY_require:
a7cb1f99 4734 s = skipspace(s);
e759cc13
RGS
4735 if (isDIGIT(*s)) {
4736 s = force_version(s, FALSE);
a7cb1f99 4737 }
e759cc13
RGS
4738 else if (*s != 'v' || !isDIGIT(s[1])
4739 || (s = force_version(s, TRUE), *s == 'v'))
4740 {
a7cb1f99
GS
4741 *PL_tokenbuf = '\0';
4742 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4743 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4744 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4745 else if (*s == '<')
4746 yyerror("<> should be quotes");
4747 }
463ee0b2 4748 UNI(OP_REQUIRE);
79072805
LW
4749
4750 case KEY_reset:
4751 UNI(OP_RESET);
4752
4753 case KEY_redo:
a0d0e21e 4754 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4755 LOOPX(OP_REDO);
4756
4757 case KEY_rename:
a0d0e21e 4758 LOP(OP_RENAME,XTERM);
79072805
LW
4759
4760 case KEY_rand:
4761 UNI(OP_RAND);
4762
4763 case KEY_rmdir:
4764 UNI(OP_RMDIR);
4765
4766 case KEY_rindex:
a0d0e21e 4767 LOP(OP_RINDEX,XTERM);
79072805
LW
4768
4769 case KEY_read:
a0d0e21e 4770 LOP(OP_READ,XTERM);
79072805
LW
4771
4772 case KEY_readdir:
4773 UNI(OP_READDIR);
4774
93a17b20
LW
4775 case KEY_readline:
4776 set_csh();
4777 UNI(OP_READLINE);
4778
4779 case KEY_readpipe:
4780 set_csh();
4781 UNI(OP_BACKTICK);
4782
79072805
LW
4783 case KEY_rewinddir:
4784 UNI(OP_REWINDDIR);
4785
4786 case KEY_recv:
a0d0e21e 4787 LOP(OP_RECV,XTERM);
79072805
LW
4788
4789 case KEY_reverse:
a0d0e21e 4790 LOP(OP_REVERSE,XTERM);
79072805
LW
4791
4792 case KEY_readlink:
4793 UNI(OP_READLINK);
4794
4795 case KEY_ref:
4796 UNI(OP_REF);
4797
4798 case KEY_s:
4799 s = scan_subst(s);
4800 if (yylval.opval)
4801 TERM(sublex_start());
4802 else
4803 TOKEN(1); /* force error */
4804
a0d0e21e
LW
4805 case KEY_chomp:
4806 UNI(OP_CHOMP);
4e553d73 4807
79072805
LW
4808 case KEY_scalar:
4809 UNI(OP_SCALAR);
4810
4811 case KEY_select:
a0d0e21e 4812 LOP(OP_SELECT,XTERM);
79072805
LW
4813
4814 case KEY_seek:
a0d0e21e 4815 LOP(OP_SEEK,XTERM);
79072805
LW
4816
4817 case KEY_semctl:
a0d0e21e 4818 LOP(OP_SEMCTL,XTERM);
79072805
LW
4819
4820 case KEY_semget:
a0d0e21e 4821 LOP(OP_SEMGET,XTERM);
79072805
LW
4822
4823 case KEY_semop:
a0d0e21e 4824 LOP(OP_SEMOP,XTERM);
79072805
LW
4825
4826 case KEY_send:
a0d0e21e 4827 LOP(OP_SEND,XTERM);
79072805
LW
4828
4829 case KEY_setpgrp:
a0d0e21e 4830 LOP(OP_SETPGRP,XTERM);
79072805
LW
4831
4832 case KEY_setpriority:
a0d0e21e 4833 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
4834
4835 case KEY_sethostent:
ff68c719 4836 UNI(OP_SHOSTENT);
79072805
LW
4837
4838 case KEY_setnetent:
ff68c719 4839 UNI(OP_SNETENT);
79072805
LW
4840
4841 case KEY_setservent:
ff68c719 4842 UNI(OP_SSERVENT);
79072805
LW
4843
4844 case KEY_setprotoent:
ff68c719 4845 UNI(OP_SPROTOENT);
79072805
LW
4846
4847 case KEY_setpwent:
4848 FUN0(OP_SPWENT);
4849
4850 case KEY_setgrent:
4851 FUN0(OP_SGRENT);
4852
4853 case KEY_seekdir:
a0d0e21e 4854 LOP(OP_SEEKDIR,XTERM);
79072805
LW
4855
4856 case KEY_setsockopt:
a0d0e21e 4857 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
4858
4859 case KEY_shift:
4860 UNI(OP_SHIFT);
4861
4862 case KEY_shmctl:
a0d0e21e 4863 LOP(OP_SHMCTL,XTERM);
79072805
LW
4864
4865 case KEY_shmget:
a0d0e21e 4866 LOP(OP_SHMGET,XTERM);
79072805
LW
4867
4868 case KEY_shmread:
a0d0e21e 4869 LOP(OP_SHMREAD,XTERM);
79072805
LW
4870
4871 case KEY_shmwrite:
a0d0e21e 4872 LOP(OP_SHMWRITE,XTERM);
79072805
LW
4873
4874 case KEY_shutdown:
a0d0e21e 4875 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
4876
4877 case KEY_sin:
4878 UNI(OP_SIN);
4879
4880 case KEY_sleep:
4881 UNI(OP_SLEEP);
4882
4883 case KEY_socket:
a0d0e21e 4884 LOP(OP_SOCKET,XTERM);
79072805
LW
4885
4886 case KEY_socketpair:
a0d0e21e 4887 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
4888
4889 case KEY_sort:
3280af22 4890 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
4891 s = skipspace(s);
4892 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 4893 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 4894 PL_expect = XTERM;
15f0808c 4895 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 4896 LOP(OP_SORT,XREF);
79072805
LW
4897
4898 case KEY_split:
a0d0e21e 4899 LOP(OP_SPLIT,XTERM);
79072805
LW
4900
4901 case KEY_sprintf:
a0d0e21e 4902 LOP(OP_SPRINTF,XTERM);
79072805
LW
4903
4904 case KEY_splice:
a0d0e21e 4905 LOP(OP_SPLICE,XTERM);
79072805
LW
4906
4907 case KEY_sqrt:
4908 UNI(OP_SQRT);
4909
4910 case KEY_srand:
4911 UNI(OP_SRAND);
4912
4913 case KEY_stat:
4914 UNI(OP_STAT);
4915
4916 case KEY_study:
79072805
LW
4917 UNI(OP_STUDY);
4918
4919 case KEY_substr:
a0d0e21e 4920 LOP(OP_SUBSTR,XTERM);
79072805
LW
4921
4922 case KEY_format:
4923 case KEY_sub:
93a17b20 4924 really_sub:
09bef843 4925 {
3280af22 4926 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 4927 SSize_t tboffset = 0;
09bef843 4928 expectation attrful;
d731386a 4929 bool have_name, have_proto, bad_proto;
09bef843
SB
4930 int key = tmp;
4931
4932 s = skipspace(s);
4933
7e2040f0 4934 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
4935 (*s == ':' && s[1] == ':'))
4936 {
4937 PL_expect = XBLOCK;
4938 attrful = XATTRBLOCK;
b1b65b59
JH
4939 /* remember buffer pos'n for later force_word */
4940 tboffset = s - PL_oldbufptr;
09bef843
SB
4941 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4942 if (strchr(tmpbuf, ':'))
4943 sv_setpv(PL_subname, tmpbuf);
4944 else {
4945 sv_setsv(PL_subname,PL_curstname);
4946 sv_catpvn(PL_subname,"::",2);
4947 sv_catpvn(PL_subname,tmpbuf,len);
4948 }
4949 s = skipspace(d);
4950 have_name = TRUE;
4951 }
463ee0b2 4952 else {
09bef843
SB
4953 if (key == KEY_my)
4954 Perl_croak(aTHX_ "Missing name in \"my sub\"");
4955 PL_expect = XTERMBLOCK;
4956 attrful = XATTRTERM;
4957 sv_setpv(PL_subname,"?");
4958 have_name = FALSE;
463ee0b2 4959 }
4633a7c4 4960
09bef843
SB
4961 if (key == KEY_format) {
4962 if (*s == '=')
4963 PL_lex_formbrack = PL_lex_brackets + 1;
4964 if (have_name)
b1b65b59
JH
4965 (void) force_word(PL_oldbufptr + tboffset, WORD,
4966 FALSE, TRUE, TRUE);
09bef843
SB
4967 OPERATOR(FORMAT);
4968 }
79072805 4969
09bef843
SB
4970 /* Look for a prototype */
4971 if (*s == '(') {
4972 char *p;
4973
4974 s = scan_str(s,FALSE,FALSE);
37fd879b 4975 if (!s)
09bef843 4976 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 4977 /* strip spaces and check for bad characters */
09bef843
SB
4978 d = SvPVX(PL_lex_stuff);
4979 tmp = 0;
d731386a 4980 bad_proto = FALSE;
09bef843 4981 for (p = d; *p; ++p) {
d37a9538 4982 if (!isSPACE(*p)) {
09bef843 4983 d[tmp++] = *p;
d37a9538
ST
4984 if (!strchr("$@%*;[]&\\", *p))
4985 bad_proto = TRUE;
4986 }
09bef843
SB
4987 }
4988 d[tmp] = '\0';
420cdfc1
ST
4989 if (bad_proto && ckWARN(WARN_SYNTAX))
4990 Perl_warner(aTHX_ WARN_SYNTAX,
4991 "Illegal character in prototype for %s : %s",
4992 SvPVX(PL_subname), d);
09bef843
SB
4993 SvCUR(PL_lex_stuff) = tmp;
4994 have_proto = TRUE;
68dc0745 4995
09bef843 4996 s = skipspace(s);
4633a7c4 4997 }
09bef843
SB
4998 else
4999 have_proto = FALSE;
5000
5001 if (*s == ':' && s[1] != ':')
5002 PL_expect = attrful;
5003
5004 if (have_proto) {
b1b65b59
JH
5005 PL_nextval[PL_nexttoke].opval =
5006 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5007 PL_lex_stuff = Nullsv;
5008 force_next(THING);
68dc0745 5009 }
09bef843
SB
5010 if (!have_name) {
5011 sv_setpv(PL_subname,"__ANON__");
5012 TOKEN(ANONSUB);
4633a7c4 5013 }
b1b65b59
JH
5014 (void) force_word(PL_oldbufptr + tboffset, WORD,
5015 FALSE, TRUE, TRUE);
09bef843
SB
5016 if (key == KEY_my)
5017 TOKEN(MYSUB);
5018 TOKEN(SUB);
4633a7c4 5019 }
79072805
LW
5020
5021 case KEY_system:
5022 set_csh();
a0d0e21e 5023 LOP(OP_SYSTEM,XREF);
79072805
LW
5024
5025 case KEY_symlink:
a0d0e21e 5026 LOP(OP_SYMLINK,XTERM);
79072805
LW
5027
5028 case KEY_syscall:
a0d0e21e 5029 LOP(OP_SYSCALL,XTERM);
79072805 5030
c07a80fd 5031 case KEY_sysopen:
5032 LOP(OP_SYSOPEN,XTERM);
5033
137443ea 5034 case KEY_sysseek:
5035 LOP(OP_SYSSEEK,XTERM);
5036
79072805 5037 case KEY_sysread:
a0d0e21e 5038 LOP(OP_SYSREAD,XTERM);
79072805
LW
5039
5040 case KEY_syswrite:
a0d0e21e 5041 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5042
5043 case KEY_tr:
5044 s = scan_trans(s);
5045 TERM(sublex_start());
5046
5047 case KEY_tell:
5048 UNI(OP_TELL);
5049
5050 case KEY_telldir:
5051 UNI(OP_TELLDIR);
5052
463ee0b2 5053 case KEY_tie:
a0d0e21e 5054 LOP(OP_TIE,XTERM);
463ee0b2 5055
c07a80fd 5056 case KEY_tied:
5057 UNI(OP_TIED);
5058
79072805
LW
5059 case KEY_time:
5060 FUN0(OP_TIME);
5061
5062 case KEY_times:
5063 FUN0(OP_TMS);
5064
5065 case KEY_truncate:
a0d0e21e 5066 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5067
5068 case KEY_uc:
5069 UNI(OP_UC);
5070
5071 case KEY_ucfirst:
5072 UNI(OP_UCFIRST);
5073
463ee0b2
LW
5074 case KEY_untie:
5075 UNI(OP_UNTIE);
5076
79072805 5077 case KEY_until:
57843af0 5078 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5079 OPERATOR(UNTIL);
5080
5081 case KEY_unless:
57843af0 5082 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5083 OPERATOR(UNLESS);
5084
5085 case KEY_unlink:
a0d0e21e 5086 LOP(OP_UNLINK,XTERM);
79072805
LW
5087
5088 case KEY_undef:
5089 UNI(OP_UNDEF);
5090
5091 case KEY_unpack:
a0d0e21e 5092 LOP(OP_UNPACK,XTERM);
79072805
LW
5093
5094 case KEY_utime:
a0d0e21e 5095 LOP(OP_UTIME,XTERM);
79072805
LW
5096
5097 case KEY_umask:
5098 UNI(OP_UMASK);
5099
5100 case KEY_unshift:
a0d0e21e
LW
5101 LOP(OP_UNSHIFT,XTERM);
5102
5103 case KEY_use:
3280af22 5104 if (PL_expect != XSTATE)
a0d0e21e 5105 yyerror("\"use\" not allowed in expression");
89bfa8cd 5106 s = skipspace(s);
a7cb1f99 5107 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
e759cc13 5108 s = force_version(s, TRUE);
a7cb1f99 5109 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5110 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5111 force_next(WORD);
5112 }
e759cc13
RGS
5113 else if (*s == 'v') {
5114 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5115 s = force_version(s, FALSE);
5116 }
89bfa8cd 5117 }
5118 else {
5119 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 5120 s = force_version(s, FALSE);
89bfa8cd 5121 }
a0d0e21e
LW
5122 yylval.ival = 1;
5123 OPERATOR(USE);
79072805
LW
5124
5125 case KEY_values:
5126 UNI(OP_VALUES);
5127
5128 case KEY_vec:
a0d0e21e 5129 LOP(OP_VEC,XTERM);
79072805
LW
5130
5131 case KEY_while:
57843af0 5132 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5133 OPERATOR(WHILE);
5134
5135 case KEY_warn:
3280af22 5136 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5137 LOP(OP_WARN,XTERM);
79072805
LW
5138
5139 case KEY_wait:
5140 FUN0(OP_WAIT);
5141
5142 case KEY_waitpid:
a0d0e21e 5143 LOP(OP_WAITPID,XTERM);
79072805
LW
5144
5145 case KEY_wantarray:
5146 FUN0(OP_WANTARRAY);
5147
5148 case KEY_write:
9d116dd7
JH
5149#ifdef EBCDIC
5150 {
df3728a2
JH
5151 char ctl_l[2];
5152 ctl_l[0] = toCTRL('L');
5153 ctl_l[1] = '\0';
9d116dd7
JH
5154 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5155 }
5156#else
5157 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5158#endif
79072805
LW
5159 UNI(OP_ENTERWRITE);
5160
5161 case KEY_x:
3280af22 5162 if (PL_expect == XOPERATOR)
79072805
LW
5163 Mop(OP_REPEAT);
5164 check_uni();
5165 goto just_a_word;
5166
a0d0e21e
LW
5167 case KEY_xor:
5168 yylval.ival = OP_XOR;
5169 OPERATOR(OROP);
5170
79072805
LW
5171 case KEY_y:
5172 s = scan_trans(s);
5173 TERM(sublex_start());
5174 }
49dc05e3 5175 }}
79072805 5176}
bf4acbe4
GS
5177#ifdef __SC__
5178#pragma segment Main
5179#endif
79072805 5180
e930465f
JH
5181static int
5182S_pending_ident(pTHX)
8eceec63
SC
5183{
5184 register char *d;
5185 register I32 tmp;
5186 /* pit holds the identifier we read and pending_ident is reset */
5187 char pit = PL_pending_ident;
5188 PL_pending_ident = 0;
5189
5190 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5191 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5192
5193 /* if we're in a my(), we can't allow dynamics here.
5194 $foo'bar has already been turned into $foo::bar, so
5195 just check for colons.
5196
5197 if it's a legal name, the OP is a PADANY.
5198 */
5199 if (PL_in_my) {
5200 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5201 if (strchr(PL_tokenbuf,':'))
5202 yyerror(Perl_form(aTHX_ "No package name allowed for "
5203 "variable %s in \"our\"",
5204 PL_tokenbuf));
5205 tmp = pad_allocmy(PL_tokenbuf);
5206 }
5207 else {
5208 if (strchr(PL_tokenbuf,':'))
5209 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5210
5211 yylval.opval = newOP(OP_PADANY, 0);
5212 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
5213 return PRIVATEREF;
5214 }
5215 }
5216
5217 /*
5218 build the ops for accesses to a my() variable.
5219
5220 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5221 then used in a comparison. This catches most, but not
5222 all cases. For instance, it catches
5223 sort { my($a); $a <=> $b }
5224 but not
5225 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5226 (although why you'd do that is anyone's guess).
5227 */
5228
5229 if (!strchr(PL_tokenbuf,':')) {
4d1ff10f 5230#ifdef USE_5005THREADS
8eceec63
SC
5231 /* Check for single character per-thread SVs */
5232 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
5233 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
5234 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
5235 {
5236 yylval.opval = newOP(OP_THREADSV, 0);
5237 yylval.opval->op_targ = tmp;
5238 return PRIVATEREF;
5239 }
4d1ff10f 5240#endif /* USE_5005THREADS */
8eceec63
SC
5241 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
5242 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
5243 /* might be an "our" variable" */
5244 if (SvFLAGS(namesv) & SVpad_OUR) {
5245 /* build ops for a bareword */
5246 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
5247 sv_catpvn(sym, "::", 2);
5248 sv_catpv(sym, PL_tokenbuf+1);
5249 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5250 yylval.opval->op_private = OPpCONST_ENTERED;
5251 gv_fetchpv(SvPVX(sym),
5252 (PL_in_eval
5253 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 5254 : GV_ADDMULTI
8eceec63
SC
5255 ),
5256 ((PL_tokenbuf[0] == '$') ? SVt_PV
5257 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5258 : SVt_PVHV));
5259 return WORD;
5260 }
5261
5262 /* if it's a sort block and they're naming $a or $b */
5263 if (PL_last_lop_op == OP_SORT &&
5264 PL_tokenbuf[0] == '$' &&
5265 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5266 && !PL_tokenbuf[2])
5267 {
5268 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5269 d < PL_bufend && *d != '\n';
5270 d++)
5271 {
5272 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5273 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5274 PL_tokenbuf);
5275 }
5276 }
5277 }
5278
5279 yylval.opval = newOP(OP_PADANY, 0);
5280 yylval.opval->op_targ = tmp;
5281 return PRIVATEREF;
5282 }
5283 }
5284
5285 /*
5286 Whine if they've said @foo in a doublequoted string,
5287 and @foo isn't a variable we can find in the symbol
5288 table.
5289 */
5290 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5291 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5292 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5293 && ckWARN(WARN_AMBIGUOUS))
5294 {
5295 /* Downgraded from fatal to warning 20000522 mjd */
5296 Perl_warner(aTHX_ WARN_AMBIGUOUS,
5297 "Possible unintended interpolation of %s in string",
5298 PL_tokenbuf);
5299 }
5300 }
5301
5302 /* build ops for a bareword */
5303 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5304 yylval.opval->op_private = OPpCONST_ENTERED;
5305 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5306 ((PL_tokenbuf[0] == '$') ? SVt_PV
5307 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5308 : SVt_PVHV));
5309 return WORD;
5310}
5311
79072805 5312I32
864dbfa3 5313Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5314{
5315 switch (*d) {
5316 case '_':
5317 if (d[1] == '_') {
a0d0e21e 5318 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5319 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5320 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5321 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5322 if (strEQ(d,"__END__")) return KEY___END__;
5323 }
5324 break;
8990e307
LW
5325 case 'A':
5326 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5327 break;
79072805 5328 case 'a':
463ee0b2
LW
5329 switch (len) {
5330 case 3:
a0d0e21e
LW
5331 if (strEQ(d,"and")) return -KEY_and;
5332 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5333 break;
463ee0b2 5334 case 5:
a0d0e21e
LW
5335 if (strEQ(d,"alarm")) return -KEY_alarm;
5336 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5337 break;
5338 case 6:
a0d0e21e 5339 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5340 break;
5341 }
79072805
LW
5342 break;
5343 case 'B':
5344 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5345 break;
79072805 5346 case 'b':
a0d0e21e
LW
5347 if (strEQ(d,"bless")) return -KEY_bless;
5348 if (strEQ(d,"bind")) return -KEY_bind;
5349 if (strEQ(d,"binmode")) return -KEY_binmode;
5350 break;
5351 case 'C':
5352 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5353 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5354 break;
5355 case 'c':
5356 switch (len) {
5357 case 3:
a0d0e21e
LW
5358 if (strEQ(d,"cmp")) return -KEY_cmp;
5359 if (strEQ(d,"chr")) return -KEY_chr;
5360 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5361 break;
5362 case 4:
77bc9082 5363 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5364 break;
5365 case 5:
a0d0e21e
LW
5366 if (strEQ(d,"close")) return -KEY_close;
5367 if (strEQ(d,"chdir")) return -KEY_chdir;
77bc9082 5368 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5369 if (strEQ(d,"chmod")) return -KEY_chmod;
5370 if (strEQ(d,"chown")) return -KEY_chown;
5371 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5372 break;
5373 case 6:
a0d0e21e
LW
5374 if (strEQ(d,"chroot")) return -KEY_chroot;
5375 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5376 break;
5377 case 7:
a0d0e21e 5378 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5379 break;
5380 case 8:
a0d0e21e
LW
5381 if (strEQ(d,"closedir")) return -KEY_closedir;
5382 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5383 break;
5384 }
5385 break;
ed6116ce
LW
5386 case 'D':
5387 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5388 break;
79072805
LW
5389 case 'd':
5390 switch (len) {
5391 case 2:
5392 if (strEQ(d,"do")) return KEY_do;
5393 break;
5394 case 3:
a0d0e21e 5395 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5396 break;
5397 case 4:
a0d0e21e 5398 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5399 break;
5400 case 6:
5401 if (strEQ(d,"delete")) return KEY_delete;
5402 break;
5403 case 7:
5404 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5405 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5406 break;
5407 case 8:
a0d0e21e 5408 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5409 break;
5410 }
5411 break;
5412 case 'E':
79072805
LW
5413 if (strEQ(d,"END")) return KEY_END;
5414 break;
5415 case 'e':
5416 switch (len) {
5417 case 2:
a0d0e21e 5418 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5419 break;
5420 case 3:
a0d0e21e
LW
5421 if (strEQ(d,"eof")) return -KEY_eof;
5422 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5423 break;
5424 case 4:
5425 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5426 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5427 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5428 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5429 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5430 break;
5431 case 5:
5432 if (strEQ(d,"elsif")) return KEY_elsif;
5433 break;
a0d0e21e
LW
5434 case 6:
5435 if (strEQ(d,"exists")) return KEY_exists;
cea2e8a9 5436 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
a0d0e21e 5437 break;
79072805 5438 case 8:
a0d0e21e
LW
5439 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5440 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5441 break;
5442 case 9:
a0d0e21e 5443 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5444 break;
5445 case 10:
a0d0e21e
LW
5446 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5447 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5448 break;
5449 case 11:
a0d0e21e 5450 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5451 break;
a687059c 5452 }
a687059c 5453 break;
79072805
LW
5454 case 'f':
5455 switch (len) {
5456 case 3:
5457 if (strEQ(d,"for")) return KEY_for;
5458 break;
5459 case 4:
a0d0e21e 5460 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5461 break;
5462 case 5:
a0d0e21e
LW
5463 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5464 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5465 break;
5466 case 6:
5467 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5468 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5469 break;
5470 case 7:
5471 if (strEQ(d,"foreach")) return KEY_foreach;
5472 break;
5473 case 8:
a0d0e21e 5474 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5475 break;
378cc40b 5476 }
a687059c 5477 break;
79072805 5478 case 'g':
a687059c
LW
5479 if (strnEQ(d,"get",3)) {
5480 d += 3;
5481 if (*d == 'p') {
79072805
LW
5482 switch (len) {
5483 case 7:
a0d0e21e
LW
5484 if (strEQ(d,"ppid")) return -KEY_getppid;
5485 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5486 break;
5487 case 8:
a0d0e21e
LW
5488 if (strEQ(d,"pwent")) return -KEY_getpwent;
5489 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5490 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5491 break;
5492 case 11:
a0d0e21e
LW
5493 if (strEQ(d,"peername")) return -KEY_getpeername;
5494 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5495 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5496 break;
5497 case 14:
a0d0e21e 5498 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5499 break;
5500 case 16:
a0d0e21e 5501 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5502 break;
5503 }
a687059c
LW
5504 }
5505 else if (*d == 'h') {
a0d0e21e
LW
5506 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5507 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5508 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5509 }
5510 else if (*d == 'n') {
a0d0e21e
LW
5511 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5512 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5513 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5514 }
5515 else if (*d == 's') {
a0d0e21e
LW
5516 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5517 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5518 if (strEQ(d,"servent")) return -KEY_getservent;
5519 if (strEQ(d,"sockname")) return -KEY_getsockname;
5520 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5521 }
5522 else if (*d == 'g') {
a0d0e21e
LW
5523 if (strEQ(d,"grent")) return -KEY_getgrent;
5524 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5525 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5526 }
5527 else if (*d == 'l') {
a0d0e21e 5528 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5529 }
a0d0e21e 5530 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5531 break;
a687059c 5532 }
79072805
LW
5533 switch (len) {
5534 case 2:
a0d0e21e
LW
5535 if (strEQ(d,"gt")) return -KEY_gt;
5536 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5537 break;
5538 case 4:
5539 if (strEQ(d,"grep")) return KEY_grep;
5540 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5541 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5542 break;
5543 case 6:
a0d0e21e 5544 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5545 break;
378cc40b 5546 }
a687059c 5547 break;
79072805 5548 case 'h':
a0d0e21e 5549 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5550 break;
7d07dbc2
MB
5551 case 'I':
5552 if (strEQ(d,"INIT")) return KEY_INIT;
5553 break;
79072805
LW
5554 case 'i':
5555 switch (len) {
5556 case 2:
5557 if (strEQ(d,"if")) return KEY_if;
5558 break;
5559 case 3:
a0d0e21e 5560 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5561 break;
5562 case 5:
a0d0e21e
LW
5563 if (strEQ(d,"index")) return -KEY_index;
5564 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5565 break;
5566 }
a687059c 5567 break;
79072805 5568 case 'j':
a0d0e21e 5569 if (strEQ(d,"join")) return -KEY_join;
a687059c 5570 break;
79072805
LW
5571 case 'k':
5572 if (len == 4) {
3a6a8333 5573 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5574 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5575 }
79072805 5576 break;
79072805
LW
5577 case 'l':
5578 switch (len) {
5579 case 2:
a0d0e21e
LW
5580 if (strEQ(d,"lt")) return -KEY_lt;
5581 if (strEQ(d,"le")) return -KEY_le;
5582 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5583 break;
5584 case 3:
a0d0e21e 5585 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5586 break;
5587 case 4:
5588 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5589 if (strEQ(d,"link")) return -KEY_link;
c0329465 5590 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5591 break;
79072805
LW
5592 case 5:
5593 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5594 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5595 break;
5596 case 6:
a0d0e21e
LW
5597 if (strEQ(d,"length")) return -KEY_length;
5598 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5599 break;
5600 case 7:
a0d0e21e 5601 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5602 break;
5603 case 9:
a0d0e21e 5604 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5605 break;
5606 }
a687059c 5607 break;
79072805
LW
5608 case 'm':
5609 switch (len) {
5610 case 1: return KEY_m;
93a17b20
LW
5611 case 2:
5612 if (strEQ(d,"my")) return KEY_my;
5613 break;
a0d0e21e
LW
5614 case 3:
5615 if (strEQ(d,"map")) return KEY_map;
5616 break;
79072805 5617 case 5:
a0d0e21e 5618 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5619 break;
5620 case 6:
a0d0e21e
LW
5621 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5622 if (strEQ(d,"msgget")) return -KEY_msgget;
5623 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5624 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5625 break;
5626 }
a687059c 5627 break;
79072805
LW
5628 case 'n':
5629 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5630 if (strEQ(d,"ne")) return -KEY_ne;
5631 if (strEQ(d,"not")) return -KEY_not;
5632 if (strEQ(d,"no")) return KEY_no;
a687059c 5633 break;
79072805
LW
5634 case 'o':
5635 switch (len) {
463ee0b2 5636 case 2:
a0d0e21e 5637 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5638 break;
79072805 5639 case 3:
a0d0e21e
LW
5640 if (strEQ(d,"ord")) return -KEY_ord;
5641 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5642 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5643 break;
5644 case 4:
a0d0e21e 5645 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5646 break;
5647 case 7:
a0d0e21e 5648 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5649 break;
fe14fcc3 5650 }
a687059c 5651 break;
79072805
LW
5652 case 'p':
5653 switch (len) {
5654 case 3:
4e553d73 5655 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5656 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5657 break;
5658 case 4:
3a6a8333 5659 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5660 if (strEQ(d,"pack")) return -KEY_pack;
5661 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5662 break;
5663 case 5:
5664 if (strEQ(d,"print")) return KEY_print;
5665 break;
5666 case 6:
5667 if (strEQ(d,"printf")) return KEY_printf;
5668 break;
5669 case 7:
5670 if (strEQ(d,"package")) return KEY_package;
5671 break;
c07a80fd 5672 case 9:
5673 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5674 }
79072805
LW
5675 break;
5676 case 'q':
5677 if (len <= 2) {
5678 if (strEQ(d,"q")) return KEY_q;
8782bef2 5679 if (strEQ(d,"qr")) return KEY_qr;
79072805 5680 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5681 if (strEQ(d,"qw")) return KEY_qw;
79072805 5682 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5683 }
a0d0e21e 5684 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5685 break;
5686 case 'r':
5687 switch (len) {
5688 case 3:
a0d0e21e 5689 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5690 break;
5691 case 4:
a0d0e21e
LW
5692 if (strEQ(d,"read")) return -KEY_read;
5693 if (strEQ(d,"rand")) return -KEY_rand;
5694 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5695 if (strEQ(d,"redo")) return KEY_redo;
5696 break;
5697 case 5:
a0d0e21e
LW
5698 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5699 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5700 break;
5701 case 6:
5702 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5703 if (strEQ(d,"rename")) return -KEY_rename;
5704 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5705 break;
5706 case 7:
ec4ab249 5707 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5708 if (strEQ(d,"reverse")) return -KEY_reverse;
5709 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5710 break;
5711 case 8:
a0d0e21e
LW
5712 if (strEQ(d,"readlink")) return -KEY_readlink;
5713 if (strEQ(d,"readline")) return -KEY_readline;
5714 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5715 break;
5716 case 9:
a0d0e21e 5717 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5718 break;
a687059c 5719 }
79072805
LW
5720 break;
5721 case 's':
a687059c 5722 switch (d[1]) {
79072805 5723 case 0: return KEY_s;
a687059c 5724 case 'c':
79072805 5725 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5726 break;
5727 case 'e':
79072805
LW
5728 switch (len) {
5729 case 4:
a0d0e21e
LW
5730 if (strEQ(d,"seek")) return -KEY_seek;
5731 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5732 break;
5733 case 5:
a0d0e21e 5734 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5735 break;
5736 case 6:
a0d0e21e
LW
5737 if (strEQ(d,"select")) return -KEY_select;
5738 if (strEQ(d,"semctl")) return -KEY_semctl;
5739 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5740 break;
5741 case 7:
a0d0e21e
LW
5742 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5743 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5744 break;
5745 case 8:
a0d0e21e
LW
5746 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5747 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
5748 break;
5749 case 9:
a0d0e21e 5750 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
5751 break;
5752 case 10:
a0d0e21e
LW
5753 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5754 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5755 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
5756 break;
5757 case 11:
a0d0e21e
LW
5758 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5759 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
5760 break;
5761 }
a687059c
LW
5762 break;
5763 case 'h':
79072805
LW
5764 switch (len) {
5765 case 5:
3a6a8333 5766 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
5767 break;
5768 case 6:
a0d0e21e
LW
5769 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5770 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
5771 break;
5772 case 7:
a0d0e21e 5773 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
5774 break;
5775 case 8:
a0d0e21e
LW
5776 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5777 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
5778 break;
5779 }
a687059c
LW
5780 break;
5781 case 'i':
a0d0e21e 5782 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
5783 break;
5784 case 'l':
a0d0e21e 5785 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
5786 break;
5787 case 'o':
79072805 5788 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
5789 if (strEQ(d,"socket")) return -KEY_socket;
5790 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
5791 break;
5792 case 'p':
79072805 5793 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 5794 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 5795 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
5796 break;
5797 case 'q':
a0d0e21e 5798 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
5799 break;
5800 case 'r':
a0d0e21e 5801 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
5802 break;
5803 case 't':
a0d0e21e 5804 if (strEQ(d,"stat")) return -KEY_stat;
79072805 5805 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
5806 break;
5807 case 'u':
a0d0e21e 5808 if (strEQ(d,"substr")) return -KEY_substr;
79072805 5809 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
5810 break;
5811 case 'y':
79072805
LW
5812 switch (len) {
5813 case 6:
a0d0e21e 5814 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
5815 break;
5816 case 7:
a0d0e21e
LW
5817 if (strEQ(d,"symlink")) return -KEY_symlink;
5818 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 5819 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5820 if (strEQ(d,"sysread")) return -KEY_sysread;
5821 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
5822 break;
5823 case 8:
a0d0e21e 5824 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 5825 break;
a687059c 5826 }
a687059c
LW
5827 break;
5828 }
5829 break;
79072805
LW
5830 case 't':
5831 switch (len) {
5832 case 2:
5833 if (strEQ(d,"tr")) return KEY_tr;
5834 break;
463ee0b2
LW
5835 case 3:
5836 if (strEQ(d,"tie")) return KEY_tie;
5837 break;
79072805 5838 case 4:
a0d0e21e 5839 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 5840 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 5841 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
5842 break;
5843 case 5:
a0d0e21e 5844 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
5845 break;
5846 case 7:
a0d0e21e 5847 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
5848 break;
5849 case 8:
a0d0e21e 5850 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 5851 break;
378cc40b 5852 }
a687059c 5853 break;
79072805
LW
5854 case 'u':
5855 switch (len) {
5856 case 2:
a0d0e21e
LW
5857 if (strEQ(d,"uc")) return -KEY_uc;
5858 break;
5859 case 3:
5860 if (strEQ(d,"use")) return KEY_use;
79072805
LW
5861 break;
5862 case 5:
5863 if (strEQ(d,"undef")) return KEY_undef;
5864 if (strEQ(d,"until")) return KEY_until;
463ee0b2 5865 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
5866 if (strEQ(d,"utime")) return -KEY_utime;
5867 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
5868 break;
5869 case 6:
5870 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
5871 if (strEQ(d,"unpack")) return -KEY_unpack;
5872 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
5873 break;
5874 case 7:
3a6a8333 5875 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 5876 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 5877 break;
a687059c
LW
5878 }
5879 break;
79072805 5880 case 'v':
a0d0e21e
LW
5881 if (strEQ(d,"values")) return -KEY_values;
5882 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 5883 break;
79072805
LW
5884 case 'w':
5885 switch (len) {
5886 case 4:
a0d0e21e
LW
5887 if (strEQ(d,"warn")) return -KEY_warn;
5888 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
5889 break;
5890 case 5:
5891 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 5892 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
5893 break;
5894 case 7:
a0d0e21e 5895 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
5896 break;
5897 case 9:
a0d0e21e 5898 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 5899 break;
2f3197b3 5900 }
a687059c 5901 break;
79072805 5902 case 'x':
a0d0e21e
LW
5903 if (len == 1) return -KEY_x;
5904 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 5905 break;
79072805
LW
5906 case 'y':
5907 if (len == 1) return KEY_y;
5908 break;
5909 case 'z':
a687059c
LW
5910 break;
5911 }
79072805 5912 return 0;
a687059c
LW
5913}
5914
76e3520e 5915STATIC void
cea2e8a9 5916S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 5917{
2f3197b3
LW
5918 char *w;
5919
d008e5eb 5920 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
5921 if (ckWARN(WARN_SYNTAX)) {
5922 int level = 1;
5923 for (w = s+2; *w && level; w++) {
5924 if (*w == '(')
5925 ++level;
5926 else if (*w == ')')
5927 --level;
5928 }
5929 if (*w)
5930 for (; *w && isSPACE(*w); w++) ;
5931 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
65cec589
GS
5932 Perl_warner(aTHX_ WARN_SYNTAX,
5933 "%s (...) interpreted as function",name);
d008e5eb 5934 }
2f3197b3 5935 }
3280af22 5936 while (s < PL_bufend && isSPACE(*s))
2f3197b3 5937 s++;
a687059c
LW
5938 if (*s == '(')
5939 s++;
3280af22 5940 while (s < PL_bufend && isSPACE(*s))
a687059c 5941 s++;
7e2040f0 5942 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 5943 w = s++;
7e2040f0 5944 while (isALNUM_lazy_if(s,UTF))
a687059c 5945 s++;
3280af22 5946 while (s < PL_bufend && isSPACE(*s))
a687059c 5947 s++;
e929a76b 5948 if (*s == ',') {
463ee0b2 5949 int kw;
e929a76b 5950 *s = '\0';
864dbfa3 5951 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 5952 *s = ',';
463ee0b2 5953 if (kw)
e929a76b 5954 return;
cea2e8a9 5955 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
5956 }
5957 }
5958}
5959
423cee85
JH
5960/* Either returns sv, or mortalizes sv and returns a new SV*.
5961 Best used as sv=new_constant(..., sv, ...).
5962 If s, pv are NULL, calls subroutine with one argument,
5963 and type is used with error messages only. */
5964
b3ac6de7 5965STATIC SV *
dff6d3cd 5966S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 5967 const char *type)
b3ac6de7 5968{
b3ac6de7 5969 dSP;
3280af22 5970 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 5971 SV *res;
b3ac6de7
IZ
5972 SV **cvp;
5973 SV *cv, *typesv;
f0af216f 5974 const char *why1, *why2, *why3;
4e553d73 5975
f0af216f 5976 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
5977 SV *msg;
5978
f0af216f 5979 why2 = strEQ(key,"charnames")
41ab332f 5980 ? "(possibly a missing \"use charnames ...\")"
f0af216f 5981 : "";
4e553d73 5982 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
5983 (type ? type: "undef"), why2);
5984
5985 /* This is convoluted and evil ("goto considered harmful")
5986 * but I do not understand the intricacies of all the different
5987 * failure modes of %^H in here. The goal here is to make
5988 * the most probable error message user-friendly. --jhi */
5989
5990 goto msgdone;
5991
423cee85 5992 report:
4e553d73 5993 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 5994 (type ? type: "undef"), why1, why2, why3);
41ab332f 5995 msgdone:
423cee85
JH
5996 yyerror(SvPVX(msg));
5997 SvREFCNT_dec(msg);
5998 return sv;
5999 }
b3ac6de7
IZ
6000 cvp = hv_fetch(table, key, strlen(key), FALSE);
6001 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
6002 why1 = "$^H{";
6003 why2 = key;
f0af216f 6004 why3 = "} is not defined";
423cee85 6005 goto report;
b3ac6de7
IZ
6006 }
6007 sv_2mortal(sv); /* Parent created it permanently */
6008 cv = *cvp;
423cee85
JH
6009 if (!pv && s)
6010 pv = sv_2mortal(newSVpvn(s, len));
6011 if (type && pv)
6012 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 6013 else
423cee85 6014 typesv = &PL_sv_undef;
4e553d73 6015
e788e7d3 6016 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
6017 ENTER ;
6018 SAVETMPS;
4e553d73 6019
423cee85 6020 PUSHMARK(SP) ;
a5845cb7 6021 EXTEND(sp, 3);
423cee85
JH
6022 if (pv)
6023 PUSHs(pv);
b3ac6de7 6024 PUSHs(sv);
423cee85
JH
6025 if (pv)
6026 PUSHs(typesv);
b3ac6de7 6027 PUTBACK;
423cee85 6028 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 6029
423cee85 6030 SPAGAIN ;
4e553d73 6031
423cee85 6032 /* Check the eval first */
9b0e499b 6033 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
6034 STRLEN n_a;
6035 sv_catpv(ERRSV, "Propagated");
6036 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 6037 (void)POPs;
423cee85
JH
6038 res = SvREFCNT_inc(sv);
6039 }
6040 else {
6041 res = POPs;
e1f15930 6042 (void)SvREFCNT_inc(res);
423cee85 6043 }
4e553d73 6044
423cee85
JH
6045 PUTBACK ;
6046 FREETMPS ;
6047 LEAVE ;
b3ac6de7 6048 POPSTACK;
4e553d73 6049
b3ac6de7 6050 if (!SvOK(res)) {
423cee85
JH
6051 why1 = "Call to &{$^H{";
6052 why2 = key;
f0af216f 6053 why3 = "}} did not return a defined value";
423cee85
JH
6054 sv = res;
6055 goto report;
9b0e499b 6056 }
423cee85 6057
9b0e499b 6058 return res;
b3ac6de7 6059}
4e553d73 6060
76e3520e 6061STATIC char *
cea2e8a9 6062S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
6063{
6064 register char *d = dest;
8903cb82 6065 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 6066 for (;;) {
8903cb82 6067 if (d >= e)
cea2e8a9 6068 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6069 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6070 *d++ = *s++;
7e2040f0 6071 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6072 *d++ = ':';
6073 *d++ = ':';
6074 s++;
6075 }
c3e0f903 6076 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6077 *d++ = *s++;
6078 *d++ = *s++;
6079 }
fd400ab9 6080 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6081 char *t = s + UTF8SKIP(s);
fd400ab9 6082 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6083 t += UTF8SKIP(t);
6084 if (d + (t - s) > e)
cea2e8a9 6085 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6086 Copy(s, d, t - s, char);
6087 d += t - s;
6088 s = t;
6089 }
463ee0b2
LW
6090 else {
6091 *d = '\0';
6092 *slp = d - dest;
6093 return s;
e929a76b 6094 }
378cc40b
LW
6095 }
6096}
6097
76e3520e 6098STATIC char *
cea2e8a9 6099S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6100{
6101 register char *d;
8903cb82 6102 register char *e;
79072805 6103 char *bracket = 0;
748a9306 6104 char funny = *s++;
378cc40b 6105
a0d0e21e
LW
6106 if (isSPACE(*s))
6107 s = skipspace(s);
378cc40b 6108 d = dest;
8903cb82 6109 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6110 if (isDIGIT(*s)) {
8903cb82 6111 while (isDIGIT(*s)) {
6112 if (d >= e)
cea2e8a9 6113 Perl_croak(aTHX_ ident_too_long);
378cc40b 6114 *d++ = *s++;
8903cb82 6115 }
378cc40b
LW
6116 }
6117 else {
463ee0b2 6118 for (;;) {
8903cb82 6119 if (d >= e)
cea2e8a9 6120 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6121 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6122 *d++ = *s++;
7e2040f0 6123 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6124 *d++ = ':';
6125 *d++ = ':';
6126 s++;
6127 }
a0d0e21e 6128 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6129 *d++ = *s++;
6130 *d++ = *s++;
6131 }
fd400ab9 6132 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6133 char *t = s + UTF8SKIP(s);
fd400ab9 6134 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6135 t += UTF8SKIP(t);
6136 if (d + (t - s) > e)
cea2e8a9 6137 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6138 Copy(s, d, t - s, char);
6139 d += t - s;
6140 s = t;
6141 }
463ee0b2
LW
6142 else
6143 break;
6144 }
378cc40b
LW
6145 }
6146 *d = '\0';
6147 d = dest;
79072805 6148 if (*d) {
3280af22
NIS
6149 if (PL_lex_state != LEX_NORMAL)
6150 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6151 return s;
378cc40b 6152 }
748a9306 6153 if (*s == '$' && s[1] &&
7e2040f0 6154 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6155 {
4810e5ec 6156 return s;
5cd24f17 6157 }
79072805
LW
6158 if (*s == '{') {
6159 bracket = s;
6160 s++;
6161 }
6162 else if (ck_uni)
6163 check_uni();
93a17b20 6164 if (s < send)
79072805
LW
6165 *d = *s++;
6166 d[1] = '\0';
2b92dfce 6167 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6168 *d = toCTRL(*s);
6169 s++;
de3bb511 6170 }
79072805 6171 if (bracket) {
748a9306 6172 if (isSPACE(s[-1])) {
fa83b5b6 6173 while (s < send) {
6174 char ch = *s++;
bf4acbe4 6175 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6176 *d = ch;
6177 break;
6178 }
6179 }
748a9306 6180 }
7e2040f0 6181 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6182 d++;
a0ed51b3
LW
6183 if (UTF) {
6184 e = s;
155aba94 6185 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6186 e += UTF8SKIP(e);
fd400ab9 6187 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6188 e += UTF8SKIP(e);
6189 }
6190 Copy(s, d, e - s, char);
6191 d += e - s;
6192 s = e;
6193 }
6194 else {
2b92dfce 6195 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6196 *d++ = *s++;
2b92dfce 6197 if (d >= e)
cea2e8a9 6198 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6199 }
79072805 6200 *d = '\0';
bf4acbe4 6201 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6202 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6203 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6204 const char *brack = *s == '[' ? "[...]" : "{...}";
cea2e8a9 6205 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 6206 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6207 funny, dest, brack, funny, dest, brack);
6208 }
79072805 6209 bracket++;
a0be28da 6210 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6211 return s;
6212 }
4e553d73
NIS
6213 }
6214 /* Handle extended ${^Foo} variables
2b92dfce
GS
6215 * 1999-02-27 mjd-perl-patch@plover.com */
6216 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6217 && isALNUM(*s))
6218 {
6219 d++;
6220 while (isALNUM(*s) && d < e) {
6221 *d++ = *s++;
6222 }
6223 if (d >= e)
cea2e8a9 6224 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6225 *d = '\0';
79072805
LW
6226 }
6227 if (*s == '}') {
6228 s++;
3280af22
NIS
6229 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6230 PL_lex_state = LEX_INTERPEND;
748a9306
LW
6231 if (funny == '#')
6232 funny = '@';
d008e5eb 6233 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6234 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6235 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6236 {
cea2e8a9 6237 Perl_warner(aTHX_ WARN_AMBIGUOUS,
d008e5eb
GS
6238 "Ambiguous use of %c{%s} resolved to %c%s",
6239 funny, dest, funny, dest);
6240 }
6241 }
79072805
LW
6242 }
6243 else {
6244 s = bracket; /* let the parser handle it */
93a17b20 6245 *dest = '\0';
79072805
LW
6246 }
6247 }
3280af22
NIS
6248 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6249 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6250 return s;
6251}
6252
cea2e8a9
GS
6253void
6254Perl_pmflag(pTHX_ U16 *pmfl, int ch)
a0d0e21e 6255{
bbce6d69 6256 if (ch == 'i')
a0d0e21e 6257 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6258 else if (ch == 'g')
6259 *pmfl |= PMf_GLOBAL;
c90c0ff4 6260 else if (ch == 'c')
6261 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6262 else if (ch == 'o')
6263 *pmfl |= PMf_KEEP;
6264 else if (ch == 'm')
6265 *pmfl |= PMf_MULTILINE;
6266 else if (ch == 's')
6267 *pmfl |= PMf_SINGLELINE;
6268 else if (ch == 'x')
6269 *pmfl |= PMf_EXTENDED;
6270}
378cc40b 6271
76e3520e 6272STATIC char *
cea2e8a9 6273S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6274{
79072805
LW
6275 PMOP *pm;
6276 char *s;
378cc40b 6277
09bef843 6278 s = scan_str(start,FALSE,FALSE);
37fd879b 6279 if (!s)
cea2e8a9 6280 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6281
8782bef2 6282 pm = (PMOP*)newPMOP(type, 0);
3280af22 6283 if (PL_multi_open == '?')
79072805 6284 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6285 if(type == OP_QR) {
6286 while (*s && strchr("iomsx", *s))
6287 pmflag(&pm->op_pmflags,*s++);
6288 }
6289 else {
6290 while (*s && strchr("iogcmsx", *s))
6291 pmflag(&pm->op_pmflags,*s++);
6292 }
4633a7c4 6293 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6294
3280af22 6295 PL_lex_op = (OP*)pm;
79072805 6296 yylval.ival = OP_MATCH;
378cc40b
LW
6297 return s;
6298}
6299
76e3520e 6300STATIC char *
cea2e8a9 6301S_scan_subst(pTHX_ char *start)
79072805 6302{
a0d0e21e 6303 register char *s;
79072805 6304 register PMOP *pm;
4fdae800 6305 I32 first_start;
79072805
LW
6306 I32 es = 0;
6307
79072805
LW
6308 yylval.ival = OP_NULL;
6309
09bef843 6310 s = scan_str(start,FALSE,FALSE);
79072805 6311
37fd879b 6312 if (!s)
cea2e8a9 6313 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6314
3280af22 6315 if (s[-1] == PL_multi_open)
79072805
LW
6316 s--;
6317
3280af22 6318 first_start = PL_multi_start;
09bef843 6319 s = scan_str(s,FALSE,FALSE);
79072805 6320 if (!s) {
37fd879b 6321 if (PL_lex_stuff) {
3280af22 6322 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6323 PL_lex_stuff = Nullsv;
6324 }
cea2e8a9 6325 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6326 }
3280af22 6327 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6328
79072805 6329 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6330 while (*s) {
a687059c
LW
6331 if (*s == 'e') {
6332 s++;
2f3197b3 6333 es++;
a687059c 6334 }
b3eb6a9b 6335 else if (strchr("iogcmsx", *s))
a0d0e21e 6336 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6337 else
6338 break;
378cc40b 6339 }
79072805
LW
6340
6341 if (es) {
6342 SV *repl;
0244c3a4
GS
6343 PL_sublex_info.super_bufptr = s;
6344 PL_sublex_info.super_bufend = PL_bufend;
6345 PL_multi_end = 0;
79072805 6346 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6347 repl = newSVpvn("",0);
463ee0b2 6348 while (es-- > 0)
a0d0e21e 6349 sv_catpv(repl, es ? "eval " : "do ");
79072805 6350 sv_catpvn(repl, "{ ", 2);
3280af22 6351 sv_catsv(repl, PL_lex_repl);
79072805 6352 sv_catpvn(repl, " };", 2);
25da4f38 6353 SvEVALED_on(repl);
3280af22
NIS
6354 SvREFCNT_dec(PL_lex_repl);
6355 PL_lex_repl = repl;
378cc40b 6356 }
79072805 6357
4633a7c4 6358 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6359 PL_lex_op = (OP*)pm;
79072805 6360 yylval.ival = OP_SUBST;
378cc40b
LW
6361 return s;
6362}
6363
76e3520e 6364STATIC char *
cea2e8a9 6365S_scan_trans(pTHX_ char *start)
378cc40b 6366{
a0d0e21e 6367 register char* s;
11343788 6368 OP *o;
79072805
LW
6369 short *tbl;
6370 I32 squash;
a0ed51b3 6371 I32 del;
79072805
LW
6372 I32 complement;
6373
6374 yylval.ival = OP_NULL;
6375
09bef843 6376 s = scan_str(start,FALSE,FALSE);
37fd879b 6377 if (!s)
cea2e8a9 6378 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6379 if (s[-1] == PL_multi_open)
2f3197b3
LW
6380 s--;
6381
09bef843 6382 s = scan_str(s,FALSE,FALSE);
79072805 6383 if (!s) {
37fd879b 6384 if (PL_lex_stuff) {
3280af22 6385 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6386 PL_lex_stuff = Nullsv;
6387 }
cea2e8a9 6388 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6389 }
79072805 6390
a0ed51b3 6391 complement = del = squash = 0;
6940069f 6392 while (strchr("cds", *s)) {
395c3793 6393 if (*s == 'c')
79072805 6394 complement = OPpTRANS_COMPLEMENT;
395c3793 6395 else if (*s == 'd')
a0ed51b3
LW
6396 del = OPpTRANS_DELETE;
6397 else if (*s == 's')
79072805 6398 squash = OPpTRANS_SQUASH;
395c3793
LW
6399 s++;
6400 }
8973db79
JH
6401
6402 New(803, tbl, complement&&!del?258:256, short);
6403 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7948272d
NIS
6404 o->op_private = del|squash|complement|
6405 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6406 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6407
3280af22 6408 PL_lex_op = o;
79072805
LW
6409 yylval.ival = OP_TRANS;
6410 return s;
6411}
6412
76e3520e 6413STATIC char *
cea2e8a9 6414S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6415{
6416 SV *herewas;
6417 I32 op_type = OP_SCALAR;
6418 I32 len;
6419 SV *tmpstr;
6420 char term;
6421 register char *d;
fc36a67e 6422 register char *e;
4633a7c4 6423 char *peek;
3280af22 6424 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6425
6426 s += 2;
3280af22
NIS
6427 d = PL_tokenbuf;
6428 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6429 if (!outer)
79072805 6430 *d++ = '\n';
bf4acbe4 6431 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6432 if (*peek && strchr("`'\"",*peek)) {
6433 s = peek;
79072805 6434 term = *s++;
3280af22 6435 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6436 d += len;
3280af22 6437 if (s < PL_bufend)
79072805 6438 s++;
79072805
LW
6439 }
6440 else {
6441 if (*s == '\\')
6442 s++, term = '\'';
6443 else
6444 term = '"';
7e2040f0 6445 if (!isALNUM_lazy_if(s,UTF))
4633a7c4 6446 deprecate("bare << to mean <<\"\"");
7e2040f0 6447 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6448 if (d < e)
6449 *d++ = *s;
6450 }
6451 }
3280af22 6452 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6453 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6454 *d++ = '\n';
6455 *d = '\0';
3280af22 6456 len = d - PL_tokenbuf;
6a27c188 6457#ifndef PERL_STRICT_CR
f63a84b2
LW
6458 d = strchr(s, '\r');
6459 if (d) {
6460 char *olds = s;
6461 s = d;
3280af22 6462 while (s < PL_bufend) {
f63a84b2
LW
6463 if (*s == '\r') {
6464 *d++ = '\n';
6465 if (*++s == '\n')
6466 s++;
6467 }
6468 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6469 *d++ = *s++;
6470 s++;
6471 }
6472 else
6473 *d++ = *s++;
6474 }
6475 *d = '\0';
3280af22
NIS
6476 PL_bufend = d;
6477 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6478 s = olds;
6479 }
6480#endif
79072805 6481 d = "\n";
3280af22 6482 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6483 herewas = newSVpvn(s,PL_bufend-s);
79072805 6484 else
79cb57f6 6485 s--, herewas = newSVpvn(s,d-s);
79072805 6486 s += SvCUR(herewas);
748a9306 6487
8d6dde3e 6488 tmpstr = NEWSV(87,79);
748a9306
LW
6489 sv_upgrade(tmpstr, SVt_PVIV);
6490 if (term == '\'') {
79072805 6491 op_type = OP_CONST;
748a9306
LW
6492 SvIVX(tmpstr) = -1;
6493 }
6494 else if (term == '`') {
79072805 6495 op_type = OP_BACKTICK;
748a9306
LW
6496 SvIVX(tmpstr) = '\\';
6497 }
79072805
LW
6498
6499 CLINE;
57843af0 6500 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6501 PL_multi_open = PL_multi_close = '<';
6502 term = *PL_tokenbuf;
0244c3a4
GS
6503 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6504 char *bufptr = PL_sublex_info.super_bufptr;
6505 char *bufend = PL_sublex_info.super_bufend;
6506 char *olds = s - SvCUR(herewas);
6507 s = strchr(bufptr, '\n');
6508 if (!s)
6509 s = bufend;
6510 d = s;
6511 while (s < bufend &&
6512 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6513 if (*s++ == '\n')
57843af0 6514 CopLINE_inc(PL_curcop);
0244c3a4
GS
6515 }
6516 if (s >= bufend) {
57843af0 6517 CopLINE_set(PL_curcop, PL_multi_start);
0244c3a4
GS
6518 missingterm(PL_tokenbuf);
6519 }
6520 sv_setpvn(herewas,bufptr,d-bufptr+1);
6521 sv_setpvn(tmpstr,d+1,s-d);
6522 s += len - 1;
6523 sv_catpvn(herewas,s,bufend-s);
6524 (void)strcpy(bufptr,SvPVX(herewas));
6525
6526 s = olds;
6527 goto retval;
6528 }
6529 else if (!outer) {
79072805 6530 d = s;
3280af22
NIS
6531 while (s < PL_bufend &&
6532 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6533 if (*s++ == '\n')
57843af0 6534 CopLINE_inc(PL_curcop);
79072805 6535 }
3280af22 6536 if (s >= PL_bufend) {
57843af0 6537 CopLINE_set(PL_curcop, PL_multi_start);
3280af22 6538 missingterm(PL_tokenbuf);
79072805
LW
6539 }
6540 sv_setpvn(tmpstr,d+1,s-d);
6541 s += len - 1;
57843af0 6542 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6543
3280af22
NIS
6544 sv_catpvn(herewas,s,PL_bufend-s);
6545 sv_setsv(PL_linestr,herewas);
6546 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6547 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6548 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6549 }
6550 else
6551 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6552 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6553 if (!outer ||
3280af22 6554 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
57843af0 6555 CopLINE_set(PL_curcop, PL_multi_start);
3280af22 6556 missingterm(PL_tokenbuf);
79072805 6557 }
57843af0 6558 CopLINE_inc(PL_curcop);
3280af22 6559 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6560 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6561#ifndef PERL_STRICT_CR
3280af22 6562 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6563 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6564 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6565 {
3280af22
NIS
6566 PL_bufend[-2] = '\n';
6567 PL_bufend--;
6568 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6569 }
3280af22
NIS
6570 else if (PL_bufend[-1] == '\r')
6571 PL_bufend[-1] = '\n';
f63a84b2 6572 }
3280af22
NIS
6573 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6574 PL_bufend[-1] = '\n';
f63a84b2 6575#endif
3280af22 6576 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6577 SV *sv = NEWSV(88,0);
6578
93a17b20 6579 sv_upgrade(sv, SVt_PVMG);
3280af22 6580 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
6581 (void)SvIOK_on(sv);
6582 SvIVX(sv) = 0;
57843af0 6583 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6584 }
3280af22
NIS
6585 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6586 s = PL_bufend - 1;
79072805 6587 *s = ' ';
3280af22
NIS
6588 sv_catsv(PL_linestr,herewas);
6589 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805
LW
6590 }
6591 else {
3280af22
NIS
6592 s = PL_bufend;
6593 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6594 }
6595 }
79072805 6596 s++;
0244c3a4 6597retval:
57843af0 6598 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6599 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6600 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6601 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6602 }
8990e307 6603 SvREFCNT_dec(herewas);
0064a8a9 6604 if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7948272d 6605 SvUTF8_on(tmpstr);
3280af22 6606 PL_lex_stuff = tmpstr;
79072805
LW
6607 yylval.ival = op_type;
6608 return s;
6609}
6610
02aa26ce
NT
6611/* scan_inputsymbol
6612 takes: current position in input buffer
6613 returns: new position in input buffer
6614 side-effects: yylval and lex_op are set.
6615
6616 This code handles:
6617
6618 <> read from ARGV
6619 <FH> read from filehandle
6620 <pkg::FH> read from package qualified filehandle
6621 <pkg'FH> read from package qualified filehandle
6622 <$fh> read from filehandle in $fh
6623 <*.h> filename glob
6624
6625*/
6626
76e3520e 6627STATIC char *
cea2e8a9 6628S_scan_inputsymbol(pTHX_ char *start)
79072805 6629{
02aa26ce 6630 register char *s = start; /* current position in buffer */
79072805 6631 register char *d;
fc36a67e 6632 register char *e;
1b420867 6633 char *end;
79072805
LW
6634 I32 len;
6635
3280af22
NIS
6636 d = PL_tokenbuf; /* start of temp holding space */
6637 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6638 end = strchr(s, '\n');
6639 if (!end)
6640 end = PL_bufend;
6641 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6642
6643 /* die if we didn't have space for the contents of the <>,
1b420867 6644 or if it didn't end, or if we see a newline
02aa26ce
NT
6645 */
6646
3280af22 6647 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6648 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6649 if (s >= end)
cea2e8a9 6650 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6651
fc36a67e 6652 s++;
02aa26ce
NT
6653
6654 /* check for <$fh>
6655 Remember, only scalar variables are interpreted as filehandles by
6656 this code. Anything more complex (e.g., <$fh{$num}>) will be
6657 treated as a glob() call.
6658 This code makes use of the fact that except for the $ at the front,
6659 a scalar variable and a filehandle look the same.
6660 */
4633a7c4 6661 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6662
6663 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6664 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6665 d++;
02aa26ce
NT
6666
6667 /* If we've tried to read what we allow filehandles to look like, and
6668 there's still text left, then it must be a glob() and not a getline.
6669 Use scan_str to pull out the stuff between the <> and treat it
6670 as nothing more than a string.
6671 */
6672
3280af22 6673 if (d - PL_tokenbuf != len) {
79072805
LW
6674 yylval.ival = OP_GLOB;
6675 set_csh();
09bef843 6676 s = scan_str(start,FALSE,FALSE);
79072805 6677 if (!s)
cea2e8a9 6678 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6679 return s;
6680 }
395c3793 6681 else {
9b3023bc
RGS
6682 bool readline_overriden = FALSE;
6683 GV *gv_readline = Nullgv;
6684 GV **gvp;
02aa26ce 6685 /* we're in a filehandle read situation */
3280af22 6686 d = PL_tokenbuf;
02aa26ce
NT
6687
6688 /* turn <> into <ARGV> */
79072805
LW
6689 if (!len)
6690 (void)strcpy(d,"ARGV");
02aa26ce 6691
9b3023bc 6692 /* Check whether readline() is overriden */
ba979b31
NIS
6693 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
6694 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 6695 ||
ba979b31 6696 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9b3023bc 6697 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
ba979b31 6698 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
6699 readline_overriden = TRUE;
6700
02aa26ce
NT
6701 /* if <$fh>, create the ops to turn the variable into a
6702 filehandle
6703 */
79072805 6704 if (*d == '$') {
a0d0e21e 6705 I32 tmp;
02aa26ce
NT
6706
6707 /* try to find it in the pad for this block, otherwise find
6708 add symbol table ops
6709 */
11343788 6710 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
f558d5af
JH
6711 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
6712 if (SvFLAGS(namesv) & SVpad_OUR) {
6713 SV *sym = sv_2mortal(newSVpv(HvNAME(GvSTASH(namesv)),0));
6714 sv_catpvn(sym, "::", 2);
6715 sv_catpv(sym, d+1);
6716 d = SvPVX(sym);
6717 goto intro_sym;
6718 }
6719 else {
6720 OP *o = newOP(OP_PADSV, 0);
6721 o->op_targ = tmp;
9b3023bc
RGS
6722 PL_lex_op = readline_overriden
6723 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6724 append_elem(OP_LIST, o,
6725 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
6726 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 6727 }
a0d0e21e
LW
6728 }
6729 else {
f558d5af
JH
6730 GV *gv;
6731 ++d;
6732intro_sym:
6733 gv = gv_fetchpv(d,
6734 (PL_in_eval
6735 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 6736 : GV_ADDMULTI),
f558d5af 6737 SVt_PV);
9b3023bc
RGS
6738 PL_lex_op = readline_overriden
6739 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6740 append_elem(OP_LIST,
6741 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
6742 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6743 : (OP*)newUNOP(OP_READLINE, 0,
6744 newUNOP(OP_RV2SV, 0,
6745 newGVOP(OP_GV, 0, gv)));
a0d0e21e 6746 }
7c6fadd6
RGS
6747 if (!readline_overriden)
6748 PL_lex_op->op_flags |= OPf_SPECIAL;
f5284f61 6749 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
6750 yylval.ival = OP_NULL;
6751 }
02aa26ce
NT
6752
6753 /* If it's none of the above, it must be a literal filehandle
6754 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 6755 else {
85e6fe83 6756 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9b3023bc
RGS
6757 PL_lex_op = readline_overriden
6758 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
6759 append_elem(OP_LIST,
6760 newGVOP(OP_GV, 0, gv),
6761 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
6762 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
6763 yylval.ival = OP_NULL;
6764 }
6765 }
02aa26ce 6766
79072805
LW
6767 return s;
6768}
6769
02aa26ce
NT
6770
6771/* scan_str
6772 takes: start position in buffer
09bef843
SB
6773 keep_quoted preserve \ on the embedded delimiter(s)
6774 keep_delims preserve the delimiters around the string
02aa26ce
NT
6775 returns: position to continue reading from buffer
6776 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6777 updates the read buffer.
6778
6779 This subroutine pulls a string out of the input. It is called for:
6780 q single quotes q(literal text)
6781 ' single quotes 'literal text'
6782 qq double quotes qq(interpolate $here please)
6783 " double quotes "interpolate $here please"
6784 qx backticks qx(/bin/ls -l)
6785 ` backticks `/bin/ls -l`
6786 qw quote words @EXPORT_OK = qw( func() $spam )
6787 m// regexp match m/this/
6788 s/// regexp substitute s/this/that/
6789 tr/// string transliterate tr/this/that/
6790 y/// string transliterate y/this/that/
6791 ($*@) sub prototypes sub foo ($)
09bef843 6792 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
6793 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6794
6795 In most of these cases (all but <>, patterns and transliterate)
6796 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6797 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6798 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6799 calls scan_str().
4e553d73 6800
02aa26ce
NT
6801 It skips whitespace before the string starts, and treats the first
6802 character as the delimiter. If the delimiter is one of ([{< then
6803 the corresponding "close" character )]}> is used as the closing
6804 delimiter. It allows quoting of delimiters, and if the string has
6805 balanced delimiters ([{<>}]) it allows nesting.
6806
37fd879b
HS
6807 On success, the SV with the resulting string is put into lex_stuff or,
6808 if that is already non-NULL, into lex_repl. The second case occurs only
6809 when parsing the RHS of the special constructs s/// and tr/// (y///).
6810 For convenience, the terminating delimiter character is stuffed into
6811 SvIVX of the SV.
02aa26ce
NT
6812*/
6813
76e3520e 6814STATIC char *
09bef843 6815S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 6816{
02aa26ce
NT
6817 SV *sv; /* scalar value: string */
6818 char *tmps; /* temp string, used for delimiter matching */
6819 register char *s = start; /* current position in the buffer */
6820 register char term; /* terminating character */
6821 register char *to; /* current position in the sv's data */
6822 I32 brackets = 1; /* bracket nesting level */
89491803 6823 bool has_utf8 = FALSE; /* is there any utf8 content? */
02aa26ce
NT
6824
6825 /* skip space before the delimiter */
fb73857a 6826 if (isSPACE(*s))
6827 s = skipspace(s);
02aa26ce
NT
6828
6829 /* mark where we are, in case we need to report errors */
79072805 6830 CLINE;
02aa26ce
NT
6831
6832 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 6833 term = *s;
63cd0674 6834 if (!UTF8_IS_INVARIANT((U8)term) && UTF)
89491803 6835 has_utf8 = TRUE;
b1c7b182 6836
02aa26ce 6837 /* mark where we are */
57843af0 6838 PL_multi_start = CopLINE(PL_curcop);
3280af22 6839 PL_multi_open = term;
02aa26ce
NT
6840
6841 /* find corresponding closing delimiter */
93a17b20 6842 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
79072805 6843 term = tmps[5];
3280af22 6844 PL_multi_close = term;
79072805 6845
02aa26ce 6846 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
6847 assuming. 79 is the SV's initial length. What a random number. */
6848 sv = NEWSV(87,79);
ed6116ce
LW
6849 sv_upgrade(sv, SVt_PVIV);
6850 SvIVX(sv) = term;
a0d0e21e 6851 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
6852
6853 /* move past delimiter and try to read a complete string */
09bef843
SB
6854 if (keep_delims)
6855 sv_catpvn(sv, s, 1);
93a17b20
LW
6856 s++;
6857 for (;;) {
02aa26ce 6858 /* extend sv if need be */
3280af22 6859 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 6860 /* set 'to' to the next character in the sv's string */
463ee0b2 6861 to = SvPVX(sv)+SvCUR(sv);
09bef843 6862
02aa26ce 6863 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
6864 if (PL_multi_open == PL_multi_close) {
6865 for (; s < PL_bufend; s++,to++) {
02aa26ce 6866 /* embedded newlines increment the current line number */
3280af22 6867 if (*s == '\n' && !PL_rsfp)
57843af0 6868 CopLINE_inc(PL_curcop);
02aa26ce 6869 /* handle quoted delimiters */
3280af22 6870 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 6871 if (!keep_quoted && s[1] == term)
a0d0e21e 6872 s++;
02aa26ce 6873 /* any other quotes are simply copied straight through */
a0d0e21e
LW
6874 else
6875 *to++ = *s++;
6876 }
02aa26ce
NT
6877 /* terminate when run out of buffer (the for() condition), or
6878 have found the terminator */
93a17b20
LW
6879 else if (*s == term)
6880 break;
63cd0674 6881 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6882 has_utf8 = TRUE;
93a17b20
LW
6883 *to = *s;
6884 }
6885 }
02aa26ce
NT
6886
6887 /* if the terminator isn't the same as the start character (e.g.,
6888 matched brackets), we have to allow more in the quoting, and
6889 be prepared for nested brackets.
6890 */
93a17b20 6891 else {
02aa26ce 6892 /* read until we run out of string, or we find the terminator */
3280af22 6893 for (; s < PL_bufend; s++,to++) {
02aa26ce 6894 /* embedded newlines increment the line count */
3280af22 6895 if (*s == '\n' && !PL_rsfp)
57843af0 6896 CopLINE_inc(PL_curcop);
02aa26ce 6897 /* backslashes can escape the open or closing characters */
3280af22 6898 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
6899 if (!keep_quoted &&
6900 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
6901 s++;
6902 else
6903 *to++ = *s++;
6904 }
02aa26ce 6905 /* allow nested opens and closes */
3280af22 6906 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 6907 break;
3280af22 6908 else if (*s == PL_multi_open)
93a17b20 6909 brackets++;
63cd0674 6910 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6911 has_utf8 = TRUE;
93a17b20
LW
6912 *to = *s;
6913 }
6914 }
02aa26ce 6915 /* terminate the copied string and update the sv's end-of-string */
93a17b20 6916 *to = '\0';
463ee0b2 6917 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 6918
02aa26ce
NT
6919 /*
6920 * this next chunk reads more into the buffer if we're not done yet
6921 */
6922
b1c7b182
GS
6923 if (s < PL_bufend)
6924 break; /* handle case where we are done yet :-) */
79072805 6925
6a27c188 6926#ifndef PERL_STRICT_CR
f63a84b2 6927 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
6928 if ((to[-2] == '\r' && to[-1] == '\n') ||
6929 (to[-2] == '\n' && to[-1] == '\r'))
6930 {
f63a84b2
LW
6931 to[-2] = '\n';
6932 to--;
6933 SvCUR_set(sv, to - SvPVX(sv));
6934 }
6935 else if (to[-1] == '\r')
6936 to[-1] = '\n';
6937 }
6938 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
6939 to[-1] = '\n';
6940#endif
6941
02aa26ce
NT
6942 /* if we're out of file, or a read fails, bail and reset the current
6943 line marker so we can report where the unterminated string began
6944 */
3280af22
NIS
6945 if (!PL_rsfp ||
6946 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 6947 sv_free(sv);
57843af0 6948 CopLINE_set(PL_curcop, PL_multi_start);
79072805
LW
6949 return Nullch;
6950 }
02aa26ce 6951 /* we read a line, so increment our line counter */
57843af0 6952 CopLINE_inc(PL_curcop);
a0ed51b3 6953
02aa26ce 6954 /* update debugger info */
3280af22 6955 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6956 SV *sv = NEWSV(88,0);
6957
93a17b20 6958 sv_upgrade(sv, SVt_PVMG);
3280af22 6959 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
6960 (void)SvIOK_on(sv);
6961 SvIVX(sv) = 0;
57843af0 6962 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 6963 }
a0ed51b3 6964
3280af22
NIS
6965 /* having changed the buffer, we must update PL_bufend */
6966 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6967 PL_last_lop = PL_last_uni = Nullch;
378cc40b 6968 }
4e553d73 6969
02aa26ce
NT
6970 /* at this point, we have successfully read the delimited string */
6971
09bef843
SB
6972 if (keep_delims)
6973 sv_catpvn(sv, s, 1);
89491803 6974 if (has_utf8)
b1c7b182 6975 SvUTF8_on(sv);
57843af0 6976 PL_multi_end = CopLINE(PL_curcop);
79072805 6977 s++;
02aa26ce
NT
6978
6979 /* if we allocated too much space, give some back */
93a17b20
LW
6980 if (SvCUR(sv) + 5 < SvLEN(sv)) {
6981 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 6982 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 6983 }
02aa26ce
NT
6984
6985 /* decide whether this is the first or second quoted string we've read
6986 for this op
6987 */
4e553d73 6988
3280af22
NIS
6989 if (PL_lex_stuff)
6990 PL_lex_repl = sv;
79072805 6991 else
3280af22 6992 PL_lex_stuff = sv;
378cc40b
LW
6993 return s;
6994}
6995
02aa26ce
NT
6996/*
6997 scan_num
6998 takes: pointer to position in buffer
6999 returns: pointer to new position in buffer
7000 side-effects: builds ops for the constant in yylval.op
7001
7002 Read a number in any of the formats that Perl accepts:
7003
7fd134d9
JH
7004 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
7005 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
7006 0b[01](_?[01])*
7007 0[0-7](_?[0-7])*
7008 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 7009
3280af22 7010 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
7011 thing it reads.
7012
7013 If it reads a number without a decimal point or an exponent, it will
7014 try converting the number to an integer and see if it can do so
7015 without loss of precision.
7016*/
4e553d73 7017
378cc40b 7018char *
b73d6f50 7019Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 7020{
02aa26ce
NT
7021 register char *s = start; /* current position in buffer */
7022 register char *d; /* destination in temp buffer */
7023 register char *e; /* end of temp buffer */
86554af2 7024 NV nv; /* number read, as a double */
a7cb1f99 7025 SV *sv = Nullsv; /* place to put the converted number */
a86a20aa 7026 bool floatit; /* boolean: int or float? */
02aa26ce 7027 char *lastub = 0; /* position of last underbar */
fc36a67e 7028 static char number_too_long[] = "Number too long";
378cc40b 7029
02aa26ce
NT
7030 /* We use the first character to decide what type of number this is */
7031
378cc40b 7032 switch (*s) {
79072805 7033 default:
cea2e8a9 7034 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 7035
02aa26ce 7036 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 7037 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
7038 case '0':
7039 {
02aa26ce
NT
7040 /* variables:
7041 u holds the "number so far"
4f19785b
WSI
7042 shift the power of 2 of the base
7043 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
7044 overflowed was the number more than we can hold?
7045
7046 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
7047 we in octal/hex/binary?" indicator to disallow hex characters
7048 when in octal mode.
02aa26ce 7049 */
9e24b6e2
JH
7050 NV n = 0.0;
7051 UV u = 0;
79072805 7052 I32 shift;
9e24b6e2
JH
7053 bool overflowed = FALSE;
7054 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
7055 static char* bases[5] = { "", "binary", "", "octal",
7056 "hexadecimal" };
7057 static char* Bases[5] = { "", "Binary", "", "Octal",
7058 "Hexadecimal" };
7059 static char *maxima[5] = { "",
7060 "0b11111111111111111111111111111111",
7061 "",
893fe2c2 7062 "037777777777",
9e24b6e2
JH
7063 "0xffffffff" };
7064 char *base, *Base, *max;
378cc40b 7065
02aa26ce 7066 /* check for hex */
378cc40b
LW
7067 if (s[1] == 'x') {
7068 shift = 4;
7069 s += 2;
4f19785b
WSI
7070 } else if (s[1] == 'b') {
7071 shift = 1;
7072 s += 2;
378cc40b 7073 }
02aa26ce 7074 /* check for a decimal in disguise */
b78218b7 7075 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 7076 goto decimal;
02aa26ce 7077 /* so it must be octal */
928753ea 7078 else {
378cc40b 7079 shift = 3;
928753ea
JH
7080 s++;
7081 }
7082
7083 if (*s == '_') {
7084 if (ckWARN(WARN_SYNTAX))
7085 Perl_warner(aTHX_ WARN_SYNTAX,
7086 "Misplaced _ in number");
7087 lastub = s++;
7088 }
9e24b6e2
JH
7089
7090 base = bases[shift];
7091 Base = Bases[shift];
7092 max = maxima[shift];
02aa26ce 7093
4f19785b 7094 /* read the rest of the number */
378cc40b 7095 for (;;) {
9e24b6e2 7096 /* x is used in the overflow test,
893fe2c2 7097 b is the digit we're adding on. */
9e24b6e2 7098 UV x, b;
55497cff 7099
378cc40b 7100 switch (*s) {
02aa26ce
NT
7101
7102 /* if we don't mention it, we're done */
378cc40b
LW
7103 default:
7104 goto out;
02aa26ce 7105
928753ea 7106 /* _ are ignored -- but warned about if consecutive */
de3bb511 7107 case '_':
928753ea
JH
7108 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7109 Perl_warner(aTHX_ WARN_SYNTAX,
7110 "Misplaced _ in number");
7111 lastub = s++;
de3bb511 7112 break;
02aa26ce
NT
7113
7114 /* 8 and 9 are not octal */
378cc40b 7115 case '8': case '9':
4f19785b 7116 if (shift == 3)
cea2e8a9 7117 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 7118 /* FALL THROUGH */
02aa26ce
NT
7119
7120 /* octal digits */
4f19785b 7121 case '2': case '3': case '4':
378cc40b 7122 case '5': case '6': case '7':
4f19785b 7123 if (shift == 1)
cea2e8a9 7124 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7125 /* FALL THROUGH */
7126
7127 case '0': case '1':
02aa26ce 7128 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7129 goto digit;
02aa26ce
NT
7130
7131 /* hex digits */
378cc40b
LW
7132 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7133 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7134 /* make sure they said 0x */
378cc40b
LW
7135 if (shift != 4)
7136 goto out;
55497cff 7137 b = (*s++ & 7) + 9;
02aa26ce
NT
7138
7139 /* Prepare to put the digit we have onto the end
7140 of the number so far. We check for overflows.
7141 */
7142
55497cff 7143 digit:
9e24b6e2
JH
7144 if (!overflowed) {
7145 x = u << shift; /* make room for the digit */
7146
7147 if ((x >> shift) != u
7148 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7149 overflowed = TRUE;
7150 n = (NV) u;
767a6a26
PM
7151 if (ckWARN_d(WARN_OVERFLOW))
7152 Perl_warner(aTHX_ WARN_OVERFLOW,
9e24b6e2
JH
7153 "Integer overflow in %s number",
7154 base);
7155 } else
7156 u = x | b; /* add the digit to the end */
7157 }
7158 if (overflowed) {
7159 n *= nvshift[shift];
7160 /* If an NV has not enough bits in its
7161 * mantissa to represent an UV this summing of
7162 * small low-order numbers is a waste of time
7163 * (because the NV cannot preserve the
7164 * low-order bits anyway): we could just
7165 * remember when did we overflow and in the
7166 * end just multiply n by the right
7167 * amount. */
7168 n += (NV) b;
55497cff 7169 }
378cc40b
LW
7170 break;
7171 }
7172 }
02aa26ce
NT
7173
7174 /* if we get here, we had success: make a scalar value from
7175 the number.
7176 */
378cc40b 7177 out:
928753ea
JH
7178
7179 /* final misplaced underbar check */
7180 if (s[-1] == '_') {
7181 if (ckWARN(WARN_SYNTAX))
7182 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
7183 }
7184
79072805 7185 sv = NEWSV(92,0);
9e24b6e2 7186 if (overflowed) {
767a6a26
PM
7187 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
7188 Perl_warner(aTHX_ WARN_PORTABLE,
9e24b6e2
JH
7189 "%s number > %s non-portable",
7190 Base, max);
7191 sv_setnv(sv, n);
7192 }
7193 else {
15041a67 7194#if UVSIZE > 4
767a6a26
PM
7195 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
7196 Perl_warner(aTHX_ WARN_PORTABLE,
9e24b6e2
JH
7197 "%s number > %s non-portable",
7198 Base, max);
2cc4c2dc 7199#endif
9e24b6e2
JH
7200 sv_setuv(sv, u);
7201 }
2cc4c2dc 7202 if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7203 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7204 }
7205 break;
02aa26ce
NT
7206
7207 /*
7208 handle decimal numbers.
7209 we're also sent here when we read a 0 as the first digit
7210 */
378cc40b
LW
7211 case '1': case '2': case '3': case '4': case '5':
7212 case '6': case '7': case '8': case '9': case '.':
7213 decimal:
3280af22
NIS
7214 d = PL_tokenbuf;
7215 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7216 floatit = FALSE;
02aa26ce
NT
7217
7218 /* read next group of digits and _ and copy into d */
de3bb511 7219 while (isDIGIT(*s) || *s == '_') {
4e553d73 7220 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7221 if -w is on
7222 */
93a17b20 7223 if (*s == '_') {
928753ea
JH
7224 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7225 Perl_warner(aTHX_ WARN_SYNTAX,
7226 "Misplaced _ in number");
7227 lastub = s++;
93a17b20 7228 }
fc36a67e 7229 else {
02aa26ce 7230 /* check for end of fixed-length buffer */
fc36a67e 7231 if (d >= e)
cea2e8a9 7232 Perl_croak(aTHX_ number_too_long);
02aa26ce 7233 /* if we're ok, copy the character */
378cc40b 7234 *d++ = *s++;
fc36a67e 7235 }
378cc40b 7236 }
02aa26ce
NT
7237
7238 /* final misplaced underbar check */
928753ea 7239 if (lastub && s == lastub + 1) {
d008e5eb 7240 if (ckWARN(WARN_SYNTAX))
cea2e8a9 7241 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
d008e5eb 7242 }
02aa26ce
NT
7243
7244 /* read a decimal portion if there is one. avoid
7245 3..5 being interpreted as the number 3. followed
7246 by .5
7247 */
2f3197b3 7248 if (*s == '.' && s[1] != '.') {
79072805 7249 floatit = TRUE;
378cc40b 7250 *d++ = *s++;
02aa26ce 7251
928753ea
JH
7252 if (*s == '_') {
7253 if (ckWARN(WARN_SYNTAX))
7254 Perl_warner(aTHX_ WARN_SYNTAX,
7255 "Misplaced _ in number");
7256 lastub = s;
7257 }
7258
7259 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7260 */
fc36a67e 7261 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7262 /* fixed length buffer check */
fc36a67e 7263 if (d >= e)
cea2e8a9 7264 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7265 if (*s == '_') {
7266 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7267 Perl_warner(aTHX_ WARN_SYNTAX,
7268 "Misplaced _ in number");
7269 lastub = s;
7270 }
7271 else
fc36a67e 7272 *d++ = *s;
378cc40b 7273 }
928753ea
JH
7274 /* fractional part ending in underbar? */
7275 if (s[-1] == '_') {
7276 if (ckWARN(WARN_SYNTAX))
7277 Perl_warner(aTHX_ WARN_SYNTAX,
7278 "Misplaced _ in number");
7279 }
dd629d5b
GS
7280 if (*s == '.' && isDIGIT(s[1])) {
7281 /* oops, it's really a v-string, but without the "v" */
f4758303 7282 s = start;
dd629d5b
GS
7283 goto vstring;
7284 }
378cc40b 7285 }
02aa26ce
NT
7286
7287 /* read exponent part, if present */
7fd134d9 7288 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7289 floatit = TRUE;
7290 s++;
02aa26ce
NT
7291
7292 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7293 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7294
7fd134d9
JH
7295 /* stray preinitial _ */
7296 if (*s == '_') {
7297 if (ckWARN(WARN_SYNTAX))
7298 Perl_warner(aTHX_ WARN_SYNTAX,
7299 "Misplaced _ in number");
7300 lastub = s++;
7301 }
7302
02aa26ce 7303 /* allow positive or negative exponent */
378cc40b
LW
7304 if (*s == '+' || *s == '-')
7305 *d++ = *s++;
02aa26ce 7306
7fd134d9
JH
7307 /* stray initial _ */
7308 if (*s == '_') {
7309 if (ckWARN(WARN_SYNTAX))
7310 Perl_warner(aTHX_ WARN_SYNTAX,
7311 "Misplaced _ in number");
7312 lastub = s++;
7313 }
7314
7fd134d9
JH
7315 /* read digits of exponent */
7316 while (isDIGIT(*s) || *s == '_') {
7317 if (isDIGIT(*s)) {
7318 if (d >= e)
7319 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7320 *d++ = *s++;
7fd134d9
JH
7321 }
7322 else {
7323 if (ckWARN(WARN_SYNTAX) &&
7324 ((lastub && s == lastub + 1) ||
b3b48e3e 7325 (!isDIGIT(s[1]) && s[1] != '_')))
7fd134d9
JH
7326 Perl_warner(aTHX_ WARN_SYNTAX,
7327 "Misplaced _ in number");
b3b48e3e 7328 lastub = s++;
7fd134d9 7329 }
7fd134d9 7330 }
378cc40b 7331 }
02aa26ce 7332
02aa26ce
NT
7333
7334 /* make an sv from the string */
79072805 7335 sv = NEWSV(92,0);
097ee67d 7336
0b7fceb9 7337 /*
58bb9ec3
NC
7338 We try to do an integer conversion first if no characters
7339 indicating "float" have been found.
0b7fceb9
MU
7340 */
7341
7342 if (!floatit) {
58bb9ec3
NC
7343 UV uv;
7344 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
7345
7346 if (flags == IS_NUMBER_IN_UV) {
7347 if (uv <= IV_MAX)
86554af2 7348 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 7349 else
c239479b 7350 sv_setuv(sv, uv);
58bb9ec3
NC
7351 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
7352 if (uv <= (UV) IV_MIN)
7353 sv_setiv(sv, -(IV)uv);
7354 else
7355 floatit = TRUE;
7356 } else
7357 floatit = TRUE;
7358 }
0b7fceb9 7359 if (floatit) {
58bb9ec3
NC
7360 /* terminate the string */
7361 *d = '\0';
86554af2
JH
7362 nv = Atof(PL_tokenbuf);
7363 sv_setnv(sv, nv);
7364 }
86554af2 7365
b8403495
JH
7366 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7367 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7368 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7369 (floatit ? "float" : "integer"),
7370 sv, Nullsv, NULL);
378cc40b 7371 break;
0b7fceb9 7372
e312add1 7373 /* if it starts with a v, it could be a v-string */
a7cb1f99 7374 case 'v':
dd629d5b 7375vstring:
f4758303
JP
7376 sv = NEWSV(92,5); /* preallocate storage space */
7377 s = new_vstring(s,sv);
a7cb1f99 7378 break;
79072805 7379 }
a687059c 7380
02aa26ce
NT
7381 /* make the op for the constant and return */
7382
a86a20aa 7383 if (sv)
b73d6f50 7384 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7385 else
b73d6f50 7386 lvalp->opval = Nullop;
a687059c 7387
378cc40b
LW
7388 return s;
7389}
7390
76e3520e 7391STATIC char *
cea2e8a9 7392S_scan_formline(pTHX_ register char *s)
378cc40b 7393{
79072805 7394 register char *eol;
378cc40b 7395 register char *t;
79cb57f6 7396 SV *stuff = newSVpvn("",0);
79072805 7397 bool needargs = FALSE;
378cc40b 7398
79072805 7399 while (!needargs) {
c2e66d9e 7400 if (*s == '.' || *s == /*{*/'}') {
79072805 7401 /*SUPPRESS 530*/
51882d45 7402#ifdef PERL_STRICT_CR
bf4acbe4 7403 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7404#else
bf4acbe4 7405 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7406#endif
6a65c6a0 7407 if (*t == '\n' || t == PL_bufend)
79072805
LW
7408 break;
7409 }
3280af22 7410 if (PL_in_eval && !PL_rsfp) {
93a17b20 7411 eol = strchr(s,'\n');
0f85fab0 7412 if (!eol++)
3280af22 7413 eol = PL_bufend;
0f85fab0
LW
7414 }
7415 else
3280af22 7416 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7417 if (*s != '#') {
a0d0e21e
LW
7418 for (t = s; t < eol; t++) {
7419 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7420 needargs = FALSE;
7421 goto enough; /* ~~ must be first line in formline */
378cc40b 7422 }
a0d0e21e
LW
7423 if (*t == '@' || *t == '^')
7424 needargs = TRUE;
378cc40b 7425 }
7121b347
MG
7426 if (eol > s) {
7427 sv_catpvn(stuff, s, eol-s);
2dc4c65b 7428#ifndef PERL_STRICT_CR
7121b347
MG
7429 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7430 char *end = SvPVX(stuff) + SvCUR(stuff);
7431 end[-2] = '\n';
7432 end[-1] = '\0';
7433 SvCUR(stuff)--;
7434 }
2dc4c65b 7435#endif
7121b347
MG
7436 }
7437 else
7438 break;
79072805
LW
7439 }
7440 s = eol;
3280af22
NIS
7441 if (PL_rsfp) {
7442 s = filter_gets(PL_linestr, PL_rsfp, 0);
7443 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7444 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7445 PL_last_lop = PL_last_uni = Nullch;
79072805 7446 if (!s) {
3280af22 7447 s = PL_bufptr;
79072805 7448 yyerror("Format not terminated");
378cc40b
LW
7449 break;
7450 }
378cc40b 7451 }
463ee0b2 7452 incline(s);
79072805 7453 }
a0d0e21e
LW
7454 enough:
7455 if (SvCUR(stuff)) {
3280af22 7456 PL_expect = XTERM;
79072805 7457 if (needargs) {
3280af22
NIS
7458 PL_lex_state = LEX_NORMAL;
7459 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7460 force_next(',');
7461 }
a0d0e21e 7462 else
3280af22
NIS
7463 PL_lex_state = LEX_FORMLINE;
7464 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7465 force_next(THING);
3280af22 7466 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7467 force_next(LSTOP);
378cc40b 7468 }
79072805 7469 else {
8990e307 7470 SvREFCNT_dec(stuff);
3280af22
NIS
7471 PL_lex_formbrack = 0;
7472 PL_bufptr = s;
79072805
LW
7473 }
7474 return s;
378cc40b 7475}
a687059c 7476
76e3520e 7477STATIC void
cea2e8a9 7478S_set_csh(pTHX)
a687059c 7479{
ae986130 7480#ifdef CSH
3280af22
NIS
7481 if (!PL_cshlen)
7482 PL_cshlen = strlen(PL_cshname);
ae986130 7483#endif
a687059c 7484}
463ee0b2 7485
ba6d6ac9 7486I32
864dbfa3 7487Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7488{
3280af22
NIS
7489 I32 oldsavestack_ix = PL_savestack_ix;
7490 CV* outsidecv = PL_compcv;
748a9306 7491 AV* comppadlist;
8990e307 7492
3280af22
NIS
7493 if (PL_compcv) {
7494 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7495 }
7766f137 7496 SAVEI32(PL_subline);
3280af22
NIS
7497 save_item(PL_subname);
7498 SAVEI32(PL_padix);
354992b1 7499 SAVECOMPPAD();
3280af22
NIS
7500 SAVESPTR(PL_comppad_name);
7501 SAVESPTR(PL_compcv);
7502 SAVEI32(PL_comppad_name_fill);
7503 SAVEI32(PL_min_intro_pending);
7504 SAVEI32(PL_max_intro_pending);
7505 SAVEI32(PL_pad_reset_pending);
7506
7507 PL_compcv = (CV*)NEWSV(1104,0);
7508 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7509 CvFLAGS(PL_compcv) |= flags;
7510
7511 PL_comppad = newAV();
7512 av_push(PL_comppad, Nullsv);
7513 PL_curpad = AvARRAY(PL_comppad);
7514 PL_comppad_name = newAV();
7515 PL_comppad_name_fill = 0;
7516 PL_min_intro_pending = 0;
7517 PL_padix = 0;
57843af0 7518 PL_subline = CopLINE(PL_curcop);
4d1ff10f 7519#ifdef USE_5005THREADS
79cb57f6 7520 av_store(PL_comppad_name, 0, newSVpvn("@_", 2));
533c011a
NIS
7521 PL_curpad[0] = (SV*)newAV();
7522 SvPADMY_on(PL_curpad[0]); /* XXX Needed? */
4d1ff10f 7523#endif /* USE_5005THREADS */
748a9306
LW
7524
7525 comppadlist = newAV();
7526 AvREAL_off(comppadlist);
3280af22
NIS
7527 av_store(comppadlist, 0, (SV*)PL_comppad_name);
7528 av_store(comppadlist, 1, (SV*)PL_comppad);
748a9306 7529
3280af22
NIS
7530 CvPADLIST(PL_compcv) = comppadlist;
7531 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
4d1ff10f 7532#ifdef USE_5005THREADS
533c011a
NIS
7533 CvOWNER(PL_compcv) = 0;
7534 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex);
7535 MUTEX_INIT(CvMUTEXP(PL_compcv));
4d1ff10f 7536#endif /* USE_5005THREADS */
748a9306 7537
8990e307
LW
7538 return oldsavestack_ix;
7539}
7540
084592ab
CN
7541#ifdef __SC__
7542#pragma segment Perl_yylex
7543#endif
8990e307 7544int
864dbfa3 7545Perl_yywarn(pTHX_ char *s)
8990e307 7546{
faef0170 7547 PL_in_eval |= EVAL_WARNONLY;
748a9306 7548 yyerror(s);
faef0170 7549 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7550 return 0;
8990e307
LW
7551}
7552
7553int
864dbfa3 7554Perl_yyerror(pTHX_ char *s)
463ee0b2 7555{
68dc0745 7556 char *where = NULL;
7557 char *context = NULL;
7558 int contlen = -1;
46fc3d4c 7559 SV *msg;
463ee0b2 7560
3280af22 7561 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7562 where = "at EOF";
3280af22
NIS
7563 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7564 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
7565 while (isSPACE(*PL_oldoldbufptr))
7566 PL_oldoldbufptr++;
7567 context = PL_oldoldbufptr;
7568 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7569 }
3280af22
NIS
7570 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7571 PL_oldbufptr != PL_bufptr) {
7572 while (isSPACE(*PL_oldbufptr))
7573 PL_oldbufptr++;
7574 context = PL_oldbufptr;
7575 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7576 }
7577 else if (yychar > 255)
68dc0745 7578 where = "next token ???";
cdfb297e
GS
7579#ifdef USE_PURE_BISON
7580/* GNU Bison sets the value -2 */
7581 else if (yychar == -2) {
7582#else
463ee0b2 7583 else if ((yychar & 127) == 127) {
cdfb297e 7584#endif
3280af22
NIS
7585 if (PL_lex_state == LEX_NORMAL ||
7586 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7587 where = "at end of line";
3280af22 7588 else if (PL_lex_inpat)
68dc0745 7589 where = "within pattern";
463ee0b2 7590 else
68dc0745 7591 where = "within string";
463ee0b2 7592 }
46fc3d4c 7593 else {
79cb57f6 7594 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7595 if (yychar < 32)
cea2e8a9 7596 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7597 else if (isPRINT_LC(yychar))
cea2e8a9 7598 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7599 else
cea2e8a9 7600 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7601 where = SvPVX(where_sv);
463ee0b2 7602 }
46fc3d4c 7603 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 7604 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 7605 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7606 if (context)
cea2e8a9 7607 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7608 else
cea2e8a9 7609 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7610 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7611 Perl_sv_catpvf(aTHX_ msg,
57def98f 7612 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7613 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7614 PL_multi_end = 0;
a0d0e21e 7615 }
faef0170 7616 if (PL_in_eval & EVAL_WARNONLY)
894356b3 7617 Perl_warn(aTHX_ "%"SVf, msg);
463ee0b2 7618 else
5a844595 7619 qerror(msg);
c7d6bfb2
GS
7620 if (PL_error_count >= 10) {
7621 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7622 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
248c2a4d 7623 ERRSV, OutCopFILE(PL_curcop));
c7d6bfb2
GS
7624 else
7625 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 7626 OutCopFILE(PL_curcop));
c7d6bfb2 7627 }
3280af22
NIS
7628 PL_in_my = 0;
7629 PL_in_my_stash = Nullhv;
463ee0b2
LW
7630 return 0;
7631}
084592ab
CN
7632#ifdef __SC__
7633#pragma segment Main
7634#endif
4e35701f 7635
b250498f 7636STATIC char*
3ae08724 7637S_swallow_bom(pTHX_ U8 *s)
01ec43d0 7638{
b250498f
GS
7639 STRLEN slen;
7640 slen = SvCUR(PL_linestr);
7641 switch (*s) {
4e553d73
NIS
7642 case 0xFF:
7643 if (s[1] == 0xFE) {
01ec43d0 7644 /* UTF-16 little-endian */
3ae08724 7645 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
01ec43d0
GS
7646 Perl_croak(aTHX_ "Unsupported script encoding");
7647#ifndef PERL_NO_UTF16_FILTER
dea0fc0b 7648 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
3ae08724 7649 s += 2;
dea0fc0b
JH
7650 if (PL_bufend > (char*)s) {
7651 U8 *news;
7652 I32 newlen;
7653
7654 filter_add(utf16rev_textfilter, NULL);
7655 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
f72f5f89
JH
7656 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7657 PL_bufend - (char*)s - 1,
dea0fc0b
JH
7658 &newlen);
7659 Copy(news, s, newlen, U8);
7660 SvCUR_set(PL_linestr, newlen);
7661 PL_bufend = SvPVX(PL_linestr) + newlen;
7662 news[newlen++] = '\0';
7663 Safefree(news);
7664 }
b250498f 7665#else
01ec43d0 7666 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7667#endif
01ec43d0
GS
7668 }
7669 break;
78ae23f5 7670 case 0xFE:
3ae08724 7671 if (s[1] == 0xFF) { /* UTF-16 big-endian */
01ec43d0 7672#ifndef PERL_NO_UTF16_FILTER
dea0fc0b
JH
7673 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7674 s += 2;
7675 if (PL_bufend > (char *)s) {
7676 U8 *news;
7677 I32 newlen;
7678
7679 filter_add(utf16_textfilter, NULL);
7680 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7681 PL_bufend = (char*)utf16_to_utf8(s, news,
7682 PL_bufend - (char*)s,
7683 &newlen);
7684 Copy(news, s, newlen, U8);
7685 SvCUR_set(PL_linestr, newlen);
7686 PL_bufend = SvPVX(PL_linestr) + newlen;
7687 news[newlen++] = '\0';
7688 Safefree(news);
7689 }
b250498f 7690#else
01ec43d0 7691 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7692#endif
01ec43d0
GS
7693 }
7694 break;
3ae08724
GS
7695 case 0xEF:
7696 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
dea0fc0b 7697 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
01ec43d0
GS
7698 s += 3; /* UTF-8 */
7699 }
7700 break;
7701 case 0:
7702 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
3ae08724 7703 s[2] == 0xFE && s[3] == 0xFF)
01ec43d0
GS
7704 {
7705 Perl_croak(aTHX_ "Unsupported script encoding");
7706 }
7707 }
b8f84bb2 7708 return (char*)s;
b250498f 7709}
4755096e 7710
4755096e
GS
7711/*
7712 * restore_rsfp
7713 * Restore a source filter.
7714 */
7715
7716static void
acfe0abc 7717restore_rsfp(pTHX_ void *f)
4755096e
GS
7718{
7719 PerlIO *fp = (PerlIO*)f;
7720
7721 if (PL_rsfp == PerlIO_stdin())
7722 PerlIO_clearerr(PL_rsfp);
7723 else if (PL_rsfp && (PL_rsfp != fp))
7724 PerlIO_close(PL_rsfp);
7725 PL_rsfp = fp;
7726}
6e3aabd6
GS
7727
7728#ifndef PERL_NO_UTF16_FILTER
7729static I32
acfe0abc 7730utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7731{
7732 I32 count = FILTER_READ(idx+1, sv, maxlen);
7733 if (count) {
7734 U8* tmps;
7735 U8* tend;
dea0fc0b 7736 I32 newlen;
6e3aabd6 7737 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
f72f5f89
JH
7738 if (!*SvPV_nolen(sv))
7739 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7740 return count;
4e553d73 7741
dea0fc0b 7742 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7743 sv_usepvn(sv, (char*)tmps, tend - tmps);
7744 }
7745 return count;
7746}
7747
7748static I32
acfe0abc 7749utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6
GS
7750{
7751 I32 count = FILTER_READ(idx+1, sv, maxlen);
7752 if (count) {
7753 U8* tmps;
7754 U8* tend;
dea0fc0b 7755 I32 newlen;
f72f5f89
JH
7756 if (!*SvPV_nolen(sv))
7757 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7758 return count;
7759
6e3aabd6 7760 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
dea0fc0b 7761 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7762 sv_usepvn(sv, (char*)tmps, tend - tmps);
7763 }
7764 return count;
7765}
7766#endif
9f4817db 7767