This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[ID 20010525.001] Pod typo nits fixed
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
2b9d42f0
NIS
39#ifdef EBCDIC
40/* For now 'use utf8' does not affect tokenizer on EBCDIC */
41#define UTF (PL_linestr && DO_UTF8(PL_linestr))
42#else
43#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
44#endif
a0ed51b3 45
4e553d73 46/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
47 * 1999-02-27 mjd-perl-patch@plover.com */
48#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
49
bf4acbe4
GS
50/* On MacOS, respect nonbreaking spaces */
51#ifdef MACOS_TRADITIONAL
52#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
53#else
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
55#endif
56
ffb4593c
NT
57/* LEX_* are values for PL_lex_state, the state of the lexer.
58 * They are arranged oddly so that the guard on the switch statement
79072805
LW
59 * can get by with a single comparison (if the compiler is smart enough).
60 */
61
fb73857a 62/* #define LEX_NOTPARSING 11 is done in perl.h. */
63
55497cff 64#define LEX_NORMAL 10
65#define LEX_INTERPNORMAL 9
66#define LEX_INTERPCASEMOD 8
67#define LEX_INTERPPUSH 7
68#define LEX_INTERPSTART 6
69#define LEX_INTERPEND 5
70#define LEX_INTERPENDMAYBE 4
71#define LEX_INTERPCONCAT 3
72#define LEX_INTERPCONST 2
73#define LEX_FORMLINE 1
74#define LEX_KNOWNEXT 0
79072805 75
79072805
LW
76#ifdef ff_next
77#undef ff_next
d48672a2
LW
78#endif
79
a1a0e61e 80#ifdef USE_PURE_BISON
dba4d153
JH
81# ifndef YYMAXLEVEL
82# define YYMAXLEVEL 100
83# endif
20141f0e
IRC
84YYSTYPE* yylval_pointer[YYMAXLEVEL];
85int* yychar_pointer[YYMAXLEVEL];
6f202aea 86int yyactlevel = -1;
22c35a8c
GS
87# undef yylval
88# undef yychar
20141f0e
IRC
89# define yylval (*yylval_pointer[yyactlevel])
90# define yychar (*yychar_pointer[yyactlevel])
91# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 92# undef yylex
dba4d153 93# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
94#endif
95
79072805 96#include "keywords.h"
fe14fcc3 97
ffb4593c
NT
98/* CLINE is a macro that ensures PL_copline has a sane value */
99
ae986130
LW
100#ifdef CLINE
101#undef CLINE
102#endif
57843af0 103#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 104
ffb4593c
NT
105/*
106 * Convenience functions to return different tokens and prime the
9cbb5ea2 107 * lexer for the next token. They all take an argument.
ffb4593c
NT
108 *
109 * TOKEN : generic token (used for '(', DOLSHARP, etc)
110 * OPERATOR : generic operator
111 * AOPERATOR : assignment operator
112 * PREBLOCK : beginning the block after an if, while, foreach, ...
113 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
114 * PREREF : *EXPR where EXPR is not a simple identifier
115 * TERM : expression term
116 * LOOPX : loop exiting command (goto, last, dump, etc)
117 * FTST : file test operator
118 * FUN0 : zero-argument function
2d2e263d 119 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
120 * BOop : bitwise or or xor
121 * BAop : bitwise and
122 * SHop : shift operator
123 * PWop : power operator
9cbb5ea2 124 * PMop : pattern-matching operator
ffb4593c
NT
125 * Aop : addition-level operator
126 * Mop : multiplication-level operator
127 * Eop : equality-testing operator
e5edeb50 128 * Rop : relational operator <= != gt
ffb4593c
NT
129 *
130 * Also see LOP and lop() below.
131 */
132
075953c3
JH
133/* Note that REPORT() and REPORT2() will be expressions that supply
134 * their own trailing comma, not suitable for statements as such. */
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
136# define REPORT(x,retval) tokereport(x,s,(int)retval),
137# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 138#else
075953c3
JH
139# define REPORT(x,retval)
140# define REPORT2(x,retval)
998054bd
SC
141#endif
142
075953c3
JH
143#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
144#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
145#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
146#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
147#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
148#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
149#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
150#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
151#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
152#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
153#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
154#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
155#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
156#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
157#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
158#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
159#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
160#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
161#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
162#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 163
a687059c
LW
164/* This bit of chicanery makes a unary function followed by
165 * a parenthesis into a function with one argument, highest precedence.
166 */
2f3197b3 167#define UNI(f) return(yylval.ival = f, \
075953c3 168 REPORT("uni",f) \
3280af22
NIS
169 PL_expect = XTERM, \
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
a687059c
LW
173 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
174
79072805 175#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 176 REPORT("uni",f) \
3280af22
NIS
177 PL_bufptr = s, \
178 PL_last_uni = PL_oldbufptr, \
79072805
LW
179 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
180
9f68db38 181/* grandfather return to old style */
3280af22 182#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 183
2d00ba3b 184STATIC void
61b2116b 185S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 186{
998054bd
SC
187 SV *report;
188 DEBUG_T({
189 report = newSVpv(thing, 0);
29b291f7
RB
190 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
191 (IV)rv);
998054bd
SC
192
193 if (s - PL_bufptr > 0)
194 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
195 else {
196 if (PL_oldbufptr && *PL_oldbufptr)
197 sv_catpv(report, PL_tokenbuf);
198 }
199 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
200 })
201}
202
ffb4593c
NT
203/*
204 * S_ao
205 *
206 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
207 * into an OP_ANDASSIGN or OP_ORASSIGN
208 */
209
76e3520e 210STATIC int
cea2e8a9 211S_ao(pTHX_ int toketype)
a0d0e21e 212{
3280af22
NIS
213 if (*PL_bufptr == '=') {
214 PL_bufptr++;
a0d0e21e
LW
215 if (toketype == ANDAND)
216 yylval.ival = OP_ANDASSIGN;
217 else if (toketype == OROR)
218 yylval.ival = OP_ORASSIGN;
219 toketype = ASSIGNOP;
220 }
221 return toketype;
222}
223
ffb4593c
NT
224/*
225 * S_no_op
226 * When Perl expects an operator and finds something else, no_op
227 * prints the warning. It always prints "<something> found where
228 * operator expected. It prints "Missing semicolon on previous line?"
229 * if the surprise occurs at the start of the line. "do you need to
230 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
231 * where the compiler doesn't know if foo is a method call or a function.
232 * It prints "Missing operator before end of line" if there's nothing
233 * after the missing operator, or "... before <...>" if there is something
234 * after the missing operator.
235 */
236
76e3520e 237STATIC void
cea2e8a9 238S_no_op(pTHX_ char *what, char *s)
463ee0b2 239{
3280af22
NIS
240 char *oldbp = PL_bufptr;
241 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 242
1189a94a
GS
243 if (!s)
244 s = oldbp;
07c798fb 245 else
1189a94a 246 PL_bufptr = s;
cea2e8a9 247 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 248 if (is_first)
cea2e8a9 249 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 250 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 251 char *t;
7e2040f0 252 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 253 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 254 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 255 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 256 }
07c798fb
HS
257 else {
258 assert(s >= oldbp);
cea2e8a9 259 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 260 }
3280af22 261 PL_bufptr = oldbp;
8990e307
LW
262}
263
ffb4593c
NT
264/*
265 * S_missingterm
266 * Complain about missing quote/regexp/heredoc terminator.
267 * If it's called with (char *)NULL then it cauterizes the line buffer.
268 * If we're in a delimited string and the delimiter is a control
269 * character, it's reformatted into a two-char sequence like ^C.
270 * This is fatal.
271 */
272
76e3520e 273STATIC void
cea2e8a9 274S_missingterm(pTHX_ char *s)
8990e307
LW
275{
276 char tmpbuf[3];
277 char q;
278 if (s) {
279 char *nl = strrchr(s,'\n');
d2719217 280 if (nl)
8990e307
LW
281 *nl = '\0';
282 }
9d116dd7
JH
283 else if (
284#ifdef EBCDIC
285 iscntrl(PL_multi_close)
286#else
287 PL_multi_close < 32 || PL_multi_close == 127
288#endif
289 ) {
8990e307 290 *tmpbuf = '^';
3280af22 291 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
292 s = "\\n";
293 tmpbuf[2] = '\0';
294 s = tmpbuf;
295 }
296 else {
3280af22 297 *tmpbuf = PL_multi_close;
8990e307
LW
298 tmpbuf[1] = '\0';
299 s = tmpbuf;
300 }
301 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 302 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 303}
79072805 304
ffb4593c
NT
305/*
306 * Perl_deprecate
ffb4593c
NT
307 */
308
79072805 309void
864dbfa3 310Perl_deprecate(pTHX_ char *s)
a0d0e21e 311{
599cee73 312 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 313 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
314}
315
ffb4593c
NT
316/*
317 * depcom
9cbb5ea2 318 * Deprecate a comma-less variable list.
ffb4593c
NT
319 */
320
76e3520e 321STATIC void
cea2e8a9 322S_depcom(pTHX)
a0d0e21e
LW
323{
324 deprecate("comma-less variable list");
325}
326
ffb4593c 327/*
9cbb5ea2
GS
328 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
329 * utf16-to-utf8-reversed.
ffb4593c
NT
330 */
331
c39cd008
GS
332#ifdef PERL_CR_FILTER
333static void
334strip_return(SV *sv)
335{
336 register char *s = SvPVX(sv);
337 register char *e = s + SvCUR(sv);
338 /* outer loop optimized to do nothing if there are no CR-LFs */
339 while (s < e) {
340 if (*s++ == '\r' && *s == '\n') {
341 /* hit a CR-LF, need to copy the rest */
342 register char *d = s - 1;
343 *d++ = *s++;
344 while (s < e) {
345 if (*s == '\r' && s[1] == '\n')
346 s++;
347 *d++ = *s++;
348 }
349 SvCUR(sv) -= s - d;
350 return;
351 }
352 }
353}
a868473f 354
76e3520e 355STATIC I32
c39cd008 356S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 357{
c39cd008
GS
358 I32 count = FILTER_READ(idx+1, sv, maxlen);
359 if (count > 0 && !maxlen)
360 strip_return(sv);
361 return count;
a868473f
NIS
362}
363#endif
364
ffb4593c
NT
365/*
366 * Perl_lex_start
9cbb5ea2
GS
367 * Initialize variables. Uses the Perl save_stack to save its state (for
368 * recursive calls to the parser).
ffb4593c
NT
369 */
370
a0d0e21e 371void
864dbfa3 372Perl_lex_start(pTHX_ SV *line)
79072805 373{
8990e307
LW
374 char *s;
375 STRLEN len;
376
3280af22
NIS
377 SAVEI32(PL_lex_dojoin);
378 SAVEI32(PL_lex_brackets);
3280af22
NIS
379 SAVEI32(PL_lex_casemods);
380 SAVEI32(PL_lex_starts);
381 SAVEI32(PL_lex_state);
7766f137 382 SAVEVPTR(PL_lex_inpat);
3280af22 383 SAVEI32(PL_lex_inwhat);
18b09519
GS
384 if (PL_lex_state == LEX_KNOWNEXT) {
385 I32 toke = PL_nexttoke;
386 while (--toke >= 0) {
387 SAVEI32(PL_nexttype[toke]);
388 SAVEVPTR(PL_nextval[toke]);
389 }
390 SAVEI32(PL_nexttoke);
18b09519 391 }
57843af0 392 SAVECOPLINE(PL_curcop);
3280af22
NIS
393 SAVEPPTR(PL_bufptr);
394 SAVEPPTR(PL_bufend);
395 SAVEPPTR(PL_oldbufptr);
396 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
397 SAVEPPTR(PL_last_lop);
398 SAVEPPTR(PL_last_uni);
3280af22
NIS
399 SAVEPPTR(PL_linestart);
400 SAVESPTR(PL_linestr);
401 SAVEPPTR(PL_lex_brackstack);
402 SAVEPPTR(PL_lex_casestack);
c76ac1ee 403 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
404 SAVESPTR(PL_lex_stuff);
405 SAVEI32(PL_lex_defer);
09bef843 406 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 407 SAVESPTR(PL_lex_repl);
bebdddfc
GS
408 SAVEINT(PL_expect);
409 SAVEINT(PL_lex_expect);
3280af22
NIS
410
411 PL_lex_state = LEX_NORMAL;
412 PL_lex_defer = 0;
413 PL_expect = XSTATE;
414 PL_lex_brackets = 0;
3280af22
NIS
415 New(899, PL_lex_brackstack, 120, char);
416 New(899, PL_lex_casestack, 12, char);
417 SAVEFREEPV(PL_lex_brackstack);
418 SAVEFREEPV(PL_lex_casestack);
419 PL_lex_casemods = 0;
420 *PL_lex_casestack = '\0';
421 PL_lex_dojoin = 0;
422 PL_lex_starts = 0;
423 PL_lex_stuff = Nullsv;
424 PL_lex_repl = Nullsv;
425 PL_lex_inpat = 0;
76be56bc 426 PL_nexttoke = 0;
3280af22 427 PL_lex_inwhat = 0;
09bef843 428 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
429 PL_linestr = line;
430 if (SvREADONLY(PL_linestr))
431 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
432 s = SvPV(PL_linestr, len);
8990e307 433 if (len && s[len-1] != ';') {
3280af22
NIS
434 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
435 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
436 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 437 }
3280af22
NIS
438 SvTEMP_off(PL_linestr);
439 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
440 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 441 PL_last_lop = PL_last_uni = Nullch;
3280af22 442 SvREFCNT_dec(PL_rs);
79cb57f6 443 PL_rs = newSVpvn("\n", 1);
3280af22 444 PL_rsfp = 0;
79072805 445}
a687059c 446
ffb4593c
NT
447/*
448 * Perl_lex_end
9cbb5ea2
GS
449 * Finalizer for lexing operations. Must be called when the parser is
450 * done with the lexer.
ffb4593c
NT
451 */
452
463ee0b2 453void
864dbfa3 454Perl_lex_end(pTHX)
463ee0b2 455{
3280af22 456 PL_doextract = FALSE;
463ee0b2
LW
457}
458
ffb4593c
NT
459/*
460 * S_incline
461 * This subroutine has nothing to do with tilting, whether at windmills
462 * or pinball tables. Its name is short for "increment line". It
57843af0 463 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 464 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
465 * # line 500 "foo.pm"
466 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
467 */
468
76e3520e 469STATIC void
cea2e8a9 470S_incline(pTHX_ char *s)
463ee0b2
LW
471{
472 char *t;
473 char *n;
73659bf1 474 char *e;
463ee0b2 475 char ch;
463ee0b2 476
57843af0 477 CopLINE_inc(PL_curcop);
463ee0b2
LW
478 if (*s++ != '#')
479 return;
bf4acbe4 480 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
481 if (strnEQ(s, "line", 4))
482 s += 4;
483 else
484 return;
084592ab 485 if (SPACE_OR_TAB(*s))
73659bf1 486 s++;
4e553d73 487 else
73659bf1 488 return;
bf4acbe4 489 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
490 if (!isDIGIT(*s))
491 return;
492 n = s;
493 while (isDIGIT(*s))
494 s++;
bf4acbe4 495 while (SPACE_OR_TAB(*s))
463ee0b2 496 s++;
73659bf1 497 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 498 s++;
73659bf1
GS
499 e = t + 1;
500 }
463ee0b2 501 else {
463ee0b2 502 for (t = s; !isSPACE(*t); t++) ;
73659bf1 503 e = t;
463ee0b2 504 }
bf4acbe4 505 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
506 e++;
507 if (*e != '\n' && *e != '\0')
508 return; /* false alarm */
509
463ee0b2
LW
510 ch = *t;
511 *t = '\0';
f4dd75d9
GS
512 if (t - s > 0) {
513#ifdef USE_ITHREADS
514 Safefree(CopFILE(PL_curcop));
515#else
516 SvREFCNT_dec(CopFILEGV(PL_curcop));
517#endif
57843af0 518 CopFILE_set(PL_curcop, s);
f4dd75d9 519 }
463ee0b2 520 *t = ch;
57843af0 521 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
522}
523
ffb4593c
NT
524/*
525 * S_skipspace
526 * Called to gobble the appropriate amount and type of whitespace.
527 * Skips comments as well.
528 */
529
76e3520e 530STATIC char *
cea2e8a9 531S_skipspace(pTHX_ register char *s)
a687059c 532{
3280af22 533 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 534 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
535 s++;
536 return s;
537 }
538 for (;;) {
fd049845 539 STRLEN prevlen;
09bef843
SB
540 SSize_t oldprevlen, oldoldprevlen;
541 SSize_t oldloplen, oldunilen;
60e6418e
GS
542 while (s < PL_bufend && isSPACE(*s)) {
543 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
544 incline(s);
545 }
ffb4593c
NT
546
547 /* comment */
3280af22
NIS
548 if (s < PL_bufend && *s == '#') {
549 while (s < PL_bufend && *s != '\n')
463ee0b2 550 s++;
60e6418e 551 if (s < PL_bufend) {
463ee0b2 552 s++;
60e6418e
GS
553 if (PL_in_eval && !PL_rsfp) {
554 incline(s);
555 continue;
556 }
557 }
463ee0b2 558 }
ffb4593c
NT
559
560 /* only continue to recharge the buffer if we're at the end
561 * of the buffer, we're not reading from a source filter, and
562 * we're in normal lexing mode
563 */
09bef843
SB
564 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
565 PL_lex_state == LEX_FORMLINE)
463ee0b2 566 return s;
ffb4593c
NT
567
568 /* try to recharge the buffer */
9cbb5ea2
GS
569 if ((s = filter_gets(PL_linestr, PL_rsfp,
570 (prevlen = SvCUR(PL_linestr)))) == Nullch)
571 {
572 /* end of file. Add on the -p or -n magic */
3280af22
NIS
573 if (PL_minus_n || PL_minus_p) {
574 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
575 ";}continue{print or die qq(-p destination: $!\\n)" :
576 "");
3280af22
NIS
577 sv_catpv(PL_linestr,";}");
578 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
579 }
580 else
3280af22 581 sv_setpv(PL_linestr,";");
ffb4593c
NT
582
583 /* reset variables for next time we lex */
9cbb5ea2
GS
584 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
585 = SvPVX(PL_linestr);
3280af22 586 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 587 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
588
589 /* Close the filehandle. Could be from -P preprocessor,
590 * STDIN, or a regular file. If we were reading code from
591 * STDIN (because the commandline held no -e or filename)
592 * then we don't close it, we reset it so the code can
593 * read from STDIN too.
594 */
595
3280af22
NIS
596 if (PL_preprocess && !PL_in_eval)
597 (void)PerlProc_pclose(PL_rsfp);
598 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
599 PerlIO_clearerr(PL_rsfp);
8990e307 600 else
3280af22
NIS
601 (void)PerlIO_close(PL_rsfp);
602 PL_rsfp = Nullfp;
463ee0b2
LW
603 return s;
604 }
ffb4593c
NT
605
606 /* not at end of file, so we only read another line */
09bef843
SB
607 /* make corresponding updates to old pointers, for yyerror() */
608 oldprevlen = PL_oldbufptr - PL_bufend;
609 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
610 if (PL_last_uni)
611 oldunilen = PL_last_uni - PL_bufend;
612 if (PL_last_lop)
613 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
614 PL_linestart = PL_bufptr = s + prevlen;
615 PL_bufend = s + SvCUR(PL_linestr);
616 s = PL_bufptr;
09bef843
SB
617 PL_oldbufptr = s + oldprevlen;
618 PL_oldoldbufptr = s + oldoldprevlen;
619 if (PL_last_uni)
620 PL_last_uni = s + oldunilen;
621 if (PL_last_lop)
622 PL_last_lop = s + oldloplen;
a0d0e21e 623 incline(s);
ffb4593c
NT
624
625 /* debugger active and we're not compiling the debugger code,
626 * so store the line into the debugger's array of lines
627 */
3280af22 628 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
629 SV *sv = NEWSV(85,0);
630
631 sv_upgrade(sv, SVt_PVMG);
3280af22 632 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 633 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 634 }
463ee0b2 635 }
a687059c 636}
378cc40b 637
ffb4593c
NT
638/*
639 * S_check_uni
640 * Check the unary operators to ensure there's no ambiguity in how they're
641 * used. An ambiguous piece of code would be:
642 * rand + 5
643 * This doesn't mean rand() + 5. Because rand() is a unary operator,
644 * the +5 is its argument.
645 */
646
76e3520e 647STATIC void
cea2e8a9 648S_check_uni(pTHX)
ba106d47 649{
2f3197b3 650 char *s;
a0d0e21e 651 char *t;
2f3197b3 652
3280af22 653 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 654 return;
3280af22
NIS
655 while (isSPACE(*PL_last_uni))
656 PL_last_uni++;
7e2040f0 657 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 658 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 659 return;
0453d815 660 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 661 char ch = *s;
0453d815 662 *s = '\0';
4e553d73
NIS
663 Perl_warner(aTHX_ WARN_AMBIGUOUS,
664 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
665 PL_last_uni);
666 *s = ch;
667 }
2f3197b3
LW
668}
669
ffb4593c
NT
670/* workaround to replace the UNI() macro with a function. Only the
671 * hints/uts.sh file mentions this. Other comments elsewhere in the
672 * source indicate Microport Unix might need it too.
673 */
674
ffed7fef
LW
675#ifdef CRIPPLED_CC
676
677#undef UNI
ffed7fef 678#define UNI(f) return uni(f,s)
ffed7fef 679
76e3520e 680STATIC int
cea2e8a9 681S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
682{
683 yylval.ival = f;
3280af22
NIS
684 PL_expect = XTERM;
685 PL_bufptr = s;
8f872242
NIS
686 PL_last_uni = PL_oldbufptr;
687 PL_last_lop_op = f;
ffed7fef
LW
688 if (*s == '(')
689 return FUNC1;
690 s = skipspace(s);
691 if (*s == '(')
692 return FUNC1;
693 else
694 return UNIOP;
695}
696
a0d0e21e
LW
697#endif /* CRIPPLED_CC */
698
ffb4593c
NT
699/*
700 * LOP : macro to build a list operator. Its behaviour has been replaced
701 * with a subroutine, S_lop() for which LOP is just another name.
702 */
703
a0d0e21e
LW
704#define LOP(f,x) return lop(f,x,s)
705
ffb4593c
NT
706/*
707 * S_lop
708 * Build a list operator (or something that might be one). The rules:
709 * - if we have a next token, then it's a list operator [why?]
710 * - if the next thing is an opening paren, then it's a function
711 * - else it's a list operator
712 */
713
76e3520e 714STATIC I32
a0be28da 715S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 716{
79072805 717 yylval.ival = f;
35c8bce7 718 CLINE;
075953c3 719 REPORT("lop", f)
3280af22
NIS
720 PL_expect = x;
721 PL_bufptr = s;
722 PL_last_lop = PL_oldbufptr;
723 PL_last_lop_op = f;
724 if (PL_nexttoke)
a0d0e21e 725 return LSTOP;
79072805
LW
726 if (*s == '(')
727 return FUNC;
728 s = skipspace(s);
729 if (*s == '(')
730 return FUNC;
731 else
732 return LSTOP;
733}
734
ffb4593c
NT
735/*
736 * S_force_next
9cbb5ea2 737 * When the lexer realizes it knows the next token (for instance,
ffb4593c 738 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
739 * to know what token to return the next time the lexer is called. Caller
740 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
741 * handles the token correctly.
ffb4593c
NT
742 */
743
4e553d73 744STATIC void
cea2e8a9 745S_force_next(pTHX_ I32 type)
79072805 746{
3280af22
NIS
747 PL_nexttype[PL_nexttoke] = type;
748 PL_nexttoke++;
749 if (PL_lex_state != LEX_KNOWNEXT) {
750 PL_lex_defer = PL_lex_state;
751 PL_lex_expect = PL_expect;
752 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
753 }
754}
755
ffb4593c
NT
756/*
757 * S_force_word
758 * When the lexer knows the next thing is a word (for instance, it has
759 * just seen -> and it knows that the next char is a word char, then
760 * it calls S_force_word to stick the next word into the PL_next lookahead.
761 *
762 * Arguments:
b1b65b59 763 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
764 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
765 * int check_keyword : if true, Perl checks to make sure the word isn't
766 * a keyword (do this if the word is a label, e.g. goto FOO)
767 * int allow_pack : if true, : characters will also be allowed (require,
768 * use, etc. do this)
9cbb5ea2 769 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
770 */
771
76e3520e 772STATIC char *
cea2e8a9 773S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 774{
463ee0b2
LW
775 register char *s;
776 STRLEN len;
4e553d73 777
463ee0b2
LW
778 start = skipspace(start);
779 s = start;
7e2040f0 780 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 781 (allow_pack && *s == ':') ||
15f0808c 782 (allow_initial_tick && *s == '\'') )
a0d0e21e 783 {
3280af22
NIS
784 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
785 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
786 return start;
787 if (token == METHOD) {
788 s = skipspace(s);
789 if (*s == '(')
3280af22 790 PL_expect = XTERM;
463ee0b2 791 else {
3280af22 792 PL_expect = XOPERATOR;
463ee0b2 793 }
79072805 794 }
3280af22
NIS
795 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
796 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
797 force_next(token);
798 }
799 return s;
800}
801
ffb4593c
NT
802/*
803 * S_force_ident
9cbb5ea2 804 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
805 * text only contains the "foo" portion. The first argument is a pointer
806 * to the "foo", and the second argument is the type symbol to prefix.
807 * Forces the next token to be a "WORD".
9cbb5ea2 808 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
809 */
810
76e3520e 811STATIC void
cea2e8a9 812S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
813{
814 if (s && *s) {
11343788 815 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 816 PL_nextval[PL_nexttoke].opval = o;
79072805 817 force_next(WORD);
748a9306 818 if (kind) {
11343788 819 o->op_private = OPpCONST_ENTERED;
55497cff 820 /* XXX see note in pp_entereval() for why we forgo typo
821 warnings if the symbol must be introduced in an eval.
822 GSAR 96-10-12 */
3280af22 823 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
824 kind == '$' ? SVt_PV :
825 kind == '@' ? SVt_PVAV :
826 kind == '%' ? SVt_PVHV :
827 SVt_PVGV
828 );
748a9306 829 }
79072805
LW
830 }
831}
832
1571675a
GS
833NV
834Perl_str_to_version(pTHX_ SV *sv)
835{
836 NV retval = 0.0;
837 NV nshift = 1.0;
838 STRLEN len;
839 char *start = SvPVx(sv,len);
3aa33fe5 840 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
841 char *end = start + len;
842 while (start < end) {
ba210ebe 843 STRLEN skip;
1571675a
GS
844 UV n;
845 if (utf)
9041c2e3 846 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
847 else {
848 n = *(U8*)start;
849 skip = 1;
850 }
851 retval += ((NV)n)/nshift;
852 start += skip;
853 nshift *= 1000;
854 }
855 return retval;
856}
857
4e553d73 858/*
ffb4593c
NT
859 * S_force_version
860 * Forces the next token to be a version number.
861 */
862
76e3520e 863STATIC char *
cea2e8a9 864S_force_version(pTHX_ char *s)
89bfa8cd 865{
866 OP *version = Nullop;
44dcb63b 867 char *d;
89bfa8cd 868
869 s = skipspace(s);
870
44dcb63b 871 d = s;
dd629d5b 872 if (*d == 'v')
44dcb63b 873 d++;
44dcb63b 874 if (isDIGIT(*d)) {
a7cb1f99 875 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 876 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 877 SV *ver;
b73d6f50 878 s = scan_num(s, &yylval);
89bfa8cd 879 version = yylval.opval;
dd629d5b
GS
880 ver = cSVOPx(version)->op_sv;
881 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 882 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
883 SvNVX(ver) = str_to_version(ver);
884 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 885 }
89bfa8cd 886 }
887 }
888
889 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 890 PL_nextval[PL_nexttoke].opval = version;
4e553d73 891 force_next(WORD);
89bfa8cd 892
893 return (s);
894}
895
ffb4593c
NT
896/*
897 * S_tokeq
898 * Tokenize a quoted string passed in as an SV. It finds the next
899 * chunk, up to end of string or a backslash. It may make a new
900 * SV containing that chunk (if HINT_NEW_STRING is on). It also
901 * turns \\ into \.
902 */
903
76e3520e 904STATIC SV *
cea2e8a9 905S_tokeq(pTHX_ SV *sv)
79072805
LW
906{
907 register char *s;
908 register char *send;
909 register char *d;
b3ac6de7
IZ
910 STRLEN len = 0;
911 SV *pv = sv;
79072805
LW
912
913 if (!SvLEN(sv))
b3ac6de7 914 goto finish;
79072805 915
a0d0e21e 916 s = SvPV_force(sv, len);
21a311ee 917 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 918 goto finish;
463ee0b2 919 send = s + len;
79072805
LW
920 while (s < send && *s != '\\')
921 s++;
922 if (s == send)
b3ac6de7 923 goto finish;
79072805 924 d = s;
be4731d2 925 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 926 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
927 if (SvUTF8(sv))
928 SvUTF8_on(pv);
929 }
79072805
LW
930 while (s < send) {
931 if (*s == '\\') {
a0d0e21e 932 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
933 s++; /* all that, just for this */
934 }
935 *d++ = *s++;
936 }
937 *d = '\0';
463ee0b2 938 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 939 finish:
3280af22 940 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 941 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
942 return sv;
943}
944
ffb4593c
NT
945/*
946 * Now come three functions related to double-quote context,
947 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
948 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
949 * interact with PL_lex_state, and create fake ( ... ) argument lists
950 * to handle functions and concatenation.
951 * They assume that whoever calls them will be setting up a fake
952 * join call, because each subthing puts a ',' after it. This lets
953 * "lower \luPpEr"
954 * become
955 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
956 *
957 * (I'm not sure whether the spurious commas at the end of lcfirst's
958 * arguments and join's arguments are created or not).
959 */
960
961/*
962 * S_sublex_start
963 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
964 *
965 * Pattern matching will set PL_lex_op to the pattern-matching op to
966 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
967 *
968 * OP_CONST and OP_READLINE are easy--just make the new op and return.
969 *
970 * Everything else becomes a FUNC.
971 *
972 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
973 * had an OP_CONST or OP_READLINE). This just sets us up for a
974 * call to S_sublex_push().
975 */
976
76e3520e 977STATIC I32
cea2e8a9 978S_sublex_start(pTHX)
79072805
LW
979{
980 register I32 op_type = yylval.ival;
79072805
LW
981
982 if (op_type == OP_NULL) {
3280af22
NIS
983 yylval.opval = PL_lex_op;
984 PL_lex_op = Nullop;
79072805
LW
985 return THING;
986 }
987 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 988 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
989
990 if (SvTYPE(sv) == SVt_PVIV) {
991 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
992 STRLEN len;
993 char *p;
994 SV *nsv;
995
996 p = SvPV(sv, len);
79cb57f6 997 nsv = newSVpvn(p, len);
01ec43d0
GS
998 if (SvUTF8(sv))
999 SvUTF8_on(nsv);
b3ac6de7
IZ
1000 SvREFCNT_dec(sv);
1001 sv = nsv;
4e553d73 1002 }
b3ac6de7 1003 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1004 PL_lex_stuff = Nullsv;
79072805
LW
1005 return THING;
1006 }
1007
3280af22
NIS
1008 PL_sublex_info.super_state = PL_lex_state;
1009 PL_sublex_info.sub_inwhat = op_type;
1010 PL_sublex_info.sub_op = PL_lex_op;
1011 PL_lex_state = LEX_INTERPPUSH;
55497cff 1012
3280af22
NIS
1013 PL_expect = XTERM;
1014 if (PL_lex_op) {
1015 yylval.opval = PL_lex_op;
1016 PL_lex_op = Nullop;
55497cff 1017 return PMFUNC;
1018 }
1019 else
1020 return FUNC;
1021}
1022
ffb4593c
NT
1023/*
1024 * S_sublex_push
1025 * Create a new scope to save the lexing state. The scope will be
1026 * ended in S_sublex_done. Returns a '(', starting the function arguments
1027 * to the uc, lc, etc. found before.
1028 * Sets PL_lex_state to LEX_INTERPCONCAT.
1029 */
1030
76e3520e 1031STATIC I32
cea2e8a9 1032S_sublex_push(pTHX)
55497cff 1033{
f46d017c 1034 ENTER;
55497cff 1035
3280af22
NIS
1036 PL_lex_state = PL_sublex_info.super_state;
1037 SAVEI32(PL_lex_dojoin);
1038 SAVEI32(PL_lex_brackets);
3280af22
NIS
1039 SAVEI32(PL_lex_casemods);
1040 SAVEI32(PL_lex_starts);
1041 SAVEI32(PL_lex_state);
7766f137 1042 SAVEVPTR(PL_lex_inpat);
3280af22 1043 SAVEI32(PL_lex_inwhat);
57843af0 1044 SAVECOPLINE(PL_curcop);
3280af22
NIS
1045 SAVEPPTR(PL_bufptr);
1046 SAVEPPTR(PL_oldbufptr);
1047 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1048 SAVEPPTR(PL_last_lop);
1049 SAVEPPTR(PL_last_uni);
3280af22
NIS
1050 SAVEPPTR(PL_linestart);
1051 SAVESPTR(PL_linestr);
1052 SAVEPPTR(PL_lex_brackstack);
1053 SAVEPPTR(PL_lex_casestack);
1054
1055 PL_linestr = PL_lex_stuff;
1056 PL_lex_stuff = Nullsv;
1057
9cbb5ea2
GS
1058 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1059 = SvPVX(PL_linestr);
3280af22 1060 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1061 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1062 SAVEFREESV(PL_linestr);
1063
1064 PL_lex_dojoin = FALSE;
1065 PL_lex_brackets = 0;
3280af22
NIS
1066 New(899, PL_lex_brackstack, 120, char);
1067 New(899, PL_lex_casestack, 12, char);
1068 SAVEFREEPV(PL_lex_brackstack);
1069 SAVEFREEPV(PL_lex_casestack);
1070 PL_lex_casemods = 0;
1071 *PL_lex_casestack = '\0';
1072 PL_lex_starts = 0;
1073 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1074 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1075
1076 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1077 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1078 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1079 else
3280af22 1080 PL_lex_inpat = Nullop;
79072805 1081
55497cff 1082 return '(';
79072805
LW
1083}
1084
ffb4593c
NT
1085/*
1086 * S_sublex_done
1087 * Restores lexer state after a S_sublex_push.
1088 */
1089
76e3520e 1090STATIC I32
cea2e8a9 1091S_sublex_done(pTHX)
79072805 1092{
3280af22 1093 if (!PL_lex_starts++) {
9aa983d2
JH
1094 SV *sv = newSVpvn("",0);
1095 if (SvUTF8(PL_linestr))
1096 SvUTF8_on(sv);
3280af22 1097 PL_expect = XOPERATOR;
9aa983d2 1098 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1099 return THING;
1100 }
1101
3280af22
NIS
1102 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1103 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1104 return yylex();
79072805
LW
1105 }
1106
ffb4593c 1107 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1108 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1109 PL_linestr = PL_lex_repl;
1110 PL_lex_inpat = 0;
1111 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1112 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1113 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1114 SAVEFREESV(PL_linestr);
1115 PL_lex_dojoin = FALSE;
1116 PL_lex_brackets = 0;
3280af22
NIS
1117 PL_lex_casemods = 0;
1118 *PL_lex_casestack = '\0';
1119 PL_lex_starts = 0;
25da4f38 1120 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1121 PL_lex_state = LEX_INTERPNORMAL;
1122 PL_lex_starts++;
e9fa98b2
HS
1123 /* we don't clear PL_lex_repl here, so that we can check later
1124 whether this is an evalled subst; that means we rely on the
1125 logic to ensure sublex_done() is called again only via the
1126 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1127 }
e9fa98b2 1128 else {
3280af22 1129 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1130 PL_lex_repl = Nullsv;
1131 }
79072805 1132 return ',';
ffed7fef
LW
1133 }
1134 else {
f46d017c 1135 LEAVE;
3280af22
NIS
1136 PL_bufend = SvPVX(PL_linestr);
1137 PL_bufend += SvCUR(PL_linestr);
1138 PL_expect = XOPERATOR;
09bef843 1139 PL_sublex_info.sub_inwhat = 0;
79072805 1140 return ')';
ffed7fef
LW
1141 }
1142}
1143
02aa26ce
NT
1144/*
1145 scan_const
1146
1147 Extracts a pattern, double-quoted string, or transliteration. This
1148 is terrifying code.
1149
3280af22
NIS
1150 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1151 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1152 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1153
9b599b2a
GS
1154 Returns a pointer to the character scanned up to. Iff this is
1155 advanced from the start pointer supplied (ie if anything was
1156 successfully parsed), will leave an OP for the substring scanned
1157 in yylval. Caller must intuit reason for not parsing further
1158 by looking at the next characters herself.
1159
02aa26ce
NT
1160 In patterns:
1161 backslashes:
1162 double-quoted style: \r and \n
1163 regexp special ones: \D \s
1164 constants: \x3
1165 backrefs: \1 (deprecated in substitution replacements)
1166 case and quoting: \U \Q \E
1167 stops on @ and $, but not for $ as tail anchor
1168
1169 In transliterations:
1170 characters are VERY literal, except for - not at the start or end
1171 of the string, which indicates a range. scan_const expands the
1172 range to the full set of intermediate characters.
1173
1174 In double-quoted strings:
1175 backslashes:
1176 double-quoted style: \r and \n
1177 constants: \x3
1178 backrefs: \1 (deprecated)
1179 case and quoting: \U \Q \E
1180 stops on @ and $
1181
1182 scan_const does *not* construct ops to handle interpolated strings.
1183 It stops processing as soon as it finds an embedded $ or @ variable
1184 and leaves it to the caller to work out what's going on.
1185
1186 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1187
1188 $ in pattern could be $foo or could be tail anchor. Assumption:
1189 it's a tail anchor if $ is the last thing in the string, or if it's
1190 followed by one of ")| \n\t"
1191
1192 \1 (backreferences) are turned into $1
1193
1194 The structure of the code is
1195 while (there's a character to process) {
1196 handle transliteration ranges
1197 skip regexp comments
1198 skip # initiated comments in //x patterns
1199 check for embedded @foo
1200 check for embedded scalars
1201 if (backslash) {
1202 leave intact backslashes from leave (below)
1203 deprecate \1 in strings and sub replacements
1204 handle string-changing backslashes \l \U \Q \E, etc.
1205 switch (what was escaped) {
1206 handle - in a transliteration (becomes a literal -)
1207 handle \132 octal characters
1208 handle 0x15 hex characters
1209 handle \cV (control V)
1210 handle printf backslashes (\f, \r, \n, etc)
1211 } (end switch)
1212 } (end if backslash)
1213 } (end while character to read)
4e553d73 1214
02aa26ce
NT
1215*/
1216
76e3520e 1217STATIC char *
cea2e8a9 1218S_scan_const(pTHX_ char *start)
79072805 1219{
3280af22 1220 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1221 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1222 register char *s = start; /* start of the constant */
1223 register char *d = SvPVX(sv); /* destination for copies */
1224 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1225 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1226 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1227 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1228 UV uv;
1229
dff6d3cd 1230 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1231 PL_lex_inpat
4a2d328f 1232 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1233 : "";
79072805 1234
2b9d42f0
NIS
1235 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1236 /* If we are doing a trans and we know we want UTF8 set expectation */
1237 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1238 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1239 }
1240
1241
79072805 1242 while (s < send || dorange) {
02aa26ce 1243 /* get transliterations out of the way (they're most literal) */
3280af22 1244 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1245 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1246 if (dorange) {
1ba5c669
JH
1247 I32 i; /* current expanded character */
1248 I32 min; /* first character in range */
1249 I32 max; /* last character in range */
02aa26ce 1250
2b9d42f0 1251 if (has_utf8) {
8973db79
JH
1252 char *c = (char*)utf8_hop((U8*)d, -1);
1253 char *e = d++;
1254 while (e-- > c)
1255 *(e + 1) = *e;
2b9d42f0 1256 *c = UTF_TO_NATIVE(0xff);
8973db79
JH
1257 /* mark the range as done, and continue */
1258 dorange = FALSE;
1259 didrange = TRUE;
1260 continue;
1261 }
2b9d42f0 1262
02aa26ce 1263 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1264 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1265 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1266 d -= 2; /* eat the first char and the - */
1267
8ada0baa
JH
1268 min = (U8)*d; /* first char in range */
1269 max = (U8)d[1]; /* last char in range */
1270
c2e66d9e 1271 if (min > max) {
01ec43d0 1272 Perl_croak(aTHX_
1ba5c669
JH
1273 "Invalid [] range \"%c-%c\" in transliteration operator",
1274 (char)min, (char)max);
c2e66d9e
GS
1275 }
1276
c7f1f016 1277#ifdef EBCDIC
8ada0baa
JH
1278 if ((isLOWER(min) && isLOWER(max)) ||
1279 (isUPPER(min) && isUPPER(max))) {
1280 if (isLOWER(min)) {
1281 for (i = min; i <= max; i++)
1282 if (isLOWER(i))
db42d148 1283 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1284 } else {
1285 for (i = min; i <= max; i++)
1286 if (isUPPER(i))
db42d148 1287 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1288 }
1289 }
1290 else
1291#endif
1292 for (i = min; i <= max; i++)
1293 *d++ = i;
02aa26ce
NT
1294
1295 /* mark the range as done, and continue */
79072805 1296 dorange = FALSE;
01ec43d0 1297 didrange = TRUE;
79072805 1298 continue;
4e553d73 1299 }
02aa26ce
NT
1300
1301 /* range begins (ignore - as first or last char) */
79072805 1302 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1303 if (didrange) {
1fafa243 1304 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1305 }
2b9d42f0
NIS
1306 if (has_utf8) {
1307 *d++ = UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1308 s++;
1309 continue;
1310 }
79072805
LW
1311 dorange = TRUE;
1312 s++;
01ec43d0
GS
1313 }
1314 else {
1315 didrange = FALSE;
1316 }
79072805 1317 }
02aa26ce
NT
1318
1319 /* if we get here, we're not doing a transliteration */
1320
0f5d15d6
IZ
1321 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1322 except for the last char, which will be done separately. */
3280af22 1323 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1324 if (s[2] == '#') {
1325 while (s < send && *s != ')')
db42d148 1326 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1327 }
1328 else if (s[2] == '{' /* This should match regcomp.c */
1329 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1330 {
cc6b7395 1331 I32 count = 1;
0f5d15d6 1332 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1333 char c;
1334
d9f97599
GS
1335 while (count && (c = *regparse)) {
1336 if (c == '\\' && regparse[1])
1337 regparse++;
4e553d73 1338 else if (c == '{')
cc6b7395 1339 count++;
4e553d73 1340 else if (c == '}')
cc6b7395 1341 count--;
d9f97599 1342 regparse++;
cc6b7395 1343 }
5bdf89e7
IZ
1344 if (*regparse != ')') {
1345 regparse--; /* Leave one char for continuation. */
cc6b7395 1346 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1347 }
0f5d15d6 1348 while (s < regparse)
db42d148 1349 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1350 }
748a9306 1351 }
02aa26ce
NT
1352
1353 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1354 else if (*s == '#' && PL_lex_inpat &&
1355 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1356 while (s+1 < send && *s != '\n')
db42d148 1357 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1358 }
02aa26ce 1359
5d1d4326
JH
1360 /* check for embedded arrays
1361 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1362 */
7e2040f0 1363 else if (*s == '@' && s[1]
5d1d4326 1364 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1365 break;
02aa26ce
NT
1366
1367 /* check for embedded scalars. only stop if we're sure it's a
1368 variable.
1369 */
79072805 1370 else if (*s == '$') {
3280af22 1371 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1372 break;
c277df42 1373 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1374 break; /* in regexp, $ might be tail anchor */
1375 }
02aa26ce 1376
2b9d42f0
NIS
1377 /* End of else if chain - OP_TRANS rejoin rest */
1378
02aa26ce 1379 /* backslashes */
79072805
LW
1380 if (*s == '\\' && s+1 < send) {
1381 s++;
02aa26ce
NT
1382
1383 /* some backslashes we leave behind */
c9f97d15 1384 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1385 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1386 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1387 continue;
1388 }
02aa26ce
NT
1389
1390 /* deprecate \1 in strings and substitution replacements */
3280af22 1391 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1392 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1393 {
599cee73 1394 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1395 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1396 *--s = '$';
1397 break;
1398 }
02aa26ce
NT
1399
1400 /* string-change backslash escapes */
3280af22 1401 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1402 --s;
1403 break;
1404 }
02aa26ce
NT
1405
1406 /* if we get here, it's either a quoted -, or a digit */
79072805 1407 switch (*s) {
02aa26ce
NT
1408
1409 /* quoted - in transliterations */
79072805 1410 case '-':
3280af22 1411 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1412 *d++ = *s++;
1413 continue;
1414 }
1415 /* FALL THROUGH */
1416 default:
11b8faa4 1417 {
7e84c16c 1418 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1419 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1420 "Unrecognized escape \\%c passed through",
1421 *s);
1422 /* default action is to copy the quoted character */
f9a63242 1423 goto default_action;
11b8faa4 1424 }
02aa26ce
NT
1425
1426 /* \132 indicates an octal constant */
79072805
LW
1427 case '0': case '1': case '2': case '3':
1428 case '4': case '5': case '6': case '7':
ba210ebe
JH
1429 {
1430 STRLEN len = 0; /* disallow underscores */
1431 uv = (UV)scan_oct(s, 3, &len);
1432 s += len;
1433 }
012bcf8d 1434 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1435
1436 /* \x24 indicates a hex constant */
79072805 1437 case 'x':
a0ed51b3
LW
1438 ++s;
1439 if (*s == '{') {
1440 char* e = strchr(s, '}');
adaeee49 1441 if (!e) {
a0ed51b3 1442 yyerror("Missing right brace on \\x{}");
adaeee49
GA
1443 e = s;
1444 }
89491803 1445 else {
ba210ebe
JH
1446 STRLEN len = 1; /* allow underscores */
1447 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
1448 }
1449 s = e + 1;
a0ed51b3
LW
1450 }
1451 else {
ba210ebe
JH
1452 {
1453 STRLEN len = 0; /* disallow underscores */
1454 uv = (UV)scan_hex(s, 2, &len);
1455 s += len;
1456 }
012bcf8d
GS
1457 }
1458
1459 NUM_ESCAPE_INSERT:
1460 /* Insert oct or hex escaped character.
301d3d20 1461 * There will always enough room in sv since such
db42d148 1462 * escapes will be longer than any UTF-8 sequence
301d3d20 1463 * they can end up as. */
ba7cea30 1464
c7f1f016
NIS
1465 /* We need to map to chars to ASCII before doing the tests
1466 to cover EBCDIC
1467 */
c4d5f83a 1468 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1469 if (!has_utf8 && uv > 255) {
301d3d20
JH
1470 /* Might need to recode whatever we have
1471 * accumulated so far if it contains any
1472 * hibit chars.
1473 *
1474 * (Can't we keep track of that and avoid
1475 * this rescan? --jhi)
012bcf8d 1476 */
c7f1f016 1477 int hicount = 0;
63cd0674
NIS
1478 U8 *c;
1479 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1480 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1481 hicount++;
db42d148 1482 }
012bcf8d 1483 }
63cd0674 1484 if (hicount) {
db42d148
NIS
1485 STRLEN offset = d - SvPVX(sv);
1486 U8 *src, *dst;
1487 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1488 src = (U8 *)d - 1;
1489 dst = src+hicount;
1490 d += hicount;
1491 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1492 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1493 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1494 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1495 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1496 }
1497 else {
63cd0674 1498 *dst-- = *src;
012bcf8d 1499 }
c7f1f016 1500 src--;
012bcf8d
GS
1501 }
1502 }
1503 }
1504
9aa983d2 1505 if (has_utf8 || uv > 255) {
9041c2e3 1506 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1507 has_utf8 = TRUE;
f9a63242
JH
1508 if (PL_lex_inwhat == OP_TRANS &&
1509 PL_sublex_info.sub_op) {
1510 PL_sublex_info.sub_op->op_private |=
1511 (PL_lex_repl ? OPpTRANS_FROM_UTF
1512 : OPpTRANS_TO_UTF);
f9a63242 1513 }
012bcf8d 1514 }
a0ed51b3 1515 else {
012bcf8d 1516 *d++ = (char)uv;
a0ed51b3 1517 }
012bcf8d
GS
1518 }
1519 else {
c4d5f83a 1520 *d++ = (char) uv;
a0ed51b3 1521 }
79072805 1522 continue;
02aa26ce 1523
4a2d328f
IZ
1524 /* \N{latin small letter a} is a named character */
1525 case 'N':
55eda711 1526 ++s;
423cee85
JH
1527 if (*s == '{') {
1528 char* e = strchr(s, '}');
155aba94 1529 SV *res;
423cee85
JH
1530 STRLEN len;
1531 char *str;
4e553d73 1532
423cee85 1533 if (!e) {
5777a3f7 1534 yyerror("Missing right brace on \\N{}");
423cee85
JH
1535 e = s - 1;
1536 goto cont_scan;
1537 }
55eda711
JH
1538 res = newSVpvn(s + 1, e - s - 1);
1539 res = new_constant( Nullch, 0, "charnames",
1540 res, Nullsv, "\\N{...}" );
f9a63242
JH
1541 if (has_utf8)
1542 sv_utf8_upgrade(res);
423cee85 1543 str = SvPV(res,len);
89491803 1544 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1545 char *ostart = SvPVX(sv);
1546 SvCUR_set(sv, d - ostart);
1547 SvPOK_on(sv);
e4f3eed8 1548 *d = '\0';
f08d6ad9 1549 sv_utf8_upgrade(sv);
d2f449dd
SB
1550 /* this just broke our allocation above... */
1551 SvGROW(sv, send - start);
f08d6ad9 1552 d = SvPVX(sv) + SvCUR(sv);
89491803 1553 has_utf8 = TRUE;
f08d6ad9 1554 }
423cee85
JH
1555 if (len > e - s + 4) {
1556 char *odest = SvPVX(sv);
1557
8973db79 1558 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1559 d = SvPVX(sv) + (d - odest);
1560 }
1561 Copy(str, d, len, char);
1562 d += len;
1563 SvREFCNT_dec(res);
1564 cont_scan:
1565 s = e + 1;
1566 }
1567 else
5777a3f7 1568 yyerror("Missing braces on \\N{}");
423cee85
JH
1569 continue;
1570
02aa26ce 1571 /* \c is a control character */
79072805
LW
1572 case 'c':
1573 s++;
ba210ebe
JH
1574 {
1575 U8 c = *s++;
c7f1f016
NIS
1576#ifdef EBCDIC
1577 if (isLOWER(c))
1578 c = toUPPER(c);
1579#endif
db42d148 1580 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1581 }
79072805 1582 continue;
02aa26ce
NT
1583
1584 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1585 case 'b':
db42d148 1586 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1587 break;
1588 case 'n':
db42d148 1589 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1590 break;
1591 case 'r':
db42d148 1592 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1593 break;
1594 case 'f':
db42d148 1595 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1596 break;
1597 case 't':
db42d148 1598 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1599 break;
34a3fe2a 1600 case 'e':
db42d148 1601 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1602 break;
1603 case 'a':
db42d148 1604 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1605 break;
02aa26ce
NT
1606 } /* end switch */
1607
79072805
LW
1608 s++;
1609 continue;
02aa26ce
NT
1610 } /* end if (backslash) */
1611
f9a63242 1612 default_action:
2b9d42f0
NIS
1613 /* If we started with encoded form, or already know we want it
1614 and then encode the next character */
1615 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1616 STRLEN len = 1;
1617 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1618 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1619 s += len;
1620 if (need > len) {
1621 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1622 STRLEN off = d - SvPVX(sv);
1623 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1624 }
1625 d = (char*)uvchr_to_utf8((U8*)d, uv);
1626 has_utf8 = TRUE;
1627 }
1628 else {
1629 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1630 }
02aa26ce
NT
1631 } /* while loop to process each character */
1632
1633 /* terminate the string and set up the sv */
79072805 1634 *d = '\0';
463ee0b2 1635 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0
NIS
1636 if (SvCUR(sv) >= SvLEN(sv))
1637 Perl_croak(aTHX_ "panic:constant overflowed allocated space");
1638
79072805 1639 SvPOK_on(sv);
2b9d42f0 1640 if (has_utf8) {
7e2040f0 1641 SvUTF8_on(sv);
2b9d42f0
NIS
1642 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1643 PL_sublex_info.sub_op->op_private |=
1644 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1645 }
1646 }
79072805 1647
02aa26ce 1648 /* shrink the sv if we allocated more than we used */
79072805
LW
1649 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1650 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1651 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1652 }
02aa26ce 1653
9b599b2a 1654 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1655 if (s > PL_bufptr) {
1656 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1657 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1658 sv, Nullsv,
4e553d73 1659 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1660 ? "tr"
3280af22 1661 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1662 ? "s"
1663 : "qq")));
79072805 1664 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1665 } else
8990e307 1666 SvREFCNT_dec(sv);
79072805
LW
1667 return s;
1668}
1669
ffb4593c
NT
1670/* S_intuit_more
1671 * Returns TRUE if there's more to the expression (e.g., a subscript),
1672 * FALSE otherwise.
ffb4593c
NT
1673 *
1674 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1675 *
1676 * ->[ and ->{ return TRUE
1677 * { and [ outside a pattern are always subscripts, so return TRUE
1678 * if we're outside a pattern and it's not { or [, then return FALSE
1679 * if we're in a pattern and the first char is a {
1680 * {4,5} (any digits around the comma) returns FALSE
1681 * if we're in a pattern and the first char is a [
1682 * [] returns FALSE
1683 * [SOMETHING] has a funky algorithm to decide whether it's a
1684 * character class or not. It has to deal with things like
1685 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1686 * anything else returns TRUE
1687 */
1688
9cbb5ea2
GS
1689/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1690
76e3520e 1691STATIC int
cea2e8a9 1692S_intuit_more(pTHX_ register char *s)
79072805 1693{
3280af22 1694 if (PL_lex_brackets)
79072805
LW
1695 return TRUE;
1696 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1697 return TRUE;
1698 if (*s != '{' && *s != '[')
1699 return FALSE;
3280af22 1700 if (!PL_lex_inpat)
79072805
LW
1701 return TRUE;
1702
1703 /* In a pattern, so maybe we have {n,m}. */
1704 if (*s == '{') {
1705 s++;
1706 if (!isDIGIT(*s))
1707 return TRUE;
1708 while (isDIGIT(*s))
1709 s++;
1710 if (*s == ',')
1711 s++;
1712 while (isDIGIT(*s))
1713 s++;
1714 if (*s == '}')
1715 return FALSE;
1716 return TRUE;
1717
1718 }
1719
1720 /* On the other hand, maybe we have a character class */
1721
1722 s++;
1723 if (*s == ']' || *s == '^')
1724 return FALSE;
1725 else {
ffb4593c 1726 /* this is terrifying, and it works */
79072805
LW
1727 int weight = 2; /* let's weigh the evidence */
1728 char seen[256];
f27ffc4a 1729 unsigned char un_char = 255, last_un_char;
93a17b20 1730 char *send = strchr(s,']');
3280af22 1731 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1732
1733 if (!send) /* has to be an expression */
1734 return TRUE;
1735
1736 Zero(seen,256,char);
1737 if (*s == '$')
1738 weight -= 3;
1739 else if (isDIGIT(*s)) {
1740 if (s[1] != ']') {
1741 if (isDIGIT(s[1]) && s[2] == ']')
1742 weight -= 10;
1743 }
1744 else
1745 weight -= 100;
1746 }
1747 for (; s < send; s++) {
1748 last_un_char = un_char;
1749 un_char = (unsigned char)*s;
1750 switch (*s) {
1751 case '@':
1752 case '&':
1753 case '$':
1754 weight -= seen[un_char] * 10;
7e2040f0 1755 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1756 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1757 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1758 weight -= 100;
1759 else
1760 weight -= 10;
1761 }
1762 else if (*s == '$' && s[1] &&
93a17b20
LW
1763 strchr("[#!%*<>()-=",s[1])) {
1764 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1765 weight -= 10;
1766 else
1767 weight -= 1;
1768 }
1769 break;
1770 case '\\':
1771 un_char = 254;
1772 if (s[1]) {
93a17b20 1773 if (strchr("wds]",s[1]))
79072805
LW
1774 weight += 100;
1775 else if (seen['\''] || seen['"'])
1776 weight += 1;
93a17b20 1777 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1778 weight += 40;
1779 else if (isDIGIT(s[1])) {
1780 weight += 40;
1781 while (s[1] && isDIGIT(s[1]))
1782 s++;
1783 }
1784 }
1785 else
1786 weight += 100;
1787 break;
1788 case '-':
1789 if (s[1] == '\\')
1790 weight += 50;
93a17b20 1791 if (strchr("aA01! ",last_un_char))
79072805 1792 weight += 30;
93a17b20 1793 if (strchr("zZ79~",s[1]))
79072805 1794 weight += 30;
f27ffc4a
GS
1795 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1796 weight -= 5; /* cope with negative subscript */
79072805
LW
1797 break;
1798 default:
93a17b20 1799 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1800 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1801 char *d = tmpbuf;
1802 while (isALPHA(*s))
1803 *d++ = *s++;
1804 *d = '\0';
1805 if (keyword(tmpbuf, d - tmpbuf))
1806 weight -= 150;
1807 }
1808 if (un_char == last_un_char + 1)
1809 weight += 5;
1810 weight -= seen[un_char];
1811 break;
1812 }
1813 seen[un_char]++;
1814 }
1815 if (weight >= 0) /* probably a character class */
1816 return FALSE;
1817 }
1818
1819 return TRUE;
1820}
ffed7fef 1821
ffb4593c
NT
1822/*
1823 * S_intuit_method
1824 *
1825 * Does all the checking to disambiguate
1826 * foo bar
1827 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1828 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1829 *
1830 * First argument is the stuff after the first token, e.g. "bar".
1831 *
1832 * Not a method if bar is a filehandle.
1833 * Not a method if foo is a subroutine prototyped to take a filehandle.
1834 * Not a method if it's really "Foo $bar"
1835 * Method if it's "foo $bar"
1836 * Not a method if it's really "print foo $bar"
1837 * Method if it's really "foo package::" (interpreted as package->foo)
1838 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1839 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1840 * =>
1841 */
1842
76e3520e 1843STATIC int
cea2e8a9 1844S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1845{
1846 char *s = start + (*start == '$');
3280af22 1847 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1848 STRLEN len;
1849 GV* indirgv;
1850
1851 if (gv) {
b6c543e3 1852 CV *cv;
a0d0e21e
LW
1853 if (GvIO(gv))
1854 return 0;
b6c543e3
IZ
1855 if ((cv = GvCVu(gv))) {
1856 char *proto = SvPVX(cv);
1857 if (proto) {
1858 if (*proto == ';')
1859 proto++;
1860 if (*proto == '*')
1861 return 0;
1862 }
1863 } else
a0d0e21e
LW
1864 gv = 0;
1865 }
8903cb82 1866 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1867 /* start is the beginning of the possible filehandle/object,
1868 * and s is the end of it
1869 * tmpbuf is a copy of it
1870 */
1871
a0d0e21e 1872 if (*start == '$') {
3280af22 1873 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1874 return 0;
1875 s = skipspace(s);
3280af22
NIS
1876 PL_bufptr = start;
1877 PL_expect = XREF;
a0d0e21e
LW
1878 return *s == '(' ? FUNCMETH : METHOD;
1879 }
1880 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1881 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1882 len -= 2;
1883 tmpbuf[len] = '\0';
1884 goto bare_package;
1885 }
1886 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1887 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1888 return 0;
1889 /* filehandle or package name makes it a method */
89bfa8cd 1890 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1891 s = skipspace(s);
3280af22 1892 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1893 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1894 bare_package:
3280af22 1895 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1896 newSVpvn(tmpbuf,len));
3280af22
NIS
1897 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1898 PL_expect = XTERM;
a0d0e21e 1899 force_next(WORD);
3280af22 1900 PL_bufptr = s;
a0d0e21e
LW
1901 return *s == '(' ? FUNCMETH : METHOD;
1902 }
1903 }
1904 return 0;
1905}
1906
ffb4593c
NT
1907/*
1908 * S_incl_perldb
1909 * Return a string of Perl code to load the debugger. If PERL5DB
1910 * is set, it will return the contents of that, otherwise a
1911 * compile-time require of perl5db.pl.
1912 */
1913
76e3520e 1914STATIC char*
cea2e8a9 1915S_incl_perldb(pTHX)
a0d0e21e 1916{
3280af22 1917 if (PL_perldb) {
76e3520e 1918 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1919
1920 if (pdb)
1921 return pdb;
61bb5906 1922 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1923 return "BEGIN { require 'perl5db.pl' }";
1924 }
1925 return "";
1926}
1927
1928
16d20bd9 1929/* Encoded script support. filter_add() effectively inserts a
4e553d73 1930 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1931 * Note that the filter function only applies to the current source file
1932 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1933 *
1934 * The datasv parameter (which may be NULL) can be used to pass
1935 * private data to this instance of the filter. The filter function
1936 * can recover the SV using the FILTER_DATA macro and use it to
1937 * store private buffers and state information.
1938 *
1939 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1940 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1941 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1942 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1943 * private use must be set using malloc'd pointers.
1944 */
16d20bd9
AD
1945
1946SV *
864dbfa3 1947Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1948{
f4c556ac
GS
1949 if (!funcp)
1950 return Nullsv;
1951
3280af22
NIS
1952 if (!PL_rsfp_filters)
1953 PL_rsfp_filters = newAV();
16d20bd9 1954 if (!datasv)
8c52afec 1955 datasv = NEWSV(255,0);
16d20bd9 1956 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1957 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1958 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1959 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1960 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1961 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1962 av_unshift(PL_rsfp_filters, 1);
1963 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1964 return(datasv);
1965}
4e553d73 1966
16d20bd9
AD
1967
1968/* Delete most recently added instance of this filter function. */
a0d0e21e 1969void
864dbfa3 1970Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1971{
e0c19803 1972 SV *datasv;
f4c556ac 1973 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1974 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1975 return;
1976 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1977 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1978 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1979 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1980 IoANY(datasv) = (void *)NULL;
3280af22 1981 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1982
16d20bd9
AD
1983 return;
1984 }
1985 /* we need to search for the correct entry and clear it */
cea2e8a9 1986 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1987}
1988
1989
1990/* Invoke the n'th filter function for the current rsfp. */
1991I32
864dbfa3 1992Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1993
1994
8ac85365 1995 /* 0 = read one text line */
a0d0e21e 1996{
16d20bd9
AD
1997 filter_t funcp;
1998 SV *datasv = NULL;
e50aee73 1999
3280af22 2000 if (!PL_rsfp_filters)
16d20bd9 2001 return -1;
3280af22 2002 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2003 /* Provide a default input filter to make life easy. */
2004 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2005 DEBUG_P(PerlIO_printf(Perl_debug_log,
2006 "filter_read %d: from rsfp\n", idx));
4e553d73 2007 if (maxlen) {
16d20bd9
AD
2008 /* Want a block */
2009 int len ;
2010 int old_len = SvCUR(buf_sv) ;
2011
2012 /* ensure buf_sv is large enough */
2013 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2014 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2015 if (PerlIO_error(PL_rsfp))
37120919
AD
2016 return -1; /* error */
2017 else
2018 return 0 ; /* end of file */
2019 }
16d20bd9
AD
2020 SvCUR_set(buf_sv, old_len + len) ;
2021 } else {
2022 /* Want a line */
3280af22
NIS
2023 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2024 if (PerlIO_error(PL_rsfp))
37120919
AD
2025 return -1; /* error */
2026 else
2027 return 0 ; /* end of file */
2028 }
16d20bd9
AD
2029 }
2030 return SvCUR(buf_sv);
2031 }
2032 /* Skip this filter slot if filter has been deleted */
3280af22 2033 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2034 DEBUG_P(PerlIO_printf(Perl_debug_log,
2035 "filter_read %d: skipped (filter deleted)\n",
2036 idx));
16d20bd9
AD
2037 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2038 }
2039 /* Get function pointer hidden within datasv */
4755096e 2040 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2041 DEBUG_P(PerlIO_printf(Perl_debug_log,
2042 "filter_read %d: via function %p (%s)\n",
2043 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2044 /* Call function. The function is expected to */
2045 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2046 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2047 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2048}
2049
76e3520e 2050STATIC char *
cea2e8a9 2051S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2052{
c39cd008 2053#ifdef PERL_CR_FILTER
3280af22 2054 if (!PL_rsfp_filters) {
c39cd008 2055 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2056 }
2057#endif
3280af22 2058 if (PL_rsfp_filters) {
16d20bd9 2059
55497cff 2060 if (!append)
2061 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2062 if (FILTER_READ(0, sv, 0) > 0)
2063 return ( SvPVX(sv) ) ;
2064 else
2065 return Nullch ;
2066 }
9d116dd7 2067 else
fd049845 2068 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2069}
2070
01ec43d0
GS
2071STATIC HV *
2072S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2073{
2074 GV *gv;
2075
01ec43d0 2076 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2077 return PL_curstash;
2078
2079 if (len > 2 &&
2080 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2081 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2082 {
2083 return GvHV(gv); /* Foo:: */
def3634b
GS
2084 }
2085
2086 /* use constant CLASS => 'MyClass' */
2087 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2088 SV *sv;
2089 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2090 pkgname = SvPV_nolen(sv);
2091 }
2092 }
2093
2094 return gv_stashpv(pkgname, FALSE);
2095}
a0d0e21e 2096
748a9306
LW
2097#ifdef DEBUGGING
2098 static char* exp_name[] =
09bef843
SB
2099 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2100 "ATTRTERM", "TERMBLOCK"
2101 };
748a9306 2102#endif
463ee0b2 2103
02aa26ce
NT
2104/*
2105 yylex
2106
2107 Works out what to call the token just pulled out of the input
2108 stream. The yacc parser takes care of taking the ops we return and
2109 stitching them into a tree.
2110
2111 Returns:
2112 PRIVATEREF
2113
2114 Structure:
2115 if read an identifier
2116 if we're in a my declaration
2117 croak if they tried to say my($foo::bar)
2118 build the ops for a my() declaration
2119 if it's an access to a my() variable
2120 are we in a sort block?
2121 croak if my($a); $a <=> $b
2122 build ops for access to a my() variable
2123 if in a dq string, and they've said @foo and we can't find @foo
2124 croak
2125 build ops for a bareword
2126 if we already built the token before, use it.
2127*/
2128
dba4d153 2129#ifdef USE_PURE_BISON
864dbfa3 2130int
dba4d153 2131Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2132{
20141f0e
IRC
2133 int r;
2134
6f202aea 2135 yyactlevel++;
20141f0e
IRC
2136 yylval_pointer[yyactlevel] = lvalp;
2137 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2138 if (yyactlevel >= YYMAXLEVEL)
2139 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2140
dba4d153 2141 r = Perl_yylex(aTHX);
20141f0e 2142
d8ae6756
IRC
2143 if (yyactlevel > 0)
2144 yyactlevel--;
20141f0e
IRC
2145
2146 return r;
2147}
dba4d153 2148#endif
20141f0e 2149
dba4d153
JH
2150#ifdef __SC__
2151#pragma segment Perl_yylex
2152#endif
dba4d153 2153int
dba4d153 2154Perl_yylex(pTHX)
20141f0e 2155{
79072805 2156 register char *s;
378cc40b 2157 register char *d;
79072805 2158 register I32 tmp;
463ee0b2 2159 STRLEN len;
161b471a
NIS
2160 GV *gv = Nullgv;
2161 GV **gvp = 0;
aa7440fb 2162 bool bof = FALSE;
a687059c 2163
02aa26ce 2164 /* check if there's an identifier for us to look at */
3280af22 2165 if (PL_pending_ident) {
02aa26ce 2166 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2167 char pit = PL_pending_ident;
2168 PL_pending_ident = 0;
bbce6d69 2169
607df283
SC
2170 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2171 "### Tokener saw identifier '%s'\n", PL_tokenbuf); })
2172
02aa26ce
NT
2173 /* if we're in a my(), we can't allow dynamics here.
2174 $foo'bar has already been turned into $foo::bar, so
2175 just check for colons.
2176
2177 if it's a legal name, the OP is a PADANY.
2178 */
3280af22 2179 if (PL_in_my) {
77ca0c92 2180 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2181 if (strchr(PL_tokenbuf,':'))
2182 yyerror(Perl_form(aTHX_ "No package name allowed for "
2183 "variable %s in \"our\"",
2184 PL_tokenbuf));
77ca0c92
LW
2185 tmp = pad_allocmy(PL_tokenbuf);
2186 }
2187 else {
2188 if (strchr(PL_tokenbuf,':'))
2189 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2190
77ca0c92
LW
2191 yylval.opval = newOP(OP_PADANY, 0);
2192 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2193 return PRIVATEREF;
2194 }
bbce6d69 2195 }
2196
4e553d73 2197 /*
02aa26ce
NT
2198 build the ops for accesses to a my() variable.
2199
2200 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2201 then used in a comparison. This catches most, but not
2202 all cases. For instance, it catches
2203 sort { my($a); $a <=> $b }
2204 but not
2205 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2206 (although why you'd do that is anyone's guess).
2207 */
2208
3280af22 2209 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2210#ifdef USE_THREADS
54b9620d 2211 /* Check for single character per-thread SVs */
3280af22
NIS
2212 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2213 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2214 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2215 {
2faa37cc 2216 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2217 yylval.opval->op_targ = tmp;
2218 return PRIVATEREF;
2219 }
2220#endif /* USE_THREADS */
3280af22 2221 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2222 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2223 /* might be an "our" variable" */
f472eb5c 2224 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2225 /* build ops for a bareword */
f472eb5c
GS
2226 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2227 sv_catpvn(sym, "::", 2);
2228 sv_catpv(sym, PL_tokenbuf+1);
2229 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2230 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2231 gv_fetchpv(SvPVX(sym),
77ca0c92 2232 (PL_in_eval
f472eb5c
GS
2233 ? (GV_ADDMULTI | GV_ADDINEVAL)
2234 : TRUE
77ca0c92
LW
2235 ),
2236 ((PL_tokenbuf[0] == '$') ? SVt_PV
2237 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2238 : SVt_PVHV));
2239 return WORD;
2240 }
2241
02aa26ce 2242 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2243 if (PL_last_lop_op == OP_SORT &&
2244 PL_tokenbuf[0] == '$' &&
2245 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2246 && !PL_tokenbuf[2])
bbce6d69 2247 {
3280af22
NIS
2248 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2249 d < PL_bufend && *d != '\n';
a863c7d1
MB
2250 d++)
2251 {
2252 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2253 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2254 PL_tokenbuf);
a863c7d1 2255 }
bbce6d69 2256 }
2257 }
bbce6d69 2258
a863c7d1
MB
2259 yylval.opval = newOP(OP_PADANY, 0);
2260 yylval.opval->op_targ = tmp;
2261 return PRIVATEREF;
2262 }
bbce6d69 2263 }
2264
02aa26ce
NT
2265 /*
2266 Whine if they've said @foo in a doublequoted string,
2267 and @foo isn't a variable we can find in the symbol
2268 table.
2269 */
3280af22
NIS
2270 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2271 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2272 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2273 && ckWARN(WARN_AMBIGUOUS))
2274 {
2275 /* Downgraded from fatal to warning 20000522 mjd */
2276 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2277 "Possible unintended interpolation of %s in string",
2278 PL_tokenbuf);
2279 }
bbce6d69 2280 }
2281
02aa26ce 2282 /* build ops for a bareword */
3280af22 2283 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2284 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2285 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2286 ((PL_tokenbuf[0] == '$') ? SVt_PV
2287 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2288 : SVt_PVHV));
2289 return WORD;
2290 }
2291
02aa26ce
NT
2292 /* no identifier pending identification */
2293
3280af22 2294 switch (PL_lex_state) {
79072805
LW
2295#ifdef COMMENTARY
2296 case LEX_NORMAL: /* Some compilers will produce faster */
2297 case LEX_INTERPNORMAL: /* code if we comment these out. */
2298 break;
2299#endif
2300
09bef843 2301 /* when we've already built the next token, just pull it out of the queue */
79072805 2302 case LEX_KNOWNEXT:
3280af22
NIS
2303 PL_nexttoke--;
2304 yylval = PL_nextval[PL_nexttoke];
2305 if (!PL_nexttoke) {
2306 PL_lex_state = PL_lex_defer;
2307 PL_expect = PL_lex_expect;
2308 PL_lex_defer = LEX_NORMAL;
463ee0b2 2309 }
607df283 2310 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f
RB
2311 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
2312 (IV)PL_nexttype[PL_nexttoke]); })
607df283 2313
3280af22 2314 return(PL_nexttype[PL_nexttoke]);
79072805 2315
02aa26ce 2316 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2317 when we get here, PL_bufptr is at the \
02aa26ce 2318 */
79072805
LW
2319 case LEX_INTERPCASEMOD:
2320#ifdef DEBUGGING
3280af22 2321 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2322 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2323#endif
02aa26ce 2324 /* handle \E or end of string */
3280af22 2325 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2326 char oldmod;
02aa26ce
NT
2327
2328 /* if at a \E */
3280af22
NIS
2329 if (PL_lex_casemods) {
2330 oldmod = PL_lex_casestack[--PL_lex_casemods];
2331 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2332
3280af22
NIS
2333 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2334 PL_bufptr += 2;
2335 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2336 }
79072805
LW
2337 return ')';
2338 }
3280af22
NIS
2339 if (PL_bufptr != PL_bufend)
2340 PL_bufptr += 2;
2341 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2342 return yylex();
79072805
LW
2343 }
2344 else {
607df283
SC
2345 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2346 "### Saw case modifier at '%s'\n", PL_bufptr); })
3280af22 2347 s = PL_bufptr + 1;
79072805
LW
2348 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2349 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2350 if (strchr("LU", *s) &&
3280af22 2351 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2352 {
3280af22 2353 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2354 return ')';
2355 }
3280af22
NIS
2356 if (PL_lex_casemods > 10) {
2357 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2358 if (newlb != PL_lex_casestack) {
a0d0e21e 2359 SAVEFREEPV(newlb);
3280af22 2360 PL_lex_casestack = newlb;
a0d0e21e
LW
2361 }
2362 }
3280af22
NIS
2363 PL_lex_casestack[PL_lex_casemods++] = *s;
2364 PL_lex_casestack[PL_lex_casemods] = '\0';
2365 PL_lex_state = LEX_INTERPCONCAT;
2366 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2367 force_next('(');
2368 if (*s == 'l')
3280af22 2369 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2370 else if (*s == 'u')
3280af22 2371 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2372 else if (*s == 'L')
3280af22 2373 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2374 else if (*s == 'U')
3280af22 2375 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2376 else if (*s == 'Q')
3280af22 2377 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2378 else
cea2e8a9 2379 Perl_croak(aTHX_ "panic: yylex");
3280af22 2380 PL_bufptr = s + 1;
79072805 2381 force_next(FUNC);
3280af22
NIS
2382 if (PL_lex_starts) {
2383 s = PL_bufptr;
2384 PL_lex_starts = 0;
79072805
LW
2385 Aop(OP_CONCAT);
2386 }
2387 else
cea2e8a9 2388 return yylex();
79072805
LW
2389 }
2390
55497cff 2391 case LEX_INTERPPUSH:
2392 return sublex_push();
2393
79072805 2394 case LEX_INTERPSTART:
3280af22 2395 if (PL_bufptr == PL_bufend)
79072805 2396 return sublex_done();
607df283
SC
2397 DEBUG_T({ PerlIO_printf(Perl_debug_log,
2398 "### Interpolated variable at '%s'\n", PL_bufptr); })
3280af22
NIS
2399 PL_expect = XTERM;
2400 PL_lex_dojoin = (*PL_bufptr == '@');
2401 PL_lex_state = LEX_INTERPNORMAL;
2402 if (PL_lex_dojoin) {
2403 PL_nextval[PL_nexttoke].ival = 0;
79072805 2404 force_next(',');
554b3eca 2405#ifdef USE_THREADS
533c011a
NIS
2406 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2407 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2408 force_next(PRIVATEREF);
2409#else
a0d0e21e 2410 force_ident("\"", '$');
554b3eca 2411#endif /* USE_THREADS */
3280af22 2412 PL_nextval[PL_nexttoke].ival = 0;
79072805 2413 force_next('$');
3280af22 2414 PL_nextval[PL_nexttoke].ival = 0;
79072805 2415 force_next('(');
3280af22 2416 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2417 force_next(FUNC);
2418 }
3280af22
NIS
2419 if (PL_lex_starts++) {
2420 s = PL_bufptr;
79072805
LW
2421 Aop(OP_CONCAT);
2422 }
cea2e8a9 2423 return yylex();
79072805
LW
2424
2425 case LEX_INTERPENDMAYBE:
3280af22
NIS
2426 if (intuit_more(PL_bufptr)) {
2427 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2428 break;
2429 }
2430 /* FALL THROUGH */
2431
2432 case LEX_INTERPEND:
3280af22
NIS
2433 if (PL_lex_dojoin) {
2434 PL_lex_dojoin = FALSE;
2435 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2436 return ')';
2437 }
43a16006 2438 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2439 && SvEVALED(PL_lex_repl))
43a16006 2440 {
e9fa98b2 2441 if (PL_bufptr != PL_bufend)
cea2e8a9 2442 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2443 PL_lex_repl = Nullsv;
2444 }
79072805
LW
2445 /* FALLTHROUGH */
2446 case LEX_INTERPCONCAT:
2447#ifdef DEBUGGING
3280af22 2448 if (PL_lex_brackets)
cea2e8a9 2449 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2450#endif
3280af22 2451 if (PL_bufptr == PL_bufend)
79072805
LW
2452 return sublex_done();
2453
3280af22
NIS
2454 if (SvIVX(PL_linestr) == '\'') {
2455 SV *sv = newSVsv(PL_linestr);
2456 if (!PL_lex_inpat)
76e3520e 2457 sv = tokeq(sv);
3280af22 2458 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2459 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2460 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2461 s = PL_bufend;
79072805
LW
2462 }
2463 else {
3280af22 2464 s = scan_const(PL_bufptr);
79072805 2465 if (*s == '\\')
3280af22 2466 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2467 else
3280af22 2468 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2469 }
2470
3280af22
NIS
2471 if (s != PL_bufptr) {
2472 PL_nextval[PL_nexttoke] = yylval;
2473 PL_expect = XTERM;
79072805 2474 force_next(THING);
3280af22 2475 if (PL_lex_starts++)
79072805
LW
2476 Aop(OP_CONCAT);
2477 else {
3280af22 2478 PL_bufptr = s;
cea2e8a9 2479 return yylex();
79072805
LW
2480 }
2481 }
2482
cea2e8a9 2483 return yylex();
a0d0e21e 2484 case LEX_FORMLINE:
3280af22
NIS
2485 PL_lex_state = LEX_NORMAL;
2486 s = scan_formline(PL_bufptr);
2487 if (!PL_lex_formbrack)
a0d0e21e
LW
2488 goto rightbracket;
2489 OPERATOR(';');
79072805
LW
2490 }
2491
3280af22
NIS
2492 s = PL_bufptr;
2493 PL_oldoldbufptr = PL_oldbufptr;
2494 PL_oldbufptr = s;
607df283 2495 DEBUG_T( {
bf49b057
GS
2496 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2497 exp_name[PL_expect], s);
79072805 2498 } )
463ee0b2
LW
2499
2500 retry:
378cc40b
LW
2501 switch (*s) {
2502 default:
7e2040f0 2503 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2504 goto keylookup;
cea2e8a9 2505 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2506 case 4:
2507 case 26:
2508 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2509 case 0:
3280af22
NIS
2510 if (!PL_rsfp) {
2511 PL_last_uni = 0;
2512 PL_last_lop = 0;
2513 if (PL_lex_brackets)
d98d5fff 2514 yyerror("Missing right curly or square bracket");
4e553d73 2515 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2516 "### Tokener got EOF\n");
2517 } )
79072805 2518 TOKEN(0);
463ee0b2 2519 }
3280af22 2520 if (s++ < PL_bufend)
a687059c 2521 goto retry; /* ignore stray nulls */
3280af22
NIS
2522 PL_last_uni = 0;
2523 PL_last_lop = 0;
2524 if (!PL_in_eval && !PL_preambled) {
2525 PL_preambled = TRUE;
2526 sv_setpv(PL_linestr,incl_perldb());
2527 if (SvCUR(PL_linestr))
2528 sv_catpv(PL_linestr,";");
2529 if (PL_preambleav){
2530 while(AvFILLp(PL_preambleav) >= 0) {
2531 SV *tmpsv = av_shift(PL_preambleav);
2532 sv_catsv(PL_linestr, tmpsv);
2533 sv_catpv(PL_linestr, ";");
91b7def8 2534 sv_free(tmpsv);
2535 }
3280af22
NIS
2536 sv_free((SV*)PL_preambleav);
2537 PL_preambleav = NULL;
91b7def8 2538 }
3280af22
NIS
2539 if (PL_minus_n || PL_minus_p) {
2540 sv_catpv(PL_linestr, "LINE: while (<>) {");
2541 if (PL_minus_l)
2542 sv_catpv(PL_linestr,"chomp;");
2543 if (PL_minus_a) {
8fd239a7
CS
2544 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2545 if (gv)
2546 GvIMPORTED_AV_on(gv);
3280af22
NIS
2547 if (PL_minus_F) {
2548 if (strchr("/'\"", *PL_splitstr)
2549 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2550 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2551 else {
2552 char delim;
2553 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2554 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2555 delim = *s;
cea2e8a9 2556 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2557 "q" + (delim == '\''), delim);
3280af22 2558 for (s = PL_splitstr; *s; s++) {
54310121 2559 if (*s == '\\')
3280af22
NIS
2560 sv_catpvn(PL_linestr, "\\", 1);
2561 sv_catpvn(PL_linestr, s, 1);
54310121 2562 }
cea2e8a9 2563 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2564 }
2304df62
AD
2565 }
2566 else
3280af22 2567 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2568 }
79072805 2569 }
3280af22
NIS
2570 sv_catpv(PL_linestr, "\n");
2571 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2572 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2573 PL_last_lop = PL_last_uni = Nullch;
3280af22 2574 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2575 SV *sv = NEWSV(85,0);
2576
2577 sv_upgrade(sv, SVt_PVMG);
3280af22 2578 sv_setsv(sv,PL_linestr);
57843af0 2579 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2580 }
79072805 2581 goto retry;
a687059c 2582 }
e929a76b 2583 do {
aa7440fb 2584 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2585 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2586 fake_eof:
2587 if (PL_rsfp) {
2588 if (PL_preprocess && !PL_in_eval)
2589 (void)PerlProc_pclose(PL_rsfp);
2590 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2591 PerlIO_clearerr(PL_rsfp);
2592 else
2593 (void)PerlIO_close(PL_rsfp);
2594 PL_rsfp = Nullfp;
2595 PL_doextract = FALSE;
2596 }
2597 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2598 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2599 sv_catpv(PL_linestr,";}");
2600 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2601 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2602 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2603 PL_minus_n = PL_minus_p = 0;
2604 goto retry;
2605 }
2606 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2607 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2608 sv_setpv(PL_linestr,"");
2609 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2610 }
2611 /* if it looks like the start of a BOM, check if it in fact is */
2612 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2613#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2614# ifdef __GNU_LIBRARY__
2615# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2616# define FTELL_FOR_PIPE_IS_BROKEN
2617# endif
e3f494f1
JH
2618# else
2619# ifdef __GLIBC__
2620# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2621# define FTELL_FOR_PIPE_IS_BROKEN
2622# endif
2623# endif
226017aa
DD
2624# endif
2625#endif
2626#ifdef FTELL_FOR_PIPE_IS_BROKEN
2627 /* This loses the possibility to detect the bof
2628 * situation on perl -P when the libc5 is being used.
2629 * Workaround? Maybe attach some extra state to PL_rsfp?
2630 */
2631 if (!PL_preprocess)
7e28d3af 2632 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2633#else
7e28d3af 2634 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2635#endif
7e28d3af 2636 if (bof) {
3280af22 2637 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2638 s = swallow_bom((U8*)s);
e929a76b 2639 }
378cc40b 2640 }
3280af22 2641 if (PL_doextract) {
a0d0e21e 2642 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2643 PL_doextract = FALSE;
a0d0e21e
LW
2644
2645 /* Incest with pod. */
2646 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2647 sv_setpv(PL_linestr, "");
2648 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2649 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2650 PL_last_lop = PL_last_uni = Nullch;
3280af22 2651 PL_doextract = FALSE;
a0d0e21e 2652 }
4e553d73 2653 }
463ee0b2 2654 incline(s);
3280af22
NIS
2655 } while (PL_doextract);
2656 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2657 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2658 SV *sv = NEWSV(85,0);
a687059c 2659
93a17b20 2660 sv_upgrade(sv, SVt_PVMG);
3280af22 2661 sv_setsv(sv,PL_linestr);
57843af0 2662 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2663 }
3280af22 2664 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2665 PL_last_lop = PL_last_uni = Nullch;
57843af0 2666 if (CopLINE(PL_curcop) == 1) {
3280af22 2667 while (s < PL_bufend && isSPACE(*s))
79072805 2668 s++;
a0d0e21e 2669 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2670 s++;
44a8e56a 2671 d = Nullch;
3280af22 2672 if (!PL_in_eval) {
44a8e56a 2673 if (*s == '#' && *(s+1) == '!')
2674 d = s + 2;
2675#ifdef ALTERNATE_SHEBANG
2676 else {
2677 static char as[] = ALTERNATE_SHEBANG;
2678 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2679 d = s + (sizeof(as) - 1);
2680 }
2681#endif /* ALTERNATE_SHEBANG */
2682 }
2683 if (d) {
b8378b72 2684 char *ipath;
774d564b 2685 char *ipathend;
b8378b72 2686
774d564b 2687 while (isSPACE(*d))
b8378b72
CS
2688 d++;
2689 ipath = d;
774d564b 2690 while (*d && !isSPACE(*d))
2691 d++;
2692 ipathend = d;
2693
2694#ifdef ARG_ZERO_IS_SCRIPT
2695 if (ipathend > ipath) {
2696 /*
2697 * HP-UX (at least) sets argv[0] to the script name,
2698 * which makes $^X incorrect. And Digital UNIX and Linux,
2699 * at least, set argv[0] to the basename of the Perl
2700 * interpreter. So, having found "#!", we'll set it right.
2701 */
2702 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2703 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2704 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2705 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2706 SvSETMAGIC(x);
2707 }
774d564b 2708 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2709 }
774d564b 2710#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2711
2712 /*
2713 * Look for options.
2714 */
748a9306 2715 d = instr(s,"perl -");
84e30d1a 2716 if (!d) {
748a9306 2717 d = instr(s,"perl");
84e30d1a
GS
2718#if defined(DOSISH)
2719 /* avoid getting into infinite loops when shebang
2720 * line contains "Perl" rather than "perl" */
2721 if (!d) {
2722 for (d = ipathend-4; d >= ipath; --d) {
2723 if ((*d == 'p' || *d == 'P')
2724 && !ibcmp(d, "perl", 4))
2725 {
2726 break;
2727 }
2728 }
2729 if (d < ipath)
2730 d = Nullch;
2731 }
2732#endif
2733 }
44a8e56a 2734#ifdef ALTERNATE_SHEBANG
2735 /*
2736 * If the ALTERNATE_SHEBANG on this system starts with a
2737 * character that can be part of a Perl expression, then if
2738 * we see it but not "perl", we're probably looking at the
2739 * start of Perl code, not a request to hand off to some
2740 * other interpreter. Similarly, if "perl" is there, but
2741 * not in the first 'word' of the line, we assume the line
2742 * contains the start of the Perl program.
44a8e56a 2743 */
2744 if (d && *s != '#') {
774d564b 2745 char *c = ipath;
44a8e56a 2746 while (*c && !strchr("; \t\r\n\f\v#", *c))
2747 c++;
2748 if (c < d)
2749 d = Nullch; /* "perl" not in first word; ignore */
2750 else
2751 *s = '#'; /* Don't try to parse shebang line */
2752 }
774d564b 2753#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2754#ifndef MACOS_TRADITIONAL
748a9306 2755 if (!d &&
44a8e56a 2756 *s == '#' &&
774d564b 2757 ipathend > ipath &&
3280af22 2758 !PL_minus_c &&
748a9306 2759 !instr(s,"indir") &&
3280af22 2760 instr(PL_origargv[0],"perl"))
748a9306 2761 {
9f68db38 2762 char **newargv;
9f68db38 2763
774d564b 2764 *ipathend = '\0';
2765 s = ipathend + 1;
3280af22 2766 while (s < PL_bufend && isSPACE(*s))
9f68db38 2767 s++;
3280af22
NIS
2768 if (s < PL_bufend) {
2769 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2770 newargv[1] = s;
3280af22 2771 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2772 s++;
2773 *s = '\0';
3280af22 2774 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2775 }
2776 else
3280af22 2777 newargv = PL_origargv;
774d564b 2778 newargv[0] = ipath;
b4748376 2779 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2780 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2781 }
bf4acbe4 2782#endif
748a9306 2783 if (d) {
3280af22
NIS
2784 U32 oldpdb = PL_perldb;
2785 bool oldn = PL_minus_n;
2786 bool oldp = PL_minus_p;
748a9306
LW
2787
2788 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2789 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2790
2791 if (*d++ == '-') {
8cc95fdb 2792 do {
2793 if (*d == 'M' || *d == 'm') {
2794 char *m = d;
2795 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2796 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2797 (int)(d - m), m);
2798 }
2799 d = moreswitches(d);
2800 } while (d);
155aba94
GS
2801 if ((PERLDB_LINE && !oldpdb) ||
2802 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2803 /* if we have already added "LINE: while (<>) {",
2804 we must not do it again */
748a9306 2805 {
3280af22
NIS
2806 sv_setpv(PL_linestr, "");
2807 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2808 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2809 PL_last_lop = PL_last_uni = Nullch;
3280af22 2810 PL_preambled = FALSE;
84902520 2811 if (PERLDB_LINE)
3280af22 2812 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2813 goto retry;
2814 }
a0d0e21e 2815 }
79072805 2816 }
9f68db38 2817 }
79072805 2818 }
3280af22
NIS
2819 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2820 PL_bufptr = s;
2821 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2822 return yylex();
ae986130 2823 }
378cc40b 2824 goto retry;
4fdae800 2825 case '\r':
6a27c188 2826#ifdef PERL_STRICT_CR
cea2e8a9 2827 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2828 Perl_croak(aTHX_
cc507455 2829 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2830#endif
4fdae800 2831 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2832#ifdef MACOS_TRADITIONAL
2833 case '\312':
2834#endif
378cc40b
LW
2835 s++;
2836 goto retry;
378cc40b 2837 case '#':
e929a76b 2838 case '\n':
3280af22 2839 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2840 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2841 /* handle eval qq[#line 1 "foo"\n ...] */
2842 CopLINE_dec(PL_curcop);
2843 incline(s);
2844 }
3280af22 2845 d = PL_bufend;
a687059c 2846 while (s < d && *s != '\n')
378cc40b 2847 s++;
0f85fab0 2848 if (s < d)
378cc40b 2849 s++;
78c267c1 2850 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2851 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2852 incline(s);
3280af22
NIS
2853 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2854 PL_bufptr = s;
2855 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2856 return yylex();
a687059c 2857 }
378cc40b 2858 }
a687059c 2859 else {
378cc40b 2860 *s = '\0';
3280af22 2861 PL_bufend = s;
a687059c 2862 }
378cc40b
LW
2863 goto retry;
2864 case '-':
79072805 2865 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2866 I32 ftst = 0;
2867
378cc40b 2868 s++;
3280af22 2869 PL_bufptr = s;
748a9306
LW
2870 tmp = *s++;
2871
bf4acbe4 2872 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2873 s++;
2874
2875 if (strnEQ(s,"=>",2)) {
3280af22 2876 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2877 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
2878 "### Saw unary minus before =>, forcing word '%s'\n", s);
2879 } )
748a9306
LW
2880 OPERATOR('-'); /* unary minus */
2881 }
3280af22 2882 PL_last_uni = PL_oldbufptr;
748a9306 2883 switch (tmp) {
e5edeb50
JH
2884 case 'r': ftst = OP_FTEREAD; break;
2885 case 'w': ftst = OP_FTEWRITE; break;
2886 case 'x': ftst = OP_FTEEXEC; break;
2887 case 'o': ftst = OP_FTEOWNED; break;
2888 case 'R': ftst = OP_FTRREAD; break;
2889 case 'W': ftst = OP_FTRWRITE; break;
2890 case 'X': ftst = OP_FTREXEC; break;
2891 case 'O': ftst = OP_FTROWNED; break;
2892 case 'e': ftst = OP_FTIS; break;
2893 case 'z': ftst = OP_FTZERO; break;
2894 case 's': ftst = OP_FTSIZE; break;
2895 case 'f': ftst = OP_FTFILE; break;
2896 case 'd': ftst = OP_FTDIR; break;
2897 case 'l': ftst = OP_FTLINK; break;
2898 case 'p': ftst = OP_FTPIPE; break;
2899 case 'S': ftst = OP_FTSOCK; break;
2900 case 'u': ftst = OP_FTSUID; break;
2901 case 'g': ftst = OP_FTSGID; break;
2902 case 'k': ftst = OP_FTSVTX; break;
2903 case 'b': ftst = OP_FTBLK; break;
2904 case 'c': ftst = OP_FTCHR; break;
2905 case 't': ftst = OP_FTTTY; break;
2906 case 'T': ftst = OP_FTTEXT; break;
2907 case 'B': ftst = OP_FTBINARY; break;
2908 case 'M': case 'A': case 'C':
2909 gv_fetchpv("\024",TRUE, SVt_PV);
2910 switch (tmp) {
2911 case 'M': ftst = OP_FTMTIME; break;
2912 case 'A': ftst = OP_FTATIME; break;
2913 case 'C': ftst = OP_FTCTIME; break;
2914 default: break;
2915 }
2916 break;
378cc40b 2917 default:
378cc40b
LW
2918 break;
2919 }
e5edeb50
JH
2920 if (ftst) {
2921 PL_last_lop_op = ftst;
4e553d73 2922 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2923 "### Saw file test %c\n", (int)ftst);
e5edeb50 2924 } )
e5edeb50
JH
2925 FTST(ftst);
2926 }
2927 else {
2928 /* Assume it was a minus followed by a one-letter named
2929 * subroutine call (or a -bareword), then. */
95c31fe3 2930 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2931 "### %c looked like a file test but was not\n",
2932 (int)ftst);
95c31fe3 2933 } )
e5edeb50
JH
2934 s -= 2;
2935 }
378cc40b 2936 }
a687059c
LW
2937 tmp = *s++;
2938 if (*s == tmp) {
2939 s++;
3280af22 2940 if (PL_expect == XOPERATOR)
79072805
LW
2941 TERM(POSTDEC);
2942 else
2943 OPERATOR(PREDEC);
2944 }
2945 else if (*s == '>') {
2946 s++;
2947 s = skipspace(s);
7e2040f0 2948 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2949 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2950 TOKEN(ARROW);
79072805 2951 }
748a9306
LW
2952 else if (*s == '$')
2953 OPERATOR(ARROW);
463ee0b2 2954 else
748a9306 2955 TERM(ARROW);
a687059c 2956 }
3280af22 2957 if (PL_expect == XOPERATOR)
79072805
LW
2958 Aop(OP_SUBTRACT);
2959 else {
3280af22 2960 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2961 check_uni();
79072805 2962 OPERATOR('-'); /* unary minus */
2f3197b3 2963 }
79072805 2964
378cc40b 2965 case '+':
a687059c
LW
2966 tmp = *s++;
2967 if (*s == tmp) {
378cc40b 2968 s++;
3280af22 2969 if (PL_expect == XOPERATOR)
79072805
LW
2970 TERM(POSTINC);
2971 else
2972 OPERATOR(PREINC);
378cc40b 2973 }
3280af22 2974 if (PL_expect == XOPERATOR)
79072805
LW
2975 Aop(OP_ADD);
2976 else {
3280af22 2977 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2978 check_uni();
a687059c 2979 OPERATOR('+');
2f3197b3 2980 }
a687059c 2981
378cc40b 2982 case '*':
3280af22
NIS
2983 if (PL_expect != XOPERATOR) {
2984 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2985 PL_expect = XOPERATOR;
2986 force_ident(PL_tokenbuf, '*');
2987 if (!*PL_tokenbuf)
a0d0e21e 2988 PREREF('*');
79072805 2989 TERM('*');
a687059c 2990 }
79072805
LW
2991 s++;
2992 if (*s == '*') {
a687059c 2993 s++;
79072805 2994 PWop(OP_POW);
a687059c 2995 }
79072805
LW
2996 Mop(OP_MULTIPLY);
2997
378cc40b 2998 case '%':
3280af22 2999 if (PL_expect == XOPERATOR) {
bbce6d69 3000 ++s;
3001 Mop(OP_MODULO);
a687059c 3002 }
3280af22
NIS
3003 PL_tokenbuf[0] = '%';
3004 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3005 if (!PL_tokenbuf[1]) {
3006 if (s == PL_bufend)
bbce6d69 3007 yyerror("Final % should be \\% or %name");
3008 PREREF('%');
a687059c 3009 }
3280af22 3010 PL_pending_ident = '%';
bbce6d69 3011 TERM('%');
a687059c 3012
378cc40b 3013 case '^':
79072805 3014 s++;
a0d0e21e 3015 BOop(OP_BIT_XOR);
79072805 3016 case '[':
3280af22 3017 PL_lex_brackets++;
79072805 3018 /* FALL THROUGH */
378cc40b 3019 case '~':
378cc40b 3020 case ',':
378cc40b
LW
3021 tmp = *s++;
3022 OPERATOR(tmp);
a0d0e21e
LW
3023 case ':':
3024 if (s[1] == ':') {
3025 len = 0;
3026 goto just_a_word;
3027 }
3028 s++;
09bef843
SB
3029 switch (PL_expect) {
3030 OP *attrs;
3031 case XOPERATOR:
3032 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3033 break;
3034 PL_bufptr = s; /* update in case we back off */
3035 goto grabattrs;
3036 case XATTRBLOCK:
3037 PL_expect = XBLOCK;
3038 goto grabattrs;
3039 case XATTRTERM:
3040 PL_expect = XTERMBLOCK;
3041 grabattrs:
3042 s = skipspace(s);
3043 attrs = Nullop;
7e2040f0 3044 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3045 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3046 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3047 if (tmp < 0) tmp = -tmp;
3048 switch (tmp) {
3049 case KEY_or:
3050 case KEY_and:
3051 case KEY_for:
3052 case KEY_unless:
3053 case KEY_if:
3054 case KEY_while:
3055 case KEY_until:
3056 goto got_attrs;
3057 default:
3058 break;
3059 }
3060 }
09bef843
SB
3061 if (*d == '(') {
3062 d = scan_str(d,TRUE,TRUE);
3063 if (!d) {
09bef843
SB
3064 /* MUST advance bufptr here to avoid bogus
3065 "at end of line" context messages from yyerror().
3066 */
3067 PL_bufptr = s + len;
3068 yyerror("Unterminated attribute parameter in attribute list");
3069 if (attrs)
3070 op_free(attrs);
3071 return 0; /* EOF indicator */
3072 }
3073 }
3074 if (PL_lex_stuff) {
3075 SV *sv = newSVpvn(s, len);
3076 sv_catsv(sv, PL_lex_stuff);
3077 attrs = append_elem(OP_LIST, attrs,
3078 newSVOP(OP_CONST, 0, sv));
3079 SvREFCNT_dec(PL_lex_stuff);
3080 PL_lex_stuff = Nullsv;
3081 }
3082 else {
78f9721b
SM
3083 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3084 CvLVALUE_on(PL_compcv);
3085 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3086 CvLOCKED_on(PL_compcv);
3087 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3088 CvMETHOD_on(PL_compcv);
87ecf892
DM
3089#ifdef USE_ITHREADS
3090 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3091 GvSHARED_on(cGVOPx_gv(yylval.opval));
3092#endif
78f9721b
SM
3093 /* After we've set the flags, it could be argued that
3094 we don't need to do the attributes.pm-based setting
3095 process, and shouldn't bother appending recognized
3096 flags. To experiment with that, uncomment the
3097 following "else": */
0256094b 3098 else
78f9721b
SM
3099 attrs = append_elem(OP_LIST, attrs,
3100 newSVOP(OP_CONST, 0,
3101 newSVpvn(s, len)));
09bef843
SB
3102 }
3103 s = skipspace(d);
0120eecf 3104 if (*s == ':' && s[1] != ':')
09bef843 3105 s = skipspace(s+1);
0120eecf
GS
3106 else if (s == d)
3107 break; /* require real whitespace or :'s */
09bef843 3108 }
f9829d6b
GS
3109 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3110 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3111 char q = ((*s == '\'') ? '"' : '\'');
3112 /* If here for an expression, and parsed no attrs, back off. */
3113 if (tmp == '=' && !attrs) {
3114 s = PL_bufptr;
3115 break;
3116 }
3117 /* MUST advance bufptr here to avoid bogus "at end of line"
3118 context messages from yyerror().
3119 */
3120 PL_bufptr = s;
3121 if (!*s)
3122 yyerror("Unterminated attribute list");
3123 else
3124 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3125 q, *s, q));
3126 if (attrs)
3127 op_free(attrs);
3128 OPERATOR(':');
3129 }
f9829d6b 3130 got_attrs:
09bef843
SB
3131 if (attrs) {
3132 PL_nextval[PL_nexttoke].opval = attrs;
3133 force_next(THING);
3134 }
3135 TOKEN(COLONATTR);
3136 }
a0d0e21e 3137 OPERATOR(':');
8990e307
LW
3138 case '(':
3139 s++;
3280af22
NIS
3140 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3141 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3142 else
3280af22 3143 PL_expect = XTERM;
a0d0e21e 3144 TOKEN('(');
378cc40b 3145 case ';':
f4dd75d9 3146 CLINE;
378cc40b
LW
3147 tmp = *s++;
3148 OPERATOR(tmp);
3149 case ')':
378cc40b 3150 tmp = *s++;
16d20bd9
AD
3151 s = skipspace(s);
3152 if (*s == '{')
3153 PREBLOCK(tmp);
378cc40b 3154 TERM(tmp);
79072805
LW
3155 case ']':
3156 s++;
3280af22 3157 if (PL_lex_brackets <= 0)
d98d5fff 3158 yyerror("Unmatched right square bracket");
463ee0b2 3159 else
3280af22
NIS
3160 --PL_lex_brackets;
3161 if (PL_lex_state == LEX_INTERPNORMAL) {
3162 if (PL_lex_brackets == 0) {
a0d0e21e 3163 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3164 PL_lex_state = LEX_INTERPEND;
79072805
LW
3165 }
3166 }
4633a7c4 3167 TERM(']');
79072805
LW
3168 case '{':
3169 leftbracket:
79072805 3170 s++;
3280af22
NIS
3171 if (PL_lex_brackets > 100) {
3172 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3173 if (newlb != PL_lex_brackstack) {
8990e307 3174 SAVEFREEPV(newlb);
3280af22 3175 PL_lex_brackstack = newlb;
8990e307
LW
3176 }
3177 }
3280af22 3178 switch (PL_expect) {
a0d0e21e 3179 case XTERM:
3280af22 3180 if (PL_lex_formbrack) {
a0d0e21e
LW
3181 s--;
3182 PRETERMBLOCK(DO);
3183 }
3280af22
NIS
3184 if (PL_oldoldbufptr == PL_last_lop)
3185 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3186 else
3280af22 3187 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3188 OPERATOR(HASHBRACK);
a0d0e21e 3189 case XOPERATOR:
bf4acbe4 3190 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3191 s++;
44a8e56a 3192 d = s;
3280af22
NIS
3193 PL_tokenbuf[0] = '\0';
3194 if (d < PL_bufend && *d == '-') {
3195 PL_tokenbuf[0] = '-';
44a8e56a 3196 d++;
bf4acbe4 3197 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3198 d++;
3199 }
7e2040f0 3200 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3201 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3202 FALSE, &len);
bf4acbe4 3203 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3204 d++;
3205 if (*d == '}') {
3280af22 3206 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3207 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3208 if (minus)
3209 force_next('-');
748a9306
LW
3210 }
3211 }
3212 /* FALL THROUGH */
09bef843 3213 case XATTRBLOCK:
748a9306 3214 case XBLOCK:
3280af22
NIS
3215 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3216 PL_expect = XSTATE;
a0d0e21e 3217 break;
09bef843 3218 case XATTRTERM:
a0d0e21e 3219 case XTERMBLOCK:
3280af22
NIS
3220 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3221 PL_expect = XSTATE;
a0d0e21e
LW
3222 break;
3223 default: {
3224 char *t;
3280af22
NIS
3225 if (PL_oldoldbufptr == PL_last_lop)
3226 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3227 else
3280af22 3228 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3229 s = skipspace(s);
09ecc4b6 3230 if (*s == '}')
a0d0e21e 3231 OPERATOR(HASHBRACK);
b8a4b1be
GS
3232 /* This hack serves to disambiguate a pair of curlies
3233 * as being a block or an anon hash. Normally, expectation
3234 * determines that, but in cases where we're not in a
3235 * position to expect anything in particular (like inside
3236 * eval"") we have to resolve the ambiguity. This code
3237 * covers the case where the first term in the curlies is a
3238 * quoted string. Most other cases need to be explicitly
3239 * disambiguated by prepending a `+' before the opening
3240 * curly in order to force resolution as an anon hash.
3241 *
3242 * XXX should probably propagate the outer expectation
3243 * into eval"" to rely less on this hack, but that could
3244 * potentially break current behavior of eval"".
3245 * GSAR 97-07-21
3246 */
3247 t = s;
3248 if (*s == '\'' || *s == '"' || *s == '`') {
3249 /* common case: get past first string, handling escapes */
3280af22 3250 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3251 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3252 t++;
3253 t++;
a0d0e21e 3254 }
b8a4b1be 3255 else if (*s == 'q') {
3280af22 3256 if (++t < PL_bufend
b8a4b1be 3257 && (!isALNUM(*t)
3280af22 3258 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3259 && !isALNUM(*t))))
3260 {
b8a4b1be
GS
3261 char *tmps;
3262 char open, close, term;
3263 I32 brackets = 1;
3264
3280af22 3265 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3266 t++;
3267 term = *t;
3268 open = term;
3269 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3270 term = tmps[5];
3271 close = term;
3272 if (open == close)
3280af22
NIS
3273 for (t++; t < PL_bufend; t++) {
3274 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3275 t++;
6d07e5e9 3276 else if (*t == open)
b8a4b1be
GS
3277 break;
3278 }
3279 else
3280af22
NIS
3280 for (t++; t < PL_bufend; t++) {
3281 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3282 t++;
6d07e5e9 3283 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3284 break;
3285 else if (*t == open)
3286 brackets++;
3287 }
3288 }
3289 t++;
a0d0e21e 3290 }
7e2040f0 3291 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3292 t += UTF8SKIP(t);
7e2040f0 3293 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3294 t += UTF8SKIP(t);
a0d0e21e 3295 }
3280af22 3296 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3297 t++;
b8a4b1be
GS
3298 /* if comma follows first term, call it an anon hash */
3299 /* XXX it could be a comma expression with loop modifiers */
3280af22 3300 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3301 || (*t == '=' && t[1] == '>')))
a0d0e21e 3302 OPERATOR(HASHBRACK);
3280af22 3303 if (PL_expect == XREF)
4e4e412b 3304 PL_expect = XTERM;
a0d0e21e 3305 else {
3280af22
NIS
3306 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3307 PL_expect = XSTATE;
a0d0e21e 3308 }
8990e307 3309 }
a0d0e21e 3310 break;
463ee0b2 3311 }
57843af0 3312 yylval.ival = CopLINE(PL_curcop);
79072805 3313 if (isSPACE(*s) || *s == '#')
3280af22 3314 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3315 TOKEN('{');
378cc40b 3316 case '}':
79072805
LW
3317 rightbracket:
3318 s++;
3280af22 3319 if (PL_lex_brackets <= 0)
d98d5fff 3320 yyerror("Unmatched right curly bracket");
463ee0b2 3321 else
3280af22 3322 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3323 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3324 PL_lex_formbrack = 0;
3325 if (PL_lex_state == LEX_INTERPNORMAL) {
3326 if (PL_lex_brackets == 0) {
9059aa12
LW
3327 if (PL_expect & XFAKEBRACK) {
3328 PL_expect &= XENUMMASK;
3280af22
NIS
3329 PL_lex_state = LEX_INTERPEND;
3330 PL_bufptr = s;
cea2e8a9 3331 return yylex(); /* ignore fake brackets */
79072805 3332 }
fa83b5b6 3333 if (*s == '-' && s[1] == '>')
3280af22 3334 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3335 else if (*s != '[' && *s != '{')
3280af22 3336 PL_lex_state = LEX_INTERPEND;
79072805
LW
3337 }
3338 }
9059aa12
LW
3339 if (PL_expect & XFAKEBRACK) {
3340 PL_expect &= XENUMMASK;
3280af22 3341 PL_bufptr = s;
cea2e8a9 3342 return yylex(); /* ignore fake brackets */
748a9306 3343 }
79072805
LW
3344 force_next('}');
3345 TOKEN(';');
378cc40b
LW
3346 case '&':
3347 s++;
3348 tmp = *s++;
3349 if (tmp == '&')
a0d0e21e 3350 AOPERATOR(ANDAND);
378cc40b 3351 s--;
3280af22 3352 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3353 if (ckWARN(WARN_SEMICOLON)
3354 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3355 {
57843af0 3356 CopLINE_dec(PL_curcop);
cea2e8a9 3357 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3358 CopLINE_inc(PL_curcop);
463ee0b2 3359 }
79072805 3360 BAop(OP_BIT_AND);
463ee0b2 3361 }
79072805 3362
3280af22
NIS
3363 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3364 if (*PL_tokenbuf) {
3365 PL_expect = XOPERATOR;
3366 force_ident(PL_tokenbuf, '&');
463ee0b2 3367 }
79072805
LW
3368 else
3369 PREREF('&');
c07a80fd 3370 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3371 TERM('&');
3372
378cc40b
LW
3373 case '|':
3374 s++;
3375 tmp = *s++;
3376 if (tmp == '|')
a0d0e21e 3377 AOPERATOR(OROR);
378cc40b 3378 s--;
79072805 3379 BOop(OP_BIT_OR);
378cc40b
LW
3380 case '=':
3381 s++;
3382 tmp = *s++;
3383 if (tmp == '=')
79072805
LW
3384 Eop(OP_EQ);
3385 if (tmp == '>')
3386 OPERATOR(',');
378cc40b 3387 if (tmp == '~')
79072805 3388 PMop(OP_MATCH);
599cee73 3389 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3390 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3391 s--;
3280af22
NIS
3392 if (PL_expect == XSTATE && isALPHA(tmp) &&
3393 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3394 {
3280af22
NIS
3395 if (PL_in_eval && !PL_rsfp) {
3396 d = PL_bufend;
a5f75d66
AD
3397 while (s < d) {
3398 if (*s++ == '\n') {
3399 incline(s);
3400 if (strnEQ(s,"=cut",4)) {
3401 s = strchr(s,'\n');
3402 if (s)
3403 s++;
3404 else
3405 s = d;
3406 incline(s);
3407 goto retry;
3408 }
3409 }
3410 }
3411 goto retry;
3412 }
3280af22
NIS
3413 s = PL_bufend;
3414 PL_doextract = TRUE;
a0d0e21e
LW
3415 goto retry;
3416 }
3280af22 3417 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3418 char *t;
51882d45 3419#ifdef PERL_STRICT_CR
bf4acbe4 3420 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3421#else
bf4acbe4 3422 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3423#endif
a0d0e21e
LW
3424 if (*t == '\n' || *t == '#') {
3425 s--;
3280af22 3426 PL_expect = XBLOCK;
a0d0e21e
LW
3427 goto leftbracket;
3428 }
79072805 3429 }
a0d0e21e
LW
3430 yylval.ival = 0;
3431 OPERATOR(ASSIGNOP);
378cc40b
LW
3432 case '!':
3433 s++;
3434 tmp = *s++;
3435 if (tmp == '=')
79072805 3436 Eop(OP_NE);
378cc40b 3437 if (tmp == '~')
79072805 3438 PMop(OP_NOT);
378cc40b
LW
3439 s--;
3440 OPERATOR('!');
3441 case '<':
3280af22 3442 if (PL_expect != XOPERATOR) {
93a17b20 3443 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3444 check_uni();
79072805
LW
3445 if (s[1] == '<')
3446 s = scan_heredoc(s);
3447 else
3448 s = scan_inputsymbol(s);
3449 TERM(sublex_start());
378cc40b
LW
3450 }
3451 s++;
3452 tmp = *s++;
3453 if (tmp == '<')
79072805 3454 SHop(OP_LEFT_SHIFT);
395c3793
LW
3455 if (tmp == '=') {
3456 tmp = *s++;
3457 if (tmp == '>')
79072805 3458 Eop(OP_NCMP);
395c3793 3459 s--;
79072805 3460 Rop(OP_LE);
395c3793 3461 }
378cc40b 3462 s--;
79072805 3463 Rop(OP_LT);
378cc40b
LW
3464 case '>':
3465 s++;
3466 tmp = *s++;
3467 if (tmp == '>')
79072805 3468 SHop(OP_RIGHT_SHIFT);
378cc40b 3469 if (tmp == '=')
79072805 3470 Rop(OP_GE);
378cc40b 3471 s--;
79072805 3472 Rop(OP_GT);
378cc40b
LW
3473
3474 case '$':
bbce6d69 3475 CLINE;
3476
3280af22
NIS
3477 if (PL_expect == XOPERATOR) {
3478 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3479 PL_expect = XTERM;
a0d0e21e 3480 depcom();
bbce6d69 3481 return ','; /* grandfather non-comma-format format */
a0d0e21e 3482 }
8990e307 3483 }
a0d0e21e 3484
7e2040f0 3485 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3486 PL_tokenbuf[0] = '@';
376b8730
SM
3487 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3488 sizeof PL_tokenbuf - 1, FALSE);
3489 if (PL_expect == XOPERATOR)
3490 no_op("Array length", s);
3280af22 3491 if (!PL_tokenbuf[1])
a0d0e21e 3492 PREREF(DOLSHARP);
3280af22
NIS
3493 PL_expect = XOPERATOR;
3494 PL_pending_ident = '#';
463ee0b2 3495 TOKEN(DOLSHARP);
79072805 3496 }
bbce6d69 3497
3280af22 3498 PL_tokenbuf[0] = '$';
376b8730
SM
3499 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3500 sizeof PL_tokenbuf - 1, FALSE);
3501 if (PL_expect == XOPERATOR)
3502 no_op("Scalar", s);
3280af22
NIS
3503 if (!PL_tokenbuf[1]) {
3504 if (s == PL_bufend)
bbce6d69 3505 yyerror("Final $ should be \\$ or $name");
3506 PREREF('$');
8990e307 3507 }
a0d0e21e 3508
bbce6d69 3509 /* This kludge not intended to be bulletproof. */
3280af22 3510 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3511 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3512 newSViv(PL_compiling.cop_arybase));
bbce6d69 3513 yylval.opval->op_private = OPpCONST_ARYBASE;
3514 TERM(THING);
3515 }
3516
ff68c719 3517 d = s;
69d2bceb 3518 tmp = (I32)*s;
3280af22 3519 if (PL_lex_state == LEX_NORMAL)
ff68c719 3520 s = skipspace(s);
3521
3280af22 3522 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3523 char *t;
3524 if (*s == '[') {
3280af22 3525 PL_tokenbuf[0] = '@';
599cee73 3526 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3527 for(t = s + 1;
7e2040f0 3528 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3529 t++) ;
a0d0e21e 3530 if (*t++ == ',') {
3280af22
NIS
3531 PL_bufptr = skipspace(PL_bufptr);
3532 while (t < PL_bufend && *t != ']')
bbce6d69 3533 t++;
cea2e8a9 3534 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3535 "Multidimensional syntax %.*s not supported",
3536 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3537 }
3538 }
bbce6d69 3539 }
3540 else if (*s == '{') {
3280af22 3541 PL_tokenbuf[0] = '%';
599cee73 3542 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3543 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3544 {
3280af22 3545 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3546 STRLEN len;
3547 for (t++; isSPACE(*t); t++) ;
7e2040f0 3548 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3549 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3550 for (; isSPACE(*t); t++) ;
864dbfa3 3551 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3552 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3553 "You need to quote \"%s\"", tmpbuf);
748a9306 3554 }
93a17b20
LW
3555 }
3556 }
2f3197b3 3557 }
bbce6d69 3558
3280af22 3559 PL_expect = XOPERATOR;
69d2bceb 3560 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3561 bool islop = (PL_last_lop == PL_oldoldbufptr);
3562 if (!islop || PL_last_lop_op == OP_GREPSTART)
3563 PL_expect = XOPERATOR;
bbce6d69 3564 else if (strchr("$@\"'`q", *s))
3280af22 3565 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3566 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3567 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3568 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3569 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3570 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3571 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3572 /* binary operators exclude handle interpretations */
3573 switch (tmp) {
3574 case -KEY_x:
3575 case -KEY_eq:
3576 case -KEY_ne:
3577 case -KEY_gt:
3578 case -KEY_lt:
3579 case -KEY_ge:
3580 case -KEY_le:
3581 case -KEY_cmp:
3582 break;
3583 default:
3280af22 3584 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3585 break;
3586 }
3587 }
68dc0745 3588 else {
3589 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3590 if (gv && GvCVu(gv))
3280af22 3591 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3592 }
93a17b20 3593 }
bbce6d69 3594 else if (isDIGIT(*s))
3280af22 3595 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3596 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3597 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3598 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3599 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3600 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3601 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3602 }
3280af22 3603 PL_pending_ident = '$';
79072805 3604 TOKEN('$');
378cc40b
LW
3605
3606 case '@':
3280af22 3607 if (PL_expect == XOPERATOR)
bbce6d69 3608 no_op("Array", s);
3280af22
NIS
3609 PL_tokenbuf[0] = '@';
3610 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3611 if (!PL_tokenbuf[1]) {
3612 if (s == PL_bufend)
bbce6d69 3613 yyerror("Final @ should be \\@ or @name");
3614 PREREF('@');
3615 }
3280af22 3616 if (PL_lex_state == LEX_NORMAL)
ff68c719 3617 s = skipspace(s);
3280af22 3618 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3619 if (*s == '{')
3280af22 3620 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3621
3622 /* Warn about @ where they meant $. */
599cee73 3623 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3624 if (*s == '[' || *s == '{') {
3625 char *t = s + 1;
7e2040f0 3626 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3627 t++;
3628 if (*t == '}' || *t == ']') {
3629 t++;
3280af22 3630 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3631 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3632 "Scalar value %.*s better written as $%.*s",
3280af22 3633 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3634 }
93a17b20
LW
3635 }
3636 }
463ee0b2 3637 }
3280af22 3638 PL_pending_ident = '@';
79072805 3639 TERM('@');
378cc40b
LW
3640
3641 case '/': /* may either be division or pattern */
3642 case '?': /* may either be conditional or pattern */
3280af22 3643 if (PL_expect != XOPERATOR) {
c277df42 3644 /* Disable warning on "study /blah/" */
4e553d73
NIS
3645 if (PL_oldoldbufptr == PL_last_uni
3646 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3647 || memNE(PL_last_uni, "study", 5)
3648 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3649 check_uni();
8782bef2 3650 s = scan_pat(s,OP_MATCH);
79072805 3651 TERM(sublex_start());
378cc40b
LW
3652 }
3653 tmp = *s++;
a687059c 3654 if (tmp == '/')
79072805 3655 Mop(OP_DIVIDE);
378cc40b
LW
3656 OPERATOR(tmp);
3657
3658 case '.':
51882d45
GS
3659 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3660#ifdef PERL_STRICT_CR
3661 && s[1] == '\n'
3662#else
3663 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3664#endif
3665 && (s == PL_linestart || s[-1] == '\n') )
3666 {
3280af22
NIS
3667 PL_lex_formbrack = 0;
3668 PL_expect = XSTATE;
79072805
LW
3669 goto rightbracket;
3670 }
3280af22 3671 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3672 tmp = *s++;
a687059c
LW
3673 if (*s == tmp) {
3674 s++;
2f3197b3
LW
3675 if (*s == tmp) {
3676 s++;
79072805 3677 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3678 }
3679 else
79072805 3680 yylval.ival = 0;
378cc40b 3681 OPERATOR(DOTDOT);
a687059c 3682 }
3280af22 3683 if (PL_expect != XOPERATOR)
2f3197b3 3684 check_uni();
79072805 3685 Aop(OP_CONCAT);
378cc40b
LW
3686 }
3687 /* FALL THROUGH */
3688 case '0': case '1': case '2': case '3': case '4':
3689 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3690 s = scan_num(s, &yylval);
4e553d73 3691 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283
SC
3692 "### Saw number in '%s'\n", s);
3693 } )
3280af22 3694 if (PL_expect == XOPERATOR)
8990e307 3695 no_op("Number",s);
79072805
LW
3696 TERM(THING);
3697
3698 case '\'':
09bef843 3699 s = scan_str(s,FALSE,FALSE);
4e553d73 3700 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3701 "### Saw string before '%s'\n", s);
607df283 3702 } )
3280af22
NIS
3703 if (PL_expect == XOPERATOR) {
3704 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3705 PL_expect = XTERM;
a0d0e21e
LW
3706 depcom();
3707 return ','; /* grandfather non-comma-format format */
3708 }
463ee0b2 3709 else
8990e307 3710 no_op("String",s);
463ee0b2 3711 }
79072805 3712 if (!s)
85e6fe83 3713 missingterm((char*)0);
79072805
LW
3714 yylval.ival = OP_CONST;
3715 TERM(sublex_start());
3716
3717 case '"':
09bef843 3718 s = scan_str(s,FALSE,FALSE);
4e553d73 3719 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3720 "### Saw string before '%s'\n", s);
607df283 3721 } )
3280af22
NIS
3722 if (PL_expect == XOPERATOR) {
3723 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3724 PL_expect = XTERM;
a0d0e21e
LW
3725 depcom();
3726 return ','; /* grandfather non-comma-format format */
3727 }
463ee0b2 3728 else
8990e307 3729 no_op("String",s);
463ee0b2 3730 }
79072805 3731 if (!s)
85e6fe83 3732 missingterm((char*)0);
4633a7c4 3733 yylval.ival = OP_CONST;
3280af22 3734 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3735 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3736 yylval.ival = OP_STRINGIFY;
3737 break;
3738 }
3739 }
79072805
LW
3740 TERM(sublex_start());
3741
3742 case '`':
09bef843 3743 s = scan_str(s,FALSE,FALSE);
4e553d73 3744 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3745 "### Saw backtick string before '%s'\n", s);
607df283 3746 } )
3280af22 3747 if (PL_expect == XOPERATOR)
8990e307 3748 no_op("Backticks",s);
79072805 3749 if (!s)
85e6fe83 3750 missingterm((char*)0);
79072805
LW
3751 yylval.ival = OP_BACKTICK;
3752 set_csh();
3753 TERM(sublex_start());
3754
3755 case '\\':
3756 s++;
599cee73 3757 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3758 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3759 *s, *s);
3280af22 3760 if (PL_expect == XOPERATOR)
8990e307 3761 no_op("Backslash",s);
79072805
LW
3762 OPERATOR(REFGEN);
3763
a7cb1f99 3764 case 'v':
e526c9e6 3765 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3766 char *start = s;
3767 start++;
3768 start++;
dd629d5b 3769 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3770 start++;
3771 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3772 s = scan_num(s, &yylval);
a7cb1f99
GS
3773 TERM(THING);
3774 }
e526c9e6
GS
3775 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
3776 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) {
3777 char c = *start;
3778 GV *gv;
3779 *start = '\0';
3780 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3781 *start = c;
3782 if (!gv) {
b73d6f50 3783 s = scan_num(s, &yylval);
e526c9e6
GS
3784 TERM(THING);
3785 }
3786 }
a7cb1f99
GS
3787 }
3788 goto keylookup;
79072805 3789 case 'x':
3280af22 3790 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3791 s++;
3792 Mop(OP_REPEAT);
2f3197b3 3793 }
79072805
LW
3794 goto keylookup;
3795
378cc40b 3796 case '_':
79072805
LW
3797 case 'a': case 'A':
3798 case 'b': case 'B':
3799 case 'c': case 'C':
3800 case 'd': case 'D':
3801 case 'e': case 'E':
3802 case 'f': case 'F':
3803 case 'g': case 'G':
3804 case 'h': case 'H':
3805 case 'i': case 'I':
3806 case 'j': case 'J':
3807 case 'k': case 'K':
3808 case 'l': case 'L':
3809 case 'm': case 'M':
3810 case 'n': case 'N':
3811 case 'o': case 'O':
3812 case 'p': case 'P':
3813 case 'q': case 'Q':
3814 case 'r': case 'R':
3815 case 's': case 'S':
3816 case 't': case 'T':
3817 case 'u': case 'U':
a7cb1f99 3818 case 'V':
79072805
LW
3819 case 'w': case 'W':
3820 case 'X':
3821 case 'y': case 'Y':
3822 case 'z': case 'Z':
3823
49dc05e3 3824 keylookup: {
161b471a
NIS
3825 gv = Nullgv;
3826 gvp = 0;
49dc05e3 3827
3280af22
NIS
3828 PL_bufptr = s;
3829 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3830
3831 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3832 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3833 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3834 (PL_tokenbuf[0] == 'q' &&
3835 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3836
3837 /* x::* is just a word, unless x is "CORE" */
3280af22 3838 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3839 goto just_a_word;
3840
3643fb5f 3841 d = s;
3280af22 3842 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3843 d++; /* no comments skipped here, or s### is misparsed */
3844
3845 /* Is this a label? */
3280af22
NIS
3846 if (!tmp && PL_expect == XSTATE
3847 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3848 s = d + 1;
3280af22 3849 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3850 CLINE;
3851 TOKEN(LABEL);
3643fb5f
CS
3852 }
3853
3854 /* Check for keywords */
3280af22 3855 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3856
3857 /* Is this a word before a => operator? */
1c3923b3 3858 if (*d == '=' && d[1] == '>') {
748a9306 3859 CLINE;
3280af22 3860 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 3861 yylval.opval->op_private = OPpCONST_BARE;
7948272d
NIS
3862 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
3863 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
3864 TERM(WORD);
3865 }
3866
a0d0e21e 3867 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3868 GV *ogv = Nullgv; /* override (winner) */
3869 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3870 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3871 CV *cv;
3280af22 3872 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3873 (cv = GvCVu(gv)))
3874 {
3875 if (GvIMPORTED_CV(gv))
3876 ogv = gv;
3877 else if (! CvMETHOD(cv))
3878 hgv = gv;
3879 }
3880 if (!ogv &&
3280af22
NIS
3881 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3882 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3883 GvCVu(gv) && GvIMPORTED_CV(gv))
3884 {
3885 ogv = gv;
3886 }
3887 }
3888 if (ogv) {
3889 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3890 }
3891 else if (gv && !gvp
3892 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3893 && GvCVu(gv)
3280af22 3894 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3895 {
3896 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3897 }
56f7f34b
CS
3898 else { /* no override */
3899 tmp = -tmp;
3900 gv = Nullgv;
3901 gvp = 0;
4944e2f7
GS
3902 if (ckWARN(WARN_AMBIGUOUS) && hgv
3903 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3904 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3905 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3906 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3907 }
a0d0e21e
LW
3908 }
3909
3910 reserved_word:
3911 switch (tmp) {
79072805
LW
3912
3913 default: /* not a keyword */
93a17b20 3914 just_a_word: {
96e4d5b1 3915 SV *sv;
3280af22 3916 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3917
3918 /* Get the rest if it looks like a package qualifier */
3919
155aba94 3920 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 3921 STRLEN morelen;
3280af22 3922 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3923 TRUE, &morelen);
3924 if (!morelen)
cea2e8a9 3925 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3926 *s == '\'' ? "'" : "::");
c3e0f903 3927 len += morelen;
a0d0e21e 3928 }
8990e307 3929
3280af22
NIS
3930 if (PL_expect == XOPERATOR) {
3931 if (PL_bufptr == PL_linestart) {
57843af0 3932 CopLINE_dec(PL_curcop);
cea2e8a9 3933 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3934 CopLINE_inc(PL_curcop);
463ee0b2
LW
3935 }
3936 else
54310121 3937 no_op("Bareword",s);
463ee0b2 3938 }
8990e307 3939
c3e0f903
GS
3940 /* Look for a subroutine with this name in current package,
3941 unless name is "Foo::", in which case Foo is a bearword
3942 (and a package name). */
3943
3944 if (len > 2 &&
3280af22 3945 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3946 {
e476b1b5 3947 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4e553d73 3948 Perl_warner(aTHX_ WARN_BAREWORD,
599cee73 3949 "Bareword \"%s\" refers to nonexistent package",
3280af22 3950 PL_tokenbuf);
c3e0f903 3951 len -= 2;
3280af22 3952 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3953 gv = Nullgv;
3954 gvp = 0;
3955 }
3956 else {
3957 len = 0;
3958 if (!gv)
3280af22 3959 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3960 }
3961
3962 /* if we saw a global override before, get the right name */
8990e307 3963
49dc05e3 3964 if (gvp) {
79cb57f6 3965 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3966 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3967 }
3968 else
3280af22 3969 sv = newSVpv(PL_tokenbuf,0);
8990e307 3970
a0d0e21e
LW
3971 /* Presume this is going to be a bareword of some sort. */
3972
3973 CLINE;
49dc05e3 3974 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3975 yylval.opval->op_private = OPpCONST_BARE;
3976
c3e0f903
GS
3977 /* And if "Foo::", then that's what it certainly is. */
3978
3979 if (len)
3980 goto safe_bareword;
3981
8990e307
LW
3982 /* See if it's the indirect object for a list operator. */
3983
3280af22
NIS
3984 if (PL_oldoldbufptr &&
3985 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3986 (PL_oldoldbufptr == PL_last_lop
3987 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 3988 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
3989 (PL_expect == XREF ||
3990 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 3991 {
748a9306
LW
3992 bool immediate_paren = *s == '(';
3993
a0d0e21e
LW
3994 /* (Now we can afford to cross potential line boundary.) */
3995 s = skipspace(s);
3996
3997 /* Two barewords in a row may indicate method call. */
3998
7e2040f0 3999 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
4000 return tmp;
4001
4002 /* If not a declared subroutine, it's an indirect object. */
4003 /* (But it's an indir obj regardless for sort.) */
4004
7948272d 4005 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 4006 ((!gv || !GvCVu(gv)) &&
a9ef352a 4007 (PL_last_lop_op != OP_MAPSTART &&
f0670693 4008 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 4009 {
3280af22 4010 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 4011 goto bareword;
93a17b20
LW
4012 }
4013 }
8990e307 4014
8990e307 4015
3280af22 4016 PL_expect = XOPERATOR;
8990e307 4017 s = skipspace(s);
1c3923b3
GS
4018
4019 /* Is this a word before a => operator? */
4020 if (*s == '=' && s[1] == '>') {
4021 CLINE;
4022 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
7948272d
NIS
4023 if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
4024 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
4025 TERM(WORD);
4026 }
4027
4028 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 4029 if (*s == '(') {
79072805 4030 CLINE;
96e4d5b1 4031 if (gv && GvCVu(gv)) {
bf4acbe4 4032 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 4033 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 4034 s = d + 1;
4035 goto its_constant;
4036 }
4037 }
3280af22
NIS
4038 PL_nextval[PL_nexttoke].opval = yylval.opval;
4039 PL_expect = XOPERATOR;
93a17b20 4040 force_next(WORD);
c07a80fd 4041 yylval.ival = 0;
463ee0b2 4042 TOKEN('&');
79072805 4043 }
93a17b20 4044
a0d0e21e 4045 /* If followed by var or block, call it a method (unless sub) */
8990e307 4046
8ebc5c01 4047 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
4048 PL_last_lop = PL_oldbufptr;
4049 PL_last_lop_op = OP_METHOD;
93a17b20 4050 PREBLOCK(METHOD);
463ee0b2
LW
4051 }
4052
8990e307
LW
4053 /* If followed by a bareword, see if it looks like indir obj. */
4054
7e2040f0 4055 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv)))
a0d0e21e 4056 return tmp;
93a17b20 4057
8990e307
LW
4058 /* Not a method, so call it a subroutine (if defined) */
4059
8ebc5c01 4060 if (gv && GvCVu(gv)) {
46fc3d4c 4061 CV* cv;
0453d815
PM
4062 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4063 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4064 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4065 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4066 /* Check for a constant sub */
46fc3d4c 4067 cv = GvCV(gv);
96e4d5b1 4068 if ((sv = cv_const_sv(cv))) {
4069 its_constant:
4070 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4071 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4072 yylval.opval->op_private = 0;
4073 TOKEN(WORD);
89bfa8cd 4074 }
4075
a5f75d66
AD
4076 /* Resolve to GV now. */
4077 op_free(yylval.opval);
4078 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4079 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4080 PL_last_lop = PL_oldbufptr;
bf848113 4081 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4082 /* Is there a prototype? */
4083 if (SvPOK(cv)) {
4084 STRLEN len;
7a52d87a 4085 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4086 if (!len)
4087 TERM(FUNC0SUB);
7a52d87a 4088 if (strEQ(proto, "$"))
4633a7c4 4089 OPERATOR(UNIOPSUB);
7a52d87a 4090 if (*proto == '&' && *s == '{') {
3280af22 4091 sv_setpv(PL_subname,"__ANON__");
4633a7c4
LW
4092 PREBLOCK(LSTOPSUB);
4093 }
a9ef352a 4094 }
3280af22
NIS
4095 PL_nextval[PL_nexttoke].opval = yylval.opval;
4096 PL_expect = XTERM;
8990e307
LW
4097 force_next(WORD);
4098 TOKEN(NOAMP);
4099 }
748a9306 4100
8990e307
LW
4101 /* Call it a bare word */
4102
5603f27d
GS
4103 if (PL_hints & HINT_STRICT_SUBS)
4104 yylval.opval->op_private |= OPpCONST_STRICT;
4105 else {
4106 bareword:
4107 if (ckWARN(WARN_RESERVED)) {
4108 if (lastchar != '-') {
4109 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4110 if (!*d)
cea2e8a9 4111 Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved,
5603f27d
GS
4112 PL_tokenbuf);
4113 }
748a9306
LW
4114 }
4115 }
c3e0f903
GS
4116
4117 safe_bareword:
f248d071 4118 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
0453d815
PM
4119 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4120 "Operator or semicolon missing before %c%s",
3280af22 4121 lastchar, PL_tokenbuf);
0453d815
PM
4122 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4123 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4124 lastchar, lastchar);
4125 }
93a17b20 4126 TOKEN(WORD);
79072805 4127 }
79072805 4128
68dc0745 4129 case KEY___FILE__:
46fc3d4c 4130 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4131 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4132 TERM(THING);
4133
79072805 4134 case KEY___LINE__:
cf2093f6 4135 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4136 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4137 TERM(THING);
68dc0745 4138
4139 case KEY___PACKAGE__:
4140 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
4141 (PL_curstash
4142 ? newSVsv(PL_curstname)
4143 : &PL_sv_undef));
79072805 4144 TERM(THING);
79072805 4145
e50aee73 4146 case KEY___DATA__:
79072805
LW
4147 case KEY___END__: {
4148 GV *gv;
79072805
LW
4149
4150 /*SUPPRESS 560*/
3280af22 4151 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4152 char *pname = "main";
3280af22
NIS
4153 if (PL_tokenbuf[2] == 'D')
4154 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4155 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4156 GvMULTI_on(gv);
79072805 4157 if (!GvIO(gv))
a0d0e21e 4158 GvIOp(gv) = newIO();
3280af22 4159 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4160#if defined(HAS_FCNTL) && defined(F_SETFD)
4161 {
3280af22 4162 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4163 fcntl(fd,F_SETFD,fd >= 3);
4164 }
79072805 4165#endif
fd049845 4166 /* Mark this internal pseudo-handle as clean */
4167 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4168 if (PL_preprocess)
50952442 4169 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4170 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4171 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4172 else
50952442 4173 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4174#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4175 /* if the script was opened in binmode, we need to revert
53129d29 4176 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4177 * XXX this is a questionable hack at best. */
53129d29
GS
4178 if (PL_bufend-PL_bufptr > 2
4179 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4180 {
4181 Off_t loc = 0;
50952442 4182 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4183 loc = PerlIO_tell(PL_rsfp);
4184 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4185 }
4186 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
1143fce0
JH
4187#ifdef PERLIO_IS_STDIO /* really? */
4188# if defined(__BORLANDC__)
cb359b41
JH
4189 /* XXX see note in do_binmode() */
4190 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4191# endif
4192#endif
c39cd008
GS
4193 if (loc > 0)
4194 PerlIO_seek(PL_rsfp, loc, 0);
4195 }
4196 }
4197#endif
7948272d
NIS
4198#ifdef PERLIO_LAYERS
4199 if (UTF && !IN_BYTE)
4200 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4201#endif
3280af22 4202 PL_rsfp = Nullfp;
79072805
LW
4203 }
4204 goto fake_eof;
e929a76b 4205 }
de3bb511 4206
8990e307 4207 case KEY_AUTOLOAD:
ed6116ce 4208 case KEY_DESTROY:
79072805 4209 case KEY_BEGIN:
7d30b5c4 4210 case KEY_CHECK:
7d07dbc2 4211 case KEY_INIT:
7d30b5c4 4212 case KEY_END:
3280af22
NIS
4213 if (PL_expect == XSTATE) {
4214 s = PL_bufptr;
93a17b20 4215 goto really_sub;
79072805
LW
4216 }
4217 goto just_a_word;
4218
a0d0e21e
LW
4219 case KEY_CORE:
4220 if (*s == ':' && s[1] == ':') {
4221 s += 2;
748a9306 4222 d = s;
3280af22 4223 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4224 if (!(tmp = keyword(PL_tokenbuf, len)))
4225 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4226 if (tmp < 0)
4227 tmp = -tmp;
4228 goto reserved_word;
4229 }
4230 goto just_a_word;
4231
463ee0b2
LW
4232 case KEY_abs:
4233 UNI(OP_ABS);
4234
79072805
LW
4235 case KEY_alarm:
4236 UNI(OP_ALARM);
4237
4238 case KEY_accept:
a0d0e21e 4239 LOP(OP_ACCEPT,XTERM);
79072805 4240
463ee0b2
LW
4241 case KEY_and:
4242 OPERATOR(ANDOP);
4243
79072805 4244 case KEY_atan2:
a0d0e21e 4245 LOP(OP_ATAN2,XTERM);
85e6fe83 4246
79072805 4247 case KEY_bind:
a0d0e21e 4248 LOP(OP_BIND,XTERM);
79072805
LW
4249
4250 case KEY_binmode:
1c1fc3ea 4251 LOP(OP_BINMODE,XTERM);
79072805
LW
4252
4253 case KEY_bless:
a0d0e21e 4254 LOP(OP_BLESS,XTERM);
79072805
LW
4255
4256 case KEY_chop:
4257 UNI(OP_CHOP);
4258
4259 case KEY_continue:
4260 PREBLOCK(CONTINUE);
4261
4262 case KEY_chdir:
85e6fe83 4263 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4264 UNI(OP_CHDIR);
4265
4266 case KEY_close:
4267 UNI(OP_CLOSE);
4268
4269 case KEY_closedir:
4270 UNI(OP_CLOSEDIR);
4271
4272 case KEY_cmp:
4273 Eop(OP_SCMP);
4274
4275 case KEY_caller:
4276 UNI(OP_CALLER);
4277
4278 case KEY_crypt:
4279#ifdef FCRYPT
f4c556ac
GS
4280 if (!PL_cryptseen) {
4281 PL_cryptseen = TRUE;
de3bb511 4282 init_des();
f4c556ac 4283 }
a687059c 4284#endif
a0d0e21e 4285 LOP(OP_CRYPT,XTERM);
79072805
LW
4286
4287 case KEY_chmod:
e476b1b5 4288 if (ckWARN(WARN_CHMOD)) {
3280af22 4289 for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
748a9306 4290 if (*d != '0' && isDIGIT(*d))
e476b1b5 4291 Perl_warner(aTHX_ WARN_CHMOD,
5a211162 4292 "chmod() mode argument is missing initial 0");
748a9306 4293 }
a0d0e21e 4294 LOP(OP_CHMOD,XTERM);
79072805
LW
4295
4296 case KEY_chown:
a0d0e21e 4297 LOP(OP_CHOWN,XTERM);
79072805
LW
4298
4299 case KEY_connect:
a0d0e21e 4300 LOP(OP_CONNECT,XTERM);
79072805 4301
463ee0b2
LW
4302 case KEY_chr:
4303 UNI(OP_CHR);
4304
79072805
LW
4305 case KEY_cos:
4306 UNI(OP_COS);
4307
4308 case KEY_chroot:
4309 UNI(OP_CHROOT);
4310
4311 case KEY_do:
4312 s = skipspace(s);
4313 if (*s == '{')
a0d0e21e 4314 PRETERMBLOCK(DO);
79072805 4315 if (*s != '\'')
a0d0e21e 4316 s = force_word(s,WORD,FALSE,TRUE,FALSE);
378cc40b 4317 OPERATOR(DO);
79072805
LW
4318
4319 case KEY_die:
3280af22 4320 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4321 LOP(OP_DIE,XTERM);
79072805
LW
4322
4323 case KEY_defined:
4324 UNI(OP_DEFINED);
4325
4326 case KEY_delete:
a0d0e21e 4327 UNI(OP_DELETE);
79072805
LW
4328
4329 case KEY_dbmopen:
a0d0e21e
LW
4330 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4331 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4332
4333 case KEY_dbmclose:
4334 UNI(OP_DBMCLOSE);
4335
4336 case KEY_dump:
a0d0e21e 4337 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4338 LOOPX(OP_DUMP);
4339
4340 case KEY_else:
4341 PREBLOCK(ELSE);
4342
4343 case KEY_elsif:
57843af0 4344 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4345 OPERATOR(ELSIF);
4346
4347 case KEY_eq:
4348 Eop(OP_SEQ);
4349
a0d0e21e
LW
4350 case KEY_exists:
4351 UNI(OP_EXISTS);
4e553d73 4352
79072805
LW
4353 case KEY_exit:
4354 UNI(OP_EXIT);
4355
4356 case KEY_eval:
79072805 4357 s = skipspace(s);
3280af22 4358 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4359 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4360
4361 case KEY_eof:
4362 UNI(OP_EOF);
4363
4364 case KEY_exp:
4365 UNI(OP_EXP);
4366
4367 case KEY_each:
4368 UNI(OP_EACH);
4369
4370 case KEY_exec:
4371 set_csh();
a0d0e21e 4372 LOP(OP_EXEC,XREF);
79072805
LW
4373
4374 case KEY_endhostent:
4375 FUN0(OP_EHOSTENT);
4376
4377 case KEY_endnetent:
4378 FUN0(OP_ENETENT);
4379
4380 case KEY_endservent:
4381 FUN0(OP_ESERVENT);
4382
4383 case KEY_endprotoent:
4384 FUN0(OP_EPROTOENT);
4385
4386 case KEY_endpwent:
4387 FUN0(OP_EPWENT);
4388
4389 case KEY_endgrent:
4390 FUN0(OP_EGRENT);
4391
4392 case KEY_for:
4393 case KEY_foreach:
57843af0 4394 yylval.ival = CopLINE(PL_curcop);
55497cff 4395 s = skipspace(s);
7e2040f0 4396 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4397 char *p = s;
3280af22 4398 if ((PL_bufend - p) >= 3 &&
55497cff 4399 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4400 p += 2;
77ca0c92
LW
4401 else if ((PL_bufend - p) >= 4 &&
4402 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4403 p += 3;
55497cff 4404 p = skipspace(p);
7e2040f0 4405 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4406 p = scan_ident(p, PL_bufend,
4407 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4408 p = skipspace(p);
4409 }
4410 if (*p != '$')
cea2e8a9 4411 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4412 }
79072805
LW
4413 OPERATOR(FOR);
4414
4415 case KEY_formline:
a0d0e21e 4416 LOP(OP_FORMLINE,XTERM);
79072805
LW
4417
4418 case KEY_fork:
4419 FUN0(OP_FORK);
4420
4421 case KEY_fcntl:
a0d0e21e 4422 LOP(OP_FCNTL,XTERM);
79072805
LW
4423
4424 case KEY_fileno:
4425 UNI(OP_FILENO);
4426
4427 case KEY_flock:
a0d0e21e 4428 LOP(OP_FLOCK,XTERM);
79072805
LW
4429
4430 case KEY_gt:
4431 Rop(OP_SGT);
4432
4433 case KEY_ge:
4434 Rop(OP_SGE);
4435
4436 case KEY_grep:
2c38e13d 4437 LOP(OP_GREPSTART, XREF);
79072805
LW
4438
4439 case KEY_goto:
a0d0e21e 4440 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4441 LOOPX(OP_GOTO);
4442
4443 case KEY_gmtime:
4444 UNI(OP_GMTIME);
4445
4446 case KEY_getc:
4447 UNI(OP_GETC);
4448
4449 case KEY_getppid:
4450 FUN0(OP_GETPPID);
4451
4452 case KEY_getpgrp:
4453 UNI(OP_GETPGRP);
4454
4455 case KEY_getpriority:
a0d0e21e 4456 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4457
4458 case KEY_getprotobyname:
4459 UNI(OP_GPBYNAME);
4460
4461 case KEY_getprotobynumber:
a0d0e21e 4462 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4463
4464 case KEY_getprotoent:
4465 FUN0(OP_GPROTOENT);
4466
4467 case KEY_getpwent:
4468 FUN0(OP_GPWENT);
4469
4470 case KEY_getpwnam:
ff68c719 4471 UNI(OP_GPWNAM);
79072805
LW
4472
4473 case KEY_getpwuid:
ff68c719 4474 UNI(OP_GPWUID);
79072805
LW
4475
4476 case KEY_getpeername:
4477 UNI(OP_GETPEERNAME);
4478
4479 case KEY_gethostbyname:
4480 UNI(OP_GHBYNAME);
4481
4482 case KEY_gethostbyaddr:
a0d0e21e 4483 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4484
4485 case KEY_gethostent:
4486 FUN0(OP_GHOSTENT);
4487
4488 case KEY_getnetbyname:
4489 UNI(OP_GNBYNAME);
4490
4491 case KEY_getnetbyaddr:
a0d0e21e 4492 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4493
4494 case KEY_getnetent:
4495 FUN0(OP_GNETENT);
4496
4497 case KEY_getservbyname:
a0d0e21e 4498 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4499
4500 case KEY_getservbyport:
a0d0e21e 4501 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4502
4503 case KEY_getservent:
4504 FUN0(OP_GSERVENT);
4505
4506 case KEY_getsockname:
4507 UNI(OP_GETSOCKNAME);
4508
4509 case KEY_getsockopt:
a0d0e21e 4510 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4511
4512 case KEY_getgrent:
4513 FUN0(OP_GGRENT);
4514
4515 case KEY_getgrnam:
ff68c719 4516 UNI(OP_GGRNAM);
79072805
LW
4517
4518 case KEY_getgrgid:
ff68c719 4519 UNI(OP_GGRGID);
79072805
LW
4520
4521 case KEY_getlogin:
4522 FUN0(OP_GETLOGIN);
4523
93a17b20 4524 case KEY_glob:
a0d0e21e
LW
4525 set_csh();
4526 LOP(OP_GLOB,XTERM);
93a17b20 4527
79072805
LW
4528 case KEY_hex:
4529 UNI(OP_HEX);
4530
4531 case KEY_if:
57843af0 4532 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4533 OPERATOR(IF);
4534
4535 case KEY_index:
a0d0e21e 4536 LOP(OP_INDEX,XTERM);
79072805
LW
4537
4538 case KEY_int:
4539 UNI(OP_INT);
4540
4541 case KEY_ioctl:
a0d0e21e 4542 LOP(OP_IOCTL,XTERM);
79072805
LW
4543
4544 case KEY_join:
a0d0e21e 4545 LOP(OP_JOIN,XTERM);
79072805
LW
4546
4547 case KEY_keys:
4548 UNI(OP_KEYS);
4549
4550 case KEY_kill:
a0d0e21e 4551 LOP(OP_KILL,XTERM);
79072805
LW
4552
4553 case KEY_last:
a0d0e21e 4554 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4555 LOOPX(OP_LAST);
4e553d73 4556
79072805
LW
4557 case KEY_lc:
4558 UNI(OP_LC);
4559
4560 case KEY_lcfirst:
4561 UNI(OP_LCFIRST);
4562
4563 case KEY_local:
09bef843 4564 yylval.ival = 0;
79072805
LW
4565 OPERATOR(LOCAL);
4566
4567 case KEY_length:
4568 UNI(OP_LENGTH);
4569
4570 case KEY_lt:
4571 Rop(OP_SLT);
4572
4573 case KEY_le:
4574 Rop(OP_SLE);
4575
4576 case KEY_localtime:
4577 UNI(OP_LOCALTIME);
4578
4579 case KEY_log:
4580 UNI(OP_LOG);
4581
4582 case KEY_link:
a0d0e21e 4583 LOP(OP_LINK,XTERM);
79072805
LW
4584
4585 case KEY_listen:
a0d0e21e 4586 LOP(OP_LISTEN,XTERM);
79072805 4587
c0329465
MB
4588 case KEY_lock:
4589 UNI(OP_LOCK);
4590
79072805
LW
4591 case KEY_lstat:
4592 UNI(OP_LSTAT);
4593
4594 case KEY_m:
8782bef2 4595 s = scan_pat(s,OP_MATCH);
79072805
LW
4596 TERM(sublex_start());
4597
a0d0e21e 4598 case KEY_map:
2c38e13d 4599 LOP(OP_MAPSTART, XREF);
4e4e412b 4600
79072805 4601 case KEY_mkdir:
a0d0e21e 4602 LOP(OP_MKDIR,XTERM);
79072805
LW
4603
4604 case KEY_msgctl:
a0d0e21e 4605 LOP(OP_MSGCTL,XTERM);
79072805
LW
4606
4607 case KEY_msgget:
a0d0e21e 4608 LOP(OP_MSGGET,XTERM);
79072805
LW
4609
4610 case KEY_msgrcv:
a0d0e21e 4611 LOP(OP_MSGRCV,XTERM);
79072805
LW
4612
4613 case KEY_msgsnd:
a0d0e21e 4614 LOP(OP_MSGSND,XTERM);
79072805 4615
77ca0c92 4616 case KEY_our:
93a17b20 4617 case KEY_my:
77ca0c92 4618 PL_in_my = tmp;
c750a3ec 4619 s = skipspace(s);
7e2040f0 4620 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4621 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4622 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4623 goto really_sub;
def3634b 4624 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4625 if (!PL_in_my_stash) {
c750a3ec 4626 char tmpbuf[1024];
3280af22
NIS
4627 PL_bufptr = s;
4628 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4629 yyerror(tmpbuf);
4630 }
4631 }
09bef843 4632 yylval.ival = 1;
55497cff 4633 OPERATOR(MY);
93a17b20 4634
79072805 4635 case KEY_next:
a0d0e21e 4636 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4637 LOOPX(OP_NEXT);
4638
4639 case KEY_ne:
4640 Eop(OP_SNE);
4641
a0d0e21e 4642 case KEY_no:
3280af22 4643 if (PL_expect != XSTATE)
a0d0e21e
LW
4644 yyerror("\"no\" not allowed in expression");
4645 s = force_word(s,WORD,FALSE,TRUE,FALSE);
89bfa8cd 4646 s = force_version(s);
a0d0e21e
LW
4647 yylval.ival = 0;
4648 OPERATOR(USE);
4649
4650 case KEY_not:
2d2e263d
LW
4651 if (*s == '(' || (s = skipspace(s), *s == '('))
4652 FUN1(OP_NOT);
4653 else
4654 OPERATOR(NOTOP);
a0d0e21e 4655
79072805 4656 case KEY_open:
93a17b20 4657 s = skipspace(s);
7e2040f0 4658 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4659 char *t;
7e2040f0 4660 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
93a17b20 4661 t = skipspace(d);
e476b1b5
GS
4662 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE))
4663 Perl_warner(aTHX_ WARN_PRECEDENCE,
0453d815
PM
4664 "Precedence problem: open %.*s should be open(%.*s)",
4665 d-s,s, d-s,s);
93a17b20 4666 }
a0d0e21e 4667 LOP(OP_OPEN,XTERM);
79072805 4668
463ee0b2 4669 case KEY_or:
a0d0e21e 4670 yylval.ival = OP_OR;
463ee0b2
LW
4671 OPERATOR(OROP);
4672
79072805
LW
4673 case KEY_ord:
4674 UNI(OP_ORD);
4675
4676 case KEY_oct:
4677 UNI(OP_OCT);
4678
4679 case KEY_opendir:
a0d0e21e 4680 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4681
4682 case KEY_print:
3280af22 4683 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4684 LOP(OP_PRINT,XREF);
79072805
LW
4685
4686 case KEY_printf:
3280af22 4687 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4688 LOP(OP_PRTF,XREF);
79072805 4689
c07a80fd 4690 case KEY_prototype:
4691 UNI(OP_PROTOTYPE);
4692
79072805 4693 case KEY_push:
a0d0e21e 4694 LOP(OP_PUSH,XTERM);
79072805
LW
4695
4696 case KEY_pop:
4697 UNI(OP_POP);
4698
a0d0e21e
LW
4699 case KEY_pos:
4700 UNI(OP_POS);
4e553d73 4701
79072805 4702 case KEY_pack:
a0d0e21e 4703 LOP(OP_PACK,XTERM);
79072805
LW
4704
4705 case KEY_package:
a0d0e21e 4706 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4707 OPERATOR(PACKAGE);
4708
4709 case KEY_pipe:
a0d0e21e 4710 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4711
4712 case KEY_q:
09bef843 4713 s = scan_str(s,FALSE,FALSE);
79072805 4714 if (!s)
85e6fe83 4715 missingterm((char*)0);
79072805
LW
4716 yylval.ival = OP_CONST;
4717 TERM(sublex_start());
4718
a0d0e21e
LW
4719 case KEY_quotemeta:
4720 UNI(OP_QUOTEMETA);
4721
8990e307 4722 case KEY_qw:
09bef843 4723 s = scan_str(s,FALSE,FALSE);
8990e307 4724 if (!s)
85e6fe83 4725 missingterm((char*)0);
8127e0e3
GS
4726 force_next(')');
4727 if (SvCUR(PL_lex_stuff)) {
4728 OP *words = Nullop;
4729 int warned = 0;
3280af22 4730 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4731 while (len) {
7948272d 4732 SV *sv;
8127e0e3
GS
4733 for (; isSPACE(*d) && len; --len, ++d) ;
4734 if (len) {
4735 char *b = d;
e476b1b5 4736 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4737 for (; !isSPACE(*d) && len; --len, ++d) {
4738 if (*d == ',') {
e476b1b5 4739 Perl_warner(aTHX_ WARN_QW,
8127e0e3
GS
4740 "Possible attempt to separate words with commas");
4741 ++warned;
4742 }
4743 else if (*d == '#') {
e476b1b5 4744 Perl_warner(aTHX_ WARN_QW,
8127e0e3
GS
4745 "Possible attempt to put comments in qw() list");
4746 ++warned;
4747 }
4748 }
4749 }
4750 else {
4751 for (; !isSPACE(*d) && len; --len, ++d) ;
4752 }
7948272d
NIS
4753 sv = newSVpvn(b, d-b);
4754 if (DO_UTF8(PL_lex_stuff))
4755 SvUTF8_on(sv);
8127e0e3 4756 words = append_elem(OP_LIST, words,
7948272d 4757 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4758 }
4759 }
8127e0e3
GS
4760 if (words) {
4761 PL_nextval[PL_nexttoke].opval = words;
4762 force_next(THING);
4763 }
55497cff 4764 }
37fd879b 4765 if (PL_lex_stuff) {
8127e0e3 4766 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4767 PL_lex_stuff = Nullsv;
4768 }
3280af22 4769 PL_expect = XTERM;
8127e0e3 4770 TOKEN('(');
8990e307 4771
79072805 4772 case KEY_qq:
09bef843 4773 s = scan_str(s,FALSE,FALSE);
79072805 4774 if (!s)
85e6fe83 4775 missingterm((char*)0);
a0d0e21e 4776 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4777 if (SvIVX(PL_lex_stuff) == '\'')
4778 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4779 TERM(sublex_start());
4780
8782bef2
GB
4781 case KEY_qr:
4782 s = scan_pat(s,OP_QR);
4783 TERM(sublex_start());
4784
79072805 4785 case KEY_qx:
09bef843 4786 s = scan_str(s,FALSE,FALSE);
79072805 4787 if (!s)
85e6fe83 4788 missingterm((char*)0);
79072805
LW
4789 yylval.ival = OP_BACKTICK;
4790 set_csh();
4791 TERM(sublex_start());
4792
4793 case KEY_return:
4794 OLDLOP(OP_RETURN);
4795
4796 case KEY_require:
a7cb1f99
GS
4797 s = skipspace(s);
4798 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4799 s = force_version(s);
4800 }
4801 else {
4802 *PL_tokenbuf = '\0';
4803 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4804 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4805 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4806 else if (*s == '<')
4807 yyerror("<> should be quotes");
4808 }
463ee0b2 4809 UNI(OP_REQUIRE);
79072805
LW
4810
4811 case KEY_reset:
4812 UNI(OP_RESET);
4813
4814 case KEY_redo:
a0d0e21e 4815 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4816 LOOPX(OP_REDO);
4817
4818 case KEY_rename:
a0d0e21e 4819 LOP(OP_RENAME,XTERM);
79072805
LW
4820
4821 case KEY_rand:
4822 UNI(OP_RAND);
4823
4824 case KEY_rmdir:
4825 UNI(OP_RMDIR);
4826
4827 case KEY_rindex:
a0d0e21e 4828 LOP(OP_RINDEX,XTERM);
79072805
LW
4829
4830 case KEY_read:
a0d0e21e 4831 LOP(OP_READ,XTERM);
79072805
LW
4832
4833 case KEY_readdir:
4834 UNI(OP_READDIR);
4835
93a17b20
LW
4836 case KEY_readline:
4837 set_csh();
4838 UNI(OP_READLINE);
4839
4840 case KEY_readpipe:
4841 set_csh();
4842 UNI(OP_BACKTICK);
4843
79072805
LW
4844 case KEY_rewinddir:
4845 UNI(OP_REWINDDIR);
4846
4847 case KEY_recv:
a0d0e21e 4848 LOP(OP_RECV,XTERM);
79072805
LW
4849
4850 case KEY_reverse:
a0d0e21e 4851 LOP(OP_REVERSE,XTERM);
79072805
LW
4852
4853 case KEY_readlink:
4854 UNI(OP_READLINK);
4855
4856 case KEY_ref:
4857 UNI(OP_REF);
4858
4859 case KEY_s:
4860 s = scan_subst(s);
4861 if (yylval.opval)
4862 TERM(sublex_start());
4863 else
4864 TOKEN(1); /* force error */
4865
a0d0e21e
LW
4866 case KEY_chomp:
4867 UNI(OP_CHOMP);
4e553d73 4868
79072805
LW
4869 case KEY_scalar:
4870 UNI(OP_SCALAR);
4871
4872 case KEY_select:
a0d0e21e 4873 LOP(OP_SELECT,XTERM);
79072805
LW
4874
4875 case KEY_seek:
a0d0e21e 4876 LOP(OP_SEEK,XTERM);
79072805
LW
4877
4878 case KEY_semctl:
a0d0e21e 4879 LOP(OP_SEMCTL,XTERM);
79072805
LW
4880
4881 case KEY_semget:
a0d0e21e 4882 LOP(OP_SEMGET,XTERM);
79072805
LW
4883
4884 case KEY_semop:
a0d0e21e 4885 LOP(OP_SEMOP,XTERM);
79072805
LW
4886
4887 case KEY_send:
a0d0e21e 4888 LOP(OP_SEND,XTERM);
79072805
LW
4889
4890 case KEY_setpgrp:
a0d0e21e 4891 LOP(OP_SETPGRP,XTERM);
79072805
LW
4892
4893 case KEY_setpriority:
a0d0e21e 4894 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
4895
4896 case KEY_sethostent:
ff68c719 4897 UNI(OP_SHOSTENT);
79072805
LW
4898
4899 case KEY_setnetent:
ff68c719 4900 UNI(OP_SNETENT);
79072805
LW
4901
4902 case KEY_setservent:
ff68c719 4903 UNI(OP_SSERVENT);
79072805
LW
4904
4905 case KEY_setprotoent:
ff68c719 4906 UNI(OP_SPROTOENT);
79072805
LW
4907
4908 case KEY_setpwent:
4909 FUN0(OP_SPWENT);
4910
4911 case KEY_setgrent:
4912 FUN0(OP_SGRENT);
4913
4914 case KEY_seekdir:
a0d0e21e 4915 LOP(OP_SEEKDIR,XTERM);
79072805
LW
4916
4917 case KEY_setsockopt:
a0d0e21e 4918 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
4919
4920 case KEY_shift:
4921 UNI(OP_SHIFT);
4922
4923 case KEY_shmctl:
a0d0e21e 4924 LOP(OP_SHMCTL,XTERM);
79072805
LW
4925
4926 case KEY_shmget:
a0d0e21e 4927 LOP(OP_SHMGET,XTERM);
79072805
LW
4928
4929 case KEY_shmread:
a0d0e21e 4930 LOP(OP_SHMREAD,XTERM);
79072805
LW
4931
4932 case KEY_shmwrite:
a0d0e21e 4933 LOP(OP_SHMWRITE,XTERM);
79072805
LW
4934
4935 case KEY_shutdown:
a0d0e21e 4936 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
4937
4938 case KEY_sin:
4939 UNI(OP_SIN);
4940
4941 case KEY_sleep:
4942 UNI(OP_SLEEP);
4943
4944 case KEY_socket:
a0d0e21e 4945 LOP(OP_SOCKET,XTERM);
79072805
LW
4946
4947 case KEY_socketpair:
a0d0e21e 4948 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
4949
4950 case KEY_sort:
3280af22 4951 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
4952 s = skipspace(s);
4953 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 4954 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 4955 PL_expect = XTERM;
15f0808c 4956 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 4957 LOP(OP_SORT,XREF);
79072805
LW
4958
4959 case KEY_split:
a0d0e21e 4960 LOP(OP_SPLIT,XTERM);
79072805
LW
4961
4962 case KEY_sprintf:
a0d0e21e 4963 LOP(OP_SPRINTF,XTERM);
79072805
LW
4964
4965 case KEY_splice:
a0d0e21e 4966 LOP(OP_SPLICE,XTERM);
79072805
LW
4967
4968 case KEY_sqrt:
4969 UNI(OP_SQRT);
4970
4971 case KEY_srand:
4972 UNI(OP_SRAND);
4973
4974 case KEY_stat:
4975 UNI(OP_STAT);
4976
4977 case KEY_study:
79072805
LW
4978 UNI(OP_STUDY);
4979
4980 case KEY_substr:
a0d0e21e 4981 LOP(OP_SUBSTR,XTERM);
79072805
LW
4982
4983 case KEY_format:
4984 case KEY_sub:
93a17b20 4985 really_sub:
09bef843 4986 {
3280af22 4987 char tmpbuf[sizeof PL_tokenbuf];
b1b65b59 4988 SSize_t tboffset;
09bef843
SB
4989 expectation attrful;
4990 bool have_name, have_proto;
4991 int key = tmp;
4992
4993 s = skipspace(s);
4994
7e2040f0 4995 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
4996 (*s == ':' && s[1] == ':'))
4997 {
4998 PL_expect = XBLOCK;
4999 attrful = XATTRBLOCK;
b1b65b59
JH
5000 /* remember buffer pos'n for later force_word */
5001 tboffset = s - PL_oldbufptr;
09bef843
SB
5002 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5003 if (strchr(tmpbuf, ':'))
5004 sv_setpv(PL_subname, tmpbuf);
5005 else {
5006 sv_setsv(PL_subname,PL_curstname);
5007 sv_catpvn(PL_subname,"::",2);
5008 sv_catpvn(PL_subname,tmpbuf,len);
5009 }
5010 s = skipspace(d);
5011 have_name = TRUE;
5012 }
463ee0b2 5013 else {
09bef843
SB
5014 if (key == KEY_my)
5015 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5016 PL_expect = XTERMBLOCK;
5017 attrful = XATTRTERM;
5018 sv_setpv(PL_subname,"?");
5019 have_name = FALSE;
463ee0b2 5020 }
4633a7c4 5021
09bef843
SB
5022 if (key == KEY_format) {
5023 if (*s == '=')
5024 PL_lex_formbrack = PL_lex_brackets + 1;
5025 if (have_name)
b1b65b59
JH
5026 (void) force_word(PL_oldbufptr + tboffset, WORD,
5027 FALSE, TRUE, TRUE);
09bef843
SB
5028 OPERATOR(FORMAT);
5029 }
79072805 5030
09bef843
SB
5031 /* Look for a prototype */
5032 if (*s == '(') {
5033 char *p;
5034
5035 s = scan_str(s,FALSE,FALSE);
37fd879b 5036 if (!s)
09bef843 5037 Perl_croak(aTHX_ "Prototype not terminated");
09bef843
SB
5038 /* strip spaces */
5039 d = SvPVX(PL_lex_stuff);
5040 tmp = 0;
5041 for (p = d; *p; ++p) {
5042 if (!isSPACE(*p))
5043 d[tmp++] = *p;
5044 }
5045 d[tmp] = '\0';
5046 SvCUR(PL_lex_stuff) = tmp;
5047 have_proto = TRUE;
68dc0745 5048
09bef843 5049 s = skipspace(s);
4633a7c4 5050 }
09bef843
SB
5051 else
5052 have_proto = FALSE;
5053
5054 if (*s == ':' && s[1] != ':')
5055 PL_expect = attrful;
5056
5057 if (have_proto) {
b1b65b59
JH
5058 PL_nextval[PL_nexttoke].opval =
5059 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5060 PL_lex_stuff = Nullsv;
5061 force_next(THING);
68dc0745 5062 }
09bef843
SB
5063 if (!have_name) {
5064 sv_setpv(PL_subname,"__ANON__");
5065 TOKEN(ANONSUB);
4633a7c4 5066 }
b1b65b59
JH
5067 (void) force_word(PL_oldbufptr + tboffset, WORD,
5068 FALSE, TRUE, TRUE);
09bef843
SB
5069 if (key == KEY_my)
5070 TOKEN(MYSUB);
5071 TOKEN(SUB);
4633a7c4 5072 }
79072805
LW
5073
5074 case KEY_system:
5075 set_csh();
a0d0e21e 5076 LOP(OP_SYSTEM,XREF);
79072805
LW
5077
5078 case KEY_symlink:
a0d0e21e 5079 LOP(OP_SYMLINK,XTERM);
79072805
LW
5080
5081 case KEY_syscall:
a0d0e21e 5082 LOP(OP_SYSCALL,XTERM);
79072805 5083
c07a80fd 5084 case KEY_sysopen:
5085 LOP(OP_SYSOPEN,XTERM);
5086
137443ea 5087 case KEY_sysseek:
5088 LOP(OP_SYSSEEK,XTERM);
5089
79072805 5090 case KEY_sysread:
a0d0e21e 5091 LOP(OP_SYSREAD,XTERM);
79072805
LW
5092
5093 case KEY_syswrite:
a0d0e21e 5094 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5095
5096 case KEY_tr:
5097 s = scan_trans(s);
5098 TERM(sublex_start());
5099
5100 case KEY_tell:
5101 UNI(OP_TELL);
5102
5103 case KEY_telldir:
5104 UNI(OP_TELLDIR);
5105
463ee0b2 5106 case KEY_tie:
a0d0e21e 5107 LOP(OP_TIE,XTERM);
463ee0b2 5108
c07a80fd 5109 case KEY_tied:
5110 UNI(OP_TIED);
5111
79072805
LW
5112 case KEY_time:
5113 FUN0(OP_TIME);
5114
5115 case KEY_times:
5116 FUN0(OP_TMS);
5117
5118 case KEY_truncate:
a0d0e21e 5119 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5120
5121 case KEY_uc:
5122 UNI(OP_UC);
5123
5124 case KEY_ucfirst:
5125 UNI(OP_UCFIRST);
5126
463ee0b2
LW
5127 case KEY_untie:
5128 UNI(OP_UNTIE);
5129
79072805 5130 case KEY_until:
57843af0 5131 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5132 OPERATOR(UNTIL);
5133
5134 case KEY_unless:
57843af0 5135 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5136 OPERATOR(UNLESS);
5137
5138 case KEY_unlink:
a0d0e21e 5139 LOP(OP_UNLINK,XTERM);
79072805
LW
5140
5141 case KEY_undef:
5142 UNI(OP_UNDEF);
5143
5144 case KEY_unpack:
a0d0e21e 5145 LOP(OP_UNPACK,XTERM);
79072805
LW
5146
5147 case KEY_utime:
a0d0e21e 5148 LOP(OP_UTIME,XTERM);
79072805
LW
5149
5150 case KEY_umask:
e476b1b5 5151 if (ckWARN(WARN_UMASK)) {
3280af22 5152 for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
4e553d73 5153 if (*d != '0' && isDIGIT(*d))
e476b1b5 5154 Perl_warner(aTHX_ WARN_UMASK,
4438c4b7 5155 "umask: argument is missing initial 0");
748a9306 5156 }
79072805
LW
5157 UNI(OP_UMASK);
5158
5159 case KEY_unshift:
a0d0e21e
LW
5160 LOP(OP_UNSHIFT,XTERM);
5161
5162 case KEY_use:
3280af22 5163 if (PL_expect != XSTATE)
a0d0e21e 5164 yyerror("\"use\" not allowed in expression");
89bfa8cd 5165 s = skipspace(s);
a7cb1f99 5166 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
89bfa8cd 5167 s = force_version(s);
a7cb1f99 5168 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5169 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5170 force_next(WORD);
5171 }
5172 }
5173 else {
5174 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5175 s = force_version(s);
5176 }
a0d0e21e
LW
5177 yylval.ival = 1;
5178 OPERATOR(USE);
79072805
LW
5179
5180 case KEY_values:
5181 UNI(OP_VALUES);
5182
5183 case KEY_vec:
a0d0e21e 5184 LOP(OP_VEC,XTERM);
79072805
LW
5185
5186 case KEY_while:
57843af0 5187 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5188 OPERATOR(WHILE);
5189
5190 case KEY_warn:
3280af22 5191 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5192 LOP(OP_WARN,XTERM);
79072805
LW
5193
5194 case KEY_wait:
5195 FUN0(OP_WAIT);
5196
5197 case KEY_waitpid:
a0d0e21e 5198 LOP(OP_WAITPID,XTERM);
79072805
LW
5199
5200 case KEY_wantarray:
5201 FUN0(OP_WANTARRAY);
5202
5203 case KEY_write:
9d116dd7
JH
5204#ifdef EBCDIC
5205 {
5206 static char ctl_l[2];
5207
4e553d73 5208 if (ctl_l[0] == '\0')
9d116dd7
JH
5209 ctl_l[0] = toCTRL('L');
5210 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5211 }
5212#else
5213 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5214#endif
79072805
LW
5215 UNI(OP_ENTERWRITE);
5216
5217 case KEY_x:
3280af22 5218 if (PL_expect == XOPERATOR)
79072805
LW
5219 Mop(OP_REPEAT);
5220 check_uni();
5221 goto just_a_word;
5222
a0d0e21e
LW
5223 case KEY_xor:
5224 yylval.ival = OP_XOR;
5225 OPERATOR(OROP);
5226
79072805
LW
5227 case KEY_y:
5228 s = scan_trans(s);
5229 TERM(sublex_start());
5230 }
49dc05e3 5231 }}
79072805 5232}
bf4acbe4
GS
5233#ifdef __SC__
5234#pragma segment Main
5235#endif
79072805
LW
5236
5237I32
864dbfa3 5238Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5239{
5240 switch (*d) {
5241 case '_':
5242 if (d[1] == '_') {
a0d0e21e 5243 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5244 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5245 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5246 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5247 if (strEQ(d,"__END__")) return KEY___END__;
5248 }
5249 break;
8990e307
LW
5250 case 'A':
5251 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5252 break;
79072805 5253 case 'a':
463ee0b2
LW
5254 switch (len) {
5255 case 3:
a0d0e21e
LW
5256 if (strEQ(d,"and")) return -KEY_and;
5257 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5258 break;
463ee0b2 5259 case 5:
a0d0e21e
LW
5260 if (strEQ(d,"alarm")) return -KEY_alarm;
5261 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5262 break;
5263 case 6:
a0d0e21e 5264 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5265 break;
5266 }
79072805
LW
5267 break;
5268 case 'B':
5269 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5270 break;
79072805 5271 case 'b':
a0d0e21e
LW
5272 if (strEQ(d,"bless")) return -KEY_bless;
5273 if (strEQ(d,"bind")) return -KEY_bind;
5274 if (strEQ(d,"binmode")) return -KEY_binmode;
5275 break;
5276 case 'C':
5277 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5278 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5279 break;
5280 case 'c':
5281 switch (len) {
5282 case 3:
a0d0e21e
LW
5283 if (strEQ(d,"cmp")) return -KEY_cmp;
5284 if (strEQ(d,"chr")) return -KEY_chr;
5285 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5286 break;
5287 case 4:
79e5458b 5288 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5289 break;
5290 case 5:
a0d0e21e
LW
5291 if (strEQ(d,"close")) return -KEY_close;
5292 if (strEQ(d,"chdir")) return -KEY_chdir;
79e5458b 5293 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5294 if (strEQ(d,"chmod")) return -KEY_chmod;
5295 if (strEQ(d,"chown")) return -KEY_chown;
5296 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5297 break;
5298 case 6:
a0d0e21e
LW
5299 if (strEQ(d,"chroot")) return -KEY_chroot;
5300 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5301 break;
5302 case 7:
a0d0e21e 5303 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5304 break;
5305 case 8:
a0d0e21e
LW
5306 if (strEQ(d,"closedir")) return -KEY_closedir;
5307 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5308 break;
5309 }
5310 break;
ed6116ce
LW
5311 case 'D':
5312 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5313 break;
79072805
LW
5314 case 'd':
5315 switch (len) {
5316 case 2:
5317 if (strEQ(d,"do")) return KEY_do;
5318 break;
5319 case 3:
a0d0e21e 5320 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5321 break;
5322 case 4:
a0d0e21e 5323 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5324 break;
5325 case 6:
5326 if (strEQ(d,"delete")) return KEY_delete;
5327 break;
5328 case 7:
5329 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5330 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5331 break;
5332 case 8:
a0d0e21e 5333 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5334 break;
5335 }
5336 break;
5337 case 'E':
79072805
LW
5338 if (strEQ(d,"END")) return KEY_END;
5339 break;
5340 case 'e':
5341 switch (len) {
5342 case 2:
a0d0e21e 5343 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5344 break;
5345 case 3:
a0d0e21e
LW
5346 if (strEQ(d,"eof")) return -KEY_eof;
5347 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5348 break;
5349 case 4:
5350 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5351 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5352 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5353 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5354 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5355 break;
5356 case 5:
5357 if (strEQ(d,"elsif")) return KEY_elsif;
5358 break;
a0d0e21e
LW
5359 case 6:
5360 if (strEQ(d,"exists")) return KEY_exists;
cea2e8a9 5361 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
a0d0e21e 5362 break;
79072805 5363 case 8:
a0d0e21e
LW
5364 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5365 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5366 break;
5367 case 9:
a0d0e21e 5368 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5369 break;
5370 case 10:
a0d0e21e
LW
5371 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5372 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5373 break;
5374 case 11:
a0d0e21e 5375 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5376 break;
a687059c 5377 }
a687059c 5378 break;
79072805
LW
5379 case 'f':
5380 switch (len) {
5381 case 3:
5382 if (strEQ(d,"for")) return KEY_for;
5383 break;
5384 case 4:
a0d0e21e 5385 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5386 break;
5387 case 5:
a0d0e21e
LW
5388 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5389 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5390 break;
5391 case 6:
5392 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5393 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5394 break;
5395 case 7:
5396 if (strEQ(d,"foreach")) return KEY_foreach;
5397 break;
5398 case 8:
a0d0e21e 5399 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5400 break;
378cc40b 5401 }
a687059c 5402 break;
79072805 5403 case 'g':
a687059c
LW
5404 if (strnEQ(d,"get",3)) {
5405 d += 3;
5406 if (*d == 'p') {
79072805
LW
5407 switch (len) {
5408 case 7:
a0d0e21e
LW
5409 if (strEQ(d,"ppid")) return -KEY_getppid;
5410 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5411 break;
5412 case 8:
a0d0e21e
LW
5413 if (strEQ(d,"pwent")) return -KEY_getpwent;
5414 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5415 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5416 break;
5417 case 11:
a0d0e21e
LW
5418 if (strEQ(d,"peername")) return -KEY_getpeername;
5419 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5420 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5421 break;
5422 case 14:
a0d0e21e 5423 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5424 break;
5425 case 16:
a0d0e21e 5426 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5427 break;
5428 }
a687059c
LW
5429 }
5430 else if (*d == 'h') {
a0d0e21e
LW
5431 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5432 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5433 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5434 }
5435 else if (*d == 'n') {
a0d0e21e
LW
5436 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5437 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5438 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5439 }
5440 else if (*d == 's') {
a0d0e21e
LW
5441 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5442 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5443 if (strEQ(d,"servent")) return -KEY_getservent;
5444 if (strEQ(d,"sockname")) return -KEY_getsockname;
5445 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5446 }
5447 else if (*d == 'g') {
a0d0e21e
LW
5448 if (strEQ(d,"grent")) return -KEY_getgrent;
5449 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5450 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5451 }
5452 else if (*d == 'l') {
a0d0e21e 5453 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5454 }
a0d0e21e 5455 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5456 break;
a687059c 5457 }
79072805
LW
5458 switch (len) {
5459 case 2:
a0d0e21e
LW
5460 if (strEQ(d,"gt")) return -KEY_gt;
5461 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5462 break;
5463 case 4:
5464 if (strEQ(d,"grep")) return KEY_grep;
5465 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5466 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5467 break;
5468 case 6:
a0d0e21e 5469 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5470 break;
378cc40b 5471 }
a687059c 5472 break;
79072805 5473 case 'h':
a0d0e21e 5474 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5475 break;
7d07dbc2
MB
5476 case 'I':
5477 if (strEQ(d,"INIT")) return KEY_INIT;
5478 break;
79072805
LW
5479 case 'i':
5480 switch (len) {
5481 case 2:
5482 if (strEQ(d,"if")) return KEY_if;
5483 break;
5484 case 3:
a0d0e21e 5485 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5486 break;
5487 case 5:
a0d0e21e
LW
5488 if (strEQ(d,"index")) return -KEY_index;
5489 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5490 break;
5491 }
a687059c 5492 break;
79072805 5493 case 'j':
a0d0e21e 5494 if (strEQ(d,"join")) return -KEY_join;
a687059c 5495 break;
79072805
LW
5496 case 'k':
5497 if (len == 4) {
3a6a8333 5498 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5499 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5500 }
79072805 5501 break;
79072805
LW
5502 case 'l':
5503 switch (len) {
5504 case 2:
a0d0e21e
LW
5505 if (strEQ(d,"lt")) return -KEY_lt;
5506 if (strEQ(d,"le")) return -KEY_le;
5507 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5508 break;
5509 case 3:
a0d0e21e 5510 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5511 break;
5512 case 4:
5513 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5514 if (strEQ(d,"link")) return -KEY_link;
c0329465 5515 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5516 break;
79072805
LW
5517 case 5:
5518 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5519 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5520 break;
5521 case 6:
a0d0e21e
LW
5522 if (strEQ(d,"length")) return -KEY_length;
5523 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5524 break;
5525 case 7:
a0d0e21e 5526 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5527 break;
5528 case 9:
a0d0e21e 5529 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5530 break;
5531 }
a687059c 5532 break;
79072805
LW
5533 case 'm':
5534 switch (len) {
5535 case 1: return KEY_m;
93a17b20
LW
5536 case 2:
5537 if (strEQ(d,"my")) return KEY_my;
5538 break;
a0d0e21e
LW
5539 case 3:
5540 if (strEQ(d,"map")) return KEY_map;
5541 break;
79072805 5542 case 5:
a0d0e21e 5543 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5544 break;
5545 case 6:
a0d0e21e
LW
5546 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5547 if (strEQ(d,"msgget")) return -KEY_msgget;
5548 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5549 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5550 break;
5551 }
a687059c 5552 break;
79072805
LW
5553 case 'n':
5554 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5555 if (strEQ(d,"ne")) return -KEY_ne;
5556 if (strEQ(d,"not")) return -KEY_not;
5557 if (strEQ(d,"no")) return KEY_no;
a687059c 5558 break;
79072805
LW
5559 case 'o':
5560 switch (len) {
463ee0b2 5561 case 2:
a0d0e21e 5562 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5563 break;
79072805 5564 case 3:
a0d0e21e
LW
5565 if (strEQ(d,"ord")) return -KEY_ord;
5566 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5567 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5568 break;
5569 case 4:
a0d0e21e 5570 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5571 break;
5572 case 7:
a0d0e21e 5573 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5574 break;
fe14fcc3 5575 }
a687059c 5576 break;
79072805
LW
5577 case 'p':
5578 switch (len) {
5579 case 3:
4e553d73 5580 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5581 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5582 break;
5583 case 4:
3a6a8333 5584 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5585 if (strEQ(d,"pack")) return -KEY_pack;
5586 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5587 break;
5588 case 5:
5589 if (strEQ(d,"print")) return KEY_print;
5590 break;
5591 case 6:
5592 if (strEQ(d,"printf")) return KEY_printf;
5593 break;
5594 case 7:
5595 if (strEQ(d,"package")) return KEY_package;
5596 break;
c07a80fd 5597 case 9:
5598 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5599 }
79072805
LW
5600 break;
5601 case 'q':
5602 if (len <= 2) {
5603 if (strEQ(d,"q")) return KEY_q;
8782bef2 5604 if (strEQ(d,"qr")) return KEY_qr;
79072805 5605 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5606 if (strEQ(d,"qw")) return KEY_qw;
79072805 5607 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5608 }
a0d0e21e 5609 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5610 break;
5611 case 'r':
5612 switch (len) {
5613 case 3:
a0d0e21e 5614 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5615 break;
5616 case 4:
a0d0e21e
LW
5617 if (strEQ(d,"read")) return -KEY_read;
5618 if (strEQ(d,"rand")) return -KEY_rand;
5619 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5620 if (strEQ(d,"redo")) return KEY_redo;
5621 break;
5622 case 5:
a0d0e21e
LW
5623 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5624 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5625 break;
5626 case 6:
5627 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5628 if (strEQ(d,"rename")) return -KEY_rename;
5629 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5630 break;
5631 case 7:
ec4ab249 5632 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5633 if (strEQ(d,"reverse")) return -KEY_reverse;
5634 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5635 break;
5636 case 8:
a0d0e21e
LW
5637 if (strEQ(d,"readlink")) return -KEY_readlink;
5638 if (strEQ(d,"readline")) return -KEY_readline;
5639 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5640 break;
5641 case 9:
a0d0e21e 5642 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5643 break;
a687059c 5644 }
79072805
LW
5645 break;
5646 case 's':
a687059c 5647 switch (d[1]) {
79072805 5648 case 0: return KEY_s;
a687059c 5649 case 'c':
79072805 5650 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5651 break;
5652 case 'e':
79072805
LW
5653 switch (len) {
5654 case 4:
a0d0e21e
LW
5655 if (strEQ(d,"seek")) return -KEY_seek;
5656 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5657 break;
5658 case 5:
a0d0e21e 5659 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5660 break;
5661 case 6:
a0d0e21e
LW
5662 if (strEQ(d,"select")) return -KEY_select;
5663 if (strEQ(d,"semctl")) return -KEY_semctl;
5664 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5665 break;
5666 case 7:
a0d0e21e
LW
5667 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5668 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5669 break;
5670 case 8:
a0d0e21e
LW
5671 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5672 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
5673 break;
5674 case 9:
a0d0e21e 5675 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
5676 break;
5677 case 10:
a0d0e21e
LW
5678 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5679 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5680 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
5681 break;
5682 case 11:
a0d0e21e
LW
5683 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5684 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
5685 break;
5686 }
a687059c
LW
5687 break;
5688 case 'h':
79072805
LW
5689 switch (len) {
5690 case 5:
3a6a8333 5691 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
5692 break;
5693 case 6:
a0d0e21e
LW
5694 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5695 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
5696 break;
5697 case 7:
a0d0e21e 5698 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
5699 break;
5700 case 8:
a0d0e21e
LW
5701 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5702 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
5703 break;
5704 }
a687059c
LW
5705 break;
5706 case 'i':
a0d0e21e 5707 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
5708 break;
5709 case 'l':
a0d0e21e 5710 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
5711 break;
5712 case 'o':
79072805 5713 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
5714 if (strEQ(d,"socket")) return -KEY_socket;
5715 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
5716 break;
5717 case 'p':
79072805 5718 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 5719 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 5720 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
5721 break;
5722 case 'q':
a0d0e21e 5723 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
5724 break;
5725 case 'r':
a0d0e21e 5726 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
5727 break;
5728 case 't':
a0d0e21e 5729 if (strEQ(d,"stat")) return -KEY_stat;
79072805 5730 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
5731 break;
5732 case 'u':
a0d0e21e 5733 if (strEQ(d,"substr")) return -KEY_substr;
79072805 5734 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
5735 break;
5736 case 'y':
79072805
LW
5737 switch (len) {
5738 case 6:
a0d0e21e 5739 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
5740 break;
5741 case 7:
a0d0e21e
LW
5742 if (strEQ(d,"symlink")) return -KEY_symlink;
5743 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 5744 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5745 if (strEQ(d,"sysread")) return -KEY_sysread;
5746 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
5747 break;
5748 case 8:
a0d0e21e 5749 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 5750 break;
a687059c 5751 }
a687059c
LW
5752 break;
5753 }
5754 break;
79072805
LW
5755 case 't':
5756 switch (len) {
5757 case 2:
5758 if (strEQ(d,"tr")) return KEY_tr;
5759 break;
463ee0b2
LW
5760 case 3:
5761 if (strEQ(d,"tie")) return KEY_tie;
5762 break;
79072805 5763 case 4:
a0d0e21e 5764 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 5765 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 5766 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
5767 break;
5768 case 5:
a0d0e21e 5769 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
5770 break;
5771 case 7:
a0d0e21e 5772 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
5773 break;
5774 case 8:
a0d0e21e 5775 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 5776 break;
378cc40b 5777 }
a687059c 5778 break;
79072805
LW
5779 case 'u':
5780 switch (len) {
5781 case 2:
a0d0e21e
LW
5782 if (strEQ(d,"uc")) return -KEY_uc;
5783 break;
5784 case 3:
5785 if (strEQ(d,"use")) return KEY_use;
79072805
LW
5786 break;
5787 case 5:
5788 if (strEQ(d,"undef")) return KEY_undef;
5789 if (strEQ(d,"until")) return KEY_until;
463ee0b2 5790 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
5791 if (strEQ(d,"utime")) return -KEY_utime;
5792 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
5793 break;
5794 case 6:
5795 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
5796 if (strEQ(d,"unpack")) return -KEY_unpack;
5797 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
5798 break;
5799 case 7:
3a6a8333 5800 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 5801 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 5802 break;
a687059c
LW
5803 }
5804 break;
79072805 5805 case 'v':
a0d0e21e
LW
5806 if (strEQ(d,"values")) return -KEY_values;
5807 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 5808 break;
79072805
LW
5809 case 'w':
5810 switch (len) {
5811 case 4:
a0d0e21e
LW
5812 if (strEQ(d,"warn")) return -KEY_warn;
5813 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
5814 break;
5815 case 5:
5816 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 5817 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
5818 break;
5819 case 7:
a0d0e21e 5820 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
5821 break;
5822 case 9:
a0d0e21e 5823 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 5824 break;
2f3197b3 5825 }
a687059c 5826 break;
79072805 5827 case 'x':
a0d0e21e
LW
5828 if (len == 1) return -KEY_x;
5829 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 5830 break;
79072805
LW
5831 case 'y':
5832 if (len == 1) return KEY_y;
5833 break;
5834 case 'z':
a687059c
LW
5835 break;
5836 }
79072805 5837 return 0;
a687059c
LW
5838}
5839
76e3520e 5840STATIC void
cea2e8a9 5841S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 5842{
2f3197b3
LW
5843 char *w;
5844
d008e5eb 5845 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
5846 if (ckWARN(WARN_SYNTAX)) {
5847 int level = 1;
5848 for (w = s+2; *w && level; w++) {
5849 if (*w == '(')
5850 ++level;
5851 else if (*w == ')')
5852 --level;
5853 }
5854 if (*w)
5855 for (; *w && isSPACE(*w); w++) ;
5856 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
65cec589
GS
5857 Perl_warner(aTHX_ WARN_SYNTAX,
5858 "%s (...) interpreted as function",name);
d008e5eb 5859 }
2f3197b3 5860 }
3280af22 5861 while (s < PL_bufend && isSPACE(*s))
2f3197b3 5862 s++;
a687059c
LW
5863 if (*s == '(')
5864 s++;
3280af22 5865 while (s < PL_bufend && isSPACE(*s))
a687059c 5866 s++;
7e2040f0 5867 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 5868 w = s++;
7e2040f0 5869 while (isALNUM_lazy_if(s,UTF))
a687059c 5870 s++;
3280af22 5871 while (s < PL_bufend && isSPACE(*s))
a687059c 5872 s++;
e929a76b 5873 if (*s == ',') {
463ee0b2 5874 int kw;
e929a76b 5875 *s = '\0';
864dbfa3 5876 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 5877 *s = ',';
463ee0b2 5878 if (kw)
e929a76b 5879 return;
cea2e8a9 5880 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
5881 }
5882 }
5883}
5884
423cee85
JH
5885/* Either returns sv, or mortalizes sv and returns a new SV*.
5886 Best used as sv=new_constant(..., sv, ...).
5887 If s, pv are NULL, calls subroutine with one argument,
5888 and type is used with error messages only. */
5889
b3ac6de7 5890STATIC SV *
dff6d3cd 5891S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 5892 const char *type)
b3ac6de7 5893{
b3ac6de7 5894 dSP;
3280af22 5895 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 5896 SV *res;
b3ac6de7
IZ
5897 SV **cvp;
5898 SV *cv, *typesv;
f0af216f 5899 const char *why1, *why2, *why3;
4e553d73 5900
f0af216f 5901 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
5902 SV *msg;
5903
f0af216f 5904 why2 = strEQ(key,"charnames")
41ab332f 5905 ? "(possibly a missing \"use charnames ...\")"
f0af216f 5906 : "";
4e553d73 5907 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
5908 (type ? type: "undef"), why2);
5909
5910 /* This is convoluted and evil ("goto considered harmful")
5911 * but I do not understand the intricacies of all the different
5912 * failure modes of %^H in here. The goal here is to make
5913 * the most probable error message user-friendly. --jhi */
5914
5915 goto msgdone;
5916
423cee85 5917 report:
4e553d73 5918 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 5919 (type ? type: "undef"), why1, why2, why3);
41ab332f 5920 msgdone:
423cee85
JH
5921 yyerror(SvPVX(msg));
5922 SvREFCNT_dec(msg);
5923 return sv;
5924 }
b3ac6de7
IZ
5925 cvp = hv_fetch(table, key, strlen(key), FALSE);
5926 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
5927 why1 = "$^H{";
5928 why2 = key;
f0af216f 5929 why3 = "} is not defined";
423cee85 5930 goto report;
b3ac6de7
IZ
5931 }
5932 sv_2mortal(sv); /* Parent created it permanently */
5933 cv = *cvp;
423cee85
JH
5934 if (!pv && s)
5935 pv = sv_2mortal(newSVpvn(s, len));
5936 if (type && pv)
5937 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 5938 else
423cee85 5939 typesv = &PL_sv_undef;
4e553d73 5940
e788e7d3 5941 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
5942 ENTER ;
5943 SAVETMPS;
4e553d73 5944
423cee85 5945 PUSHMARK(SP) ;
a5845cb7 5946 EXTEND(sp, 3);
423cee85
JH
5947 if (pv)
5948 PUSHs(pv);
b3ac6de7 5949 PUSHs(sv);
423cee85
JH
5950 if (pv)
5951 PUSHs(typesv);
b3ac6de7 5952 PUTBACK;
423cee85 5953 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 5954
423cee85 5955 SPAGAIN ;
4e553d73 5956
423cee85 5957 /* Check the eval first */
9b0e499b 5958 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
5959 STRLEN n_a;
5960 sv_catpv(ERRSV, "Propagated");
5961 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 5962 (void)POPs;
423cee85
JH
5963 res = SvREFCNT_inc(sv);
5964 }
5965 else {
5966 res = POPs;
e1f15930 5967 (void)SvREFCNT_inc(res);
423cee85 5968 }
4e553d73 5969
423cee85
JH
5970 PUTBACK ;
5971 FREETMPS ;
5972 LEAVE ;
b3ac6de7 5973 POPSTACK;
4e553d73 5974
b3ac6de7 5975 if (!SvOK(res)) {
423cee85
JH
5976 why1 = "Call to &{$^H{";
5977 why2 = key;
f0af216f 5978 why3 = "}} did not return a defined value";
423cee85
JH
5979 sv = res;
5980 goto report;
9b0e499b 5981 }
423cee85 5982
9b0e499b 5983 return res;
b3ac6de7 5984}
4e553d73 5985
76e3520e 5986STATIC char *
cea2e8a9 5987S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
5988{
5989 register char *d = dest;
8903cb82 5990 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 5991 for (;;) {
8903cb82 5992 if (d >= e)
cea2e8a9 5993 Perl_croak(aTHX_ ident_too_long);
834a4ddd 5994 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 5995 *d++ = *s++;
7e2040f0 5996 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
5997 *d++ = ':';
5998 *d++ = ':';
5999 s++;
6000 }
c3e0f903 6001 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6002 *d++ = *s++;
6003 *d++ = *s++;
6004 }
fd400ab9 6005 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6006 char *t = s + UTF8SKIP(s);
fd400ab9 6007 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6008 t += UTF8SKIP(t);
6009 if (d + (t - s) > e)
cea2e8a9 6010 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6011 Copy(s, d, t - s, char);
6012 d += t - s;
6013 s = t;
6014 }
463ee0b2
LW
6015 else {
6016 *d = '\0';
6017 *slp = d - dest;
6018 return s;
e929a76b 6019 }
378cc40b
LW
6020 }
6021}
6022
76e3520e 6023STATIC char *
cea2e8a9 6024S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6025{
6026 register char *d;
8903cb82 6027 register char *e;
79072805 6028 char *bracket = 0;
748a9306 6029 char funny = *s++;
378cc40b 6030
a0d0e21e
LW
6031 if (isSPACE(*s))
6032 s = skipspace(s);
378cc40b 6033 d = dest;
8903cb82 6034 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6035 if (isDIGIT(*s)) {
8903cb82 6036 while (isDIGIT(*s)) {
6037 if (d >= e)
cea2e8a9 6038 Perl_croak(aTHX_ ident_too_long);
378cc40b 6039 *d++ = *s++;
8903cb82 6040 }
378cc40b
LW
6041 }
6042 else {
463ee0b2 6043 for (;;) {
8903cb82 6044 if (d >= e)
cea2e8a9 6045 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6046 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6047 *d++ = *s++;
7e2040f0 6048 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6049 *d++ = ':';
6050 *d++ = ':';
6051 s++;
6052 }
a0d0e21e 6053 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6054 *d++ = *s++;
6055 *d++ = *s++;
6056 }
fd400ab9 6057 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6058 char *t = s + UTF8SKIP(s);
fd400ab9 6059 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6060 t += UTF8SKIP(t);
6061 if (d + (t - s) > e)
cea2e8a9 6062 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6063 Copy(s, d, t - s, char);
6064 d += t - s;
6065 s = t;
6066 }
463ee0b2
LW
6067 else
6068 break;
6069 }
378cc40b
LW
6070 }
6071 *d = '\0';
6072 d = dest;
79072805 6073 if (*d) {
3280af22
NIS
6074 if (PL_lex_state != LEX_NORMAL)
6075 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6076 return s;
378cc40b 6077 }
748a9306 6078 if (*s == '$' && s[1] &&
7e2040f0 6079 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6080 {
4810e5ec 6081 return s;
5cd24f17 6082 }
79072805
LW
6083 if (*s == '{') {
6084 bracket = s;
6085 s++;
6086 }
6087 else if (ck_uni)
6088 check_uni();
93a17b20 6089 if (s < send)
79072805
LW
6090 *d = *s++;
6091 d[1] = '\0';
2b92dfce 6092 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6093 *d = toCTRL(*s);
6094 s++;
de3bb511 6095 }
79072805 6096 if (bracket) {
748a9306 6097 if (isSPACE(s[-1])) {
fa83b5b6 6098 while (s < send) {
6099 char ch = *s++;
bf4acbe4 6100 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6101 *d = ch;
6102 break;
6103 }
6104 }
748a9306 6105 }
7e2040f0 6106 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6107 d++;
a0ed51b3
LW
6108 if (UTF) {
6109 e = s;
155aba94 6110 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6111 e += UTF8SKIP(e);
fd400ab9 6112 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6113 e += UTF8SKIP(e);
6114 }
6115 Copy(s, d, e - s, char);
6116 d += e - s;
6117 s = e;
6118 }
6119 else {
2b92dfce 6120 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6121 *d++ = *s++;
2b92dfce 6122 if (d >= e)
cea2e8a9 6123 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6124 }
79072805 6125 *d = '\0';
bf4acbe4 6126 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6127 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6128 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6129 const char *brack = *s == '[' ? "[...]" : "{...}";
cea2e8a9 6130 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 6131 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6132 funny, dest, brack, funny, dest, brack);
6133 }
79072805 6134 bracket++;
a0be28da 6135 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6136 return s;
6137 }
4e553d73
NIS
6138 }
6139 /* Handle extended ${^Foo} variables
2b92dfce
GS
6140 * 1999-02-27 mjd-perl-patch@plover.com */
6141 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6142 && isALNUM(*s))
6143 {
6144 d++;
6145 while (isALNUM(*s) && d < e) {
6146 *d++ = *s++;
6147 }
6148 if (d >= e)
cea2e8a9 6149 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6150 *d = '\0';
79072805
LW
6151 }
6152 if (*s == '}') {
6153 s++;
3280af22
NIS
6154 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6155 PL_lex_state = LEX_INTERPEND;
748a9306
LW
6156 if (funny == '#')
6157 funny = '@';
d008e5eb 6158 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6159 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6160 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6161 {
cea2e8a9 6162 Perl_warner(aTHX_ WARN_AMBIGUOUS,
d008e5eb
GS
6163 "Ambiguous use of %c{%s} resolved to %c%s",
6164 funny, dest, funny, dest);
6165 }
6166 }
79072805
LW
6167 }
6168 else {
6169 s = bracket; /* let the parser handle it */
93a17b20 6170 *dest = '\0';
79072805
LW
6171 }
6172 }
3280af22
NIS
6173 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6174 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6175 return s;
6176}
6177
cea2e8a9
GS
6178void
6179Perl_pmflag(pTHX_ U16 *pmfl, int ch)
a0d0e21e 6180{
bbce6d69 6181 if (ch == 'i')
a0d0e21e 6182 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6183 else if (ch == 'g')
6184 *pmfl |= PMf_GLOBAL;
c90c0ff4 6185 else if (ch == 'c')
6186 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6187 else if (ch == 'o')
6188 *pmfl |= PMf_KEEP;
6189 else if (ch == 'm')
6190 *pmfl |= PMf_MULTILINE;
6191 else if (ch == 's')
6192 *pmfl |= PMf_SINGLELINE;
6193 else if (ch == 'x')
6194 *pmfl |= PMf_EXTENDED;
6195}
378cc40b 6196
76e3520e 6197STATIC char *
cea2e8a9 6198S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6199{
79072805
LW
6200 PMOP *pm;
6201 char *s;
378cc40b 6202
09bef843 6203 s = scan_str(start,FALSE,FALSE);
37fd879b 6204 if (!s)
cea2e8a9 6205 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6206
8782bef2 6207 pm = (PMOP*)newPMOP(type, 0);
3280af22 6208 if (PL_multi_open == '?')
79072805 6209 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6210 if(type == OP_QR) {
6211 while (*s && strchr("iomsx", *s))
6212 pmflag(&pm->op_pmflags,*s++);
6213 }
6214 else {
6215 while (*s && strchr("iogcmsx", *s))
6216 pmflag(&pm->op_pmflags,*s++);
6217 }
4633a7c4 6218 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6219
3280af22 6220 PL_lex_op = (OP*)pm;
79072805 6221 yylval.ival = OP_MATCH;
378cc40b
LW
6222 return s;
6223}
6224
76e3520e 6225STATIC char *
cea2e8a9 6226S_scan_subst(pTHX_ char *start)
79072805 6227{
a0d0e21e 6228 register char *s;
79072805 6229 register PMOP *pm;
4fdae800 6230 I32 first_start;
79072805
LW
6231 I32 es = 0;
6232
79072805
LW
6233 yylval.ival = OP_NULL;
6234
09bef843 6235 s = scan_str(start,FALSE,FALSE);
79072805 6236
37fd879b 6237 if (!s)
cea2e8a9 6238 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6239
3280af22 6240 if (s[-1] == PL_multi_open)
79072805
LW
6241 s--;
6242
3280af22 6243 first_start = PL_multi_start;
09bef843 6244 s = scan_str(s,FALSE,FALSE);
79072805 6245 if (!s) {
37fd879b 6246 if (PL_lex_stuff) {
3280af22 6247 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6248 PL_lex_stuff = Nullsv;
6249 }
cea2e8a9 6250 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6251 }
3280af22 6252 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6253
79072805 6254 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6255 while (*s) {
a687059c
LW
6256 if (*s == 'e') {
6257 s++;
2f3197b3 6258 es++;
a687059c 6259 }
b3eb6a9b 6260 else if (strchr("iogcmsx", *s))
a0d0e21e 6261 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6262 else
6263 break;
378cc40b 6264 }
79072805
LW
6265
6266 if (es) {
6267 SV *repl;
0244c3a4
GS
6268 PL_sublex_info.super_bufptr = s;
6269 PL_sublex_info.super_bufend = PL_bufend;
6270 PL_multi_end = 0;
79072805 6271 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6272 repl = newSVpvn("",0);
463ee0b2 6273 while (es-- > 0)
a0d0e21e 6274 sv_catpv(repl, es ? "eval " : "do ");
79072805 6275 sv_catpvn(repl, "{ ", 2);
3280af22 6276 sv_catsv(repl, PL_lex_repl);
79072805 6277 sv_catpvn(repl, " };", 2);
25da4f38 6278 SvEVALED_on(repl);
3280af22
NIS
6279 SvREFCNT_dec(PL_lex_repl);
6280 PL_lex_repl = repl;
378cc40b 6281 }
79072805 6282
4633a7c4 6283 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6284 PL_lex_op = (OP*)pm;
79072805 6285 yylval.ival = OP_SUBST;
378cc40b
LW
6286 return s;
6287}
6288
76e3520e 6289STATIC char *
cea2e8a9 6290S_scan_trans(pTHX_ char *start)
378cc40b 6291{
a0d0e21e 6292 register char* s;
11343788 6293 OP *o;
79072805
LW
6294 short *tbl;
6295 I32 squash;
a0ed51b3 6296 I32 del;
79072805
LW
6297 I32 complement;
6298
6299 yylval.ival = OP_NULL;
6300
09bef843 6301 s = scan_str(start,FALSE,FALSE);
37fd879b 6302 if (!s)
cea2e8a9 6303 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6304 if (s[-1] == PL_multi_open)
2f3197b3
LW
6305 s--;
6306
09bef843 6307 s = scan_str(s,FALSE,FALSE);
79072805 6308 if (!s) {
37fd879b 6309 if (PL_lex_stuff) {
3280af22 6310 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6311 PL_lex_stuff = Nullsv;
6312 }
cea2e8a9 6313 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6314 }
79072805 6315
a0ed51b3 6316 complement = del = squash = 0;
6940069f 6317 while (strchr("cds", *s)) {
395c3793 6318 if (*s == 'c')
79072805 6319 complement = OPpTRANS_COMPLEMENT;
395c3793 6320 else if (*s == 'd')
a0ed51b3
LW
6321 del = OPpTRANS_DELETE;
6322 else if (*s == 's')
79072805 6323 squash = OPpTRANS_SQUASH;
395c3793
LW
6324 s++;
6325 }
8973db79
JH
6326
6327 New(803, tbl, complement&&!del?258:256, short);
6328 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7948272d
NIS
6329 o->op_private = del|squash|complement|
6330 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6331 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6332
3280af22 6333 PL_lex_op = o;
79072805
LW
6334 yylval.ival = OP_TRANS;
6335 return s;
6336}
6337
76e3520e 6338STATIC char *
cea2e8a9 6339S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6340{
6341 SV *herewas;
6342 I32 op_type = OP_SCALAR;
6343 I32 len;
6344 SV *tmpstr;
6345 char term;
6346 register char *d;
fc36a67e 6347 register char *e;
4633a7c4 6348 char *peek;
3280af22 6349 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6350
6351 s += 2;
3280af22
NIS
6352 d = PL_tokenbuf;
6353 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6354 if (!outer)
79072805 6355 *d++ = '\n';
bf4acbe4 6356 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6357 if (*peek && strchr("`'\"",*peek)) {
6358 s = peek;
79072805 6359 term = *s++;
3280af22 6360 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6361 d += len;
3280af22 6362 if (s < PL_bufend)
79072805 6363 s++;
79072805
LW
6364 }
6365 else {
6366 if (*s == '\\')
6367 s++, term = '\'';
6368 else
6369 term = '"';
7e2040f0 6370 if (!isALNUM_lazy_if(s,UTF))
4633a7c4 6371 deprecate("bare << to mean <<\"\"");
7e2040f0 6372 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6373 if (d < e)
6374 *d++ = *s;
6375 }
6376 }
3280af22 6377 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6378 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6379 *d++ = '\n';
6380 *d = '\0';
3280af22 6381 len = d - PL_tokenbuf;
6a27c188 6382#ifndef PERL_STRICT_CR
f63a84b2
LW
6383 d = strchr(s, '\r');
6384 if (d) {
6385 char *olds = s;
6386 s = d;
3280af22 6387 while (s < PL_bufend) {
f63a84b2
LW
6388 if (*s == '\r') {
6389 *d++ = '\n';
6390 if (*++s == '\n')
6391 s++;
6392 }
6393 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6394 *d++ = *s++;
6395 s++;
6396 }
6397 else
6398 *d++ = *s++;
6399 }
6400 *d = '\0';
3280af22
NIS
6401 PL_bufend = d;
6402 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6403 s = olds;
6404 }
6405#endif
79072805 6406 d = "\n";
3280af22 6407 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6408 herewas = newSVpvn(s,PL_bufend-s);
79072805 6409 else
79cb57f6 6410 s--, herewas = newSVpvn(s,d-s);
79072805 6411 s += SvCUR(herewas);
748a9306 6412
8d6dde3e 6413 tmpstr = NEWSV(87,79);
748a9306
LW
6414 sv_upgrade(tmpstr, SVt_PVIV);
6415 if (term == '\'') {
79072805 6416 op_type = OP_CONST;
748a9306
LW
6417 SvIVX(tmpstr) = -1;
6418 }
6419 else if (term == '`') {
79072805 6420 op_type = OP_BACKTICK;
748a9306
LW
6421 SvIVX(tmpstr) = '\\';
6422 }
79072805
LW
6423
6424 CLINE;
57843af0 6425 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6426 PL_multi_open = PL_multi_close = '<';
6427 term = *PL_tokenbuf;
0244c3a4
GS
6428 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6429 char *bufptr = PL_sublex_info.super_bufptr;
6430 char *bufend = PL_sublex_info.super_bufend;
6431 char *olds = s - SvCUR(herewas);
6432 s = strchr(bufptr, '\n');
6433 if (!s)
6434 s = bufend;
6435 d = s;
6436 while (s < bufend &&
6437 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6438 if (*s++ == '\n')
57843af0 6439 CopLINE_inc(PL_curcop);
0244c3a4
GS
6440 }
6441 if (s >= bufend) {
57843af0 6442 CopLINE_set(PL_curcop, PL_multi_start);
0244c3a4
GS
6443 missingterm(PL_tokenbuf);
6444 }
6445 sv_setpvn(herewas,bufptr,d-bufptr+1);
6446 sv_setpvn(tmpstr,d+1,s-d);
6447 s += len - 1;
6448 sv_catpvn(herewas,s,bufend-s);
6449 (void)strcpy(bufptr,SvPVX(herewas));
6450
6451 s = olds;
6452 goto retval;
6453 }
6454 else if (!outer) {
79072805 6455 d = s;
3280af22
NIS
6456 while (s < PL_bufend &&
6457 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6458 if (*s++ == '\n')
57843af0 6459 CopLINE_inc(PL_curcop);
79072805 6460 }
3280af22 6461 if (s >= PL_bufend) {
57843af0 6462 CopLINE_set(PL_curcop, PL_multi_start);
3280af22 6463 missingterm(PL_tokenbuf);
79072805
LW
6464 }
6465 sv_setpvn(tmpstr,d+1,s-d);
6466 s += len - 1;
57843af0 6467 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6468
3280af22
NIS
6469 sv_catpvn(herewas,s,PL_bufend-s);
6470 sv_setsv(PL_linestr,herewas);
6471 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6472 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6473 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6474 }
6475 else
6476 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6477 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6478 if (!outer ||
3280af22 6479 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
57843af0 6480 CopLINE_set(PL_curcop, PL_multi_start);
3280af22 6481 missingterm(PL_tokenbuf);
79072805 6482 }
57843af0 6483 CopLINE_inc(PL_curcop);
3280af22 6484 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6485 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6486#ifndef PERL_STRICT_CR
3280af22 6487 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6488 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6489 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6490 {
3280af22
NIS
6491 PL_bufend[-2] = '\n';
6492 PL_bufend--;
6493 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6494 }
3280af22
NIS
6495 else if (PL_bufend[-1] == '\r')
6496 PL_bufend[-1] = '\n';
f63a84b2 6497 }
3280af22
NIS
6498 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6499 PL_bufend[-1] = '\n';
f63a84b2 6500#endif
3280af22 6501 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6502 SV *sv = NEWSV(88,0);
6503
93a17b20 6504 sv_upgrade(sv, SVt_PVMG);
3280af22 6505 sv_setsv(sv,PL_linestr);
57843af0 6506 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6507 }
3280af22
NIS
6508 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6509 s = PL_bufend - 1;
79072805 6510 *s = ' ';
3280af22
NIS
6511 sv_catsv(PL_linestr,herewas);
6512 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805
LW
6513 }
6514 else {
3280af22
NIS
6515 s = PL_bufend;
6516 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6517 }
6518 }
79072805 6519 s++;
0244c3a4 6520retval:
57843af0 6521 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6522 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6523 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6524 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6525 }
8990e307 6526 SvREFCNT_dec(herewas);
7948272d
NIS
6527 if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
6528 SvUTF8_on(tmpstr);
3280af22 6529 PL_lex_stuff = tmpstr;
79072805
LW
6530 yylval.ival = op_type;
6531 return s;
6532}
6533
02aa26ce
NT
6534/* scan_inputsymbol
6535 takes: current position in input buffer
6536 returns: new position in input buffer
6537 side-effects: yylval and lex_op are set.
6538
6539 This code handles:
6540
6541 <> read from ARGV
6542 <FH> read from filehandle
6543 <pkg::FH> read from package qualified filehandle
6544 <pkg'FH> read from package qualified filehandle
6545 <$fh> read from filehandle in $fh
6546 <*.h> filename glob
6547
6548*/
6549
76e3520e 6550STATIC char *
cea2e8a9 6551S_scan_inputsymbol(pTHX_ char *start)
79072805 6552{
02aa26ce 6553 register char *s = start; /* current position in buffer */
79072805 6554 register char *d;
fc36a67e 6555 register char *e;
1b420867 6556 char *end;
79072805
LW
6557 I32 len;
6558
3280af22
NIS
6559 d = PL_tokenbuf; /* start of temp holding space */
6560 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6561 end = strchr(s, '\n');
6562 if (!end)
6563 end = PL_bufend;
6564 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6565
6566 /* die if we didn't have space for the contents of the <>,
1b420867 6567 or if it didn't end, or if we see a newline
02aa26ce
NT
6568 */
6569
3280af22 6570 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6571 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6572 if (s >= end)
cea2e8a9 6573 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6574
fc36a67e 6575 s++;
02aa26ce
NT
6576
6577 /* check for <$fh>
6578 Remember, only scalar variables are interpreted as filehandles by
6579 this code. Anything more complex (e.g., <$fh{$num}>) will be
6580 treated as a glob() call.
6581 This code makes use of the fact that except for the $ at the front,
6582 a scalar variable and a filehandle look the same.
6583 */
4633a7c4 6584 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6585
6586 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6587 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6588 d++;
02aa26ce
NT
6589
6590 /* If we've tried to read what we allow filehandles to look like, and
6591 there's still text left, then it must be a glob() and not a getline.
6592 Use scan_str to pull out the stuff between the <> and treat it
6593 as nothing more than a string.
6594 */
6595
3280af22 6596 if (d - PL_tokenbuf != len) {
79072805
LW
6597 yylval.ival = OP_GLOB;
6598 set_csh();
09bef843 6599 s = scan_str(start,FALSE,FALSE);
79072805 6600 if (!s)
cea2e8a9 6601 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6602 return s;
6603 }
395c3793 6604 else {
02aa26ce 6605 /* we're in a filehandle read situation */
3280af22 6606 d = PL_tokenbuf;
02aa26ce
NT
6607
6608 /* turn <> into <ARGV> */
79072805
LW
6609 if (!len)
6610 (void)strcpy(d,"ARGV");
02aa26ce
NT
6611
6612 /* if <$fh>, create the ops to turn the variable into a
6613 filehandle
6614 */
79072805 6615 if (*d == '$') {
a0d0e21e 6616 I32 tmp;
02aa26ce
NT
6617
6618 /* try to find it in the pad for this block, otherwise find
6619 add symbol table ops
6620 */
11343788
MB
6621 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
6622 OP *o = newOP(OP_PADSV, 0);
6623 o->op_targ = tmp;
f5284f61 6624 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o);
a0d0e21e
LW
6625 }
6626 else {
6627 GV *gv = gv_fetchpv(d+1,TRUE, SVt_PV);
3280af22 6628 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0,
a0d0e21e 6629 newUNOP(OP_RV2SV, 0,
f5284f61 6630 newGVOP(OP_GV, 0, gv)));
a0d0e21e 6631 }
f5284f61
IZ
6632 PL_lex_op->op_flags |= OPf_SPECIAL;
6633 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
6634 yylval.ival = OP_NULL;
6635 }
02aa26ce
NT
6636
6637 /* If it's none of the above, it must be a literal filehandle
6638 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 6639 else {
85e6fe83 6640 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
3280af22 6641 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
6642 yylval.ival = OP_NULL;
6643 }
6644 }
02aa26ce 6645
79072805
LW
6646 return s;
6647}
6648
02aa26ce
NT
6649
6650/* scan_str
6651 takes: start position in buffer
09bef843
SB
6652 keep_quoted preserve \ on the embedded delimiter(s)
6653 keep_delims preserve the delimiters around the string
02aa26ce
NT
6654 returns: position to continue reading from buffer
6655 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6656 updates the read buffer.
6657
6658 This subroutine pulls a string out of the input. It is called for:
6659 q single quotes q(literal text)
6660 ' single quotes 'literal text'
6661 qq double quotes qq(interpolate $here please)
6662 " double quotes "interpolate $here please"
6663 qx backticks qx(/bin/ls -l)
6664 ` backticks `/bin/ls -l`
6665 qw quote words @EXPORT_OK = qw( func() $spam )
6666 m// regexp match m/this/
6667 s/// regexp substitute s/this/that/
6668 tr/// string transliterate tr/this/that/
6669 y/// string transliterate y/this/that/
6670 ($*@) sub prototypes sub foo ($)
09bef843 6671 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
6672 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6673
6674 In most of these cases (all but <>, patterns and transliterate)
6675 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6676 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6677 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6678 calls scan_str().
4e553d73 6679
02aa26ce
NT
6680 It skips whitespace before the string starts, and treats the first
6681 character as the delimiter. If the delimiter is one of ([{< then
6682 the corresponding "close" character )]}> is used as the closing
6683 delimiter. It allows quoting of delimiters, and if the string has
6684 balanced delimiters ([{<>}]) it allows nesting.
6685
37fd879b
HS
6686 On success, the SV with the resulting string is put into lex_stuff or,
6687 if that is already non-NULL, into lex_repl. The second case occurs only
6688 when parsing the RHS of the special constructs s/// and tr/// (y///).
6689 For convenience, the terminating delimiter character is stuffed into
6690 SvIVX of the SV.
02aa26ce
NT
6691*/
6692
76e3520e 6693STATIC char *
09bef843 6694S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 6695{
02aa26ce
NT
6696 SV *sv; /* scalar value: string */
6697 char *tmps; /* temp string, used for delimiter matching */
6698 register char *s = start; /* current position in the buffer */
6699 register char term; /* terminating character */
6700 register char *to; /* current position in the sv's data */
6701 I32 brackets = 1; /* bracket nesting level */
89491803 6702 bool has_utf8 = FALSE; /* is there any utf8 content? */
02aa26ce
NT
6703
6704 /* skip space before the delimiter */
fb73857a 6705 if (isSPACE(*s))
6706 s = skipspace(s);
02aa26ce
NT
6707
6708 /* mark where we are, in case we need to report errors */
79072805 6709 CLINE;
02aa26ce
NT
6710
6711 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 6712 term = *s;
63cd0674 6713 if (!UTF8_IS_INVARIANT((U8)term) && UTF)
89491803 6714 has_utf8 = TRUE;
b1c7b182 6715
02aa26ce 6716 /* mark where we are */
57843af0 6717 PL_multi_start = CopLINE(PL_curcop);
3280af22 6718 PL_multi_open = term;
02aa26ce
NT
6719
6720 /* find corresponding closing delimiter */
93a17b20 6721 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
79072805 6722 term = tmps[5];
3280af22 6723 PL_multi_close = term;
79072805 6724
02aa26ce 6725 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
6726 assuming. 79 is the SV's initial length. What a random number. */
6727 sv = NEWSV(87,79);
ed6116ce
LW
6728 sv_upgrade(sv, SVt_PVIV);
6729 SvIVX(sv) = term;
a0d0e21e 6730 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
6731
6732 /* move past delimiter and try to read a complete string */
09bef843
SB
6733 if (keep_delims)
6734 sv_catpvn(sv, s, 1);
93a17b20
LW
6735 s++;
6736 for (;;) {
02aa26ce 6737 /* extend sv if need be */
3280af22 6738 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 6739 /* set 'to' to the next character in the sv's string */
463ee0b2 6740 to = SvPVX(sv)+SvCUR(sv);
09bef843 6741
02aa26ce 6742 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
6743 if (PL_multi_open == PL_multi_close) {
6744 for (; s < PL_bufend; s++,to++) {
02aa26ce 6745 /* embedded newlines increment the current line number */
3280af22 6746 if (*s == '\n' && !PL_rsfp)
57843af0 6747 CopLINE_inc(PL_curcop);
02aa26ce 6748 /* handle quoted delimiters */
3280af22 6749 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 6750 if (!keep_quoted && s[1] == term)
a0d0e21e 6751 s++;
02aa26ce 6752 /* any other quotes are simply copied straight through */
a0d0e21e
LW
6753 else
6754 *to++ = *s++;
6755 }
02aa26ce
NT
6756 /* terminate when run out of buffer (the for() condition), or
6757 have found the terminator */
93a17b20
LW
6758 else if (*s == term)
6759 break;
63cd0674 6760 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6761 has_utf8 = TRUE;
93a17b20
LW
6762 *to = *s;
6763 }
6764 }
02aa26ce
NT
6765
6766 /* if the terminator isn't the same as the start character (e.g.,
6767 matched brackets), we have to allow more in the quoting, and
6768 be prepared for nested brackets.
6769 */
93a17b20 6770 else {
02aa26ce 6771 /* read until we run out of string, or we find the terminator */
3280af22 6772 for (; s < PL_bufend; s++,to++) {
02aa26ce 6773 /* embedded newlines increment the line count */
3280af22 6774 if (*s == '\n' && !PL_rsfp)
57843af0 6775 CopLINE_inc(PL_curcop);
02aa26ce 6776 /* backslashes can escape the open or closing characters */
3280af22 6777 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
6778 if (!keep_quoted &&
6779 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
6780 s++;
6781 else
6782 *to++ = *s++;
6783 }
02aa26ce 6784 /* allow nested opens and closes */
3280af22 6785 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 6786 break;
3280af22 6787 else if (*s == PL_multi_open)
93a17b20 6788 brackets++;
63cd0674 6789 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6790 has_utf8 = TRUE;
93a17b20
LW
6791 *to = *s;
6792 }
6793 }
02aa26ce 6794 /* terminate the copied string and update the sv's end-of-string */
93a17b20 6795 *to = '\0';
463ee0b2 6796 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 6797
02aa26ce
NT
6798 /*
6799 * this next chunk reads more into the buffer if we're not done yet
6800 */
6801
b1c7b182
GS
6802 if (s < PL_bufend)
6803 break; /* handle case where we are done yet :-) */
79072805 6804
6a27c188 6805#ifndef PERL_STRICT_CR
f63a84b2 6806 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
6807 if ((to[-2] == '\r' && to[-1] == '\n') ||
6808 (to[-2] == '\n' && to[-1] == '\r'))
6809 {
f63a84b2
LW
6810 to[-2] = '\n';
6811 to--;
6812 SvCUR_set(sv, to - SvPVX(sv));
6813 }
6814 else if (to[-1] == '\r')
6815 to[-1] = '\n';
6816 }
6817 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
6818 to[-1] = '\n';
6819#endif
6820
02aa26ce
NT
6821 /* if we're out of file, or a read fails, bail and reset the current
6822 line marker so we can report where the unterminated string began
6823 */
3280af22
NIS
6824 if (!PL_rsfp ||
6825 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 6826 sv_free(sv);
57843af0 6827 CopLINE_set(PL_curcop, PL_multi_start);
79072805
LW
6828 return Nullch;
6829 }
02aa26ce 6830 /* we read a line, so increment our line counter */
57843af0 6831 CopLINE_inc(PL_curcop);
a0ed51b3 6832
02aa26ce 6833 /* update debugger info */
3280af22 6834 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6835 SV *sv = NEWSV(88,0);
6836
93a17b20 6837 sv_upgrade(sv, SVt_PVMG);
3280af22 6838 sv_setsv(sv,PL_linestr);
57843af0 6839 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 6840 }
a0ed51b3 6841
3280af22
NIS
6842 /* having changed the buffer, we must update PL_bufend */
6843 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6844 PL_last_lop = PL_last_uni = Nullch;
378cc40b 6845 }
4e553d73 6846
02aa26ce
NT
6847 /* at this point, we have successfully read the delimited string */
6848
09bef843
SB
6849 if (keep_delims)
6850 sv_catpvn(sv, s, 1);
89491803 6851 if (has_utf8)
b1c7b182 6852 SvUTF8_on(sv);
57843af0 6853 PL_multi_end = CopLINE(PL_curcop);
79072805 6854 s++;
02aa26ce
NT
6855
6856 /* if we allocated too much space, give some back */
93a17b20
LW
6857 if (SvCUR(sv) + 5 < SvLEN(sv)) {
6858 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 6859 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 6860 }
02aa26ce
NT
6861
6862 /* decide whether this is the first or second quoted string we've read
6863 for this op
6864 */
4e553d73 6865
3280af22
NIS
6866 if (PL_lex_stuff)
6867 PL_lex_repl = sv;
79072805 6868 else
3280af22 6869 PL_lex_stuff = sv;
378cc40b
LW
6870 return s;
6871}
6872
02aa26ce
NT
6873/*
6874 scan_num
6875 takes: pointer to position in buffer
6876 returns: pointer to new position in buffer
6877 side-effects: builds ops for the constant in yylval.op
6878
6879 Read a number in any of the formats that Perl accepts:
6880
7fd134d9
JH
6881 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
6882 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
6883 0b[01](_?[01])*
6884 0[0-7](_?[0-7])*
6885 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 6886
3280af22 6887 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
6888 thing it reads.
6889
6890 If it reads a number without a decimal point or an exponent, it will
6891 try converting the number to an integer and see if it can do so
6892 without loss of precision.
6893*/
4e553d73 6894
378cc40b 6895char *
b73d6f50 6896Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 6897{
02aa26ce
NT
6898 register char *s = start; /* current position in buffer */
6899 register char *d; /* destination in temp buffer */
6900 register char *e; /* end of temp buffer */
86554af2 6901 NV nv; /* number read, as a double */
a7cb1f99 6902 SV *sv = Nullsv; /* place to put the converted number */
b8403495 6903 bool floatit; /* boolean: int or float? */
02aa26ce 6904 char *lastub = 0; /* position of last underbar */
fc36a67e 6905 static char number_too_long[] = "Number too long";
378cc40b 6906
02aa26ce
NT
6907 /* We use the first character to decide what type of number this is */
6908
378cc40b 6909 switch (*s) {
79072805 6910 default:
cea2e8a9 6911 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 6912
02aa26ce 6913 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 6914 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
6915 case '0':
6916 {
02aa26ce
NT
6917 /* variables:
6918 u holds the "number so far"
4f19785b
WSI
6919 shift the power of 2 of the base
6920 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
6921 overflowed was the number more than we can hold?
6922
6923 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
6924 we in octal/hex/binary?" indicator to disallow hex characters
6925 when in octal mode.
02aa26ce 6926 */
9e24b6e2
JH
6927 NV n = 0.0;
6928 UV u = 0;
79072805 6929 I32 shift;
9e24b6e2
JH
6930 bool overflowed = FALSE;
6931 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
6932 static char* bases[5] = { "", "binary", "", "octal",
6933 "hexadecimal" };
6934 static char* Bases[5] = { "", "Binary", "", "Octal",
6935 "Hexadecimal" };
6936 static char *maxima[5] = { "",
6937 "0b11111111111111111111111111111111",
6938 "",
893fe2c2 6939 "037777777777",
9e24b6e2
JH
6940 "0xffffffff" };
6941 char *base, *Base, *max;
378cc40b 6942
02aa26ce 6943 /* check for hex */
378cc40b
LW
6944 if (s[1] == 'x') {
6945 shift = 4;
6946 s += 2;
4f19785b
WSI
6947 } else if (s[1] == 'b') {
6948 shift = 1;
6949 s += 2;
378cc40b 6950 }
02aa26ce 6951 /* check for a decimal in disguise */
b78218b7 6952 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 6953 goto decimal;
02aa26ce 6954 /* so it must be octal */
928753ea 6955 else {
378cc40b 6956 shift = 3;
928753ea
JH
6957 s++;
6958 }
6959
6960 if (*s == '_') {
6961 if (ckWARN(WARN_SYNTAX))
6962 Perl_warner(aTHX_ WARN_SYNTAX,
6963 "Misplaced _ in number");
6964 lastub = s++;
6965 }
9e24b6e2
JH
6966
6967 base = bases[shift];
6968 Base = Bases[shift];
6969 max = maxima[shift];
02aa26ce 6970
4f19785b 6971 /* read the rest of the number */
378cc40b 6972 for (;;) {
9e24b6e2 6973 /* x is used in the overflow test,
893fe2c2 6974 b is the digit we're adding on. */
9e24b6e2 6975 UV x, b;
55497cff 6976
378cc40b 6977 switch (*s) {
02aa26ce
NT
6978
6979 /* if we don't mention it, we're done */
378cc40b
LW
6980 default:
6981 goto out;
02aa26ce 6982
928753ea 6983 /* _ are ignored -- but warned about if consecutive */
de3bb511 6984 case '_':
928753ea
JH
6985 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
6986 Perl_warner(aTHX_ WARN_SYNTAX,
6987 "Misplaced _ in number");
6988 lastub = s++;
de3bb511 6989 break;
02aa26ce
NT
6990
6991 /* 8 and 9 are not octal */
378cc40b 6992 case '8': case '9':
4f19785b 6993 if (shift == 3)
cea2e8a9 6994 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 6995 /* FALL THROUGH */
02aa26ce
NT
6996
6997 /* octal digits */
4f19785b 6998 case '2': case '3': case '4':
378cc40b 6999 case '5': case '6': case '7':
4f19785b 7000 if (shift == 1)
cea2e8a9 7001 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7002 /* FALL THROUGH */
7003
7004 case '0': case '1':
02aa26ce 7005 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7006 goto digit;
02aa26ce
NT
7007
7008 /* hex digits */
378cc40b
LW
7009 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7010 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7011 /* make sure they said 0x */
378cc40b
LW
7012 if (shift != 4)
7013 goto out;
55497cff 7014 b = (*s++ & 7) + 9;
02aa26ce
NT
7015
7016 /* Prepare to put the digit we have onto the end
7017 of the number so far. We check for overflows.
7018 */
7019
55497cff 7020 digit:
9e24b6e2
JH
7021 if (!overflowed) {
7022 x = u << shift; /* make room for the digit */
7023
7024 if ((x >> shift) != u
7025 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7026 overflowed = TRUE;
7027 n = (NV) u;
767a6a26
PM
7028 if (ckWARN_d(WARN_OVERFLOW))
7029 Perl_warner(aTHX_ WARN_OVERFLOW,
9e24b6e2
JH
7030 "Integer overflow in %s number",
7031 base);
7032 } else
7033 u = x | b; /* add the digit to the end */
7034 }
7035 if (overflowed) {
7036 n *= nvshift[shift];
7037 /* If an NV has not enough bits in its
7038 * mantissa to represent an UV this summing of
7039 * small low-order numbers is a waste of time
7040 * (because the NV cannot preserve the
7041 * low-order bits anyway): we could just
7042 * remember when did we overflow and in the
7043 * end just multiply n by the right
7044 * amount. */
7045 n += (NV) b;
55497cff 7046 }
378cc40b
LW
7047 break;
7048 }
7049 }
02aa26ce
NT
7050
7051 /* if we get here, we had success: make a scalar value from
7052 the number.
7053 */
378cc40b 7054 out:
928753ea
JH
7055
7056 /* final misplaced underbar check */
7057 if (s[-1] == '_') {
7058 if (ckWARN(WARN_SYNTAX))
7059 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
7060 }
7061
79072805 7062 sv = NEWSV(92,0);
9e24b6e2 7063 if (overflowed) {
767a6a26
PM
7064 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
7065 Perl_warner(aTHX_ WARN_PORTABLE,
9e24b6e2
JH
7066 "%s number > %s non-portable",
7067 Base, max);
7068 sv_setnv(sv, n);
7069 }
7070 else {
15041a67 7071#if UVSIZE > 4
767a6a26
PM
7072 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
7073 Perl_warner(aTHX_ WARN_PORTABLE,
9e24b6e2
JH
7074 "%s number > %s non-portable",
7075 Base, max);
2cc4c2dc 7076#endif
9e24b6e2
JH
7077 sv_setuv(sv, u);
7078 }
2cc4c2dc 7079 if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7080 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7081 }
7082 break;
02aa26ce
NT
7083
7084 /*
7085 handle decimal numbers.
7086 we're also sent here when we read a 0 as the first digit
7087 */
378cc40b
LW
7088 case '1': case '2': case '3': case '4': case '5':
7089 case '6': case '7': case '8': case '9': case '.':
7090 decimal:
3280af22
NIS
7091 d = PL_tokenbuf;
7092 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7093 floatit = FALSE;
02aa26ce
NT
7094
7095 /* read next group of digits and _ and copy into d */
de3bb511 7096 while (isDIGIT(*s) || *s == '_') {
4e553d73 7097 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7098 if -w is on
7099 */
93a17b20 7100 if (*s == '_') {
928753ea
JH
7101 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7102 Perl_warner(aTHX_ WARN_SYNTAX,
7103 "Misplaced _ in number");
7104 lastub = s++;
93a17b20 7105 }
fc36a67e 7106 else {
02aa26ce 7107 /* check for end of fixed-length buffer */
fc36a67e 7108 if (d >= e)
cea2e8a9 7109 Perl_croak(aTHX_ number_too_long);
02aa26ce 7110 /* if we're ok, copy the character */
378cc40b 7111 *d++ = *s++;
fc36a67e 7112 }
378cc40b 7113 }
02aa26ce
NT
7114
7115 /* final misplaced underbar check */
928753ea 7116 if (lastub && s == lastub + 1) {
d008e5eb 7117 if (ckWARN(WARN_SYNTAX))
cea2e8a9 7118 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
d008e5eb 7119 }
02aa26ce
NT
7120
7121 /* read a decimal portion if there is one. avoid
7122 3..5 being interpreted as the number 3. followed
7123 by .5
7124 */
2f3197b3 7125 if (*s == '.' && s[1] != '.') {
79072805 7126 floatit = TRUE;
378cc40b 7127 *d++ = *s++;
02aa26ce 7128
928753ea
JH
7129 if (*s == '_') {
7130 if (ckWARN(WARN_SYNTAX))
7131 Perl_warner(aTHX_ WARN_SYNTAX,
7132 "Misplaced _ in number");
7133 lastub = s;
7134 }
7135
7136 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7137 */
fc36a67e 7138 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7139 /* fixed length buffer check */
fc36a67e 7140 if (d >= e)
cea2e8a9 7141 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7142 if (*s == '_') {
7143 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7144 Perl_warner(aTHX_ WARN_SYNTAX,
7145 "Misplaced _ in number");
7146 lastub = s;
7147 }
7148 else
fc36a67e 7149 *d++ = *s;
378cc40b 7150 }
928753ea
JH
7151 /* fractional part ending in underbar? */
7152 if (s[-1] == '_') {
7153 if (ckWARN(WARN_SYNTAX))
7154 Perl_warner(aTHX_ WARN_SYNTAX,
7155 "Misplaced _ in number");
7156 }
dd629d5b
GS
7157 if (*s == '.' && isDIGIT(s[1])) {
7158 /* oops, it's really a v-string, but without the "v" */
7159 s = start - 1;
7160 goto vstring;
7161 }
378cc40b 7162 }
02aa26ce
NT
7163
7164 /* read exponent part, if present */
7fd134d9 7165 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7166 floatit = TRUE;
7167 s++;
02aa26ce
NT
7168
7169 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7170 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7171
7fd134d9
JH
7172 /* stray preinitial _ */
7173 if (*s == '_') {
7174 if (ckWARN(WARN_SYNTAX))
7175 Perl_warner(aTHX_ WARN_SYNTAX,
7176 "Misplaced _ in number");
7177 lastub = s++;
7178 }
7179
02aa26ce 7180 /* allow positive or negative exponent */
378cc40b
LW
7181 if (*s == '+' || *s == '-')
7182 *d++ = *s++;
02aa26ce 7183
7fd134d9
JH
7184 /* stray initial _ */
7185 if (*s == '_') {
7186 if (ckWARN(WARN_SYNTAX))
7187 Perl_warner(aTHX_ WARN_SYNTAX,
7188 "Misplaced _ in number");
7189 lastub = s++;
7190 }
7191
7fd134d9
JH
7192 /* read digits of exponent */
7193 while (isDIGIT(*s) || *s == '_') {
7194 if (isDIGIT(*s)) {
7195 if (d >= e)
7196 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7197 *d++ = *s++;
7fd134d9
JH
7198 }
7199 else {
7200 if (ckWARN(WARN_SYNTAX) &&
7201 ((lastub && s == lastub + 1) ||
b3b48e3e 7202 (!isDIGIT(s[1]) && s[1] != '_')))
7fd134d9
JH
7203 Perl_warner(aTHX_ WARN_SYNTAX,
7204 "Misplaced _ in number");
b3b48e3e 7205 lastub = s++;
7fd134d9 7206 }
7fd134d9 7207 }
378cc40b 7208 }
02aa26ce
NT
7209
7210 /* terminate the string */
378cc40b 7211 *d = '\0';
02aa26ce
NT
7212
7213 /* make an sv from the string */
79072805 7214 sv = NEWSV(92,0);
097ee67d 7215
86554af2 7216#if defined(Strtol) && defined(Strtoul)
0b7fceb9
MU
7217
7218 /*
0b7fceb9
MU
7219 strtol/strtoll sets errno to ERANGE if the number is too big
7220 for an integer. We try to do an integer conversion first
7221 if no characters indicating "float" have been found.
7222 */
7223
7224 if (!floatit) {
0b7fceb9
MU
7225 IV iv;
7226 UV uv;
7227 errno = 0;
c239479b 7228 if (*PL_tokenbuf == '-')
96989be3 7229 iv = Strtol(PL_tokenbuf, (char**)NULL, 10);
c239479b 7230 else
96989be3 7231 uv = Strtoul(PL_tokenbuf, (char**)NULL, 10);
55eb892c 7232 if (errno)
86554af2 7233 floatit = TRUE; /* Probably just too large. */
0b7fceb9
MU
7234 else if (*PL_tokenbuf == '-')
7235 sv_setiv(sv, iv);
86554af2
JH
7236 else if (uv <= IV_MAX)
7237 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
0b7fceb9 7238 else
c239479b 7239 sv_setuv(sv, uv);
0b7fceb9
MU
7240 }
7241 if (floatit) {
86554af2
JH
7242 nv = Atof(PL_tokenbuf);
7243 sv_setnv(sv, nv);
7244 }
7245#else
7246 /*
7247 No working strtou?ll?.
7248
7249 Unfortunately atol() doesn't do range checks (returning
7250 LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows)
7251 everywhere [1], so we cannot use use atol() (or atoll()).
7252 If we could, they would be used, as Atol(), very much like
7253 Strtol() and Strtoul() are used above.
7254
7255 [1] XXX Configure test needed to check for atol()
d6c14000
JH
7256 (and atoll()) overflow behaviour XXX
7257
7258 --jhi
86554af2
JH
7259
7260 We need to do this the hard way. */
7261
7262 nv = Atof(PL_tokenbuf);
7263
7264 /* See if we can make do with an integer value without loss of
7265 precision. We use U_V to cast to a UV, because some
7266 compilers have issues. Then we try casting it back and see
7267 if it was the same [1]. We only do this if we know we
7268 specifically read an integer. If floatit is true, then we
4e553d73 7269 don't need to do the conversion at all.
86554af2
JH
7270
7271 [1] Note that this is lossy if our NVs cannot preserve our
d6c14000
JH
7272 UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean)
7273 and NV_PRESERVES_UV_BITS (a number), but in general we really
7274 do hope all such potentially lossy platforms have strtou?ll?
7275 to do a lossless IV/UV conversion.
7276
7277 Maybe could do some tricks with DBL_DIG, LDBL_DIG and
7278 DBL_MANT_DIG and LDBL_MANT_DIG (these are already available
7279 as NV_DIG and NV_MANT_DIG)?
4e553d73 7280
d6c14000 7281 --jhi
86554af2
JH
7282 */
7283 {
7284 UV uv = U_V(nv);
7285 if (!floatit && (NV)uv == nv) {
7286 if (uv <= IV_MAX)
7287 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
7288 else
7289 sv_setuv(sv, uv);
7290 }
7291 else
7292 sv_setnv(sv, nv);
96989be3 7293 }
0b7fceb9 7294#endif
b8403495
JH
7295 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7296 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7297 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7298 (floatit ? "float" : "integer"),
7299 sv, Nullsv, NULL);
378cc40b 7300 break;
0b7fceb9 7301
e312add1 7302 /* if it starts with a v, it could be a v-string */
a7cb1f99 7303 case 'v':
dd629d5b 7304vstring:
a7cb1f99 7305 {
a7cb1f99
GS
7306 char *pos = s;
7307 pos++;
dd629d5b 7308 while (isDIGIT(*pos) || *pos == '_')
a7cb1f99 7309 pos++;
e526c9e6 7310 if (!isALPHA(*pos)) {
c4d5f83a 7311 UV rev;
ad391ad9 7312 U8 tmpbuf[UTF8_MAXLEN+1];
a7cb1f99 7313 U8 *tmpend;
a7cb1f99
GS
7314 s++; /* get past 'v' */
7315
7316 sv = NEWSV(92,5);
a7cb1f99
GS
7317 sv_setpvn(sv, "", 0);
7318
e526c9e6 7319 for (;;) {
3cb0bbe5
GS
7320 if (*s == '0' && isDIGIT(s[1]))
7321 yyerror("Octal number in vector unsupported");
dd629d5b
GS
7322 rev = 0;
7323 {
7324 /* this is atoi() that tolerates underscores */
7325 char *end = pos;
7326 UV mult = 1;
7327 while (--end >= s) {
7328 UV orev;
7329 if (*end == '_')
7330 continue;
7331 orev = rev;
7332 rev += (*end - '0') * mult;
7333 mult *= 10;
7334 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
7335 Perl_warner(aTHX_ WARN_OVERFLOW,
7336 "Integer overflow in decimal number");
7337 }
7338 }
9041c2e3
NIS
7339 /* Append native character for the rev point */
7340 tmpend = uvchr_to_utf8(tmpbuf, rev);
e526c9e6 7341 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
c4d5f83a
NIS
7342 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
7343 SvUTF8_on(sv);
e526c9e6
GS
7344 if (*pos == '.' && isDIGIT(pos[1]))
7345 s = ++pos;
3818b22b 7346 else {
e526c9e6
GS
7347 s = pos;
7348 break;
3818b22b 7349 }
dd629d5b 7350 while (isDIGIT(*pos) || *pos == '_')
e526c9e6
GS
7351 pos++;
7352 }
a7cb1f99 7353 SvPOK_on(sv);
a7cb1f99 7354 SvREADONLY_on(sv);
a7cb1f99
GS
7355 }
7356 }
7357 break;
79072805 7358 }
a687059c 7359
02aa26ce
NT
7360 /* make the op for the constant and return */
7361
a7cb1f99 7362 if (sv)
b73d6f50 7363 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7364 else
b73d6f50 7365 lvalp->opval = Nullop;
a687059c 7366
378cc40b
LW
7367 return s;
7368}
7369
76e3520e 7370STATIC char *
cea2e8a9 7371S_scan_formline(pTHX_ register char *s)
378cc40b 7372{
79072805 7373 register char *eol;
378cc40b 7374 register char *t;
79cb57f6 7375 SV *stuff = newSVpvn("",0);
79072805 7376 bool needargs = FALSE;
378cc40b 7377
79072805 7378 while (!needargs) {
c2e66d9e 7379 if (*s == '.' || *s == /*{*/'}') {
79072805 7380 /*SUPPRESS 530*/
51882d45 7381#ifdef PERL_STRICT_CR
bf4acbe4 7382 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7383#else
bf4acbe4 7384 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7385#endif
6a65c6a0 7386 if (*t == '\n' || t == PL_bufend)
79072805
LW
7387 break;
7388 }
3280af22 7389 if (PL_in_eval && !PL_rsfp) {
93a17b20 7390 eol = strchr(s,'\n');
0f85fab0 7391 if (!eol++)
3280af22 7392 eol = PL_bufend;
0f85fab0
LW
7393 }
7394 else
3280af22 7395 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7396 if (*s != '#') {
a0d0e21e
LW
7397 for (t = s; t < eol; t++) {
7398 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7399 needargs = FALSE;
7400 goto enough; /* ~~ must be first line in formline */
378cc40b 7401 }
a0d0e21e
LW
7402 if (*t == '@' || *t == '^')
7403 needargs = TRUE;
378cc40b 7404 }
a0d0e21e 7405 sv_catpvn(stuff, s, eol-s);
2dc4c65b
GS
7406#ifndef PERL_STRICT_CR
7407 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7408 char *end = SvPVX(stuff) + SvCUR(stuff);
7409 end[-2] = '\n';
7410 end[-1] = '\0';
7411 SvCUR(stuff)--;
7412 }
7413#endif
79072805
LW
7414 }
7415 s = eol;
3280af22
NIS
7416 if (PL_rsfp) {
7417 s = filter_gets(PL_linestr, PL_rsfp, 0);
7418 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7419 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7420 PL_last_lop = PL_last_uni = Nullch;
79072805 7421 if (!s) {
3280af22 7422 s = PL_bufptr;
79072805 7423 yyerror("Format not terminated");
378cc40b
LW
7424 break;
7425 }
378cc40b 7426 }
463ee0b2 7427 incline(s);
79072805 7428 }
a0d0e21e
LW
7429 enough:
7430 if (SvCUR(stuff)) {
3280af22 7431 PL_expect = XTERM;
79072805 7432 if (needargs) {
3280af22
NIS
7433 PL_lex_state = LEX_NORMAL;
7434 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7435 force_next(',');
7436 }
a0d0e21e 7437 else
3280af22
NIS
7438 PL_lex_state = LEX_FORMLINE;
7439 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7440 force_next(THING);
3280af22 7441 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7442 force_next(LSTOP);
378cc40b 7443 }
79072805 7444 else {
8990e307 7445 SvREFCNT_dec(stuff);
3280af22
NIS
7446 PL_lex_formbrack = 0;
7447 PL_bufptr = s;
79072805
LW
7448 }
7449 return s;
378cc40b 7450}
a687059c 7451
76e3520e 7452STATIC void
cea2e8a9 7453S_set_csh(pTHX)
a687059c 7454{
ae986130 7455#ifdef CSH
3280af22
NIS
7456 if (!PL_cshlen)
7457 PL_cshlen = strlen(PL_cshname);
ae986130 7458#endif
a687059c 7459}
463ee0b2 7460
ba6d6ac9 7461I32
864dbfa3 7462Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7463{
3280af22
NIS
7464 I32 oldsavestack_ix = PL_savestack_ix;
7465 CV* outsidecv = PL_compcv;
748a9306 7466 AV* comppadlist;
8990e307 7467
3280af22
NIS
7468 if (PL_compcv) {
7469 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7470 }
7766f137 7471 SAVEI32(PL_subline);
3280af22
NIS
7472 save_item(PL_subname);
7473 SAVEI32(PL_padix);
354992b1 7474 SAVECOMPPAD();
3280af22
NIS
7475 SAVESPTR(PL_comppad_name);
7476 SAVESPTR(PL_compcv);
7477 SAVEI32(PL_comppad_name_fill);
7478 SAVEI32(PL_min_intro_pending);
7479 SAVEI32(PL_max_intro_pending);
7480 SAVEI32(PL_pad_reset_pending);
7481
7482 PL_compcv = (CV*)NEWSV(1104,0);
7483 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7484 CvFLAGS(PL_compcv) |= flags;
7485
7486 PL_comppad = newAV();
7487 av_push(PL_comppad, Nullsv);
7488 PL_curpad = AvARRAY(PL_comppad);
7489 PL_comppad_name = newAV();
7490 PL_comppad_name_fill = 0;
7491 PL_min_intro_pending = 0;
7492 PL_padix = 0;
57843af0 7493 PL_subline = CopLINE(PL_curcop);
6d4ff0d2 7494#ifdef USE_THREADS
79cb57f6 7495 av_store(PL_comppad_name, 0, newSVpvn("@_", 2));
533c011a
NIS
7496 PL_curpad[0] = (SV*)newAV();
7497 SvPADMY_on(PL_curpad[0]); /* XXX Needed? */
6d4ff0d2 7498#endif /* USE_THREADS */
748a9306
LW
7499
7500 comppadlist = newAV();
7501 AvREAL_off(comppadlist);
3280af22
NIS
7502 av_store(comppadlist, 0, (SV*)PL_comppad_name);
7503 av_store(comppadlist, 1, (SV*)PL_comppad);
748a9306 7504
3280af22
NIS
7505 CvPADLIST(PL_compcv) = comppadlist;
7506 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
11343788 7507#ifdef USE_THREADS
533c011a
NIS
7508 CvOWNER(PL_compcv) = 0;
7509 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex);
7510 MUTEX_INIT(CvMUTEXP(PL_compcv));
11343788 7511#endif /* USE_THREADS */
748a9306 7512
8990e307
LW
7513 return oldsavestack_ix;
7514}
7515
084592ab
CN
7516#ifdef __SC__
7517#pragma segment Perl_yylex
7518#endif
8990e307 7519int
864dbfa3 7520Perl_yywarn(pTHX_ char *s)
8990e307 7521{
faef0170 7522 PL_in_eval |= EVAL_WARNONLY;
748a9306 7523 yyerror(s);
faef0170 7524 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7525 return 0;
8990e307
LW
7526}
7527
7528int
864dbfa3 7529Perl_yyerror(pTHX_ char *s)
463ee0b2 7530{
68dc0745 7531 char *where = NULL;
7532 char *context = NULL;
7533 int contlen = -1;
46fc3d4c 7534 SV *msg;
463ee0b2 7535
3280af22 7536 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7537 where = "at EOF";
3280af22
NIS
7538 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7539 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
7540 while (isSPACE(*PL_oldoldbufptr))
7541 PL_oldoldbufptr++;
7542 context = PL_oldoldbufptr;
7543 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7544 }
3280af22
NIS
7545 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7546 PL_oldbufptr != PL_bufptr) {
7547 while (isSPACE(*PL_oldbufptr))
7548 PL_oldbufptr++;
7549 context = PL_oldbufptr;
7550 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7551 }
7552 else if (yychar > 255)
68dc0745 7553 where = "next token ???";
cdfb297e
GS
7554#ifdef USE_PURE_BISON
7555/* GNU Bison sets the value -2 */
7556 else if (yychar == -2) {
7557#else
463ee0b2 7558 else if ((yychar & 127) == 127) {
cdfb297e 7559#endif
3280af22
NIS
7560 if (PL_lex_state == LEX_NORMAL ||
7561 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7562 where = "at end of line";
3280af22 7563 else if (PL_lex_inpat)
68dc0745 7564 where = "within pattern";
463ee0b2 7565 else
68dc0745 7566 where = "within string";
463ee0b2 7567 }
46fc3d4c 7568 else {
79cb57f6 7569 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7570 if (yychar < 32)
cea2e8a9 7571 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7572 else if (isPRINT_LC(yychar))
cea2e8a9 7573 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7574 else
cea2e8a9 7575 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7576 where = SvPVX(where_sv);
463ee0b2 7577 }
46fc3d4c 7578 msg = sv_2mortal(newSVpv(s, 0));
ed094faf
GS
7579 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
7580 CopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7581 if (context)
cea2e8a9 7582 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7583 else
cea2e8a9 7584 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7585 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7586 Perl_sv_catpvf(aTHX_ msg,
57def98f 7587 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7588 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7589 PL_multi_end = 0;
a0d0e21e 7590 }
faef0170 7591 if (PL_in_eval & EVAL_WARNONLY)
894356b3 7592 Perl_warn(aTHX_ "%"SVf, msg);
463ee0b2 7593 else
5a844595 7594 qerror(msg);
c7d6bfb2
GS
7595 if (PL_error_count >= 10) {
7596 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7597 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
c7d6bfb2
GS
7598 ERRSV, CopFILE(PL_curcop));
7599 else
7600 Perl_croak(aTHX_ "%s has too many errors.\n",
7601 CopFILE(PL_curcop));
7602 }
3280af22
NIS
7603 PL_in_my = 0;
7604 PL_in_my_stash = Nullhv;
463ee0b2
LW
7605 return 0;
7606}
084592ab
CN
7607#ifdef __SC__
7608#pragma segment Main
7609#endif
4e35701f 7610
b250498f 7611STATIC char*
3ae08724 7612S_swallow_bom(pTHX_ U8 *s)
01ec43d0 7613{
b250498f
GS
7614 STRLEN slen;
7615 slen = SvCUR(PL_linestr);
7616 switch (*s) {
4e553d73
NIS
7617 case 0xFF:
7618 if (s[1] == 0xFE) {
01ec43d0 7619 /* UTF-16 little-endian */
3ae08724 7620 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
01ec43d0
GS
7621 Perl_croak(aTHX_ "Unsupported script encoding");
7622#ifndef PERL_NO_UTF16_FILTER
dea0fc0b 7623 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
3ae08724 7624 s += 2;
dea0fc0b
JH
7625 if (PL_bufend > (char*)s) {
7626 U8 *news;
7627 I32 newlen;
7628
7629 filter_add(utf16rev_textfilter, NULL);
7630 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
f72f5f89
JH
7631 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7632 PL_bufend - (char*)s - 1,
dea0fc0b
JH
7633 &newlen);
7634 Copy(news, s, newlen, U8);
7635 SvCUR_set(PL_linestr, newlen);
7636 PL_bufend = SvPVX(PL_linestr) + newlen;
7637 news[newlen++] = '\0';
7638 Safefree(news);
7639 }
b250498f 7640#else
01ec43d0 7641 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7642#endif
01ec43d0
GS
7643 }
7644 break;
78ae23f5 7645 case 0xFE:
3ae08724 7646 if (s[1] == 0xFF) { /* UTF-16 big-endian */
01ec43d0 7647#ifndef PERL_NO_UTF16_FILTER
dea0fc0b
JH
7648 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7649 s += 2;
7650 if (PL_bufend > (char *)s) {
7651 U8 *news;
7652 I32 newlen;
7653
7654 filter_add(utf16_textfilter, NULL);
7655 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7656 PL_bufend = (char*)utf16_to_utf8(s, news,
7657 PL_bufend - (char*)s,
7658 &newlen);
7659 Copy(news, s, newlen, U8);
7660 SvCUR_set(PL_linestr, newlen);
7661 PL_bufend = SvPVX(PL_linestr) + newlen;
7662 news[newlen++] = '\0';
7663 Safefree(news);
7664 }
b250498f 7665#else
01ec43d0 7666 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7667#endif
01ec43d0
GS
7668 }
7669 break;
3ae08724
GS
7670 case 0xEF:
7671 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
dea0fc0b 7672 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
01ec43d0
GS
7673 s += 3; /* UTF-8 */
7674 }
7675 break;
7676 case 0:
7677 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
3ae08724 7678 s[2] == 0xFE && s[3] == 0xFF)
01ec43d0
GS
7679 {
7680 Perl_croak(aTHX_ "Unsupported script encoding");
7681 }
7682 }
b8f84bb2 7683 return (char*)s;
b250498f 7684}
4755096e
GS
7685
7686#ifdef PERL_OBJECT
7687#include "XSUB.h"
7688#endif
7689
7690/*
7691 * restore_rsfp
7692 * Restore a source filter.
7693 */
7694
7695static void
7696restore_rsfp(pTHXo_ void *f)
7697{
7698 PerlIO *fp = (PerlIO*)f;
7699
7700 if (PL_rsfp == PerlIO_stdin())
7701 PerlIO_clearerr(PL_rsfp);
7702 else if (PL_rsfp && (PL_rsfp != fp))
7703 PerlIO_close(PL_rsfp);
7704 PL_rsfp = fp;
7705}
6e3aabd6
GS
7706
7707#ifndef PERL_NO_UTF16_FILTER
7708static I32
7709utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
7710{
7711 I32 count = FILTER_READ(idx+1, sv, maxlen);
7712 if (count) {
7713 U8* tmps;
7714 U8* tend;
dea0fc0b 7715 I32 newlen;
6e3aabd6 7716 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
f72f5f89
JH
7717 if (!*SvPV_nolen(sv))
7718 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7719 return count;
4e553d73 7720
dea0fc0b 7721 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7722 sv_usepvn(sv, (char*)tmps, tend - tmps);
7723 }
7724 return count;
7725}
7726
7727static I32
7728utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
7729{
7730 I32 count = FILTER_READ(idx+1, sv, maxlen);
7731 if (count) {
7732 U8* tmps;
7733 U8* tend;
dea0fc0b 7734 I32 newlen;
f72f5f89
JH
7735 if (!*SvPV_nolen(sv))
7736 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7737 return count;
7738
6e3aabd6 7739 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
dea0fc0b 7740 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7741 sv_usepvn(sv, (char*)tmps, tend - tmps);
7742 }
7743 return count;
7744}
7745#endif