This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Avoid AV and HV in perlio.c by inventing PerlIO_list_t which is AV-ish
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
bc89e66f 3 * Copyright (c) 1991-2001, Larry Wall
a687059c 4 *
d48672a2
LW
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
378cc40b 7 *
a0d0e21e
LW
8 */
9
10/*
11 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
12 */
13
9cbb5ea2
GS
14/*
15 * This file is the lexer for Perl. It's closely linked to the
4e553d73 16 * parser, perly.y.
ffb4593c
NT
17 *
18 * The main routine is yylex(), which returns the next token.
19 */
20
378cc40b 21#include "EXTERN.h"
864dbfa3 22#define PERL_IN_TOKE_C
378cc40b 23#include "perl.h"
378cc40b 24
d3b6f988
GS
25#define yychar PL_yychar
26#define yylval PL_yylval
27
fc36a67e 28static char ident_too_long[] = "Identifier too long";
8903cb82 29
51371543 30static void restore_rsfp(pTHXo_ void *f);
6e3aabd6
GS
31#ifndef PERL_NO_UTF16_FILTER
32static I32 utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
33static I32 utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen);
34#endif
51371543 35
9059aa12
LW
36#define XFAKEBRACK 128
37#define XENUMMASK 127
38
2b9d42f0
NIS
39#ifdef EBCDIC
40/* For now 'use utf8' does not affect tokenizer on EBCDIC */
41#define UTF (PL_linestr && DO_UTF8(PL_linestr))
42#else
43#define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
44#endif
a0ed51b3 45
4e553d73 46/* In variables name $^X, these are the legal values for X.
2b92dfce
GS
47 * 1999-02-27 mjd-perl-patch@plover.com */
48#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
49
bf4acbe4
GS
50/* On MacOS, respect nonbreaking spaces */
51#ifdef MACOS_TRADITIONAL
52#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
53#else
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
55#endif
56
ffb4593c
NT
57/* LEX_* are values for PL_lex_state, the state of the lexer.
58 * They are arranged oddly so that the guard on the switch statement
79072805
LW
59 * can get by with a single comparison (if the compiler is smart enough).
60 */
61
fb73857a 62/* #define LEX_NOTPARSING 11 is done in perl.h. */
63
55497cff 64#define LEX_NORMAL 10
65#define LEX_INTERPNORMAL 9
66#define LEX_INTERPCASEMOD 8
67#define LEX_INTERPPUSH 7
68#define LEX_INTERPSTART 6
69#define LEX_INTERPEND 5
70#define LEX_INTERPENDMAYBE 4
71#define LEX_INTERPCONCAT 3
72#define LEX_INTERPCONST 2
73#define LEX_FORMLINE 1
74#define LEX_KNOWNEXT 0
79072805 75
79072805
LW
76#ifdef ff_next
77#undef ff_next
d48672a2
LW
78#endif
79
a1a0e61e 80#ifdef USE_PURE_BISON
dba4d153
JH
81# ifndef YYMAXLEVEL
82# define YYMAXLEVEL 100
83# endif
20141f0e
IRC
84YYSTYPE* yylval_pointer[YYMAXLEVEL];
85int* yychar_pointer[YYMAXLEVEL];
6f202aea 86int yyactlevel = -1;
22c35a8c
GS
87# undef yylval
88# undef yychar
20141f0e
IRC
89# define yylval (*yylval_pointer[yyactlevel])
90# define yychar (*yychar_pointer[yyactlevel])
91# define PERL_YYLEX_PARAM yylval_pointer[yyactlevel],yychar_pointer[yyactlevel]
4e553d73 92# undef yylex
dba4d153 93# define yylex() Perl_yylex_r(aTHX_ yylval_pointer[yyactlevel],yychar_pointer[yyactlevel])
a1a0e61e
TD
94#endif
95
79072805 96#include "keywords.h"
fe14fcc3 97
ffb4593c
NT
98/* CLINE is a macro that ensures PL_copline has a sane value */
99
ae986130
LW
100#ifdef CLINE
101#undef CLINE
102#endif
57843af0 103#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 104
ffb4593c
NT
105/*
106 * Convenience functions to return different tokens and prime the
9cbb5ea2 107 * lexer for the next token. They all take an argument.
ffb4593c
NT
108 *
109 * TOKEN : generic token (used for '(', DOLSHARP, etc)
110 * OPERATOR : generic operator
111 * AOPERATOR : assignment operator
112 * PREBLOCK : beginning the block after an if, while, foreach, ...
113 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
114 * PREREF : *EXPR where EXPR is not a simple identifier
115 * TERM : expression term
116 * LOOPX : loop exiting command (goto, last, dump, etc)
117 * FTST : file test operator
118 * FUN0 : zero-argument function
2d2e263d 119 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
120 * BOop : bitwise or or xor
121 * BAop : bitwise and
122 * SHop : shift operator
123 * PWop : power operator
9cbb5ea2 124 * PMop : pattern-matching operator
ffb4593c
NT
125 * Aop : addition-level operator
126 * Mop : multiplication-level operator
127 * Eop : equality-testing operator
e5edeb50 128 * Rop : relational operator <= != gt
ffb4593c
NT
129 *
130 * Also see LOP and lop() below.
131 */
132
075953c3
JH
133/* Note that REPORT() and REPORT2() will be expressions that supply
134 * their own trailing comma, not suitable for statements as such. */
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
075953c3
JH
136# define REPORT(x,retval) tokereport(x,s,(int)retval),
137# define REPORT2(x,retval) tokereport(x,s, yylval.ival),
998054bd 138#else
075953c3
JH
139# define REPORT(x,retval)
140# define REPORT2(x,retval)
998054bd
SC
141#endif
142
075953c3
JH
143#define TOKEN(retval) return (REPORT2("token",retval) PL_bufptr = s,(int)retval)
144#define OPERATOR(retval) return (REPORT2("operator",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval)
145#define AOPERATOR(retval) return ao((REPORT2("aop",retval) PL_expect = XTERM, PL_bufptr = s,(int)retval))
146#define PREBLOCK(retval) return (REPORT2("preblock",retval) PL_expect = XBLOCK,PL_bufptr = s,(int)retval)
147#define PRETERMBLOCK(retval) return (REPORT2("pretermblock",retval) PL_expect = XTERMBLOCK,PL_bufptr = s,(int)retval)
148#define PREREF(retval) return (REPORT2("preref",retval) PL_expect = XREF,PL_bufptr = s,(int)retval)
149#define TERM(retval) return (CLINE, REPORT2("term",retval) PL_expect = XOPERATOR, PL_bufptr = s,(int)retval)
150#define LOOPX(f) return(yylval.ival=f, REPORT("loopx",f) PL_expect = XTERM,PL_bufptr = s,(int)LOOPEX)
151#define FTST(f) return(yylval.ival=f, REPORT("ftst",f) PL_expect = XTERM,PL_bufptr = s,(int)UNIOP)
152#define FUN0(f) return(yylval.ival = f, REPORT("fun0",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC0)
153#define FUN1(f) return(yylval.ival = f, REPORT("fun1",f) PL_expect = XOPERATOR,PL_bufptr = s,(int)FUNC1)
154#define BOop(f) return ao((yylval.ival=f, REPORT("bitorop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITOROP))
155#define BAop(f) return ao((yylval.ival=f, REPORT("bitandop",f) PL_expect = XTERM,PL_bufptr = s,(int)BITANDOP))
156#define SHop(f) return ao((yylval.ival=f, REPORT("shiftop",f) PL_expect = XTERM,PL_bufptr = s,(int)SHIFTOP))
157#define PWop(f) return ao((yylval.ival=f, REPORT("powop",f) PL_expect = XTERM,PL_bufptr = s,(int)POWOP))
158#define PMop(f) return(yylval.ival=f, REPORT("matchop",f) PL_expect = XTERM,PL_bufptr = s,(int)MATCHOP)
159#define Aop(f) return ao((yylval.ival=f, REPORT("add",f) PL_expect = XTERM,PL_bufptr = s,(int)ADDOP))
160#define Mop(f) return ao((yylval.ival=f, REPORT("mul",f) PL_expect = XTERM,PL_bufptr = s,(int)MULOP))
161#define Eop(f) return(yylval.ival=f, REPORT("eq",f) PL_expect = XTERM,PL_bufptr = s,(int)EQOP)
162#define Rop(f) return(yylval.ival=f, REPORT("rel",f) PL_expect = XTERM,PL_bufptr = s,(int)RELOP)
2f3197b3 163
a687059c
LW
164/* This bit of chicanery makes a unary function followed by
165 * a parenthesis into a function with one argument, highest precedence.
166 */
2f3197b3 167#define UNI(f) return(yylval.ival = f, \
075953c3 168 REPORT("uni",f) \
3280af22
NIS
169 PL_expect = XTERM, \
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
a687059c
LW
173 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
174
79072805 175#define UNIBRACK(f) return(yylval.ival = f, \
075953c3 176 REPORT("uni",f) \
3280af22
NIS
177 PL_bufptr = s, \
178 PL_last_uni = PL_oldbufptr, \
79072805
LW
179 (*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )
180
9f68db38 181/* grandfather return to old style */
3280af22 182#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 183
8fa7f367
JH
184#ifdef DEBUGGING
185
2d00ba3b 186STATIC void
61b2116b 187S_tokereport(pTHX_ char *thing, char* s, I32 rv)
9041c2e3 188{
998054bd 189 DEBUG_T({
9c5ffd7c 190 SV* report = newSVpv(thing, 0);
29b291f7
RB
191 Perl_sv_catpvf(aTHX_ report, ":line %d:%"IVdf":", CopLINE(PL_curcop),
192 (IV)rv);
998054bd
SC
193
194 if (s - PL_bufptr > 0)
195 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
196 else {
197 if (PL_oldbufptr && *PL_oldbufptr)
198 sv_catpv(report, PL_tokenbuf);
199 }
200 PerlIO_printf(Perl_debug_log, "### %s\n", SvPV_nolen(report));
5f80b19c 201 });
998054bd
SC
202}
203
8fa7f367
JH
204#endif
205
ffb4593c
NT
206/*
207 * S_ao
208 *
209 * This subroutine detects &&= and ||= and turns an ANDAND or OROR
210 * into an OP_ANDASSIGN or OP_ORASSIGN
211 */
212
76e3520e 213STATIC int
cea2e8a9 214S_ao(pTHX_ int toketype)
a0d0e21e 215{
3280af22
NIS
216 if (*PL_bufptr == '=') {
217 PL_bufptr++;
a0d0e21e
LW
218 if (toketype == ANDAND)
219 yylval.ival = OP_ANDASSIGN;
220 else if (toketype == OROR)
221 yylval.ival = OP_ORASSIGN;
222 toketype = ASSIGNOP;
223 }
224 return toketype;
225}
226
ffb4593c
NT
227/*
228 * S_no_op
229 * When Perl expects an operator and finds something else, no_op
230 * prints the warning. It always prints "<something> found where
231 * operator expected. It prints "Missing semicolon on previous line?"
232 * if the surprise occurs at the start of the line. "do you need to
233 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
234 * where the compiler doesn't know if foo is a method call or a function.
235 * It prints "Missing operator before end of line" if there's nothing
236 * after the missing operator, or "... before <...>" if there is something
237 * after the missing operator.
238 */
239
76e3520e 240STATIC void
cea2e8a9 241S_no_op(pTHX_ char *what, char *s)
463ee0b2 242{
3280af22
NIS
243 char *oldbp = PL_bufptr;
244 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 245
1189a94a
GS
246 if (!s)
247 s = oldbp;
07c798fb 248 else
1189a94a 249 PL_bufptr = s;
cea2e8a9 250 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
748a9306 251 if (is_first)
cea2e8a9 252 Perl_warn(aTHX_ "\t(Missing semicolon on previous line?)\n");
7e2040f0 253 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
748a9306 254 char *t;
7e2040f0 255 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
3280af22 256 if (t < PL_bufptr && isSPACE(*t))
cea2e8a9 257 Perl_warn(aTHX_ "\t(Do you need to predeclare %.*s?)\n",
3280af22 258 t - PL_oldoldbufptr, PL_oldoldbufptr);
748a9306 259 }
07c798fb
HS
260 else {
261 assert(s >= oldbp);
cea2e8a9 262 Perl_warn(aTHX_ "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
07c798fb 263 }
3280af22 264 PL_bufptr = oldbp;
8990e307
LW
265}
266
ffb4593c
NT
267/*
268 * S_missingterm
269 * Complain about missing quote/regexp/heredoc terminator.
270 * If it's called with (char *)NULL then it cauterizes the line buffer.
271 * If we're in a delimited string and the delimiter is a control
272 * character, it's reformatted into a two-char sequence like ^C.
273 * This is fatal.
274 */
275
76e3520e 276STATIC void
cea2e8a9 277S_missingterm(pTHX_ char *s)
8990e307
LW
278{
279 char tmpbuf[3];
280 char q;
281 if (s) {
282 char *nl = strrchr(s,'\n');
d2719217 283 if (nl)
8990e307
LW
284 *nl = '\0';
285 }
9d116dd7
JH
286 else if (
287#ifdef EBCDIC
288 iscntrl(PL_multi_close)
289#else
290 PL_multi_close < 32 || PL_multi_close == 127
291#endif
292 ) {
8990e307 293 *tmpbuf = '^';
3280af22 294 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
295 s = "\\n";
296 tmpbuf[2] = '\0';
297 s = tmpbuf;
298 }
299 else {
3280af22 300 *tmpbuf = PL_multi_close;
8990e307
LW
301 tmpbuf[1] = '\0';
302 s = tmpbuf;
303 }
304 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 305 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 306}
79072805 307
ffb4593c
NT
308/*
309 * Perl_deprecate
ffb4593c
NT
310 */
311
79072805 312void
864dbfa3 313Perl_deprecate(pTHX_ char *s)
a0d0e21e 314{
599cee73 315 if (ckWARN(WARN_DEPRECATED))
cea2e8a9 316 Perl_warner(aTHX_ WARN_DEPRECATED, "Use of %s is deprecated", s);
a0d0e21e
LW
317}
318
ffb4593c
NT
319/*
320 * depcom
9cbb5ea2 321 * Deprecate a comma-less variable list.
ffb4593c
NT
322 */
323
76e3520e 324STATIC void
cea2e8a9 325S_depcom(pTHX)
a0d0e21e
LW
326{
327 deprecate("comma-less variable list");
328}
329
ffb4593c 330/*
9cbb5ea2
GS
331 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
332 * utf16-to-utf8-reversed.
ffb4593c
NT
333 */
334
c39cd008
GS
335#ifdef PERL_CR_FILTER
336static void
337strip_return(SV *sv)
338{
339 register char *s = SvPVX(sv);
340 register char *e = s + SvCUR(sv);
341 /* outer loop optimized to do nothing if there are no CR-LFs */
342 while (s < e) {
343 if (*s++ == '\r' && *s == '\n') {
344 /* hit a CR-LF, need to copy the rest */
345 register char *d = s - 1;
346 *d++ = *s++;
347 while (s < e) {
348 if (*s == '\r' && s[1] == '\n')
349 s++;
350 *d++ = *s++;
351 }
352 SvCUR(sv) -= s - d;
353 return;
354 }
355 }
356}
a868473f 357
76e3520e 358STATIC I32
c39cd008 359S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 360{
c39cd008
GS
361 I32 count = FILTER_READ(idx+1, sv, maxlen);
362 if (count > 0 && !maxlen)
363 strip_return(sv);
364 return count;
a868473f
NIS
365}
366#endif
367
ffb4593c
NT
368/*
369 * Perl_lex_start
9cbb5ea2
GS
370 * Initialize variables. Uses the Perl save_stack to save its state (for
371 * recursive calls to the parser).
ffb4593c
NT
372 */
373
a0d0e21e 374void
864dbfa3 375Perl_lex_start(pTHX_ SV *line)
79072805 376{
8990e307
LW
377 char *s;
378 STRLEN len;
379
3280af22
NIS
380 SAVEI32(PL_lex_dojoin);
381 SAVEI32(PL_lex_brackets);
3280af22
NIS
382 SAVEI32(PL_lex_casemods);
383 SAVEI32(PL_lex_starts);
384 SAVEI32(PL_lex_state);
7766f137 385 SAVEVPTR(PL_lex_inpat);
3280af22 386 SAVEI32(PL_lex_inwhat);
18b09519
GS
387 if (PL_lex_state == LEX_KNOWNEXT) {
388 I32 toke = PL_nexttoke;
389 while (--toke >= 0) {
390 SAVEI32(PL_nexttype[toke]);
391 SAVEVPTR(PL_nextval[toke]);
392 }
393 SAVEI32(PL_nexttoke);
18b09519 394 }
57843af0 395 SAVECOPLINE(PL_curcop);
3280af22
NIS
396 SAVEPPTR(PL_bufptr);
397 SAVEPPTR(PL_bufend);
398 SAVEPPTR(PL_oldbufptr);
399 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
400 SAVEPPTR(PL_last_lop);
401 SAVEPPTR(PL_last_uni);
3280af22
NIS
402 SAVEPPTR(PL_linestart);
403 SAVESPTR(PL_linestr);
404 SAVEPPTR(PL_lex_brackstack);
405 SAVEPPTR(PL_lex_casestack);
c76ac1ee 406 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
407 SAVESPTR(PL_lex_stuff);
408 SAVEI32(PL_lex_defer);
09bef843 409 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 410 SAVESPTR(PL_lex_repl);
bebdddfc
GS
411 SAVEINT(PL_expect);
412 SAVEINT(PL_lex_expect);
3280af22
NIS
413
414 PL_lex_state = LEX_NORMAL;
415 PL_lex_defer = 0;
416 PL_expect = XSTATE;
417 PL_lex_brackets = 0;
3280af22
NIS
418 New(899, PL_lex_brackstack, 120, char);
419 New(899, PL_lex_casestack, 12, char);
420 SAVEFREEPV(PL_lex_brackstack);
421 SAVEFREEPV(PL_lex_casestack);
422 PL_lex_casemods = 0;
423 *PL_lex_casestack = '\0';
424 PL_lex_dojoin = 0;
425 PL_lex_starts = 0;
426 PL_lex_stuff = Nullsv;
427 PL_lex_repl = Nullsv;
428 PL_lex_inpat = 0;
76be56bc 429 PL_nexttoke = 0;
3280af22 430 PL_lex_inwhat = 0;
09bef843 431 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
432 PL_linestr = line;
433 if (SvREADONLY(PL_linestr))
434 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
435 s = SvPV(PL_linestr, len);
8990e307 436 if (len && s[len-1] != ';') {
3280af22
NIS
437 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
438 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
439 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 440 }
3280af22
NIS
441 SvTEMP_off(PL_linestr);
442 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
443 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 444 PL_last_lop = PL_last_uni = Nullch;
3280af22 445 SvREFCNT_dec(PL_rs);
79cb57f6 446 PL_rs = newSVpvn("\n", 1);
3280af22 447 PL_rsfp = 0;
79072805 448}
a687059c 449
ffb4593c
NT
450/*
451 * Perl_lex_end
9cbb5ea2
GS
452 * Finalizer for lexing operations. Must be called when the parser is
453 * done with the lexer.
ffb4593c
NT
454 */
455
463ee0b2 456void
864dbfa3 457Perl_lex_end(pTHX)
463ee0b2 458{
3280af22 459 PL_doextract = FALSE;
463ee0b2
LW
460}
461
ffb4593c
NT
462/*
463 * S_incline
464 * This subroutine has nothing to do with tilting, whether at windmills
465 * or pinball tables. Its name is short for "increment line". It
57843af0 466 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 467 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
468 * # line 500 "foo.pm"
469 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
470 */
471
76e3520e 472STATIC void
cea2e8a9 473S_incline(pTHX_ char *s)
463ee0b2
LW
474{
475 char *t;
476 char *n;
73659bf1 477 char *e;
463ee0b2 478 char ch;
463ee0b2 479
57843af0 480 CopLINE_inc(PL_curcop);
463ee0b2
LW
481 if (*s++ != '#')
482 return;
bf4acbe4 483 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
484 if (strnEQ(s, "line", 4))
485 s += 4;
486 else
487 return;
084592ab 488 if (SPACE_OR_TAB(*s))
73659bf1 489 s++;
4e553d73 490 else
73659bf1 491 return;
bf4acbe4 492 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
493 if (!isDIGIT(*s))
494 return;
495 n = s;
496 while (isDIGIT(*s))
497 s++;
bf4acbe4 498 while (SPACE_OR_TAB(*s))
463ee0b2 499 s++;
73659bf1 500 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 501 s++;
73659bf1
GS
502 e = t + 1;
503 }
463ee0b2 504 else {
463ee0b2 505 for (t = s; !isSPACE(*t); t++) ;
73659bf1 506 e = t;
463ee0b2 507 }
bf4acbe4 508 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
509 e++;
510 if (*e != '\n' && *e != '\0')
511 return; /* false alarm */
512
463ee0b2
LW
513 ch = *t;
514 *t = '\0';
f4dd75d9
GS
515 if (t - s > 0) {
516#ifdef USE_ITHREADS
517 Safefree(CopFILE(PL_curcop));
518#else
519 SvREFCNT_dec(CopFILEGV(PL_curcop));
520#endif
57843af0 521 CopFILE_set(PL_curcop, s);
f4dd75d9 522 }
463ee0b2 523 *t = ch;
57843af0 524 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
525}
526
ffb4593c
NT
527/*
528 * S_skipspace
529 * Called to gobble the appropriate amount and type of whitespace.
530 * Skips comments as well.
531 */
532
76e3520e 533STATIC char *
cea2e8a9 534S_skipspace(pTHX_ register char *s)
a687059c 535{
3280af22 536 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 537 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
538 s++;
539 return s;
540 }
541 for (;;) {
fd049845 542 STRLEN prevlen;
09bef843 543 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 544 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
545 while (s < PL_bufend && isSPACE(*s)) {
546 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
547 incline(s);
548 }
ffb4593c
NT
549
550 /* comment */
3280af22
NIS
551 if (s < PL_bufend && *s == '#') {
552 while (s < PL_bufend && *s != '\n')
463ee0b2 553 s++;
60e6418e 554 if (s < PL_bufend) {
463ee0b2 555 s++;
60e6418e
GS
556 if (PL_in_eval && !PL_rsfp) {
557 incline(s);
558 continue;
559 }
560 }
463ee0b2 561 }
ffb4593c
NT
562
563 /* only continue to recharge the buffer if we're at the end
564 * of the buffer, we're not reading from a source filter, and
565 * we're in normal lexing mode
566 */
09bef843
SB
567 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
568 PL_lex_state == LEX_FORMLINE)
463ee0b2 569 return s;
ffb4593c
NT
570
571 /* try to recharge the buffer */
9cbb5ea2
GS
572 if ((s = filter_gets(PL_linestr, PL_rsfp,
573 (prevlen = SvCUR(PL_linestr)))) == Nullch)
574 {
575 /* end of file. Add on the -p or -n magic */
3280af22
NIS
576 if (PL_minus_n || PL_minus_p) {
577 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
578 ";}continue{print or die qq(-p destination: $!\\n)" :
579 "");
3280af22
NIS
580 sv_catpv(PL_linestr,";}");
581 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
582 }
583 else
3280af22 584 sv_setpv(PL_linestr,";");
ffb4593c
NT
585
586 /* reset variables for next time we lex */
9cbb5ea2
GS
587 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
588 = SvPVX(PL_linestr);
3280af22 589 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 590 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
591
592 /* Close the filehandle. Could be from -P preprocessor,
593 * STDIN, or a regular file. If we were reading code from
594 * STDIN (because the commandline held no -e or filename)
595 * then we don't close it, we reset it so the code can
596 * read from STDIN too.
597 */
598
3280af22
NIS
599 if (PL_preprocess && !PL_in_eval)
600 (void)PerlProc_pclose(PL_rsfp);
601 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
602 PerlIO_clearerr(PL_rsfp);
8990e307 603 else
3280af22
NIS
604 (void)PerlIO_close(PL_rsfp);
605 PL_rsfp = Nullfp;
463ee0b2
LW
606 return s;
607 }
ffb4593c
NT
608
609 /* not at end of file, so we only read another line */
09bef843
SB
610 /* make corresponding updates to old pointers, for yyerror() */
611 oldprevlen = PL_oldbufptr - PL_bufend;
612 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
613 if (PL_last_uni)
614 oldunilen = PL_last_uni - PL_bufend;
615 if (PL_last_lop)
616 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
617 PL_linestart = PL_bufptr = s + prevlen;
618 PL_bufend = s + SvCUR(PL_linestr);
619 s = PL_bufptr;
09bef843
SB
620 PL_oldbufptr = s + oldprevlen;
621 PL_oldoldbufptr = s + oldoldprevlen;
622 if (PL_last_uni)
623 PL_last_uni = s + oldunilen;
624 if (PL_last_lop)
625 PL_last_lop = s + oldloplen;
a0d0e21e 626 incline(s);
ffb4593c
NT
627
628 /* debugger active and we're not compiling the debugger code,
629 * so store the line into the debugger's array of lines
630 */
3280af22 631 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
632 SV *sv = NEWSV(85,0);
633
634 sv_upgrade(sv, SVt_PVMG);
3280af22 635 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
57843af0 636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 637 }
463ee0b2 638 }
a687059c 639}
378cc40b 640
ffb4593c
NT
641/*
642 * S_check_uni
643 * Check the unary operators to ensure there's no ambiguity in how they're
644 * used. An ambiguous piece of code would be:
645 * rand + 5
646 * This doesn't mean rand() + 5. Because rand() is a unary operator,
647 * the +5 is its argument.
648 */
649
76e3520e 650STATIC void
cea2e8a9 651S_check_uni(pTHX)
ba106d47 652{
2f3197b3 653 char *s;
a0d0e21e 654 char *t;
2f3197b3 655
3280af22 656 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 657 return;
3280af22
NIS
658 while (isSPACE(*PL_last_uni))
659 PL_last_uni++;
7e2040f0 660 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 661 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 662 return;
0453d815 663 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 664 char ch = *s;
0453d815 665 *s = '\0';
4e553d73
NIS
666 Perl_warner(aTHX_ WARN_AMBIGUOUS,
667 "Warning: Use of \"%s\" without parens is ambiguous",
0453d815
PM
668 PL_last_uni);
669 *s = ch;
670 }
2f3197b3
LW
671}
672
ffb4593c
NT
673/* workaround to replace the UNI() macro with a function. Only the
674 * hints/uts.sh file mentions this. Other comments elsewhere in the
675 * source indicate Microport Unix might need it too.
676 */
677
ffed7fef
LW
678#ifdef CRIPPLED_CC
679
680#undef UNI
ffed7fef 681#define UNI(f) return uni(f,s)
ffed7fef 682
76e3520e 683STATIC int
cea2e8a9 684S_uni(pTHX_ I32 f, char *s)
ffed7fef
LW
685{
686 yylval.ival = f;
3280af22
NIS
687 PL_expect = XTERM;
688 PL_bufptr = s;
8f872242
NIS
689 PL_last_uni = PL_oldbufptr;
690 PL_last_lop_op = f;
ffed7fef
LW
691 if (*s == '(')
692 return FUNC1;
693 s = skipspace(s);
694 if (*s == '(')
695 return FUNC1;
696 else
697 return UNIOP;
698}
699
a0d0e21e
LW
700#endif /* CRIPPLED_CC */
701
ffb4593c
NT
702/*
703 * LOP : macro to build a list operator. Its behaviour has been replaced
704 * with a subroutine, S_lop() for which LOP is just another name.
705 */
706
a0d0e21e
LW
707#define LOP(f,x) return lop(f,x,s)
708
ffb4593c
NT
709/*
710 * S_lop
711 * Build a list operator (or something that might be one). The rules:
712 * - if we have a next token, then it's a list operator [why?]
713 * - if the next thing is an opening paren, then it's a function
714 * - else it's a list operator
715 */
716
76e3520e 717STATIC I32
a0be28da 718S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 719{
79072805 720 yylval.ival = f;
35c8bce7 721 CLINE;
075953c3 722 REPORT("lop", f)
3280af22
NIS
723 PL_expect = x;
724 PL_bufptr = s;
725 PL_last_lop = PL_oldbufptr;
726 PL_last_lop_op = f;
727 if (PL_nexttoke)
a0d0e21e 728 return LSTOP;
79072805
LW
729 if (*s == '(')
730 return FUNC;
731 s = skipspace(s);
732 if (*s == '(')
733 return FUNC;
734 else
735 return LSTOP;
736}
737
ffb4593c
NT
738/*
739 * S_force_next
9cbb5ea2 740 * When the lexer realizes it knows the next token (for instance,
ffb4593c 741 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
742 * to know what token to return the next time the lexer is called. Caller
743 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
744 * handles the token correctly.
ffb4593c
NT
745 */
746
4e553d73 747STATIC void
cea2e8a9 748S_force_next(pTHX_ I32 type)
79072805 749{
3280af22
NIS
750 PL_nexttype[PL_nexttoke] = type;
751 PL_nexttoke++;
752 if (PL_lex_state != LEX_KNOWNEXT) {
753 PL_lex_defer = PL_lex_state;
754 PL_lex_expect = PL_expect;
755 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
756 }
757}
758
ffb4593c
NT
759/*
760 * S_force_word
761 * When the lexer knows the next thing is a word (for instance, it has
762 * just seen -> and it knows that the next char is a word char, then
763 * it calls S_force_word to stick the next word into the PL_next lookahead.
764 *
765 * Arguments:
b1b65b59 766 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
767 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
768 * int check_keyword : if true, Perl checks to make sure the word isn't
769 * a keyword (do this if the word is a label, e.g. goto FOO)
770 * int allow_pack : if true, : characters will also be allowed (require,
771 * use, etc. do this)
9cbb5ea2 772 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
773 */
774
76e3520e 775STATIC char *
cea2e8a9 776S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 777{
463ee0b2
LW
778 register char *s;
779 STRLEN len;
4e553d73 780
463ee0b2
LW
781 start = skipspace(start);
782 s = start;
7e2040f0 783 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 784 (allow_pack && *s == ':') ||
15f0808c 785 (allow_initial_tick && *s == '\'') )
a0d0e21e 786 {
3280af22
NIS
787 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
788 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
789 return start;
790 if (token == METHOD) {
791 s = skipspace(s);
792 if (*s == '(')
3280af22 793 PL_expect = XTERM;
463ee0b2 794 else {
3280af22 795 PL_expect = XOPERATOR;
463ee0b2 796 }
79072805 797 }
3280af22
NIS
798 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
799 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
800 force_next(token);
801 }
802 return s;
803}
804
ffb4593c
NT
805/*
806 * S_force_ident
9cbb5ea2 807 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
808 * text only contains the "foo" portion. The first argument is a pointer
809 * to the "foo", and the second argument is the type symbol to prefix.
810 * Forces the next token to be a "WORD".
9cbb5ea2 811 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
812 */
813
76e3520e 814STATIC void
cea2e8a9 815S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
816{
817 if (s && *s) {
11343788 818 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 819 PL_nextval[PL_nexttoke].opval = o;
79072805 820 force_next(WORD);
748a9306 821 if (kind) {
11343788 822 o->op_private = OPpCONST_ENTERED;
55497cff 823 /* XXX see note in pp_entereval() for why we forgo typo
824 warnings if the symbol must be introduced in an eval.
825 GSAR 96-10-12 */
3280af22 826 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
827 kind == '$' ? SVt_PV :
828 kind == '@' ? SVt_PVAV :
829 kind == '%' ? SVt_PVHV :
830 SVt_PVGV
831 );
748a9306 832 }
79072805
LW
833 }
834}
835
1571675a
GS
836NV
837Perl_str_to_version(pTHX_ SV *sv)
838{
839 NV retval = 0.0;
840 NV nshift = 1.0;
841 STRLEN len;
842 char *start = SvPVx(sv,len);
3aa33fe5 843 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
844 char *end = start + len;
845 while (start < end) {
ba210ebe 846 STRLEN skip;
1571675a
GS
847 UV n;
848 if (utf)
9041c2e3 849 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
850 else {
851 n = *(U8*)start;
852 skip = 1;
853 }
854 retval += ((NV)n)/nshift;
855 start += skip;
856 nshift *= 1000;
857 }
858 return retval;
859}
860
4e553d73 861/*
ffb4593c
NT
862 * S_force_version
863 * Forces the next token to be a version number.
864 */
865
76e3520e 866STATIC char *
cea2e8a9 867S_force_version(pTHX_ char *s)
89bfa8cd 868{
869 OP *version = Nullop;
44dcb63b 870 char *d;
89bfa8cd 871
872 s = skipspace(s);
873
44dcb63b 874 d = s;
dd629d5b 875 if (*d == 'v')
44dcb63b 876 d++;
44dcb63b 877 if (isDIGIT(*d)) {
a7cb1f99 878 for (; isDIGIT(*d) || *d == '_' || *d == '.'; d++);
9f3d182e 879 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 880 SV *ver;
b73d6f50 881 s = scan_num(s, &yylval);
89bfa8cd 882 version = yylval.opval;
dd629d5b
GS
883 ver = cSVOPx(version)->op_sv;
884 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 885 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
886 SvNVX(ver) = str_to_version(ver);
887 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 888 }
89bfa8cd 889 }
890 }
891
892 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 893 PL_nextval[PL_nexttoke].opval = version;
4e553d73 894 force_next(WORD);
89bfa8cd 895
896 return (s);
897}
898
ffb4593c
NT
899/*
900 * S_tokeq
901 * Tokenize a quoted string passed in as an SV. It finds the next
902 * chunk, up to end of string or a backslash. It may make a new
903 * SV containing that chunk (if HINT_NEW_STRING is on). It also
904 * turns \\ into \.
905 */
906
76e3520e 907STATIC SV *
cea2e8a9 908S_tokeq(pTHX_ SV *sv)
79072805
LW
909{
910 register char *s;
911 register char *send;
912 register char *d;
b3ac6de7
IZ
913 STRLEN len = 0;
914 SV *pv = sv;
79072805
LW
915
916 if (!SvLEN(sv))
b3ac6de7 917 goto finish;
79072805 918
a0d0e21e 919 s = SvPV_force(sv, len);
21a311ee 920 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 921 goto finish;
463ee0b2 922 send = s + len;
79072805
LW
923 while (s < send && *s != '\\')
924 s++;
925 if (s == send)
b3ac6de7 926 goto finish;
79072805 927 d = s;
be4731d2 928 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 929 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
930 if (SvUTF8(sv))
931 SvUTF8_on(pv);
932 }
79072805
LW
933 while (s < send) {
934 if (*s == '\\') {
a0d0e21e 935 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
936 s++; /* all that, just for this */
937 }
938 *d++ = *s++;
939 }
940 *d = '\0';
463ee0b2 941 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 942 finish:
3280af22 943 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 944 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
945 return sv;
946}
947
ffb4593c
NT
948/*
949 * Now come three functions related to double-quote context,
950 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
951 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
952 * interact with PL_lex_state, and create fake ( ... ) argument lists
953 * to handle functions and concatenation.
954 * They assume that whoever calls them will be setting up a fake
955 * join call, because each subthing puts a ',' after it. This lets
956 * "lower \luPpEr"
957 * become
958 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
959 *
960 * (I'm not sure whether the spurious commas at the end of lcfirst's
961 * arguments and join's arguments are created or not).
962 */
963
964/*
965 * S_sublex_start
966 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
967 *
968 * Pattern matching will set PL_lex_op to the pattern-matching op to
969 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
970 *
971 * OP_CONST and OP_READLINE are easy--just make the new op and return.
972 *
973 * Everything else becomes a FUNC.
974 *
975 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
976 * had an OP_CONST or OP_READLINE). This just sets us up for a
977 * call to S_sublex_push().
978 */
979
76e3520e 980STATIC I32
cea2e8a9 981S_sublex_start(pTHX)
79072805
LW
982{
983 register I32 op_type = yylval.ival;
79072805
LW
984
985 if (op_type == OP_NULL) {
3280af22
NIS
986 yylval.opval = PL_lex_op;
987 PL_lex_op = Nullop;
79072805
LW
988 return THING;
989 }
990 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 991 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
992
993 if (SvTYPE(sv) == SVt_PVIV) {
994 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
995 STRLEN len;
996 char *p;
997 SV *nsv;
998
999 p = SvPV(sv, len);
79cb57f6 1000 nsv = newSVpvn(p, len);
01ec43d0
GS
1001 if (SvUTF8(sv))
1002 SvUTF8_on(nsv);
b3ac6de7
IZ
1003 SvREFCNT_dec(sv);
1004 sv = nsv;
4e553d73 1005 }
b3ac6de7 1006 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1007 PL_lex_stuff = Nullsv;
79072805
LW
1008 return THING;
1009 }
1010
3280af22
NIS
1011 PL_sublex_info.super_state = PL_lex_state;
1012 PL_sublex_info.sub_inwhat = op_type;
1013 PL_sublex_info.sub_op = PL_lex_op;
1014 PL_lex_state = LEX_INTERPPUSH;
55497cff 1015
3280af22
NIS
1016 PL_expect = XTERM;
1017 if (PL_lex_op) {
1018 yylval.opval = PL_lex_op;
1019 PL_lex_op = Nullop;
55497cff 1020 return PMFUNC;
1021 }
1022 else
1023 return FUNC;
1024}
1025
ffb4593c
NT
1026/*
1027 * S_sublex_push
1028 * Create a new scope to save the lexing state. The scope will be
1029 * ended in S_sublex_done. Returns a '(', starting the function arguments
1030 * to the uc, lc, etc. found before.
1031 * Sets PL_lex_state to LEX_INTERPCONCAT.
1032 */
1033
76e3520e 1034STATIC I32
cea2e8a9 1035S_sublex_push(pTHX)
55497cff 1036{
f46d017c 1037 ENTER;
55497cff 1038
3280af22
NIS
1039 PL_lex_state = PL_sublex_info.super_state;
1040 SAVEI32(PL_lex_dojoin);
1041 SAVEI32(PL_lex_brackets);
3280af22
NIS
1042 SAVEI32(PL_lex_casemods);
1043 SAVEI32(PL_lex_starts);
1044 SAVEI32(PL_lex_state);
7766f137 1045 SAVEVPTR(PL_lex_inpat);
3280af22 1046 SAVEI32(PL_lex_inwhat);
57843af0 1047 SAVECOPLINE(PL_curcop);
3280af22 1048 SAVEPPTR(PL_bufptr);
8452ff4b 1049 SAVEPPTR(PL_bufend);
3280af22
NIS
1050 SAVEPPTR(PL_oldbufptr);
1051 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1052 SAVEPPTR(PL_last_lop);
1053 SAVEPPTR(PL_last_uni);
3280af22
NIS
1054 SAVEPPTR(PL_linestart);
1055 SAVESPTR(PL_linestr);
1056 SAVEPPTR(PL_lex_brackstack);
1057 SAVEPPTR(PL_lex_casestack);
1058
1059 PL_linestr = PL_lex_stuff;
1060 PL_lex_stuff = Nullsv;
1061
9cbb5ea2
GS
1062 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1063 = SvPVX(PL_linestr);
3280af22 1064 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1065 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1066 SAVEFREESV(PL_linestr);
1067
1068 PL_lex_dojoin = FALSE;
1069 PL_lex_brackets = 0;
3280af22
NIS
1070 New(899, PL_lex_brackstack, 120, char);
1071 New(899, PL_lex_casestack, 12, char);
1072 SAVEFREEPV(PL_lex_brackstack);
1073 SAVEFREEPV(PL_lex_casestack);
1074 PL_lex_casemods = 0;
1075 *PL_lex_casestack = '\0';
1076 PL_lex_starts = 0;
1077 PL_lex_state = LEX_INTERPCONCAT;
57843af0 1078 CopLINE_set(PL_curcop, PL_multi_start);
3280af22
NIS
1079
1080 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1081 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1082 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1083 else
3280af22 1084 PL_lex_inpat = Nullop;
79072805 1085
55497cff 1086 return '(';
79072805
LW
1087}
1088
ffb4593c
NT
1089/*
1090 * S_sublex_done
1091 * Restores lexer state after a S_sublex_push.
1092 */
1093
76e3520e 1094STATIC I32
cea2e8a9 1095S_sublex_done(pTHX)
79072805 1096{
3280af22 1097 if (!PL_lex_starts++) {
9aa983d2
JH
1098 SV *sv = newSVpvn("",0);
1099 if (SvUTF8(PL_linestr))
1100 SvUTF8_on(sv);
3280af22 1101 PL_expect = XOPERATOR;
9aa983d2 1102 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1103 return THING;
1104 }
1105
3280af22
NIS
1106 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1107 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1108 return yylex();
79072805
LW
1109 }
1110
ffb4593c 1111 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1112 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1113 PL_linestr = PL_lex_repl;
1114 PL_lex_inpat = 0;
1115 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1116 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1117 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1118 SAVEFREESV(PL_linestr);
1119 PL_lex_dojoin = FALSE;
1120 PL_lex_brackets = 0;
3280af22
NIS
1121 PL_lex_casemods = 0;
1122 *PL_lex_casestack = '\0';
1123 PL_lex_starts = 0;
25da4f38 1124 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1125 PL_lex_state = LEX_INTERPNORMAL;
1126 PL_lex_starts++;
e9fa98b2
HS
1127 /* we don't clear PL_lex_repl here, so that we can check later
1128 whether this is an evalled subst; that means we rely on the
1129 logic to ensure sublex_done() is called again only via the
1130 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1131 }
e9fa98b2 1132 else {
3280af22 1133 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1134 PL_lex_repl = Nullsv;
1135 }
79072805 1136 return ',';
ffed7fef
LW
1137 }
1138 else {
f46d017c 1139 LEAVE;
3280af22
NIS
1140 PL_bufend = SvPVX(PL_linestr);
1141 PL_bufend += SvCUR(PL_linestr);
1142 PL_expect = XOPERATOR;
09bef843 1143 PL_sublex_info.sub_inwhat = 0;
79072805 1144 return ')';
ffed7fef
LW
1145 }
1146}
1147
02aa26ce
NT
1148/*
1149 scan_const
1150
1151 Extracts a pattern, double-quoted string, or transliteration. This
1152 is terrifying code.
1153
3280af22
NIS
1154 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1155 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1156 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1157
9b599b2a
GS
1158 Returns a pointer to the character scanned up to. Iff this is
1159 advanced from the start pointer supplied (ie if anything was
1160 successfully parsed), will leave an OP for the substring scanned
1161 in yylval. Caller must intuit reason for not parsing further
1162 by looking at the next characters herself.
1163
02aa26ce
NT
1164 In patterns:
1165 backslashes:
1166 double-quoted style: \r and \n
1167 regexp special ones: \D \s
1168 constants: \x3
1169 backrefs: \1 (deprecated in substitution replacements)
1170 case and quoting: \U \Q \E
1171 stops on @ and $, but not for $ as tail anchor
1172
1173 In transliterations:
1174 characters are VERY literal, except for - not at the start or end
1175 of the string, which indicates a range. scan_const expands the
1176 range to the full set of intermediate characters.
1177
1178 In double-quoted strings:
1179 backslashes:
1180 double-quoted style: \r and \n
1181 constants: \x3
1182 backrefs: \1 (deprecated)
1183 case and quoting: \U \Q \E
1184 stops on @ and $
1185
1186 scan_const does *not* construct ops to handle interpolated strings.
1187 It stops processing as soon as it finds an embedded $ or @ variable
1188 and leaves it to the caller to work out what's going on.
1189
1190 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @:foo.
1191
1192 $ in pattern could be $foo or could be tail anchor. Assumption:
1193 it's a tail anchor if $ is the last thing in the string, or if it's
1194 followed by one of ")| \n\t"
1195
1196 \1 (backreferences) are turned into $1
1197
1198 The structure of the code is
1199 while (there's a character to process) {
1200 handle transliteration ranges
1201 skip regexp comments
1202 skip # initiated comments in //x patterns
1203 check for embedded @foo
1204 check for embedded scalars
1205 if (backslash) {
1206 leave intact backslashes from leave (below)
1207 deprecate \1 in strings and sub replacements
1208 handle string-changing backslashes \l \U \Q \E, etc.
1209 switch (what was escaped) {
1210 handle - in a transliteration (becomes a literal -)
1211 handle \132 octal characters
1212 handle 0x15 hex characters
1213 handle \cV (control V)
1214 handle printf backslashes (\f, \r, \n, etc)
1215 } (end switch)
1216 } (end if backslash)
1217 } (end while character to read)
4e553d73 1218
02aa26ce
NT
1219*/
1220
76e3520e 1221STATIC char *
cea2e8a9 1222S_scan_const(pTHX_ char *start)
79072805 1223{
3280af22 1224 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1225 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1226 register char *s = start; /* start of the constant */
1227 register char *d = SvPVX(sv); /* destination for copies */
1228 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1229 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1230 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1231 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1232 UV uv;
1233
dff6d3cd 1234 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1235 PL_lex_inpat
4a2d328f 1236 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1237 : "";
79072805 1238
2b9d42f0
NIS
1239 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1240 /* If we are doing a trans and we know we want UTF8 set expectation */
1241 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1242 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1243 }
1244
1245
79072805 1246 while (s < send || dorange) {
02aa26ce 1247 /* get transliterations out of the way (they're most literal) */
3280af22 1248 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1249 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1250 if (dorange) {
1ba5c669
JH
1251 I32 i; /* current expanded character */
1252 I32 min; /* first character in range */
1253 I32 max; /* last character in range */
02aa26ce 1254
2b9d42f0 1255 if (has_utf8) {
8973db79
JH
1256 char *c = (char*)utf8_hop((U8*)d, -1);
1257 char *e = d++;
1258 while (e-- > c)
1259 *(e + 1) = *e;
2b9d42f0 1260 *c = UTF_TO_NATIVE(0xff);
8973db79
JH
1261 /* mark the range as done, and continue */
1262 dorange = FALSE;
1263 didrange = TRUE;
1264 continue;
1265 }
2b9d42f0 1266
02aa26ce 1267 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1268 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1269 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1270 d -= 2; /* eat the first char and the - */
1271
8ada0baa
JH
1272 min = (U8)*d; /* first char in range */
1273 max = (U8)d[1]; /* last char in range */
1274
c2e66d9e 1275 if (min > max) {
01ec43d0 1276 Perl_croak(aTHX_
1ba5c669
JH
1277 "Invalid [] range \"%c-%c\" in transliteration operator",
1278 (char)min, (char)max);
c2e66d9e
GS
1279 }
1280
c7f1f016 1281#ifdef EBCDIC
8ada0baa
JH
1282 if ((isLOWER(min) && isLOWER(max)) ||
1283 (isUPPER(min) && isUPPER(max))) {
1284 if (isLOWER(min)) {
1285 for (i = min; i <= max; i++)
1286 if (isLOWER(i))
db42d148 1287 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1288 } else {
1289 for (i = min; i <= max; i++)
1290 if (isUPPER(i))
db42d148 1291 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1292 }
1293 }
1294 else
1295#endif
1296 for (i = min; i <= max; i++)
1297 *d++ = i;
02aa26ce
NT
1298
1299 /* mark the range as done, and continue */
79072805 1300 dorange = FALSE;
01ec43d0 1301 didrange = TRUE;
79072805 1302 continue;
4e553d73 1303 }
02aa26ce
NT
1304
1305 /* range begins (ignore - as first or last char) */
79072805 1306 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1307 if (didrange) {
1fafa243 1308 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1309 }
2b9d42f0
NIS
1310 if (has_utf8) {
1311 *d++ = UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1312 s++;
1313 continue;
1314 }
79072805
LW
1315 dorange = TRUE;
1316 s++;
01ec43d0
GS
1317 }
1318 else {
1319 didrange = FALSE;
1320 }
79072805 1321 }
02aa26ce
NT
1322
1323 /* if we get here, we're not doing a transliteration */
1324
0f5d15d6
IZ
1325 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1326 except for the last char, which will be done separately. */
3280af22 1327 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395
IZ
1328 if (s[2] == '#') {
1329 while (s < send && *s != ')')
db42d148 1330 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1331 }
1332 else if (s[2] == '{' /* This should match regcomp.c */
1333 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1334 {
cc6b7395 1335 I32 count = 1;
0f5d15d6 1336 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1337 char c;
1338
d9f97599
GS
1339 while (count && (c = *regparse)) {
1340 if (c == '\\' && regparse[1])
1341 regparse++;
4e553d73 1342 else if (c == '{')
cc6b7395 1343 count++;
4e553d73 1344 else if (c == '}')
cc6b7395 1345 count--;
d9f97599 1346 regparse++;
cc6b7395 1347 }
5bdf89e7
IZ
1348 if (*regparse != ')') {
1349 regparse--; /* Leave one char for continuation. */
cc6b7395 1350 yyerror("Sequence (?{...}) not terminated or not {}-balanced");
5bdf89e7 1351 }
0f5d15d6 1352 while (s < regparse)
db42d148 1353 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1354 }
748a9306 1355 }
02aa26ce
NT
1356
1357 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1358 else if (*s == '#' && PL_lex_inpat &&
1359 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1360 while (s+1 < send && *s != '\n')
db42d148 1361 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1362 }
02aa26ce 1363
5d1d4326
JH
1364 /* check for embedded arrays
1365 (@foo, @:foo, @'foo, @{foo}, @$foo, @+, @-)
1366 */
7e2040f0 1367 else if (*s == '@' && s[1]
5d1d4326 1368 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1369 break;
02aa26ce
NT
1370
1371 /* check for embedded scalars. only stop if we're sure it's a
1372 variable.
1373 */
79072805 1374 else if (*s == '$') {
3280af22 1375 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1376 break;
c277df42 1377 if (s + 1 < send && !strchr("()| \n\t", s[1]))
79072805
LW
1378 break; /* in regexp, $ might be tail anchor */
1379 }
02aa26ce 1380
2b9d42f0
NIS
1381 /* End of else if chain - OP_TRANS rejoin rest */
1382
02aa26ce 1383 /* backslashes */
79072805
LW
1384 if (*s == '\\' && s+1 < send) {
1385 s++;
02aa26ce
NT
1386
1387 /* some backslashes we leave behind */
c9f97d15 1388 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1389 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1390 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1391 continue;
1392 }
02aa26ce
NT
1393
1394 /* deprecate \1 in strings and substitution replacements */
3280af22 1395 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1396 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1397 {
599cee73 1398 if (ckWARN(WARN_SYNTAX))
cea2e8a9 1399 Perl_warner(aTHX_ WARN_SYNTAX, "\\%c better written as $%c", *s, *s);
79072805
LW
1400 *--s = '$';
1401 break;
1402 }
02aa26ce
NT
1403
1404 /* string-change backslash escapes */
3280af22 1405 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1406 --s;
1407 break;
1408 }
02aa26ce
NT
1409
1410 /* if we get here, it's either a quoted -, or a digit */
79072805 1411 switch (*s) {
02aa26ce
NT
1412
1413 /* quoted - in transliterations */
79072805 1414 case '-':
3280af22 1415 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1416 *d++ = *s++;
1417 continue;
1418 }
1419 /* FALL THROUGH */
1420 default:
11b8faa4 1421 {
7e84c16c 1422 if (ckWARN(WARN_MISC) && isALNUM(*s))
4e553d73 1423 Perl_warner(aTHX_ WARN_MISC,
11b8faa4
JH
1424 "Unrecognized escape \\%c passed through",
1425 *s);
1426 /* default action is to copy the quoted character */
f9a63242 1427 goto default_action;
11b8faa4 1428 }
02aa26ce
NT
1429
1430 /* \132 indicates an octal constant */
79072805
LW
1431 case '0': case '1': case '2': case '3':
1432 case '4': case '5': case '6': case '7':
ba210ebe
JH
1433 {
1434 STRLEN len = 0; /* disallow underscores */
1435 uv = (UV)scan_oct(s, 3, &len);
1436 s += len;
1437 }
012bcf8d 1438 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1439
1440 /* \x24 indicates a hex constant */
79072805 1441 case 'x':
a0ed51b3
LW
1442 ++s;
1443 if (*s == '{') {
1444 char* e = strchr(s, '}');
355860ce
HS
1445 STRLEN len = 1; /* allow underscores */
1446
adaeee49 1447 if (!e) {
a0ed51b3 1448 yyerror("Missing right brace on \\x{}");
355860ce
HS
1449 ++s;
1450 continue;
ba210ebe 1451 }
355860ce 1452 uv = (UV)scan_hex(s + 1, e - s - 1, &len);
ba210ebe 1453 s = e + 1;
a0ed51b3
LW
1454 }
1455 else {
ba210ebe
JH
1456 {
1457 STRLEN len = 0; /* disallow underscores */
1458 uv = (UV)scan_hex(s, 2, &len);
1459 s += len;
1460 }
012bcf8d
GS
1461 }
1462
1463 NUM_ESCAPE_INSERT:
1464 /* Insert oct or hex escaped character.
301d3d20 1465 * There will always enough room in sv since such
db42d148 1466 * escapes will be longer than any UTF-8 sequence
301d3d20 1467 * they can end up as. */
ba7cea30 1468
c7f1f016
NIS
1469 /* We need to map to chars to ASCII before doing the tests
1470 to cover EBCDIC
1471 */
c4d5f83a 1472 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1473 if (!has_utf8 && uv > 255) {
301d3d20
JH
1474 /* Might need to recode whatever we have
1475 * accumulated so far if it contains any
1476 * hibit chars.
1477 *
1478 * (Can't we keep track of that and avoid
1479 * this rescan? --jhi)
012bcf8d 1480 */
c7f1f016 1481 int hicount = 0;
63cd0674
NIS
1482 U8 *c;
1483 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1484 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1485 hicount++;
db42d148 1486 }
012bcf8d 1487 }
63cd0674 1488 if (hicount) {
db42d148
NIS
1489 STRLEN offset = d - SvPVX(sv);
1490 U8 *src, *dst;
1491 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1492 src = (U8 *)d - 1;
1493 dst = src+hicount;
1494 d += hicount;
1495 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1496 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1497 U8 ch = NATIVE_TO_ASCII(*src);
db42d148
NIS
1498 *dst-- = UTF8_EIGHT_BIT_LO(ch);
1499 *dst-- = UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1500 }
1501 else {
63cd0674 1502 *dst-- = *src;
012bcf8d 1503 }
c7f1f016 1504 src--;
012bcf8d
GS
1505 }
1506 }
1507 }
1508
9aa983d2 1509 if (has_utf8 || uv > 255) {
9041c2e3 1510 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1511 has_utf8 = TRUE;
f9a63242
JH
1512 if (PL_lex_inwhat == OP_TRANS &&
1513 PL_sublex_info.sub_op) {
1514 PL_sublex_info.sub_op->op_private |=
1515 (PL_lex_repl ? OPpTRANS_FROM_UTF
1516 : OPpTRANS_TO_UTF);
f9a63242 1517 }
012bcf8d 1518 }
a0ed51b3 1519 else {
012bcf8d 1520 *d++ = (char)uv;
a0ed51b3 1521 }
012bcf8d
GS
1522 }
1523 else {
c4d5f83a 1524 *d++ = (char) uv;
a0ed51b3 1525 }
79072805 1526 continue;
02aa26ce 1527
4a2d328f
IZ
1528 /* \N{latin small letter a} is a named character */
1529 case 'N':
55eda711 1530 ++s;
423cee85
JH
1531 if (*s == '{') {
1532 char* e = strchr(s, '}');
155aba94 1533 SV *res;
423cee85
JH
1534 STRLEN len;
1535 char *str;
4e553d73 1536
423cee85 1537 if (!e) {
5777a3f7 1538 yyerror("Missing right brace on \\N{}");
423cee85
JH
1539 e = s - 1;
1540 goto cont_scan;
1541 }
55eda711
JH
1542 res = newSVpvn(s + 1, e - s - 1);
1543 res = new_constant( Nullch, 0, "charnames",
1544 res, Nullsv, "\\N{...}" );
f9a63242
JH
1545 if (has_utf8)
1546 sv_utf8_upgrade(res);
423cee85 1547 str = SvPV(res,len);
89491803 1548 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1549 char *ostart = SvPVX(sv);
1550 SvCUR_set(sv, d - ostart);
1551 SvPOK_on(sv);
e4f3eed8 1552 *d = '\0';
f08d6ad9 1553 sv_utf8_upgrade(sv);
d2f449dd
SB
1554 /* this just broke our allocation above... */
1555 SvGROW(sv, send - start);
f08d6ad9 1556 d = SvPVX(sv) + SvCUR(sv);
89491803 1557 has_utf8 = TRUE;
f08d6ad9 1558 }
423cee85
JH
1559 if (len > e - s + 4) {
1560 char *odest = SvPVX(sv);
1561
8973db79 1562 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1563 d = SvPVX(sv) + (d - odest);
1564 }
1565 Copy(str, d, len, char);
1566 d += len;
1567 SvREFCNT_dec(res);
1568 cont_scan:
1569 s = e + 1;
1570 }
1571 else
5777a3f7 1572 yyerror("Missing braces on \\N{}");
423cee85
JH
1573 continue;
1574
02aa26ce 1575 /* \c is a control character */
79072805
LW
1576 case 'c':
1577 s++;
ba210ebe
JH
1578 {
1579 U8 c = *s++;
c7f1f016
NIS
1580#ifdef EBCDIC
1581 if (isLOWER(c))
1582 c = toUPPER(c);
1583#endif
db42d148 1584 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1585 }
79072805 1586 continue;
02aa26ce
NT
1587
1588 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1589 case 'b':
db42d148 1590 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1591 break;
1592 case 'n':
db42d148 1593 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1594 break;
1595 case 'r':
db42d148 1596 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1597 break;
1598 case 'f':
db42d148 1599 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1600 break;
1601 case 't':
db42d148 1602 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1603 break;
34a3fe2a 1604 case 'e':
db42d148 1605 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1606 break;
1607 case 'a':
db42d148 1608 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1609 break;
02aa26ce
NT
1610 } /* end switch */
1611
79072805
LW
1612 s++;
1613 continue;
02aa26ce
NT
1614 } /* end if (backslash) */
1615
f9a63242 1616 default_action:
2b9d42f0
NIS
1617 /* If we started with encoded form, or already know we want it
1618 and then encode the next character */
1619 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1620 STRLEN len = 1;
1621 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1622 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1623 s += len;
1624 if (need > len) {
1625 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1626 STRLEN off = d - SvPVX(sv);
1627 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1628 }
1629 d = (char*)uvchr_to_utf8((U8*)d, uv);
1630 has_utf8 = TRUE;
1631 }
1632 else {
1633 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1634 }
02aa26ce
NT
1635 } /* while loop to process each character */
1636
1637 /* terminate the string and set up the sv */
79072805 1638 *d = '\0';
463ee0b2 1639 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1640 if (SvCUR(sv) >= SvLEN(sv))
585602fa 1641 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1642
79072805 1643 SvPOK_on(sv);
2b9d42f0 1644 if (has_utf8) {
7e2040f0 1645 SvUTF8_on(sv);
2b9d42f0
NIS
1646 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1647 PL_sublex_info.sub_op->op_private |=
1648 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1649 }
1650 }
79072805 1651
02aa26ce 1652 /* shrink the sv if we allocated more than we used */
79072805
LW
1653 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1654 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1655 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1656 }
02aa26ce 1657
9b599b2a 1658 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1659 if (s > PL_bufptr) {
1660 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1661 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1662 sv, Nullsv,
4e553d73 1663 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1664 ? "tr"
3280af22 1665 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1666 ? "s"
1667 : "qq")));
79072805 1668 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1669 } else
8990e307 1670 SvREFCNT_dec(sv);
79072805
LW
1671 return s;
1672}
1673
ffb4593c
NT
1674/* S_intuit_more
1675 * Returns TRUE if there's more to the expression (e.g., a subscript),
1676 * FALSE otherwise.
ffb4593c
NT
1677 *
1678 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1679 *
1680 * ->[ and ->{ return TRUE
1681 * { and [ outside a pattern are always subscripts, so return TRUE
1682 * if we're outside a pattern and it's not { or [, then return FALSE
1683 * if we're in a pattern and the first char is a {
1684 * {4,5} (any digits around the comma) returns FALSE
1685 * if we're in a pattern and the first char is a [
1686 * [] returns FALSE
1687 * [SOMETHING] has a funky algorithm to decide whether it's a
1688 * character class or not. It has to deal with things like
1689 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1690 * anything else returns TRUE
1691 */
1692
9cbb5ea2
GS
1693/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1694
76e3520e 1695STATIC int
cea2e8a9 1696S_intuit_more(pTHX_ register char *s)
79072805 1697{
3280af22 1698 if (PL_lex_brackets)
79072805
LW
1699 return TRUE;
1700 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1701 return TRUE;
1702 if (*s != '{' && *s != '[')
1703 return FALSE;
3280af22 1704 if (!PL_lex_inpat)
79072805
LW
1705 return TRUE;
1706
1707 /* In a pattern, so maybe we have {n,m}. */
1708 if (*s == '{') {
1709 s++;
1710 if (!isDIGIT(*s))
1711 return TRUE;
1712 while (isDIGIT(*s))
1713 s++;
1714 if (*s == ',')
1715 s++;
1716 while (isDIGIT(*s))
1717 s++;
1718 if (*s == '}')
1719 return FALSE;
1720 return TRUE;
1721
1722 }
1723
1724 /* On the other hand, maybe we have a character class */
1725
1726 s++;
1727 if (*s == ']' || *s == '^')
1728 return FALSE;
1729 else {
ffb4593c 1730 /* this is terrifying, and it works */
79072805
LW
1731 int weight = 2; /* let's weigh the evidence */
1732 char seen[256];
f27ffc4a 1733 unsigned char un_char = 255, last_un_char;
93a17b20 1734 char *send = strchr(s,']');
3280af22 1735 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1736
1737 if (!send) /* has to be an expression */
1738 return TRUE;
1739
1740 Zero(seen,256,char);
1741 if (*s == '$')
1742 weight -= 3;
1743 else if (isDIGIT(*s)) {
1744 if (s[1] != ']') {
1745 if (isDIGIT(s[1]) && s[2] == ']')
1746 weight -= 10;
1747 }
1748 else
1749 weight -= 100;
1750 }
1751 for (; s < send; s++) {
1752 last_un_char = un_char;
1753 un_char = (unsigned char)*s;
1754 switch (*s) {
1755 case '@':
1756 case '&':
1757 case '$':
1758 weight -= seen[un_char] * 10;
7e2040f0 1759 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1760 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1761 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1762 weight -= 100;
1763 else
1764 weight -= 10;
1765 }
1766 else if (*s == '$' && s[1] &&
93a17b20
LW
1767 strchr("[#!%*<>()-=",s[1])) {
1768 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1769 weight -= 10;
1770 else
1771 weight -= 1;
1772 }
1773 break;
1774 case '\\':
1775 un_char = 254;
1776 if (s[1]) {
93a17b20 1777 if (strchr("wds]",s[1]))
79072805
LW
1778 weight += 100;
1779 else if (seen['\''] || seen['"'])
1780 weight += 1;
93a17b20 1781 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1782 weight += 40;
1783 else if (isDIGIT(s[1])) {
1784 weight += 40;
1785 while (s[1] && isDIGIT(s[1]))
1786 s++;
1787 }
1788 }
1789 else
1790 weight += 100;
1791 break;
1792 case '-':
1793 if (s[1] == '\\')
1794 weight += 50;
93a17b20 1795 if (strchr("aA01! ",last_un_char))
79072805 1796 weight += 30;
93a17b20 1797 if (strchr("zZ79~",s[1]))
79072805 1798 weight += 30;
f27ffc4a
GS
1799 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1800 weight -= 5; /* cope with negative subscript */
79072805
LW
1801 break;
1802 default:
93a17b20 1803 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1804 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1805 char *d = tmpbuf;
1806 while (isALPHA(*s))
1807 *d++ = *s++;
1808 *d = '\0';
1809 if (keyword(tmpbuf, d - tmpbuf))
1810 weight -= 150;
1811 }
1812 if (un_char == last_un_char + 1)
1813 weight += 5;
1814 weight -= seen[un_char];
1815 break;
1816 }
1817 seen[un_char]++;
1818 }
1819 if (weight >= 0) /* probably a character class */
1820 return FALSE;
1821 }
1822
1823 return TRUE;
1824}
ffed7fef 1825
ffb4593c
NT
1826/*
1827 * S_intuit_method
1828 *
1829 * Does all the checking to disambiguate
1830 * foo bar
1831 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1832 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1833 *
1834 * First argument is the stuff after the first token, e.g. "bar".
1835 *
1836 * Not a method if bar is a filehandle.
1837 * Not a method if foo is a subroutine prototyped to take a filehandle.
1838 * Not a method if it's really "Foo $bar"
1839 * Method if it's "foo $bar"
1840 * Not a method if it's really "print foo $bar"
1841 * Method if it's really "foo package::" (interpreted as package->foo)
1842 * Not a method if bar is known to be a subroutne ("sub bar; foo bar")
3cb0bbe5 1843 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
1844 * =>
1845 */
1846
76e3520e 1847STATIC int
cea2e8a9 1848S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
1849{
1850 char *s = start + (*start == '$');
3280af22 1851 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
1852 STRLEN len;
1853 GV* indirgv;
1854
1855 if (gv) {
b6c543e3 1856 CV *cv;
a0d0e21e
LW
1857 if (GvIO(gv))
1858 return 0;
b6c543e3
IZ
1859 if ((cv = GvCVu(gv))) {
1860 char *proto = SvPVX(cv);
1861 if (proto) {
1862 if (*proto == ';')
1863 proto++;
1864 if (*proto == '*')
1865 return 0;
1866 }
1867 } else
a0d0e21e
LW
1868 gv = 0;
1869 }
8903cb82 1870 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
1871 /* start is the beginning of the possible filehandle/object,
1872 * and s is the end of it
1873 * tmpbuf is a copy of it
1874 */
1875
a0d0e21e 1876 if (*start == '$') {
3280af22 1877 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
1878 return 0;
1879 s = skipspace(s);
3280af22
NIS
1880 PL_bufptr = start;
1881 PL_expect = XREF;
a0d0e21e
LW
1882 return *s == '(' ? FUNCMETH : METHOD;
1883 }
1884 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
1885 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
1886 len -= 2;
1887 tmpbuf[len] = '\0';
1888 goto bare_package;
1889 }
1890 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 1891 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
1892 return 0;
1893 /* filehandle or package name makes it a method */
89bfa8cd 1894 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 1895 s = skipspace(s);
3280af22 1896 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 1897 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 1898 bare_package:
3280af22 1899 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 1900 newSVpvn(tmpbuf,len));
3280af22
NIS
1901 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
1902 PL_expect = XTERM;
a0d0e21e 1903 force_next(WORD);
3280af22 1904 PL_bufptr = s;
a0d0e21e
LW
1905 return *s == '(' ? FUNCMETH : METHOD;
1906 }
1907 }
1908 return 0;
1909}
1910
ffb4593c
NT
1911/*
1912 * S_incl_perldb
1913 * Return a string of Perl code to load the debugger. If PERL5DB
1914 * is set, it will return the contents of that, otherwise a
1915 * compile-time require of perl5db.pl.
1916 */
1917
76e3520e 1918STATIC char*
cea2e8a9 1919S_incl_perldb(pTHX)
a0d0e21e 1920{
3280af22 1921 if (PL_perldb) {
76e3520e 1922 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
1923
1924 if (pdb)
1925 return pdb;
61bb5906 1926 SETERRNO(0,SS$_NORMAL);
a0d0e21e
LW
1927 return "BEGIN { require 'perl5db.pl' }";
1928 }
1929 return "";
1930}
1931
1932
16d20bd9 1933/* Encoded script support. filter_add() effectively inserts a
4e553d73 1934 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
1935 * Note that the filter function only applies to the current source file
1936 * (e.g., it will not affect files 'require'd or 'use'd by this one).
1937 *
1938 * The datasv parameter (which may be NULL) can be used to pass
1939 * private data to this instance of the filter. The filter function
1940 * can recover the SV using the FILTER_DATA macro and use it to
1941 * store private buffers and state information.
1942 *
1943 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 1944 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 1945 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
1946 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
1947 * private use must be set using malloc'd pointers.
1948 */
16d20bd9
AD
1949
1950SV *
864dbfa3 1951Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 1952{
f4c556ac
GS
1953 if (!funcp)
1954 return Nullsv;
1955
3280af22
NIS
1956 if (!PL_rsfp_filters)
1957 PL_rsfp_filters = newAV();
16d20bd9 1958 if (!datasv)
8c52afec 1959 datasv = NEWSV(255,0);
16d20bd9 1960 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 1961 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 1962 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 1963 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac
GS
1964 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
1965 funcp, SvPV_nolen(datasv)));
3280af22
NIS
1966 av_unshift(PL_rsfp_filters, 1);
1967 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
1968 return(datasv);
1969}
4e553d73 1970
16d20bd9
AD
1971
1972/* Delete most recently added instance of this filter function. */
a0d0e21e 1973void
864dbfa3 1974Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 1975{
e0c19803 1976 SV *datasv;
f4c556ac 1977 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", funcp));
3280af22 1978 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
1979 return;
1980 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 1981 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 1982 if (IoANY(datasv) == (void *)funcp) {
e0c19803 1983 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 1984 IoANY(datasv) = (void *)NULL;
3280af22 1985 sv_free(av_pop(PL_rsfp_filters));
e50aee73 1986
16d20bd9
AD
1987 return;
1988 }
1989 /* we need to search for the correct entry and clear it */
cea2e8a9 1990 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
1991}
1992
1993
1994/* Invoke the n'th filter function for the current rsfp. */
1995I32
864dbfa3 1996Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4e553d73
NIS
1997
1998
8ac85365 1999 /* 0 = read one text line */
a0d0e21e 2000{
16d20bd9
AD
2001 filter_t funcp;
2002 SV *datasv = NULL;
e50aee73 2003
3280af22 2004 if (!PL_rsfp_filters)
16d20bd9 2005 return -1;
3280af22 2006 if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
16d20bd9
AD
2007 /* Provide a default input filter to make life easy. */
2008 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2009 DEBUG_P(PerlIO_printf(Perl_debug_log,
2010 "filter_read %d: from rsfp\n", idx));
4e553d73 2011 if (maxlen) {
16d20bd9
AD
2012 /* Want a block */
2013 int len ;
2014 int old_len = SvCUR(buf_sv) ;
2015
2016 /* ensure buf_sv is large enough */
2017 SvGROW(buf_sv, old_len + maxlen) ;
3280af22
NIS
2018 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2019 if (PerlIO_error(PL_rsfp))
37120919
AD
2020 return -1; /* error */
2021 else
2022 return 0 ; /* end of file */
2023 }
16d20bd9
AD
2024 SvCUR_set(buf_sv, old_len + len) ;
2025 } else {
2026 /* Want a line */
3280af22
NIS
2027 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2028 if (PerlIO_error(PL_rsfp))
37120919
AD
2029 return -1; /* error */
2030 else
2031 return 0 ; /* end of file */
2032 }
16d20bd9
AD
2033 }
2034 return SvCUR(buf_sv);
2035 }
2036 /* Skip this filter slot if filter has been deleted */
3280af22 2037 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
f4c556ac
GS
2038 DEBUG_P(PerlIO_printf(Perl_debug_log,
2039 "filter_read %d: skipped (filter deleted)\n",
2040 idx));
16d20bd9
AD
2041 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2042 }
2043 /* Get function pointer hidden within datasv */
4755096e 2044 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2045 DEBUG_P(PerlIO_printf(Perl_debug_log,
2046 "filter_read %d: via function %p (%s)\n",
2047 idx, funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2048 /* Call function. The function is expected to */
2049 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2050 /* Return: <0:error, =0:eof, >0:not eof */
0cb96387 2051 return (*funcp)(aTHXo_ idx, buf_sv, maxlen);
16d20bd9
AD
2052}
2053
76e3520e 2054STATIC char *
cea2e8a9 2055S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2056{
c39cd008 2057#ifdef PERL_CR_FILTER
3280af22 2058 if (!PL_rsfp_filters) {
c39cd008 2059 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2060 }
2061#endif
3280af22 2062 if (PL_rsfp_filters) {
16d20bd9 2063
55497cff 2064 if (!append)
2065 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2066 if (FILTER_READ(0, sv, 0) > 0)
2067 return ( SvPVX(sv) ) ;
2068 else
2069 return Nullch ;
2070 }
9d116dd7 2071 else
fd049845 2072 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2073}
2074
01ec43d0
GS
2075STATIC HV *
2076S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2077{
2078 GV *gv;
2079
01ec43d0 2080 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2081 return PL_curstash;
2082
2083 if (len > 2 &&
2084 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2085 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2086 {
2087 return GvHV(gv); /* Foo:: */
def3634b
GS
2088 }
2089
2090 /* use constant CLASS => 'MyClass' */
2091 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2092 SV *sv;
2093 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2094 pkgname = SvPV_nolen(sv);
2095 }
2096 }
2097
2098 return gv_stashpv(pkgname, FALSE);
2099}
a0d0e21e 2100
748a9306
LW
2101#ifdef DEBUGGING
2102 static char* exp_name[] =
09bef843
SB
2103 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
2104 "ATTRTERM", "TERMBLOCK"
2105 };
748a9306 2106#endif
463ee0b2 2107
02aa26ce
NT
2108/*
2109 yylex
2110
2111 Works out what to call the token just pulled out of the input
2112 stream. The yacc parser takes care of taking the ops we return and
2113 stitching them into a tree.
2114
2115 Returns:
2116 PRIVATEREF
2117
2118 Structure:
2119 if read an identifier
2120 if we're in a my declaration
2121 croak if they tried to say my($foo::bar)
2122 build the ops for a my() declaration
2123 if it's an access to a my() variable
2124 are we in a sort block?
2125 croak if my($a); $a <=> $b
2126 build ops for access to a my() variable
2127 if in a dq string, and they've said @foo and we can't find @foo
2128 croak
2129 build ops for a bareword
2130 if we already built the token before, use it.
2131*/
2132
dba4d153 2133#ifdef USE_PURE_BISON
864dbfa3 2134int
dba4d153 2135Perl_yylex_r(pTHX_ YYSTYPE *lvalp, int *lcharp)
378cc40b 2136{
20141f0e
IRC
2137 int r;
2138
6f202aea 2139 yyactlevel++;
20141f0e
IRC
2140 yylval_pointer[yyactlevel] = lvalp;
2141 yychar_pointer[yyactlevel] = lcharp;
b73d6f50
IRC
2142 if (yyactlevel >= YYMAXLEVEL)
2143 Perl_croak(aTHX_ "panic: YYMAXLEVEL");
20141f0e 2144
dba4d153 2145 r = Perl_yylex(aTHX);
20141f0e 2146
d8ae6756
IRC
2147 if (yyactlevel > 0)
2148 yyactlevel--;
20141f0e
IRC
2149
2150 return r;
2151}
dba4d153 2152#endif
20141f0e 2153
dba4d153
JH
2154#ifdef __SC__
2155#pragma segment Perl_yylex
2156#endif
dba4d153 2157int
dba4d153 2158Perl_yylex(pTHX)
20141f0e 2159{
79072805 2160 register char *s;
378cc40b 2161 register char *d;
79072805 2162 register I32 tmp;
463ee0b2 2163 STRLEN len;
161b471a
NIS
2164 GV *gv = Nullgv;
2165 GV **gvp = 0;
aa7440fb 2166 bool bof = FALSE;
a687059c 2167
02aa26ce 2168 /* check if there's an identifier for us to look at */
3280af22 2169 if (PL_pending_ident) {
02aa26ce 2170 /* pit holds the identifier we read and pending_ident is reset */
3280af22
NIS
2171 char pit = PL_pending_ident;
2172 PL_pending_ident = 0;
bbce6d69 2173
607df283 2174 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2175 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
607df283 2176
02aa26ce
NT
2177 /* if we're in a my(), we can't allow dynamics here.
2178 $foo'bar has already been turned into $foo::bar, so
2179 just check for colons.
2180
2181 if it's a legal name, the OP is a PADANY.
2182 */
3280af22 2183 if (PL_in_my) {
77ca0c92 2184 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
1ec3e8de
GS
2185 if (strchr(PL_tokenbuf,':'))
2186 yyerror(Perl_form(aTHX_ "No package name allowed for "
2187 "variable %s in \"our\"",
2188 PL_tokenbuf));
77ca0c92
LW
2189 tmp = pad_allocmy(PL_tokenbuf);
2190 }
2191 else {
2192 if (strchr(PL_tokenbuf,':'))
2193 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
02aa26ce 2194
77ca0c92
LW
2195 yylval.opval = newOP(OP_PADANY, 0);
2196 yylval.opval->op_targ = pad_allocmy(PL_tokenbuf);
2197 return PRIVATEREF;
2198 }
bbce6d69 2199 }
2200
4e553d73 2201 /*
02aa26ce
NT
2202 build the ops for accesses to a my() variable.
2203
2204 Deny my($a) or my($b) in a sort block, *if* $a or $b is
2205 then used in a comparison. This catches most, but not
2206 all cases. For instance, it catches
2207 sort { my($a); $a <=> $b }
2208 but not
2209 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
2210 (although why you'd do that is anyone's guess).
2211 */
2212
3280af22 2213 if (!strchr(PL_tokenbuf,':')) {
a863c7d1 2214#ifdef USE_THREADS
54b9620d 2215 /* Check for single character per-thread SVs */
3280af22
NIS
2216 if (PL_tokenbuf[0] == '$' && PL_tokenbuf[2] == '\0'
2217 && !isALPHA(PL_tokenbuf[1]) /* Rule out obvious non-threadsvs */
2218 && (tmp = find_threadsv(&PL_tokenbuf[1])) != NOT_IN_PAD)
554b3eca 2219 {
2faa37cc 2220 yylval.opval = newOP(OP_THREADSV, 0);
a863c7d1
MB
2221 yylval.opval->op_targ = tmp;
2222 return PRIVATEREF;
2223 }
2224#endif /* USE_THREADS */
3280af22 2225 if ((tmp = pad_findmy(PL_tokenbuf)) != NOT_IN_PAD) {
f472eb5c 2226 SV *namesv = AvARRAY(PL_comppad_name)[tmp];
77ca0c92 2227 /* might be an "our" variable" */
f472eb5c 2228 if (SvFLAGS(namesv) & SVpad_OUR) {
77ca0c92 2229 /* build ops for a bareword */
f472eb5c
GS
2230 SV *sym = newSVpv(HvNAME(GvSTASH(namesv)),0);
2231 sv_catpvn(sym, "::", 2);
2232 sv_catpv(sym, PL_tokenbuf+1);
2233 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
77ca0c92 2234 yylval.opval->op_private = OPpCONST_ENTERED;
f472eb5c 2235 gv_fetchpv(SvPVX(sym),
77ca0c92 2236 (PL_in_eval
f472eb5c
GS
2237 ? (GV_ADDMULTI | GV_ADDINEVAL)
2238 : TRUE
77ca0c92
LW
2239 ),
2240 ((PL_tokenbuf[0] == '$') ? SVt_PV
2241 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
2242 : SVt_PVHV));
2243 return WORD;
2244 }
2245
02aa26ce 2246 /* if it's a sort block and they're naming $a or $b */
3280af22
NIS
2247 if (PL_last_lop_op == OP_SORT &&
2248 PL_tokenbuf[0] == '$' &&
2249 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
2250 && !PL_tokenbuf[2])
bbce6d69 2251 {
3280af22
NIS
2252 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
2253 d < PL_bufend && *d != '\n';
a863c7d1
MB
2254 d++)
2255 {
2256 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
cea2e8a9 2257 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
3280af22 2258 PL_tokenbuf);
a863c7d1 2259 }
bbce6d69 2260 }
2261 }
bbce6d69 2262
a863c7d1
MB
2263 yylval.opval = newOP(OP_PADANY, 0);
2264 yylval.opval->op_targ = tmp;
2265 return PRIVATEREF;
2266 }
bbce6d69 2267 }
2268
02aa26ce
NT
2269 /*
2270 Whine if they've said @foo in a doublequoted string,
2271 and @foo isn't a variable we can find in the symbol
2272 table.
2273 */
3280af22
NIS
2274 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
2275 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
8593bda5
GS
2276 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
2277 && ckWARN(WARN_AMBIGUOUS))
2278 {
2279 /* Downgraded from fatal to warning 20000522 mjd */
2280 Perl_warner(aTHX_ WARN_AMBIGUOUS,
2281 "Possible unintended interpolation of %s in string",
2282 PL_tokenbuf);
2283 }
bbce6d69 2284 }
2285
02aa26ce 2286 /* build ops for a bareword */
3280af22 2287 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
bbce6d69 2288 yylval.opval->op_private = OPpCONST_ENTERED;
3280af22
NIS
2289 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
2290 ((PL_tokenbuf[0] == '$') ? SVt_PV
2291 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
bbce6d69 2292 : SVt_PVHV));
2293 return WORD;
2294 }
2295
02aa26ce
NT
2296 /* no identifier pending identification */
2297
3280af22 2298 switch (PL_lex_state) {
79072805
LW
2299#ifdef COMMENTARY
2300 case LEX_NORMAL: /* Some compilers will produce faster */
2301 case LEX_INTERPNORMAL: /* code if we comment these out. */
2302 break;
2303#endif
2304
09bef843 2305 /* when we've already built the next token, just pull it out of the queue */
79072805 2306 case LEX_KNOWNEXT:
3280af22
NIS
2307 PL_nexttoke--;
2308 yylval = PL_nextval[PL_nexttoke];
2309 if (!PL_nexttoke) {
2310 PL_lex_state = PL_lex_defer;
2311 PL_expect = PL_lex_expect;
2312 PL_lex_defer = LEX_NORMAL;
463ee0b2 2313 }
607df283 2314 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2315 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2316 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2317
3280af22 2318 return(PL_nexttype[PL_nexttoke]);
79072805 2319
02aa26ce 2320 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2321 when we get here, PL_bufptr is at the \
02aa26ce 2322 */
79072805
LW
2323 case LEX_INTERPCASEMOD:
2324#ifdef DEBUGGING
3280af22 2325 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2326 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2327#endif
02aa26ce 2328 /* handle \E or end of string */
3280af22 2329 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2330 char oldmod;
02aa26ce
NT
2331
2332 /* if at a \E */
3280af22
NIS
2333 if (PL_lex_casemods) {
2334 oldmod = PL_lex_casestack[--PL_lex_casemods];
2335 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2336
3280af22
NIS
2337 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2338 PL_bufptr += 2;
2339 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2340 }
79072805
LW
2341 return ')';
2342 }
3280af22
NIS
2343 if (PL_bufptr != PL_bufend)
2344 PL_bufptr += 2;
2345 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2346 return yylex();
79072805
LW
2347 }
2348 else {
607df283 2349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2350 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2351 s = PL_bufptr + 1;
79072805
LW
2352 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2353 tmp = *s, *s = s[2], s[2] = tmp; /* misordered... */
a0d0e21e 2354 if (strchr("LU", *s) &&
3280af22 2355 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U')))
a0d0e21e 2356 {
3280af22 2357 PL_lex_casestack[--PL_lex_casemods] = '\0';
a0d0e21e
LW
2358 return ')';
2359 }
3280af22
NIS
2360 if (PL_lex_casemods > 10) {
2361 char* newlb = Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2362 if (newlb != PL_lex_casestack) {
a0d0e21e 2363 SAVEFREEPV(newlb);
3280af22 2364 PL_lex_casestack = newlb;
a0d0e21e
LW
2365 }
2366 }
3280af22
NIS
2367 PL_lex_casestack[PL_lex_casemods++] = *s;
2368 PL_lex_casestack[PL_lex_casemods] = '\0';
2369 PL_lex_state = LEX_INTERPCONCAT;
2370 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
2371 force_next('(');
2372 if (*s == 'l')
3280af22 2373 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
79072805 2374 else if (*s == 'u')
3280af22 2375 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
79072805 2376 else if (*s == 'L')
3280af22 2377 PL_nextval[PL_nexttoke].ival = OP_LC;
79072805 2378 else if (*s == 'U')
3280af22 2379 PL_nextval[PL_nexttoke].ival = OP_UC;
a0d0e21e 2380 else if (*s == 'Q')
3280af22 2381 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
79072805 2382 else
cea2e8a9 2383 Perl_croak(aTHX_ "panic: yylex");
3280af22 2384 PL_bufptr = s + 1;
79072805 2385 force_next(FUNC);
3280af22
NIS
2386 if (PL_lex_starts) {
2387 s = PL_bufptr;
2388 PL_lex_starts = 0;
79072805
LW
2389 Aop(OP_CONCAT);
2390 }
2391 else
cea2e8a9 2392 return yylex();
79072805
LW
2393 }
2394
55497cff 2395 case LEX_INTERPPUSH:
2396 return sublex_push();
2397
79072805 2398 case LEX_INTERPSTART:
3280af22 2399 if (PL_bufptr == PL_bufend)
79072805 2400 return sublex_done();
607df283 2401 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2402 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2403 PL_expect = XTERM;
2404 PL_lex_dojoin = (*PL_bufptr == '@');
2405 PL_lex_state = LEX_INTERPNORMAL;
2406 if (PL_lex_dojoin) {
2407 PL_nextval[PL_nexttoke].ival = 0;
79072805 2408 force_next(',');
554b3eca 2409#ifdef USE_THREADS
533c011a
NIS
2410 PL_nextval[PL_nexttoke].opval = newOP(OP_THREADSV, 0);
2411 PL_nextval[PL_nexttoke].opval->op_targ = find_threadsv("\"");
554b3eca
MB
2412 force_next(PRIVATEREF);
2413#else
a0d0e21e 2414 force_ident("\"", '$');
554b3eca 2415#endif /* USE_THREADS */
3280af22 2416 PL_nextval[PL_nexttoke].ival = 0;
79072805 2417 force_next('$');
3280af22 2418 PL_nextval[PL_nexttoke].ival = 0;
79072805 2419 force_next('(');
3280af22 2420 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2421 force_next(FUNC);
2422 }
3280af22
NIS
2423 if (PL_lex_starts++) {
2424 s = PL_bufptr;
79072805
LW
2425 Aop(OP_CONCAT);
2426 }
cea2e8a9 2427 return yylex();
79072805
LW
2428
2429 case LEX_INTERPENDMAYBE:
3280af22
NIS
2430 if (intuit_more(PL_bufptr)) {
2431 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2432 break;
2433 }
2434 /* FALL THROUGH */
2435
2436 case LEX_INTERPEND:
3280af22
NIS
2437 if (PL_lex_dojoin) {
2438 PL_lex_dojoin = FALSE;
2439 PL_lex_state = LEX_INTERPCONCAT;
79072805
LW
2440 return ')';
2441 }
43a16006 2442 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2443 && SvEVALED(PL_lex_repl))
43a16006 2444 {
e9fa98b2 2445 if (PL_bufptr != PL_bufend)
cea2e8a9 2446 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2447 PL_lex_repl = Nullsv;
2448 }
79072805
LW
2449 /* FALLTHROUGH */
2450 case LEX_INTERPCONCAT:
2451#ifdef DEBUGGING
3280af22 2452 if (PL_lex_brackets)
cea2e8a9 2453 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2454#endif
3280af22 2455 if (PL_bufptr == PL_bufend)
79072805
LW
2456 return sublex_done();
2457
3280af22
NIS
2458 if (SvIVX(PL_linestr) == '\'') {
2459 SV *sv = newSVsv(PL_linestr);
2460 if (!PL_lex_inpat)
76e3520e 2461 sv = tokeq(sv);
3280af22 2462 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2463 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2464 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2465 s = PL_bufend;
79072805
LW
2466 }
2467 else {
3280af22 2468 s = scan_const(PL_bufptr);
79072805 2469 if (*s == '\\')
3280af22 2470 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2471 else
3280af22 2472 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2473 }
2474
3280af22
NIS
2475 if (s != PL_bufptr) {
2476 PL_nextval[PL_nexttoke] = yylval;
2477 PL_expect = XTERM;
79072805 2478 force_next(THING);
3280af22 2479 if (PL_lex_starts++)
79072805
LW
2480 Aop(OP_CONCAT);
2481 else {
3280af22 2482 PL_bufptr = s;
cea2e8a9 2483 return yylex();
79072805
LW
2484 }
2485 }
2486
cea2e8a9 2487 return yylex();
a0d0e21e 2488 case LEX_FORMLINE:
3280af22
NIS
2489 PL_lex_state = LEX_NORMAL;
2490 s = scan_formline(PL_bufptr);
2491 if (!PL_lex_formbrack)
a0d0e21e
LW
2492 goto rightbracket;
2493 OPERATOR(';');
79072805
LW
2494 }
2495
3280af22
NIS
2496 s = PL_bufptr;
2497 PL_oldoldbufptr = PL_oldbufptr;
2498 PL_oldbufptr = s;
607df283 2499 DEBUG_T( {
bf49b057
GS
2500 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at %s\n",
2501 exp_name[PL_expect], s);
5f80b19c 2502 } );
463ee0b2
LW
2503
2504 retry:
378cc40b
LW
2505 switch (*s) {
2506 default:
7e2040f0 2507 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2508 goto keylookup;
cea2e8a9 2509 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2510 case 4:
2511 case 26:
2512 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2513 case 0:
3280af22
NIS
2514 if (!PL_rsfp) {
2515 PL_last_uni = 0;
2516 PL_last_lop = 0;
2517 if (PL_lex_brackets)
d98d5fff 2518 yyerror("Missing right curly or square bracket");
4e553d73 2519 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2520 "### Tokener got EOF\n");
5f80b19c 2521 } );
79072805 2522 TOKEN(0);
463ee0b2 2523 }
3280af22 2524 if (s++ < PL_bufend)
a687059c 2525 goto retry; /* ignore stray nulls */
3280af22
NIS
2526 PL_last_uni = 0;
2527 PL_last_lop = 0;
2528 if (!PL_in_eval && !PL_preambled) {
2529 PL_preambled = TRUE;
2530 sv_setpv(PL_linestr,incl_perldb());
2531 if (SvCUR(PL_linestr))
2532 sv_catpv(PL_linestr,";");
2533 if (PL_preambleav){
2534 while(AvFILLp(PL_preambleav) >= 0) {
2535 SV *tmpsv = av_shift(PL_preambleav);
2536 sv_catsv(PL_linestr, tmpsv);
2537 sv_catpv(PL_linestr, ";");
91b7def8 2538 sv_free(tmpsv);
2539 }
3280af22
NIS
2540 sv_free((SV*)PL_preambleav);
2541 PL_preambleav = NULL;
91b7def8 2542 }
3280af22
NIS
2543 if (PL_minus_n || PL_minus_p) {
2544 sv_catpv(PL_linestr, "LINE: while (<>) {");
2545 if (PL_minus_l)
2546 sv_catpv(PL_linestr,"chomp;");
2547 if (PL_minus_a) {
8fd239a7
CS
2548 GV* gv = gv_fetchpv("::F", TRUE, SVt_PVAV);
2549 if (gv)
2550 GvIMPORTED_AV_on(gv);
3280af22
NIS
2551 if (PL_minus_F) {
2552 if (strchr("/'\"", *PL_splitstr)
2553 && strchr(PL_splitstr + 1, *PL_splitstr))
cea2e8a9 2554 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s);", PL_splitstr);
54310121 2555 else {
2556 char delim;
2557 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2558 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2559 delim = *s;
cea2e8a9 2560 Perl_sv_catpvf(aTHX_ PL_linestr, "@F=split(%s%c",
46fc3d4c 2561 "q" + (delim == '\''), delim);
3280af22 2562 for (s = PL_splitstr; *s; s++) {
54310121 2563 if (*s == '\\')
3280af22
NIS
2564 sv_catpvn(PL_linestr, "\\", 1);
2565 sv_catpvn(PL_linestr, s, 1);
54310121 2566 }
cea2e8a9 2567 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2568 }
2304df62
AD
2569 }
2570 else
3280af22 2571 sv_catpv(PL_linestr,"@F=split(' ');");
2304df62 2572 }
79072805 2573 }
3280af22
NIS
2574 sv_catpv(PL_linestr, "\n");
2575 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2576 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2577 PL_last_lop = PL_last_uni = Nullch;
3280af22 2578 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2579 SV *sv = NEWSV(85,0);
2580
2581 sv_upgrade(sv, SVt_PVMG);
3280af22 2582 sv_setsv(sv,PL_linestr);
57843af0 2583 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2584 }
79072805 2585 goto retry;
a687059c 2586 }
e929a76b 2587 do {
aa7440fb 2588 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2589 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2590 fake_eof:
2591 if (PL_rsfp) {
2592 if (PL_preprocess && !PL_in_eval)
2593 (void)PerlProc_pclose(PL_rsfp);
2594 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2595 PerlIO_clearerr(PL_rsfp);
2596 else
2597 (void)PerlIO_close(PL_rsfp);
2598 PL_rsfp = Nullfp;
2599 PL_doextract = FALSE;
2600 }
2601 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2602 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2603 sv_catpv(PL_linestr,";}");
2604 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2605 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2606 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2607 PL_minus_n = PL_minus_p = 0;
2608 goto retry;
2609 }
2610 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2611 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2612 sv_setpv(PL_linestr,"");
2613 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2614 }
2615 /* if it looks like the start of a BOM, check if it in fact is */
2616 else if (bof && (!*s || *(U8*)s == 0xEF || *(U8*)s >= 0xFE)) {
226017aa 2617#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2618# ifdef __GNU_LIBRARY__
2619# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2620# define FTELL_FOR_PIPE_IS_BROKEN
2621# endif
e3f494f1
JH
2622# else
2623# ifdef __GLIBC__
2624# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2625# define FTELL_FOR_PIPE_IS_BROKEN
2626# endif
2627# endif
226017aa
DD
2628# endif
2629#endif
2630#ifdef FTELL_FOR_PIPE_IS_BROKEN
2631 /* This loses the possibility to detect the bof
2632 * situation on perl -P when the libc5 is being used.
2633 * Workaround? Maybe attach some extra state to PL_rsfp?
2634 */
2635 if (!PL_preprocess)
7e28d3af 2636 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2637#else
7e28d3af 2638 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2639#endif
7e28d3af 2640 if (bof) {
3280af22 2641 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2642 s = swallow_bom((U8*)s);
e929a76b 2643 }
378cc40b 2644 }
3280af22 2645 if (PL_doextract) {
a0d0e21e 2646 if (*s == '#' && s[1] == '!' && instr(s,"perl"))
3280af22 2647 PL_doextract = FALSE;
a0d0e21e
LW
2648
2649 /* Incest with pod. */
2650 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2651 sv_setpv(PL_linestr, "");
2652 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2653 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2654 PL_last_lop = PL_last_uni = Nullch;
3280af22 2655 PL_doextract = FALSE;
a0d0e21e 2656 }
4e553d73 2657 }
463ee0b2 2658 incline(s);
3280af22
NIS
2659 } while (PL_doextract);
2660 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2661 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2662 SV *sv = NEWSV(85,0);
a687059c 2663
93a17b20 2664 sv_upgrade(sv, SVt_PVMG);
3280af22 2665 sv_setsv(sv,PL_linestr);
57843af0 2666 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2667 }
3280af22 2668 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2669 PL_last_lop = PL_last_uni = Nullch;
57843af0 2670 if (CopLINE(PL_curcop) == 1) {
3280af22 2671 while (s < PL_bufend && isSPACE(*s))
79072805 2672 s++;
a0d0e21e 2673 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2674 s++;
44a8e56a 2675 d = Nullch;
3280af22 2676 if (!PL_in_eval) {
44a8e56a 2677 if (*s == '#' && *(s+1) == '!')
2678 d = s + 2;
2679#ifdef ALTERNATE_SHEBANG
2680 else {
2681 static char as[] = ALTERNATE_SHEBANG;
2682 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2683 d = s + (sizeof(as) - 1);
2684 }
2685#endif /* ALTERNATE_SHEBANG */
2686 }
2687 if (d) {
b8378b72 2688 char *ipath;
774d564b 2689 char *ipathend;
b8378b72 2690
774d564b 2691 while (isSPACE(*d))
b8378b72
CS
2692 d++;
2693 ipath = d;
774d564b 2694 while (*d && !isSPACE(*d))
2695 d++;
2696 ipathend = d;
2697
2698#ifdef ARG_ZERO_IS_SCRIPT
2699 if (ipathend > ipath) {
2700 /*
2701 * HP-UX (at least) sets argv[0] to the script name,
2702 * which makes $^X incorrect. And Digital UNIX and Linux,
2703 * at least, set argv[0] to the basename of the Perl
2704 * interpreter. So, having found "#!", we'll set it right.
2705 */
2706 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV));
2707 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2708 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2709 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2710 SvSETMAGIC(x);
2711 }
774d564b 2712 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2713 }
774d564b 2714#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2715
2716 /*
2717 * Look for options.
2718 */
748a9306 2719 d = instr(s,"perl -");
84e30d1a 2720 if (!d) {
748a9306 2721 d = instr(s,"perl");
84e30d1a
GS
2722#if defined(DOSISH)
2723 /* avoid getting into infinite loops when shebang
2724 * line contains "Perl" rather than "perl" */
2725 if (!d) {
2726 for (d = ipathend-4; d >= ipath; --d) {
2727 if ((*d == 'p' || *d == 'P')
2728 && !ibcmp(d, "perl", 4))
2729 {
2730 break;
2731 }
2732 }
2733 if (d < ipath)
2734 d = Nullch;
2735 }
2736#endif
2737 }
44a8e56a 2738#ifdef ALTERNATE_SHEBANG
2739 /*
2740 * If the ALTERNATE_SHEBANG on this system starts with a
2741 * character that can be part of a Perl expression, then if
2742 * we see it but not "perl", we're probably looking at the
2743 * start of Perl code, not a request to hand off to some
2744 * other interpreter. Similarly, if "perl" is there, but
2745 * not in the first 'word' of the line, we assume the line
2746 * contains the start of the Perl program.
44a8e56a 2747 */
2748 if (d && *s != '#') {
774d564b 2749 char *c = ipath;
44a8e56a 2750 while (*c && !strchr("; \t\r\n\f\v#", *c))
2751 c++;
2752 if (c < d)
2753 d = Nullch; /* "perl" not in first word; ignore */
2754 else
2755 *s = '#'; /* Don't try to parse shebang line */
2756 }
774d564b 2757#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2758#ifndef MACOS_TRADITIONAL
748a9306 2759 if (!d &&
44a8e56a 2760 *s == '#' &&
774d564b 2761 ipathend > ipath &&
3280af22 2762 !PL_minus_c &&
748a9306 2763 !instr(s,"indir") &&
3280af22 2764 instr(PL_origargv[0],"perl"))
748a9306 2765 {
9f68db38 2766 char **newargv;
9f68db38 2767
774d564b 2768 *ipathend = '\0';
2769 s = ipathend + 1;
3280af22 2770 while (s < PL_bufend && isSPACE(*s))
9f68db38 2771 s++;
3280af22
NIS
2772 if (s < PL_bufend) {
2773 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2774 newargv[1] = s;
3280af22 2775 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2776 s++;
2777 *s = '\0';
3280af22 2778 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2779 }
2780 else
3280af22 2781 newargv = PL_origargv;
774d564b 2782 newargv[0] = ipath;
b4748376 2783 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
cea2e8a9 2784 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2785 }
bf4acbe4 2786#endif
748a9306 2787 if (d) {
3280af22
NIS
2788 U32 oldpdb = PL_perldb;
2789 bool oldn = PL_minus_n;
2790 bool oldp = PL_minus_p;
748a9306
LW
2791
2792 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2793 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2794
2795 if (*d++ == '-') {
8cc95fdb 2796 do {
2797 if (*d == 'M' || *d == 'm') {
2798 char *m = d;
2799 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2800 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2801 (int)(d - m), m);
2802 }
2803 d = moreswitches(d);
2804 } while (d);
155aba94
GS
2805 if ((PERLDB_LINE && !oldpdb) ||
2806 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2807 /* if we have already added "LINE: while (<>) {",
2808 we must not do it again */
748a9306 2809 {
3280af22
NIS
2810 sv_setpv(PL_linestr, "");
2811 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2812 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2813 PL_last_lop = PL_last_uni = Nullch;
3280af22 2814 PL_preambled = FALSE;
84902520 2815 if (PERLDB_LINE)
3280af22 2816 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2817 goto retry;
2818 }
a0d0e21e 2819 }
79072805 2820 }
9f68db38 2821 }
79072805 2822 }
3280af22
NIS
2823 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2824 PL_bufptr = s;
2825 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2826 return yylex();
ae986130 2827 }
378cc40b 2828 goto retry;
4fdae800 2829 case '\r':
6a27c188 2830#ifdef PERL_STRICT_CR
cea2e8a9 2831 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2832 Perl_croak(aTHX_
cc507455 2833 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2834#endif
4fdae800 2835 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2836#ifdef MACOS_TRADITIONAL
2837 case '\312':
2838#endif
378cc40b
LW
2839 s++;
2840 goto retry;
378cc40b 2841 case '#':
e929a76b 2842 case '\n':
3280af22 2843 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2844 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2845 /* handle eval qq[#line 1 "foo"\n ...] */
2846 CopLINE_dec(PL_curcop);
2847 incline(s);
2848 }
3280af22 2849 d = PL_bufend;
a687059c 2850 while (s < d && *s != '\n')
378cc40b 2851 s++;
0f85fab0 2852 if (s < d)
378cc40b 2853 s++;
78c267c1 2854 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2855 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2856 incline(s);
3280af22
NIS
2857 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2858 PL_bufptr = s;
2859 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2860 return yylex();
a687059c 2861 }
378cc40b 2862 }
a687059c 2863 else {
378cc40b 2864 *s = '\0';
3280af22 2865 PL_bufend = s;
a687059c 2866 }
378cc40b
LW
2867 goto retry;
2868 case '-':
79072805 2869 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2870 I32 ftst = 0;
2871
378cc40b 2872 s++;
3280af22 2873 PL_bufptr = s;
748a9306
LW
2874 tmp = *s++;
2875
bf4acbe4 2876 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2877 s++;
2878
2879 if (strnEQ(s,"=>",2)) {
3280af22 2880 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2881 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2882 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2883 } );
748a9306
LW
2884 OPERATOR('-'); /* unary minus */
2885 }
3280af22 2886 PL_last_uni = PL_oldbufptr;
748a9306 2887 switch (tmp) {
e5edeb50
JH
2888 case 'r': ftst = OP_FTEREAD; break;
2889 case 'w': ftst = OP_FTEWRITE; break;
2890 case 'x': ftst = OP_FTEEXEC; break;
2891 case 'o': ftst = OP_FTEOWNED; break;
2892 case 'R': ftst = OP_FTRREAD; break;
2893 case 'W': ftst = OP_FTRWRITE; break;
2894 case 'X': ftst = OP_FTREXEC; break;
2895 case 'O': ftst = OP_FTROWNED; break;
2896 case 'e': ftst = OP_FTIS; break;
2897 case 'z': ftst = OP_FTZERO; break;
2898 case 's': ftst = OP_FTSIZE; break;
2899 case 'f': ftst = OP_FTFILE; break;
2900 case 'd': ftst = OP_FTDIR; break;
2901 case 'l': ftst = OP_FTLINK; break;
2902 case 'p': ftst = OP_FTPIPE; break;
2903 case 'S': ftst = OP_FTSOCK; break;
2904 case 'u': ftst = OP_FTSUID; break;
2905 case 'g': ftst = OP_FTSGID; break;
2906 case 'k': ftst = OP_FTSVTX; break;
2907 case 'b': ftst = OP_FTBLK; break;
2908 case 'c': ftst = OP_FTCHR; break;
2909 case 't': ftst = OP_FTTTY; break;
2910 case 'T': ftst = OP_FTTEXT; break;
2911 case 'B': ftst = OP_FTBINARY; break;
2912 case 'M': case 'A': case 'C':
2913 gv_fetchpv("\024",TRUE, SVt_PV);
2914 switch (tmp) {
2915 case 'M': ftst = OP_FTMTIME; break;
2916 case 'A': ftst = OP_FTATIME; break;
2917 case 'C': ftst = OP_FTCTIME; break;
2918 default: break;
2919 }
2920 break;
378cc40b 2921 default:
378cc40b
LW
2922 break;
2923 }
e5edeb50
JH
2924 if (ftst) {
2925 PL_last_lop_op = ftst;
4e553d73 2926 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2927 "### Saw file test %c\n", (int)ftst);
5f80b19c 2928 } );
e5edeb50
JH
2929 FTST(ftst);
2930 }
2931 else {
2932 /* Assume it was a minus followed by a one-letter named
2933 * subroutine call (or a -bareword), then. */
95c31fe3 2934 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848
RB
2935 "### %c looked like a file test but was not\n",
2936 (int)ftst);
5f80b19c 2937 } );
e5edeb50
JH
2938 s -= 2;
2939 }
378cc40b 2940 }
a687059c
LW
2941 tmp = *s++;
2942 if (*s == tmp) {
2943 s++;
3280af22 2944 if (PL_expect == XOPERATOR)
79072805
LW
2945 TERM(POSTDEC);
2946 else
2947 OPERATOR(PREDEC);
2948 }
2949 else if (*s == '>') {
2950 s++;
2951 s = skipspace(s);
7e2040f0 2952 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 2953 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 2954 TOKEN(ARROW);
79072805 2955 }
748a9306
LW
2956 else if (*s == '$')
2957 OPERATOR(ARROW);
463ee0b2 2958 else
748a9306 2959 TERM(ARROW);
a687059c 2960 }
3280af22 2961 if (PL_expect == XOPERATOR)
79072805
LW
2962 Aop(OP_SUBTRACT);
2963 else {
3280af22 2964 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2965 check_uni();
79072805 2966 OPERATOR('-'); /* unary minus */
2f3197b3 2967 }
79072805 2968
378cc40b 2969 case '+':
a687059c
LW
2970 tmp = *s++;
2971 if (*s == tmp) {
378cc40b 2972 s++;
3280af22 2973 if (PL_expect == XOPERATOR)
79072805
LW
2974 TERM(POSTINC);
2975 else
2976 OPERATOR(PREINC);
378cc40b 2977 }
3280af22 2978 if (PL_expect == XOPERATOR)
79072805
LW
2979 Aop(OP_ADD);
2980 else {
3280af22 2981 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 2982 check_uni();
a687059c 2983 OPERATOR('+');
2f3197b3 2984 }
a687059c 2985
378cc40b 2986 case '*':
3280af22
NIS
2987 if (PL_expect != XOPERATOR) {
2988 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
2989 PL_expect = XOPERATOR;
2990 force_ident(PL_tokenbuf, '*');
2991 if (!*PL_tokenbuf)
a0d0e21e 2992 PREREF('*');
79072805 2993 TERM('*');
a687059c 2994 }
79072805
LW
2995 s++;
2996 if (*s == '*') {
a687059c 2997 s++;
79072805 2998 PWop(OP_POW);
a687059c 2999 }
79072805
LW
3000 Mop(OP_MULTIPLY);
3001
378cc40b 3002 case '%':
3280af22 3003 if (PL_expect == XOPERATOR) {
bbce6d69 3004 ++s;
3005 Mop(OP_MODULO);
a687059c 3006 }
3280af22
NIS
3007 PL_tokenbuf[0] = '%';
3008 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3009 if (!PL_tokenbuf[1]) {
3010 if (s == PL_bufend)
bbce6d69 3011 yyerror("Final % should be \\% or %name");
3012 PREREF('%');
a687059c 3013 }
3280af22 3014 PL_pending_ident = '%';
bbce6d69 3015 TERM('%');
a687059c 3016
378cc40b 3017 case '^':
79072805 3018 s++;
a0d0e21e 3019 BOop(OP_BIT_XOR);
79072805 3020 case '[':
3280af22 3021 PL_lex_brackets++;
79072805 3022 /* FALL THROUGH */
378cc40b 3023 case '~':
378cc40b 3024 case ',':
378cc40b
LW
3025 tmp = *s++;
3026 OPERATOR(tmp);
a0d0e21e
LW
3027 case ':':
3028 if (s[1] == ':') {
3029 len = 0;
3030 goto just_a_word;
3031 }
3032 s++;
09bef843
SB
3033 switch (PL_expect) {
3034 OP *attrs;
3035 case XOPERATOR:
3036 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3037 break;
3038 PL_bufptr = s; /* update in case we back off */
3039 goto grabattrs;
3040 case XATTRBLOCK:
3041 PL_expect = XBLOCK;
3042 goto grabattrs;
3043 case XATTRTERM:
3044 PL_expect = XTERMBLOCK;
3045 grabattrs:
3046 s = skipspace(s);
3047 attrs = Nullop;
7e2040f0 3048 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3049 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3050 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3051 if (tmp < 0) tmp = -tmp;
3052 switch (tmp) {
3053 case KEY_or:
3054 case KEY_and:
3055 case KEY_for:
3056 case KEY_unless:
3057 case KEY_if:
3058 case KEY_while:
3059 case KEY_until:
3060 goto got_attrs;
3061 default:
3062 break;
3063 }
3064 }
09bef843
SB
3065 if (*d == '(') {
3066 d = scan_str(d,TRUE,TRUE);
3067 if (!d) {
09bef843
SB
3068 /* MUST advance bufptr here to avoid bogus
3069 "at end of line" context messages from yyerror().
3070 */
3071 PL_bufptr = s + len;
3072 yyerror("Unterminated attribute parameter in attribute list");
3073 if (attrs)
3074 op_free(attrs);
3075 return 0; /* EOF indicator */
3076 }
3077 }
3078 if (PL_lex_stuff) {
3079 SV *sv = newSVpvn(s, len);
3080 sv_catsv(sv, PL_lex_stuff);
3081 attrs = append_elem(OP_LIST, attrs,
3082 newSVOP(OP_CONST, 0, sv));
3083 SvREFCNT_dec(PL_lex_stuff);
3084 PL_lex_stuff = Nullsv;
3085 }
3086 else {
78f9721b
SM
3087 if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
3088 CvLVALUE_on(PL_compcv);
3089 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3090 CvLOCKED_on(PL_compcv);
3091 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3092 CvMETHOD_on(PL_compcv);
87ecf892
DM
3093#ifdef USE_ITHREADS
3094 else if (PL_in_my == KEY_our && len == 6 && strnEQ(s, "shared", len))
3095 GvSHARED_on(cGVOPx_gv(yylval.opval));
3096#endif
78f9721b
SM
3097 /* After we've set the flags, it could be argued that
3098 we don't need to do the attributes.pm-based setting
3099 process, and shouldn't bother appending recognized
3100 flags. To experiment with that, uncomment the
3101 following "else": */
0256094b 3102 else
78f9721b
SM
3103 attrs = append_elem(OP_LIST, attrs,
3104 newSVOP(OP_CONST, 0,
3105 newSVpvn(s, len)));
09bef843
SB
3106 }
3107 s = skipspace(d);
0120eecf 3108 if (*s == ':' && s[1] != ':')
09bef843 3109 s = skipspace(s+1);
0120eecf
GS
3110 else if (s == d)
3111 break; /* require real whitespace or :'s */
09bef843 3112 }
f9829d6b
GS
3113 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3114 if (*s != ';' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3115 char q = ((*s == '\'') ? '"' : '\'');
3116 /* If here for an expression, and parsed no attrs, back off. */
3117 if (tmp == '=' && !attrs) {
3118 s = PL_bufptr;
3119 break;
3120 }
3121 /* MUST advance bufptr here to avoid bogus "at end of line"
3122 context messages from yyerror().
3123 */
3124 PL_bufptr = s;
3125 if (!*s)
3126 yyerror("Unterminated attribute list");
3127 else
3128 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3129 q, *s, q));
3130 if (attrs)
3131 op_free(attrs);
3132 OPERATOR(':');
3133 }
f9829d6b 3134 got_attrs:
09bef843
SB
3135 if (attrs) {
3136 PL_nextval[PL_nexttoke].opval = attrs;
3137 force_next(THING);
3138 }
3139 TOKEN(COLONATTR);
3140 }
a0d0e21e 3141 OPERATOR(':');
8990e307
LW
3142 case '(':
3143 s++;
3280af22
NIS
3144 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3145 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3146 else
3280af22 3147 PL_expect = XTERM;
a0d0e21e 3148 TOKEN('(');
378cc40b 3149 case ';':
f4dd75d9 3150 CLINE;
378cc40b
LW
3151 tmp = *s++;
3152 OPERATOR(tmp);
3153 case ')':
378cc40b 3154 tmp = *s++;
16d20bd9
AD
3155 s = skipspace(s);
3156 if (*s == '{')
3157 PREBLOCK(tmp);
378cc40b 3158 TERM(tmp);
79072805
LW
3159 case ']':
3160 s++;
3280af22 3161 if (PL_lex_brackets <= 0)
d98d5fff 3162 yyerror("Unmatched right square bracket");
463ee0b2 3163 else
3280af22
NIS
3164 --PL_lex_brackets;
3165 if (PL_lex_state == LEX_INTERPNORMAL) {
3166 if (PL_lex_brackets == 0) {
a0d0e21e 3167 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3168 PL_lex_state = LEX_INTERPEND;
79072805
LW
3169 }
3170 }
4633a7c4 3171 TERM(']');
79072805
LW
3172 case '{':
3173 leftbracket:
79072805 3174 s++;
3280af22
NIS
3175 if (PL_lex_brackets > 100) {
3176 char* newlb = Renew(PL_lex_brackstack, PL_lex_brackets + 1, char);
3177 if (newlb != PL_lex_brackstack) {
8990e307 3178 SAVEFREEPV(newlb);
3280af22 3179 PL_lex_brackstack = newlb;
8990e307
LW
3180 }
3181 }
3280af22 3182 switch (PL_expect) {
a0d0e21e 3183 case XTERM:
3280af22 3184 if (PL_lex_formbrack) {
a0d0e21e
LW
3185 s--;
3186 PRETERMBLOCK(DO);
3187 }
3280af22
NIS
3188 if (PL_oldoldbufptr == PL_last_lop)
3189 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3190 else
3280af22 3191 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3192 OPERATOR(HASHBRACK);
a0d0e21e 3193 case XOPERATOR:
bf4acbe4 3194 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3195 s++;
44a8e56a 3196 d = s;
3280af22
NIS
3197 PL_tokenbuf[0] = '\0';
3198 if (d < PL_bufend && *d == '-') {
3199 PL_tokenbuf[0] = '-';
44a8e56a 3200 d++;
bf4acbe4 3201 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3202 d++;
3203 }
7e2040f0 3204 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3205 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3206 FALSE, &len);
bf4acbe4 3207 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3208 d++;
3209 if (*d == '}') {
3280af22 3210 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3211 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3212 if (minus)
3213 force_next('-');
748a9306
LW
3214 }
3215 }
3216 /* FALL THROUGH */
09bef843 3217 case XATTRBLOCK:
748a9306 3218 case XBLOCK:
3280af22
NIS
3219 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3220 PL_expect = XSTATE;
a0d0e21e 3221 break;
09bef843 3222 case XATTRTERM:
a0d0e21e 3223 case XTERMBLOCK:
3280af22
NIS
3224 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3225 PL_expect = XSTATE;
a0d0e21e
LW
3226 break;
3227 default: {
3228 char *t;
3280af22
NIS
3229 if (PL_oldoldbufptr == PL_last_lop)
3230 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3231 else
3280af22 3232 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3233 s = skipspace(s);
8452ff4b
SB
3234 if (*s == '}') {
3235 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3236 PL_expect = XTERM;
3237 /* This hack is to get the ${} in the message. */
3238 PL_bufptr = s+1;
3239 yyerror("syntax error");
3240 break;
3241 }
a0d0e21e 3242 OPERATOR(HASHBRACK);
8452ff4b 3243 }
b8a4b1be
GS
3244 /* This hack serves to disambiguate a pair of curlies
3245 * as being a block or an anon hash. Normally, expectation
3246 * determines that, but in cases where we're not in a
3247 * position to expect anything in particular (like inside
3248 * eval"") we have to resolve the ambiguity. This code
3249 * covers the case where the first term in the curlies is a
3250 * quoted string. Most other cases need to be explicitly
3251 * disambiguated by prepending a `+' before the opening
3252 * curly in order to force resolution as an anon hash.
3253 *
3254 * XXX should probably propagate the outer expectation
3255 * into eval"" to rely less on this hack, but that could
3256 * potentially break current behavior of eval"".
3257 * GSAR 97-07-21
3258 */
3259 t = s;
3260 if (*s == '\'' || *s == '"' || *s == '`') {
3261 /* common case: get past first string, handling escapes */
3280af22 3262 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3263 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3264 t++;
3265 t++;
a0d0e21e 3266 }
b8a4b1be 3267 else if (*s == 'q') {
3280af22 3268 if (++t < PL_bufend
b8a4b1be 3269 && (!isALNUM(*t)
3280af22 3270 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3271 && !isALNUM(*t))))
3272 {
b8a4b1be
GS
3273 char *tmps;
3274 char open, close, term;
3275 I32 brackets = 1;
3276
3280af22 3277 while (t < PL_bufend && isSPACE(*t))
b8a4b1be
GS
3278 t++;
3279 term = *t;
3280 open = term;
3281 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3282 term = tmps[5];
3283 close = term;
3284 if (open == close)
3280af22
NIS
3285 for (t++; t < PL_bufend; t++) {
3286 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3287 t++;
6d07e5e9 3288 else if (*t == open)
b8a4b1be
GS
3289 break;
3290 }
3291 else
3280af22
NIS
3292 for (t++; t < PL_bufend; t++) {
3293 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3294 t++;
6d07e5e9 3295 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3296 break;
3297 else if (*t == open)
3298 brackets++;
3299 }
3300 }
3301 t++;
a0d0e21e 3302 }
7e2040f0 3303 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3304 t += UTF8SKIP(t);
7e2040f0 3305 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3306 t += UTF8SKIP(t);
a0d0e21e 3307 }
3280af22 3308 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3309 t++;
b8a4b1be
GS
3310 /* if comma follows first term, call it an anon hash */
3311 /* XXX it could be a comma expression with loop modifiers */
3280af22 3312 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3313 || (*t == '=' && t[1] == '>')))
a0d0e21e 3314 OPERATOR(HASHBRACK);
3280af22 3315 if (PL_expect == XREF)
4e4e412b 3316 PL_expect = XTERM;
a0d0e21e 3317 else {
3280af22
NIS
3318 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3319 PL_expect = XSTATE;
a0d0e21e 3320 }
8990e307 3321 }
a0d0e21e 3322 break;
463ee0b2 3323 }
57843af0 3324 yylval.ival = CopLINE(PL_curcop);
79072805 3325 if (isSPACE(*s) || *s == '#')
3280af22 3326 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3327 TOKEN('{');
378cc40b 3328 case '}':
79072805
LW
3329 rightbracket:
3330 s++;
3280af22 3331 if (PL_lex_brackets <= 0)
d98d5fff 3332 yyerror("Unmatched right curly bracket");
463ee0b2 3333 else
3280af22 3334 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3335 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3336 PL_lex_formbrack = 0;
3337 if (PL_lex_state == LEX_INTERPNORMAL) {
3338 if (PL_lex_brackets == 0) {
9059aa12
LW
3339 if (PL_expect & XFAKEBRACK) {
3340 PL_expect &= XENUMMASK;
3280af22
NIS
3341 PL_lex_state = LEX_INTERPEND;
3342 PL_bufptr = s;
cea2e8a9 3343 return yylex(); /* ignore fake brackets */
79072805 3344 }
fa83b5b6 3345 if (*s == '-' && s[1] == '>')
3280af22 3346 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3347 else if (*s != '[' && *s != '{')
3280af22 3348 PL_lex_state = LEX_INTERPEND;
79072805
LW
3349 }
3350 }
9059aa12
LW
3351 if (PL_expect & XFAKEBRACK) {
3352 PL_expect &= XENUMMASK;
3280af22 3353 PL_bufptr = s;
cea2e8a9 3354 return yylex(); /* ignore fake brackets */
748a9306 3355 }
79072805
LW
3356 force_next('}');
3357 TOKEN(';');
378cc40b
LW
3358 case '&':
3359 s++;
3360 tmp = *s++;
3361 if (tmp == '&')
a0d0e21e 3362 AOPERATOR(ANDAND);
378cc40b 3363 s--;
3280af22 3364 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3365 if (ckWARN(WARN_SEMICOLON)
3366 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3367 {
57843af0 3368 CopLINE_dec(PL_curcop);
cea2e8a9 3369 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3370 CopLINE_inc(PL_curcop);
463ee0b2 3371 }
79072805 3372 BAop(OP_BIT_AND);
463ee0b2 3373 }
79072805 3374
3280af22
NIS
3375 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3376 if (*PL_tokenbuf) {
3377 PL_expect = XOPERATOR;
3378 force_ident(PL_tokenbuf, '&');
463ee0b2 3379 }
79072805
LW
3380 else
3381 PREREF('&');
c07a80fd 3382 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3383 TERM('&');
3384
378cc40b
LW
3385 case '|':
3386 s++;
3387 tmp = *s++;
3388 if (tmp == '|')
a0d0e21e 3389 AOPERATOR(OROR);
378cc40b 3390 s--;
79072805 3391 BOop(OP_BIT_OR);
378cc40b
LW
3392 case '=':
3393 s++;
3394 tmp = *s++;
3395 if (tmp == '=')
79072805
LW
3396 Eop(OP_EQ);
3397 if (tmp == '>')
3398 OPERATOR(',');
378cc40b 3399 if (tmp == '~')
79072805 3400 PMop(OP_MATCH);
599cee73 3401 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
cea2e8a9 3402 Perl_warner(aTHX_ WARN_SYNTAX, "Reversed %c= operator",(int)tmp);
378cc40b 3403 s--;
3280af22
NIS
3404 if (PL_expect == XSTATE && isALPHA(tmp) &&
3405 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3406 {
3280af22
NIS
3407 if (PL_in_eval && !PL_rsfp) {
3408 d = PL_bufend;
a5f75d66
AD
3409 while (s < d) {
3410 if (*s++ == '\n') {
3411 incline(s);
3412 if (strnEQ(s,"=cut",4)) {
3413 s = strchr(s,'\n');
3414 if (s)
3415 s++;
3416 else
3417 s = d;
3418 incline(s);
3419 goto retry;
3420 }
3421 }
3422 }
3423 goto retry;
3424 }
3280af22
NIS
3425 s = PL_bufend;
3426 PL_doextract = TRUE;
a0d0e21e
LW
3427 goto retry;
3428 }
3280af22 3429 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3430 char *t;
51882d45 3431#ifdef PERL_STRICT_CR
bf4acbe4 3432 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3433#else
bf4acbe4 3434 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3435#endif
a0d0e21e
LW
3436 if (*t == '\n' || *t == '#') {
3437 s--;
3280af22 3438 PL_expect = XBLOCK;
a0d0e21e
LW
3439 goto leftbracket;
3440 }
79072805 3441 }
a0d0e21e
LW
3442 yylval.ival = 0;
3443 OPERATOR(ASSIGNOP);
378cc40b
LW
3444 case '!':
3445 s++;
3446 tmp = *s++;
3447 if (tmp == '=')
79072805 3448 Eop(OP_NE);
378cc40b 3449 if (tmp == '~')
79072805 3450 PMop(OP_NOT);
378cc40b
LW
3451 s--;
3452 OPERATOR('!');
3453 case '<':
3280af22 3454 if (PL_expect != XOPERATOR) {
93a17b20 3455 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3456 check_uni();
79072805
LW
3457 if (s[1] == '<')
3458 s = scan_heredoc(s);
3459 else
3460 s = scan_inputsymbol(s);
3461 TERM(sublex_start());
378cc40b
LW
3462 }
3463 s++;
3464 tmp = *s++;
3465 if (tmp == '<')
79072805 3466 SHop(OP_LEFT_SHIFT);
395c3793
LW
3467 if (tmp == '=') {
3468 tmp = *s++;
3469 if (tmp == '>')
79072805 3470 Eop(OP_NCMP);
395c3793 3471 s--;
79072805 3472 Rop(OP_LE);
395c3793 3473 }
378cc40b 3474 s--;
79072805 3475 Rop(OP_LT);
378cc40b
LW
3476 case '>':
3477 s++;
3478 tmp = *s++;
3479 if (tmp == '>')
79072805 3480 SHop(OP_RIGHT_SHIFT);
378cc40b 3481 if (tmp == '=')
79072805 3482 Rop(OP_GE);
378cc40b 3483 s--;
79072805 3484 Rop(OP_GT);
378cc40b
LW
3485
3486 case '$':
bbce6d69 3487 CLINE;
3488
3280af22
NIS
3489 if (PL_expect == XOPERATOR) {
3490 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3491 PL_expect = XTERM;
a0d0e21e 3492 depcom();
bbce6d69 3493 return ','; /* grandfather non-comma-format format */
a0d0e21e 3494 }
8990e307 3495 }
a0d0e21e 3496
7e2040f0 3497 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3498 PL_tokenbuf[0] = '@';
376b8730
SM
3499 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3500 sizeof PL_tokenbuf - 1, FALSE);
3501 if (PL_expect == XOPERATOR)
3502 no_op("Array length", s);
3280af22 3503 if (!PL_tokenbuf[1])
a0d0e21e 3504 PREREF(DOLSHARP);
3280af22
NIS
3505 PL_expect = XOPERATOR;
3506 PL_pending_ident = '#';
463ee0b2 3507 TOKEN(DOLSHARP);
79072805 3508 }
bbce6d69 3509
3280af22 3510 PL_tokenbuf[0] = '$';
376b8730
SM
3511 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3512 sizeof PL_tokenbuf - 1, FALSE);
3513 if (PL_expect == XOPERATOR)
3514 no_op("Scalar", s);
3280af22
NIS
3515 if (!PL_tokenbuf[1]) {
3516 if (s == PL_bufend)
bbce6d69 3517 yyerror("Final $ should be \\$ or $name");
3518 PREREF('$');
8990e307 3519 }
a0d0e21e 3520
bbce6d69 3521 /* This kludge not intended to be bulletproof. */
3280af22 3522 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3523 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3524 newSViv(PL_compiling.cop_arybase));
bbce6d69 3525 yylval.opval->op_private = OPpCONST_ARYBASE;
3526 TERM(THING);
3527 }
3528
ff68c719 3529 d = s;
69d2bceb 3530 tmp = (I32)*s;
3280af22 3531 if (PL_lex_state == LEX_NORMAL)
ff68c719 3532 s = skipspace(s);
3533
3280af22 3534 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3535 char *t;
3536 if (*s == '[') {
3280af22 3537 PL_tokenbuf[0] = '@';
599cee73 3538 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3539 for(t = s + 1;
7e2040f0 3540 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3541 t++) ;
a0d0e21e 3542 if (*t++ == ',') {
3280af22
NIS
3543 PL_bufptr = skipspace(PL_bufptr);
3544 while (t < PL_bufend && *t != ']')
bbce6d69 3545 t++;
cea2e8a9 3546 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73
PM
3547 "Multidimensional syntax %.*s not supported",
3548 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3549 }
3550 }
bbce6d69 3551 }
3552 else if (*s == '{') {
3280af22 3553 PL_tokenbuf[0] = '%';
599cee73 3554 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3555 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3556 {
3280af22 3557 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3558 STRLEN len;
3559 for (t++; isSPACE(*t); t++) ;
7e2040f0 3560 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3561 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3562 for (; isSPACE(*t); t++) ;
864dbfa3 3563 if (*t == ';' && get_cv(tmpbuf, FALSE))
cea2e8a9 3564 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3565 "You need to quote \"%s\"", tmpbuf);
748a9306 3566 }
93a17b20
LW
3567 }
3568 }
2f3197b3 3569 }
bbce6d69 3570
3280af22 3571 PL_expect = XOPERATOR;
69d2bceb 3572 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3573 bool islop = (PL_last_lop == PL_oldoldbufptr);
3574 if (!islop || PL_last_lop_op == OP_GREPSTART)
3575 PL_expect = XOPERATOR;
bbce6d69 3576 else if (strchr("$@\"'`q", *s))
3280af22 3577 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3578 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3579 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3580 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3581 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3582 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3583 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3584 /* binary operators exclude handle interpretations */
3585 switch (tmp) {
3586 case -KEY_x:
3587 case -KEY_eq:
3588 case -KEY_ne:
3589 case -KEY_gt:
3590 case -KEY_lt:
3591 case -KEY_ge:
3592 case -KEY_le:
3593 case -KEY_cmp:
3594 break;
3595 default:
3280af22 3596 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3597 break;
3598 }
3599 }
68dc0745 3600 else {
3601 GV *gv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
3602 if (gv && GvCVu(gv))
3280af22 3603 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3604 }
93a17b20 3605 }
bbce6d69 3606 else if (isDIGIT(*s))
3280af22 3607 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3608 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3609 PL_expect = XTERM; /* e.g. print $fh .3 */
e0587a03 3610 else if (strchr("/?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3280af22 3611 PL_expect = XTERM; /* e.g. print $fh -1 */
e0587a03 3612 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3613 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3614 }
3280af22 3615 PL_pending_ident = '$';
79072805 3616 TOKEN('$');
378cc40b
LW
3617
3618 case '@':
3280af22 3619 if (PL_expect == XOPERATOR)
bbce6d69 3620 no_op("Array", s);
3280af22
NIS
3621 PL_tokenbuf[0] = '@';
3622 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3623 if (!PL_tokenbuf[1]) {
3624 if (s == PL_bufend)
bbce6d69 3625 yyerror("Final @ should be \\@ or @name");
3626 PREREF('@');
3627 }
3280af22 3628 if (PL_lex_state == LEX_NORMAL)
ff68c719 3629 s = skipspace(s);
3280af22 3630 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3631 if (*s == '{')
3280af22 3632 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3633
3634 /* Warn about @ where they meant $. */
599cee73 3635 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3636 if (*s == '[' || *s == '{') {
3637 char *t = s + 1;
7e2040f0 3638 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3639 t++;
3640 if (*t == '}' || *t == ']') {
3641 t++;
3280af22 3642 PL_bufptr = skipspace(PL_bufptr);
cea2e8a9 3643 Perl_warner(aTHX_ WARN_SYNTAX,
599cee73 3644 "Scalar value %.*s better written as $%.*s",
3280af22 3645 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3646 }
93a17b20
LW
3647 }
3648 }
463ee0b2 3649 }
3280af22 3650 PL_pending_ident = '@';
79072805 3651 TERM('@');
378cc40b
LW
3652
3653 case '/': /* may either be division or pattern */
3654 case '?': /* may either be conditional or pattern */
3280af22 3655 if (PL_expect != XOPERATOR) {
c277df42 3656 /* Disable warning on "study /blah/" */
4e553d73
NIS
3657 if (PL_oldoldbufptr == PL_last_uni
3658 && (*PL_last_uni != 's' || s - PL_last_uni < 5
7e2040f0
GS
3659 || memNE(PL_last_uni, "study", 5)
3660 || isALNUM_lazy_if(PL_last_uni+5,UTF)))
c277df42 3661 check_uni();
8782bef2 3662 s = scan_pat(s,OP_MATCH);
79072805 3663 TERM(sublex_start());
378cc40b
LW
3664 }
3665 tmp = *s++;
a687059c 3666 if (tmp == '/')
79072805 3667 Mop(OP_DIVIDE);
378cc40b
LW
3668 OPERATOR(tmp);
3669
3670 case '.':
51882d45
GS
3671 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3672#ifdef PERL_STRICT_CR
3673 && s[1] == '\n'
3674#else
3675 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3676#endif
3677 && (s == PL_linestart || s[-1] == '\n') )
3678 {
3280af22
NIS
3679 PL_lex_formbrack = 0;
3680 PL_expect = XSTATE;
79072805
LW
3681 goto rightbracket;
3682 }
3280af22 3683 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3684 tmp = *s++;
a687059c
LW
3685 if (*s == tmp) {
3686 s++;
2f3197b3
LW
3687 if (*s == tmp) {
3688 s++;
79072805 3689 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3690 }
3691 else
79072805 3692 yylval.ival = 0;
378cc40b 3693 OPERATOR(DOTDOT);
a687059c 3694 }
3280af22 3695 if (PL_expect != XOPERATOR)
2f3197b3 3696 check_uni();
79072805 3697 Aop(OP_CONCAT);
378cc40b
LW
3698 }
3699 /* FALL THROUGH */
3700 case '0': case '1': case '2': case '3': case '4':
3701 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3702 s = scan_num(s, &yylval);
4e553d73 3703 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3704 "### Saw number in '%s'\n", s);
5f80b19c 3705 } );
3280af22 3706 if (PL_expect == XOPERATOR)
8990e307 3707 no_op("Number",s);
79072805
LW
3708 TERM(THING);
3709
3710 case '\'':
09bef843 3711 s = scan_str(s,FALSE,FALSE);
4e553d73 3712 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3713 "### Saw string before '%s'\n", s);
5f80b19c 3714 } );
3280af22
NIS
3715 if (PL_expect == XOPERATOR) {
3716 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3717 PL_expect = XTERM;
a0d0e21e
LW
3718 depcom();
3719 return ','; /* grandfather non-comma-format format */
3720 }
463ee0b2 3721 else
8990e307 3722 no_op("String",s);
463ee0b2 3723 }
79072805 3724 if (!s)
85e6fe83 3725 missingterm((char*)0);
79072805
LW
3726 yylval.ival = OP_CONST;
3727 TERM(sublex_start());
3728
3729 case '"':
09bef843 3730 s = scan_str(s,FALSE,FALSE);
4e553d73 3731 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3732 "### Saw string before '%s'\n", s);
5f80b19c 3733 } );
3280af22
NIS
3734 if (PL_expect == XOPERATOR) {
3735 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3736 PL_expect = XTERM;
a0d0e21e
LW
3737 depcom();
3738 return ','; /* grandfather non-comma-format format */
3739 }
463ee0b2 3740 else
8990e307 3741 no_op("String",s);
463ee0b2 3742 }
79072805 3743 if (!s)
85e6fe83 3744 missingterm((char*)0);
4633a7c4 3745 yylval.ival = OP_CONST;
3280af22 3746 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3747 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3748 yylval.ival = OP_STRINGIFY;
3749 break;
3750 }
3751 }
79072805
LW
3752 TERM(sublex_start());
3753
3754 case '`':
09bef843 3755 s = scan_str(s,FALSE,FALSE);
4e553d73 3756 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3757 "### Saw backtick string before '%s'\n", s);
5f80b19c 3758 } );
3280af22 3759 if (PL_expect == XOPERATOR)
8990e307 3760 no_op("Backticks",s);
79072805 3761 if (!s)
85e6fe83 3762 missingterm((char*)0);
79072805
LW
3763 yylval.ival = OP_BACKTICK;
3764 set_csh();
3765 TERM(sublex_start());
3766
3767 case '\\':
3768 s++;
599cee73 3769 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
cea2e8a9 3770 Perl_warner(aTHX_ WARN_SYNTAX,"Can't use \\%c to mean $%c in expression",
599cee73 3771 *s, *s);
3280af22 3772 if (PL_expect == XOPERATOR)
8990e307 3773 no_op("Backslash",s);
79072805
LW
3774 OPERATOR(REFGEN);
3775
a7cb1f99 3776 case 'v':
e526c9e6 3777 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3778 char *start = s;
3779 start++;
3780 start++;
dd629d5b 3781 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3782 start++;
3783 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3784 s = scan_num(s, &yylval);
a7cb1f99
GS
3785 TERM(THING);
3786 }
e526c9e6 3787 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
62ec34bd 3788 else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) {
e526c9e6
GS
3789 char c = *start;
3790 GV *gv;
3791 *start = '\0';
3792 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3793 *start = c;
3794 if (!gv) {
b73d6f50 3795 s = scan_num(s, &yylval);
e526c9e6
GS
3796 TERM(THING);
3797 }
3798 }
a7cb1f99
GS
3799 }
3800 goto keylookup;
79072805 3801 case 'x':
3280af22 3802 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3803 s++;
3804 Mop(OP_REPEAT);
2f3197b3 3805 }
79072805
LW
3806 goto keylookup;
3807
378cc40b 3808 case '_':
79072805
LW
3809 case 'a': case 'A':
3810 case 'b': case 'B':
3811 case 'c': case 'C':
3812 case 'd': case 'D':
3813 case 'e': case 'E':
3814 case 'f': case 'F':
3815 case 'g': case 'G':
3816 case 'h': case 'H':
3817 case 'i': case 'I':
3818 case 'j': case 'J':
3819 case 'k': case 'K':
3820 case 'l': case 'L':
3821 case 'm': case 'M':
3822 case 'n': case 'N':
3823 case 'o': case 'O':
3824 case 'p': case 'P':
3825 case 'q': case 'Q':
3826 case 'r': case 'R':
3827 case 's': case 'S':
3828 case 't': case 'T':
3829 case 'u': case 'U':
a7cb1f99 3830 case 'V':
79072805
LW
3831 case 'w': case 'W':
3832 case 'X':
3833 case 'y': case 'Y':
3834 case 'z': case 'Z':
3835
49dc05e3 3836 keylookup: {
161b471a
NIS
3837 gv = Nullgv;
3838 gvp = 0;
49dc05e3 3839
3280af22
NIS
3840 PL_bufptr = s;
3841 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3842
3843 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3844 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3845 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3846 (PL_tokenbuf[0] == 'q' &&
3847 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3848
3849 /* x::* is just a word, unless x is "CORE" */
3280af22 3850 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3851 goto just_a_word;
3852
3643fb5f 3853 d = s;
3280af22 3854 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3855 d++; /* no comments skipped here, or s### is misparsed */
3856
3857 /* Is this a label? */
3280af22
NIS
3858 if (!tmp && PL_expect == XSTATE
3859 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3860 s = d + 1;
3280af22 3861 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3862 CLINE;
3863 TOKEN(LABEL);
3643fb5f
CS
3864 }
3865
3866 /* Check for keywords */
3280af22 3867 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3868
3869 /* Is this a word before a => operator? */
1c3923b3 3870 if (*d == '=' && d[1] == '>') {
748a9306 3871 CLINE;
3280af22 3872 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 3873 yylval.opval->op_private = OPpCONST_BARE;
0064a8a9 3874 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 3875 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
3876 TERM(WORD);
3877 }
3878
a0d0e21e 3879 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
3880 GV *ogv = Nullgv; /* override (winner) */
3881 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 3882 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 3883 CV *cv;
3280af22 3884 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
3885 (cv = GvCVu(gv)))
3886 {
3887 if (GvIMPORTED_CV(gv))
3888 ogv = gv;
3889 else if (! CvMETHOD(cv))
3890 hgv = gv;
3891 }
3892 if (!ogv &&
3280af22
NIS
3893 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
3894 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
3895 GvCVu(gv) && GvIMPORTED_CV(gv))
3896 {
3897 ogv = gv;
3898 }
3899 }
3900 if (ogv) {
3901 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
3902 }
3903 else if (gv && !gvp
3904 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 3905 && GvCVu(gv)
3280af22 3906 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
3907 {
3908 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 3909 }
56f7f34b
CS
3910 else { /* no override */
3911 tmp = -tmp;
3912 gv = Nullgv;
3913 gvp = 0;
4944e2f7
GS
3914 if (ckWARN(WARN_AMBIGUOUS) && hgv
3915 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
cea2e8a9 3916 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 3917 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 3918 GvENAME(hgv), "qualify as such or use &");
49dc05e3 3919 }
a0d0e21e
LW
3920 }
3921
3922 reserved_word:
3923 switch (tmp) {
79072805
LW
3924
3925 default: /* not a keyword */
93a17b20 3926 just_a_word: {
96e4d5b1 3927 SV *sv;
3280af22 3928 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
3929
3930 /* Get the rest if it looks like a package qualifier */
3931
155aba94 3932 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 3933 STRLEN morelen;
3280af22 3934 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
3935 TRUE, &morelen);
3936 if (!morelen)
cea2e8a9 3937 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 3938 *s == '\'' ? "'" : "::");
c3e0f903 3939 len += morelen;
a0d0e21e 3940 }
8990e307 3941
3280af22
NIS
3942 if (PL_expect == XOPERATOR) {
3943 if (PL_bufptr == PL_linestart) {
57843af0 3944 CopLINE_dec(PL_curcop);
cea2e8a9 3945 Perl_warner(aTHX_ WARN_SEMICOLON, PL_warn_nosemi);
57843af0 3946 CopLINE_inc(PL_curcop);
463ee0b2
LW
3947 }
3948 else
54310121 3949 no_op("Bareword",s);
463ee0b2 3950 }
8990e307 3951
c3e0f903
GS
3952 /* Look for a subroutine with this name in current package,
3953 unless name is "Foo::", in which case Foo is a bearword
3954 (and a package name). */
3955
3956 if (len > 2 &&
3280af22 3957 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 3958 {
e476b1b5 3959 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
4e553d73 3960 Perl_warner(aTHX_ WARN_BAREWORD,
599cee73 3961 "Bareword \"%s\" refers to nonexistent package",
3280af22 3962 PL_tokenbuf);
c3e0f903 3963 len -= 2;
3280af22 3964 PL_tokenbuf[len] = '\0';
c3e0f903
GS
3965 gv = Nullgv;
3966 gvp = 0;
3967 }
3968 else {
3969 len = 0;
3970 if (!gv)
3280af22 3971 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
3972 }
3973
3974 /* if we saw a global override before, get the right name */
8990e307 3975
49dc05e3 3976 if (gvp) {
79cb57f6 3977 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 3978 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
3979 }
3980 else
3280af22 3981 sv = newSVpv(PL_tokenbuf,0);
8990e307 3982
a0d0e21e
LW
3983 /* Presume this is going to be a bareword of some sort. */
3984
3985 CLINE;
49dc05e3 3986 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e
LW
3987 yylval.opval->op_private = OPpCONST_BARE;
3988
c3e0f903
GS
3989 /* And if "Foo::", then that's what it certainly is. */
3990
3991 if (len)
3992 goto safe_bareword;
3993
8990e307
LW
3994 /* See if it's the indirect object for a list operator. */
3995
3280af22
NIS
3996 if (PL_oldoldbufptr &&
3997 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
3998 (PL_oldoldbufptr == PL_last_lop
3999 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 4000 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
4001 (PL_expect == XREF ||
4002 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 4003 {
748a9306
LW
4004 bool immediate_paren = *s == '(';
4005
a0d0e21e
LW
4006 /* (Now we can afford to cross potential line boundary.) */
4007 s = skipspace(s);
4008
4009 /* Two barewords in a row may indicate method call. */
4010
7e2040f0 4011 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
a0d0e21e
LW
4012 return tmp;
4013
4014 /* If not a declared subroutine, it's an indirect object. */
4015 /* (But it's an indir obj regardless for sort.) */
4016
7948272d 4017 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 4018 ((!gv || !GvCVu(gv)) &&
a9ef352a 4019 (PL_last_lop_op != OP_MAPSTART &&
f0670693 4020 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 4021 {
3280af22 4022 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 4023 goto bareword;
93a17b20
LW
4024 }
4025 }
8990e307 4026
8990e307 4027
3280af22 4028 PL_expect = XOPERATOR;
8990e307 4029 s = skipspace(s);
1c3923b3
GS
4030
4031 /* Is this a word before a => operator? */
4032 if (*s == '=' && s[1] == '>') {
4033 CLINE;
4034 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 4035 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 4036 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
4037 TERM(WORD);
4038 }
4039
4040 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 4041 if (*s == '(') {
79072805 4042 CLINE;
96e4d5b1 4043 if (gv && GvCVu(gv)) {
bf4acbe4 4044 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 4045 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 4046 s = d + 1;
4047 goto its_constant;
4048 }
4049 }
3280af22
NIS
4050 PL_nextval[PL_nexttoke].opval = yylval.opval;
4051 PL_expect = XOPERATOR;
93a17b20 4052 force_next(WORD);
c07a80fd 4053 yylval.ival = 0;
463ee0b2 4054 TOKEN('&');
79072805 4055 }
93a17b20 4056
a0d0e21e 4057 /* If followed by var or block, call it a method (unless sub) */
8990e307 4058
8ebc5c01 4059 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
4060 PL_last_lop = PL_oldbufptr;
4061 PL_last_lop_op = OP_METHOD;
93a17b20 4062 PREBLOCK(METHOD);
463ee0b2
LW
4063 }
4064
8990e307
LW
4065 /* If followed by a bareword, see if it looks like indir obj. */
4066
7e2040f0 4067 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp = intuit_method(s,gv)))
a0d0e21e 4068 return tmp;
93a17b20 4069
8990e307
LW
4070 /* Not a method, so call it a subroutine (if defined) */
4071
8ebc5c01 4072 if (gv && GvCVu(gv)) {
46fc3d4c 4073 CV* cv;
0453d815
PM
4074 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
4075 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4076 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4077 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4078 /* Check for a constant sub */
46fc3d4c 4079 cv = GvCV(gv);
96e4d5b1 4080 if ((sv = cv_const_sv(cv))) {
4081 its_constant:
4082 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4083 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4084 yylval.opval->op_private = 0;
4085 TOKEN(WORD);
89bfa8cd 4086 }
4087
a5f75d66
AD
4088 /* Resolve to GV now. */
4089 op_free(yylval.opval);
4090 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4091 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4092 PL_last_lop = PL_oldbufptr;
bf848113 4093 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4094 /* Is there a prototype? */
4095 if (SvPOK(cv)) {
4096 STRLEN len;
7a52d87a 4097 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4098 if (!len)
4099 TERM(FUNC0SUB);
7a52d87a 4100 if (strEQ(proto, "$"))
4633a7c4 4101 OPERATOR(UNIOPSUB);
7a52d87a 4102 if (*proto == '&' && *s == '{') {
3280af22 4103 sv_setpv(PL_subname,"__ANON__");
4633a7c4
LW
4104 PREBLOCK(LSTOPSUB);
4105 }
a9ef352a 4106 }
3280af22
NIS
4107 PL_nextval[PL_nexttoke].opval = yylval.opval;
4108 PL_expect = XTERM;
8990e307
LW
4109 force_next(WORD);
4110 TOKEN(NOAMP);
4111 }
748a9306 4112
8990e307
LW
4113 /* Call it a bare word */
4114
5603f27d
GS
4115 if (PL_hints & HINT_STRICT_SUBS)
4116 yylval.opval->op_private |= OPpCONST_STRICT;
4117 else {
4118 bareword:
4119 if (ckWARN(WARN_RESERVED)) {
4120 if (lastchar != '-') {
4121 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
4122 if (!*d)
cea2e8a9 4123 Perl_warner(aTHX_ WARN_RESERVED, PL_warn_reserved,
5603f27d
GS
4124 PL_tokenbuf);
4125 }
748a9306
LW
4126 }
4127 }
c3e0f903
GS
4128
4129 safe_bareword:
f248d071 4130 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
0453d815
PM
4131 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4132 "Operator or semicolon missing before %c%s",
3280af22 4133 lastchar, PL_tokenbuf);
0453d815
PM
4134 Perl_warner(aTHX_ WARN_AMBIGUOUS,
4135 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4136 lastchar, lastchar);
4137 }
93a17b20 4138 TOKEN(WORD);
79072805 4139 }
79072805 4140
68dc0745 4141 case KEY___FILE__:
46fc3d4c 4142 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4143 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4144 TERM(THING);
4145
79072805 4146 case KEY___LINE__:
cf2093f6 4147 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4148 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4149 TERM(THING);
68dc0745 4150
4151 case KEY___PACKAGE__:
4152 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22
NIS
4153 (PL_curstash
4154 ? newSVsv(PL_curstname)
4155 : &PL_sv_undef));
79072805 4156 TERM(THING);
79072805 4157
e50aee73 4158 case KEY___DATA__:
79072805
LW
4159 case KEY___END__: {
4160 GV *gv;
79072805
LW
4161
4162 /*SUPPRESS 560*/
3280af22 4163 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4164 char *pname = "main";
3280af22
NIS
4165 if (PL_tokenbuf[2] == 'D')
4166 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4167 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4168 GvMULTI_on(gv);
79072805 4169 if (!GvIO(gv))
a0d0e21e 4170 GvIOp(gv) = newIO();
3280af22 4171 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4172#if defined(HAS_FCNTL) && defined(F_SETFD)
4173 {
3280af22 4174 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4175 fcntl(fd,F_SETFD,fd >= 3);
4176 }
79072805 4177#endif
fd049845 4178 /* Mark this internal pseudo-handle as clean */
4179 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4180 if (PL_preprocess)
50952442 4181 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4182 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4183 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4184 else
50952442 4185 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4186#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4187 /* if the script was opened in binmode, we need to revert
53129d29 4188 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4189 * XXX this is a questionable hack at best. */
53129d29
GS
4190 if (PL_bufend-PL_bufptr > 2
4191 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4192 {
4193 Off_t loc = 0;
50952442 4194 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4195 loc = PerlIO_tell(PL_rsfp);
4196 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4197 }
4198 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
1143fce0
JH
4199#ifdef PERLIO_IS_STDIO /* really? */
4200# if defined(__BORLANDC__)
cb359b41
JH
4201 /* XXX see note in do_binmode() */
4202 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4203# endif
4204#endif
c39cd008
GS
4205 if (loc > 0)
4206 PerlIO_seek(PL_rsfp, loc, 0);
4207 }
4208 }
4209#endif
7948272d 4210#ifdef PERLIO_LAYERS
0064a8a9 4211 if (UTF && !IN_BYTES)
7948272d
NIS
4212 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4213#endif
3280af22 4214 PL_rsfp = Nullfp;
79072805
LW
4215 }
4216 goto fake_eof;
e929a76b 4217 }
de3bb511 4218
8990e307 4219 case KEY_AUTOLOAD:
ed6116ce 4220 case KEY_DESTROY:
79072805 4221 case KEY_BEGIN:
7d30b5c4 4222 case KEY_CHECK:
7d07dbc2 4223 case KEY_INIT:
7d30b5c4 4224 case KEY_END:
3280af22
NIS
4225 if (PL_expect == XSTATE) {
4226 s = PL_bufptr;
93a17b20 4227 goto really_sub;
79072805
LW
4228 }
4229 goto just_a_word;
4230
a0d0e21e
LW
4231 case KEY_CORE:
4232 if (*s == ':' && s[1] == ':') {
4233 s += 2;
748a9306 4234 d = s;
3280af22 4235 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4236 if (!(tmp = keyword(PL_tokenbuf, len)))
4237 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4238 if (tmp < 0)
4239 tmp = -tmp;
4240 goto reserved_word;
4241 }
4242 goto just_a_word;
4243
463ee0b2
LW
4244 case KEY_abs:
4245 UNI(OP_ABS);
4246
79072805
LW
4247 case KEY_alarm:
4248 UNI(OP_ALARM);
4249
4250 case KEY_accept:
a0d0e21e 4251 LOP(OP_ACCEPT,XTERM);
79072805 4252
463ee0b2
LW
4253 case KEY_and:
4254 OPERATOR(ANDOP);
4255
79072805 4256 case KEY_atan2:
a0d0e21e 4257 LOP(OP_ATAN2,XTERM);
85e6fe83 4258
79072805 4259 case KEY_bind:
a0d0e21e 4260 LOP(OP_BIND,XTERM);
79072805
LW
4261
4262 case KEY_binmode:
1c1fc3ea 4263 LOP(OP_BINMODE,XTERM);
79072805
LW
4264
4265 case KEY_bless:
a0d0e21e 4266 LOP(OP_BLESS,XTERM);
79072805
LW
4267
4268 case KEY_chop:
4269 UNI(OP_CHOP);
4270
4271 case KEY_continue:
4272 PREBLOCK(CONTINUE);
4273
4274 case KEY_chdir:
85e6fe83 4275 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4276 UNI(OP_CHDIR);
4277
4278 case KEY_close:
4279 UNI(OP_CLOSE);
4280
4281 case KEY_closedir:
4282 UNI(OP_CLOSEDIR);
4283
4284 case KEY_cmp:
4285 Eop(OP_SCMP);
4286
4287 case KEY_caller:
4288 UNI(OP_CALLER);
4289
4290 case KEY_crypt:
4291#ifdef FCRYPT
f4c556ac
GS
4292 if (!PL_cryptseen) {
4293 PL_cryptseen = TRUE;
de3bb511 4294 init_des();
f4c556ac 4295 }
a687059c 4296#endif
a0d0e21e 4297 LOP(OP_CRYPT,XTERM);
79072805
LW
4298
4299 case KEY_chmod:
e476b1b5 4300 if (ckWARN(WARN_CHMOD)) {
3280af22 4301 for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
748a9306 4302 if (*d != '0' && isDIGIT(*d))
e476b1b5 4303 Perl_warner(aTHX_ WARN_CHMOD,
5a211162 4304 "chmod() mode argument is missing initial 0");
748a9306 4305 }
a0d0e21e 4306 LOP(OP_CHMOD,XTERM);
79072805
LW
4307
4308 case KEY_chown:
a0d0e21e 4309 LOP(OP_CHOWN,XTERM);
79072805
LW
4310
4311 case KEY_connect:
a0d0e21e 4312 LOP(OP_CONNECT,XTERM);
79072805 4313
463ee0b2
LW
4314 case KEY_chr:
4315 UNI(OP_CHR);
4316
79072805
LW
4317 case KEY_cos:
4318 UNI(OP_COS);
4319
4320 case KEY_chroot:
4321 UNI(OP_CHROOT);
4322
4323 case KEY_do:
4324 s = skipspace(s);
4325 if (*s == '{')
a0d0e21e 4326 PRETERMBLOCK(DO);
79072805 4327 if (*s != '\'')
a0d0e21e 4328 s = force_word(s,WORD,FALSE,TRUE,FALSE);
378cc40b 4329 OPERATOR(DO);
79072805
LW
4330
4331 case KEY_die:
3280af22 4332 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4333 LOP(OP_DIE,XTERM);
79072805
LW
4334
4335 case KEY_defined:
4336 UNI(OP_DEFINED);
4337
4338 case KEY_delete:
a0d0e21e 4339 UNI(OP_DELETE);
79072805
LW
4340
4341 case KEY_dbmopen:
a0d0e21e
LW
4342 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4343 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4344
4345 case KEY_dbmclose:
4346 UNI(OP_DBMCLOSE);
4347
4348 case KEY_dump:
a0d0e21e 4349 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4350 LOOPX(OP_DUMP);
4351
4352 case KEY_else:
4353 PREBLOCK(ELSE);
4354
4355 case KEY_elsif:
57843af0 4356 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4357 OPERATOR(ELSIF);
4358
4359 case KEY_eq:
4360 Eop(OP_SEQ);
4361
a0d0e21e
LW
4362 case KEY_exists:
4363 UNI(OP_EXISTS);
4e553d73 4364
79072805
LW
4365 case KEY_exit:
4366 UNI(OP_EXIT);
4367
4368 case KEY_eval:
79072805 4369 s = skipspace(s);
3280af22 4370 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4371 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4372
4373 case KEY_eof:
4374 UNI(OP_EOF);
4375
4376 case KEY_exp:
4377 UNI(OP_EXP);
4378
4379 case KEY_each:
4380 UNI(OP_EACH);
4381
4382 case KEY_exec:
4383 set_csh();
a0d0e21e 4384 LOP(OP_EXEC,XREF);
79072805
LW
4385
4386 case KEY_endhostent:
4387 FUN0(OP_EHOSTENT);
4388
4389 case KEY_endnetent:
4390 FUN0(OP_ENETENT);
4391
4392 case KEY_endservent:
4393 FUN0(OP_ESERVENT);
4394
4395 case KEY_endprotoent:
4396 FUN0(OP_EPROTOENT);
4397
4398 case KEY_endpwent:
4399 FUN0(OP_EPWENT);
4400
4401 case KEY_endgrent:
4402 FUN0(OP_EGRENT);
4403
4404 case KEY_for:
4405 case KEY_foreach:
57843af0 4406 yylval.ival = CopLINE(PL_curcop);
55497cff 4407 s = skipspace(s);
7e2040f0 4408 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4409 char *p = s;
3280af22 4410 if ((PL_bufend - p) >= 3 &&
55497cff 4411 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4412 p += 2;
77ca0c92
LW
4413 else if ((PL_bufend - p) >= 4 &&
4414 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4415 p += 3;
55497cff 4416 p = skipspace(p);
7e2040f0 4417 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4418 p = scan_ident(p, PL_bufend,
4419 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4420 p = skipspace(p);
4421 }
4422 if (*p != '$')
cea2e8a9 4423 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4424 }
79072805
LW
4425 OPERATOR(FOR);
4426
4427 case KEY_formline:
a0d0e21e 4428 LOP(OP_FORMLINE,XTERM);
79072805
LW
4429
4430 case KEY_fork:
4431 FUN0(OP_FORK);
4432
4433 case KEY_fcntl:
a0d0e21e 4434 LOP(OP_FCNTL,XTERM);
79072805
LW
4435
4436 case KEY_fileno:
4437 UNI(OP_FILENO);
4438
4439 case KEY_flock:
a0d0e21e 4440 LOP(OP_FLOCK,XTERM);
79072805
LW
4441
4442 case KEY_gt:
4443 Rop(OP_SGT);
4444
4445 case KEY_ge:
4446 Rop(OP_SGE);
4447
4448 case KEY_grep:
2c38e13d 4449 LOP(OP_GREPSTART, XREF);
79072805
LW
4450
4451 case KEY_goto:
a0d0e21e 4452 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4453 LOOPX(OP_GOTO);
4454
4455 case KEY_gmtime:
4456 UNI(OP_GMTIME);
4457
4458 case KEY_getc:
4459 UNI(OP_GETC);
4460
4461 case KEY_getppid:
4462 FUN0(OP_GETPPID);
4463
4464 case KEY_getpgrp:
4465 UNI(OP_GETPGRP);
4466
4467 case KEY_getpriority:
a0d0e21e 4468 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4469
4470 case KEY_getprotobyname:
4471 UNI(OP_GPBYNAME);
4472
4473 case KEY_getprotobynumber:
a0d0e21e 4474 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4475
4476 case KEY_getprotoent:
4477 FUN0(OP_GPROTOENT);
4478
4479 case KEY_getpwent:
4480 FUN0(OP_GPWENT);
4481
4482 case KEY_getpwnam:
ff68c719 4483 UNI(OP_GPWNAM);
79072805
LW
4484
4485 case KEY_getpwuid:
ff68c719 4486 UNI(OP_GPWUID);
79072805
LW
4487
4488 case KEY_getpeername:
4489 UNI(OP_GETPEERNAME);
4490
4491 case KEY_gethostbyname:
4492 UNI(OP_GHBYNAME);
4493
4494 case KEY_gethostbyaddr:
a0d0e21e 4495 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4496
4497 case KEY_gethostent:
4498 FUN0(OP_GHOSTENT);
4499
4500 case KEY_getnetbyname:
4501 UNI(OP_GNBYNAME);
4502
4503 case KEY_getnetbyaddr:
a0d0e21e 4504 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4505
4506 case KEY_getnetent:
4507 FUN0(OP_GNETENT);
4508
4509 case KEY_getservbyname:
a0d0e21e 4510 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4511
4512 case KEY_getservbyport:
a0d0e21e 4513 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4514
4515 case KEY_getservent:
4516 FUN0(OP_GSERVENT);
4517
4518 case KEY_getsockname:
4519 UNI(OP_GETSOCKNAME);
4520
4521 case KEY_getsockopt:
a0d0e21e 4522 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4523
4524 case KEY_getgrent:
4525 FUN0(OP_GGRENT);
4526
4527 case KEY_getgrnam:
ff68c719 4528 UNI(OP_GGRNAM);
79072805
LW
4529
4530 case KEY_getgrgid:
ff68c719 4531 UNI(OP_GGRGID);
79072805
LW
4532
4533 case KEY_getlogin:
4534 FUN0(OP_GETLOGIN);
4535
93a17b20 4536 case KEY_glob:
a0d0e21e
LW
4537 set_csh();
4538 LOP(OP_GLOB,XTERM);
93a17b20 4539
79072805
LW
4540 case KEY_hex:
4541 UNI(OP_HEX);
4542
4543 case KEY_if:
57843af0 4544 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4545 OPERATOR(IF);
4546
4547 case KEY_index:
a0d0e21e 4548 LOP(OP_INDEX,XTERM);
79072805
LW
4549
4550 case KEY_int:
4551 UNI(OP_INT);
4552
4553 case KEY_ioctl:
a0d0e21e 4554 LOP(OP_IOCTL,XTERM);
79072805
LW
4555
4556 case KEY_join:
a0d0e21e 4557 LOP(OP_JOIN,XTERM);
79072805
LW
4558
4559 case KEY_keys:
4560 UNI(OP_KEYS);
4561
4562 case KEY_kill:
a0d0e21e 4563 LOP(OP_KILL,XTERM);
79072805
LW
4564
4565 case KEY_last:
a0d0e21e 4566 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4567 LOOPX(OP_LAST);
4e553d73 4568
79072805
LW
4569 case KEY_lc:
4570 UNI(OP_LC);
4571
4572 case KEY_lcfirst:
4573 UNI(OP_LCFIRST);
4574
4575 case KEY_local:
09bef843 4576 yylval.ival = 0;
79072805
LW
4577 OPERATOR(LOCAL);
4578
4579 case KEY_length:
4580 UNI(OP_LENGTH);
4581
4582 case KEY_lt:
4583 Rop(OP_SLT);
4584
4585 case KEY_le:
4586 Rop(OP_SLE);
4587
4588 case KEY_localtime:
4589 UNI(OP_LOCALTIME);
4590
4591 case KEY_log:
4592 UNI(OP_LOG);
4593
4594 case KEY_link:
a0d0e21e 4595 LOP(OP_LINK,XTERM);
79072805
LW
4596
4597 case KEY_listen:
a0d0e21e 4598 LOP(OP_LISTEN,XTERM);
79072805 4599
c0329465
MB
4600 case KEY_lock:
4601 UNI(OP_LOCK);
4602
79072805
LW
4603 case KEY_lstat:
4604 UNI(OP_LSTAT);
4605
4606 case KEY_m:
8782bef2 4607 s = scan_pat(s,OP_MATCH);
79072805
LW
4608 TERM(sublex_start());
4609
a0d0e21e 4610 case KEY_map:
2c38e13d 4611 LOP(OP_MAPSTART, XREF);
4e4e412b 4612
79072805 4613 case KEY_mkdir:
a0d0e21e 4614 LOP(OP_MKDIR,XTERM);
79072805
LW
4615
4616 case KEY_msgctl:
a0d0e21e 4617 LOP(OP_MSGCTL,XTERM);
79072805
LW
4618
4619 case KEY_msgget:
a0d0e21e 4620 LOP(OP_MSGGET,XTERM);
79072805
LW
4621
4622 case KEY_msgrcv:
a0d0e21e 4623 LOP(OP_MSGRCV,XTERM);
79072805
LW
4624
4625 case KEY_msgsnd:
a0d0e21e 4626 LOP(OP_MSGSND,XTERM);
79072805 4627
77ca0c92 4628 case KEY_our:
93a17b20 4629 case KEY_my:
77ca0c92 4630 PL_in_my = tmp;
c750a3ec 4631 s = skipspace(s);
7e2040f0 4632 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4633 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4634 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4635 goto really_sub;
def3634b 4636 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4637 if (!PL_in_my_stash) {
c750a3ec 4638 char tmpbuf[1024];
3280af22
NIS
4639 PL_bufptr = s;
4640 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4641 yyerror(tmpbuf);
4642 }
4643 }
09bef843 4644 yylval.ival = 1;
55497cff 4645 OPERATOR(MY);
93a17b20 4646
79072805 4647 case KEY_next:
a0d0e21e 4648 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4649 LOOPX(OP_NEXT);
4650
4651 case KEY_ne:
4652 Eop(OP_SNE);
4653
a0d0e21e 4654 case KEY_no:
3280af22 4655 if (PL_expect != XSTATE)
a0d0e21e
LW
4656 yyerror("\"no\" not allowed in expression");
4657 s = force_word(s,WORD,FALSE,TRUE,FALSE);
89bfa8cd 4658 s = force_version(s);
a0d0e21e
LW
4659 yylval.ival = 0;
4660 OPERATOR(USE);
4661
4662 case KEY_not:
2d2e263d
LW
4663 if (*s == '(' || (s = skipspace(s), *s == '('))
4664 FUN1(OP_NOT);
4665 else
4666 OPERATOR(NOTOP);
a0d0e21e 4667
79072805 4668 case KEY_open:
93a17b20 4669 s = skipspace(s);
7e2040f0 4670 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4671 char *t;
7e2040f0 4672 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
93a17b20 4673 t = skipspace(d);
e476b1b5
GS
4674 if (strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE))
4675 Perl_warner(aTHX_ WARN_PRECEDENCE,
0453d815
PM
4676 "Precedence problem: open %.*s should be open(%.*s)",
4677 d-s,s, d-s,s);
93a17b20 4678 }
a0d0e21e 4679 LOP(OP_OPEN,XTERM);
79072805 4680
463ee0b2 4681 case KEY_or:
a0d0e21e 4682 yylval.ival = OP_OR;
463ee0b2
LW
4683 OPERATOR(OROP);
4684
79072805
LW
4685 case KEY_ord:
4686 UNI(OP_ORD);
4687
4688 case KEY_oct:
4689 UNI(OP_OCT);
4690
4691 case KEY_opendir:
a0d0e21e 4692 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4693
4694 case KEY_print:
3280af22 4695 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4696 LOP(OP_PRINT,XREF);
79072805
LW
4697
4698 case KEY_printf:
3280af22 4699 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4700 LOP(OP_PRTF,XREF);
79072805 4701
c07a80fd 4702 case KEY_prototype:
4703 UNI(OP_PROTOTYPE);
4704
79072805 4705 case KEY_push:
a0d0e21e 4706 LOP(OP_PUSH,XTERM);
79072805
LW
4707
4708 case KEY_pop:
4709 UNI(OP_POP);
4710
a0d0e21e
LW
4711 case KEY_pos:
4712 UNI(OP_POS);
4e553d73 4713
79072805 4714 case KEY_pack:
a0d0e21e 4715 LOP(OP_PACK,XTERM);
79072805
LW
4716
4717 case KEY_package:
a0d0e21e 4718 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4719 OPERATOR(PACKAGE);
4720
4721 case KEY_pipe:
a0d0e21e 4722 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4723
4724 case KEY_q:
09bef843 4725 s = scan_str(s,FALSE,FALSE);
79072805 4726 if (!s)
85e6fe83 4727 missingterm((char*)0);
79072805
LW
4728 yylval.ival = OP_CONST;
4729 TERM(sublex_start());
4730
a0d0e21e
LW
4731 case KEY_quotemeta:
4732 UNI(OP_QUOTEMETA);
4733
8990e307 4734 case KEY_qw:
09bef843 4735 s = scan_str(s,FALSE,FALSE);
8990e307 4736 if (!s)
85e6fe83 4737 missingterm((char*)0);
8127e0e3
GS
4738 force_next(')');
4739 if (SvCUR(PL_lex_stuff)) {
4740 OP *words = Nullop;
4741 int warned = 0;
3280af22 4742 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4743 while (len) {
7948272d 4744 SV *sv;
8127e0e3
GS
4745 for (; isSPACE(*d) && len; --len, ++d) ;
4746 if (len) {
4747 char *b = d;
e476b1b5 4748 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4749 for (; !isSPACE(*d) && len; --len, ++d) {
4750 if (*d == ',') {
e476b1b5 4751 Perl_warner(aTHX_ WARN_QW,
8127e0e3
GS
4752 "Possible attempt to separate words with commas");
4753 ++warned;
4754 }
4755 else if (*d == '#') {
e476b1b5 4756 Perl_warner(aTHX_ WARN_QW,
8127e0e3
GS
4757 "Possible attempt to put comments in qw() list");
4758 ++warned;
4759 }
4760 }
4761 }
4762 else {
4763 for (; !isSPACE(*d) && len; --len, ++d) ;
4764 }
7948272d
NIS
4765 sv = newSVpvn(b, d-b);
4766 if (DO_UTF8(PL_lex_stuff))
4767 SvUTF8_on(sv);
8127e0e3 4768 words = append_elem(OP_LIST, words,
7948272d 4769 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4770 }
4771 }
8127e0e3
GS
4772 if (words) {
4773 PL_nextval[PL_nexttoke].opval = words;
4774 force_next(THING);
4775 }
55497cff 4776 }
37fd879b 4777 if (PL_lex_stuff) {
8127e0e3 4778 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4779 PL_lex_stuff = Nullsv;
4780 }
3280af22 4781 PL_expect = XTERM;
8127e0e3 4782 TOKEN('(');
8990e307 4783
79072805 4784 case KEY_qq:
09bef843 4785 s = scan_str(s,FALSE,FALSE);
79072805 4786 if (!s)
85e6fe83 4787 missingterm((char*)0);
a0d0e21e 4788 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4789 if (SvIVX(PL_lex_stuff) == '\'')
4790 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4791 TERM(sublex_start());
4792
8782bef2
GB
4793 case KEY_qr:
4794 s = scan_pat(s,OP_QR);
4795 TERM(sublex_start());
4796
79072805 4797 case KEY_qx:
09bef843 4798 s = scan_str(s,FALSE,FALSE);
79072805 4799 if (!s)
85e6fe83 4800 missingterm((char*)0);
79072805
LW
4801 yylval.ival = OP_BACKTICK;
4802 set_csh();
4803 TERM(sublex_start());
4804
4805 case KEY_return:
4806 OLDLOP(OP_RETURN);
4807
4808 case KEY_require:
a7cb1f99
GS
4809 s = skipspace(s);
4810 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4811 s = force_version(s);
4812 }
4813 else {
4814 *PL_tokenbuf = '\0';
4815 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4816 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4817 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
4818 else if (*s == '<')
4819 yyerror("<> should be quotes");
4820 }
463ee0b2 4821 UNI(OP_REQUIRE);
79072805
LW
4822
4823 case KEY_reset:
4824 UNI(OP_RESET);
4825
4826 case KEY_redo:
a0d0e21e 4827 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4828 LOOPX(OP_REDO);
4829
4830 case KEY_rename:
a0d0e21e 4831 LOP(OP_RENAME,XTERM);
79072805
LW
4832
4833 case KEY_rand:
4834 UNI(OP_RAND);
4835
4836 case KEY_rmdir:
4837 UNI(OP_RMDIR);
4838
4839 case KEY_rindex:
a0d0e21e 4840 LOP(OP_RINDEX,XTERM);
79072805
LW
4841
4842 case KEY_read:
a0d0e21e 4843 LOP(OP_READ,XTERM);
79072805
LW
4844
4845 case KEY_readdir:
4846 UNI(OP_READDIR);
4847
93a17b20
LW
4848 case KEY_readline:
4849 set_csh();
4850 UNI(OP_READLINE);
4851
4852 case KEY_readpipe:
4853 set_csh();
4854 UNI(OP_BACKTICK);
4855
79072805
LW
4856 case KEY_rewinddir:
4857 UNI(OP_REWINDDIR);
4858
4859 case KEY_recv:
a0d0e21e 4860 LOP(OP_RECV,XTERM);
79072805
LW
4861
4862 case KEY_reverse:
a0d0e21e 4863 LOP(OP_REVERSE,XTERM);
79072805
LW
4864
4865 case KEY_readlink:
4866 UNI(OP_READLINK);
4867
4868 case KEY_ref:
4869 UNI(OP_REF);
4870
4871 case KEY_s:
4872 s = scan_subst(s);
4873 if (yylval.opval)
4874 TERM(sublex_start());
4875 else
4876 TOKEN(1); /* force error */
4877
a0d0e21e
LW
4878 case KEY_chomp:
4879 UNI(OP_CHOMP);
4e553d73 4880
79072805
LW
4881 case KEY_scalar:
4882 UNI(OP_SCALAR);
4883
4884 case KEY_select:
a0d0e21e 4885 LOP(OP_SELECT,XTERM);
79072805
LW
4886
4887 case KEY_seek:
a0d0e21e 4888 LOP(OP_SEEK,XTERM);
79072805
LW
4889
4890 case KEY_semctl:
a0d0e21e 4891 LOP(OP_SEMCTL,XTERM);
79072805
LW
4892
4893 case KEY_semget:
a0d0e21e 4894 LOP(OP_SEMGET,XTERM);
79072805
LW
4895
4896 case KEY_semop:
a0d0e21e 4897 LOP(OP_SEMOP,XTERM);
79072805
LW
4898
4899 case KEY_send:
a0d0e21e 4900 LOP(OP_SEND,XTERM);
79072805
LW
4901
4902 case KEY_setpgrp:
a0d0e21e 4903 LOP(OP_SETPGRP,XTERM);
79072805
LW
4904
4905 case KEY_setpriority:
a0d0e21e 4906 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
4907
4908 case KEY_sethostent:
ff68c719 4909 UNI(OP_SHOSTENT);
79072805
LW
4910
4911 case KEY_setnetent:
ff68c719 4912 UNI(OP_SNETENT);
79072805
LW
4913
4914 case KEY_setservent:
ff68c719 4915 UNI(OP_SSERVENT);
79072805
LW
4916
4917 case KEY_setprotoent:
ff68c719 4918 UNI(OP_SPROTOENT);
79072805
LW
4919
4920 case KEY_setpwent:
4921 FUN0(OP_SPWENT);
4922
4923 case KEY_setgrent:
4924 FUN0(OP_SGRENT);
4925
4926 case KEY_seekdir:
a0d0e21e 4927 LOP(OP_SEEKDIR,XTERM);
79072805
LW
4928
4929 case KEY_setsockopt:
a0d0e21e 4930 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
4931
4932 case KEY_shift:
4933 UNI(OP_SHIFT);
4934
4935 case KEY_shmctl:
a0d0e21e 4936 LOP(OP_SHMCTL,XTERM);
79072805
LW
4937
4938 case KEY_shmget:
a0d0e21e 4939 LOP(OP_SHMGET,XTERM);
79072805
LW
4940
4941 case KEY_shmread:
a0d0e21e 4942 LOP(OP_SHMREAD,XTERM);
79072805
LW
4943
4944 case KEY_shmwrite:
a0d0e21e 4945 LOP(OP_SHMWRITE,XTERM);
79072805
LW
4946
4947 case KEY_shutdown:
a0d0e21e 4948 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
4949
4950 case KEY_sin:
4951 UNI(OP_SIN);
4952
4953 case KEY_sleep:
4954 UNI(OP_SLEEP);
4955
4956 case KEY_socket:
a0d0e21e 4957 LOP(OP_SOCKET,XTERM);
79072805
LW
4958
4959 case KEY_socketpair:
a0d0e21e 4960 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
4961
4962 case KEY_sort:
3280af22 4963 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
4964 s = skipspace(s);
4965 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 4966 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 4967 PL_expect = XTERM;
15f0808c 4968 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 4969 LOP(OP_SORT,XREF);
79072805
LW
4970
4971 case KEY_split:
a0d0e21e 4972 LOP(OP_SPLIT,XTERM);
79072805
LW
4973
4974 case KEY_sprintf:
a0d0e21e 4975 LOP(OP_SPRINTF,XTERM);
79072805
LW
4976
4977 case KEY_splice:
a0d0e21e 4978 LOP(OP_SPLICE,XTERM);
79072805
LW
4979
4980 case KEY_sqrt:
4981 UNI(OP_SQRT);
4982
4983 case KEY_srand:
4984 UNI(OP_SRAND);
4985
4986 case KEY_stat:
4987 UNI(OP_STAT);
4988
4989 case KEY_study:
79072805
LW
4990 UNI(OP_STUDY);
4991
4992 case KEY_substr:
a0d0e21e 4993 LOP(OP_SUBSTR,XTERM);
79072805
LW
4994
4995 case KEY_format:
4996 case KEY_sub:
93a17b20 4997 really_sub:
09bef843 4998 {
3280af22 4999 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 5000 SSize_t tboffset = 0;
09bef843
SB
5001 expectation attrful;
5002 bool have_name, have_proto;
5003 int key = tmp;
5004
5005 s = skipspace(s);
5006
7e2040f0 5007 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
5008 (*s == ':' && s[1] == ':'))
5009 {
5010 PL_expect = XBLOCK;
5011 attrful = XATTRBLOCK;
b1b65b59
JH
5012 /* remember buffer pos'n for later force_word */
5013 tboffset = s - PL_oldbufptr;
09bef843
SB
5014 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5015 if (strchr(tmpbuf, ':'))
5016 sv_setpv(PL_subname, tmpbuf);
5017 else {
5018 sv_setsv(PL_subname,PL_curstname);
5019 sv_catpvn(PL_subname,"::",2);
5020 sv_catpvn(PL_subname,tmpbuf,len);
5021 }
5022 s = skipspace(d);
5023 have_name = TRUE;
5024 }
463ee0b2 5025 else {
09bef843
SB
5026 if (key == KEY_my)
5027 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5028 PL_expect = XTERMBLOCK;
5029 attrful = XATTRTERM;
5030 sv_setpv(PL_subname,"?");
5031 have_name = FALSE;
463ee0b2 5032 }
4633a7c4 5033
09bef843
SB
5034 if (key == KEY_format) {
5035 if (*s == '=')
5036 PL_lex_formbrack = PL_lex_brackets + 1;
5037 if (have_name)
b1b65b59
JH
5038 (void) force_word(PL_oldbufptr + tboffset, WORD,
5039 FALSE, TRUE, TRUE);
09bef843
SB
5040 OPERATOR(FORMAT);
5041 }
79072805 5042
09bef843
SB
5043 /* Look for a prototype */
5044 if (*s == '(') {
5045 char *p;
5046
5047 s = scan_str(s,FALSE,FALSE);
37fd879b 5048 if (!s)
09bef843 5049 Perl_croak(aTHX_ "Prototype not terminated");
09bef843
SB
5050 /* strip spaces */
5051 d = SvPVX(PL_lex_stuff);
5052 tmp = 0;
5053 for (p = d; *p; ++p) {
5054 if (!isSPACE(*p))
5055 d[tmp++] = *p;
5056 }
5057 d[tmp] = '\0';
5058 SvCUR(PL_lex_stuff) = tmp;
5059 have_proto = TRUE;
68dc0745 5060
09bef843 5061 s = skipspace(s);
4633a7c4 5062 }
09bef843
SB
5063 else
5064 have_proto = FALSE;
5065
5066 if (*s == ':' && s[1] != ':')
5067 PL_expect = attrful;
5068
5069 if (have_proto) {
b1b65b59
JH
5070 PL_nextval[PL_nexttoke].opval =
5071 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5072 PL_lex_stuff = Nullsv;
5073 force_next(THING);
68dc0745 5074 }
09bef843
SB
5075 if (!have_name) {
5076 sv_setpv(PL_subname,"__ANON__");
5077 TOKEN(ANONSUB);
4633a7c4 5078 }
b1b65b59
JH
5079 (void) force_word(PL_oldbufptr + tboffset, WORD,
5080 FALSE, TRUE, TRUE);
09bef843
SB
5081 if (key == KEY_my)
5082 TOKEN(MYSUB);
5083 TOKEN(SUB);
4633a7c4 5084 }
79072805
LW
5085
5086 case KEY_system:
5087 set_csh();
a0d0e21e 5088 LOP(OP_SYSTEM,XREF);
79072805
LW
5089
5090 case KEY_symlink:
a0d0e21e 5091 LOP(OP_SYMLINK,XTERM);
79072805
LW
5092
5093 case KEY_syscall:
a0d0e21e 5094 LOP(OP_SYSCALL,XTERM);
79072805 5095
c07a80fd 5096 case KEY_sysopen:
5097 LOP(OP_SYSOPEN,XTERM);
5098
137443ea 5099 case KEY_sysseek:
5100 LOP(OP_SYSSEEK,XTERM);
5101
79072805 5102 case KEY_sysread:
a0d0e21e 5103 LOP(OP_SYSREAD,XTERM);
79072805
LW
5104
5105 case KEY_syswrite:
a0d0e21e 5106 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5107
5108 case KEY_tr:
5109 s = scan_trans(s);
5110 TERM(sublex_start());
5111
5112 case KEY_tell:
5113 UNI(OP_TELL);
5114
5115 case KEY_telldir:
5116 UNI(OP_TELLDIR);
5117
463ee0b2 5118 case KEY_tie:
a0d0e21e 5119 LOP(OP_TIE,XTERM);
463ee0b2 5120
c07a80fd 5121 case KEY_tied:
5122 UNI(OP_TIED);
5123
79072805
LW
5124 case KEY_time:
5125 FUN0(OP_TIME);
5126
5127 case KEY_times:
5128 FUN0(OP_TMS);
5129
5130 case KEY_truncate:
a0d0e21e 5131 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5132
5133 case KEY_uc:
5134 UNI(OP_UC);
5135
5136 case KEY_ucfirst:
5137 UNI(OP_UCFIRST);
5138
463ee0b2
LW
5139 case KEY_untie:
5140 UNI(OP_UNTIE);
5141
79072805 5142 case KEY_until:
57843af0 5143 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5144 OPERATOR(UNTIL);
5145
5146 case KEY_unless:
57843af0 5147 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5148 OPERATOR(UNLESS);
5149
5150 case KEY_unlink:
a0d0e21e 5151 LOP(OP_UNLINK,XTERM);
79072805
LW
5152
5153 case KEY_undef:
5154 UNI(OP_UNDEF);
5155
5156 case KEY_unpack:
a0d0e21e 5157 LOP(OP_UNPACK,XTERM);
79072805
LW
5158
5159 case KEY_utime:
a0d0e21e 5160 LOP(OP_UTIME,XTERM);
79072805
LW
5161
5162 case KEY_umask:
e476b1b5 5163 if (ckWARN(WARN_UMASK)) {
3280af22 5164 for (d = s; d < PL_bufend && (isSPACE(*d) || *d == '('); d++) ;
4e553d73 5165 if (*d != '0' && isDIGIT(*d))
e476b1b5 5166 Perl_warner(aTHX_ WARN_UMASK,
4438c4b7 5167 "umask: argument is missing initial 0");
748a9306 5168 }
79072805
LW
5169 UNI(OP_UMASK);
5170
5171 case KEY_unshift:
a0d0e21e
LW
5172 LOP(OP_UNSHIFT,XTERM);
5173
5174 case KEY_use:
3280af22 5175 if (PL_expect != XSTATE)
a0d0e21e 5176 yyerror("\"use\" not allowed in expression");
89bfa8cd 5177 s = skipspace(s);
a7cb1f99 5178 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
89bfa8cd 5179 s = force_version(s);
a7cb1f99 5180 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5181 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5182 force_next(WORD);
5183 }
5184 }
5185 else {
5186 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5187 s = force_version(s);
5188 }
a0d0e21e
LW
5189 yylval.ival = 1;
5190 OPERATOR(USE);
79072805
LW
5191
5192 case KEY_values:
5193 UNI(OP_VALUES);
5194
5195 case KEY_vec:
a0d0e21e 5196 LOP(OP_VEC,XTERM);
79072805
LW
5197
5198 case KEY_while:
57843af0 5199 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5200 OPERATOR(WHILE);
5201
5202 case KEY_warn:
3280af22 5203 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5204 LOP(OP_WARN,XTERM);
79072805
LW
5205
5206 case KEY_wait:
5207 FUN0(OP_WAIT);
5208
5209 case KEY_waitpid:
a0d0e21e 5210 LOP(OP_WAITPID,XTERM);
79072805
LW
5211
5212 case KEY_wantarray:
5213 FUN0(OP_WANTARRAY);
5214
5215 case KEY_write:
9d116dd7
JH
5216#ifdef EBCDIC
5217 {
5218 static char ctl_l[2];
5219
4e553d73 5220 if (ctl_l[0] == '\0')
9d116dd7
JH
5221 ctl_l[0] = toCTRL('L');
5222 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5223 }
5224#else
5225 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5226#endif
79072805
LW
5227 UNI(OP_ENTERWRITE);
5228
5229 case KEY_x:
3280af22 5230 if (PL_expect == XOPERATOR)
79072805
LW
5231 Mop(OP_REPEAT);
5232 check_uni();
5233 goto just_a_word;
5234
a0d0e21e
LW
5235 case KEY_xor:
5236 yylval.ival = OP_XOR;
5237 OPERATOR(OROP);
5238
79072805
LW
5239 case KEY_y:
5240 s = scan_trans(s);
5241 TERM(sublex_start());
5242 }
49dc05e3 5243 }}
79072805 5244}
bf4acbe4
GS
5245#ifdef __SC__
5246#pragma segment Main
5247#endif
79072805
LW
5248
5249I32
864dbfa3 5250Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5251{
5252 switch (*d) {
5253 case '_':
5254 if (d[1] == '_') {
a0d0e21e 5255 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5256 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5257 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5258 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5259 if (strEQ(d,"__END__")) return KEY___END__;
5260 }
5261 break;
8990e307
LW
5262 case 'A':
5263 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5264 break;
79072805 5265 case 'a':
463ee0b2
LW
5266 switch (len) {
5267 case 3:
a0d0e21e
LW
5268 if (strEQ(d,"and")) return -KEY_and;
5269 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5270 break;
463ee0b2 5271 case 5:
a0d0e21e
LW
5272 if (strEQ(d,"alarm")) return -KEY_alarm;
5273 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5274 break;
5275 case 6:
a0d0e21e 5276 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5277 break;
5278 }
79072805
LW
5279 break;
5280 case 'B':
5281 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5282 break;
79072805 5283 case 'b':
a0d0e21e
LW
5284 if (strEQ(d,"bless")) return -KEY_bless;
5285 if (strEQ(d,"bind")) return -KEY_bind;
5286 if (strEQ(d,"binmode")) return -KEY_binmode;
5287 break;
5288 case 'C':
5289 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5290 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5291 break;
5292 case 'c':
5293 switch (len) {
5294 case 3:
a0d0e21e
LW
5295 if (strEQ(d,"cmp")) return -KEY_cmp;
5296 if (strEQ(d,"chr")) return -KEY_chr;
5297 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5298 break;
5299 case 4:
79e5458b 5300 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5301 break;
5302 case 5:
a0d0e21e
LW
5303 if (strEQ(d,"close")) return -KEY_close;
5304 if (strEQ(d,"chdir")) return -KEY_chdir;
79e5458b 5305 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5306 if (strEQ(d,"chmod")) return -KEY_chmod;
5307 if (strEQ(d,"chown")) return -KEY_chown;
5308 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5309 break;
5310 case 6:
a0d0e21e
LW
5311 if (strEQ(d,"chroot")) return -KEY_chroot;
5312 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5313 break;
5314 case 7:
a0d0e21e 5315 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5316 break;
5317 case 8:
a0d0e21e
LW
5318 if (strEQ(d,"closedir")) return -KEY_closedir;
5319 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5320 break;
5321 }
5322 break;
ed6116ce
LW
5323 case 'D':
5324 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5325 break;
79072805
LW
5326 case 'd':
5327 switch (len) {
5328 case 2:
5329 if (strEQ(d,"do")) return KEY_do;
5330 break;
5331 case 3:
a0d0e21e 5332 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5333 break;
5334 case 4:
a0d0e21e 5335 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5336 break;
5337 case 6:
5338 if (strEQ(d,"delete")) return KEY_delete;
5339 break;
5340 case 7:
5341 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5342 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5343 break;
5344 case 8:
a0d0e21e 5345 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5346 break;
5347 }
5348 break;
5349 case 'E':
79072805
LW
5350 if (strEQ(d,"END")) return KEY_END;
5351 break;
5352 case 'e':
5353 switch (len) {
5354 case 2:
a0d0e21e 5355 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5356 break;
5357 case 3:
a0d0e21e
LW
5358 if (strEQ(d,"eof")) return -KEY_eof;
5359 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5360 break;
5361 case 4:
5362 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5363 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5364 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5365 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5366 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5367 break;
5368 case 5:
5369 if (strEQ(d,"elsif")) return KEY_elsif;
5370 break;
a0d0e21e
LW
5371 case 6:
5372 if (strEQ(d,"exists")) return KEY_exists;
cea2e8a9 5373 if (strEQ(d,"elseif")) Perl_warn(aTHX_ "elseif should be elsif");
a0d0e21e 5374 break;
79072805 5375 case 8:
a0d0e21e
LW
5376 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5377 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5378 break;
5379 case 9:
a0d0e21e 5380 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5381 break;
5382 case 10:
a0d0e21e
LW
5383 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5384 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5385 break;
5386 case 11:
a0d0e21e 5387 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5388 break;
a687059c 5389 }
a687059c 5390 break;
79072805
LW
5391 case 'f':
5392 switch (len) {
5393 case 3:
5394 if (strEQ(d,"for")) return KEY_for;
5395 break;
5396 case 4:
a0d0e21e 5397 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5398 break;
5399 case 5:
a0d0e21e
LW
5400 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5401 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5402 break;
5403 case 6:
5404 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5405 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5406 break;
5407 case 7:
5408 if (strEQ(d,"foreach")) return KEY_foreach;
5409 break;
5410 case 8:
a0d0e21e 5411 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5412 break;
378cc40b 5413 }
a687059c 5414 break;
79072805 5415 case 'g':
a687059c
LW
5416 if (strnEQ(d,"get",3)) {
5417 d += 3;
5418 if (*d == 'p') {
79072805
LW
5419 switch (len) {
5420 case 7:
a0d0e21e
LW
5421 if (strEQ(d,"ppid")) return -KEY_getppid;
5422 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5423 break;
5424 case 8:
a0d0e21e
LW
5425 if (strEQ(d,"pwent")) return -KEY_getpwent;
5426 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5427 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5428 break;
5429 case 11:
a0d0e21e
LW
5430 if (strEQ(d,"peername")) return -KEY_getpeername;
5431 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5432 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5433 break;
5434 case 14:
a0d0e21e 5435 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5436 break;
5437 case 16:
a0d0e21e 5438 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5439 break;
5440 }
a687059c
LW
5441 }
5442 else if (*d == 'h') {
a0d0e21e
LW
5443 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5444 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5445 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5446 }
5447 else if (*d == 'n') {
a0d0e21e
LW
5448 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5449 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5450 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5451 }
5452 else if (*d == 's') {
a0d0e21e
LW
5453 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5454 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5455 if (strEQ(d,"servent")) return -KEY_getservent;
5456 if (strEQ(d,"sockname")) return -KEY_getsockname;
5457 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5458 }
5459 else if (*d == 'g') {
a0d0e21e
LW
5460 if (strEQ(d,"grent")) return -KEY_getgrent;
5461 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5462 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5463 }
5464 else if (*d == 'l') {
a0d0e21e 5465 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5466 }
a0d0e21e 5467 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5468 break;
a687059c 5469 }
79072805
LW
5470 switch (len) {
5471 case 2:
a0d0e21e
LW
5472 if (strEQ(d,"gt")) return -KEY_gt;
5473 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5474 break;
5475 case 4:
5476 if (strEQ(d,"grep")) return KEY_grep;
5477 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5478 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5479 break;
5480 case 6:
a0d0e21e 5481 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5482 break;
378cc40b 5483 }
a687059c 5484 break;
79072805 5485 case 'h':
a0d0e21e 5486 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5487 break;
7d07dbc2
MB
5488 case 'I':
5489 if (strEQ(d,"INIT")) return KEY_INIT;
5490 break;
79072805
LW
5491 case 'i':
5492 switch (len) {
5493 case 2:
5494 if (strEQ(d,"if")) return KEY_if;
5495 break;
5496 case 3:
a0d0e21e 5497 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5498 break;
5499 case 5:
a0d0e21e
LW
5500 if (strEQ(d,"index")) return -KEY_index;
5501 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5502 break;
5503 }
a687059c 5504 break;
79072805 5505 case 'j':
a0d0e21e 5506 if (strEQ(d,"join")) return -KEY_join;
a687059c 5507 break;
79072805
LW
5508 case 'k':
5509 if (len == 4) {
3a6a8333 5510 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5511 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5512 }
79072805 5513 break;
79072805
LW
5514 case 'l':
5515 switch (len) {
5516 case 2:
a0d0e21e
LW
5517 if (strEQ(d,"lt")) return -KEY_lt;
5518 if (strEQ(d,"le")) return -KEY_le;
5519 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5520 break;
5521 case 3:
a0d0e21e 5522 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5523 break;
5524 case 4:
5525 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5526 if (strEQ(d,"link")) return -KEY_link;
c0329465 5527 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5528 break;
79072805
LW
5529 case 5:
5530 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5531 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5532 break;
5533 case 6:
a0d0e21e
LW
5534 if (strEQ(d,"length")) return -KEY_length;
5535 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5536 break;
5537 case 7:
a0d0e21e 5538 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5539 break;
5540 case 9:
a0d0e21e 5541 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5542 break;
5543 }
a687059c 5544 break;
79072805
LW
5545 case 'm':
5546 switch (len) {
5547 case 1: return KEY_m;
93a17b20
LW
5548 case 2:
5549 if (strEQ(d,"my")) return KEY_my;
5550 break;
a0d0e21e
LW
5551 case 3:
5552 if (strEQ(d,"map")) return KEY_map;
5553 break;
79072805 5554 case 5:
a0d0e21e 5555 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5556 break;
5557 case 6:
a0d0e21e
LW
5558 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5559 if (strEQ(d,"msgget")) return -KEY_msgget;
5560 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5561 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5562 break;
5563 }
a687059c 5564 break;
79072805
LW
5565 case 'n':
5566 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5567 if (strEQ(d,"ne")) return -KEY_ne;
5568 if (strEQ(d,"not")) return -KEY_not;
5569 if (strEQ(d,"no")) return KEY_no;
a687059c 5570 break;
79072805
LW
5571 case 'o':
5572 switch (len) {
463ee0b2 5573 case 2:
a0d0e21e 5574 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5575 break;
79072805 5576 case 3:
a0d0e21e
LW
5577 if (strEQ(d,"ord")) return -KEY_ord;
5578 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5579 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5580 break;
5581 case 4:
a0d0e21e 5582 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5583 break;
5584 case 7:
a0d0e21e 5585 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5586 break;
fe14fcc3 5587 }
a687059c 5588 break;
79072805
LW
5589 case 'p':
5590 switch (len) {
5591 case 3:
4e553d73 5592 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5593 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5594 break;
5595 case 4:
3a6a8333 5596 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5597 if (strEQ(d,"pack")) return -KEY_pack;
5598 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5599 break;
5600 case 5:
5601 if (strEQ(d,"print")) return KEY_print;
5602 break;
5603 case 6:
5604 if (strEQ(d,"printf")) return KEY_printf;
5605 break;
5606 case 7:
5607 if (strEQ(d,"package")) return KEY_package;
5608 break;
c07a80fd 5609 case 9:
5610 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5611 }
79072805
LW
5612 break;
5613 case 'q':
5614 if (len <= 2) {
5615 if (strEQ(d,"q")) return KEY_q;
8782bef2 5616 if (strEQ(d,"qr")) return KEY_qr;
79072805 5617 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5618 if (strEQ(d,"qw")) return KEY_qw;
79072805 5619 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5620 }
a0d0e21e 5621 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5622 break;
5623 case 'r':
5624 switch (len) {
5625 case 3:
a0d0e21e 5626 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5627 break;
5628 case 4:
a0d0e21e
LW
5629 if (strEQ(d,"read")) return -KEY_read;
5630 if (strEQ(d,"rand")) return -KEY_rand;
5631 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5632 if (strEQ(d,"redo")) return KEY_redo;
5633 break;
5634 case 5:
a0d0e21e
LW
5635 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5636 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5637 break;
5638 case 6:
5639 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5640 if (strEQ(d,"rename")) return -KEY_rename;
5641 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5642 break;
5643 case 7:
ec4ab249 5644 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5645 if (strEQ(d,"reverse")) return -KEY_reverse;
5646 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5647 break;
5648 case 8:
a0d0e21e
LW
5649 if (strEQ(d,"readlink")) return -KEY_readlink;
5650 if (strEQ(d,"readline")) return -KEY_readline;
5651 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5652 break;
5653 case 9:
a0d0e21e 5654 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5655 break;
a687059c 5656 }
79072805
LW
5657 break;
5658 case 's':
a687059c 5659 switch (d[1]) {
79072805 5660 case 0: return KEY_s;
a687059c 5661 case 'c':
79072805 5662 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5663 break;
5664 case 'e':
79072805
LW
5665 switch (len) {
5666 case 4:
a0d0e21e
LW
5667 if (strEQ(d,"seek")) return -KEY_seek;
5668 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5669 break;
5670 case 5:
a0d0e21e 5671 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5672 break;
5673 case 6:
a0d0e21e
LW
5674 if (strEQ(d,"select")) return -KEY_select;
5675 if (strEQ(d,"semctl")) return -KEY_semctl;
5676 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5677 break;
5678 case 7:
a0d0e21e
LW
5679 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5680 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5681 break;
5682 case 8:
a0d0e21e
LW
5683 if (strEQ(d,"setpwent")) return -KEY_setpwent;
5684 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
5685 break;
5686 case 9:
a0d0e21e 5687 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
5688 break;
5689 case 10:
a0d0e21e
LW
5690 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
5691 if (strEQ(d,"sethostent")) return -KEY_sethostent;
5692 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
5693 break;
5694 case 11:
a0d0e21e
LW
5695 if (strEQ(d,"setpriority")) return -KEY_setpriority;
5696 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
5697 break;
5698 }
a687059c
LW
5699 break;
5700 case 'h':
79072805
LW
5701 switch (len) {
5702 case 5:
3a6a8333 5703 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
5704 break;
5705 case 6:
a0d0e21e
LW
5706 if (strEQ(d,"shmctl")) return -KEY_shmctl;
5707 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
5708 break;
5709 case 7:
a0d0e21e 5710 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
5711 break;
5712 case 8:
a0d0e21e
LW
5713 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
5714 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
5715 break;
5716 }
a687059c
LW
5717 break;
5718 case 'i':
a0d0e21e 5719 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
5720 break;
5721 case 'l':
a0d0e21e 5722 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
5723 break;
5724 case 'o':
79072805 5725 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
5726 if (strEQ(d,"socket")) return -KEY_socket;
5727 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
5728 break;
5729 case 'p':
79072805 5730 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 5731 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 5732 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
5733 break;
5734 case 'q':
a0d0e21e 5735 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
5736 break;
5737 case 'r':
a0d0e21e 5738 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
5739 break;
5740 case 't':
a0d0e21e 5741 if (strEQ(d,"stat")) return -KEY_stat;
79072805 5742 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
5743 break;
5744 case 'u':
a0d0e21e 5745 if (strEQ(d,"substr")) return -KEY_substr;
79072805 5746 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
5747 break;
5748 case 'y':
79072805
LW
5749 switch (len) {
5750 case 6:
a0d0e21e 5751 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
5752 break;
5753 case 7:
a0d0e21e
LW
5754 if (strEQ(d,"symlink")) return -KEY_symlink;
5755 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 5756 if (strEQ(d,"sysopen")) return -KEY_sysopen;
5757 if (strEQ(d,"sysread")) return -KEY_sysread;
5758 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
5759 break;
5760 case 8:
a0d0e21e 5761 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 5762 break;
a687059c 5763 }
a687059c
LW
5764 break;
5765 }
5766 break;
79072805
LW
5767 case 't':
5768 switch (len) {
5769 case 2:
5770 if (strEQ(d,"tr")) return KEY_tr;
5771 break;
463ee0b2
LW
5772 case 3:
5773 if (strEQ(d,"tie")) return KEY_tie;
5774 break;
79072805 5775 case 4:
a0d0e21e 5776 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 5777 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 5778 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
5779 break;
5780 case 5:
a0d0e21e 5781 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
5782 break;
5783 case 7:
a0d0e21e 5784 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
5785 break;
5786 case 8:
a0d0e21e 5787 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 5788 break;
378cc40b 5789 }
a687059c 5790 break;
79072805
LW
5791 case 'u':
5792 switch (len) {
5793 case 2:
a0d0e21e
LW
5794 if (strEQ(d,"uc")) return -KEY_uc;
5795 break;
5796 case 3:
5797 if (strEQ(d,"use")) return KEY_use;
79072805
LW
5798 break;
5799 case 5:
5800 if (strEQ(d,"undef")) return KEY_undef;
5801 if (strEQ(d,"until")) return KEY_until;
463ee0b2 5802 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
5803 if (strEQ(d,"utime")) return -KEY_utime;
5804 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
5805 break;
5806 case 6:
5807 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
5808 if (strEQ(d,"unpack")) return -KEY_unpack;
5809 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
5810 break;
5811 case 7:
3a6a8333 5812 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 5813 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 5814 break;
a687059c
LW
5815 }
5816 break;
79072805 5817 case 'v':
a0d0e21e
LW
5818 if (strEQ(d,"values")) return -KEY_values;
5819 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 5820 break;
79072805
LW
5821 case 'w':
5822 switch (len) {
5823 case 4:
a0d0e21e
LW
5824 if (strEQ(d,"warn")) return -KEY_warn;
5825 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
5826 break;
5827 case 5:
5828 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 5829 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
5830 break;
5831 case 7:
a0d0e21e 5832 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
5833 break;
5834 case 9:
a0d0e21e 5835 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 5836 break;
2f3197b3 5837 }
a687059c 5838 break;
79072805 5839 case 'x':
a0d0e21e
LW
5840 if (len == 1) return -KEY_x;
5841 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 5842 break;
79072805
LW
5843 case 'y':
5844 if (len == 1) return KEY_y;
5845 break;
5846 case 'z':
a687059c
LW
5847 break;
5848 }
79072805 5849 return 0;
a687059c
LW
5850}
5851
76e3520e 5852STATIC void
cea2e8a9 5853S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 5854{
2f3197b3
LW
5855 char *w;
5856
d008e5eb 5857 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
5858 if (ckWARN(WARN_SYNTAX)) {
5859 int level = 1;
5860 for (w = s+2; *w && level; w++) {
5861 if (*w == '(')
5862 ++level;
5863 else if (*w == ')')
5864 --level;
5865 }
5866 if (*w)
5867 for (; *w && isSPACE(*w); w++) ;
5868 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
65cec589
GS
5869 Perl_warner(aTHX_ WARN_SYNTAX,
5870 "%s (...) interpreted as function",name);
d008e5eb 5871 }
2f3197b3 5872 }
3280af22 5873 while (s < PL_bufend && isSPACE(*s))
2f3197b3 5874 s++;
a687059c
LW
5875 if (*s == '(')
5876 s++;
3280af22 5877 while (s < PL_bufend && isSPACE(*s))
a687059c 5878 s++;
7e2040f0 5879 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 5880 w = s++;
7e2040f0 5881 while (isALNUM_lazy_if(s,UTF))
a687059c 5882 s++;
3280af22 5883 while (s < PL_bufend && isSPACE(*s))
a687059c 5884 s++;
e929a76b 5885 if (*s == ',') {
463ee0b2 5886 int kw;
e929a76b 5887 *s = '\0';
864dbfa3 5888 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 5889 *s = ',';
463ee0b2 5890 if (kw)
e929a76b 5891 return;
cea2e8a9 5892 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
5893 }
5894 }
5895}
5896
423cee85
JH
5897/* Either returns sv, or mortalizes sv and returns a new SV*.
5898 Best used as sv=new_constant(..., sv, ...).
5899 If s, pv are NULL, calls subroutine with one argument,
5900 and type is used with error messages only. */
5901
b3ac6de7 5902STATIC SV *
dff6d3cd 5903S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 5904 const char *type)
b3ac6de7 5905{
b3ac6de7 5906 dSP;
3280af22 5907 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 5908 SV *res;
b3ac6de7
IZ
5909 SV **cvp;
5910 SV *cv, *typesv;
f0af216f 5911 const char *why1, *why2, *why3;
4e553d73 5912
f0af216f 5913 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
5914 SV *msg;
5915
f0af216f 5916 why2 = strEQ(key,"charnames")
41ab332f 5917 ? "(possibly a missing \"use charnames ...\")"
f0af216f 5918 : "";
4e553d73 5919 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
5920 (type ? type: "undef"), why2);
5921
5922 /* This is convoluted and evil ("goto considered harmful")
5923 * but I do not understand the intricacies of all the different
5924 * failure modes of %^H in here. The goal here is to make
5925 * the most probable error message user-friendly. --jhi */
5926
5927 goto msgdone;
5928
423cee85 5929 report:
4e553d73 5930 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 5931 (type ? type: "undef"), why1, why2, why3);
41ab332f 5932 msgdone:
423cee85
JH
5933 yyerror(SvPVX(msg));
5934 SvREFCNT_dec(msg);
5935 return sv;
5936 }
b3ac6de7
IZ
5937 cvp = hv_fetch(table, key, strlen(key), FALSE);
5938 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
5939 why1 = "$^H{";
5940 why2 = key;
f0af216f 5941 why3 = "} is not defined";
423cee85 5942 goto report;
b3ac6de7
IZ
5943 }
5944 sv_2mortal(sv); /* Parent created it permanently */
5945 cv = *cvp;
423cee85
JH
5946 if (!pv && s)
5947 pv = sv_2mortal(newSVpvn(s, len));
5948 if (type && pv)
5949 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 5950 else
423cee85 5951 typesv = &PL_sv_undef;
4e553d73 5952
e788e7d3 5953 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
5954 ENTER ;
5955 SAVETMPS;
4e553d73 5956
423cee85 5957 PUSHMARK(SP) ;
a5845cb7 5958 EXTEND(sp, 3);
423cee85
JH
5959 if (pv)
5960 PUSHs(pv);
b3ac6de7 5961 PUSHs(sv);
423cee85
JH
5962 if (pv)
5963 PUSHs(typesv);
b3ac6de7 5964 PUTBACK;
423cee85 5965 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 5966
423cee85 5967 SPAGAIN ;
4e553d73 5968
423cee85 5969 /* Check the eval first */
9b0e499b 5970 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
5971 STRLEN n_a;
5972 sv_catpv(ERRSV, "Propagated");
5973 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 5974 (void)POPs;
423cee85
JH
5975 res = SvREFCNT_inc(sv);
5976 }
5977 else {
5978 res = POPs;
e1f15930 5979 (void)SvREFCNT_inc(res);
423cee85 5980 }
4e553d73 5981
423cee85
JH
5982 PUTBACK ;
5983 FREETMPS ;
5984 LEAVE ;
b3ac6de7 5985 POPSTACK;
4e553d73 5986
b3ac6de7 5987 if (!SvOK(res)) {
423cee85
JH
5988 why1 = "Call to &{$^H{";
5989 why2 = key;
f0af216f 5990 why3 = "}} did not return a defined value";
423cee85
JH
5991 sv = res;
5992 goto report;
9b0e499b 5993 }
423cee85 5994
9b0e499b 5995 return res;
b3ac6de7 5996}
4e553d73 5997
76e3520e 5998STATIC char *
cea2e8a9 5999S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
6000{
6001 register char *d = dest;
8903cb82 6002 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 6003 for (;;) {
8903cb82 6004 if (d >= e)
cea2e8a9 6005 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6006 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6007 *d++ = *s++;
7e2040f0 6008 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6009 *d++ = ':';
6010 *d++ = ':';
6011 s++;
6012 }
c3e0f903 6013 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6014 *d++ = *s++;
6015 *d++ = *s++;
6016 }
fd400ab9 6017 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6018 char *t = s + UTF8SKIP(s);
fd400ab9 6019 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6020 t += UTF8SKIP(t);
6021 if (d + (t - s) > e)
cea2e8a9 6022 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6023 Copy(s, d, t - s, char);
6024 d += t - s;
6025 s = t;
6026 }
463ee0b2
LW
6027 else {
6028 *d = '\0';
6029 *slp = d - dest;
6030 return s;
e929a76b 6031 }
378cc40b
LW
6032 }
6033}
6034
76e3520e 6035STATIC char *
cea2e8a9 6036S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6037{
6038 register char *d;
8903cb82 6039 register char *e;
79072805 6040 char *bracket = 0;
748a9306 6041 char funny = *s++;
378cc40b 6042
a0d0e21e
LW
6043 if (isSPACE(*s))
6044 s = skipspace(s);
378cc40b 6045 d = dest;
8903cb82 6046 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6047 if (isDIGIT(*s)) {
8903cb82 6048 while (isDIGIT(*s)) {
6049 if (d >= e)
cea2e8a9 6050 Perl_croak(aTHX_ ident_too_long);
378cc40b 6051 *d++ = *s++;
8903cb82 6052 }
378cc40b
LW
6053 }
6054 else {
463ee0b2 6055 for (;;) {
8903cb82 6056 if (d >= e)
cea2e8a9 6057 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6058 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6059 *d++ = *s++;
7e2040f0 6060 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6061 *d++ = ':';
6062 *d++ = ':';
6063 s++;
6064 }
a0d0e21e 6065 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6066 *d++ = *s++;
6067 *d++ = *s++;
6068 }
fd400ab9 6069 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6070 char *t = s + UTF8SKIP(s);
fd400ab9 6071 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6072 t += UTF8SKIP(t);
6073 if (d + (t - s) > e)
cea2e8a9 6074 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6075 Copy(s, d, t - s, char);
6076 d += t - s;
6077 s = t;
6078 }
463ee0b2
LW
6079 else
6080 break;
6081 }
378cc40b
LW
6082 }
6083 *d = '\0';
6084 d = dest;
79072805 6085 if (*d) {
3280af22
NIS
6086 if (PL_lex_state != LEX_NORMAL)
6087 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6088 return s;
378cc40b 6089 }
748a9306 6090 if (*s == '$' && s[1] &&
7e2040f0 6091 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6092 {
4810e5ec 6093 return s;
5cd24f17 6094 }
79072805
LW
6095 if (*s == '{') {
6096 bracket = s;
6097 s++;
6098 }
6099 else if (ck_uni)
6100 check_uni();
93a17b20 6101 if (s < send)
79072805
LW
6102 *d = *s++;
6103 d[1] = '\0';
2b92dfce 6104 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6105 *d = toCTRL(*s);
6106 s++;
de3bb511 6107 }
79072805 6108 if (bracket) {
748a9306 6109 if (isSPACE(s[-1])) {
fa83b5b6 6110 while (s < send) {
6111 char ch = *s++;
bf4acbe4 6112 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6113 *d = ch;
6114 break;
6115 }
6116 }
748a9306 6117 }
7e2040f0 6118 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6119 d++;
a0ed51b3
LW
6120 if (UTF) {
6121 e = s;
155aba94 6122 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6123 e += UTF8SKIP(e);
fd400ab9 6124 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6125 e += UTF8SKIP(e);
6126 }
6127 Copy(s, d, e - s, char);
6128 d += e - s;
6129 s = e;
6130 }
6131 else {
2b92dfce 6132 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6133 *d++ = *s++;
2b92dfce 6134 if (d >= e)
cea2e8a9 6135 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6136 }
79072805 6137 *d = '\0';
bf4acbe4 6138 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6139 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6140 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6141 const char *brack = *s == '[' ? "[...]" : "{...}";
cea2e8a9 6142 Perl_warner(aTHX_ WARN_AMBIGUOUS,
599cee73 6143 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6144 funny, dest, brack, funny, dest, brack);
6145 }
79072805 6146 bracket++;
a0be28da 6147 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6148 return s;
6149 }
4e553d73
NIS
6150 }
6151 /* Handle extended ${^Foo} variables
2b92dfce
GS
6152 * 1999-02-27 mjd-perl-patch@plover.com */
6153 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6154 && isALNUM(*s))
6155 {
6156 d++;
6157 while (isALNUM(*s) && d < e) {
6158 *d++ = *s++;
6159 }
6160 if (d >= e)
cea2e8a9 6161 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6162 *d = '\0';
79072805
LW
6163 }
6164 if (*s == '}') {
6165 s++;
3280af22
NIS
6166 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets)
6167 PL_lex_state = LEX_INTERPEND;
748a9306
LW
6168 if (funny == '#')
6169 funny = '@';
d008e5eb 6170 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6171 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6172 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6173 {
cea2e8a9 6174 Perl_warner(aTHX_ WARN_AMBIGUOUS,
d008e5eb
GS
6175 "Ambiguous use of %c{%s} resolved to %c%s",
6176 funny, dest, funny, dest);
6177 }
6178 }
79072805
LW
6179 }
6180 else {
6181 s = bracket; /* let the parser handle it */
93a17b20 6182 *dest = '\0';
79072805
LW
6183 }
6184 }
3280af22
NIS
6185 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6186 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6187 return s;
6188}
6189
cea2e8a9
GS
6190void
6191Perl_pmflag(pTHX_ U16 *pmfl, int ch)
a0d0e21e 6192{
bbce6d69 6193 if (ch == 'i')
a0d0e21e 6194 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6195 else if (ch == 'g')
6196 *pmfl |= PMf_GLOBAL;
c90c0ff4 6197 else if (ch == 'c')
6198 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6199 else if (ch == 'o')
6200 *pmfl |= PMf_KEEP;
6201 else if (ch == 'm')
6202 *pmfl |= PMf_MULTILINE;
6203 else if (ch == 's')
6204 *pmfl |= PMf_SINGLELINE;
6205 else if (ch == 'x')
6206 *pmfl |= PMf_EXTENDED;
6207}
378cc40b 6208
76e3520e 6209STATIC char *
cea2e8a9 6210S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6211{
79072805
LW
6212 PMOP *pm;
6213 char *s;
378cc40b 6214
09bef843 6215 s = scan_str(start,FALSE,FALSE);
37fd879b 6216 if (!s)
cea2e8a9 6217 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6218
8782bef2 6219 pm = (PMOP*)newPMOP(type, 0);
3280af22 6220 if (PL_multi_open == '?')
79072805 6221 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6222 if(type == OP_QR) {
6223 while (*s && strchr("iomsx", *s))
6224 pmflag(&pm->op_pmflags,*s++);
6225 }
6226 else {
6227 while (*s && strchr("iogcmsx", *s))
6228 pmflag(&pm->op_pmflags,*s++);
6229 }
4633a7c4 6230 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6231
3280af22 6232 PL_lex_op = (OP*)pm;
79072805 6233 yylval.ival = OP_MATCH;
378cc40b
LW
6234 return s;
6235}
6236
76e3520e 6237STATIC char *
cea2e8a9 6238S_scan_subst(pTHX_ char *start)
79072805 6239{
a0d0e21e 6240 register char *s;
79072805 6241 register PMOP *pm;
4fdae800 6242 I32 first_start;
79072805
LW
6243 I32 es = 0;
6244
79072805
LW
6245 yylval.ival = OP_NULL;
6246
09bef843 6247 s = scan_str(start,FALSE,FALSE);
79072805 6248
37fd879b 6249 if (!s)
cea2e8a9 6250 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6251
3280af22 6252 if (s[-1] == PL_multi_open)
79072805
LW
6253 s--;
6254
3280af22 6255 first_start = PL_multi_start;
09bef843 6256 s = scan_str(s,FALSE,FALSE);
79072805 6257 if (!s) {
37fd879b 6258 if (PL_lex_stuff) {
3280af22 6259 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6260 PL_lex_stuff = Nullsv;
6261 }
cea2e8a9 6262 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6263 }
3280af22 6264 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6265
79072805 6266 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6267 while (*s) {
a687059c
LW
6268 if (*s == 'e') {
6269 s++;
2f3197b3 6270 es++;
a687059c 6271 }
b3eb6a9b 6272 else if (strchr("iogcmsx", *s))
a0d0e21e 6273 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6274 else
6275 break;
378cc40b 6276 }
79072805
LW
6277
6278 if (es) {
6279 SV *repl;
0244c3a4
GS
6280 PL_sublex_info.super_bufptr = s;
6281 PL_sublex_info.super_bufend = PL_bufend;
6282 PL_multi_end = 0;
79072805 6283 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6284 repl = newSVpvn("",0);
463ee0b2 6285 while (es-- > 0)
a0d0e21e 6286 sv_catpv(repl, es ? "eval " : "do ");
79072805 6287 sv_catpvn(repl, "{ ", 2);
3280af22 6288 sv_catsv(repl, PL_lex_repl);
79072805 6289 sv_catpvn(repl, " };", 2);
25da4f38 6290 SvEVALED_on(repl);
3280af22
NIS
6291 SvREFCNT_dec(PL_lex_repl);
6292 PL_lex_repl = repl;
378cc40b 6293 }
79072805 6294
4633a7c4 6295 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6296 PL_lex_op = (OP*)pm;
79072805 6297 yylval.ival = OP_SUBST;
378cc40b
LW
6298 return s;
6299}
6300
76e3520e 6301STATIC char *
cea2e8a9 6302S_scan_trans(pTHX_ char *start)
378cc40b 6303{
a0d0e21e 6304 register char* s;
11343788 6305 OP *o;
79072805
LW
6306 short *tbl;
6307 I32 squash;
a0ed51b3 6308 I32 del;
79072805
LW
6309 I32 complement;
6310
6311 yylval.ival = OP_NULL;
6312
09bef843 6313 s = scan_str(start,FALSE,FALSE);
37fd879b 6314 if (!s)
cea2e8a9 6315 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6316 if (s[-1] == PL_multi_open)
2f3197b3
LW
6317 s--;
6318
09bef843 6319 s = scan_str(s,FALSE,FALSE);
79072805 6320 if (!s) {
37fd879b 6321 if (PL_lex_stuff) {
3280af22 6322 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6323 PL_lex_stuff = Nullsv;
6324 }
cea2e8a9 6325 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6326 }
79072805 6327
a0ed51b3 6328 complement = del = squash = 0;
6940069f 6329 while (strchr("cds", *s)) {
395c3793 6330 if (*s == 'c')
79072805 6331 complement = OPpTRANS_COMPLEMENT;
395c3793 6332 else if (*s == 'd')
a0ed51b3
LW
6333 del = OPpTRANS_DELETE;
6334 else if (*s == 's')
79072805 6335 squash = OPpTRANS_SQUASH;
395c3793
LW
6336 s++;
6337 }
8973db79
JH
6338
6339 New(803, tbl, complement&&!del?258:256, short);
6340 o = newPVOP(OP_TRANS, 0, (char*)tbl);
7948272d
NIS
6341 o->op_private = del|squash|complement|
6342 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6343 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6344
3280af22 6345 PL_lex_op = o;
79072805
LW
6346 yylval.ival = OP_TRANS;
6347 return s;
6348}
6349
76e3520e 6350STATIC char *
cea2e8a9 6351S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6352{
6353 SV *herewas;
6354 I32 op_type = OP_SCALAR;
6355 I32 len;
6356 SV *tmpstr;
6357 char term;
6358 register char *d;
fc36a67e 6359 register char *e;
4633a7c4 6360 char *peek;
3280af22 6361 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6362
6363 s += 2;
3280af22
NIS
6364 d = PL_tokenbuf;
6365 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6366 if (!outer)
79072805 6367 *d++ = '\n';
bf4acbe4 6368 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6369 if (*peek && strchr("`'\"",*peek)) {
6370 s = peek;
79072805 6371 term = *s++;
3280af22 6372 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6373 d += len;
3280af22 6374 if (s < PL_bufend)
79072805 6375 s++;
79072805
LW
6376 }
6377 else {
6378 if (*s == '\\')
6379 s++, term = '\'';
6380 else
6381 term = '"';
7e2040f0 6382 if (!isALNUM_lazy_if(s,UTF))
4633a7c4 6383 deprecate("bare << to mean <<\"\"");
7e2040f0 6384 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6385 if (d < e)
6386 *d++ = *s;
6387 }
6388 }
3280af22 6389 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6390 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6391 *d++ = '\n';
6392 *d = '\0';
3280af22 6393 len = d - PL_tokenbuf;
6a27c188 6394#ifndef PERL_STRICT_CR
f63a84b2
LW
6395 d = strchr(s, '\r');
6396 if (d) {
6397 char *olds = s;
6398 s = d;
3280af22 6399 while (s < PL_bufend) {
f63a84b2
LW
6400 if (*s == '\r') {
6401 *d++ = '\n';
6402 if (*++s == '\n')
6403 s++;
6404 }
6405 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6406 *d++ = *s++;
6407 s++;
6408 }
6409 else
6410 *d++ = *s++;
6411 }
6412 *d = '\0';
3280af22
NIS
6413 PL_bufend = d;
6414 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6415 s = olds;
6416 }
6417#endif
79072805 6418 d = "\n";
3280af22 6419 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6420 herewas = newSVpvn(s,PL_bufend-s);
79072805 6421 else
79cb57f6 6422 s--, herewas = newSVpvn(s,d-s);
79072805 6423 s += SvCUR(herewas);
748a9306 6424
8d6dde3e 6425 tmpstr = NEWSV(87,79);
748a9306
LW
6426 sv_upgrade(tmpstr, SVt_PVIV);
6427 if (term == '\'') {
79072805 6428 op_type = OP_CONST;
748a9306
LW
6429 SvIVX(tmpstr) = -1;
6430 }
6431 else if (term == '`') {
79072805 6432 op_type = OP_BACKTICK;
748a9306
LW
6433 SvIVX(tmpstr) = '\\';
6434 }
79072805
LW
6435
6436 CLINE;
57843af0 6437 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6438 PL_multi_open = PL_multi_close = '<';
6439 term = *PL_tokenbuf;
0244c3a4
GS
6440 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6441 char *bufptr = PL_sublex_info.super_bufptr;
6442 char *bufend = PL_sublex_info.super_bufend;
6443 char *olds = s - SvCUR(herewas);
6444 s = strchr(bufptr, '\n');
6445 if (!s)
6446 s = bufend;
6447 d = s;
6448 while (s < bufend &&
6449 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6450 if (*s++ == '\n')
57843af0 6451 CopLINE_inc(PL_curcop);
0244c3a4
GS
6452 }
6453 if (s >= bufend) {
57843af0 6454 CopLINE_set(PL_curcop, PL_multi_start);
0244c3a4
GS
6455 missingterm(PL_tokenbuf);
6456 }
6457 sv_setpvn(herewas,bufptr,d-bufptr+1);
6458 sv_setpvn(tmpstr,d+1,s-d);
6459 s += len - 1;
6460 sv_catpvn(herewas,s,bufend-s);
6461 (void)strcpy(bufptr,SvPVX(herewas));
6462
6463 s = olds;
6464 goto retval;
6465 }
6466 else if (!outer) {
79072805 6467 d = s;
3280af22
NIS
6468 while (s < PL_bufend &&
6469 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6470 if (*s++ == '\n')
57843af0 6471 CopLINE_inc(PL_curcop);
79072805 6472 }
3280af22 6473 if (s >= PL_bufend) {
57843af0 6474 CopLINE_set(PL_curcop, PL_multi_start);
3280af22 6475 missingterm(PL_tokenbuf);
79072805
LW
6476 }
6477 sv_setpvn(tmpstr,d+1,s-d);
6478 s += len - 1;
57843af0 6479 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6480
3280af22
NIS
6481 sv_catpvn(herewas,s,PL_bufend-s);
6482 sv_setsv(PL_linestr,herewas);
6483 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6484 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6485 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6486 }
6487 else
6488 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6489 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6490 if (!outer ||
3280af22 6491 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
57843af0 6492 CopLINE_set(PL_curcop, PL_multi_start);
3280af22 6493 missingterm(PL_tokenbuf);
79072805 6494 }
57843af0 6495 CopLINE_inc(PL_curcop);
3280af22 6496 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6497 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6498#ifndef PERL_STRICT_CR
3280af22 6499 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6500 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6501 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6502 {
3280af22
NIS
6503 PL_bufend[-2] = '\n';
6504 PL_bufend--;
6505 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6506 }
3280af22
NIS
6507 else if (PL_bufend[-1] == '\r')
6508 PL_bufend[-1] = '\n';
f63a84b2 6509 }
3280af22
NIS
6510 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6511 PL_bufend[-1] = '\n';
f63a84b2 6512#endif
3280af22 6513 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6514 SV *sv = NEWSV(88,0);
6515
93a17b20 6516 sv_upgrade(sv, SVt_PVMG);
3280af22 6517 sv_setsv(sv,PL_linestr);
57843af0 6518 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6519 }
3280af22
NIS
6520 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
6521 s = PL_bufend - 1;
79072805 6522 *s = ' ';
3280af22
NIS
6523 sv_catsv(PL_linestr,herewas);
6524 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805
LW
6525 }
6526 else {
3280af22
NIS
6527 s = PL_bufend;
6528 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6529 }
6530 }
79072805 6531 s++;
0244c3a4 6532retval:
57843af0 6533 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6534 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6535 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6536 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6537 }
8990e307 6538 SvREFCNT_dec(herewas);
0064a8a9 6539 if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
7948272d 6540 SvUTF8_on(tmpstr);
3280af22 6541 PL_lex_stuff = tmpstr;
79072805
LW
6542 yylval.ival = op_type;
6543 return s;
6544}
6545
02aa26ce
NT
6546/* scan_inputsymbol
6547 takes: current position in input buffer
6548 returns: new position in input buffer
6549 side-effects: yylval and lex_op are set.
6550
6551 This code handles:
6552
6553 <> read from ARGV
6554 <FH> read from filehandle
6555 <pkg::FH> read from package qualified filehandle
6556 <pkg'FH> read from package qualified filehandle
6557 <$fh> read from filehandle in $fh
6558 <*.h> filename glob
6559
6560*/
6561
76e3520e 6562STATIC char *
cea2e8a9 6563S_scan_inputsymbol(pTHX_ char *start)
79072805 6564{
02aa26ce 6565 register char *s = start; /* current position in buffer */
79072805 6566 register char *d;
fc36a67e 6567 register char *e;
1b420867 6568 char *end;
79072805
LW
6569 I32 len;
6570
3280af22
NIS
6571 d = PL_tokenbuf; /* start of temp holding space */
6572 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6573 end = strchr(s, '\n');
6574 if (!end)
6575 end = PL_bufend;
6576 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6577
6578 /* die if we didn't have space for the contents of the <>,
1b420867 6579 or if it didn't end, or if we see a newline
02aa26ce
NT
6580 */
6581
3280af22 6582 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6583 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6584 if (s >= end)
cea2e8a9 6585 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6586
fc36a67e 6587 s++;
02aa26ce
NT
6588
6589 /* check for <$fh>
6590 Remember, only scalar variables are interpreted as filehandles by
6591 this code. Anything more complex (e.g., <$fh{$num}>) will be
6592 treated as a glob() call.
6593 This code makes use of the fact that except for the $ at the front,
6594 a scalar variable and a filehandle look the same.
6595 */
4633a7c4 6596 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6597
6598 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6599 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6600 d++;
02aa26ce
NT
6601
6602 /* If we've tried to read what we allow filehandles to look like, and
6603 there's still text left, then it must be a glob() and not a getline.
6604 Use scan_str to pull out the stuff between the <> and treat it
6605 as nothing more than a string.
6606 */
6607
3280af22 6608 if (d - PL_tokenbuf != len) {
79072805
LW
6609 yylval.ival = OP_GLOB;
6610 set_csh();
09bef843 6611 s = scan_str(start,FALSE,FALSE);
79072805 6612 if (!s)
cea2e8a9 6613 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6614 return s;
6615 }
395c3793 6616 else {
02aa26ce 6617 /* we're in a filehandle read situation */
3280af22 6618 d = PL_tokenbuf;
02aa26ce
NT
6619
6620 /* turn <> into <ARGV> */
79072805
LW
6621 if (!len)
6622 (void)strcpy(d,"ARGV");
02aa26ce
NT
6623
6624 /* if <$fh>, create the ops to turn the variable into a
6625 filehandle
6626 */
79072805 6627 if (*d == '$') {
a0d0e21e 6628 I32 tmp;
02aa26ce
NT
6629
6630 /* try to find it in the pad for this block, otherwise find
6631 add symbol table ops
6632 */
11343788
MB
6633 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
6634 OP *o = newOP(OP_PADSV, 0);
6635 o->op_targ = tmp;
f5284f61 6636 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o);
a0d0e21e
LW
6637 }
6638 else {
6639 GV *gv = gv_fetchpv(d+1,TRUE, SVt_PV);
3280af22 6640 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0,
a0d0e21e 6641 newUNOP(OP_RV2SV, 0,
f5284f61 6642 newGVOP(OP_GV, 0, gv)));
a0d0e21e 6643 }
f5284f61
IZ
6644 PL_lex_op->op_flags |= OPf_SPECIAL;
6645 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
6646 yylval.ival = OP_NULL;
6647 }
02aa26ce
NT
6648
6649 /* If it's none of the above, it must be a literal filehandle
6650 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 6651 else {
85e6fe83 6652 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
3280af22 6653 PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
6654 yylval.ival = OP_NULL;
6655 }
6656 }
02aa26ce 6657
79072805
LW
6658 return s;
6659}
6660
02aa26ce
NT
6661
6662/* scan_str
6663 takes: start position in buffer
09bef843
SB
6664 keep_quoted preserve \ on the embedded delimiter(s)
6665 keep_delims preserve the delimiters around the string
02aa26ce
NT
6666 returns: position to continue reading from buffer
6667 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
6668 updates the read buffer.
6669
6670 This subroutine pulls a string out of the input. It is called for:
6671 q single quotes q(literal text)
6672 ' single quotes 'literal text'
6673 qq double quotes qq(interpolate $here please)
6674 " double quotes "interpolate $here please"
6675 qx backticks qx(/bin/ls -l)
6676 ` backticks `/bin/ls -l`
6677 qw quote words @EXPORT_OK = qw( func() $spam )
6678 m// regexp match m/this/
6679 s/// regexp substitute s/this/that/
6680 tr/// string transliterate tr/this/that/
6681 y/// string transliterate y/this/that/
6682 ($*@) sub prototypes sub foo ($)
09bef843 6683 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
6684 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
6685
6686 In most of these cases (all but <>, patterns and transliterate)
6687 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
6688 calls scan_str(). s/// makes yylex() call scan_subst() which calls
6689 scan_str(). tr/// and y/// make yylex() call scan_trans() which
6690 calls scan_str().
4e553d73 6691
02aa26ce
NT
6692 It skips whitespace before the string starts, and treats the first
6693 character as the delimiter. If the delimiter is one of ([{< then
6694 the corresponding "close" character )]}> is used as the closing
6695 delimiter. It allows quoting of delimiters, and if the string has
6696 balanced delimiters ([{<>}]) it allows nesting.
6697
37fd879b
HS
6698 On success, the SV with the resulting string is put into lex_stuff or,
6699 if that is already non-NULL, into lex_repl. The second case occurs only
6700 when parsing the RHS of the special constructs s/// and tr/// (y///).
6701 For convenience, the terminating delimiter character is stuffed into
6702 SvIVX of the SV.
02aa26ce
NT
6703*/
6704
76e3520e 6705STATIC char *
09bef843 6706S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 6707{
02aa26ce
NT
6708 SV *sv; /* scalar value: string */
6709 char *tmps; /* temp string, used for delimiter matching */
6710 register char *s = start; /* current position in the buffer */
6711 register char term; /* terminating character */
6712 register char *to; /* current position in the sv's data */
6713 I32 brackets = 1; /* bracket nesting level */
89491803 6714 bool has_utf8 = FALSE; /* is there any utf8 content? */
02aa26ce
NT
6715
6716 /* skip space before the delimiter */
fb73857a 6717 if (isSPACE(*s))
6718 s = skipspace(s);
02aa26ce
NT
6719
6720 /* mark where we are, in case we need to report errors */
79072805 6721 CLINE;
02aa26ce
NT
6722
6723 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 6724 term = *s;
63cd0674 6725 if (!UTF8_IS_INVARIANT((U8)term) && UTF)
89491803 6726 has_utf8 = TRUE;
b1c7b182 6727
02aa26ce 6728 /* mark where we are */
57843af0 6729 PL_multi_start = CopLINE(PL_curcop);
3280af22 6730 PL_multi_open = term;
02aa26ce
NT
6731
6732 /* find corresponding closing delimiter */
93a17b20 6733 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
79072805 6734 term = tmps[5];
3280af22 6735 PL_multi_close = term;
79072805 6736
02aa26ce 6737 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
6738 assuming. 79 is the SV's initial length. What a random number. */
6739 sv = NEWSV(87,79);
ed6116ce
LW
6740 sv_upgrade(sv, SVt_PVIV);
6741 SvIVX(sv) = term;
a0d0e21e 6742 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
6743
6744 /* move past delimiter and try to read a complete string */
09bef843
SB
6745 if (keep_delims)
6746 sv_catpvn(sv, s, 1);
93a17b20
LW
6747 s++;
6748 for (;;) {
02aa26ce 6749 /* extend sv if need be */
3280af22 6750 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 6751 /* set 'to' to the next character in the sv's string */
463ee0b2 6752 to = SvPVX(sv)+SvCUR(sv);
09bef843 6753
02aa26ce 6754 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
6755 if (PL_multi_open == PL_multi_close) {
6756 for (; s < PL_bufend; s++,to++) {
02aa26ce 6757 /* embedded newlines increment the current line number */
3280af22 6758 if (*s == '\n' && !PL_rsfp)
57843af0 6759 CopLINE_inc(PL_curcop);
02aa26ce 6760 /* handle quoted delimiters */
3280af22 6761 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 6762 if (!keep_quoted && s[1] == term)
a0d0e21e 6763 s++;
02aa26ce 6764 /* any other quotes are simply copied straight through */
a0d0e21e
LW
6765 else
6766 *to++ = *s++;
6767 }
02aa26ce
NT
6768 /* terminate when run out of buffer (the for() condition), or
6769 have found the terminator */
93a17b20
LW
6770 else if (*s == term)
6771 break;
63cd0674 6772 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6773 has_utf8 = TRUE;
93a17b20
LW
6774 *to = *s;
6775 }
6776 }
02aa26ce
NT
6777
6778 /* if the terminator isn't the same as the start character (e.g.,
6779 matched brackets), we have to allow more in the quoting, and
6780 be prepared for nested brackets.
6781 */
93a17b20 6782 else {
02aa26ce 6783 /* read until we run out of string, or we find the terminator */
3280af22 6784 for (; s < PL_bufend; s++,to++) {
02aa26ce 6785 /* embedded newlines increment the line count */
3280af22 6786 if (*s == '\n' && !PL_rsfp)
57843af0 6787 CopLINE_inc(PL_curcop);
02aa26ce 6788 /* backslashes can escape the open or closing characters */
3280af22 6789 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
6790 if (!keep_quoted &&
6791 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
6792 s++;
6793 else
6794 *to++ = *s++;
6795 }
02aa26ce 6796 /* allow nested opens and closes */
3280af22 6797 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 6798 break;
3280af22 6799 else if (*s == PL_multi_open)
93a17b20 6800 brackets++;
63cd0674 6801 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 6802 has_utf8 = TRUE;
93a17b20
LW
6803 *to = *s;
6804 }
6805 }
02aa26ce 6806 /* terminate the copied string and update the sv's end-of-string */
93a17b20 6807 *to = '\0';
463ee0b2 6808 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 6809
02aa26ce
NT
6810 /*
6811 * this next chunk reads more into the buffer if we're not done yet
6812 */
6813
b1c7b182
GS
6814 if (s < PL_bufend)
6815 break; /* handle case where we are done yet :-) */
79072805 6816
6a27c188 6817#ifndef PERL_STRICT_CR
f63a84b2 6818 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
6819 if ((to[-2] == '\r' && to[-1] == '\n') ||
6820 (to[-2] == '\n' && to[-1] == '\r'))
6821 {
f63a84b2
LW
6822 to[-2] = '\n';
6823 to--;
6824 SvCUR_set(sv, to - SvPVX(sv));
6825 }
6826 else if (to[-1] == '\r')
6827 to[-1] = '\n';
6828 }
6829 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
6830 to[-1] = '\n';
6831#endif
6832
02aa26ce
NT
6833 /* if we're out of file, or a read fails, bail and reset the current
6834 line marker so we can report where the unterminated string began
6835 */
3280af22
NIS
6836 if (!PL_rsfp ||
6837 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 6838 sv_free(sv);
57843af0 6839 CopLINE_set(PL_curcop, PL_multi_start);
79072805
LW
6840 return Nullch;
6841 }
02aa26ce 6842 /* we read a line, so increment our line counter */
57843af0 6843 CopLINE_inc(PL_curcop);
a0ed51b3 6844
02aa26ce 6845 /* update debugger info */
3280af22 6846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6847 SV *sv = NEWSV(88,0);
6848
93a17b20 6849 sv_upgrade(sv, SVt_PVMG);
3280af22 6850 sv_setsv(sv,PL_linestr);
57843af0 6851 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 6852 }
a0ed51b3 6853
3280af22
NIS
6854 /* having changed the buffer, we must update PL_bufend */
6855 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6856 PL_last_lop = PL_last_uni = Nullch;
378cc40b 6857 }
4e553d73 6858
02aa26ce
NT
6859 /* at this point, we have successfully read the delimited string */
6860
09bef843
SB
6861 if (keep_delims)
6862 sv_catpvn(sv, s, 1);
89491803 6863 if (has_utf8)
b1c7b182 6864 SvUTF8_on(sv);
57843af0 6865 PL_multi_end = CopLINE(PL_curcop);
79072805 6866 s++;
02aa26ce
NT
6867
6868 /* if we allocated too much space, give some back */
93a17b20
LW
6869 if (SvCUR(sv) + 5 < SvLEN(sv)) {
6870 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 6871 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 6872 }
02aa26ce
NT
6873
6874 /* decide whether this is the first or second quoted string we've read
6875 for this op
6876 */
4e553d73 6877
3280af22
NIS
6878 if (PL_lex_stuff)
6879 PL_lex_repl = sv;
79072805 6880 else
3280af22 6881 PL_lex_stuff = sv;
378cc40b
LW
6882 return s;
6883}
6884
02aa26ce
NT
6885/*
6886 scan_num
6887 takes: pointer to position in buffer
6888 returns: pointer to new position in buffer
6889 side-effects: builds ops for the constant in yylval.op
6890
6891 Read a number in any of the formats that Perl accepts:
6892
7fd134d9
JH
6893 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
6894 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
6895 0b[01](_?[01])*
6896 0[0-7](_?[0-7])*
6897 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 6898
3280af22 6899 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
6900 thing it reads.
6901
6902 If it reads a number without a decimal point or an exponent, it will
6903 try converting the number to an integer and see if it can do so
6904 without loss of precision.
6905*/
4e553d73 6906
378cc40b 6907char *
b73d6f50 6908Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 6909{
02aa26ce
NT
6910 register char *s = start; /* current position in buffer */
6911 register char *d; /* destination in temp buffer */
6912 register char *e; /* end of temp buffer */
86554af2 6913 NV nv; /* number read, as a double */
a7cb1f99 6914 SV *sv = Nullsv; /* place to put the converted number */
b8403495 6915 bool floatit; /* boolean: int or float? */
02aa26ce 6916 char *lastub = 0; /* position of last underbar */
fc36a67e 6917 static char number_too_long[] = "Number too long";
378cc40b 6918
02aa26ce
NT
6919 /* We use the first character to decide what type of number this is */
6920
378cc40b 6921 switch (*s) {
79072805 6922 default:
cea2e8a9 6923 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 6924
02aa26ce 6925 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 6926 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
6927 case '0':
6928 {
02aa26ce
NT
6929 /* variables:
6930 u holds the "number so far"
4f19785b
WSI
6931 shift the power of 2 of the base
6932 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
6933 overflowed was the number more than we can hold?
6934
6935 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
6936 we in octal/hex/binary?" indicator to disallow hex characters
6937 when in octal mode.
02aa26ce 6938 */
9e24b6e2
JH
6939 NV n = 0.0;
6940 UV u = 0;
79072805 6941 I32 shift;
9e24b6e2
JH
6942 bool overflowed = FALSE;
6943 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
6944 static char* bases[5] = { "", "binary", "", "octal",
6945 "hexadecimal" };
6946 static char* Bases[5] = { "", "Binary", "", "Octal",
6947 "Hexadecimal" };
6948 static char *maxima[5] = { "",
6949 "0b11111111111111111111111111111111",
6950 "",
893fe2c2 6951 "037777777777",
9e24b6e2
JH
6952 "0xffffffff" };
6953 char *base, *Base, *max;
378cc40b 6954
02aa26ce 6955 /* check for hex */
378cc40b
LW
6956 if (s[1] == 'x') {
6957 shift = 4;
6958 s += 2;
4f19785b
WSI
6959 } else if (s[1] == 'b') {
6960 shift = 1;
6961 s += 2;
378cc40b 6962 }
02aa26ce 6963 /* check for a decimal in disguise */
b78218b7 6964 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 6965 goto decimal;
02aa26ce 6966 /* so it must be octal */
928753ea 6967 else {
378cc40b 6968 shift = 3;
928753ea
JH
6969 s++;
6970 }
6971
6972 if (*s == '_') {
6973 if (ckWARN(WARN_SYNTAX))
6974 Perl_warner(aTHX_ WARN_SYNTAX,
6975 "Misplaced _ in number");
6976 lastub = s++;
6977 }
9e24b6e2
JH
6978
6979 base = bases[shift];
6980 Base = Bases[shift];
6981 max = maxima[shift];
02aa26ce 6982
4f19785b 6983 /* read the rest of the number */
378cc40b 6984 for (;;) {
9e24b6e2 6985 /* x is used in the overflow test,
893fe2c2 6986 b is the digit we're adding on. */
9e24b6e2 6987 UV x, b;
55497cff 6988
378cc40b 6989 switch (*s) {
02aa26ce
NT
6990
6991 /* if we don't mention it, we're done */
378cc40b
LW
6992 default:
6993 goto out;
02aa26ce 6994
928753ea 6995 /* _ are ignored -- but warned about if consecutive */
de3bb511 6996 case '_':
928753ea
JH
6997 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
6998 Perl_warner(aTHX_ WARN_SYNTAX,
6999 "Misplaced _ in number");
7000 lastub = s++;
de3bb511 7001 break;
02aa26ce
NT
7002
7003 /* 8 and 9 are not octal */
378cc40b 7004 case '8': case '9':
4f19785b 7005 if (shift == 3)
cea2e8a9 7006 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 7007 /* FALL THROUGH */
02aa26ce
NT
7008
7009 /* octal digits */
4f19785b 7010 case '2': case '3': case '4':
378cc40b 7011 case '5': case '6': case '7':
4f19785b 7012 if (shift == 1)
cea2e8a9 7013 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7014 /* FALL THROUGH */
7015
7016 case '0': case '1':
02aa26ce 7017 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7018 goto digit;
02aa26ce
NT
7019
7020 /* hex digits */
378cc40b
LW
7021 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7022 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7023 /* make sure they said 0x */
378cc40b
LW
7024 if (shift != 4)
7025 goto out;
55497cff 7026 b = (*s++ & 7) + 9;
02aa26ce
NT
7027
7028 /* Prepare to put the digit we have onto the end
7029 of the number so far. We check for overflows.
7030 */
7031
55497cff 7032 digit:
9e24b6e2
JH
7033 if (!overflowed) {
7034 x = u << shift; /* make room for the digit */
7035
7036 if ((x >> shift) != u
7037 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7038 overflowed = TRUE;
7039 n = (NV) u;
767a6a26
PM
7040 if (ckWARN_d(WARN_OVERFLOW))
7041 Perl_warner(aTHX_ WARN_OVERFLOW,
9e24b6e2
JH
7042 "Integer overflow in %s number",
7043 base);
7044 } else
7045 u = x | b; /* add the digit to the end */
7046 }
7047 if (overflowed) {
7048 n *= nvshift[shift];
7049 /* If an NV has not enough bits in its
7050 * mantissa to represent an UV this summing of
7051 * small low-order numbers is a waste of time
7052 * (because the NV cannot preserve the
7053 * low-order bits anyway): we could just
7054 * remember when did we overflow and in the
7055 * end just multiply n by the right
7056 * amount. */
7057 n += (NV) b;
55497cff 7058 }
378cc40b
LW
7059 break;
7060 }
7061 }
02aa26ce
NT
7062
7063 /* if we get here, we had success: make a scalar value from
7064 the number.
7065 */
378cc40b 7066 out:
928753ea
JH
7067
7068 /* final misplaced underbar check */
7069 if (s[-1] == '_') {
7070 if (ckWARN(WARN_SYNTAX))
7071 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
7072 }
7073
79072805 7074 sv = NEWSV(92,0);
9e24b6e2 7075 if (overflowed) {
767a6a26
PM
7076 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
7077 Perl_warner(aTHX_ WARN_PORTABLE,
9e24b6e2
JH
7078 "%s number > %s non-portable",
7079 Base, max);
7080 sv_setnv(sv, n);
7081 }
7082 else {
15041a67 7083#if UVSIZE > 4
767a6a26
PM
7084 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
7085 Perl_warner(aTHX_ WARN_PORTABLE,
9e24b6e2
JH
7086 "%s number > %s non-portable",
7087 Base, max);
2cc4c2dc 7088#endif
9e24b6e2
JH
7089 sv_setuv(sv, u);
7090 }
2cc4c2dc 7091 if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7092 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7093 }
7094 break;
02aa26ce
NT
7095
7096 /*
7097 handle decimal numbers.
7098 we're also sent here when we read a 0 as the first digit
7099 */
378cc40b
LW
7100 case '1': case '2': case '3': case '4': case '5':
7101 case '6': case '7': case '8': case '9': case '.':
7102 decimal:
3280af22
NIS
7103 d = PL_tokenbuf;
7104 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7105 floatit = FALSE;
02aa26ce
NT
7106
7107 /* read next group of digits and _ and copy into d */
de3bb511 7108 while (isDIGIT(*s) || *s == '_') {
4e553d73 7109 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7110 if -w is on
7111 */
93a17b20 7112 if (*s == '_') {
928753ea
JH
7113 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7114 Perl_warner(aTHX_ WARN_SYNTAX,
7115 "Misplaced _ in number");
7116 lastub = s++;
93a17b20 7117 }
fc36a67e 7118 else {
02aa26ce 7119 /* check for end of fixed-length buffer */
fc36a67e 7120 if (d >= e)
cea2e8a9 7121 Perl_croak(aTHX_ number_too_long);
02aa26ce 7122 /* if we're ok, copy the character */
378cc40b 7123 *d++ = *s++;
fc36a67e 7124 }
378cc40b 7125 }
02aa26ce
NT
7126
7127 /* final misplaced underbar check */
928753ea 7128 if (lastub && s == lastub + 1) {
d008e5eb 7129 if (ckWARN(WARN_SYNTAX))
cea2e8a9 7130 Perl_warner(aTHX_ WARN_SYNTAX, "Misplaced _ in number");
d008e5eb 7131 }
02aa26ce
NT
7132
7133 /* read a decimal portion if there is one. avoid
7134 3..5 being interpreted as the number 3. followed
7135 by .5
7136 */
2f3197b3 7137 if (*s == '.' && s[1] != '.') {
79072805 7138 floatit = TRUE;
378cc40b 7139 *d++ = *s++;
02aa26ce 7140
928753ea
JH
7141 if (*s == '_') {
7142 if (ckWARN(WARN_SYNTAX))
7143 Perl_warner(aTHX_ WARN_SYNTAX,
7144 "Misplaced _ in number");
7145 lastub = s;
7146 }
7147
7148 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7149 */
fc36a67e 7150 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7151 /* fixed length buffer check */
fc36a67e 7152 if (d >= e)
cea2e8a9 7153 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7154 if (*s == '_') {
7155 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
7156 Perl_warner(aTHX_ WARN_SYNTAX,
7157 "Misplaced _ in number");
7158 lastub = s;
7159 }
7160 else
fc36a67e 7161 *d++ = *s;
378cc40b 7162 }
928753ea
JH
7163 /* fractional part ending in underbar? */
7164 if (s[-1] == '_') {
7165 if (ckWARN(WARN_SYNTAX))
7166 Perl_warner(aTHX_ WARN_SYNTAX,
7167 "Misplaced _ in number");
7168 }
dd629d5b
GS
7169 if (*s == '.' && isDIGIT(s[1])) {
7170 /* oops, it's really a v-string, but without the "v" */
7171 s = start - 1;
7172 goto vstring;
7173 }
378cc40b 7174 }
02aa26ce
NT
7175
7176 /* read exponent part, if present */
7fd134d9 7177 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7178 floatit = TRUE;
7179 s++;
02aa26ce
NT
7180
7181 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7182 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7183
7fd134d9
JH
7184 /* stray preinitial _ */
7185 if (*s == '_') {
7186 if (ckWARN(WARN_SYNTAX))
7187 Perl_warner(aTHX_ WARN_SYNTAX,
7188 "Misplaced _ in number");
7189 lastub = s++;
7190 }
7191
02aa26ce 7192 /* allow positive or negative exponent */
378cc40b
LW
7193 if (*s == '+' || *s == '-')
7194 *d++ = *s++;
02aa26ce 7195
7fd134d9
JH
7196 /* stray initial _ */
7197 if (*s == '_') {
7198 if (ckWARN(WARN_SYNTAX))
7199 Perl_warner(aTHX_ WARN_SYNTAX,
7200 "Misplaced _ in number");
7201 lastub = s++;
7202 }
7203
7fd134d9
JH
7204 /* read digits of exponent */
7205 while (isDIGIT(*s) || *s == '_') {
7206 if (isDIGIT(*s)) {
7207 if (d >= e)
7208 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7209 *d++ = *s++;
7fd134d9
JH
7210 }
7211 else {
7212 if (ckWARN(WARN_SYNTAX) &&
7213 ((lastub && s == lastub + 1) ||
b3b48e3e 7214 (!isDIGIT(s[1]) && s[1] != '_')))
7fd134d9
JH
7215 Perl_warner(aTHX_ WARN_SYNTAX,
7216 "Misplaced _ in number");
b3b48e3e 7217 lastub = s++;
7fd134d9 7218 }
7fd134d9 7219 }
378cc40b 7220 }
02aa26ce
NT
7221
7222 /* terminate the string */
378cc40b 7223 *d = '\0';
02aa26ce
NT
7224
7225 /* make an sv from the string */
79072805 7226 sv = NEWSV(92,0);
097ee67d 7227
86554af2 7228#if defined(Strtol) && defined(Strtoul)
0b7fceb9
MU
7229
7230 /*
0b7fceb9
MU
7231 strtol/strtoll sets errno to ERANGE if the number is too big
7232 for an integer. We try to do an integer conversion first
7233 if no characters indicating "float" have been found.
7234 */
7235
7236 if (!floatit) {
9c5ffd7c
JH
7237 IV iv = 0;
7238 UV uv = 0;
0b7fceb9 7239 errno = 0;
c239479b 7240 if (*PL_tokenbuf == '-')
96989be3 7241 iv = Strtol(PL_tokenbuf, (char**)NULL, 10);
c239479b 7242 else
96989be3 7243 uv = Strtoul(PL_tokenbuf, (char**)NULL, 10);
55eb892c 7244 if (errno)
86554af2 7245 floatit = TRUE; /* Probably just too large. */
0b7fceb9
MU
7246 else if (*PL_tokenbuf == '-')
7247 sv_setiv(sv, iv);
86554af2
JH
7248 else if (uv <= IV_MAX)
7249 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
0b7fceb9 7250 else
c239479b 7251 sv_setuv(sv, uv);
0b7fceb9
MU
7252 }
7253 if (floatit) {
86554af2
JH
7254 nv = Atof(PL_tokenbuf);
7255 sv_setnv(sv, nv);
7256 }
7257#else
7258 /*
7259 No working strtou?ll?.
7260
7261 Unfortunately atol() doesn't do range checks (returning
7262 LONG_MIN/LONG_MAX, and setting errno to ERANGE on overflows)
7263 everywhere [1], so we cannot use use atol() (or atoll()).
7264 If we could, they would be used, as Atol(), very much like
7265 Strtol() and Strtoul() are used above.
7266
7267 [1] XXX Configure test needed to check for atol()
d6c14000
JH
7268 (and atoll()) overflow behaviour XXX
7269
7270 --jhi
86554af2
JH
7271
7272 We need to do this the hard way. */
7273
7274 nv = Atof(PL_tokenbuf);
7275
7276 /* See if we can make do with an integer value without loss of
7277 precision. We use U_V to cast to a UV, because some
7278 compilers have issues. Then we try casting it back and see
7279 if it was the same [1]. We only do this if we know we
7280 specifically read an integer. If floatit is true, then we
4e553d73 7281 don't need to do the conversion at all.
86554af2
JH
7282
7283 [1] Note that this is lossy if our NVs cannot preserve our
d6c14000
JH
7284 UVs. There are metaconfig defines NV_PRESERVES_UV (a boolean)
7285 and NV_PRESERVES_UV_BITS (a number), but in general we really
7286 do hope all such potentially lossy platforms have strtou?ll?
7287 to do a lossless IV/UV conversion.
7288
7289 Maybe could do some tricks with DBL_DIG, LDBL_DIG and
7290 DBL_MANT_DIG and LDBL_MANT_DIG (these are already available
7291 as NV_DIG and NV_MANT_DIG)?
4e553d73 7292
d6c14000 7293 --jhi
86554af2
JH
7294 */
7295 {
7296 UV uv = U_V(nv);
7297 if (!floatit && (NV)uv == nv) {
7298 if (uv <= IV_MAX)
7299 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
7300 else
7301 sv_setuv(sv, uv);
7302 }
7303 else
7304 sv_setnv(sv, nv);
96989be3 7305 }
0b7fceb9 7306#endif
b8403495
JH
7307 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7308 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7309 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7310 (floatit ? "float" : "integer"),
7311 sv, Nullsv, NULL);
378cc40b 7312 break;
0b7fceb9 7313
e312add1 7314 /* if it starts with a v, it could be a v-string */
a7cb1f99 7315 case 'v':
dd629d5b 7316vstring:
a7cb1f99 7317 {
a7cb1f99
GS
7318 char *pos = s;
7319 pos++;
dd629d5b 7320 while (isDIGIT(*pos) || *pos == '_')
a7cb1f99 7321 pos++;
e526c9e6 7322 if (!isALPHA(*pos)) {
c4d5f83a 7323 UV rev;
ad391ad9 7324 U8 tmpbuf[UTF8_MAXLEN+1];
a7cb1f99 7325 U8 *tmpend;
a7cb1f99
GS
7326 s++; /* get past 'v' */
7327
7328 sv = NEWSV(92,5);
a7cb1f99
GS
7329 sv_setpvn(sv, "", 0);
7330
e526c9e6 7331 for (;;) {
3cb0bbe5
GS
7332 if (*s == '0' && isDIGIT(s[1]))
7333 yyerror("Octal number in vector unsupported");
dd629d5b
GS
7334 rev = 0;
7335 {
7336 /* this is atoi() that tolerates underscores */
7337 char *end = pos;
7338 UV mult = 1;
7339 while (--end >= s) {
7340 UV orev;
7341 if (*end == '_')
7342 continue;
7343 orev = rev;
7344 rev += (*end - '0') * mult;
7345 mult *= 10;
7346 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
7347 Perl_warner(aTHX_ WARN_OVERFLOW,
7348 "Integer overflow in decimal number");
7349 }
7350 }
9041c2e3
NIS
7351 /* Append native character for the rev point */
7352 tmpend = uvchr_to_utf8(tmpbuf, rev);
e526c9e6 7353 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
c4d5f83a
NIS
7354 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
7355 SvUTF8_on(sv);
e526c9e6
GS
7356 if (*pos == '.' && isDIGIT(pos[1]))
7357 s = ++pos;
3818b22b 7358 else {
e526c9e6
GS
7359 s = pos;
7360 break;
3818b22b 7361 }
dd629d5b 7362 while (isDIGIT(*pos) || *pos == '_')
e526c9e6
GS
7363 pos++;
7364 }
a7cb1f99 7365 SvPOK_on(sv);
a7cb1f99 7366 SvREADONLY_on(sv);
a7cb1f99
GS
7367 }
7368 }
7369 break;
79072805 7370 }
a687059c 7371
02aa26ce
NT
7372 /* make the op for the constant and return */
7373
a7cb1f99 7374 if (sv)
b73d6f50 7375 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7376 else
b73d6f50 7377 lvalp->opval = Nullop;
a687059c 7378
378cc40b
LW
7379 return s;
7380}
7381
76e3520e 7382STATIC char *
cea2e8a9 7383S_scan_formline(pTHX_ register char *s)
378cc40b 7384{
79072805 7385 register char *eol;
378cc40b 7386 register char *t;
79cb57f6 7387 SV *stuff = newSVpvn("",0);
79072805 7388 bool needargs = FALSE;
378cc40b 7389
79072805 7390 while (!needargs) {
c2e66d9e 7391 if (*s == '.' || *s == /*{*/'}') {
79072805 7392 /*SUPPRESS 530*/
51882d45 7393#ifdef PERL_STRICT_CR
bf4acbe4 7394 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7395#else
bf4acbe4 7396 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7397#endif
6a65c6a0 7398 if (*t == '\n' || t == PL_bufend)
79072805
LW
7399 break;
7400 }
3280af22 7401 if (PL_in_eval && !PL_rsfp) {
93a17b20 7402 eol = strchr(s,'\n');
0f85fab0 7403 if (!eol++)
3280af22 7404 eol = PL_bufend;
0f85fab0
LW
7405 }
7406 else
3280af22 7407 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7408 if (*s != '#') {
a0d0e21e
LW
7409 for (t = s; t < eol; t++) {
7410 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7411 needargs = FALSE;
7412 goto enough; /* ~~ must be first line in formline */
378cc40b 7413 }
a0d0e21e
LW
7414 if (*t == '@' || *t == '^')
7415 needargs = TRUE;
378cc40b 7416 }
a0d0e21e 7417 sv_catpvn(stuff, s, eol-s);
2dc4c65b
GS
7418#ifndef PERL_STRICT_CR
7419 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7420 char *end = SvPVX(stuff) + SvCUR(stuff);
7421 end[-2] = '\n';
7422 end[-1] = '\0';
7423 SvCUR(stuff)--;
7424 }
7425#endif
79072805
LW
7426 }
7427 s = eol;
3280af22
NIS
7428 if (PL_rsfp) {
7429 s = filter_gets(PL_linestr, PL_rsfp, 0);
7430 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7431 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7432 PL_last_lop = PL_last_uni = Nullch;
79072805 7433 if (!s) {
3280af22 7434 s = PL_bufptr;
79072805 7435 yyerror("Format not terminated");
378cc40b
LW
7436 break;
7437 }
378cc40b 7438 }
463ee0b2 7439 incline(s);
79072805 7440 }
a0d0e21e
LW
7441 enough:
7442 if (SvCUR(stuff)) {
3280af22 7443 PL_expect = XTERM;
79072805 7444 if (needargs) {
3280af22
NIS
7445 PL_lex_state = LEX_NORMAL;
7446 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7447 force_next(',');
7448 }
a0d0e21e 7449 else
3280af22
NIS
7450 PL_lex_state = LEX_FORMLINE;
7451 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7452 force_next(THING);
3280af22 7453 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7454 force_next(LSTOP);
378cc40b 7455 }
79072805 7456 else {
8990e307 7457 SvREFCNT_dec(stuff);
3280af22
NIS
7458 PL_lex_formbrack = 0;
7459 PL_bufptr = s;
79072805
LW
7460 }
7461 return s;
378cc40b 7462}
a687059c 7463
76e3520e 7464STATIC void
cea2e8a9 7465S_set_csh(pTHX)
a687059c 7466{
ae986130 7467#ifdef CSH
3280af22
NIS
7468 if (!PL_cshlen)
7469 PL_cshlen = strlen(PL_cshname);
ae986130 7470#endif
a687059c 7471}
463ee0b2 7472
ba6d6ac9 7473I32
864dbfa3 7474Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7475{
3280af22
NIS
7476 I32 oldsavestack_ix = PL_savestack_ix;
7477 CV* outsidecv = PL_compcv;
748a9306 7478 AV* comppadlist;
8990e307 7479
3280af22
NIS
7480 if (PL_compcv) {
7481 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7482 }
7766f137 7483 SAVEI32(PL_subline);
3280af22
NIS
7484 save_item(PL_subname);
7485 SAVEI32(PL_padix);
354992b1 7486 SAVECOMPPAD();
3280af22
NIS
7487 SAVESPTR(PL_comppad_name);
7488 SAVESPTR(PL_compcv);
7489 SAVEI32(PL_comppad_name_fill);
7490 SAVEI32(PL_min_intro_pending);
7491 SAVEI32(PL_max_intro_pending);
7492 SAVEI32(PL_pad_reset_pending);
7493
7494 PL_compcv = (CV*)NEWSV(1104,0);
7495 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7496 CvFLAGS(PL_compcv) |= flags;
7497
7498 PL_comppad = newAV();
7499 av_push(PL_comppad, Nullsv);
7500 PL_curpad = AvARRAY(PL_comppad);
7501 PL_comppad_name = newAV();
7502 PL_comppad_name_fill = 0;
7503 PL_min_intro_pending = 0;
7504 PL_padix = 0;
57843af0 7505 PL_subline = CopLINE(PL_curcop);
6d4ff0d2 7506#ifdef USE_THREADS
79cb57f6 7507 av_store(PL_comppad_name, 0, newSVpvn("@_", 2));
533c011a
NIS
7508 PL_curpad[0] = (SV*)newAV();
7509 SvPADMY_on(PL_curpad[0]); /* XXX Needed? */
6d4ff0d2 7510#endif /* USE_THREADS */
748a9306
LW
7511
7512 comppadlist = newAV();
7513 AvREAL_off(comppadlist);
3280af22
NIS
7514 av_store(comppadlist, 0, (SV*)PL_comppad_name);
7515 av_store(comppadlist, 1, (SV*)PL_comppad);
748a9306 7516
3280af22
NIS
7517 CvPADLIST(PL_compcv) = comppadlist;
7518 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
11343788 7519#ifdef USE_THREADS
533c011a
NIS
7520 CvOWNER(PL_compcv) = 0;
7521 New(666, CvMUTEXP(PL_compcv), 1, perl_mutex);
7522 MUTEX_INIT(CvMUTEXP(PL_compcv));
11343788 7523#endif /* USE_THREADS */
748a9306 7524
8990e307
LW
7525 return oldsavestack_ix;
7526}
7527
084592ab
CN
7528#ifdef __SC__
7529#pragma segment Perl_yylex
7530#endif
8990e307 7531int
864dbfa3 7532Perl_yywarn(pTHX_ char *s)
8990e307 7533{
faef0170 7534 PL_in_eval |= EVAL_WARNONLY;
748a9306 7535 yyerror(s);
faef0170 7536 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7537 return 0;
8990e307
LW
7538}
7539
7540int
864dbfa3 7541Perl_yyerror(pTHX_ char *s)
463ee0b2 7542{
68dc0745 7543 char *where = NULL;
7544 char *context = NULL;
7545 int contlen = -1;
46fc3d4c 7546 SV *msg;
463ee0b2 7547
3280af22 7548 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7549 where = "at EOF";
3280af22
NIS
7550 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7551 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
7552 while (isSPACE(*PL_oldoldbufptr))
7553 PL_oldoldbufptr++;
7554 context = PL_oldoldbufptr;
7555 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7556 }
3280af22
NIS
7557 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7558 PL_oldbufptr != PL_bufptr) {
7559 while (isSPACE(*PL_oldbufptr))
7560 PL_oldbufptr++;
7561 context = PL_oldbufptr;
7562 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7563 }
7564 else if (yychar > 255)
68dc0745 7565 where = "next token ???";
cdfb297e
GS
7566#ifdef USE_PURE_BISON
7567/* GNU Bison sets the value -2 */
7568 else if (yychar == -2) {
7569#else
463ee0b2 7570 else if ((yychar & 127) == 127) {
cdfb297e 7571#endif
3280af22
NIS
7572 if (PL_lex_state == LEX_NORMAL ||
7573 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7574 where = "at end of line";
3280af22 7575 else if (PL_lex_inpat)
68dc0745 7576 where = "within pattern";
463ee0b2 7577 else
68dc0745 7578 where = "within string";
463ee0b2 7579 }
46fc3d4c 7580 else {
79cb57f6 7581 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7582 if (yychar < 32)
cea2e8a9 7583 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7584 else if (isPRINT_LC(yychar))
cea2e8a9 7585 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7586 else
cea2e8a9 7587 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7588 where = SvPVX(where_sv);
463ee0b2 7589 }
46fc3d4c 7590 msg = sv_2mortal(newSVpv(s, 0));
ed094faf
GS
7591 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
7592 CopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7593 if (context)
cea2e8a9 7594 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7595 else
cea2e8a9 7596 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7597 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7598 Perl_sv_catpvf(aTHX_ msg,
57def98f 7599 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7600 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7601 PL_multi_end = 0;
a0d0e21e 7602 }
faef0170 7603 if (PL_in_eval & EVAL_WARNONLY)
894356b3 7604 Perl_warn(aTHX_ "%"SVf, msg);
463ee0b2 7605 else
5a844595 7606 qerror(msg);
c7d6bfb2
GS
7607 if (PL_error_count >= 10) {
7608 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7609 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
c7d6bfb2
GS
7610 ERRSV, CopFILE(PL_curcop));
7611 else
7612 Perl_croak(aTHX_ "%s has too many errors.\n",
7613 CopFILE(PL_curcop));
7614 }
3280af22
NIS
7615 PL_in_my = 0;
7616 PL_in_my_stash = Nullhv;
463ee0b2
LW
7617 return 0;
7618}
084592ab
CN
7619#ifdef __SC__
7620#pragma segment Main
7621#endif
4e35701f 7622
b250498f 7623STATIC char*
3ae08724 7624S_swallow_bom(pTHX_ U8 *s)
01ec43d0 7625{
b250498f
GS
7626 STRLEN slen;
7627 slen = SvCUR(PL_linestr);
7628 switch (*s) {
4e553d73
NIS
7629 case 0xFF:
7630 if (s[1] == 0xFE) {
01ec43d0 7631 /* UTF-16 little-endian */
3ae08724 7632 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
01ec43d0
GS
7633 Perl_croak(aTHX_ "Unsupported script encoding");
7634#ifndef PERL_NO_UTF16_FILTER
dea0fc0b 7635 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-LE script encoding\n"));
3ae08724 7636 s += 2;
dea0fc0b
JH
7637 if (PL_bufend > (char*)s) {
7638 U8 *news;
7639 I32 newlen;
7640
7641 filter_add(utf16rev_textfilter, NULL);
7642 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
f72f5f89
JH
7643 PL_bufend = (char*)utf16_to_utf8_reversed(s, news,
7644 PL_bufend - (char*)s - 1,
dea0fc0b
JH
7645 &newlen);
7646 Copy(news, s, newlen, U8);
7647 SvCUR_set(PL_linestr, newlen);
7648 PL_bufend = SvPVX(PL_linestr) + newlen;
7649 news[newlen++] = '\0';
7650 Safefree(news);
7651 }
b250498f 7652#else
01ec43d0 7653 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7654#endif
01ec43d0
GS
7655 }
7656 break;
78ae23f5 7657 case 0xFE:
3ae08724 7658 if (s[1] == 0xFF) { /* UTF-16 big-endian */
01ec43d0 7659#ifndef PERL_NO_UTF16_FILTER
dea0fc0b
JH
7660 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding\n"));
7661 s += 2;
7662 if (PL_bufend > (char *)s) {
7663 U8 *news;
7664 I32 newlen;
7665
7666 filter_add(utf16_textfilter, NULL);
7667 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
7668 PL_bufend = (char*)utf16_to_utf8(s, news,
7669 PL_bufend - (char*)s,
7670 &newlen);
7671 Copy(news, s, newlen, U8);
7672 SvCUR_set(PL_linestr, newlen);
7673 PL_bufend = SvPVX(PL_linestr) + newlen;
7674 news[newlen++] = '\0';
7675 Safefree(news);
7676 }
b250498f 7677#else
01ec43d0 7678 Perl_croak(aTHX_ "Unsupported script encoding");
b250498f 7679#endif
01ec43d0
GS
7680 }
7681 break;
3ae08724
GS
7682 case 0xEF:
7683 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
dea0fc0b 7684 DEBUG_p(PerlIO_printf(Perl_debug_log, "UTF-8 script encoding\n"));
01ec43d0
GS
7685 s += 3; /* UTF-8 */
7686 }
7687 break;
7688 case 0:
7689 if (slen > 3 && s[1] == 0 && /* UTF-32 big-endian */
3ae08724 7690 s[2] == 0xFE && s[3] == 0xFF)
01ec43d0
GS
7691 {
7692 Perl_croak(aTHX_ "Unsupported script encoding");
7693 }
7694 }
b8f84bb2 7695 return (char*)s;
b250498f 7696}
4755096e
GS
7697
7698#ifdef PERL_OBJECT
7699#include "XSUB.h"
7700#endif
7701
7702/*
7703 * restore_rsfp
7704 * Restore a source filter.
7705 */
7706
7707static void
7708restore_rsfp(pTHXo_ void *f)
7709{
7710 PerlIO *fp = (PerlIO*)f;
7711
7712 if (PL_rsfp == PerlIO_stdin())
7713 PerlIO_clearerr(PL_rsfp);
7714 else if (PL_rsfp && (PL_rsfp != fp))
7715 PerlIO_close(PL_rsfp);
7716 PL_rsfp = fp;
7717}
6e3aabd6
GS
7718
7719#ifndef PERL_NO_UTF16_FILTER
7720static I32
7721utf16_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
7722{
7723 I32 count = FILTER_READ(idx+1, sv, maxlen);
7724 if (count) {
7725 U8* tmps;
7726 U8* tend;
dea0fc0b 7727 I32 newlen;
6e3aabd6 7728 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
f72f5f89
JH
7729 if (!*SvPV_nolen(sv))
7730 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7731 return count;
4e553d73 7732
dea0fc0b 7733 tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7734 sv_usepvn(sv, (char*)tmps, tend - tmps);
7735 }
7736 return count;
7737}
7738
7739static I32
7740utf16rev_textfilter(pTHXo_ int idx, SV *sv, int maxlen)
7741{
7742 I32 count = FILTER_READ(idx+1, sv, maxlen);
7743 if (count) {
7744 U8* tmps;
7745 U8* tend;
dea0fc0b 7746 I32 newlen;
f72f5f89
JH
7747 if (!*SvPV_nolen(sv))
7748 /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
7749 return count;
7750
6e3aabd6 7751 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
dea0fc0b 7752 tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
6e3aabd6
GS
7753 sv_usepvn(sv, (char*)tmps, tend - tmps);
7754 }
7755 return count;
7756}
7757#endif