This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Tidy up the reference name stringification to save getting the
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
371fce9b 4 * 2000, 2001, 2002, 2003, 2004, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a 64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff 66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
bbf60fe6
DM
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
79072805
LW
94#ifdef ff_next
95#undef ff_next
d48672a2
LW
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 136# define REPORT(retval) tokereport(s,(int)retval)
998054bd 137#else
bbf60fe6 138# define REPORT(retval) (retval)
998054bd
SC
139#endif
140
bbf60fe6
DM
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 161
a687059c
LW
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
a687059c 166 */
bbf60fe6
DM
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
6f33ba73 169 PL_expect = x, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
bbf60fe6
DM
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 178
bbf60fe6
DM
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
3280af22
NIS
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
79072805 186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
bbf60fe6
DM
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
9041c2e3 203{
bbf60fe6
DM
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 270
bbf60fe6
DM
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = NEWSV(0, 60);
279
280 Perl_sv_catpvf(aTHX_ report, "<== ");
281
282 for (p = debug_tokens; p->token; p++) {
283 if (p->token == (int)rv) {
284 name = p->name;
285 type = p->type;
286 break;
287 }
288 }
289 if (name)
290 Perl_sv_catpvf(aTHX_ report, "%s", name);
291 else if ((char)rv > ' ' && (char)rv < '~')
292 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
293 else if (!rv)
294 Perl_sv_catpvf(aTHX_ report, "EOF");
295 else
296 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
297 switch (type) {
298 case TOKENTYPE_NONE:
299 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
300 break;
301 case TOKENTYPE_IVAL:
302 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
303 break;
304 case TOKENTYPE_OPNUM:
305 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
306 PL_op_name[yylval.ival]);
307 break;
308 case TOKENTYPE_PVAL:
309 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
310 break;
311 case TOKENTYPE_OPVAL:
401441c0
RGS
312 if (yylval.opval)
313 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 314 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
315 else
316 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
317 break;
318 }
319 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
998054bd
SC
320 if (s - PL_bufptr > 0)
321 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
322 else {
323 if (PL_oldbufptr && *PL_oldbufptr)
324 sv_catpv(report, PL_tokenbuf);
325 }
bbf60fe6
DM
326 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
327 };
328 return (int)rv;
998054bd
SC
329}
330
8fa7f367
JH
331#endif
332
ffb4593c
NT
333/*
334 * S_ao
335 *
c963b151
BD
336 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
337 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
338 */
339
76e3520e 340STATIC int
cea2e8a9 341S_ao(pTHX_ int toketype)
a0d0e21e 342{
3280af22
NIS
343 if (*PL_bufptr == '=') {
344 PL_bufptr++;
a0d0e21e
LW
345 if (toketype == ANDAND)
346 yylval.ival = OP_ANDASSIGN;
347 else if (toketype == OROR)
348 yylval.ival = OP_ORASSIGN;
c963b151
BD
349 else if (toketype == DORDOR)
350 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
351 toketype = ASSIGNOP;
352 }
353 return toketype;
354}
355
ffb4593c
NT
356/*
357 * S_no_op
358 * When Perl expects an operator and finds something else, no_op
359 * prints the warning. It always prints "<something> found where
360 * operator expected. It prints "Missing semicolon on previous line?"
361 * if the surprise occurs at the start of the line. "do you need to
362 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
363 * where the compiler doesn't know if foo is a method call or a function.
364 * It prints "Missing operator before end of line" if there's nothing
365 * after the missing operator, or "... before <...>" if there is something
366 * after the missing operator.
367 */
368
76e3520e 369STATIC void
cea2e8a9 370S_no_op(pTHX_ char *what, char *s)
463ee0b2 371{
3280af22
NIS
372 char *oldbp = PL_bufptr;
373 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 374
1189a94a
GS
375 if (!s)
376 s = oldbp;
07c798fb 377 else
1189a94a 378 PL_bufptr = s;
cea2e8a9 379 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
380 if (ckWARN_d(WARN_SYNTAX)) {
381 if (is_first)
382 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
383 "\t(Missing semicolon on previous line?)\n");
384 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
385 char *t;
386 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
387 if (t < PL_bufptr && isSPACE(*t))
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Do you need to predeclare %.*s?)\n",
390 t - PL_oldoldbufptr, PL_oldoldbufptr);
391 }
392 else {
393 assert(s >= oldbp);
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
396 }
07c798fb 397 }
3280af22 398 PL_bufptr = oldbp;
8990e307
LW
399}
400
ffb4593c
NT
401/*
402 * S_missingterm
403 * Complain about missing quote/regexp/heredoc terminator.
404 * If it's called with (char *)NULL then it cauterizes the line buffer.
405 * If we're in a delimited string and the delimiter is a control
406 * character, it's reformatted into a two-char sequence like ^C.
407 * This is fatal.
408 */
409
76e3520e 410STATIC void
cea2e8a9 411S_missingterm(pTHX_ char *s)
8990e307
LW
412{
413 char tmpbuf[3];
414 char q;
415 if (s) {
416 char *nl = strrchr(s,'\n');
d2719217 417 if (nl)
8990e307
LW
418 *nl = '\0';
419 }
9d116dd7
JH
420 else if (
421#ifdef EBCDIC
422 iscntrl(PL_multi_close)
423#else
424 PL_multi_close < 32 || PL_multi_close == 127
425#endif
426 ) {
8990e307 427 *tmpbuf = '^';
3280af22 428 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
429 s = "\\n";
430 tmpbuf[2] = '\0';
431 s = tmpbuf;
432 }
433 else {
eb160463 434 *tmpbuf = (char)PL_multi_close;
8990e307
LW
435 tmpbuf[1] = '\0';
436 s = tmpbuf;
437 }
438 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 439 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 440}
79072805 441
ffb4593c
NT
442/*
443 * Perl_deprecate
ffb4593c
NT
444 */
445
79072805 446void
864dbfa3 447Perl_deprecate(pTHX_ char *s)
a0d0e21e 448{
599cee73 449 if (ckWARN(WARN_DEPRECATED))
9014280d 450 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
451}
452
12bcd1a6
PM
453void
454Perl_deprecate_old(pTHX_ char *s)
455{
456 /* This function should NOT be called for any new deprecated warnings */
457 /* Use Perl_deprecate instead */
458 /* */
459 /* It is here to maintain backward compatibility with the pre-5.8 */
460 /* warnings category hierarchy. The "deprecated" category used to */
461 /* live under the "syntax" category. It is now a top-level category */
462 /* in its own right. */
463
464 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
465 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
466 "Use of %s is deprecated", s);
467}
468
ffb4593c
NT
469/*
470 * depcom
9cbb5ea2 471 * Deprecate a comma-less variable list.
ffb4593c
NT
472 */
473
76e3520e 474STATIC void
cea2e8a9 475S_depcom(pTHX)
a0d0e21e 476{
12bcd1a6 477 deprecate_old("comma-less variable list");
a0d0e21e
LW
478}
479
ffb4593c 480/*
9cbb5ea2
GS
481 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
482 * utf16-to-utf8-reversed.
ffb4593c
NT
483 */
484
c39cd008
GS
485#ifdef PERL_CR_FILTER
486static void
487strip_return(SV *sv)
488{
489 register char *s = SvPVX(sv);
490 register char *e = s + SvCUR(sv);
491 /* outer loop optimized to do nothing if there are no CR-LFs */
492 while (s < e) {
493 if (*s++ == '\r' && *s == '\n') {
494 /* hit a CR-LF, need to copy the rest */
495 register char *d = s - 1;
496 *d++ = *s++;
497 while (s < e) {
498 if (*s == '\r' && s[1] == '\n')
499 s++;
500 *d++ = *s++;
501 }
502 SvCUR(sv) -= s - d;
503 return;
504 }
505 }
506}
a868473f 507
76e3520e 508STATIC I32
c39cd008 509S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 510{
c39cd008
GS
511 I32 count = FILTER_READ(idx+1, sv, maxlen);
512 if (count > 0 && !maxlen)
513 strip_return(sv);
514 return count;
a868473f
NIS
515}
516#endif
517
ffb4593c
NT
518/*
519 * Perl_lex_start
9cbb5ea2
GS
520 * Initialize variables. Uses the Perl save_stack to save its state (for
521 * recursive calls to the parser).
ffb4593c
NT
522 */
523
a0d0e21e 524void
864dbfa3 525Perl_lex_start(pTHX_ SV *line)
79072805 526{
8990e307
LW
527 char *s;
528 STRLEN len;
529
3280af22
NIS
530 SAVEI32(PL_lex_dojoin);
531 SAVEI32(PL_lex_brackets);
3280af22
NIS
532 SAVEI32(PL_lex_casemods);
533 SAVEI32(PL_lex_starts);
534 SAVEI32(PL_lex_state);
7766f137 535 SAVEVPTR(PL_lex_inpat);
3280af22 536 SAVEI32(PL_lex_inwhat);
18b09519
GS
537 if (PL_lex_state == LEX_KNOWNEXT) {
538 I32 toke = PL_nexttoke;
539 while (--toke >= 0) {
540 SAVEI32(PL_nexttype[toke]);
541 SAVEVPTR(PL_nextval[toke]);
542 }
543 SAVEI32(PL_nexttoke);
18b09519 544 }
57843af0 545 SAVECOPLINE(PL_curcop);
3280af22
NIS
546 SAVEPPTR(PL_bufptr);
547 SAVEPPTR(PL_bufend);
548 SAVEPPTR(PL_oldbufptr);
549 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
550 SAVEPPTR(PL_last_lop);
551 SAVEPPTR(PL_last_uni);
3280af22
NIS
552 SAVEPPTR(PL_linestart);
553 SAVESPTR(PL_linestr);
8edd5f42
RGS
554 SAVEGENERICPV(PL_lex_brackstack);
555 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 556 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
557 SAVESPTR(PL_lex_stuff);
558 SAVEI32(PL_lex_defer);
09bef843 559 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 560 SAVESPTR(PL_lex_repl);
bebdddfc
GS
561 SAVEINT(PL_expect);
562 SAVEINT(PL_lex_expect);
3280af22
NIS
563
564 PL_lex_state = LEX_NORMAL;
565 PL_lex_defer = 0;
566 PL_expect = XSTATE;
567 PL_lex_brackets = 0;
3280af22
NIS
568 New(899, PL_lex_brackstack, 120, char);
569 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
570 PL_lex_casemods = 0;
571 *PL_lex_casestack = '\0';
572 PL_lex_dojoin = 0;
573 PL_lex_starts = 0;
574 PL_lex_stuff = Nullsv;
575 PL_lex_repl = Nullsv;
576 PL_lex_inpat = 0;
76be56bc 577 PL_nexttoke = 0;
3280af22 578 PL_lex_inwhat = 0;
09bef843 579 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
580 PL_linestr = line;
581 if (SvREADONLY(PL_linestr))
582 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
583 s = SvPV(PL_linestr, len);
6f27f9a7 584 if (!len || s[len-1] != ';') {
3280af22
NIS
585 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
586 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
587 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 588 }
3280af22
NIS
589 SvTEMP_off(PL_linestr);
590 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
591 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 592 PL_last_lop = PL_last_uni = Nullch;
3280af22 593 PL_rsfp = 0;
79072805 594}
a687059c 595
ffb4593c
NT
596/*
597 * Perl_lex_end
9cbb5ea2
GS
598 * Finalizer for lexing operations. Must be called when the parser is
599 * done with the lexer.
ffb4593c
NT
600 */
601
463ee0b2 602void
864dbfa3 603Perl_lex_end(pTHX)
463ee0b2 604{
3280af22 605 PL_doextract = FALSE;
463ee0b2
LW
606}
607
ffb4593c
NT
608/*
609 * S_incline
610 * This subroutine has nothing to do with tilting, whether at windmills
611 * or pinball tables. Its name is short for "increment line". It
57843af0 612 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 613 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
614 * # line 500 "foo.pm"
615 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_incline(pTHX_ char *s)
463ee0b2
LW
620{
621 char *t;
622 char *n;
73659bf1 623 char *e;
463ee0b2 624 char ch;
463ee0b2 625
57843af0 626 CopLINE_inc(PL_curcop);
463ee0b2
LW
627 if (*s++ != '#')
628 return;
bf4acbe4 629 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
630 if (strnEQ(s, "line", 4))
631 s += 4;
632 else
633 return;
084592ab 634 if (SPACE_OR_TAB(*s))
73659bf1 635 s++;
4e553d73 636 else
73659bf1 637 return;
bf4acbe4 638 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
639 if (!isDIGIT(*s))
640 return;
641 n = s;
642 while (isDIGIT(*s))
643 s++;
bf4acbe4 644 while (SPACE_OR_TAB(*s))
463ee0b2 645 s++;
73659bf1 646 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 647 s++;
73659bf1
GS
648 e = t + 1;
649 }
463ee0b2 650 else {
463ee0b2 651 for (t = s; !isSPACE(*t); t++) ;
73659bf1 652 e = t;
463ee0b2 653 }
bf4acbe4 654 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
655 e++;
656 if (*e != '\n' && *e != '\0')
657 return; /* false alarm */
658
463ee0b2
LW
659 ch = *t;
660 *t = '\0';
f4dd75d9 661 if (t - s > 0) {
05ec9bb3 662 CopFILE_free(PL_curcop);
57843af0 663 CopFILE_set(PL_curcop, s);
f4dd75d9 664 }
463ee0b2 665 *t = ch;
57843af0 666 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
667}
668
ffb4593c
NT
669/*
670 * S_skipspace
671 * Called to gobble the appropriate amount and type of whitespace.
672 * Skips comments as well.
673 */
674
76e3520e 675STATIC char *
cea2e8a9 676S_skipspace(pTHX_ register char *s)
a687059c 677{
3280af22 678 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 679 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
680 s++;
681 return s;
682 }
683 for (;;) {
fd049845 684 STRLEN prevlen;
09bef843 685 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 686 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
687 while (s < PL_bufend && isSPACE(*s)) {
688 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
689 incline(s);
690 }
ffb4593c
NT
691
692 /* comment */
3280af22
NIS
693 if (s < PL_bufend && *s == '#') {
694 while (s < PL_bufend && *s != '\n')
463ee0b2 695 s++;
60e6418e 696 if (s < PL_bufend) {
463ee0b2 697 s++;
60e6418e
GS
698 if (PL_in_eval && !PL_rsfp) {
699 incline(s);
700 continue;
701 }
702 }
463ee0b2 703 }
ffb4593c
NT
704
705 /* only continue to recharge the buffer if we're at the end
706 * of the buffer, we're not reading from a source filter, and
707 * we're in normal lexing mode
708 */
09bef843
SB
709 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
710 PL_lex_state == LEX_FORMLINE)
463ee0b2 711 return s;
ffb4593c
NT
712
713 /* try to recharge the buffer */
9cbb5ea2
GS
714 if ((s = filter_gets(PL_linestr, PL_rsfp,
715 (prevlen = SvCUR(PL_linestr)))) == Nullch)
716 {
717 /* end of file. Add on the -p or -n magic */
3280af22
NIS
718 if (PL_minus_n || PL_minus_p) {
719 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
720 ";}continue{print or die qq(-p destination: $!\\n)" :
721 "");
3280af22
NIS
722 sv_catpv(PL_linestr,";}");
723 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
724 }
725 else
3280af22 726 sv_setpv(PL_linestr,";");
ffb4593c
NT
727
728 /* reset variables for next time we lex */
9cbb5ea2
GS
729 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
730 = SvPVX(PL_linestr);
3280af22 731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 732 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
733
734 /* Close the filehandle. Could be from -P preprocessor,
735 * STDIN, or a regular file. If we were reading code from
736 * STDIN (because the commandline held no -e or filename)
737 * then we don't close it, we reset it so the code can
738 * read from STDIN too.
739 */
740
3280af22
NIS
741 if (PL_preprocess && !PL_in_eval)
742 (void)PerlProc_pclose(PL_rsfp);
743 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
744 PerlIO_clearerr(PL_rsfp);
8990e307 745 else
3280af22
NIS
746 (void)PerlIO_close(PL_rsfp);
747 PL_rsfp = Nullfp;
463ee0b2
LW
748 return s;
749 }
ffb4593c
NT
750
751 /* not at end of file, so we only read another line */
09bef843
SB
752 /* make corresponding updates to old pointers, for yyerror() */
753 oldprevlen = PL_oldbufptr - PL_bufend;
754 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
755 if (PL_last_uni)
756 oldunilen = PL_last_uni - PL_bufend;
757 if (PL_last_lop)
758 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
759 PL_linestart = PL_bufptr = s + prevlen;
760 PL_bufend = s + SvCUR(PL_linestr);
761 s = PL_bufptr;
09bef843
SB
762 PL_oldbufptr = s + oldprevlen;
763 PL_oldoldbufptr = s + oldoldprevlen;
764 if (PL_last_uni)
765 PL_last_uni = s + oldunilen;
766 if (PL_last_lop)
767 PL_last_lop = s + oldloplen;
a0d0e21e 768 incline(s);
ffb4593c
NT
769
770 /* debugger active and we're not compiling the debugger code,
771 * so store the line into the debugger's array of lines
772 */
3280af22 773 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
774 SV *sv = NEWSV(85,0);
775
776 sv_upgrade(sv, SVt_PVMG);
3280af22 777 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
778 (void)SvIOK_on(sv);
779 SvIVX(sv) = 0;
57843af0 780 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 781 }
463ee0b2 782 }
a687059c 783}
378cc40b 784
ffb4593c
NT
785/*
786 * S_check_uni
787 * Check the unary operators to ensure there's no ambiguity in how they're
788 * used. An ambiguous piece of code would be:
789 * rand + 5
790 * This doesn't mean rand() + 5. Because rand() is a unary operator,
791 * the +5 is its argument.
792 */
793
76e3520e 794STATIC void
cea2e8a9 795S_check_uni(pTHX)
ba106d47 796{
2f3197b3 797 char *s;
a0d0e21e 798 char *t;
2f3197b3 799
3280af22 800 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 801 return;
3280af22
NIS
802 while (isSPACE(*PL_last_uni))
803 PL_last_uni++;
7e2040f0 804 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 805 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 806 return;
0453d815 807 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 808 char ch = *s;
0453d815 809 *s = '\0';
9014280d 810 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 811 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
812 PL_last_uni);
813 *s = ch;
814 }
2f3197b3
LW
815}
816
ffb4593c
NT
817/*
818 * LOP : macro to build a list operator. Its behaviour has been replaced
819 * with a subroutine, S_lop() for which LOP is just another name.
820 */
821
a0d0e21e
LW
822#define LOP(f,x) return lop(f,x,s)
823
ffb4593c
NT
824/*
825 * S_lop
826 * Build a list operator (or something that might be one). The rules:
827 * - if we have a next token, then it's a list operator [why?]
828 * - if the next thing is an opening paren, then it's a function
829 * - else it's a list operator
830 */
831
76e3520e 832STATIC I32
a0be28da 833S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 834{
79072805 835 yylval.ival = f;
35c8bce7 836 CLINE;
3280af22
NIS
837 PL_expect = x;
838 PL_bufptr = s;
839 PL_last_lop = PL_oldbufptr;
eb160463 840 PL_last_lop_op = (OPCODE)f;
3280af22 841 if (PL_nexttoke)
bbf60fe6 842 return REPORT(LSTOP);
79072805 843 if (*s == '(')
bbf60fe6 844 return REPORT(FUNC);
79072805
LW
845 s = skipspace(s);
846 if (*s == '(')
bbf60fe6 847 return REPORT(FUNC);
79072805 848 else
bbf60fe6 849 return REPORT(LSTOP);
79072805
LW
850}
851
ffb4593c
NT
852/*
853 * S_force_next
9cbb5ea2 854 * When the lexer realizes it knows the next token (for instance,
ffb4593c 855 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
856 * to know what token to return the next time the lexer is called. Caller
857 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
858 * handles the token correctly.
ffb4593c
NT
859 */
860
4e553d73 861STATIC void
cea2e8a9 862S_force_next(pTHX_ I32 type)
79072805 863{
3280af22
NIS
864 PL_nexttype[PL_nexttoke] = type;
865 PL_nexttoke++;
866 if (PL_lex_state != LEX_KNOWNEXT) {
867 PL_lex_defer = PL_lex_state;
868 PL_lex_expect = PL_expect;
869 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
870 }
871}
872
ffb4593c
NT
873/*
874 * S_force_word
875 * When the lexer knows the next thing is a word (for instance, it has
876 * just seen -> and it knows that the next char is a word char, then
877 * it calls S_force_word to stick the next word into the PL_next lookahead.
878 *
879 * Arguments:
b1b65b59 880 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
881 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
882 * int check_keyword : if true, Perl checks to make sure the word isn't
883 * a keyword (do this if the word is a label, e.g. goto FOO)
884 * int allow_pack : if true, : characters will also be allowed (require,
885 * use, etc. do this)
9cbb5ea2 886 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
887 */
888
76e3520e 889STATIC char *
cea2e8a9 890S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 891{
463ee0b2
LW
892 register char *s;
893 STRLEN len;
4e553d73 894
463ee0b2
LW
895 start = skipspace(start);
896 s = start;
7e2040f0 897 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 898 (allow_pack && *s == ':') ||
15f0808c 899 (allow_initial_tick && *s == '\'') )
a0d0e21e 900 {
3280af22
NIS
901 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
902 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
903 return start;
904 if (token == METHOD) {
905 s = skipspace(s);
906 if (*s == '(')
3280af22 907 PL_expect = XTERM;
463ee0b2 908 else {
3280af22 909 PL_expect = XOPERATOR;
463ee0b2 910 }
79072805 911 }
3280af22
NIS
912 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
913 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
5464dbd2
RGS
914 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
915 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
79072805
LW
916 force_next(token);
917 }
918 return s;
919}
920
ffb4593c
NT
921/*
922 * S_force_ident
9cbb5ea2 923 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
924 * text only contains the "foo" portion. The first argument is a pointer
925 * to the "foo", and the second argument is the type symbol to prefix.
926 * Forces the next token to be a "WORD".
9cbb5ea2 927 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
928 */
929
76e3520e 930STATIC void
cea2e8a9 931S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
932{
933 if (s && *s) {
11343788 934 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 935 PL_nextval[PL_nexttoke].opval = o;
79072805 936 force_next(WORD);
748a9306 937 if (kind) {
11343788 938 o->op_private = OPpCONST_ENTERED;
55497cff 939 /* XXX see note in pp_entereval() for why we forgo typo
940 warnings if the symbol must be introduced in an eval.
941 GSAR 96-10-12 */
3280af22 942 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
943 kind == '$' ? SVt_PV :
944 kind == '@' ? SVt_PVAV :
945 kind == '%' ? SVt_PVHV :
946 SVt_PVGV
947 );
748a9306 948 }
79072805
LW
949 }
950}
951
1571675a
GS
952NV
953Perl_str_to_version(pTHX_ SV *sv)
954{
955 NV retval = 0.0;
956 NV nshift = 1.0;
957 STRLEN len;
958 char *start = SvPVx(sv,len);
3aa33fe5 959 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
960 char *end = start + len;
961 while (start < end) {
ba210ebe 962 STRLEN skip;
1571675a
GS
963 UV n;
964 if (utf)
9041c2e3 965 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
966 else {
967 n = *(U8*)start;
968 skip = 1;
969 }
970 retval += ((NV)n)/nshift;
971 start += skip;
972 nshift *= 1000;
973 }
974 return retval;
975}
976
4e553d73 977/*
ffb4593c
NT
978 * S_force_version
979 * Forces the next token to be a version number.
e759cc13
RGS
980 * If the next token appears to be an invalid version number, (e.g. "v2b"),
981 * and if "guessing" is TRUE, then no new token is created (and the caller
982 * must use an alternative parsing method).
ffb4593c
NT
983 */
984
76e3520e 985STATIC char *
e759cc13 986S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 987{
988 OP *version = Nullop;
44dcb63b 989 char *d;
89bfa8cd 990
991 s = skipspace(s);
992
44dcb63b 993 d = s;
dd629d5b 994 if (*d == 'v')
44dcb63b 995 d++;
44dcb63b 996 if (isDIGIT(*d)) {
e759cc13
RGS
997 while (isDIGIT(*d) || *d == '_' || *d == '.')
998 d++;
9f3d182e 999 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1000 SV *ver;
b73d6f50 1001 s = scan_num(s, &yylval);
89bfa8cd 1002 version = yylval.opval;
dd629d5b
GS
1003 ver = cSVOPx(version)->op_sv;
1004 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1005 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
1006 SvNVX(ver) = str_to_version(ver);
1007 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1008 }
89bfa8cd 1009 }
e759cc13
RGS
1010 else if (guessing)
1011 return s;
89bfa8cd 1012 }
1013
1014 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1015 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1016 force_next(WORD);
89bfa8cd 1017
e759cc13 1018 return s;
89bfa8cd 1019}
1020
ffb4593c
NT
1021/*
1022 * S_tokeq
1023 * Tokenize a quoted string passed in as an SV. It finds the next
1024 * chunk, up to end of string or a backslash. It may make a new
1025 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1026 * turns \\ into \.
1027 */
1028
76e3520e 1029STATIC SV *
cea2e8a9 1030S_tokeq(pTHX_ SV *sv)
79072805
LW
1031{
1032 register char *s;
1033 register char *send;
1034 register char *d;
b3ac6de7
IZ
1035 STRLEN len = 0;
1036 SV *pv = sv;
79072805
LW
1037
1038 if (!SvLEN(sv))
b3ac6de7 1039 goto finish;
79072805 1040
a0d0e21e 1041 s = SvPV_force(sv, len);
21a311ee 1042 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1043 goto finish;
463ee0b2 1044 send = s + len;
79072805
LW
1045 while (s < send && *s != '\\')
1046 s++;
1047 if (s == send)
b3ac6de7 1048 goto finish;
79072805 1049 d = s;
be4731d2 1050 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1051 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1052 if (SvUTF8(sv))
1053 SvUTF8_on(pv);
1054 }
79072805
LW
1055 while (s < send) {
1056 if (*s == '\\') {
a0d0e21e 1057 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1058 s++; /* all that, just for this */
1059 }
1060 *d++ = *s++;
1061 }
1062 *d = '\0';
463ee0b2 1063 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1064 finish:
3280af22 1065 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1066 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1067 return sv;
1068}
1069
ffb4593c
NT
1070/*
1071 * Now come three functions related to double-quote context,
1072 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1073 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1074 * interact with PL_lex_state, and create fake ( ... ) argument lists
1075 * to handle functions and concatenation.
1076 * They assume that whoever calls them will be setting up a fake
1077 * join call, because each subthing puts a ',' after it. This lets
1078 * "lower \luPpEr"
1079 * become
1080 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1081 *
1082 * (I'm not sure whether the spurious commas at the end of lcfirst's
1083 * arguments and join's arguments are created or not).
1084 */
1085
1086/*
1087 * S_sublex_start
1088 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1089 *
1090 * Pattern matching will set PL_lex_op to the pattern-matching op to
1091 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1092 *
1093 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1094 *
1095 * Everything else becomes a FUNC.
1096 *
1097 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1098 * had an OP_CONST or OP_READLINE). This just sets us up for a
1099 * call to S_sublex_push().
1100 */
1101
76e3520e 1102STATIC I32
cea2e8a9 1103S_sublex_start(pTHX)
79072805
LW
1104{
1105 register I32 op_type = yylval.ival;
79072805
LW
1106
1107 if (op_type == OP_NULL) {
3280af22
NIS
1108 yylval.opval = PL_lex_op;
1109 PL_lex_op = Nullop;
79072805
LW
1110 return THING;
1111 }
1112 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1113 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1114
1115 if (SvTYPE(sv) == SVt_PVIV) {
1116 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1117 STRLEN len;
1118 char *p;
1119 SV *nsv;
1120
1121 p = SvPV(sv, len);
79cb57f6 1122 nsv = newSVpvn(p, len);
01ec43d0
GS
1123 if (SvUTF8(sv))
1124 SvUTF8_on(nsv);
b3ac6de7
IZ
1125 SvREFCNT_dec(sv);
1126 sv = nsv;
4e553d73 1127 }
b3ac6de7 1128 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1129 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1130 /* Allow <FH> // "foo" */
1131 if (op_type == OP_READLINE)
1132 PL_expect = XTERMORDORDOR;
79072805
LW
1133 return THING;
1134 }
1135
3280af22
NIS
1136 PL_sublex_info.super_state = PL_lex_state;
1137 PL_sublex_info.sub_inwhat = op_type;
1138 PL_sublex_info.sub_op = PL_lex_op;
1139 PL_lex_state = LEX_INTERPPUSH;
55497cff 1140
3280af22
NIS
1141 PL_expect = XTERM;
1142 if (PL_lex_op) {
1143 yylval.opval = PL_lex_op;
1144 PL_lex_op = Nullop;
55497cff 1145 return PMFUNC;
1146 }
1147 else
1148 return FUNC;
1149}
1150
ffb4593c
NT
1151/*
1152 * S_sublex_push
1153 * Create a new scope to save the lexing state. The scope will be
1154 * ended in S_sublex_done. Returns a '(', starting the function arguments
1155 * to the uc, lc, etc. found before.
1156 * Sets PL_lex_state to LEX_INTERPCONCAT.
1157 */
1158
76e3520e 1159STATIC I32
cea2e8a9 1160S_sublex_push(pTHX)
55497cff 1161{
f46d017c 1162 ENTER;
55497cff 1163
3280af22
NIS
1164 PL_lex_state = PL_sublex_info.super_state;
1165 SAVEI32(PL_lex_dojoin);
1166 SAVEI32(PL_lex_brackets);
3280af22
NIS
1167 SAVEI32(PL_lex_casemods);
1168 SAVEI32(PL_lex_starts);
1169 SAVEI32(PL_lex_state);
7766f137 1170 SAVEVPTR(PL_lex_inpat);
3280af22 1171 SAVEI32(PL_lex_inwhat);
57843af0 1172 SAVECOPLINE(PL_curcop);
3280af22 1173 SAVEPPTR(PL_bufptr);
8452ff4b 1174 SAVEPPTR(PL_bufend);
3280af22
NIS
1175 SAVEPPTR(PL_oldbufptr);
1176 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1177 SAVEPPTR(PL_last_lop);
1178 SAVEPPTR(PL_last_uni);
3280af22
NIS
1179 SAVEPPTR(PL_linestart);
1180 SAVESPTR(PL_linestr);
8edd5f42
RGS
1181 SAVEGENERICPV(PL_lex_brackstack);
1182 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1183
1184 PL_linestr = PL_lex_stuff;
1185 PL_lex_stuff = Nullsv;
1186
9cbb5ea2
GS
1187 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1188 = SvPVX(PL_linestr);
3280af22 1189 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1190 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1191 SAVEFREESV(PL_linestr);
1192
1193 PL_lex_dojoin = FALSE;
1194 PL_lex_brackets = 0;
3280af22
NIS
1195 New(899, PL_lex_brackstack, 120, char);
1196 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1197 PL_lex_casemods = 0;
1198 *PL_lex_casestack = '\0';
1199 PL_lex_starts = 0;
1200 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1201 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1202
1203 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1204 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1205 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1206 else
3280af22 1207 PL_lex_inpat = Nullop;
79072805 1208
55497cff 1209 return '(';
79072805
LW
1210}
1211
ffb4593c
NT
1212/*
1213 * S_sublex_done
1214 * Restores lexer state after a S_sublex_push.
1215 */
1216
76e3520e 1217STATIC I32
cea2e8a9 1218S_sublex_done(pTHX)
79072805 1219{
3280af22 1220 if (!PL_lex_starts++) {
9aa983d2
JH
1221 SV *sv = newSVpvn("",0);
1222 if (SvUTF8(PL_linestr))
1223 SvUTF8_on(sv);
3280af22 1224 PL_expect = XOPERATOR;
9aa983d2 1225 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1226 return THING;
1227 }
1228
3280af22
NIS
1229 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1230 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1231 return yylex();
79072805
LW
1232 }
1233
ffb4593c 1234 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1235 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1236 PL_linestr = PL_lex_repl;
1237 PL_lex_inpat = 0;
1238 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1239 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1240 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1241 SAVEFREESV(PL_linestr);
1242 PL_lex_dojoin = FALSE;
1243 PL_lex_brackets = 0;
3280af22
NIS
1244 PL_lex_casemods = 0;
1245 *PL_lex_casestack = '\0';
1246 PL_lex_starts = 0;
25da4f38 1247 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1248 PL_lex_state = LEX_INTERPNORMAL;
1249 PL_lex_starts++;
e9fa98b2
HS
1250 /* we don't clear PL_lex_repl here, so that we can check later
1251 whether this is an evalled subst; that means we rely on the
1252 logic to ensure sublex_done() is called again only via the
1253 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1254 }
e9fa98b2 1255 else {
3280af22 1256 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1257 PL_lex_repl = Nullsv;
1258 }
79072805 1259 return ',';
ffed7fef
LW
1260 }
1261 else {
f46d017c 1262 LEAVE;
3280af22
NIS
1263 PL_bufend = SvPVX(PL_linestr);
1264 PL_bufend += SvCUR(PL_linestr);
1265 PL_expect = XOPERATOR;
09bef843 1266 PL_sublex_info.sub_inwhat = 0;
79072805 1267 return ')';
ffed7fef
LW
1268 }
1269}
1270
02aa26ce
NT
1271/*
1272 scan_const
1273
1274 Extracts a pattern, double-quoted string, or transliteration. This
1275 is terrifying code.
1276
3280af22
NIS
1277 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1278 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1279 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1280
9b599b2a
GS
1281 Returns a pointer to the character scanned up to. Iff this is
1282 advanced from the start pointer supplied (ie if anything was
1283 successfully parsed), will leave an OP for the substring scanned
1284 in yylval. Caller must intuit reason for not parsing further
1285 by looking at the next characters herself.
1286
02aa26ce
NT
1287 In patterns:
1288 backslashes:
1289 double-quoted style: \r and \n
1290 regexp special ones: \D \s
1291 constants: \x3
1292 backrefs: \1 (deprecated in substitution replacements)
1293 case and quoting: \U \Q \E
1294 stops on @ and $, but not for $ as tail anchor
1295
1296 In transliterations:
1297 characters are VERY literal, except for - not at the start or end
1298 of the string, which indicates a range. scan_const expands the
1299 range to the full set of intermediate characters.
1300
1301 In double-quoted strings:
1302 backslashes:
1303 double-quoted style: \r and \n
1304 constants: \x3
1305 backrefs: \1 (deprecated)
1306 case and quoting: \U \Q \E
1307 stops on @ and $
1308
1309 scan_const does *not* construct ops to handle interpolated strings.
1310 It stops processing as soon as it finds an embedded $ or @ variable
1311 and leaves it to the caller to work out what's going on.
1312
da6eedaa 1313 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1314
1315 $ in pattern could be $foo or could be tail anchor. Assumption:
1316 it's a tail anchor if $ is the last thing in the string, or if it's
1317 followed by one of ")| \n\t"
1318
1319 \1 (backreferences) are turned into $1
1320
1321 The structure of the code is
1322 while (there's a character to process) {
1323 handle transliteration ranges
1324 skip regexp comments
1325 skip # initiated comments in //x patterns
1326 check for embedded @foo
1327 check for embedded scalars
1328 if (backslash) {
1329 leave intact backslashes from leave (below)
1330 deprecate \1 in strings and sub replacements
1331 handle string-changing backslashes \l \U \Q \E, etc.
1332 switch (what was escaped) {
1333 handle - in a transliteration (becomes a literal -)
1334 handle \132 octal characters
1335 handle 0x15 hex characters
1336 handle \cV (control V)
1337 handle printf backslashes (\f, \r, \n, etc)
1338 } (end switch)
1339 } (end if backslash)
1340 } (end while character to read)
4e553d73 1341
02aa26ce
NT
1342*/
1343
76e3520e 1344STATIC char *
cea2e8a9 1345S_scan_const(pTHX_ char *start)
79072805 1346{
3280af22 1347 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1348 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1349 register char *s = start; /* start of the constant */
1350 register char *d = SvPVX(sv); /* destination for copies */
1351 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1352 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1353 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1354 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1355 UV uv;
1356
dff6d3cd 1357 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1358 PL_lex_inpat
b6d5fef8 1359 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1360 : "";
79072805 1361
2b9d42f0
NIS
1362 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1363 /* If we are doing a trans and we know we want UTF8 set expectation */
1364 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1365 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1366 }
1367
1368
79072805 1369 while (s < send || dorange) {
02aa26ce 1370 /* get transliterations out of the way (they're most literal) */
3280af22 1371 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1372 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1373 if (dorange) {
1ba5c669
JH
1374 I32 i; /* current expanded character */
1375 I32 min; /* first character in range */
1376 I32 max; /* last character in range */
02aa26ce 1377
2b9d42f0 1378 if (has_utf8) {
8973db79
JH
1379 char *c = (char*)utf8_hop((U8*)d, -1);
1380 char *e = d++;
1381 while (e-- > c)
1382 *(e + 1) = *e;
25716404 1383 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1384 /* mark the range as done, and continue */
1385 dorange = FALSE;
1386 didrange = TRUE;
1387 continue;
1388 }
2b9d42f0 1389
02aa26ce 1390 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1391 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1392 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1393 d -= 2; /* eat the first char and the - */
1394
8ada0baa
JH
1395 min = (U8)*d; /* first char in range */
1396 max = (U8)d[1]; /* last char in range */
1397
c2e66d9e 1398 if (min > max) {
01ec43d0 1399 Perl_croak(aTHX_
d1573ac7 1400 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1401 (char)min, (char)max);
c2e66d9e
GS
1402 }
1403
c7f1f016 1404#ifdef EBCDIC
8ada0baa
JH
1405 if ((isLOWER(min) && isLOWER(max)) ||
1406 (isUPPER(min) && isUPPER(max))) {
1407 if (isLOWER(min)) {
1408 for (i = min; i <= max; i++)
1409 if (isLOWER(i))
db42d148 1410 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1411 } else {
1412 for (i = min; i <= max; i++)
1413 if (isUPPER(i))
db42d148 1414 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1415 }
1416 }
1417 else
1418#endif
1419 for (i = min; i <= max; i++)
eb160463 1420 *d++ = (char)i;
02aa26ce
NT
1421
1422 /* mark the range as done, and continue */
79072805 1423 dorange = FALSE;
01ec43d0 1424 didrange = TRUE;
79072805 1425 continue;
4e553d73 1426 }
02aa26ce
NT
1427
1428 /* range begins (ignore - as first or last char) */
79072805 1429 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1430 if (didrange) {
1fafa243 1431 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1432 }
2b9d42f0 1433 if (has_utf8) {
25716404 1434 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1435 s++;
1436 continue;
1437 }
79072805
LW
1438 dorange = TRUE;
1439 s++;
01ec43d0
GS
1440 }
1441 else {
1442 didrange = FALSE;
1443 }
79072805 1444 }
02aa26ce
NT
1445
1446 /* if we get here, we're not doing a transliteration */
1447
0f5d15d6
IZ
1448 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1449 except for the last char, which will be done separately. */
3280af22 1450 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1451 if (s[2] == '#') {
e994fd66 1452 while (s+1 < send && *s != ')')
db42d148 1453 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1454 }
1455 else if (s[2] == '{' /* This should match regcomp.c */
1456 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1457 {
cc6b7395 1458 I32 count = 1;
0f5d15d6 1459 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1460 char c;
1461
d9f97599
GS
1462 while (count && (c = *regparse)) {
1463 if (c == '\\' && regparse[1])
1464 regparse++;
4e553d73 1465 else if (c == '{')
cc6b7395 1466 count++;
4e553d73 1467 else if (c == '}')
cc6b7395 1468 count--;
d9f97599 1469 regparse++;
cc6b7395 1470 }
e994fd66 1471 if (*regparse != ')')
5bdf89e7 1472 regparse--; /* Leave one char for continuation. */
0f5d15d6 1473 while (s < regparse)
db42d148 1474 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1475 }
748a9306 1476 }
02aa26ce
NT
1477
1478 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1479 else if (*s == '#' && PL_lex_inpat &&
1480 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1481 while (s+1 < send && *s != '\n')
db42d148 1482 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1483 }
02aa26ce 1484
5d1d4326 1485 /* check for embedded arrays
da6eedaa 1486 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1487 */
7e2040f0 1488 else if (*s == '@' && s[1]
5d1d4326 1489 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1490 break;
02aa26ce
NT
1491
1492 /* check for embedded scalars. only stop if we're sure it's a
1493 variable.
1494 */
79072805 1495 else if (*s == '$') {
3280af22 1496 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1497 break;
6002328a 1498 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1499 break; /* in regexp, $ might be tail anchor */
1500 }
02aa26ce 1501
2b9d42f0
NIS
1502 /* End of else if chain - OP_TRANS rejoin rest */
1503
02aa26ce 1504 /* backslashes */
79072805
LW
1505 if (*s == '\\' && s+1 < send) {
1506 s++;
02aa26ce
NT
1507
1508 /* some backslashes we leave behind */
c9f97d15 1509 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1510 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1511 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1512 continue;
1513 }
02aa26ce
NT
1514
1515 /* deprecate \1 in strings and substitution replacements */
3280af22 1516 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1517 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1518 {
599cee73 1519 if (ckWARN(WARN_SYNTAX))
9014280d 1520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1521 *--s = '$';
1522 break;
1523 }
02aa26ce
NT
1524
1525 /* string-change backslash escapes */
3280af22 1526 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1527 --s;
1528 break;
1529 }
02aa26ce
NT
1530
1531 /* if we get here, it's either a quoted -, or a digit */
79072805 1532 switch (*s) {
02aa26ce
NT
1533
1534 /* quoted - in transliterations */
79072805 1535 case '-':
3280af22 1536 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1537 *d++ = *s++;
1538 continue;
1539 }
1540 /* FALL THROUGH */
1541 default:
11b8faa4 1542 {
707afd92
MS
1543 if (ckWARN(WARN_MISC) &&
1544 isALNUM(*s) &&
1545 *s != '_')
9014280d 1546 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1547 "Unrecognized escape \\%c passed through",
1548 *s);
1549 /* default action is to copy the quoted character */
f9a63242 1550 goto default_action;
11b8faa4 1551 }
02aa26ce
NT
1552
1553 /* \132 indicates an octal constant */
79072805
LW
1554 case '0': case '1': case '2': case '3':
1555 case '4': case '5': case '6': case '7':
ba210ebe 1556 {
53305cf1
NC
1557 I32 flags = 0;
1558 STRLEN len = 3;
1559 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1560 s += len;
1561 }
012bcf8d 1562 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1563
1564 /* \x24 indicates a hex constant */
79072805 1565 case 'x':
a0ed51b3
LW
1566 ++s;
1567 if (*s == '{') {
1568 char* e = strchr(s, '}');
a4c04bdc
NC
1569 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1570 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1571 STRLEN len;
355860ce 1572
53305cf1 1573 ++s;
adaeee49 1574 if (!e) {
a0ed51b3 1575 yyerror("Missing right brace on \\x{}");
355860ce 1576 continue;
ba210ebe 1577 }
53305cf1
NC
1578 len = e - s;
1579 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1580 s = e + 1;
a0ed51b3
LW
1581 }
1582 else {
ba210ebe 1583 {
53305cf1 1584 STRLEN len = 2;
a4c04bdc 1585 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1586 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1587 s += len;
1588 }
012bcf8d
GS
1589 }
1590
1591 NUM_ESCAPE_INSERT:
1592 /* Insert oct or hex escaped character.
301d3d20 1593 * There will always enough room in sv since such
db42d148 1594 * escapes will be longer than any UTF-8 sequence
301d3d20 1595 * they can end up as. */
ba7cea30 1596
c7f1f016
NIS
1597 /* We need to map to chars to ASCII before doing the tests
1598 to cover EBCDIC
1599 */
c4d5f83a 1600 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1601 if (!has_utf8 && uv > 255) {
301d3d20
JH
1602 /* Might need to recode whatever we have
1603 * accumulated so far if it contains any
1604 * hibit chars.
1605 *
1606 * (Can't we keep track of that and avoid
1607 * this rescan? --jhi)
012bcf8d 1608 */
c7f1f016 1609 int hicount = 0;
63cd0674
NIS
1610 U8 *c;
1611 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1612 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1613 hicount++;
db42d148 1614 }
012bcf8d 1615 }
63cd0674 1616 if (hicount) {
db42d148
NIS
1617 STRLEN offset = d - SvPVX(sv);
1618 U8 *src, *dst;
1619 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1620 src = (U8 *)d - 1;
1621 dst = src+hicount;
1622 d += hicount;
1623 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1624 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1625 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1626 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1627 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1628 }
1629 else {
63cd0674 1630 *dst-- = *src;
012bcf8d 1631 }
c7f1f016 1632 src--;
012bcf8d
GS
1633 }
1634 }
1635 }
1636
9aa983d2 1637 if (has_utf8 || uv > 255) {
9041c2e3 1638 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1639 has_utf8 = TRUE;
f9a63242
JH
1640 if (PL_lex_inwhat == OP_TRANS &&
1641 PL_sublex_info.sub_op) {
1642 PL_sublex_info.sub_op->op_private |=
1643 (PL_lex_repl ? OPpTRANS_FROM_UTF
1644 : OPpTRANS_TO_UTF);
f9a63242 1645 }
012bcf8d 1646 }
a0ed51b3 1647 else {
012bcf8d 1648 *d++ = (char)uv;
a0ed51b3 1649 }
012bcf8d
GS
1650 }
1651 else {
c4d5f83a 1652 *d++ = (char) uv;
a0ed51b3 1653 }
79072805 1654 continue;
02aa26ce 1655
b239daa5 1656 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1657 case 'N':
55eda711 1658 ++s;
423cee85
JH
1659 if (*s == '{') {
1660 char* e = strchr(s, '}');
155aba94 1661 SV *res;
423cee85
JH
1662 STRLEN len;
1663 char *str;
4e553d73 1664
423cee85 1665 if (!e) {
5777a3f7 1666 yyerror("Missing right brace on \\N{}");
423cee85
JH
1667 e = s - 1;
1668 goto cont_scan;
1669 }
dbc0d4f2
JH
1670 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1671 /* \N{U+...} */
1672 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1673 PERL_SCAN_DISALLOW_PREFIX;
1674 s += 3;
1675 len = e - s;
1676 uv = grok_hex(s, &len, &flags, NULL);
1677 s = e + 1;
1678 goto NUM_ESCAPE_INSERT;
1679 }
55eda711
JH
1680 res = newSVpvn(s + 1, e - s - 1);
1681 res = new_constant( Nullch, 0, "charnames",
1682 res, Nullsv, "\\N{...}" );
f9a63242
JH
1683 if (has_utf8)
1684 sv_utf8_upgrade(res);
423cee85 1685 str = SvPV(res,len);
1c47067b
JH
1686#ifdef EBCDIC_NEVER_MIND
1687 /* charnames uses pack U and that has been
1688 * recently changed to do the below uni->native
1689 * mapping, so this would be redundant (and wrong,
1690 * the code point would be doubly converted).
1691 * But leave this in just in case the pack U change
1692 * gets revoked, but the semantics is still
1693 * desireable for charnames. --jhi */
cddc7ef4
JH
1694 {
1695 UV uv = utf8_to_uvchr((U8*)str, 0);
1696
1697 if (uv < 0x100) {
1698 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1699
1700 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1701 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1702 str = SvPV(res, len);
1703 }
1704 }
1705#endif
89491803 1706 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1707 char *ostart = SvPVX(sv);
1708 SvCUR_set(sv, d - ostart);
1709 SvPOK_on(sv);
e4f3eed8 1710 *d = '\0';
f08d6ad9 1711 sv_utf8_upgrade(sv);
d2f449dd 1712 /* this just broke our allocation above... */
eb160463 1713 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1714 d = SvPVX(sv) + SvCUR(sv);
89491803 1715 has_utf8 = TRUE;
f08d6ad9 1716 }
eb160463 1717 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1718 char *odest = SvPVX(sv);
1719
8973db79 1720 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1721 d = SvPVX(sv) + (d - odest);
1722 }
1723 Copy(str, d, len, char);
1724 d += len;
1725 SvREFCNT_dec(res);
1726 cont_scan:
1727 s = e + 1;
1728 }
1729 else
5777a3f7 1730 yyerror("Missing braces on \\N{}");
423cee85
JH
1731 continue;
1732
02aa26ce 1733 /* \c is a control character */
79072805
LW
1734 case 'c':
1735 s++;
961ce445 1736 if (s < send) {
ba210ebe 1737 U8 c = *s++;
c7f1f016
NIS
1738#ifdef EBCDIC
1739 if (isLOWER(c))
1740 c = toUPPER(c);
1741#endif
db42d148 1742 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1743 }
961ce445
RGS
1744 else {
1745 yyerror("Missing control char name in \\c");
1746 }
79072805 1747 continue;
02aa26ce
NT
1748
1749 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1750 case 'b':
db42d148 1751 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1752 break;
1753 case 'n':
db42d148 1754 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1755 break;
1756 case 'r':
db42d148 1757 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1758 break;
1759 case 'f':
db42d148 1760 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1761 break;
1762 case 't':
db42d148 1763 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1764 break;
34a3fe2a 1765 case 'e':
db42d148 1766 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1767 break;
1768 case 'a':
db42d148 1769 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1770 break;
02aa26ce
NT
1771 } /* end switch */
1772
79072805
LW
1773 s++;
1774 continue;
02aa26ce
NT
1775 } /* end if (backslash) */
1776
f9a63242 1777 default_action:
2b9d42f0
NIS
1778 /* If we started with encoded form, or already know we want it
1779 and then encode the next character */
1780 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1781 STRLEN len = 1;
1782 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1783 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1784 s += len;
1785 if (need > len) {
1786 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1787 STRLEN off = d - SvPVX(sv);
1788 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1789 }
1790 d = (char*)uvchr_to_utf8((U8*)d, uv);
1791 has_utf8 = TRUE;
1792 }
1793 else {
1794 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1795 }
02aa26ce
NT
1796 } /* while loop to process each character */
1797
1798 /* terminate the string and set up the sv */
79072805 1799 *d = '\0';
463ee0b2 1800 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1801 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1802 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1803
79072805 1804 SvPOK_on(sv);
9f4817db 1805 if (PL_encoding && !has_utf8) {
d0063567
DK
1806 sv_recode_to_utf8(sv, PL_encoding);
1807 if (SvUTF8(sv))
1808 has_utf8 = TRUE;
9f4817db 1809 }
2b9d42f0 1810 if (has_utf8) {
7e2040f0 1811 SvUTF8_on(sv);
2b9d42f0 1812 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1813 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1814 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1815 }
1816 }
79072805 1817
02aa26ce 1818 /* shrink the sv if we allocated more than we used */
79072805
LW
1819 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1820 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1821 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1822 }
02aa26ce 1823
9b599b2a 1824 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1825 if (s > PL_bufptr) {
1826 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1827 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1828 sv, Nullsv,
4e553d73 1829 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1830 ? "tr"
3280af22 1831 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1832 ? "s"
1833 : "qq")));
79072805 1834 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1835 } else
8990e307 1836 SvREFCNT_dec(sv);
79072805
LW
1837 return s;
1838}
1839
ffb4593c
NT
1840/* S_intuit_more
1841 * Returns TRUE if there's more to the expression (e.g., a subscript),
1842 * FALSE otherwise.
ffb4593c
NT
1843 *
1844 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1845 *
1846 * ->[ and ->{ return TRUE
1847 * { and [ outside a pattern are always subscripts, so return TRUE
1848 * if we're outside a pattern and it's not { or [, then return FALSE
1849 * if we're in a pattern and the first char is a {
1850 * {4,5} (any digits around the comma) returns FALSE
1851 * if we're in a pattern and the first char is a [
1852 * [] returns FALSE
1853 * [SOMETHING] has a funky algorithm to decide whether it's a
1854 * character class or not. It has to deal with things like
1855 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1856 * anything else returns TRUE
1857 */
1858
9cbb5ea2
GS
1859/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1860
76e3520e 1861STATIC int
cea2e8a9 1862S_intuit_more(pTHX_ register char *s)
79072805 1863{
3280af22 1864 if (PL_lex_brackets)
79072805
LW
1865 return TRUE;
1866 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1867 return TRUE;
1868 if (*s != '{' && *s != '[')
1869 return FALSE;
3280af22 1870 if (!PL_lex_inpat)
79072805
LW
1871 return TRUE;
1872
1873 /* In a pattern, so maybe we have {n,m}. */
1874 if (*s == '{') {
1875 s++;
1876 if (!isDIGIT(*s))
1877 return TRUE;
1878 while (isDIGIT(*s))
1879 s++;
1880 if (*s == ',')
1881 s++;
1882 while (isDIGIT(*s))
1883 s++;
1884 if (*s == '}')
1885 return FALSE;
1886 return TRUE;
1887
1888 }
1889
1890 /* On the other hand, maybe we have a character class */
1891
1892 s++;
1893 if (*s == ']' || *s == '^')
1894 return FALSE;
1895 else {
ffb4593c 1896 /* this is terrifying, and it works */
79072805
LW
1897 int weight = 2; /* let's weigh the evidence */
1898 char seen[256];
f27ffc4a 1899 unsigned char un_char = 255, last_un_char;
93a17b20 1900 char *send = strchr(s,']');
3280af22 1901 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1902
1903 if (!send) /* has to be an expression */
1904 return TRUE;
1905
1906 Zero(seen,256,char);
1907 if (*s == '$')
1908 weight -= 3;
1909 else if (isDIGIT(*s)) {
1910 if (s[1] != ']') {
1911 if (isDIGIT(s[1]) && s[2] == ']')
1912 weight -= 10;
1913 }
1914 else
1915 weight -= 100;
1916 }
1917 for (; s < send; s++) {
1918 last_un_char = un_char;
1919 un_char = (unsigned char)*s;
1920 switch (*s) {
1921 case '@':
1922 case '&':
1923 case '$':
1924 weight -= seen[un_char] * 10;
7e2040f0 1925 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1926 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1927 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1928 weight -= 100;
1929 else
1930 weight -= 10;
1931 }
1932 else if (*s == '$' && s[1] &&
93a17b20
LW
1933 strchr("[#!%*<>()-=",s[1])) {
1934 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1935 weight -= 10;
1936 else
1937 weight -= 1;
1938 }
1939 break;
1940 case '\\':
1941 un_char = 254;
1942 if (s[1]) {
93a17b20 1943 if (strchr("wds]",s[1]))
79072805
LW
1944 weight += 100;
1945 else if (seen['\''] || seen['"'])
1946 weight += 1;
93a17b20 1947 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1948 weight += 40;
1949 else if (isDIGIT(s[1])) {
1950 weight += 40;
1951 while (s[1] && isDIGIT(s[1]))
1952 s++;
1953 }
1954 }
1955 else
1956 weight += 100;
1957 break;
1958 case '-':
1959 if (s[1] == '\\')
1960 weight += 50;
93a17b20 1961 if (strchr("aA01! ",last_un_char))
79072805 1962 weight += 30;
93a17b20 1963 if (strchr("zZ79~",s[1]))
79072805 1964 weight += 30;
f27ffc4a
GS
1965 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1966 weight -= 5; /* cope with negative subscript */
79072805
LW
1967 break;
1968 default:
93a17b20 1969 if (!isALNUM(last_un_char) && !strchr("$@&",last_un_char) &&
79072805
LW
1970 isALPHA(*s) && s[1] && isALPHA(s[1])) {
1971 char *d = tmpbuf;
1972 while (isALPHA(*s))
1973 *d++ = *s++;
1974 *d = '\0';
1975 if (keyword(tmpbuf, d - tmpbuf))
1976 weight -= 150;
1977 }
1978 if (un_char == last_un_char + 1)
1979 weight += 5;
1980 weight -= seen[un_char];
1981 break;
1982 }
1983 seen[un_char]++;
1984 }
1985 if (weight >= 0) /* probably a character class */
1986 return FALSE;
1987 }
1988
1989 return TRUE;
1990}
ffed7fef 1991
ffb4593c
NT
1992/*
1993 * S_intuit_method
1994 *
1995 * Does all the checking to disambiguate
1996 * foo bar
1997 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
1998 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
1999 *
2000 * First argument is the stuff after the first token, e.g. "bar".
2001 *
2002 * Not a method if bar is a filehandle.
2003 * Not a method if foo is a subroutine prototyped to take a filehandle.
2004 * Not a method if it's really "Foo $bar"
2005 * Method if it's "foo $bar"
2006 * Not a method if it's really "print foo $bar"
2007 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2008 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2009 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2010 * =>
2011 */
2012
76e3520e 2013STATIC int
cea2e8a9 2014S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2015{
2016 char *s = start + (*start == '$');
3280af22 2017 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2018 STRLEN len;
2019 GV* indirgv;
2020
2021 if (gv) {
b6c543e3 2022 CV *cv;
a0d0e21e
LW
2023 if (GvIO(gv))
2024 return 0;
b6c543e3
IZ
2025 if ((cv = GvCVu(gv))) {
2026 char *proto = SvPVX(cv);
2027 if (proto) {
2028 if (*proto == ';')
2029 proto++;
2030 if (*proto == '*')
2031 return 0;
2032 }
2033 } else
a0d0e21e
LW
2034 gv = 0;
2035 }
8903cb82 2036 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2037 /* start is the beginning of the possible filehandle/object,
2038 * and s is the end of it
2039 * tmpbuf is a copy of it
2040 */
2041
a0d0e21e 2042 if (*start == '$') {
3280af22 2043 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2044 return 0;
2045 s = skipspace(s);
3280af22
NIS
2046 PL_bufptr = start;
2047 PL_expect = XREF;
a0d0e21e
LW
2048 return *s == '(' ? FUNCMETH : METHOD;
2049 }
2050 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2051 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2052 len -= 2;
2053 tmpbuf[len] = '\0';
2054 goto bare_package;
2055 }
2056 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2057 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2058 return 0;
2059 /* filehandle or package name makes it a method */
89bfa8cd 2060 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2061 s = skipspace(s);
3280af22 2062 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2063 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2064 bare_package:
3280af22 2065 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2066 newSVpvn(tmpbuf,len));
3280af22
NIS
2067 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2068 PL_expect = XTERM;
a0d0e21e 2069 force_next(WORD);
3280af22 2070 PL_bufptr = s;
a0d0e21e
LW
2071 return *s == '(' ? FUNCMETH : METHOD;
2072 }
2073 }
2074 return 0;
2075}
2076
ffb4593c
NT
2077/*
2078 * S_incl_perldb
2079 * Return a string of Perl code to load the debugger. If PERL5DB
2080 * is set, it will return the contents of that, otherwise a
2081 * compile-time require of perl5db.pl.
2082 */
2083
76e3520e 2084STATIC char*
cea2e8a9 2085S_incl_perldb(pTHX)
a0d0e21e 2086{
3280af22 2087 if (PL_perldb) {
76e3520e 2088 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2089
2090 if (pdb)
2091 return pdb;
93189314 2092 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2093 return "BEGIN { require 'perl5db.pl' }";
2094 }
2095 return "";
2096}
2097
2098
16d20bd9 2099/* Encoded script support. filter_add() effectively inserts a
4e553d73 2100 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2101 * Note that the filter function only applies to the current source file
2102 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2103 *
2104 * The datasv parameter (which may be NULL) can be used to pass
2105 * private data to this instance of the filter. The filter function
2106 * can recover the SV using the FILTER_DATA macro and use it to
2107 * store private buffers and state information.
2108 *
2109 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2110 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2111 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2112 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2113 * private use must be set using malloc'd pointers.
2114 */
16d20bd9
AD
2115
2116SV *
864dbfa3 2117Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2118{
f4c556ac
GS
2119 if (!funcp)
2120 return Nullsv;
2121
3280af22
NIS
2122 if (!PL_rsfp_filters)
2123 PL_rsfp_filters = newAV();
16d20bd9 2124 if (!datasv)
8c52afec 2125 datasv = NEWSV(255,0);
16d20bd9 2126 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2127 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2128 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2129 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2130 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2131 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2132 av_unshift(PL_rsfp_filters, 1);
2133 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2134 return(datasv);
2135}
4e553d73 2136
16d20bd9
AD
2137
2138/* Delete most recently added instance of this filter function. */
a0d0e21e 2139void
864dbfa3 2140Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2141{
e0c19803 2142 SV *datasv;
fe5a182c 2143 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2144 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2145 return;
2146 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2147 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2148 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2149 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2150 IoANY(datasv) = (void *)NULL;
3280af22 2151 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2152
16d20bd9
AD
2153 return;
2154 }
2155 /* we need to search for the correct entry and clear it */
cea2e8a9 2156 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2157}
2158
2159
1de9afcd
RGS
2160/* Invoke the idxth filter function for the current rsfp. */
2161/* maxlen 0 = read one text line */
16d20bd9 2162I32
864dbfa3 2163Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2164{
16d20bd9
AD
2165 filter_t funcp;
2166 SV *datasv = NULL;
e50aee73 2167
3280af22 2168 if (!PL_rsfp_filters)
16d20bd9 2169 return -1;
1de9afcd 2170 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2171 /* Provide a default input filter to make life easy. */
2172 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2173 DEBUG_P(PerlIO_printf(Perl_debug_log,
2174 "filter_read %d: from rsfp\n", idx));
4e553d73 2175 if (maxlen) {
16d20bd9
AD
2176 /* Want a block */
2177 int len ;
2178 int old_len = SvCUR(buf_sv) ;
2179
2180 /* ensure buf_sv is large enough */
eb160463 2181 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2182 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2183 if (PerlIO_error(PL_rsfp))
37120919
AD
2184 return -1; /* error */
2185 else
2186 return 0 ; /* end of file */
2187 }
16d20bd9
AD
2188 SvCUR_set(buf_sv, old_len + len) ;
2189 } else {
2190 /* Want a line */
3280af22
NIS
2191 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2192 if (PerlIO_error(PL_rsfp))
37120919
AD
2193 return -1; /* error */
2194 else
2195 return 0 ; /* end of file */
2196 }
16d20bd9
AD
2197 }
2198 return SvCUR(buf_sv);
2199 }
2200 /* Skip this filter slot if filter has been deleted */
1de9afcd 2201 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2202 DEBUG_P(PerlIO_printf(Perl_debug_log,
2203 "filter_read %d: skipped (filter deleted)\n",
2204 idx));
16d20bd9
AD
2205 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2206 }
2207 /* Get function pointer hidden within datasv */
4755096e 2208 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2209 DEBUG_P(PerlIO_printf(Perl_debug_log,
2210 "filter_read %d: via function %p (%s)\n",
fe5a182c 2211 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2212 /* Call function. The function is expected to */
2213 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2214 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2215 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2216}
2217
76e3520e 2218STATIC char *
cea2e8a9 2219S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2220{
c39cd008 2221#ifdef PERL_CR_FILTER
3280af22 2222 if (!PL_rsfp_filters) {
c39cd008 2223 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2224 }
2225#endif
3280af22 2226 if (PL_rsfp_filters) {
55497cff 2227 if (!append)
2228 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2229 if (FILTER_READ(0, sv, 0) > 0)
2230 return ( SvPVX(sv) ) ;
2231 else
2232 return Nullch ;
2233 }
9d116dd7 2234 else
fd049845 2235 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2236}
2237
01ec43d0
GS
2238STATIC HV *
2239S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2240{
2241 GV *gv;
2242
01ec43d0 2243 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2244 return PL_curstash;
2245
2246 if (len > 2 &&
2247 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2248 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2249 {
2250 return GvHV(gv); /* Foo:: */
def3634b
GS
2251 }
2252
2253 /* use constant CLASS => 'MyClass' */
2254 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2255 SV *sv;
2256 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2257 pkgname = SvPV_nolen(sv);
2258 }
2259 }
2260
2261 return gv_stashpv(pkgname, FALSE);
2262}
a0d0e21e 2263
748a9306
LW
2264#ifdef DEBUGGING
2265 static char* exp_name[] =
09bef843 2266 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2267 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2268 };
748a9306 2269#endif
463ee0b2 2270
02aa26ce
NT
2271/*
2272 yylex
2273
2274 Works out what to call the token just pulled out of the input
2275 stream. The yacc parser takes care of taking the ops we return and
2276 stitching them into a tree.
2277
2278 Returns:
2279 PRIVATEREF
2280
2281 Structure:
2282 if read an identifier
2283 if we're in a my declaration
2284 croak if they tried to say my($foo::bar)
2285 build the ops for a my() declaration
2286 if it's an access to a my() variable
2287 are we in a sort block?
2288 croak if my($a); $a <=> $b
2289 build ops for access to a my() variable
2290 if in a dq string, and they've said @foo and we can't find @foo
2291 croak
2292 build ops for a bareword
2293 if we already built the token before, use it.
2294*/
2295
20141f0e 2296
dba4d153
JH
2297#ifdef __SC__
2298#pragma segment Perl_yylex
2299#endif
dba4d153 2300int
dba4d153 2301Perl_yylex(pTHX)
20141f0e 2302{
3afc138a 2303 register char *s = PL_bufptr;
378cc40b 2304 register char *d;
79072805 2305 register I32 tmp;
463ee0b2 2306 STRLEN len;
161b471a
NIS
2307 GV *gv = Nullgv;
2308 GV **gvp = 0;
aa7440fb 2309 bool bof = FALSE;
1d239bbb 2310 I32 orig_keyword = 0;
a687059c 2311
bbf60fe6
DM
2312 DEBUG_T( {
2313 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2314 lex_state_names[PL_lex_state]);
2315 } );
02aa26ce 2316 /* check if there's an identifier for us to look at */
ba979b31 2317 if (PL_pending_ident)
bbf60fe6 2318 return REPORT(S_pending_ident(aTHX));
bbce6d69 2319
02aa26ce
NT
2320 /* no identifier pending identification */
2321
3280af22 2322 switch (PL_lex_state) {
79072805
LW
2323#ifdef COMMENTARY
2324 case LEX_NORMAL: /* Some compilers will produce faster */
2325 case LEX_INTERPNORMAL: /* code if we comment these out. */
2326 break;
2327#endif
2328
09bef843 2329 /* when we've already built the next token, just pull it out of the queue */
79072805 2330 case LEX_KNOWNEXT:
3280af22
NIS
2331 PL_nexttoke--;
2332 yylval = PL_nextval[PL_nexttoke];
2333 if (!PL_nexttoke) {
2334 PL_lex_state = PL_lex_defer;
2335 PL_expect = PL_lex_expect;
2336 PL_lex_defer = LEX_NORMAL;
463ee0b2 2337 }
607df283 2338 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2339 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2340 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2341
bbf60fe6 2342 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2343
02aa26ce 2344 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2345 when we get here, PL_bufptr is at the \
02aa26ce 2346 */
79072805
LW
2347 case LEX_INTERPCASEMOD:
2348#ifdef DEBUGGING
3280af22 2349 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2350 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2351#endif
02aa26ce 2352 /* handle \E or end of string */
3280af22 2353 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2354 char oldmod;
02aa26ce
NT
2355
2356 /* if at a \E */
3280af22
NIS
2357 if (PL_lex_casemods) {
2358 oldmod = PL_lex_casestack[--PL_lex_casemods];
2359 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2360
3280af22
NIS
2361 if (PL_bufptr != PL_bufend && strchr("LUQ", oldmod)) {
2362 PL_bufptr += 2;
2363 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2364 }
bbf60fe6 2365 return REPORT(')');
79072805 2366 }
3280af22
NIS
2367 if (PL_bufptr != PL_bufend)
2368 PL_bufptr += 2;
2369 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2370 return yylex();
79072805
LW
2371 }
2372 else {
607df283 2373 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2374 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2375 s = PL_bufptr + 1;
6e909404
JH
2376 if (s[1] == '\\' && s[2] == 'E') {
2377 PL_bufptr = s + 3;
2378 PL_lex_state = LEX_INTERPCONCAT;
2379 return yylex();
a0d0e21e 2380 }
6e909404
JH
2381 else {
2382 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2383 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
2384 if (strchr("LU", *s) &&
2385 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2386 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2387 return REPORT(')');
6e909404
JH
2388 }
2389 if (PL_lex_casemods > 10)
2390 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2391 PL_lex_casestack[PL_lex_casemods++] = *s;
2392 PL_lex_casestack[PL_lex_casemods] = '\0';
2393 PL_lex_state = LEX_INTERPCONCAT;
2394 PL_nextval[PL_nexttoke].ival = 0;
2395 force_next('(');
2396 if (*s == 'l')
2397 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2398 else if (*s == 'u')
2399 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2400 else if (*s == 'L')
2401 PL_nextval[PL_nexttoke].ival = OP_LC;
2402 else if (*s == 'U')
2403 PL_nextval[PL_nexttoke].ival = OP_UC;
2404 else if (*s == 'Q')
2405 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2406 else
2407 Perl_croak(aTHX_ "panic: yylex");
2408 PL_bufptr = s + 1;
a0d0e21e 2409 }
79072805 2410 force_next(FUNC);
3280af22
NIS
2411 if (PL_lex_starts) {
2412 s = PL_bufptr;
2413 PL_lex_starts = 0;
131b3ad0
DM
2414 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2415 if (PL_lex_casemods == 1 && PL_lex_inpat)
2416 OPERATOR(',');
2417 else
2418 Aop(OP_CONCAT);
79072805
LW
2419 }
2420 else
cea2e8a9 2421 return yylex();
79072805
LW
2422 }
2423
55497cff 2424 case LEX_INTERPPUSH:
bbf60fe6 2425 return REPORT(sublex_push());
55497cff 2426
79072805 2427 case LEX_INTERPSTART:
3280af22 2428 if (PL_bufptr == PL_bufend)
bbf60fe6 2429 return REPORT(sublex_done());
607df283 2430 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2431 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2432 PL_expect = XTERM;
2433 PL_lex_dojoin = (*PL_bufptr == '@');
2434 PL_lex_state = LEX_INTERPNORMAL;
2435 if (PL_lex_dojoin) {
2436 PL_nextval[PL_nexttoke].ival = 0;
79072805 2437 force_next(',');
a0d0e21e 2438 force_ident("\"", '$');
3280af22 2439 PL_nextval[PL_nexttoke].ival = 0;
79072805 2440 force_next('$');
3280af22 2441 PL_nextval[PL_nexttoke].ival = 0;
79072805 2442 force_next('(');
3280af22 2443 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2444 force_next(FUNC);
2445 }
3280af22
NIS
2446 if (PL_lex_starts++) {
2447 s = PL_bufptr;
131b3ad0
DM
2448 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2449 if (!PL_lex_casemods && PL_lex_inpat)
2450 OPERATOR(',');
2451 else
2452 Aop(OP_CONCAT);
79072805 2453 }
cea2e8a9 2454 return yylex();
79072805
LW
2455
2456 case LEX_INTERPENDMAYBE:
3280af22
NIS
2457 if (intuit_more(PL_bufptr)) {
2458 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2459 break;
2460 }
2461 /* FALL THROUGH */
2462
2463 case LEX_INTERPEND:
3280af22
NIS
2464 if (PL_lex_dojoin) {
2465 PL_lex_dojoin = FALSE;
2466 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2467 return REPORT(')');
79072805 2468 }
43a16006 2469 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2470 && SvEVALED(PL_lex_repl))
43a16006 2471 {
e9fa98b2 2472 if (PL_bufptr != PL_bufend)
cea2e8a9 2473 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2474 PL_lex_repl = Nullsv;
2475 }
79072805
LW
2476 /* FALLTHROUGH */
2477 case LEX_INTERPCONCAT:
2478#ifdef DEBUGGING
3280af22 2479 if (PL_lex_brackets)
cea2e8a9 2480 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2481#endif
3280af22 2482 if (PL_bufptr == PL_bufend)
bbf60fe6 2483 return REPORT(sublex_done());
79072805 2484
3280af22
NIS
2485 if (SvIVX(PL_linestr) == '\'') {
2486 SV *sv = newSVsv(PL_linestr);
2487 if (!PL_lex_inpat)
76e3520e 2488 sv = tokeq(sv);
3280af22 2489 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2490 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2491 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2492 s = PL_bufend;
79072805
LW
2493 }
2494 else {
3280af22 2495 s = scan_const(PL_bufptr);
79072805 2496 if (*s == '\\')
3280af22 2497 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2498 else
3280af22 2499 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2500 }
2501
3280af22
NIS
2502 if (s != PL_bufptr) {
2503 PL_nextval[PL_nexttoke] = yylval;
2504 PL_expect = XTERM;
79072805 2505 force_next(THING);
131b3ad0
DM
2506 if (PL_lex_starts++) {
2507 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2508 if (!PL_lex_casemods && PL_lex_inpat)
2509 OPERATOR(',');
2510 else
2511 Aop(OP_CONCAT);
2512 }
79072805 2513 else {
3280af22 2514 PL_bufptr = s;
cea2e8a9 2515 return yylex();
79072805
LW
2516 }
2517 }
2518
cea2e8a9 2519 return yylex();
a0d0e21e 2520 case LEX_FORMLINE:
3280af22
NIS
2521 PL_lex_state = LEX_NORMAL;
2522 s = scan_formline(PL_bufptr);
2523 if (!PL_lex_formbrack)
a0d0e21e
LW
2524 goto rightbracket;
2525 OPERATOR(';');
79072805
LW
2526 }
2527
3280af22
NIS
2528 s = PL_bufptr;
2529 PL_oldoldbufptr = PL_oldbufptr;
2530 PL_oldbufptr = s;
607df283 2531 DEBUG_T( {
bbf60fe6 2532 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2533 exp_name[PL_expect], s);
5f80b19c 2534 } );
463ee0b2
LW
2535
2536 retry:
378cc40b
LW
2537 switch (*s) {
2538 default:
7e2040f0 2539 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2540 goto keylookup;
cea2e8a9 2541 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2542 case 4:
2543 case 26:
2544 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2545 case 0:
3280af22
NIS
2546 if (!PL_rsfp) {
2547 PL_last_uni = 0;
2548 PL_last_lop = 0;
c5ee2135
WL
2549 if (PL_lex_brackets) {
2550 if (PL_lex_formbrack)
2551 yyerror("Format not terminated");
2552 else
2553 yyerror("Missing right curly or square bracket");
2554 }
4e553d73 2555 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2556 "### Tokener got EOF\n");
5f80b19c 2557 } );
79072805 2558 TOKEN(0);
463ee0b2 2559 }
3280af22 2560 if (s++ < PL_bufend)
a687059c 2561 goto retry; /* ignore stray nulls */
3280af22
NIS
2562 PL_last_uni = 0;
2563 PL_last_lop = 0;
2564 if (!PL_in_eval && !PL_preambled) {
2565 PL_preambled = TRUE;
2566 sv_setpv(PL_linestr,incl_perldb());
2567 if (SvCUR(PL_linestr))
2568 sv_catpv(PL_linestr,";");
2569 if (PL_preambleav){
2570 while(AvFILLp(PL_preambleav) >= 0) {
2571 SV *tmpsv = av_shift(PL_preambleav);
2572 sv_catsv(PL_linestr, tmpsv);
2573 sv_catpv(PL_linestr, ";");
91b7def8 2574 sv_free(tmpsv);
2575 }
3280af22
NIS
2576 sv_free((SV*)PL_preambleav);
2577 PL_preambleav = NULL;
91b7def8 2578 }
3280af22
NIS
2579 if (PL_minus_n || PL_minus_p) {
2580 sv_catpv(PL_linestr, "LINE: while (<>) {");
2581 if (PL_minus_l)
2582 sv_catpv(PL_linestr,"chomp;");
2583 if (PL_minus_a) {
3280af22
NIS
2584 if (PL_minus_F) {
2585 if (strchr("/'\"", *PL_splitstr)
2586 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2587 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2588 else {
2589 char delim;
2590 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2591 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2592 delim = *s;
75c72d73 2593 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2594 "q" + (delim == '\''), delim);
3280af22 2595 for (s = PL_splitstr; *s; s++) {
54310121 2596 if (*s == '\\')
3280af22
NIS
2597 sv_catpvn(PL_linestr, "\\", 1);
2598 sv_catpvn(PL_linestr, s, 1);
54310121 2599 }
cea2e8a9 2600 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2601 }
2304df62
AD
2602 }
2603 else
75c72d73 2604 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2605 }
79072805 2606 }
3280af22
NIS
2607 sv_catpv(PL_linestr, "\n");
2608 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2609 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2610 PL_last_lop = PL_last_uni = Nullch;
3280af22 2611 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2612 SV *sv = NEWSV(85,0);
2613
2614 sv_upgrade(sv, SVt_PVMG);
3280af22 2615 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2616 (void)SvIOK_on(sv);
2617 SvIVX(sv) = 0;
57843af0 2618 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2619 }
79072805 2620 goto retry;
a687059c 2621 }
e929a76b 2622 do {
aa7440fb 2623 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2624 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2625 fake_eof:
2626 if (PL_rsfp) {
2627 if (PL_preprocess && !PL_in_eval)
2628 (void)PerlProc_pclose(PL_rsfp);
2629 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2630 PerlIO_clearerr(PL_rsfp);
2631 else
2632 (void)PerlIO_close(PL_rsfp);
2633 PL_rsfp = Nullfp;
2634 PL_doextract = FALSE;
2635 }
2636 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2637 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2638 sv_catpv(PL_linestr,";}");
2639 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2640 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2641 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2642 PL_minus_n = PL_minus_p = 0;
2643 goto retry;
2644 }
2645 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2646 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2647 sv_setpv(PL_linestr,"");
2648 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2649 }
7aa207d6
JH
2650 /* If it looks like the start of a BOM or raw UTF-16,
2651 * check if it in fact is. */
2652 else if (bof &&
2653 (*s == 0 ||
2654 *(U8*)s == 0xEF ||
2655 *(U8*)s >= 0xFE ||
2656 s[1] == 0)) {
226017aa 2657#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2658# ifdef __GNU_LIBRARY__
2659# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2660# define FTELL_FOR_PIPE_IS_BROKEN
2661# endif
e3f494f1
JH
2662# else
2663# ifdef __GLIBC__
2664# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2665# define FTELL_FOR_PIPE_IS_BROKEN
2666# endif
2667# endif
226017aa
DD
2668# endif
2669#endif
2670#ifdef FTELL_FOR_PIPE_IS_BROKEN
2671 /* This loses the possibility to detect the bof
2672 * situation on perl -P when the libc5 is being used.
2673 * Workaround? Maybe attach some extra state to PL_rsfp?
2674 */
2675 if (!PL_preprocess)
7e28d3af 2676 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2677#else
eb160463 2678 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2679#endif
7e28d3af 2680 if (bof) {
3280af22 2681 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2682 s = swallow_bom((U8*)s);
e929a76b 2683 }
378cc40b 2684 }
3280af22 2685 if (PL_doextract) {
a0d0e21e
LW
2686 /* Incest with pod. */
2687 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2688 sv_setpv(PL_linestr, "");
2689 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2690 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2691 PL_last_lop = PL_last_uni = Nullch;
3280af22 2692 PL_doextract = FALSE;
a0d0e21e 2693 }
4e553d73 2694 }
463ee0b2 2695 incline(s);
3280af22
NIS
2696 } while (PL_doextract);
2697 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2698 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2699 SV *sv = NEWSV(85,0);
a687059c 2700
93a17b20 2701 sv_upgrade(sv, SVt_PVMG);
3280af22 2702 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2703 (void)SvIOK_on(sv);
2704 SvIVX(sv) = 0;
57843af0 2705 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2706 }
3280af22 2707 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2708 PL_last_lop = PL_last_uni = Nullch;
57843af0 2709 if (CopLINE(PL_curcop) == 1) {
3280af22 2710 while (s < PL_bufend && isSPACE(*s))
79072805 2711 s++;
a0d0e21e 2712 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2713 s++;
44a8e56a 2714 d = Nullch;
3280af22 2715 if (!PL_in_eval) {
44a8e56a 2716 if (*s == '#' && *(s+1) == '!')
2717 d = s + 2;
2718#ifdef ALTERNATE_SHEBANG
2719 else {
2720 static char as[] = ALTERNATE_SHEBANG;
2721 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2722 d = s + (sizeof(as) - 1);
2723 }
2724#endif /* ALTERNATE_SHEBANG */
2725 }
2726 if (d) {
b8378b72 2727 char *ipath;
774d564b 2728 char *ipathend;
b8378b72 2729
774d564b 2730 while (isSPACE(*d))
b8378b72
CS
2731 d++;
2732 ipath = d;
774d564b 2733 while (*d && !isSPACE(*d))
2734 d++;
2735 ipathend = d;
2736
2737#ifdef ARG_ZERO_IS_SCRIPT
2738 if (ipathend > ipath) {
2739 /*
2740 * HP-UX (at least) sets argv[0] to the script name,
2741 * which makes $^X incorrect. And Digital UNIX and Linux,
2742 * at least, set argv[0] to the basename of the Perl
2743 * interpreter. So, having found "#!", we'll set it right.
2744 */
ee2f7564 2745 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2746 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2747 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2748 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2749 SvSETMAGIC(x);
2750 }
556c1dec
JH
2751 else {
2752 STRLEN blen;
2753 STRLEN llen;
2754 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2755 char *lstart = SvPV(x,llen);
2756 if (llen < blen) {
2757 bstart += blen - llen;
2758 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2759 sv_setpvn(x, ipath, ipathend - ipath);
2760 SvSETMAGIC(x);
2761 }
2762 }
2763 }
774d564b 2764 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2765 }
774d564b 2766#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2767
2768 /*
2769 * Look for options.
2770 */
748a9306 2771 d = instr(s,"perl -");
84e30d1a 2772 if (!d) {
748a9306 2773 d = instr(s,"perl");
84e30d1a
GS
2774#if defined(DOSISH)
2775 /* avoid getting into infinite loops when shebang
2776 * line contains "Perl" rather than "perl" */
2777 if (!d) {
2778 for (d = ipathend-4; d >= ipath; --d) {
2779 if ((*d == 'p' || *d == 'P')
2780 && !ibcmp(d, "perl", 4))
2781 {
2782 break;
2783 }
2784 }
2785 if (d < ipath)
2786 d = Nullch;
2787 }
2788#endif
2789 }
44a8e56a 2790#ifdef ALTERNATE_SHEBANG
2791 /*
2792 * If the ALTERNATE_SHEBANG on this system starts with a
2793 * character that can be part of a Perl expression, then if
2794 * we see it but not "perl", we're probably looking at the
2795 * start of Perl code, not a request to hand off to some
2796 * other interpreter. Similarly, if "perl" is there, but
2797 * not in the first 'word' of the line, we assume the line
2798 * contains the start of the Perl program.
44a8e56a 2799 */
2800 if (d && *s != '#') {
774d564b 2801 char *c = ipath;
44a8e56a 2802 while (*c && !strchr("; \t\r\n\f\v#", *c))
2803 c++;
2804 if (c < d)
2805 d = Nullch; /* "perl" not in first word; ignore */
2806 else
2807 *s = '#'; /* Don't try to parse shebang line */
2808 }
774d564b 2809#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2810#ifndef MACOS_TRADITIONAL
748a9306 2811 if (!d &&
44a8e56a 2812 *s == '#' &&
774d564b 2813 ipathend > ipath &&
3280af22 2814 !PL_minus_c &&
748a9306 2815 !instr(s,"indir") &&
3280af22 2816 instr(PL_origargv[0],"perl"))
748a9306 2817 {
9f68db38 2818 char **newargv;
9f68db38 2819
774d564b 2820 *ipathend = '\0';
2821 s = ipathend + 1;
3280af22 2822 while (s < PL_bufend && isSPACE(*s))
9f68db38 2823 s++;
3280af22
NIS
2824 if (s < PL_bufend) {
2825 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2826 newargv[1] = s;
3280af22 2827 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2828 s++;
2829 *s = '\0';
3280af22 2830 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2831 }
2832 else
3280af22 2833 newargv = PL_origargv;
774d564b 2834 newargv[0] = ipath;
b35112e7 2835 PERL_FPU_PRE_EXEC
b4748376 2836 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2837 PERL_FPU_POST_EXEC
cea2e8a9 2838 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2839 }
bf4acbe4 2840#endif
748a9306 2841 if (d) {
3280af22
NIS
2842 U32 oldpdb = PL_perldb;
2843 bool oldn = PL_minus_n;
2844 bool oldp = PL_minus_p;
748a9306
LW
2845
2846 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2847 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2848
2849 if (*d++ == '-') {
a11ec5a9 2850 bool switches_done = PL_doswitches;
8cc95fdb 2851 do {
2852 if (*d == 'M' || *d == 'm') {
2853 char *m = d;
2854 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2855 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2856 (int)(d - m), m);
2857 }
2858 d = moreswitches(d);
2859 } while (d);
f0b2cf55
YST
2860 if (PL_doswitches && !switches_done) {
2861 int argc = PL_origargc;
2862 char **argv = PL_origargv;
2863 do {
2864 argc--,argv++;
2865 } while (argc && argv[0][0] == '-' && argv[0][1]);
2866 init_argv_symbols(argc,argv);
2867 }
155aba94
GS
2868 if ((PERLDB_LINE && !oldpdb) ||
2869 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2870 /* if we have already added "LINE: while (<>) {",
2871 we must not do it again */
748a9306 2872 {
3280af22
NIS
2873 sv_setpv(PL_linestr, "");
2874 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2875 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2876 PL_last_lop = PL_last_uni = Nullch;
3280af22 2877 PL_preambled = FALSE;
84902520 2878 if (PERLDB_LINE)
3280af22 2879 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2880 goto retry;
2881 }
a11ec5a9
RGS
2882 if (PL_doswitches && !switches_done) {
2883 int argc = PL_origargc;
2884 char **argv = PL_origargv;
2885 do {
2886 argc--,argv++;
2887 } while (argc && argv[0][0] == '-' && argv[0][1]);
2888 init_argv_symbols(argc,argv);
2889 }
a0d0e21e 2890 }
79072805 2891 }
9f68db38 2892 }
79072805 2893 }
3280af22
NIS
2894 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2895 PL_bufptr = s;
2896 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2897 return yylex();
ae986130 2898 }
378cc40b 2899 goto retry;
4fdae800 2900 case '\r':
6a27c188 2901#ifdef PERL_STRICT_CR
cea2e8a9 2902 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2903 Perl_croak(aTHX_
cc507455 2904 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2905#endif
4fdae800 2906 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2907#ifdef MACOS_TRADITIONAL
2908 case '\312':
2909#endif
378cc40b
LW
2910 s++;
2911 goto retry;
378cc40b 2912 case '#':
e929a76b 2913 case '\n':
3280af22 2914 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2915 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2916 /* handle eval qq[#line 1 "foo"\n ...] */
2917 CopLINE_dec(PL_curcop);
2918 incline(s);
2919 }
3280af22 2920 d = PL_bufend;
a687059c 2921 while (s < d && *s != '\n')
378cc40b 2922 s++;
0f85fab0 2923 if (s < d)
378cc40b 2924 s++;
78c267c1 2925 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2926 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2927 incline(s);
3280af22
NIS
2928 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2929 PL_bufptr = s;
2930 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2931 return yylex();
a687059c 2932 }
378cc40b 2933 }
a687059c 2934 else {
378cc40b 2935 *s = '\0';
3280af22 2936 PL_bufend = s;
a687059c 2937 }
378cc40b
LW
2938 goto retry;
2939 case '-':
79072805 2940 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2941 I32 ftst = 0;
2942
378cc40b 2943 s++;
3280af22 2944 PL_bufptr = s;
748a9306
LW
2945 tmp = *s++;
2946
bf4acbe4 2947 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2948 s++;
2949
2950 if (strnEQ(s,"=>",2)) {
3280af22 2951 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2952 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2953 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2954 } );
748a9306
LW
2955 OPERATOR('-'); /* unary minus */
2956 }
3280af22 2957 PL_last_uni = PL_oldbufptr;
748a9306 2958 switch (tmp) {
e5edeb50
JH
2959 case 'r': ftst = OP_FTEREAD; break;
2960 case 'w': ftst = OP_FTEWRITE; break;
2961 case 'x': ftst = OP_FTEEXEC; break;
2962 case 'o': ftst = OP_FTEOWNED; break;
2963 case 'R': ftst = OP_FTRREAD; break;
2964 case 'W': ftst = OP_FTRWRITE; break;
2965 case 'X': ftst = OP_FTREXEC; break;
2966 case 'O': ftst = OP_FTROWNED; break;
2967 case 'e': ftst = OP_FTIS; break;
2968 case 'z': ftst = OP_FTZERO; break;
2969 case 's': ftst = OP_FTSIZE; break;
2970 case 'f': ftst = OP_FTFILE; break;
2971 case 'd': ftst = OP_FTDIR; break;
2972 case 'l': ftst = OP_FTLINK; break;
2973 case 'p': ftst = OP_FTPIPE; break;
2974 case 'S': ftst = OP_FTSOCK; break;
2975 case 'u': ftst = OP_FTSUID; break;
2976 case 'g': ftst = OP_FTSGID; break;
2977 case 'k': ftst = OP_FTSVTX; break;
2978 case 'b': ftst = OP_FTBLK; break;
2979 case 'c': ftst = OP_FTCHR; break;
2980 case 't': ftst = OP_FTTTY; break;
2981 case 'T': ftst = OP_FTTEXT; break;
2982 case 'B': ftst = OP_FTBINARY; break;
2983 case 'M': case 'A': case 'C':
2984 gv_fetchpv("\024",TRUE, SVt_PV);
2985 switch (tmp) {
2986 case 'M': ftst = OP_FTMTIME; break;
2987 case 'A': ftst = OP_FTATIME; break;
2988 case 'C': ftst = OP_FTCTIME; break;
2989 default: break;
2990 }
2991 break;
378cc40b 2992 default:
378cc40b
LW
2993 break;
2994 }
e5edeb50 2995 if (ftst) {
eb160463 2996 PL_last_lop_op = (OPCODE)ftst;
4e553d73 2997 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 2998 "### Saw file test %c\n", (int)ftst);
5f80b19c 2999 } );
e5edeb50
JH
3000 FTST(ftst);
3001 }
3002 else {
3003 /* Assume it was a minus followed by a one-letter named
3004 * subroutine call (or a -bareword), then. */
95c31fe3 3005 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0
RGS
3006 "### '-%c' looked like a file test but was not\n",
3007 tmp);
5f80b19c 3008 } );
3cf7b4c4 3009 s = --PL_bufptr;
e5edeb50 3010 }
378cc40b 3011 }
a687059c
LW
3012 tmp = *s++;
3013 if (*s == tmp) {
3014 s++;
3280af22 3015 if (PL_expect == XOPERATOR)
79072805
LW
3016 TERM(POSTDEC);
3017 else
3018 OPERATOR(PREDEC);
3019 }
3020 else if (*s == '>') {
3021 s++;
3022 s = skipspace(s);
7e2040f0 3023 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3024 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3025 TOKEN(ARROW);
79072805 3026 }
748a9306
LW
3027 else if (*s == '$')
3028 OPERATOR(ARROW);
463ee0b2 3029 else
748a9306 3030 TERM(ARROW);
a687059c 3031 }
3280af22 3032 if (PL_expect == XOPERATOR)
79072805
LW
3033 Aop(OP_SUBTRACT);
3034 else {
3280af22 3035 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3036 check_uni();
79072805 3037 OPERATOR('-'); /* unary minus */
2f3197b3 3038 }
79072805 3039
378cc40b 3040 case '+':
a687059c
LW
3041 tmp = *s++;
3042 if (*s == tmp) {
378cc40b 3043 s++;
3280af22 3044 if (PL_expect == XOPERATOR)
79072805
LW
3045 TERM(POSTINC);
3046 else
3047 OPERATOR(PREINC);
378cc40b 3048 }
3280af22 3049 if (PL_expect == XOPERATOR)
79072805
LW
3050 Aop(OP_ADD);
3051 else {
3280af22 3052 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3053 check_uni();
a687059c 3054 OPERATOR('+');
2f3197b3 3055 }
a687059c 3056
378cc40b 3057 case '*':
3280af22
NIS
3058 if (PL_expect != XOPERATOR) {
3059 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3060 PL_expect = XOPERATOR;
3061 force_ident(PL_tokenbuf, '*');
3062 if (!*PL_tokenbuf)
a0d0e21e 3063 PREREF('*');
79072805 3064 TERM('*');
a687059c 3065 }
79072805
LW
3066 s++;
3067 if (*s == '*') {
a687059c 3068 s++;
79072805 3069 PWop(OP_POW);
a687059c 3070 }
79072805
LW
3071 Mop(OP_MULTIPLY);
3072
378cc40b 3073 case '%':
3280af22 3074 if (PL_expect == XOPERATOR) {
bbce6d69 3075 ++s;
3076 Mop(OP_MODULO);
a687059c 3077 }
3280af22
NIS
3078 PL_tokenbuf[0] = '%';
3079 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3080 if (!PL_tokenbuf[1]) {
bbce6d69 3081 PREREF('%');
a687059c 3082 }
3280af22 3083 PL_pending_ident = '%';
bbce6d69 3084 TERM('%');
a687059c 3085
378cc40b 3086 case '^':
79072805 3087 s++;
a0d0e21e 3088 BOop(OP_BIT_XOR);
79072805 3089 case '[':
3280af22 3090 PL_lex_brackets++;
79072805 3091 /* FALL THROUGH */
378cc40b 3092 case '~':
378cc40b 3093 case ',':
378cc40b
LW
3094 tmp = *s++;
3095 OPERATOR(tmp);
a0d0e21e
LW
3096 case ':':
3097 if (s[1] == ':') {
3098 len = 0;
3099 goto just_a_word;
3100 }
3101 s++;
09bef843
SB
3102 switch (PL_expect) {
3103 OP *attrs;
3104 case XOPERATOR:
3105 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3106 break;
3107 PL_bufptr = s; /* update in case we back off */
3108 goto grabattrs;
3109 case XATTRBLOCK:
3110 PL_expect = XBLOCK;
3111 goto grabattrs;
3112 case XATTRTERM:
3113 PL_expect = XTERMBLOCK;
3114 grabattrs:
3115 s = skipspace(s);
3116 attrs = Nullop;
7e2040f0 3117 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3118 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3119 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3120 if (tmp < 0) tmp = -tmp;
3121 switch (tmp) {
3122 case KEY_or:
3123 case KEY_and:
c963b151 3124 case KEY_err:
f9829d6b
GS
3125 case KEY_for:
3126 case KEY_unless:
3127 case KEY_if:
3128 case KEY_while:
3129 case KEY_until:
3130 goto got_attrs;
3131 default:
3132 break;
3133 }
3134 }
09bef843
SB
3135 if (*d == '(') {
3136 d = scan_str(d,TRUE,TRUE);
3137 if (!d) {
09bef843
SB
3138 /* MUST advance bufptr here to avoid bogus
3139 "at end of line" context messages from yyerror().
3140 */
3141 PL_bufptr = s + len;
3142 yyerror("Unterminated attribute parameter in attribute list");
3143 if (attrs)
3144 op_free(attrs);
bbf60fe6 3145 return REPORT(0); /* EOF indicator */
09bef843
SB
3146 }
3147 }
3148 if (PL_lex_stuff) {
3149 SV *sv = newSVpvn(s, len);
3150 sv_catsv(sv, PL_lex_stuff);
3151 attrs = append_elem(OP_LIST, attrs,
3152 newSVOP(OP_CONST, 0, sv));
3153 SvREFCNT_dec(PL_lex_stuff);
3154 PL_lex_stuff = Nullsv;
3155 }
3156 else {
371fce9b
DM
3157 if (len == 6 && strnEQ(s, "unique", len)) {
3158 if (PL_in_my == KEY_our)
3159#ifdef USE_ITHREADS
3160 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3161#else
3162 ; /* skip to avoid loading attributes.pm */
3163#endif
3164 else
3165 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3166 }
3167
d3cea301
SB
3168 /* NOTE: any CV attrs applied here need to be part of
3169 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3170 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3171 CvLVALUE_on(PL_compcv);
3172 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3173 CvLOCKED_on(PL_compcv);
3174 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3175 CvMETHOD_on(PL_compcv);
06492da6
SF
3176 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3177 CvASSERTION_on(PL_compcv);
78f9721b
SM
3178 /* After we've set the flags, it could be argued that
3179 we don't need to do the attributes.pm-based setting
3180 process, and shouldn't bother appending recognized
d3cea301
SB
3181 flags. To experiment with that, uncomment the
3182 following "else". (Note that's already been
3183 uncommented. That keeps the above-applied built-in
3184 attributes from being intercepted (and possibly
3185 rejected) by a package's attribute routines, but is
3186 justified by the performance win for the common case
3187 of applying only built-in attributes.) */
0256094b 3188 else
78f9721b
SM
3189 attrs = append_elem(OP_LIST, attrs,
3190 newSVOP(OP_CONST, 0,
3191 newSVpvn(s, len)));
09bef843
SB
3192 }
3193 s = skipspace(d);
0120eecf 3194 if (*s == ':' && s[1] != ':')
09bef843 3195 s = skipspace(s+1);
0120eecf
GS
3196 else if (s == d)
3197 break; /* require real whitespace or :'s */
09bef843 3198 }
f9829d6b 3199 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3200 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3201 char q = ((*s == '\'') ? '"' : '\'');
3202 /* If here for an expression, and parsed no attrs, back off. */
3203 if (tmp == '=' && !attrs) {
3204 s = PL_bufptr;
3205 break;
3206 }
3207 /* MUST advance bufptr here to avoid bogus "at end of line"
3208 context messages from yyerror().
3209 */
3210 PL_bufptr = s;
3211 if (!*s)
3212 yyerror("Unterminated attribute list");
3213 else
3214 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3215 q, *s, q));
3216 if (attrs)
3217 op_free(attrs);
3218 OPERATOR(':');
3219 }
f9829d6b 3220 got_attrs:
09bef843
SB
3221 if (attrs) {
3222 PL_nextval[PL_nexttoke].opval = attrs;
3223 force_next(THING);
3224 }
3225 TOKEN(COLONATTR);
3226 }
a0d0e21e 3227 OPERATOR(':');
8990e307
LW
3228 case '(':
3229 s++;
3280af22
NIS
3230 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3231 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3232 else
3280af22 3233 PL_expect = XTERM;
4a202259 3234 s = skipspace(s);
a0d0e21e 3235 TOKEN('(');
378cc40b 3236 case ';':
f4dd75d9 3237 CLINE;
378cc40b
LW
3238 tmp = *s++;
3239 OPERATOR(tmp);
3240 case ')':
378cc40b 3241 tmp = *s++;
16d20bd9
AD
3242 s = skipspace(s);
3243 if (*s == '{')
3244 PREBLOCK(tmp);
378cc40b 3245 TERM(tmp);
79072805
LW
3246 case ']':
3247 s++;
3280af22 3248 if (PL_lex_brackets <= 0)
d98d5fff 3249 yyerror("Unmatched right square bracket");
463ee0b2 3250 else
3280af22
NIS
3251 --PL_lex_brackets;
3252 if (PL_lex_state == LEX_INTERPNORMAL) {
3253 if (PL_lex_brackets == 0) {
a0d0e21e 3254 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3255 PL_lex_state = LEX_INTERPEND;
79072805
LW
3256 }
3257 }
4633a7c4 3258 TERM(']');
79072805
LW
3259 case '{':
3260 leftbracket:
79072805 3261 s++;
3280af22 3262 if (PL_lex_brackets > 100) {
8edd5f42 3263 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3264 }
3280af22 3265 switch (PL_expect) {
a0d0e21e 3266 case XTERM:
3280af22 3267 if (PL_lex_formbrack) {
a0d0e21e
LW
3268 s--;
3269 PRETERMBLOCK(DO);
3270 }
3280af22
NIS
3271 if (PL_oldoldbufptr == PL_last_lop)
3272 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3273 else
3280af22 3274 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3275 OPERATOR(HASHBRACK);
a0d0e21e 3276 case XOPERATOR:
bf4acbe4 3277 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3278 s++;
44a8e56a 3279 d = s;
3280af22
NIS
3280 PL_tokenbuf[0] = '\0';
3281 if (d < PL_bufend && *d == '-') {
3282 PL_tokenbuf[0] = '-';
44a8e56a 3283 d++;
bf4acbe4 3284 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3285 d++;
3286 }
7e2040f0 3287 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3288 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3289 FALSE, &len);
bf4acbe4 3290 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3291 d++;
3292 if (*d == '}') {
3280af22 3293 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3294 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3295 if (minus)
3296 force_next('-');
748a9306
LW
3297 }
3298 }
3299 /* FALL THROUGH */
09bef843 3300 case XATTRBLOCK:
748a9306 3301 case XBLOCK:
3280af22
NIS
3302 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3303 PL_expect = XSTATE;
a0d0e21e 3304 break;
09bef843 3305 case XATTRTERM:
a0d0e21e 3306 case XTERMBLOCK:
3280af22
NIS
3307 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3308 PL_expect = XSTATE;
a0d0e21e
LW
3309 break;
3310 default: {
3311 char *t;
3280af22
NIS
3312 if (PL_oldoldbufptr == PL_last_lop)
3313 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3314 else
3280af22 3315 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3316 s = skipspace(s);
8452ff4b
SB
3317 if (*s == '}') {
3318 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3319 PL_expect = XTERM;
3320 /* This hack is to get the ${} in the message. */
3321 PL_bufptr = s+1;
3322 yyerror("syntax error");
3323 break;
3324 }
a0d0e21e 3325 OPERATOR(HASHBRACK);
8452ff4b 3326 }
b8a4b1be
GS
3327 /* This hack serves to disambiguate a pair of curlies
3328 * as being a block or an anon hash. Normally, expectation
3329 * determines that, but in cases where we're not in a
3330 * position to expect anything in particular (like inside
3331 * eval"") we have to resolve the ambiguity. This code
3332 * covers the case where the first term in the curlies is a
3333 * quoted string. Most other cases need to be explicitly
3334 * disambiguated by prepending a `+' before the opening
3335 * curly in order to force resolution as an anon hash.
3336 *
3337 * XXX should probably propagate the outer expectation
3338 * into eval"" to rely less on this hack, but that could
3339 * potentially break current behavior of eval"".
3340 * GSAR 97-07-21
3341 */
3342 t = s;
3343 if (*s == '\'' || *s == '"' || *s == '`') {
3344 /* common case: get past first string, handling escapes */
3280af22 3345 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3346 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3347 t++;
3348 t++;
a0d0e21e 3349 }
b8a4b1be 3350 else if (*s == 'q') {
3280af22 3351 if (++t < PL_bufend
b8a4b1be 3352 && (!isALNUM(*t)
3280af22 3353 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3354 && !isALNUM(*t))))
3355 {
abc667d1 3356 /* skip q//-like construct */
b8a4b1be
GS
3357 char *tmps;
3358 char open, close, term;
3359 I32 brackets = 1;
3360
3280af22 3361 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3362 t++;
abc667d1
DM
3363 /* check for q => */
3364 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3365 OPERATOR(HASHBRACK);
3366 }
b8a4b1be
GS
3367 term = *t;
3368 open = term;
3369 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3370 term = tmps[5];
3371 close = term;
3372 if (open == close)
3280af22
NIS
3373 for (t++; t < PL_bufend; t++) {
3374 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3375 t++;
6d07e5e9 3376 else if (*t == open)
b8a4b1be
GS
3377 break;
3378 }
abc667d1 3379 else {
3280af22
NIS
3380 for (t++; t < PL_bufend; t++) {
3381 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3382 t++;
6d07e5e9 3383 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3384 break;
3385 else if (*t == open)
3386 brackets++;
3387 }
abc667d1
DM
3388 }
3389 t++;
b8a4b1be 3390 }
abc667d1
DM
3391 else
3392 /* skip plain q word */
3393 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3394 t += UTF8SKIP(t);
a0d0e21e 3395 }
7e2040f0 3396 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3397 t += UTF8SKIP(t);
7e2040f0 3398 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3399 t += UTF8SKIP(t);
a0d0e21e 3400 }
3280af22 3401 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3402 t++;
b8a4b1be
GS
3403 /* if comma follows first term, call it an anon hash */
3404 /* XXX it could be a comma expression with loop modifiers */
3280af22 3405 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3406 || (*t == '=' && t[1] == '>')))
a0d0e21e 3407 OPERATOR(HASHBRACK);
3280af22 3408 if (PL_expect == XREF)
4e4e412b 3409 PL_expect = XTERM;
a0d0e21e 3410 else {
3280af22
NIS
3411 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3412 PL_expect = XSTATE;
a0d0e21e 3413 }
8990e307 3414 }
a0d0e21e 3415 break;
463ee0b2 3416 }
57843af0 3417 yylval.ival = CopLINE(PL_curcop);
79072805 3418 if (isSPACE(*s) || *s == '#')
3280af22 3419 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3420 TOKEN('{');
378cc40b 3421 case '}':
79072805
LW
3422 rightbracket:
3423 s++;
3280af22 3424 if (PL_lex_brackets <= 0)
d98d5fff 3425 yyerror("Unmatched right curly bracket");
463ee0b2 3426 else
3280af22 3427 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3428 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3429 PL_lex_formbrack = 0;
3430 if (PL_lex_state == LEX_INTERPNORMAL) {
3431 if (PL_lex_brackets == 0) {
9059aa12
LW
3432 if (PL_expect & XFAKEBRACK) {
3433 PL_expect &= XENUMMASK;
3280af22
NIS
3434 PL_lex_state = LEX_INTERPEND;
3435 PL_bufptr = s;
cea2e8a9 3436 return yylex(); /* ignore fake brackets */
79072805 3437 }
fa83b5b6 3438 if (*s == '-' && s[1] == '>')
3280af22 3439 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3440 else if (*s != '[' && *s != '{')
3280af22 3441 PL_lex_state = LEX_INTERPEND;
79072805
LW
3442 }
3443 }
9059aa12
LW
3444 if (PL_expect & XFAKEBRACK) {
3445 PL_expect &= XENUMMASK;
3280af22 3446 PL_bufptr = s;
cea2e8a9 3447 return yylex(); /* ignore fake brackets */
748a9306 3448 }
79072805
LW
3449 force_next('}');
3450 TOKEN(';');
378cc40b
LW
3451 case '&':
3452 s++;
3453 tmp = *s++;
3454 if (tmp == '&')
a0d0e21e 3455 AOPERATOR(ANDAND);
378cc40b 3456 s--;
3280af22 3457 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3458 if (ckWARN(WARN_SEMICOLON)
3459 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3460 {
57843af0 3461 CopLINE_dec(PL_curcop);
9014280d 3462 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3463 CopLINE_inc(PL_curcop);
463ee0b2 3464 }
79072805 3465 BAop(OP_BIT_AND);
463ee0b2 3466 }
79072805 3467
3280af22
NIS
3468 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3469 if (*PL_tokenbuf) {
3470 PL_expect = XOPERATOR;
3471 force_ident(PL_tokenbuf, '&');
463ee0b2 3472 }
79072805
LW
3473 else
3474 PREREF('&');
c07a80fd 3475 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3476 TERM('&');
3477
378cc40b
LW
3478 case '|':
3479 s++;
3480 tmp = *s++;
3481 if (tmp == '|')
a0d0e21e 3482 AOPERATOR(OROR);
378cc40b 3483 s--;
79072805 3484 BOop(OP_BIT_OR);
378cc40b
LW
3485 case '=':
3486 s++;
3487 tmp = *s++;
3488 if (tmp == '=')
79072805
LW
3489 Eop(OP_EQ);
3490 if (tmp == '>')
3491 OPERATOR(',');
378cc40b 3492 if (tmp == '~')
79072805 3493 PMop(OP_MATCH);
599cee73 3494 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3495 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3496 s--;
3280af22
NIS
3497 if (PL_expect == XSTATE && isALPHA(tmp) &&
3498 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3499 {
3280af22
NIS
3500 if (PL_in_eval && !PL_rsfp) {
3501 d = PL_bufend;
a5f75d66
AD
3502 while (s < d) {
3503 if (*s++ == '\n') {
3504 incline(s);
3505 if (strnEQ(s,"=cut",4)) {
3506 s = strchr(s,'\n');
3507 if (s)
3508 s++;
3509 else
3510 s = d;
3511 incline(s);
3512 goto retry;
3513 }
3514 }
3515 }
3516 goto retry;
3517 }
3280af22
NIS
3518 s = PL_bufend;
3519 PL_doextract = TRUE;
a0d0e21e
LW
3520 goto retry;
3521 }
3280af22 3522 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3523 char *t;
51882d45 3524#ifdef PERL_STRICT_CR
bf4acbe4 3525 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3526#else
bf4acbe4 3527 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3528#endif
a0d0e21e
LW
3529 if (*t == '\n' || *t == '#') {
3530 s--;
3280af22 3531 PL_expect = XBLOCK;
a0d0e21e
LW
3532 goto leftbracket;
3533 }
79072805 3534 }
a0d0e21e
LW
3535 yylval.ival = 0;
3536 OPERATOR(ASSIGNOP);
378cc40b
LW
3537 case '!':
3538 s++;
3539 tmp = *s++;
984200d0 3540 if (tmp == '=') {
decca21c
YST
3541 /* was this !=~ where !~ was meant?
3542 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3543
984200d0
YST
3544 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3545 char *t = s+1;
3546
3547 while (t < PL_bufend && isSPACE(*t))
3548 ++t;
3549
decca21c
YST
3550 if (*t == '/' || *t == '?' ||
3551 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3552 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
984200d0
YST
3553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
3554 "!=~ should be !~");
3555 }
79072805 3556 Eop(OP_NE);
984200d0 3557 }
378cc40b 3558 if (tmp == '~')
79072805 3559 PMop(OP_NOT);
378cc40b
LW
3560 s--;
3561 OPERATOR('!');
3562 case '<':
3280af22 3563 if (PL_expect != XOPERATOR) {
93a17b20 3564 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 3565 check_uni();
79072805
LW
3566 if (s[1] == '<')
3567 s = scan_heredoc(s);
3568 else
3569 s = scan_inputsymbol(s);
3570 TERM(sublex_start());
378cc40b
LW
3571 }
3572 s++;
3573 tmp = *s++;
3574 if (tmp == '<')
79072805 3575 SHop(OP_LEFT_SHIFT);
395c3793
LW
3576 if (tmp == '=') {
3577 tmp = *s++;
3578 if (tmp == '>')
79072805 3579 Eop(OP_NCMP);
395c3793 3580 s--;
79072805 3581 Rop(OP_LE);
395c3793 3582 }
378cc40b 3583 s--;
79072805 3584 Rop(OP_LT);
378cc40b
LW
3585 case '>':
3586 s++;
3587 tmp = *s++;
3588 if (tmp == '>')
79072805 3589 SHop(OP_RIGHT_SHIFT);
378cc40b 3590 if (tmp == '=')
79072805 3591 Rop(OP_GE);
378cc40b 3592 s--;
79072805 3593 Rop(OP_GT);
378cc40b
LW
3594
3595 case '$':
bbce6d69 3596 CLINE;
3597
3280af22
NIS
3598 if (PL_expect == XOPERATOR) {
3599 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3600 PL_expect = XTERM;
a0d0e21e 3601 depcom();
bbf60fe6 3602 return REPORT(','); /* grandfather non-comma-format format */
a0d0e21e 3603 }
8990e307 3604 }
a0d0e21e 3605
7e2040f0 3606 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 3607 PL_tokenbuf[0] = '@';
376b8730
SM
3608 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
3609 sizeof PL_tokenbuf - 1, FALSE);
3610 if (PL_expect == XOPERATOR)
3611 no_op("Array length", s);
3280af22 3612 if (!PL_tokenbuf[1])
a0d0e21e 3613 PREREF(DOLSHARP);
3280af22
NIS
3614 PL_expect = XOPERATOR;
3615 PL_pending_ident = '#';
463ee0b2 3616 TOKEN(DOLSHARP);
79072805 3617 }
bbce6d69 3618
3280af22 3619 PL_tokenbuf[0] = '$';
376b8730
SM
3620 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
3621 sizeof PL_tokenbuf - 1, FALSE);
3622 if (PL_expect == XOPERATOR)
3623 no_op("Scalar", s);
3280af22
NIS
3624 if (!PL_tokenbuf[1]) {
3625 if (s == PL_bufend)
bbce6d69 3626 yyerror("Final $ should be \\$ or $name");
3627 PREREF('$');
8990e307 3628 }
a0d0e21e 3629
bbce6d69 3630 /* This kludge not intended to be bulletproof. */
3280af22 3631 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
bbce6d69 3632 yylval.opval = newSVOP(OP_CONST, 0,
b448e4fe 3633 newSViv(PL_compiling.cop_arybase));
bbce6d69 3634 yylval.opval->op_private = OPpCONST_ARYBASE;
3635 TERM(THING);
3636 }
3637
ff68c719 3638 d = s;
69d2bceb 3639 tmp = (I32)*s;
3280af22 3640 if (PL_lex_state == LEX_NORMAL)
ff68c719 3641 s = skipspace(s);
3642
3280af22 3643 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3644 char *t;
3645 if (*s == '[') {
3280af22 3646 PL_tokenbuf[0] = '@';
599cee73 3647 if (ckWARN(WARN_SYNTAX)) {
bbce6d69 3648 for(t = s + 1;
7e2040f0 3649 isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$';
bbce6d69 3650 t++) ;
a0d0e21e 3651 if (*t++ == ',') {
3280af22
NIS
3652 PL_bufptr = skipspace(PL_bufptr);
3653 while (t < PL_bufend && *t != ']')
bbce6d69 3654 t++;
9014280d 3655 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73
PM
3656 "Multidimensional syntax %.*s not supported",
3657 (t - PL_bufptr) + 1, PL_bufptr);
a0d0e21e
LW
3658 }
3659 }
bbce6d69 3660 }
3661 else if (*s == '{') {
3280af22 3662 PL_tokenbuf[0] = '%';
599cee73 3663 if (ckWARN(WARN_SYNTAX) && strEQ(PL_tokenbuf+1, "SIG") &&
bbce6d69 3664 (t = strchr(s, '}')) && (t = strchr(t, '=')))
3665 {
3280af22 3666 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3667 STRLEN len;
3668 for (t++; isSPACE(*t); t++) ;
7e2040f0 3669 if (isIDFIRST_lazy_if(t,UTF)) {
8903cb82 3670 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE, &len);
59a6d928 3671 for (; isSPACE(*t); t++) ;
864dbfa3 3672 if (*t == ';' && get_cv(tmpbuf, FALSE))
9014280d 3673 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3674 "You need to quote \"%s\"", tmpbuf);
748a9306 3675 }
93a17b20
LW
3676 }
3677 }
2f3197b3 3678 }
bbce6d69 3679
3280af22 3680 PL_expect = XOPERATOR;
69d2bceb 3681 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
3280af22
NIS
3682 bool islop = (PL_last_lop == PL_oldoldbufptr);
3683 if (!islop || PL_last_lop_op == OP_GREPSTART)
3684 PL_expect = XOPERATOR;
bbce6d69 3685 else if (strchr("$@\"'`q", *s))
3280af22 3686 PL_expect = XTERM; /* e.g. print $fh "foo" */
7e2040f0 3687 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
3280af22 3688 PL_expect = XTERM; /* e.g. print $fh &sub */
7e2040f0 3689 else if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 3690 char tmpbuf[sizeof PL_tokenbuf];
8903cb82 3691 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
155aba94 3692 if ((tmp = keyword(tmpbuf, len))) {
84902520
TB
3693 /* binary operators exclude handle interpretations */
3694 switch (tmp) {
3695 case -KEY_x:
3696 case -KEY_eq:
3697 case -KEY_ne:
3698 case -KEY_gt:
3699 case -KEY_lt:
3700 case -KEY_ge:
3701 case -KEY_le:
3702 case -KEY_cmp:
3703 break;
3704 default:
3280af22 3705 PL_expect = XTERM; /* e.g. print $fh length() */
84902520
TB
3706 break;
3707 }
3708 }
68dc0745 3709 else {
8a8635f0 3710 PL_expect = XTERM; /* e.g. print $fh subr() */
93a17b20 3711 }
93a17b20 3712 }
bbce6d69 3713 else if (isDIGIT(*s))
3280af22 3714 PL_expect = XTERM; /* e.g. print $fh 3 */
bbce6d69 3715 else if (*s == '.' && isDIGIT(s[1]))
3280af22 3716 PL_expect = XTERM; /* e.g. print $fh .3 */
c963b151
BD
3717 else if (strchr("?-+", *s) && !isSPACE(s[1]) && s[1] != '=')
3718 PL_expect = XTERM; /* e.g. print $fh -1 */
7ce6e6b9
RGS
3719 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '=' && s[1] != '/')
3720 PL_expect = XTERM; /* e.g. print $fh /.../
3721 XXX except DORDOR operator */
e0587a03 3722 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2]) && s[2] != '=')
3280af22 3723 PL_expect = XTERM; /* print $fh <<"EOF" */
bbce6d69 3724 }
3280af22 3725 PL_pending_ident = '$';
79072805 3726 TOKEN('$');
378cc40b
LW
3727
3728 case '@':
3280af22 3729 if (PL_expect == XOPERATOR)
bbce6d69 3730 no_op("Array", s);
3280af22
NIS
3731 PL_tokenbuf[0] = '@';
3732 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
3733 if (!PL_tokenbuf[1]) {
bbce6d69 3734 PREREF('@');
3735 }
3280af22 3736 if (PL_lex_state == LEX_NORMAL)
ff68c719 3737 s = skipspace(s);
3280af22 3738 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 3739 if (*s == '{')
3280af22 3740 PL_tokenbuf[0] = '%';
a0d0e21e
LW
3741
3742 /* Warn about @ where they meant $. */
599cee73 3743 if (ckWARN(WARN_SYNTAX)) {
a0d0e21e
LW
3744 if (*s == '[' || *s == '{') {
3745 char *t = s + 1;
7e2040f0 3746 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
3747 t++;
3748 if (*t == '}' || *t == ']') {
3749 t++;
3280af22 3750 PL_bufptr = skipspace(PL_bufptr);
9014280d 3751 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 3752 "Scalar value %.*s better written as $%.*s",
3280af22 3753 t-PL_bufptr, PL_bufptr, t-PL_bufptr-1, PL_bufptr+1);
a0d0e21e 3754 }
93a17b20
LW
3755 }
3756 }
463ee0b2 3757 }
3280af22 3758 PL_pending_ident = '@';
79072805 3759 TERM('@');
378cc40b 3760
c963b151 3761 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
3762 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
3763 s += 2;
3764 AOPERATOR(DORDOR);
3765 }
c963b151
BD
3766 case '?': /* may either be conditional or pattern */
3767 if(PL_expect == XOPERATOR) {
3768 tmp = *s++;
3769 if(tmp == '?') {
3770 OPERATOR('?');
3771 }
3772 else {
3773 tmp = *s++;
3774 if(tmp == '/') {
3775 /* A // operator. */
3776 AOPERATOR(DORDOR);
3777 }
3778 else {
3779 s--;
3780 Mop(OP_DIVIDE);
3781 }
3782 }
3783 }
3784 else {
3785 /* Disable warning on "study /blah/" */
3786 if (PL_oldoldbufptr == PL_last_uni
3787 && (*PL_last_uni != 's' || s - PL_last_uni < 5
3788 || memNE(PL_last_uni, "study", 5)
3789 || isALNUM_lazy_if(PL_last_uni+5,UTF)
3790 ))
3791 check_uni();
3792 s = scan_pat(s,OP_MATCH);
3793 TERM(sublex_start());
3794 }
378cc40b
LW
3795
3796 case '.':
51882d45
GS
3797 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
3798#ifdef PERL_STRICT_CR
3799 && s[1] == '\n'
3800#else
3801 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
3802#endif
3803 && (s == PL_linestart || s[-1] == '\n') )
3804 {
3280af22
NIS
3805 PL_lex_formbrack = 0;
3806 PL_expect = XSTATE;
79072805
LW
3807 goto rightbracket;
3808 }
3280af22 3809 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
378cc40b 3810 tmp = *s++;
a687059c
LW
3811 if (*s == tmp) {
3812 s++;
2f3197b3
LW
3813 if (*s == tmp) {
3814 s++;
79072805 3815 yylval.ival = OPf_SPECIAL;
2f3197b3
LW
3816 }
3817 else
79072805 3818 yylval.ival = 0;
378cc40b 3819 OPERATOR(DOTDOT);
a687059c 3820 }
3280af22 3821 if (PL_expect != XOPERATOR)
2f3197b3 3822 check_uni();
79072805 3823 Aop(OP_CONCAT);
378cc40b
LW
3824 }
3825 /* FALL THROUGH */
3826 case '0': case '1': case '2': case '3': case '4':
3827 case '5': case '6': case '7': case '8': case '9':
b73d6f50 3828 s = scan_num(s, &yylval);
4e553d73 3829 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3830 "### Saw number in '%s'\n", s);
5f80b19c 3831 } );
3280af22 3832 if (PL_expect == XOPERATOR)
8990e307 3833 no_op("Number",s);
79072805
LW
3834 TERM(THING);
3835
3836 case '\'':
09bef843 3837 s = scan_str(s,FALSE,FALSE);
4e553d73 3838 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3839 "### Saw string before '%s'\n", s);
5f80b19c 3840 } );
3280af22
NIS
3841 if (PL_expect == XOPERATOR) {
3842 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3843 PL_expect = XTERM;
a0d0e21e 3844 depcom();
bbf60fe6 3845 return REPORT(','); /* grandfather non-comma-format format */
a0d0e21e 3846 }
463ee0b2 3847 else
8990e307 3848 no_op("String",s);
463ee0b2 3849 }
79072805 3850 if (!s)
85e6fe83 3851 missingterm((char*)0);
79072805
LW
3852 yylval.ival = OP_CONST;
3853 TERM(sublex_start());
3854
3855 case '"':
09bef843 3856 s = scan_str(s,FALSE,FALSE);
4e553d73 3857 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3858 "### Saw string before '%s'\n", s);
5f80b19c 3859 } );
3280af22
NIS
3860 if (PL_expect == XOPERATOR) {
3861 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
3862 PL_expect = XTERM;
a0d0e21e 3863 depcom();
bbf60fe6 3864 return REPORT(','); /* grandfather non-comma-format format */
a0d0e21e 3865 }
463ee0b2 3866 else
8990e307 3867 no_op("String",s);
463ee0b2 3868 }
79072805 3869 if (!s)
85e6fe83 3870 missingterm((char*)0);
4633a7c4 3871 yylval.ival = OP_CONST;
3280af22 3872 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 3873 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
4633a7c4
LW
3874 yylval.ival = OP_STRINGIFY;
3875 break;
3876 }
3877 }
79072805
LW
3878 TERM(sublex_start());
3879
3880 case '`':
09bef843 3881 s = scan_str(s,FALSE,FALSE);
4e553d73 3882 DEBUG_T( { PerlIO_printf(Perl_debug_log,
207e3d1a 3883 "### Saw backtick string before '%s'\n", s);
5f80b19c 3884 } );
3280af22 3885 if (PL_expect == XOPERATOR)
8990e307 3886 no_op("Backticks",s);
79072805 3887 if (!s)
85e6fe83 3888 missingterm((char*)0);
79072805
LW
3889 yylval.ival = OP_BACKTICK;
3890 set_csh();
3891 TERM(sublex_start());
3892
3893 case '\\':
3894 s++;
599cee73 3895 if (ckWARN(WARN_SYNTAX) && PL_lex_inwhat && isDIGIT(*s))
9014280d 3896 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
599cee73 3897 *s, *s);
3280af22 3898 if (PL_expect == XOPERATOR)
8990e307 3899 no_op("Backslash",s);
79072805
LW
3900 OPERATOR(REFGEN);
3901
a7cb1f99 3902 case 'v':
e526c9e6 3903 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
a7cb1f99
GS
3904 char *start = s;
3905 start++;
3906 start++;
dd629d5b 3907 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
3908 start++;
3909 if (*start == '.' && isDIGIT(start[1])) {
b73d6f50 3910 s = scan_num(s, &yylval);
a7cb1f99
GS
3911 TERM(THING);
3912 }
e526c9e6 3913 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
3914 else if (!isALPHA(*start) && (PL_expect == XTERM
3915 || PL_expect == XREF || PL_expect == XSTATE
3916 || PL_expect == XTERMORDORDOR)) {
e526c9e6
GS
3917 char c = *start;
3918 GV *gv;
3919 *start = '\0';
3920 gv = gv_fetchpv(s, FALSE, SVt_PVCV);
3921 *start = c;
3922 if (!gv) {
b73d6f50 3923 s = scan_num(s, &yylval);
e526c9e6
GS
3924 TERM(THING);
3925 }
3926 }
a7cb1f99
GS
3927 }
3928 goto keylookup;
79072805 3929 case 'x':
3280af22 3930 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
3931 s++;
3932 Mop(OP_REPEAT);
2f3197b3 3933 }
79072805
LW
3934 goto keylookup;
3935
378cc40b 3936 case '_':
79072805
LW
3937 case 'a': case 'A':
3938 case 'b': case 'B':
3939 case 'c': case 'C':
3940 case 'd': case 'D':
3941 case 'e': case 'E':
3942 case 'f': case 'F':
3943 case 'g': case 'G':
3944 case 'h': case 'H':
3945 case 'i': case 'I':
3946 case 'j': case 'J':
3947 case 'k': case 'K':
3948 case 'l': case 'L':
3949 case 'm': case 'M':
3950 case 'n': case 'N':
3951 case 'o': case 'O':
3952 case 'p': case 'P':
3953 case 'q': case 'Q':
3954 case 'r': case 'R':
3955 case 's': case 'S':
3956 case 't': case 'T':
3957 case 'u': case 'U':
a7cb1f99 3958 case 'V':
79072805
LW
3959 case 'w': case 'W':
3960 case 'X':
3961 case 'y': case 'Y':
3962 case 'z': case 'Z':
3963
49dc05e3 3964 keylookup: {
1d239bbb 3965 orig_keyword = 0;
161b471a
NIS
3966 gv = Nullgv;
3967 gvp = 0;
49dc05e3 3968
3280af22
NIS
3969 PL_bufptr = s;
3970 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 3971
3972 /* Some keywords can be followed by any delimiter, including ':' */
155aba94
GS
3973 tmp = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
3974 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
3975 (PL_tokenbuf[0] == 'q' &&
3976 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 3977
3978 /* x::* is just a word, unless x is "CORE" */
3280af22 3979 if (!tmp && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
3980 goto just_a_word;
3981
3643fb5f 3982 d = s;
3280af22 3983 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
3984 d++; /* no comments skipped here, or s### is misparsed */
3985
3986 /* Is this a label? */
3280af22
NIS
3987 if (!tmp && PL_expect == XSTATE
3988 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8ebc5c01 3989 s = d + 1;
3280af22 3990 yylval.pval = savepv(PL_tokenbuf);
8ebc5c01 3991 CLINE;
3992 TOKEN(LABEL);
3643fb5f
CS
3993 }
3994
3995 /* Check for keywords */
3280af22 3996 tmp = keyword(PL_tokenbuf, len);
748a9306
LW
3997
3998 /* Is this a word before a => operator? */
1c3923b3 3999 if (*d == '=' && d[1] == '>') {
748a9306 4000 CLINE;
3280af22 4001 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
748a9306 4002 yylval.opval->op_private = OPpCONST_BARE;
0064a8a9 4003 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 4004 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
748a9306
LW
4005 TERM(WORD);
4006 }
4007
a0d0e21e 4008 if (tmp < 0) { /* second-class keyword? */
56f7f34b
CS
4009 GV *ogv = Nullgv; /* override (winner) */
4010 GV *hgv = Nullgv; /* hidden (loser) */
3280af22 4011 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 4012 CV *cv;
3280af22 4013 if ((gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV)) &&
56f7f34b
CS
4014 (cv = GvCVu(gv)))
4015 {
4016 if (GvIMPORTED_CV(gv))
4017 ogv = gv;
4018 else if (! CvMETHOD(cv))
4019 hgv = gv;
4020 }
4021 if (!ogv &&
3280af22
NIS
4022 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
4023 (gv = *gvp) != (GV*)&PL_sv_undef &&
56f7f34b
CS
4024 GvCVu(gv) && GvIMPORTED_CV(gv))
4025 {
4026 ogv = gv;
4027 }
4028 }
4029 if (ogv) {
30fe34ed 4030 orig_keyword = tmp;
56f7f34b 4031 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
4032 }
4033 else if (gv && !gvp
4034 && -tmp==KEY_lock /* XXX generalizable kludge */
d0456cad 4035 && GvCVu(gv)
3280af22 4036 && !hv_fetch(GvHVn(PL_incgv), "Thread.pm", 9, FALSE))
6e7b2336
GS
4037 {
4038 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 4039 }
7fc307b5
RGS
4040 else if (gv && !gvp
4041 && tmp == -KEY_err
4042 && GvCVu(gv)
4043 && PL_expect != XOPERATOR
4044 && PL_expect != XTERMORDORDOR)
4045 {
4046 /* any sub overrides the "err" keyword, except when really an
4047 * operator is expected */
4048 tmp = 0;
4049 }
56f7f34b
CS
4050 else { /* no override */
4051 tmp = -tmp;
ac206dc8 4052 if (tmp == KEY_dump && ckWARN(WARN_MISC)) {
9014280d 4053 Perl_warner(aTHX_ packWARN(WARN_MISC),
ac206dc8
RGS
4054 "dump() better written as CORE::dump()");
4055 }
56f7f34b
CS
4056 gv = Nullgv;
4057 gvp = 0;
4944e2f7
GS
4058 if (ckWARN(WARN_AMBIGUOUS) && hgv
4059 && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
9014280d 4060 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 4061 "Ambiguous call resolved as CORE::%s(), %s",
2f3ca594 4062 GvENAME(hgv), "qualify as such or use &");
49dc05e3 4063 }
a0d0e21e
LW
4064 }
4065
4066 reserved_word:
4067 switch (tmp) {
79072805
LW
4068
4069 default: /* not a keyword */
93a17b20 4070 just_a_word: {
96e4d5b1 4071 SV *sv;
ce29ac45 4072 int pkgname = 0;
3280af22 4073 char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
8990e307
LW
4074
4075 /* Get the rest if it looks like a package qualifier */
4076
155aba94 4077 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 4078 STRLEN morelen;
3280af22 4079 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
4080 TRUE, &morelen);
4081 if (!morelen)
cea2e8a9 4082 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 4083 *s == '\'' ? "'" : "::");
c3e0f903 4084 len += morelen;
ce29ac45 4085 pkgname = 1;
a0d0e21e 4086 }
8990e307 4087
3280af22
NIS
4088 if (PL_expect == XOPERATOR) {
4089 if (PL_bufptr == PL_linestart) {
57843af0 4090 CopLINE_dec(PL_curcop);
9014280d 4091 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 4092 CopLINE_inc(PL_curcop);
463ee0b2
LW
4093 }
4094 else
54310121 4095 no_op("Bareword",s);
463ee0b2 4096 }
8990e307 4097
c3e0f903
GS
4098 /* Look for a subroutine with this name in current package,
4099 unless name is "Foo::", in which case Foo is a bearword
4100 (and a package name). */
4101
4102 if (len > 2 &&
3280af22 4103 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 4104 {
e476b1b5 4105 if (ckWARN(WARN_BAREWORD) && ! gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVHV))
9014280d 4106 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 4107 "Bareword \"%s\" refers to nonexistent package",
3280af22 4108 PL_tokenbuf);
c3e0f903 4109 len -= 2;
3280af22 4110 PL_tokenbuf[len] = '\0';
c3e0f903
GS
4111 gv = Nullgv;
4112 gvp = 0;
4113 }
4114 else {
4115 len = 0;
4116 if (!gv)
3280af22 4117 gv = gv_fetchpv(PL_tokenbuf, FALSE, SVt_PVCV);
c3e0f903
GS
4118 }
4119
4120 /* if we saw a global override before, get the right name */
8990e307 4121
49dc05e3 4122 if (gvp) {
79cb57f6 4123 sv = newSVpvn("CORE::GLOBAL::",14);
3280af22 4124 sv_catpv(sv,PL_tokenbuf);
49dc05e3
GS
4125 }
4126 else
3280af22 4127 sv = newSVpv(PL_tokenbuf,0);
8990e307 4128
a0d0e21e
LW
4129 /* Presume this is going to be a bareword of some sort. */
4130
4131 CLINE;
49dc05e3 4132 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
a0d0e21e 4133 yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
4134 /* UTF-8 package name? */
4135 if (UTF && !IN_BYTES &&
4136 is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
4137 SvUTF8_on(sv);
a0d0e21e 4138
c3e0f903
GS
4139 /* And if "Foo::", then that's what it certainly is. */
4140
4141 if (len)
4142 goto safe_bareword;
4143
8990e307
LW
4144 /* See if it's the indirect object for a list operator. */
4145
3280af22
NIS
4146 if (PL_oldoldbufptr &&
4147 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
4148 (PL_oldoldbufptr == PL_last_lop
4149 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 4150 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
4151 (PL_expect == XREF ||
4152 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 4153 {
748a9306
LW
4154 bool immediate_paren = *s == '(';
4155
a0d0e21e
LW
4156 /* (Now we can afford to cross potential line boundary.) */
4157 s = skipspace(s);
4158
4159 /* Two barewords in a row may indicate method call. */
4160
7e2040f0 4161 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') && (tmp=intuit_method(s,gv)))
bbf60fe6 4162 return REPORT(tmp);
a0d0e21e
LW
4163
4164 /* If not a declared subroutine, it's an indirect object. */
4165 /* (But it's an indir obj regardless for sort.) */
4166
7948272d 4167 if ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f0670693 4168 ((!gv || !GvCVu(gv)) &&
a9ef352a 4169 (PL_last_lop_op != OP_MAPSTART &&
f0670693 4170 PL_last_lop_op != OP_GREPSTART))))
a9ef352a 4171 {
3280af22 4172 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 4173 goto bareword;
93a17b20
LW
4174 }
4175 }
8990e307 4176
3280af22 4177 PL_expect = XOPERATOR;
8990e307 4178 s = skipspace(s);
1c3923b3
GS
4179
4180 /* Is this a word before a => operator? */
ce29ac45 4181 if (*s == '=' && s[1] == '>' && !pkgname) {
1c3923b3
GS
4182 CLINE;
4183 sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 4184 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7948272d 4185 SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
1c3923b3
GS
4186 TERM(WORD);
4187 }
4188
4189 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 4190 if (*s == '(') {
79072805 4191 CLINE;
96e4d5b1 4192 if (gv && GvCVu(gv)) {
bf4acbe4 4193 for (d = s + 1; SPACE_OR_TAB(*d); d++) ;
7a52d87a 4194 if (*d == ')' && (sv = cv_const_sv(GvCV(gv)))) {
96e4d5b1 4195 s = d + 1;
4196 goto its_constant;
4197 }
4198 }
3280af22
NIS
4199 PL_nextval[PL_nexttoke].opval = yylval.opval;
4200 PL_expect = XOPERATOR;
93a17b20 4201 force_next(WORD);
c07a80fd 4202 yylval.ival = 0;
463ee0b2 4203 TOKEN('&');
79072805 4204 }
93a17b20 4205
a0d0e21e 4206 /* If followed by var or block, call it a method (unless sub) */
8990e307 4207
8ebc5c01 4208 if ((*s == '$' || *s == '{') && (!gv || !GvCVu(gv))) {
3280af22
NIS
4209 PL_last_lop = PL_oldbufptr;
4210 PL_last_lop_op = OP_METHOD;
93a17b20 4211 PREBLOCK(METHOD);
463ee0b2
LW
4212 }
4213
8990e307
LW
4214 /* If followed by a bareword, see if it looks like indir obj. */
4215
30fe34ed
RGS
4216 if (!orig_keyword
4217 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
4218 && (tmp = intuit_method(s,gv)))
bbf60fe6 4219 return REPORT(tmp);
93a17b20 4220
8990e307
LW
4221 /* Not a method, so call it a subroutine (if defined) */
4222
8ebc5c01 4223 if (gv && GvCVu(gv)) {
46fc3d4c 4224 CV* cv;
0453d815 4225 if (lastchar == '-' && ckWARN_d(WARN_AMBIGUOUS))
9014280d 4226 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4227 "Ambiguous use of -%s resolved as -&%s()",
3280af22 4228 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 4229 /* Check for a constant sub */
46fc3d4c 4230 cv = GvCV(gv);
96e4d5b1 4231 if ((sv = cv_const_sv(cv))) {
4232 its_constant:
4233 SvREFCNT_dec(((SVOP*)yylval.opval)->op_sv);
4234 ((SVOP*)yylval.opval)->op_sv = SvREFCNT_inc(sv);
4235 yylval.opval->op_private = 0;
4236 TOKEN(WORD);
89bfa8cd 4237 }
4238
a5f75d66
AD
4239 /* Resolve to GV now. */
4240 op_free(yylval.opval);
4241 yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
9675f7ac 4242 yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 4243 PL_last_lop = PL_oldbufptr;
bf848113 4244 PL_last_lop_op = OP_ENTERSUB;
4633a7c4
LW
4245 /* Is there a prototype? */
4246 if (SvPOK(cv)) {
4247 STRLEN len;
7a52d87a 4248 char *proto = SvPV((SV*)cv, len);
4633a7c4
LW
4249 if (!len)
4250 TERM(FUNC0SUB);
7a52d87a 4251 if (strEQ(proto, "$"))
4633a7c4 4252 OPERATOR(UNIOPSUB);
0f5d0394
AE
4253 while (*proto == ';')
4254 proto++;
7a52d87a 4255 if (*proto == '&' && *s == '{') {
c99da370
JH
4256 sv_setpv(PL_subname, PL_curstash ?
4257 "__ANON__" : "__ANON__::__ANON__");
4633a7c4
LW
4258 PREBLOCK(LSTOPSUB);
4259 }
a9ef352a 4260 }
3280af22
NIS
4261 PL_nextval[PL_nexttoke].opval = yylval.opval;
4262 PL_expect = XTERM;
8990e307
LW
4263 force_next(WORD);
4264 TOKEN(NOAMP);
4265 }
748a9306 4266
8990e307
LW
4267 /* Call it a bare word */
4268
5603f27d
GS
4269 if (PL_hints & HINT_STRICT_SUBS)
4270 yylval.opval->op_private |= OPpCONST_STRICT;
4271 else {
4272 bareword:
4273 if (ckWARN(WARN_RESERVED)) {
4274 if (lastchar != '-') {
4275 for (d = PL_tokenbuf; *d && isLOWER(*d); d++) ;
238ae712 4276 if (!*d && !gv_stashpv(PL_tokenbuf,FALSE))
9014280d 4277 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
4278 PL_tokenbuf);
4279 }
748a9306
LW
4280 }
4281 }
c3e0f903
GS
4282
4283 safe_bareword:
f248d071 4284 if (lastchar && strchr("*%&", lastchar) && ckWARN_d(WARN_AMBIGUOUS)) {
9014280d 4285 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4286 "Operator or semicolon missing before %c%s",
3280af22 4287 lastchar, PL_tokenbuf);
9014280d 4288 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
0453d815 4289 "Ambiguous use of %c resolved as operator %c",
748a9306
LW
4290 lastchar, lastchar);
4291 }
93a17b20 4292 TOKEN(WORD);
79072805 4293 }
79072805 4294
68dc0745 4295 case KEY___FILE__:
46fc3d4c 4296 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 4297 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 4298 TERM(THING);
4299
79072805 4300 case KEY___LINE__:
cf2093f6 4301 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 4302 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 4303 TERM(THING);
68dc0745 4304
4305 case KEY___PACKAGE__:
4306 yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 4307 (PL_curstash
9508c957 4308 ? newSVpv(HvNAME(PL_curstash), 0)
3280af22 4309 : &PL_sv_undef));
79072805 4310 TERM(THING);
79072805 4311
e50aee73 4312 case KEY___DATA__:
79072805
LW
4313 case KEY___END__: {
4314 GV *gv;
79072805
LW
4315
4316 /*SUPPRESS 560*/
3280af22 4317 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
e50aee73 4318 char *pname = "main";
3280af22
NIS
4319 if (PL_tokenbuf[2] == 'D')
4320 pname = HvNAME(PL_curstash ? PL_curstash : PL_defstash);
cea2e8a9 4321 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), TRUE, SVt_PVIO);
a5f75d66 4322 GvMULTI_on(gv);
79072805 4323 if (!GvIO(gv))
a0d0e21e 4324 GvIOp(gv) = newIO();
3280af22 4325 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
4326#if defined(HAS_FCNTL) && defined(F_SETFD)
4327 {
3280af22 4328 int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
4329 fcntl(fd,F_SETFD,fd >= 3);
4330 }
79072805 4331#endif
fd049845 4332 /* Mark this internal pseudo-handle as clean */
4333 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
3280af22 4334 if (PL_preprocess)
50952442 4335 IoTYPE(GvIOp(gv)) = IoTYPE_PIPE;
3280af22 4336 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 4337 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 4338 else
50952442 4339 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
4340#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
4341 /* if the script was opened in binmode, we need to revert
53129d29 4342 * it to text mode for compatibility; but only iff it has CRs
c39cd008 4343 * XXX this is a questionable hack at best. */
53129d29
GS
4344 if (PL_bufend-PL_bufptr > 2
4345 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
4346 {
4347 Off_t loc = 0;
50952442 4348 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
4349 loc = PerlIO_tell(PL_rsfp);
4350 (void)PerlIO_seek(PL_rsfp, 0L, 0);
4351 }
2986a63f
JH
4352#ifdef NETWARE
4353 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
4354#else
c39cd008 4355 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 4356#endif /* NETWARE */
1143fce0
JH
4357#ifdef PERLIO_IS_STDIO /* really? */
4358# if defined(__BORLANDC__)
cb359b41
JH
4359 /* XXX see note in do_binmode() */
4360 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
4361# endif
4362#endif
c39cd008
GS
4363 if (loc > 0)
4364 PerlIO_seek(PL_rsfp, loc, 0);
4365 }
4366 }
4367#endif
7948272d 4368#ifdef PERLIO_LAYERS
52d2e0f4
JH
4369 if (!IN_BYTES) {
4370 if (UTF)
4371 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
4372 else if (PL_encoding) {
4373 SV *name;
4374 dSP;
4375 ENTER;
4376 SAVETMPS;
4377 PUSHMARK(sp);
4378 EXTEND(SP, 1);
4379 XPUSHs(PL_encoding);
4380 PUTBACK;
4381 call_method("name", G_SCALAR);
4382 SPAGAIN;
4383 name = POPs;
4384 PUTBACK;
4385 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
4386 Perl_form(aTHX_ ":encoding(%"SVf")",
4387 name));
4388 FREETMPS;
4389 LEAVE;
4390 }
4391 }
7948272d 4392#endif
3280af22 4393 PL_rsfp = Nullfp;
79072805
LW
4394 }
4395 goto fake_eof;
e929a76b 4396 }
de3bb511 4397
8990e307 4398 case KEY_AUTOLOAD:
ed6116ce 4399 case KEY_DESTROY:
79072805 4400 case KEY_BEGIN:
7d30b5c4 4401 case KEY_CHECK:
7d07dbc2 4402 case KEY_INIT:
7d30b5c4 4403 case KEY_END:
3280af22
NIS
4404 if (PL_expect == XSTATE) {
4405 s = PL_bufptr;
93a17b20 4406 goto really_sub;
79072805
LW
4407 }
4408 goto just_a_word;
4409
a0d0e21e
LW
4410 case KEY_CORE:
4411 if (*s == ':' && s[1] == ':') {
4412 s += 2;
748a9306 4413 d = s;
3280af22 4414 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6798c92b
GS
4415 if (!(tmp = keyword(PL_tokenbuf, len)))
4416 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
4417 if (tmp < 0)
4418 tmp = -tmp;
4419 goto reserved_word;
4420 }
4421 goto just_a_word;
4422
463ee0b2
LW
4423 case KEY_abs:
4424 UNI(OP_ABS);
4425
79072805
LW
4426 case KEY_alarm:
4427 UNI(OP_ALARM);
4428
4429 case KEY_accept:
a0d0e21e 4430 LOP(OP_ACCEPT,XTERM);
79072805 4431
463ee0b2
LW
4432 case KEY_and:
4433 OPERATOR(ANDOP);
4434
79072805 4435 case KEY_atan2:
a0d0e21e 4436 LOP(OP_ATAN2,XTERM);
85e6fe83 4437
79072805 4438 case KEY_bind:
a0d0e21e 4439 LOP(OP_BIND,XTERM);
79072805
LW
4440
4441 case KEY_binmode:
1c1fc3ea 4442 LOP(OP_BINMODE,XTERM);
79072805
LW
4443
4444 case KEY_bless:
a0d0e21e 4445 LOP(OP_BLESS,XTERM);
79072805
LW
4446
4447 case KEY_chop:
4448 UNI(OP_CHOP);
4449
4450 case KEY_continue:
4451 PREBLOCK(CONTINUE);
4452
4453 case KEY_chdir:
85e6fe83 4454 (void)gv_fetchpv("ENV",TRUE, SVt_PVHV); /* may use HOME */
79072805
LW
4455 UNI(OP_CHDIR);
4456
4457 case KEY_close:
4458 UNI(OP_CLOSE);
4459
4460 case KEY_closedir:
4461 UNI(OP_CLOSEDIR);
4462
4463 case KEY_cmp:
4464 Eop(OP_SCMP);
4465
4466 case KEY_caller:
4467 UNI(OP_CALLER);
4468
4469 case KEY_crypt:
4470#ifdef FCRYPT
f4c556ac
GS
4471 if (!PL_cryptseen) {
4472 PL_cryptseen = TRUE;
de3bb511 4473 init_des();
f4c556ac 4474 }
a687059c 4475#endif
a0d0e21e 4476 LOP(OP_CRYPT,XTERM);
79072805
LW
4477
4478 case KEY_chmod:
a0d0e21e 4479 LOP(OP_CHMOD,XTERM);
79072805
LW
4480
4481 case KEY_chown:
a0d0e21e 4482 LOP(OP_CHOWN,XTERM);
79072805
LW
4483
4484 case KEY_connect:
a0d0e21e 4485 LOP(OP_CONNECT,XTERM);
79072805 4486
463ee0b2
LW
4487 case KEY_chr:
4488 UNI(OP_CHR);
4489
79072805
LW
4490 case KEY_cos:
4491 UNI(OP_COS);
4492
4493 case KEY_chroot:
4494 UNI(OP_CHROOT);
4495
4496 case KEY_do:
4497 s = skipspace(s);
4498 if (*s == '{')
a0d0e21e 4499 PRETERMBLOCK(DO);
79072805 4500 if (*s != '\'')
89c5585f 4501 s = force_word(s,WORD,TRUE,TRUE,FALSE);
378cc40b 4502 OPERATOR(DO);
79072805
LW
4503
4504 case KEY_die:
3280af22 4505 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 4506 LOP(OP_DIE,XTERM);
79072805
LW
4507
4508 case KEY_defined:
4509 UNI(OP_DEFINED);
4510
4511 case KEY_delete:
a0d0e21e 4512 UNI(OP_DELETE);
79072805
LW
4513
4514 case KEY_dbmopen:
a0d0e21e
LW
4515 gv_fetchpv("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
4516 LOP(OP_DBMOPEN,XTERM);
79072805
LW
4517
4518 case KEY_dbmclose:
4519 UNI(OP_DBMCLOSE);
4520
4521 case KEY_dump:
a0d0e21e 4522 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4523 LOOPX(OP_DUMP);
4524
4525 case KEY_else:
4526 PREBLOCK(ELSE);
4527
4528 case KEY_elsif:
57843af0 4529 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4530 OPERATOR(ELSIF);
4531
4532 case KEY_eq:
4533 Eop(OP_SEQ);
4534
a0d0e21e
LW
4535 case KEY_exists:
4536 UNI(OP_EXISTS);
4e553d73 4537
79072805
LW
4538 case KEY_exit:
4539 UNI(OP_EXIT);
4540
4541 case KEY_eval:
79072805 4542 s = skipspace(s);
3280af22 4543 PL_expect = (*s == '{') ? XTERMBLOCK : XTERM;
463ee0b2 4544 UNIBRACK(OP_ENTEREVAL);
79072805
LW
4545
4546 case KEY_eof:
4547 UNI(OP_EOF);
4548
c963b151
BD
4549 case KEY_err:
4550 OPERATOR(DOROP);
4551
79072805
LW
4552 case KEY_exp:
4553 UNI(OP_EXP);
4554
4555 case KEY_each:
4556 UNI(OP_EACH);
4557
4558 case KEY_exec:
4559 set_csh();
a0d0e21e 4560 LOP(OP_EXEC,XREF);
79072805
LW
4561
4562 case KEY_endhostent:
4563 FUN0(OP_EHOSTENT);
4564
4565 case KEY_endnetent:
4566 FUN0(OP_ENETENT);
4567
4568 case KEY_endservent:
4569 FUN0(OP_ESERVENT);
4570
4571 case KEY_endprotoent:
4572 FUN0(OP_EPROTOENT);
4573
4574 case KEY_endpwent:
4575 FUN0(OP_EPWENT);
4576
4577 case KEY_endgrent:
4578 FUN0(OP_EGRENT);
4579
4580 case KEY_for:
4581 case KEY_foreach:
57843af0 4582 yylval.ival = CopLINE(PL_curcop);
55497cff 4583 s = skipspace(s);
7e2040f0 4584 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 4585 char *p = s;
3280af22 4586 if ((PL_bufend - p) >= 3 &&
55497cff 4587 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
4588 p += 2;
77ca0c92
LW
4589 else if ((PL_bufend - p) >= 4 &&
4590 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
4591 p += 3;
55497cff 4592 p = skipspace(p);
7e2040f0 4593 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
4594 p = scan_ident(p, PL_bufend,
4595 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4596 p = skipspace(p);
4597 }
4598 if (*p != '$')
cea2e8a9 4599 Perl_croak(aTHX_ "Missing $ on loop variable");
55497cff 4600 }
79072805
LW
4601 OPERATOR(FOR);
4602
4603 case KEY_formline:
a0d0e21e 4604 LOP(OP_FORMLINE,XTERM);
79072805
LW
4605
4606 case KEY_fork:
4607 FUN0(OP_FORK);
4608
4609 case KEY_fcntl:
a0d0e21e 4610 LOP(OP_FCNTL,XTERM);
79072805
LW
4611
4612 case KEY_fileno:
4613 UNI(OP_FILENO);
4614
4615 case KEY_flock:
a0d0e21e 4616 LOP(OP_FLOCK,XTERM);
79072805
LW
4617
4618 case KEY_gt:
4619 Rop(OP_SGT);
4620
4621 case KEY_ge:
4622 Rop(OP_SGE);
4623
4624 case KEY_grep:
2c38e13d 4625 LOP(OP_GREPSTART, XREF);
79072805
LW
4626
4627 case KEY_goto:
a0d0e21e 4628 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4629 LOOPX(OP_GOTO);
4630
4631 case KEY_gmtime:
4632 UNI(OP_GMTIME);
4633
4634 case KEY_getc:
6f33ba73 4635 UNIDOR(OP_GETC);
79072805
LW
4636
4637 case KEY_getppid:
4638 FUN0(OP_GETPPID);
4639
4640 case KEY_getpgrp:
4641 UNI(OP_GETPGRP);
4642
4643 case KEY_getpriority:
a0d0e21e 4644 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
4645
4646 case KEY_getprotobyname:
4647 UNI(OP_GPBYNAME);
4648
4649 case KEY_getprotobynumber:
a0d0e21e 4650 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
4651
4652 case KEY_getprotoent:
4653 FUN0(OP_GPROTOENT);
4654
4655 case KEY_getpwent:
4656 FUN0(OP_GPWENT);
4657
4658 case KEY_getpwnam:
ff68c719 4659 UNI(OP_GPWNAM);
79072805
LW
4660
4661 case KEY_getpwuid:
ff68c719 4662 UNI(OP_GPWUID);
79072805
LW
4663
4664 case KEY_getpeername:
4665 UNI(OP_GETPEERNAME);
4666
4667 case KEY_gethostbyname:
4668 UNI(OP_GHBYNAME);
4669
4670 case KEY_gethostbyaddr:
a0d0e21e 4671 LOP(OP_GHBYADDR,XTERM);
79072805
LW
4672
4673 case KEY_gethostent:
4674 FUN0(OP_GHOSTENT);
4675
4676 case KEY_getnetbyname:
4677 UNI(OP_GNBYNAME);
4678
4679 case KEY_getnetbyaddr:
a0d0e21e 4680 LOP(OP_GNBYADDR,XTERM);
79072805
LW
4681
4682 case KEY_getnetent:
4683 FUN0(OP_GNETENT);
4684
4685 case KEY_getservbyname:
a0d0e21e 4686 LOP(OP_GSBYNAME,XTERM);
79072805
LW
4687
4688 case KEY_getservbyport:
a0d0e21e 4689 LOP(OP_GSBYPORT,XTERM);
79072805
LW
4690
4691 case KEY_getservent:
4692 FUN0(OP_GSERVENT);
4693
4694 case KEY_getsockname:
4695 UNI(OP_GETSOCKNAME);
4696
4697 case KEY_getsockopt:
a0d0e21e 4698 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
4699
4700 case KEY_getgrent:
4701 FUN0(OP_GGRENT);
4702
4703 case KEY_getgrnam:
ff68c719 4704 UNI(OP_GGRNAM);
79072805
LW
4705
4706 case KEY_getgrgid:
ff68c719 4707 UNI(OP_GGRGID);
79072805
LW
4708
4709 case KEY_getlogin:
4710 FUN0(OP_GETLOGIN);
4711
93a17b20 4712 case KEY_glob:
a0d0e21e
LW
4713 set_csh();
4714 LOP(OP_GLOB,XTERM);
93a17b20 4715
79072805
LW
4716 case KEY_hex:
4717 UNI(OP_HEX);
4718
4719 case KEY_if:
57843af0 4720 yylval.ival = CopLINE(PL_curcop);
79072805
LW
4721 OPERATOR(IF);
4722
4723 case KEY_index:
a0d0e21e 4724 LOP(OP_INDEX,XTERM);
79072805
LW
4725
4726 case KEY_int:
4727 UNI(OP_INT);
4728
4729 case KEY_ioctl:
a0d0e21e 4730 LOP(OP_IOCTL,XTERM);
79072805
LW
4731
4732 case KEY_join:
a0d0e21e 4733 LOP(OP_JOIN,XTERM);
79072805
LW
4734
4735 case KEY_keys:
4736 UNI(OP_KEYS);
4737
4738 case KEY_kill:
a0d0e21e 4739 LOP(OP_KILL,XTERM);
79072805
LW
4740
4741 case KEY_last:
a0d0e21e 4742 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 4743 LOOPX(OP_LAST);
4e553d73 4744
79072805
LW
4745 case KEY_lc:
4746 UNI(OP_LC);
4747
4748 case KEY_lcfirst:
4749 UNI(OP_LCFIRST);
4750
4751 case KEY_local:
09bef843 4752 yylval.ival = 0;
79072805
LW
4753 OPERATOR(LOCAL);
4754
4755 case KEY_length:
4756 UNI(OP_LENGTH);
4757
4758 case KEY_lt:
4759 Rop(OP_SLT);
4760
4761 case KEY_le:
4762 Rop(OP_SLE);
4763
4764 case KEY_localtime:
4765 UNI(OP_LOCALTIME);
4766
4767 case KEY_log:
4768 UNI(OP_LOG);
4769
4770 case KEY_link:
a0d0e21e 4771 LOP(OP_LINK,XTERM);
79072805
LW
4772
4773 case KEY_listen:
a0d0e21e 4774 LOP(OP_LISTEN,XTERM);
79072805 4775
c0329465
MB
4776 case KEY_lock:
4777 UNI(OP_LOCK);
4778
79072805
LW
4779 case KEY_lstat:
4780 UNI(OP_LSTAT);
4781
4782 case KEY_m:
8782bef2 4783 s = scan_pat(s,OP_MATCH);
79072805
LW
4784 TERM(sublex_start());
4785
a0d0e21e 4786 case KEY_map:
2c38e13d 4787 LOP(OP_MAPSTART, XREF);
4e4e412b 4788
79072805 4789 case KEY_mkdir:
a0d0e21e 4790 LOP(OP_MKDIR,XTERM);
79072805
LW
4791
4792 case KEY_msgctl:
a0d0e21e 4793 LOP(OP_MSGCTL,XTERM);
79072805
LW
4794
4795 case KEY_msgget:
a0d0e21e 4796 LOP(OP_MSGGET,XTERM);
79072805
LW
4797
4798 case KEY_msgrcv:
a0d0e21e 4799 LOP(OP_MSGRCV,XTERM);
79072805
LW
4800
4801 case KEY_msgsnd:
a0d0e21e 4802 LOP(OP_MSGSND,XTERM);
79072805 4803
77ca0c92 4804 case KEY_our:
93a17b20 4805 case KEY_my:
77ca0c92 4806 PL_in_my = tmp;
c750a3ec 4807 s = skipspace(s);
7e2040f0 4808 if (isIDFIRST_lazy_if(s,UTF)) {
3280af22 4809 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
4810 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
4811 goto really_sub;
def3634b 4812 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 4813 if (!PL_in_my_stash) {
c750a3ec 4814 char tmpbuf[1024];
3280af22
NIS
4815 PL_bufptr = s;
4816 sprintf(tmpbuf, "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
4817 yyerror(tmpbuf);
4818 }
4819 }
09bef843 4820 yylval.ival = 1;
55497cff 4821 OPERATOR(MY);
93a17b20 4822
79072805 4823 case KEY_next:
a0d0e21e 4824 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
4825 LOOPX(OP_NEXT);
4826
4827 case KEY_ne:
4828 Eop(OP_SNE);
4829
a0d0e21e 4830 case KEY_no:
3280af22 4831 if (PL_expect != XSTATE)
a0d0e21e
LW
4832 yyerror("\"no\" not allowed in expression");
4833 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 4834 s = force_version(s, FALSE);
a0d0e21e
LW
4835 yylval.ival = 0;
4836 OPERATOR(USE);
4837
4838 case KEY_not:
2d2e263d
LW
4839 if (*s == '(' || (s = skipspace(s), *s == '('))
4840 FUN1(OP_NOT);
4841 else
4842 OPERATOR(NOTOP);
a0d0e21e 4843
79072805 4844 case KEY_open:
93a17b20 4845 s = skipspace(s);
7e2040f0 4846 if (isIDFIRST_lazy_if(s,UTF)) {
93a17b20 4847 char *t;
7e2040f0 4848 for (d = s; isALNUM_lazy_if(d,UTF); d++) ;
e2ab214b
DM
4849 for (t=d; *t && isSPACE(*t); t++) ;
4850 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
4851 /* [perl #16184] */
4852 && !(t[0] == '=' && t[1] == '>')
4853 ) {
9014280d 4854 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 4855 "Precedence problem: open %.*s should be open(%.*s)",
66fbe8fb
HS
4856 d - s, s, d - s, s);
4857 }
93a17b20 4858 }
a0d0e21e 4859 LOP(OP_OPEN,XTERM);
79072805 4860
463ee0b2 4861 case KEY_or:
a0d0e21e 4862 yylval.ival = OP_OR;
463ee0b2
LW
4863 OPERATOR(OROP);
4864
79072805
LW
4865 case KEY_ord:
4866 UNI(OP_ORD);
4867
4868 case KEY_oct:
4869 UNI(OP_OCT);
4870
4871 case KEY_opendir:
a0d0e21e 4872 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
4873
4874 case KEY_print:
3280af22 4875 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4876 LOP(OP_PRINT,XREF);
79072805
LW
4877
4878 case KEY_printf:
3280af22 4879 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 4880 LOP(OP_PRTF,XREF);
79072805 4881
c07a80fd 4882 case KEY_prototype:
4883 UNI(OP_PROTOTYPE);
4884
79072805 4885 case KEY_push:
a0d0e21e 4886 LOP(OP_PUSH,XTERM);
79072805
LW
4887
4888 case KEY_pop:
6f33ba73 4889 UNIDOR(OP_POP);
79072805 4890
a0d0e21e 4891 case KEY_pos:
6f33ba73 4892 UNIDOR(OP_POS);
4e553d73 4893
79072805 4894 case KEY_pack:
a0d0e21e 4895 LOP(OP_PACK,XTERM);
79072805
LW
4896
4897 case KEY_package:
a0d0e21e 4898 s = force_word(s,WORD,FALSE,TRUE,FALSE);
79072805
LW
4899 OPERATOR(PACKAGE);
4900
4901 case KEY_pipe:
a0d0e21e 4902 LOP(OP_PIPE_OP,XTERM);
79072805
LW
4903
4904 case KEY_q:
09bef843 4905 s = scan_str(s,FALSE,FALSE);
79072805 4906 if (!s)
85e6fe83 4907 missingterm((char*)0);
79072805
LW
4908 yylval.ival = OP_CONST;
4909 TERM(sublex_start());
4910
a0d0e21e
LW
4911 case KEY_quotemeta:
4912 UNI(OP_QUOTEMETA);
4913
8990e307 4914 case KEY_qw:
09bef843 4915 s = scan_str(s,FALSE,FALSE);
8990e307 4916 if (!s)
85e6fe83 4917 missingterm((char*)0);
8127e0e3
GS
4918 force_next(')');
4919 if (SvCUR(PL_lex_stuff)) {
4920 OP *words = Nullop;
4921 int warned = 0;
3280af22 4922 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 4923 while (len) {
7948272d 4924 SV *sv;
8127e0e3
GS
4925 for (; isSPACE(*d) && len; --len, ++d) ;
4926 if (len) {
4927 char *b = d;
e476b1b5 4928 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
4929 for (; !isSPACE(*d) && len; --len, ++d) {
4930 if (*d == ',') {
9014280d 4931 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4932 "Possible attempt to separate words with commas");
4933 ++warned;
4934 }
4935 else if (*d == '#') {
9014280d 4936 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
4937 "Possible attempt to put comments in qw() list");
4938 ++warned;
4939 }
4940 }
4941 }
4942 else {
4943 for (; !isSPACE(*d) && len; --len, ++d) ;
4944 }
7948272d
NIS
4945 sv = newSVpvn(b, d-b);
4946 if (DO_UTF8(PL_lex_stuff))
4947 SvUTF8_on(sv);
8127e0e3 4948 words = append_elem(OP_LIST, words,
7948272d 4949 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 4950 }
4951 }
8127e0e3
GS
4952 if (words) {
4953 PL_nextval[PL_nexttoke].opval = words;
4954 force_next(THING);
4955 }
55497cff 4956 }
37fd879b 4957 if (PL_lex_stuff) {
8127e0e3 4958 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
4959 PL_lex_stuff = Nullsv;
4960 }
3280af22 4961 PL_expect = XTERM;
8127e0e3 4962 TOKEN('(');
8990e307 4963
79072805 4964 case KEY_qq:
09bef843 4965 s = scan_str(s,FALSE,FALSE);
79072805 4966 if (!s)
85e6fe83 4967 missingterm((char*)0);
a0d0e21e 4968 yylval.ival = OP_STRINGIFY;
3280af22
NIS
4969 if (SvIVX(PL_lex_stuff) == '\'')
4970 SvIVX(PL_lex_stuff) = 0; /* qq'$foo' should intepolate */
79072805
LW
4971 TERM(sublex_start());
4972
8782bef2
GB
4973 case KEY_qr:
4974 s = scan_pat(s,OP_QR);
4975 TERM(sublex_start());
4976
79072805 4977 case KEY_qx:
09bef843 4978 s = scan_str(s,FALSE,FALSE);
79072805 4979 if (!s)
85e6fe83 4980 missingterm((char*)0);
79072805
LW
4981 yylval.ival = OP_BACKTICK;
4982 set_csh();
4983 TERM(sublex_start());
4984
4985 case KEY_return:
4986 OLDLOP(OP_RETURN);
4987
4988 case KEY_require:
a7cb1f99 4989 s = skipspace(s);
e759cc13
RGS
4990 if (isDIGIT(*s)) {
4991 s = force_version(s, FALSE);
a7cb1f99 4992 }
e759cc13
RGS
4993 else if (*s != 'v' || !isDIGIT(s[1])
4994 || (s = force_version(s, TRUE), *s == 'v'))
4995 {
a7cb1f99
GS
4996 *PL_tokenbuf = '\0';
4997 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 4998 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
a7cb1f99
GS
4999 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), TRUE);
5000 else if (*s == '<')
5001 yyerror("<> should be quotes");
5002 }
463ee0b2 5003 UNI(OP_REQUIRE);
79072805
LW
5004
5005 case KEY_reset:
5006 UNI(OP_RESET);
5007
5008 case KEY_redo:
a0d0e21e 5009 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
5010 LOOPX(OP_REDO);
5011
5012 case KEY_rename:
a0d0e21e 5013 LOP(OP_RENAME,XTERM);
79072805
LW
5014
5015 case KEY_rand:
5016 UNI(OP_RAND);
5017
5018 case KEY_rmdir:
5019 UNI(OP_RMDIR);
5020
5021 case KEY_rindex:
a0d0e21e 5022 LOP(OP_RINDEX,XTERM);
79072805
LW
5023
5024 case KEY_read:
a0d0e21e 5025 LOP(OP_READ,XTERM);
79072805
LW
5026
5027 case KEY_readdir:
5028 UNI(OP_READDIR);
5029
93a17b20
LW
5030 case KEY_readline:
5031 set_csh();
6f33ba73 5032 UNIDOR(OP_READLINE);
93a17b20
LW
5033
5034 case KEY_readpipe:
5035 set_csh();
5036 UNI(OP_BACKTICK);
5037
79072805
LW
5038 case KEY_rewinddir:
5039 UNI(OP_REWINDDIR);
5040
5041 case KEY_recv:
a0d0e21e 5042 LOP(OP_RECV,XTERM);
79072805
LW
5043
5044 case KEY_reverse:
a0d0e21e 5045 LOP(OP_REVERSE,XTERM);
79072805
LW
5046
5047 case KEY_readlink:
6f33ba73 5048 UNIDOR(OP_READLINK);
79072805
LW
5049
5050 case KEY_ref:
5051 UNI(OP_REF);
5052
5053 case KEY_s:
5054 s = scan_subst(s);
5055 if (yylval.opval)
5056 TERM(sublex_start());
5057 else
5058 TOKEN(1); /* force error */
5059
a0d0e21e
LW
5060 case KEY_chomp:
5061 UNI(OP_CHOMP);
4e553d73 5062
79072805
LW
5063 case KEY_scalar:
5064 UNI(OP_SCALAR);
5065
5066 case KEY_select:
a0d0e21e 5067 LOP(OP_SELECT,XTERM);
79072805
LW
5068
5069 case KEY_seek:
a0d0e21e 5070 LOP(OP_SEEK,XTERM);
79072805
LW
5071
5072 case KEY_semctl:
a0d0e21e 5073 LOP(OP_SEMCTL,XTERM);
79072805
LW
5074
5075 case KEY_semget:
a0d0e21e 5076 LOP(OP_SEMGET,XTERM);
79072805
LW
5077
5078 case KEY_semop:
a0d0e21e 5079 LOP(OP_SEMOP,XTERM);
79072805
LW
5080
5081 case KEY_send:
a0d0e21e 5082 LOP(OP_SEND,XTERM);
79072805
LW
5083
5084 case KEY_setpgrp:
a0d0e21e 5085 LOP(OP_SETPGRP,XTERM);
79072805
LW
5086
5087 case KEY_setpriority:
a0d0e21e 5088 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
5089
5090 case KEY_sethostent:
ff68c719 5091 UNI(OP_SHOSTENT);
79072805
LW
5092
5093 case KEY_setnetent:
ff68c719 5094 UNI(OP_SNETENT);
79072805
LW
5095
5096 case KEY_setservent:
ff68c719 5097 UNI(OP_SSERVENT);
79072805
LW
5098
5099 case KEY_setprotoent:
ff68c719 5100 UNI(OP_SPROTOENT);
79072805
LW
5101
5102 case KEY_setpwent:
5103 FUN0(OP_SPWENT);
5104
5105 case KEY_setgrent:
5106 FUN0(OP_SGRENT);
5107
5108 case KEY_seekdir:
a0d0e21e 5109 LOP(OP_SEEKDIR,XTERM);
79072805
LW
5110
5111 case KEY_setsockopt:
a0d0e21e 5112 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
5113
5114 case KEY_shift:
6f33ba73 5115 UNIDOR(OP_SHIFT);
79072805
LW
5116
5117 case KEY_shmctl:
a0d0e21e 5118 LOP(OP_SHMCTL,XTERM);
79072805
LW
5119
5120 case KEY_shmget:
a0d0e21e 5121 LOP(OP_SHMGET,XTERM);
79072805
LW
5122
5123 case KEY_shmread:
a0d0e21e 5124 LOP(OP_SHMREAD,XTERM);
79072805
LW
5125
5126 case KEY_shmwrite:
a0d0e21e 5127 LOP(OP_SHMWRITE,XTERM);
79072805
LW
5128
5129 case KEY_shutdown:
a0d0e21e 5130 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
5131
5132 case KEY_sin:
5133 UNI(OP_SIN);
5134
5135 case KEY_sleep:
5136 UNI(OP_SLEEP);
5137
5138 case KEY_socket:
a0d0e21e 5139 LOP(OP_SOCKET,XTERM);
79072805
LW
5140
5141 case KEY_socketpair:
a0d0e21e 5142 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
5143
5144 case KEY_sort:
3280af22 5145 checkcomma(s,PL_tokenbuf,"subroutine name");
79072805
LW
5146 s = skipspace(s);
5147 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 5148 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 5149 PL_expect = XTERM;
15f0808c 5150 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 5151 LOP(OP_SORT,XREF);
79072805
LW
5152
5153 case KEY_split:
a0d0e21e 5154 LOP(OP_SPLIT,XTERM);
79072805
LW
5155
5156 case KEY_sprintf:
a0d0e21e 5157 LOP(OP_SPRINTF,XTERM);
79072805
LW
5158
5159 case KEY_splice:
a0d0e21e 5160 LOP(OP_SPLICE,XTERM);
79072805
LW
5161
5162 case KEY_sqrt:
5163 UNI(OP_SQRT);
5164
5165 case KEY_srand:
5166 UNI(OP_SRAND);
5167
5168 case KEY_stat:
5169 UNI(OP_STAT);
5170
5171 case KEY_study:
79072805
LW
5172 UNI(OP_STUDY);
5173
5174 case KEY_substr:
a0d0e21e 5175 LOP(OP_SUBSTR,XTERM);
79072805
LW
5176
5177 case KEY_format:
5178 case KEY_sub:
93a17b20 5179 really_sub:
09bef843 5180 {
3280af22 5181 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 5182 SSize_t tboffset = 0;
09bef843 5183 expectation attrful;
d731386a 5184 bool have_name, have_proto, bad_proto;
09bef843
SB
5185 int key = tmp;
5186
5187 s = skipspace(s);
5188
7e2040f0 5189 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
5190 (*s == ':' && s[1] == ':'))
5191 {
5192 PL_expect = XBLOCK;
5193 attrful = XATTRBLOCK;
b1b65b59
JH
5194 /* remember buffer pos'n for later force_word */
5195 tboffset = s - PL_oldbufptr;
09bef843
SB
5196 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5197 if (strchr(tmpbuf, ':'))
5198 sv_setpv(PL_subname, tmpbuf);
5199 else {
5200 sv_setsv(PL_subname,PL_curstname);
5201 sv_catpvn(PL_subname,"::",2);
5202 sv_catpvn(PL_subname,tmpbuf,len);
5203 }
5204 s = skipspace(d);
5205 have_name = TRUE;
5206 }
463ee0b2 5207 else {
09bef843
SB
5208 if (key == KEY_my)
5209 Perl_croak(aTHX_ "Missing name in \"my sub\"");
5210 PL_expect = XTERMBLOCK;
5211 attrful = XATTRTERM;
5212 sv_setpv(PL_subname,"?");
5213 have_name = FALSE;
463ee0b2 5214 }
4633a7c4 5215
09bef843
SB
5216 if (key == KEY_format) {
5217 if (*s == '=')
5218 PL_lex_formbrack = PL_lex_brackets + 1;
5219 if (have_name)
b1b65b59
JH
5220 (void) force_word(PL_oldbufptr + tboffset, WORD,
5221 FALSE, TRUE, TRUE);
09bef843
SB
5222 OPERATOR(FORMAT);
5223 }
79072805 5224
09bef843
SB
5225 /* Look for a prototype */
5226 if (*s == '(') {
5227 char *p;
5228
5229 s = scan_str(s,FALSE,FALSE);
37fd879b 5230 if (!s)
09bef843 5231 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 5232 /* strip spaces and check for bad characters */
09bef843
SB
5233 d = SvPVX(PL_lex_stuff);
5234 tmp = 0;
d731386a 5235 bad_proto = FALSE;
09bef843 5236 for (p = d; *p; ++p) {
d37a9538 5237 if (!isSPACE(*p)) {
09bef843 5238 d[tmp++] = *p;
d37a9538
ST
5239 if (!strchr("$@%*;[]&\\", *p))
5240 bad_proto = TRUE;
5241 }
09bef843
SB
5242 }
5243 d[tmp] = '\0';
420cdfc1 5244 if (bad_proto && ckWARN(WARN_SYNTAX))
9014280d 5245 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
35c1215d
NC
5246 "Illegal character in prototype for %"SVf" : %s",
5247 PL_subname, d);
09bef843
SB
5248 SvCUR(PL_lex_stuff) = tmp;
5249 have_proto = TRUE;
68dc0745 5250
09bef843 5251 s = skipspace(s);
4633a7c4 5252 }
09bef843
SB
5253 else
5254 have_proto = FALSE;
5255
5256 if (*s == ':' && s[1] != ':')
5257 PL_expect = attrful;
8e742a20
MHM
5258 else if (*s != '{' && key == KEY_sub) {
5259 if (!have_name)
5260 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5261 else if (*s != ';')
5262 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, PL_subname);
5263 }
09bef843
SB
5264
5265 if (have_proto) {
b1b65b59
JH
5266 PL_nextval[PL_nexttoke].opval =
5267 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
09bef843
SB
5268 PL_lex_stuff = Nullsv;
5269 force_next(THING);
68dc0745 5270 }
09bef843 5271 if (!have_name) {
c99da370
JH
5272 sv_setpv(PL_subname,
5273 PL_curstash ? "__ANON__" : "__ANON__::__ANON__");
09bef843 5274 TOKEN(ANONSUB);
4633a7c4 5275 }
b1b65b59
JH
5276 (void) force_word(PL_oldbufptr + tboffset, WORD,
5277 FALSE, TRUE, TRUE);
09bef843
SB
5278 if (key == KEY_my)
5279 TOKEN(MYSUB);
5280 TOKEN(SUB);
4633a7c4 5281 }
79072805
LW
5282
5283 case KEY_system:
5284 set_csh();
a0d0e21e 5285 LOP(OP_SYSTEM,XREF);
79072805
LW
5286
5287 case KEY_symlink:
a0d0e21e 5288 LOP(OP_SYMLINK,XTERM);
79072805
LW
5289
5290 case KEY_syscall:
a0d0e21e 5291 LOP(OP_SYSCALL,XTERM);
79072805 5292
c07a80fd 5293 case KEY_sysopen:
5294 LOP(OP_SYSOPEN,XTERM);
5295
137443ea 5296 case KEY_sysseek:
5297 LOP(OP_SYSSEEK,XTERM);
5298
79072805 5299 case KEY_sysread:
a0d0e21e 5300 LOP(OP_SYSREAD,XTERM);
79072805
LW
5301
5302 case KEY_syswrite:
a0d0e21e 5303 LOP(OP_SYSWRITE,XTERM);
79072805
LW
5304
5305 case KEY_tr:
5306 s = scan_trans(s);
5307 TERM(sublex_start());
5308
5309 case KEY_tell:
5310 UNI(OP_TELL);
5311
5312 case KEY_telldir:
5313 UNI(OP_TELLDIR);
5314
463ee0b2 5315 case KEY_tie:
a0d0e21e 5316 LOP(OP_TIE,XTERM);
463ee0b2 5317
c07a80fd 5318 case KEY_tied:
5319 UNI(OP_TIED);
5320
79072805
LW
5321 case KEY_time:
5322 FUN0(OP_TIME);
5323
5324 case KEY_times:
5325 FUN0(OP_TMS);
5326
5327 case KEY_truncate:
a0d0e21e 5328 LOP(OP_TRUNCATE,XTERM);
79072805
LW
5329
5330 case KEY_uc:
5331 UNI(OP_UC);
5332
5333 case KEY_ucfirst:
5334 UNI(OP_UCFIRST);
5335
463ee0b2
LW
5336 case KEY_untie:
5337 UNI(OP_UNTIE);
5338
79072805 5339 case KEY_until:
57843af0 5340 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5341 OPERATOR(UNTIL);
5342
5343 case KEY_unless:
57843af0 5344 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5345 OPERATOR(UNLESS);
5346
5347 case KEY_unlink:
a0d0e21e 5348 LOP(OP_UNLINK,XTERM);
79072805
LW
5349
5350 case KEY_undef:
6f33ba73 5351 UNIDOR(OP_UNDEF);
79072805
LW
5352
5353 case KEY_unpack:
a0d0e21e 5354 LOP(OP_UNPACK,XTERM);
79072805
LW
5355
5356 case KEY_utime:
a0d0e21e 5357 LOP(OP_UTIME,XTERM);
79072805
LW
5358
5359 case KEY_umask:
6f33ba73 5360 UNIDOR(OP_UMASK);
79072805
LW
5361
5362 case KEY_unshift:
a0d0e21e
LW
5363 LOP(OP_UNSHIFT,XTERM);
5364
5365 case KEY_use:
3280af22 5366 if (PL_expect != XSTATE)
a0d0e21e 5367 yyerror("\"use\" not allowed in expression");
89bfa8cd 5368 s = skipspace(s);
a7cb1f99 5369 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
e759cc13 5370 s = force_version(s, TRUE);
a7cb1f99 5371 if (*s == ';' || (s = skipspace(s), *s == ';')) {
3280af22 5372 PL_nextval[PL_nexttoke].opval = Nullop;
89bfa8cd 5373 force_next(WORD);
5374 }
e759cc13
RGS
5375 else if (*s == 'v') {
5376 s = force_word(s,WORD,FALSE,TRUE,FALSE);
5377 s = force_version(s, FALSE);
5378 }
89bfa8cd 5379 }
5380 else {
5381 s = force_word(s,WORD,FALSE,TRUE,FALSE);
e759cc13 5382 s = force_version(s, FALSE);
89bfa8cd 5383 }
a0d0e21e
LW
5384 yylval.ival = 1;
5385 OPERATOR(USE);
79072805
LW
5386
5387 case KEY_values:
5388 UNI(OP_VALUES);
5389
5390 case KEY_vec:
a0d0e21e 5391 LOP(OP_VEC,XTERM);
79072805
LW
5392
5393 case KEY_while:
57843af0 5394 yylval.ival = CopLINE(PL_curcop);
79072805
LW
5395 OPERATOR(WHILE);
5396
5397 case KEY_warn:
3280af22 5398 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 5399 LOP(OP_WARN,XTERM);
79072805
LW
5400
5401 case KEY_wait:
5402 FUN0(OP_WAIT);
5403
5404 case KEY_waitpid:
a0d0e21e 5405 LOP(OP_WAITPID,XTERM);
79072805
LW
5406
5407 case KEY_wantarray:
5408 FUN0(OP_WANTARRAY);
5409
5410 case KEY_write:
9d116dd7
JH
5411#ifdef EBCDIC
5412 {
df3728a2
JH
5413 char ctl_l[2];
5414 ctl_l[0] = toCTRL('L');
5415 ctl_l[1] = '\0';
9d116dd7
JH
5416 gv_fetchpv(ctl_l,TRUE, SVt_PV);
5417 }
5418#else
5419 gv_fetchpv("\f",TRUE, SVt_PV); /* Make sure $^L is defined */
5420#endif
79072805
LW
5421 UNI(OP_ENTERWRITE);
5422
5423 case KEY_x:
3280af22 5424 if (PL_expect == XOPERATOR)
79072805
LW
5425 Mop(OP_REPEAT);
5426 check_uni();
5427 goto just_a_word;
5428
a0d0e21e
LW
5429 case KEY_xor:
5430 yylval.ival = OP_XOR;
5431 OPERATOR(OROP);
5432
79072805
LW
5433 case KEY_y:
5434 s = scan_trans(s);
5435 TERM(sublex_start());
5436 }
49dc05e3 5437 }}
79072805 5438}
bf4acbe4
GS
5439#ifdef __SC__
5440#pragma segment Main
5441#endif
79072805 5442
e930465f
JH
5443static int
5444S_pending_ident(pTHX)
8eceec63
SC
5445{
5446 register char *d;
a55b55d8 5447 register I32 tmp = 0;
8eceec63
SC
5448 /* pit holds the identifier we read and pending_ident is reset */
5449 char pit = PL_pending_ident;
5450 PL_pending_ident = 0;
5451
5452 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5453 "### Tokener saw identifier '%s'\n", PL_tokenbuf); });
5454
5455 /* if we're in a my(), we can't allow dynamics here.
5456 $foo'bar has already been turned into $foo::bar, so
5457 just check for colons.
5458
5459 if it's a legal name, the OP is a PADANY.
5460 */
5461 if (PL_in_my) {
5462 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
5463 if (strchr(PL_tokenbuf,':'))
5464 yyerror(Perl_form(aTHX_ "No package name allowed for "
5465 "variable %s in \"our\"",
5466 PL_tokenbuf));
dd2155a4 5467 tmp = allocmy(PL_tokenbuf);
8eceec63
SC
5468 }
5469 else {
5470 if (strchr(PL_tokenbuf,':'))
5471 yyerror(Perl_form(aTHX_ PL_no_myglob,PL_tokenbuf));
5472
5473 yylval.opval = newOP(OP_PADANY, 0);
dd2155a4 5474 yylval.opval->op_targ = allocmy(PL_tokenbuf);
8eceec63
SC
5475 return PRIVATEREF;
5476 }
5477 }
5478
5479 /*
5480 build the ops for accesses to a my() variable.
5481
5482 Deny my($a) or my($b) in a sort block, *if* $a or $b is
5483 then used in a comparison. This catches most, but not
5484 all cases. For instance, it catches
5485 sort { my($a); $a <=> $b }
5486 but not
5487 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
5488 (although why you'd do that is anyone's guess).
5489 */
5490
5491 if (!strchr(PL_tokenbuf,':')) {
8716503d
DM
5492 if (!PL_in_my)
5493 tmp = pad_findmy(PL_tokenbuf);
5494 if (tmp != NOT_IN_PAD) {
8eceec63 5495 /* might be an "our" variable" */
dd2155a4 5496 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
8eceec63 5497 /* build ops for a bareword */
dd2155a4 5498 SV *sym = newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)), 0);
8eceec63
SC
5499 sv_catpvn(sym, "::", 2);
5500 sv_catpv(sym, PL_tokenbuf+1);
5501 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
5502 yylval.opval->op_private = OPpCONST_ENTERED;
5503 gv_fetchpv(SvPVX(sym),
5504 (PL_in_eval
5505 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 5506 : GV_ADDMULTI
8eceec63
SC
5507 ),
5508 ((PL_tokenbuf[0] == '$') ? SVt_PV
5509 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5510 : SVt_PVHV));
5511 return WORD;
5512 }
5513
5514 /* if it's a sort block and they're naming $a or $b */
5515 if (PL_last_lop_op == OP_SORT &&
5516 PL_tokenbuf[0] == '$' &&
5517 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
5518 && !PL_tokenbuf[2])
5519 {
5520 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
5521 d < PL_bufend && *d != '\n';
5522 d++)
5523 {
5524 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
5525 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
5526 PL_tokenbuf);
5527 }
5528 }
5529 }
5530
5531 yylval.opval = newOP(OP_PADANY, 0);
5532 yylval.opval->op_targ = tmp;
5533 return PRIVATEREF;
5534 }
5535 }
5536
5537 /*
5538 Whine if they've said @foo in a doublequoted string,
5539 and @foo isn't a variable we can find in the symbol
5540 table.
5541 */
5542 if (pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
5543 GV *gv = gv_fetchpv(PL_tokenbuf+1, FALSE, SVt_PVAV);
5544 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
5545 && ckWARN(WARN_AMBIGUOUS))
5546 {
5547 /* Downgraded from fatal to warning 20000522 mjd */
9014280d 5548 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8eceec63
SC
5549 "Possible unintended interpolation of %s in string",
5550 PL_tokenbuf);
5551 }
5552 }
5553
5554 /* build ops for a bareword */
5555 yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf+1, 0));
5556 yylval.opval->op_private = OPpCONST_ENTERED;
5557 gv_fetchpv(PL_tokenbuf+1, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
5558 ((PL_tokenbuf[0] == '$') ? SVt_PV
5559 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
5560 : SVt_PVHV));
5561 return WORD;
5562}
5563
79072805 5564I32
864dbfa3 5565Perl_keyword(pTHX_ register char *d, I32 len)
79072805
LW
5566{
5567 switch (*d) {
5568 case '_':
5569 if (d[1] == '_') {
a0d0e21e 5570 if (strEQ(d,"__FILE__")) return -KEY___FILE__;
68dc0745 5571 if (strEQ(d,"__LINE__")) return -KEY___LINE__;
5572 if (strEQ(d,"__PACKAGE__")) return -KEY___PACKAGE__;
e50aee73 5573 if (strEQ(d,"__DATA__")) return KEY___DATA__;
79072805
LW
5574 if (strEQ(d,"__END__")) return KEY___END__;
5575 }
5576 break;
8990e307
LW
5577 case 'A':
5578 if (strEQ(d,"AUTOLOAD")) return KEY_AUTOLOAD;
5579 break;
79072805 5580 case 'a':
463ee0b2
LW
5581 switch (len) {
5582 case 3:
a0d0e21e
LW
5583 if (strEQ(d,"and")) return -KEY_and;
5584 if (strEQ(d,"abs")) return -KEY_abs;
85e6fe83 5585 break;
463ee0b2 5586 case 5:
a0d0e21e
LW
5587 if (strEQ(d,"alarm")) return -KEY_alarm;
5588 if (strEQ(d,"atan2")) return -KEY_atan2;
463ee0b2
LW
5589 break;
5590 case 6:
a0d0e21e 5591 if (strEQ(d,"accept")) return -KEY_accept;
463ee0b2
LW
5592 break;
5593 }
79072805
LW
5594 break;
5595 case 'B':
5596 if (strEQ(d,"BEGIN")) return KEY_BEGIN;
93a17b20 5597 break;
79072805 5598 case 'b':
a0d0e21e
LW
5599 if (strEQ(d,"bless")) return -KEY_bless;
5600 if (strEQ(d,"bind")) return -KEY_bind;
5601 if (strEQ(d,"binmode")) return -KEY_binmode;
5602 break;
5603 case 'C':
5604 if (strEQ(d,"CORE")) return -KEY_CORE;
7d30b5c4 5605 if (strEQ(d,"CHECK")) return KEY_CHECK;
79072805
LW
5606 break;
5607 case 'c':
5608 switch (len) {
5609 case 3:
a0d0e21e
LW
5610 if (strEQ(d,"cmp")) return -KEY_cmp;
5611 if (strEQ(d,"chr")) return -KEY_chr;
5612 if (strEQ(d,"cos")) return -KEY_cos;
79072805
LW
5613 break;
5614 case 4:
77bc9082 5615 if (strEQ(d,"chop")) return -KEY_chop;
79072805
LW
5616 break;
5617 case 5:
a0d0e21e
LW
5618 if (strEQ(d,"close")) return -KEY_close;
5619 if (strEQ(d,"chdir")) return -KEY_chdir;
77bc9082 5620 if (strEQ(d,"chomp")) return -KEY_chomp;
a0d0e21e
LW
5621 if (strEQ(d,"chmod")) return -KEY_chmod;
5622 if (strEQ(d,"chown")) return -KEY_chown;
5623 if (strEQ(d,"crypt")) return -KEY_crypt;
79072805
LW
5624 break;
5625 case 6:
a0d0e21e
LW
5626 if (strEQ(d,"chroot")) return -KEY_chroot;
5627 if (strEQ(d,"caller")) return -KEY_caller;
79072805
LW
5628 break;
5629 case 7:
a0d0e21e 5630 if (strEQ(d,"connect")) return -KEY_connect;
79072805
LW
5631 break;
5632 case 8:
a0d0e21e
LW
5633 if (strEQ(d,"closedir")) return -KEY_closedir;
5634 if (strEQ(d,"continue")) return -KEY_continue;
79072805
LW
5635 break;
5636 }
5637 break;
ed6116ce
LW
5638 case 'D':
5639 if (strEQ(d,"DESTROY")) return KEY_DESTROY;
5640 break;
79072805
LW
5641 case 'd':
5642 switch (len) {
5643 case 2:
5644 if (strEQ(d,"do")) return KEY_do;
5645 break;
5646 case 3:
a0d0e21e 5647 if (strEQ(d,"die")) return -KEY_die;
79072805
LW
5648 break;
5649 case 4:
a0d0e21e 5650 if (strEQ(d,"dump")) return -KEY_dump;
79072805
LW
5651 break;
5652 case 6:
5653 if (strEQ(d,"delete")) return KEY_delete;
5654 break;
5655 case 7:
5656 if (strEQ(d,"defined")) return KEY_defined;
a0d0e21e 5657 if (strEQ(d,"dbmopen")) return -KEY_dbmopen;
79072805
LW
5658 break;
5659 case 8:
a0d0e21e 5660 if (strEQ(d,"dbmclose")) return -KEY_dbmclose;
79072805
LW
5661 break;
5662 }
5663 break;
5664 case 'E':
79072805
LW
5665 if (strEQ(d,"END")) return KEY_END;
5666 break;
5667 case 'e':
5668 switch (len) {
5669 case 2:
a0d0e21e 5670 if (strEQ(d,"eq")) return -KEY_eq;
79072805
LW
5671 break;
5672 case 3:
a0d0e21e 5673 if (strEQ(d,"eof")) return -KEY_eof;
c963b151 5674 if (strEQ(d,"err")) return -KEY_err;
a0d0e21e 5675 if (strEQ(d,"exp")) return -KEY_exp;
79072805
LW
5676 break;
5677 case 4:
5678 if (strEQ(d,"else")) return KEY_else;
a0d0e21e 5679 if (strEQ(d,"exit")) return -KEY_exit;
79072805 5680 if (strEQ(d,"eval")) return KEY_eval;
a0d0e21e 5681 if (strEQ(d,"exec")) return -KEY_exec;
3a6a8333 5682 if (strEQ(d,"each")) return -KEY_each;
79072805
LW
5683 break;
5684 case 5:
5685 if (strEQ(d,"elsif")) return KEY_elsif;
5686 break;
a0d0e21e
LW
5687 case 6:
5688 if (strEQ(d,"exists")) return KEY_exists;
56da5a46
RGS
5689 if (strEQ(d,"elseif") && ckWARN_d(WARN_SYNTAX))
5690 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5691 "elseif should be elsif");
a0d0e21e 5692 break;
79072805 5693 case 8:
a0d0e21e
LW
5694 if (strEQ(d,"endgrent")) return -KEY_endgrent;
5695 if (strEQ(d,"endpwent")) return -KEY_endpwent;
79072805
LW
5696 break;
5697 case 9:
a0d0e21e 5698 if (strEQ(d,"endnetent")) return -KEY_endnetent;
79072805
LW
5699 break;
5700 case 10:
a0d0e21e
LW
5701 if (strEQ(d,"endhostent")) return -KEY_endhostent;
5702 if (strEQ(d,"endservent")) return -KEY_endservent;
79072805
LW
5703 break;
5704 case 11:
a0d0e21e 5705 if (strEQ(d,"endprotoent")) return -KEY_endprotoent;
79072805 5706 break;
a687059c 5707 }
a687059c 5708 break;
79072805
LW
5709 case 'f':
5710 switch (len) {
5711 case 3:
5712 if (strEQ(d,"for")) return KEY_for;
5713 break;
5714 case 4:
a0d0e21e 5715 if (strEQ(d,"fork")) return -KEY_fork;
79072805
LW
5716 break;
5717 case 5:
a0d0e21e
LW
5718 if (strEQ(d,"fcntl")) return -KEY_fcntl;
5719 if (strEQ(d,"flock")) return -KEY_flock;
79072805
LW
5720 break;
5721 case 6:
5722 if (strEQ(d,"format")) return KEY_format;
a0d0e21e 5723 if (strEQ(d,"fileno")) return -KEY_fileno;
79072805
LW
5724 break;
5725 case 7:
5726 if (strEQ(d,"foreach")) return KEY_foreach;
5727 break;
5728 case 8:
a0d0e21e 5729 if (strEQ(d,"formline")) return -KEY_formline;
79072805 5730 break;
378cc40b 5731 }
a687059c 5732 break;
79072805 5733 case 'g':
a687059c
LW
5734 if (strnEQ(d,"get",3)) {
5735 d += 3;
5736 if (*d == 'p') {
79072805
LW
5737 switch (len) {
5738 case 7:
a0d0e21e
LW
5739 if (strEQ(d,"ppid")) return -KEY_getppid;
5740 if (strEQ(d,"pgrp")) return -KEY_getpgrp;
79072805
LW
5741 break;
5742 case 8:
a0d0e21e
LW
5743 if (strEQ(d,"pwent")) return -KEY_getpwent;
5744 if (strEQ(d,"pwnam")) return -KEY_getpwnam;
5745 if (strEQ(d,"pwuid")) return -KEY_getpwuid;
79072805
LW
5746 break;
5747 case 11:
a0d0e21e
LW
5748 if (strEQ(d,"peername")) return -KEY_getpeername;
5749 if (strEQ(d,"protoent")) return -KEY_getprotoent;
5750 if (strEQ(d,"priority")) return -KEY_getpriority;
79072805
LW
5751 break;
5752 case 14:
a0d0e21e 5753 if (strEQ(d,"protobyname")) return -KEY_getprotobyname;
79072805
LW
5754 break;
5755 case 16:
a0d0e21e 5756 if (strEQ(d,"protobynumber"))return -KEY_getprotobynumber;
79072805
LW
5757 break;
5758 }
a687059c
LW
5759 }
5760 else if (*d == 'h') {
a0d0e21e
LW
5761 if (strEQ(d,"hostbyname")) return -KEY_gethostbyname;
5762 if (strEQ(d,"hostbyaddr")) return -KEY_gethostbyaddr;
5763 if (strEQ(d,"hostent")) return -KEY_gethostent;
a687059c
LW
5764 }
5765 else if (*d == 'n') {
a0d0e21e
LW
5766 if (strEQ(d,"netbyname")) return -KEY_getnetbyname;
5767 if (strEQ(d,"netbyaddr")) return -KEY_getnetbyaddr;
5768 if (strEQ(d,"netent")) return -KEY_getnetent;
a687059c
LW
5769 }
5770 else if (*d == 's') {
a0d0e21e
LW
5771 if (strEQ(d,"servbyname")) return -KEY_getservbyname;
5772 if (strEQ(d,"servbyport")) return -KEY_getservbyport;
5773 if (strEQ(d,"servent")) return -KEY_getservent;
5774 if (strEQ(d,"sockname")) return -KEY_getsockname;
5775 if (strEQ(d,"sockopt")) return -KEY_getsockopt;
a687059c
LW
5776 }
5777 else if (*d == 'g') {
a0d0e21e
LW
5778 if (strEQ(d,"grent")) return -KEY_getgrent;
5779 if (strEQ(d,"grnam")) return -KEY_getgrnam;
5780 if (strEQ(d,"grgid")) return -KEY_getgrgid;
a687059c
LW
5781 }
5782 else if (*d == 'l') {
a0d0e21e 5783 if (strEQ(d,"login")) return -KEY_getlogin;
a687059c 5784 }
a0d0e21e 5785 else if (strEQ(d,"c")) return -KEY_getc;
79072805 5786 break;
a687059c 5787 }
79072805
LW
5788 switch (len) {
5789 case 2:
a0d0e21e
LW
5790 if (strEQ(d,"gt")) return -KEY_gt;
5791 if (strEQ(d,"ge")) return -KEY_ge;
79072805
LW
5792 break;
5793 case 4:
5794 if (strEQ(d,"grep")) return KEY_grep;
5795 if (strEQ(d,"goto")) return KEY_goto;
fb73857a 5796 if (strEQ(d,"glob")) return KEY_glob;
79072805
LW
5797 break;
5798 case 6:
a0d0e21e 5799 if (strEQ(d,"gmtime")) return -KEY_gmtime;
79072805 5800 break;
378cc40b 5801 }
a687059c 5802 break;
79072805 5803 case 'h':
a0d0e21e 5804 if (strEQ(d,"hex")) return -KEY_hex;
a687059c 5805 break;
7d07dbc2
MB
5806 case 'I':
5807 if (strEQ(d,"INIT")) return KEY_INIT;
5808 break;
79072805
LW
5809 case 'i':
5810 switch (len) {
5811 case 2:
5812 if (strEQ(d,"if")) return KEY_if;
5813 break;
5814 case 3:
a0d0e21e 5815 if (strEQ(d,"int")) return -KEY_int;
79072805
LW
5816 break;
5817 case 5:
a0d0e21e
LW
5818 if (strEQ(d,"index")) return -KEY_index;
5819 if (strEQ(d,"ioctl")) return -KEY_ioctl;
79072805
LW
5820 break;
5821 }
a687059c 5822 break;
79072805 5823 case 'j':
a0d0e21e 5824 if (strEQ(d,"join")) return -KEY_join;
a687059c 5825 break;
79072805
LW
5826 case 'k':
5827 if (len == 4) {
3a6a8333 5828 if (strEQ(d,"keys")) return -KEY_keys;
a0d0e21e 5829 if (strEQ(d,"kill")) return -KEY_kill;
663a0e37 5830 }
79072805 5831 break;
79072805
LW
5832 case 'l':
5833 switch (len) {
5834 case 2:
a0d0e21e
LW
5835 if (strEQ(d,"lt")) return -KEY_lt;
5836 if (strEQ(d,"le")) return -KEY_le;
5837 if (strEQ(d,"lc")) return -KEY_lc;
79072805
LW
5838 break;
5839 case 3:
a0d0e21e 5840 if (strEQ(d,"log")) return -KEY_log;
79072805
LW
5841 break;
5842 case 4:
5843 if (strEQ(d,"last")) return KEY_last;
a0d0e21e 5844 if (strEQ(d,"link")) return -KEY_link;
c0329465 5845 if (strEQ(d,"lock")) return -KEY_lock;
395c3793 5846 break;
79072805
LW
5847 case 5:
5848 if (strEQ(d,"local")) return KEY_local;
a0d0e21e 5849 if (strEQ(d,"lstat")) return -KEY_lstat;
79072805
LW
5850 break;
5851 case 6:
a0d0e21e
LW
5852 if (strEQ(d,"length")) return -KEY_length;
5853 if (strEQ(d,"listen")) return -KEY_listen;
79072805
LW
5854 break;
5855 case 7:
a0d0e21e 5856 if (strEQ(d,"lcfirst")) return -KEY_lcfirst;
79072805
LW
5857 break;
5858 case 9:
a0d0e21e 5859 if (strEQ(d,"localtime")) return -KEY_localtime;
395c3793
LW
5860 break;
5861 }
a687059c 5862 break;
79072805
LW
5863 case 'm':
5864 switch (len) {
5865 case 1: return KEY_m;
93a17b20
LW
5866 case 2:
5867 if (strEQ(d,"my")) return KEY_my;
5868 break;
a0d0e21e
LW
5869 case 3:
5870 if (strEQ(d,"map")) return KEY_map;
5871 break;
79072805 5872 case 5:
a0d0e21e 5873 if (strEQ(d,"mkdir")) return -KEY_mkdir;
79072805
LW
5874 break;
5875 case 6:
a0d0e21e
LW
5876 if (strEQ(d,"msgctl")) return -KEY_msgctl;
5877 if (strEQ(d,"msgget")) return -KEY_msgget;
5878 if (strEQ(d,"msgrcv")) return -KEY_msgrcv;
5879 if (strEQ(d,"msgsnd")) return -KEY_msgsnd;
79072805
LW
5880 break;
5881 }
a687059c 5882 break;
79072805
LW
5883 case 'n':
5884 if (strEQ(d,"next")) return KEY_next;
a0d0e21e
LW
5885 if (strEQ(d,"ne")) return -KEY_ne;
5886 if (strEQ(d,"not")) return -KEY_not;
5887 if (strEQ(d,"no")) return KEY_no;
a687059c 5888 break;
79072805
LW
5889 case 'o':
5890 switch (len) {
463ee0b2 5891 case 2:
a0d0e21e 5892 if (strEQ(d,"or")) return -KEY_or;
463ee0b2 5893 break;
79072805 5894 case 3:
a0d0e21e
LW
5895 if (strEQ(d,"ord")) return -KEY_ord;
5896 if (strEQ(d,"oct")) return -KEY_oct;
77ca0c92 5897 if (strEQ(d,"our")) return KEY_our;
79072805
LW
5898 break;
5899 case 4:
a0d0e21e 5900 if (strEQ(d,"open")) return -KEY_open;
79072805
LW
5901 break;
5902 case 7:
a0d0e21e 5903 if (strEQ(d,"opendir")) return -KEY_opendir;
79072805 5904 break;
fe14fcc3 5905 }
a687059c 5906 break;
79072805
LW
5907 case 'p':
5908 switch (len) {
5909 case 3:
4e553d73 5910 if (strEQ(d,"pop")) return -KEY_pop;
a0d0e21e 5911 if (strEQ(d,"pos")) return KEY_pos;
79072805
LW
5912 break;
5913 case 4:
3a6a8333 5914 if (strEQ(d,"push")) return -KEY_push;
a0d0e21e
LW
5915 if (strEQ(d,"pack")) return -KEY_pack;
5916 if (strEQ(d,"pipe")) return -KEY_pipe;
79072805
LW
5917 break;
5918 case 5:
5919 if (strEQ(d,"print")) return KEY_print;
5920 break;
5921 case 6:
5922 if (strEQ(d,"printf")) return KEY_printf;
5923 break;
5924 case 7:
5925 if (strEQ(d,"package")) return KEY_package;
5926 break;
c07a80fd 5927 case 9:
5928 if (strEQ(d,"prototype")) return KEY_prototype;
663a0e37 5929 }
79072805
LW
5930 break;
5931 case 'q':
5932 if (len <= 2) {
5933 if (strEQ(d,"q")) return KEY_q;
8782bef2 5934 if (strEQ(d,"qr")) return KEY_qr;
79072805 5935 if (strEQ(d,"qq")) return KEY_qq;
8990e307 5936 if (strEQ(d,"qw")) return KEY_qw;
79072805 5937 if (strEQ(d,"qx")) return KEY_qx;
663a0e37 5938 }
a0d0e21e 5939 else if (strEQ(d,"quotemeta")) return -KEY_quotemeta;
79072805
LW
5940 break;
5941 case 'r':
5942 switch (len) {
5943 case 3:
a0d0e21e 5944 if (strEQ(d,"ref")) return -KEY_ref;
79072805
LW
5945 break;
5946 case 4:
a0d0e21e
LW
5947 if (strEQ(d,"read")) return -KEY_read;
5948 if (strEQ(d,"rand")) return -KEY_rand;
5949 if (strEQ(d,"recv")) return -KEY_recv;
79072805
LW
5950 if (strEQ(d,"redo")) return KEY_redo;
5951 break;
5952 case 5:
a0d0e21e
LW
5953 if (strEQ(d,"rmdir")) return -KEY_rmdir;
5954 if (strEQ(d,"reset")) return -KEY_reset;
79072805
LW
5955 break;
5956 case 6:
5957 if (strEQ(d,"return")) return KEY_return;
a0d0e21e
LW
5958 if (strEQ(d,"rename")) return -KEY_rename;
5959 if (strEQ(d,"rindex")) return -KEY_rindex;
79072805
LW
5960 break;
5961 case 7:
ec4ab249 5962 if (strEQ(d,"require")) return KEY_require;
a0d0e21e
LW
5963 if (strEQ(d,"reverse")) return -KEY_reverse;
5964 if (strEQ(d,"readdir")) return -KEY_readdir;
79072805
LW
5965 break;
5966 case 8:
a0d0e21e
LW
5967 if (strEQ(d,"readlink")) return -KEY_readlink;
5968 if (strEQ(d,"readline")) return -KEY_readline;
5969 if (strEQ(d,"readpipe")) return -KEY_readpipe;
79072805
LW
5970 break;
5971 case 9:
a0d0e21e 5972 if (strEQ(d,"rewinddir")) return -KEY_rewinddir;
79072805 5973 break;
a687059c 5974 }
79072805
LW
5975 break;
5976 case 's':
a687059c 5977 switch (d[1]) {
79072805 5978 case 0: return KEY_s;
a687059c 5979 case 'c':
79072805 5980 if (strEQ(d,"scalar")) return KEY_scalar;
a687059c
LW
5981 break;
5982 case 'e':
79072805
LW
5983 switch (len) {
5984 case 4:
a0d0e21e
LW
5985 if (strEQ(d,"seek")) return -KEY_seek;
5986 if (strEQ(d,"send")) return -KEY_send;
79072805
LW
5987 break;
5988 case 5:
a0d0e21e 5989 if (strEQ(d,"semop")) return -KEY_semop;
79072805
LW
5990 break;
5991 case 6:
a0d0e21e
LW
5992 if (strEQ(d,"select")) return -KEY_select;
5993 if (strEQ(d,"semctl")) return -KEY_semctl;
5994 if (strEQ(d,"semget")) return -KEY_semget;
79072805
LW
5995 break;
5996 case 7:
a0d0e21e
LW
5997 if (strEQ(d,"setpgrp")) return -KEY_setpgrp;
5998 if (strEQ(d,"seekdir")) return -KEY_seekdir;
79072805
LW
5999 break;
6000 case 8:
a0d0e21e
LW
6001 if (strEQ(d,"setpwent")) return -KEY_setpwent;
6002 if (strEQ(d,"setgrent")) return -KEY_setgrent;
79072805
LW
6003 break;
6004 case 9:
a0d0e21e 6005 if (strEQ(d,"setnetent")) return -KEY_setnetent;
79072805
LW
6006 break;
6007 case 10:
a0d0e21e
LW
6008 if (strEQ(d,"setsockopt")) return -KEY_setsockopt;
6009 if (strEQ(d,"sethostent")) return -KEY_sethostent;
6010 if (strEQ(d,"setservent")) return -KEY_setservent;
79072805
LW
6011 break;
6012 case 11:
a0d0e21e
LW
6013 if (strEQ(d,"setpriority")) return -KEY_setpriority;
6014 if (strEQ(d,"setprotoent")) return -KEY_setprotoent;
79072805
LW
6015 break;
6016 }
a687059c
LW
6017 break;
6018 case 'h':
79072805
LW
6019 switch (len) {
6020 case 5:
3a6a8333 6021 if (strEQ(d,"shift")) return -KEY_shift;
79072805
LW
6022 break;
6023 case 6:
a0d0e21e
LW
6024 if (strEQ(d,"shmctl")) return -KEY_shmctl;
6025 if (strEQ(d,"shmget")) return -KEY_shmget;
79072805
LW
6026 break;
6027 case 7:
a0d0e21e 6028 if (strEQ(d,"shmread")) return -KEY_shmread;
79072805
LW
6029 break;
6030 case 8:
a0d0e21e
LW
6031 if (strEQ(d,"shmwrite")) return -KEY_shmwrite;
6032 if (strEQ(d,"shutdown")) return -KEY_shutdown;
79072805
LW
6033 break;
6034 }
a687059c
LW
6035 break;
6036 case 'i':
a0d0e21e 6037 if (strEQ(d,"sin")) return -KEY_sin;
a687059c
LW
6038 break;
6039 case 'l':
a0d0e21e 6040 if (strEQ(d,"sleep")) return -KEY_sleep;
a687059c
LW
6041 break;
6042 case 'o':
79072805 6043 if (strEQ(d,"sort")) return KEY_sort;
a0d0e21e
LW
6044 if (strEQ(d,"socket")) return -KEY_socket;
6045 if (strEQ(d,"socketpair")) return -KEY_socketpair;
a687059c
LW
6046 break;
6047 case 'p':
79072805 6048 if (strEQ(d,"split")) return KEY_split;
a0d0e21e 6049 if (strEQ(d,"sprintf")) return -KEY_sprintf;
3a6a8333 6050 if (strEQ(d,"splice")) return -KEY_splice;
a687059c
LW
6051 break;
6052 case 'q':
a0d0e21e 6053 if (strEQ(d,"sqrt")) return -KEY_sqrt;
a687059c
LW
6054 break;
6055 case 'r':
a0d0e21e 6056 if (strEQ(d,"srand")) return -KEY_srand;
a687059c
LW
6057 break;
6058 case 't':
a0d0e21e 6059 if (strEQ(d,"stat")) return -KEY_stat;
79072805 6060 if (strEQ(d,"study")) return KEY_study;
a687059c
LW
6061 break;
6062 case 'u':
a0d0e21e 6063 if (strEQ(d,"substr")) return -KEY_substr;
79072805 6064 if (strEQ(d,"sub")) return KEY_sub;
a687059c
LW
6065 break;
6066 case 'y':
79072805
LW
6067 switch (len) {
6068 case 6:
a0d0e21e 6069 if (strEQ(d,"system")) return -KEY_system;
79072805
LW
6070 break;
6071 case 7:
a0d0e21e
LW
6072 if (strEQ(d,"symlink")) return -KEY_symlink;
6073 if (strEQ(d,"syscall")) return -KEY_syscall;
137443ea 6074 if (strEQ(d,"sysopen")) return -KEY_sysopen;
6075 if (strEQ(d,"sysread")) return -KEY_sysread;
6076 if (strEQ(d,"sysseek")) return -KEY_sysseek;
79072805
LW
6077 break;
6078 case 8:
a0d0e21e 6079 if (strEQ(d,"syswrite")) return -KEY_syswrite;
79072805 6080 break;
a687059c 6081 }
a687059c
LW
6082 break;
6083 }
6084 break;
79072805
LW
6085 case 't':
6086 switch (len) {
6087 case 2:
6088 if (strEQ(d,"tr")) return KEY_tr;
6089 break;
463ee0b2
LW
6090 case 3:
6091 if (strEQ(d,"tie")) return KEY_tie;
6092 break;
79072805 6093 case 4:
a0d0e21e 6094 if (strEQ(d,"tell")) return -KEY_tell;
c07a80fd 6095 if (strEQ(d,"tied")) return KEY_tied;
a0d0e21e 6096 if (strEQ(d,"time")) return -KEY_time;
79072805
LW
6097 break;
6098 case 5:
a0d0e21e 6099 if (strEQ(d,"times")) return -KEY_times;
79072805
LW
6100 break;
6101 case 7:
a0d0e21e 6102 if (strEQ(d,"telldir")) return -KEY_telldir;
79072805
LW
6103 break;
6104 case 8:
a0d0e21e 6105 if (strEQ(d,"truncate")) return -KEY_truncate;
79072805 6106 break;
378cc40b 6107 }
a687059c 6108 break;
79072805
LW
6109 case 'u':
6110 switch (len) {
6111 case 2:
a0d0e21e
LW
6112 if (strEQ(d,"uc")) return -KEY_uc;
6113 break;
6114 case 3:
6115 if (strEQ(d,"use")) return KEY_use;
79072805
LW
6116 break;
6117 case 5:
6118 if (strEQ(d,"undef")) return KEY_undef;
6119 if (strEQ(d,"until")) return KEY_until;
463ee0b2 6120 if (strEQ(d,"untie")) return KEY_untie;
a0d0e21e
LW
6121 if (strEQ(d,"utime")) return -KEY_utime;
6122 if (strEQ(d,"umask")) return -KEY_umask;
79072805
LW
6123 break;
6124 case 6:
6125 if (strEQ(d,"unless")) return KEY_unless;
a0d0e21e
LW
6126 if (strEQ(d,"unpack")) return -KEY_unpack;
6127 if (strEQ(d,"unlink")) return -KEY_unlink;
79072805
LW
6128 break;
6129 case 7:
3a6a8333 6130 if (strEQ(d,"unshift")) return -KEY_unshift;
a0d0e21e 6131 if (strEQ(d,"ucfirst")) return -KEY_ucfirst;
79072805 6132 break;
a687059c
LW
6133 }
6134 break;
79072805 6135 case 'v':
a0d0e21e
LW
6136 if (strEQ(d,"values")) return -KEY_values;
6137 if (strEQ(d,"vec")) return -KEY_vec;
a687059c 6138 break;
79072805
LW
6139 case 'w':
6140 switch (len) {
6141 case 4:
a0d0e21e
LW
6142 if (strEQ(d,"warn")) return -KEY_warn;
6143 if (strEQ(d,"wait")) return -KEY_wait;
79072805
LW
6144 break;
6145 case 5:
6146 if (strEQ(d,"while")) return KEY_while;
a0d0e21e 6147 if (strEQ(d,"write")) return -KEY_write;
79072805
LW
6148 break;
6149 case 7:
a0d0e21e 6150 if (strEQ(d,"waitpid")) return -KEY_waitpid;
79072805
LW
6151 break;
6152 case 9:
a0d0e21e 6153 if (strEQ(d,"wantarray")) return -KEY_wantarray;
79072805 6154 break;
2f3197b3 6155 }
a687059c 6156 break;
79072805 6157 case 'x':
a0d0e21e
LW
6158 if (len == 1) return -KEY_x;
6159 if (strEQ(d,"xor")) return -KEY_xor;
a687059c 6160 break;
79072805
LW
6161 case 'y':
6162 if (len == 1) return KEY_y;
6163 break;
6164 case 'z':
a687059c
LW
6165 break;
6166 }
79072805 6167 return 0;
a687059c
LW
6168}
6169
76e3520e 6170STATIC void
cea2e8a9 6171S_checkcomma(pTHX_ register char *s, char *name, char *what)
a687059c 6172{
2f3197b3
LW
6173 char *w;
6174
d008e5eb 6175 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
6176 if (ckWARN(WARN_SYNTAX)) {
6177 int level = 1;
6178 for (w = s+2; *w && level; w++) {
6179 if (*w == '(')
6180 ++level;
6181 else if (*w == ')')
6182 --level;
6183 }
6184 if (*w)
6185 for (; *w && isSPACE(*w); w++) ;
6186 if (!*w || !strchr(";|})]oaiuw!=", *w)) /* an advisory hack only... */
9014280d 6187 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 6188 "%s (...) interpreted as function",name);
d008e5eb 6189 }
2f3197b3 6190 }
3280af22 6191 while (s < PL_bufend && isSPACE(*s))
2f3197b3 6192 s++;
a687059c
LW
6193 if (*s == '(')
6194 s++;
3280af22 6195 while (s < PL_bufend && isSPACE(*s))
a687059c 6196 s++;
7e2040f0 6197 if (isIDFIRST_lazy_if(s,UTF)) {
2f3197b3 6198 w = s++;
7e2040f0 6199 while (isALNUM_lazy_if(s,UTF))
a687059c 6200 s++;
3280af22 6201 while (s < PL_bufend && isSPACE(*s))
a687059c 6202 s++;
e929a76b 6203 if (*s == ',') {
463ee0b2 6204 int kw;
e929a76b 6205 *s = '\0';
864dbfa3 6206 kw = keyword(w, s - w) || get_cv(w, FALSE) != 0;
e929a76b 6207 *s = ',';
463ee0b2 6208 if (kw)
e929a76b 6209 return;
cea2e8a9 6210 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
6211 }
6212 }
6213}
6214
423cee85
JH
6215/* Either returns sv, or mortalizes sv and returns a new SV*.
6216 Best used as sv=new_constant(..., sv, ...).
6217 If s, pv are NULL, calls subroutine with one argument,
6218 and type is used with error messages only. */
6219
b3ac6de7 6220STATIC SV *
dff6d3cd 6221S_new_constant(pTHX_ char *s, STRLEN len, const char *key, SV *sv, SV *pv,
9b0e499b 6222 const char *type)
b3ac6de7 6223{
b3ac6de7 6224 dSP;
3280af22 6225 HV *table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 6226 SV *res;
b3ac6de7
IZ
6227 SV **cvp;
6228 SV *cv, *typesv;
f0af216f 6229 const char *why1, *why2, *why3;
4e553d73 6230
f0af216f 6231 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
6232 SV *msg;
6233
f0af216f 6234 why2 = strEQ(key,"charnames")
41ab332f 6235 ? "(possibly a missing \"use charnames ...\")"
f0af216f 6236 : "";
4e553d73 6237 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
6238 (type ? type: "undef"), why2);
6239
6240 /* This is convoluted and evil ("goto considered harmful")
6241 * but I do not understand the intricacies of all the different
6242 * failure modes of %^H in here. The goal here is to make
6243 * the most probable error message user-friendly. --jhi */
6244
6245 goto msgdone;
6246
423cee85 6247 report:
4e553d73 6248 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 6249 (type ? type: "undef"), why1, why2, why3);
41ab332f 6250 msgdone:
423cee85
JH
6251 yyerror(SvPVX(msg));
6252 SvREFCNT_dec(msg);
6253 return sv;
6254 }
b3ac6de7
IZ
6255 cvp = hv_fetch(table, key, strlen(key), FALSE);
6256 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
6257 why1 = "$^H{";
6258 why2 = key;
f0af216f 6259 why3 = "} is not defined";
423cee85 6260 goto report;
b3ac6de7
IZ
6261 }
6262 sv_2mortal(sv); /* Parent created it permanently */
6263 cv = *cvp;
423cee85
JH
6264 if (!pv && s)
6265 pv = sv_2mortal(newSVpvn(s, len));
6266 if (type && pv)
6267 typesv = sv_2mortal(newSVpv(type, 0));
b3ac6de7 6268 else
423cee85 6269 typesv = &PL_sv_undef;
4e553d73 6270
e788e7d3 6271 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
6272 ENTER ;
6273 SAVETMPS;
4e553d73 6274
423cee85 6275 PUSHMARK(SP) ;
a5845cb7 6276 EXTEND(sp, 3);
423cee85
JH
6277 if (pv)
6278 PUSHs(pv);
b3ac6de7 6279 PUSHs(sv);
423cee85
JH
6280 if (pv)
6281 PUSHs(typesv);
b3ac6de7 6282 PUTBACK;
423cee85 6283 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 6284
423cee85 6285 SPAGAIN ;
4e553d73 6286
423cee85 6287 /* Check the eval first */
9b0e499b 6288 if (!PL_in_eval && SvTRUE(ERRSV)) {
423cee85
JH
6289 STRLEN n_a;
6290 sv_catpv(ERRSV, "Propagated");
6291 yyerror(SvPV(ERRSV, n_a)); /* Duplicates the message inside eval */
e1f15930 6292 (void)POPs;
423cee85
JH
6293 res = SvREFCNT_inc(sv);
6294 }
6295 else {
6296 res = POPs;
e1f15930 6297 (void)SvREFCNT_inc(res);
423cee85 6298 }
4e553d73 6299
423cee85
JH
6300 PUTBACK ;
6301 FREETMPS ;
6302 LEAVE ;
b3ac6de7 6303 POPSTACK;
4e553d73 6304
b3ac6de7 6305 if (!SvOK(res)) {
423cee85
JH
6306 why1 = "Call to &{$^H{";
6307 why2 = key;
f0af216f 6308 why3 = "}} did not return a defined value";
423cee85
JH
6309 sv = res;
6310 goto report;
9b0e499b 6311 }
423cee85 6312
9b0e499b 6313 return res;
b3ac6de7 6314}
4e553d73 6315
76e3520e 6316STATIC char *
cea2e8a9 6317S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2
LW
6318{
6319 register char *d = dest;
8903cb82 6320 register char *e = d + destlen - 3; /* two-character token, ending NUL */
463ee0b2 6321 for (;;) {
8903cb82 6322 if (d >= e)
cea2e8a9 6323 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6324 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6325 *d++ = *s++;
7e2040f0 6326 else if (*s == '\'' && allow_package && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6327 *d++ = ':';
6328 *d++ = ':';
6329 s++;
6330 }
c3e0f903 6331 else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
463ee0b2
LW
6332 *d++ = *s++;
6333 *d++ = *s++;
6334 }
fd400ab9 6335 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6336 char *t = s + UTF8SKIP(s);
fd400ab9 6337 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6338 t += UTF8SKIP(t);
6339 if (d + (t - s) > e)
cea2e8a9 6340 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6341 Copy(s, d, t - s, char);
6342 d += t - s;
6343 s = t;
6344 }
463ee0b2
LW
6345 else {
6346 *d = '\0';
6347 *slp = d - dest;
6348 return s;
e929a76b 6349 }
378cc40b
LW
6350 }
6351}
6352
76e3520e 6353STATIC char *
cea2e8a9 6354S_scan_ident(pTHX_ register char *s, register char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b
LW
6355{
6356 register char *d;
8903cb82 6357 register char *e;
79072805 6358 char *bracket = 0;
748a9306 6359 char funny = *s++;
378cc40b 6360
a0d0e21e
LW
6361 if (isSPACE(*s))
6362 s = skipspace(s);
378cc40b 6363 d = dest;
8903cb82 6364 e = d + destlen - 3; /* two-character token, ending NUL */
de3bb511 6365 if (isDIGIT(*s)) {
8903cb82 6366 while (isDIGIT(*s)) {
6367 if (d >= e)
cea2e8a9 6368 Perl_croak(aTHX_ ident_too_long);
378cc40b 6369 *d++ = *s++;
8903cb82 6370 }
378cc40b
LW
6371 }
6372 else {
463ee0b2 6373 for (;;) {
8903cb82 6374 if (d >= e)
cea2e8a9 6375 Perl_croak(aTHX_ ident_too_long);
834a4ddd 6376 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 6377 *d++ = *s++;
7e2040f0 6378 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
6379 *d++ = ':';
6380 *d++ = ':';
6381 s++;
6382 }
a0d0e21e 6383 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
6384 *d++ = *s++;
6385 *d++ = *s++;
6386 }
fd400ab9 6387 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 6388 char *t = s + UTF8SKIP(s);
fd400ab9 6389 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
6390 t += UTF8SKIP(t);
6391 if (d + (t - s) > e)
cea2e8a9 6392 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
6393 Copy(s, d, t - s, char);
6394 d += t - s;
6395 s = t;
6396 }
463ee0b2
LW
6397 else
6398 break;
6399 }
378cc40b
LW
6400 }
6401 *d = '\0';
6402 d = dest;
79072805 6403 if (*d) {
3280af22
NIS
6404 if (PL_lex_state != LEX_NORMAL)
6405 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 6406 return s;
378cc40b 6407 }
748a9306 6408 if (*s == '$' && s[1] &&
7e2040f0 6409 (isALNUM_lazy_if(s+1,UTF) || strchr("${", s[1]) || strnEQ(s+1,"::",2)) )
5cd24f17 6410 {
4810e5ec 6411 return s;
5cd24f17 6412 }
79072805
LW
6413 if (*s == '{') {
6414 bracket = s;
6415 s++;
6416 }
6417 else if (ck_uni)
6418 check_uni();
93a17b20 6419 if (s < send)
79072805
LW
6420 *d = *s++;
6421 d[1] = '\0';
2b92dfce 6422 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 6423 *d = toCTRL(*s);
6424 s++;
de3bb511 6425 }
79072805 6426 if (bracket) {
748a9306 6427 if (isSPACE(s[-1])) {
fa83b5b6 6428 while (s < send) {
6429 char ch = *s++;
bf4acbe4 6430 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 6431 *d = ch;
6432 break;
6433 }
6434 }
748a9306 6435 }
7e2040f0 6436 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 6437 d++;
a0ed51b3
LW
6438 if (UTF) {
6439 e = s;
155aba94 6440 while ((e < send && isALNUM_lazy_if(e,UTF)) || *e == ':') {
a0ed51b3 6441 e += UTF8SKIP(e);
fd400ab9 6442 while (e < send && UTF8_IS_CONTINUED(*e) && is_utf8_mark((U8*)e))
a0ed51b3
LW
6443 e += UTF8SKIP(e);
6444 }
6445 Copy(s, d, e - s, char);
6446 d += e - s;
6447 s = e;
6448 }
6449 else {
2b92dfce 6450 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 6451 *d++ = *s++;
2b92dfce 6452 if (d >= e)
cea2e8a9 6453 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 6454 }
79072805 6455 *d = '\0';
bf4acbe4 6456 while (s < send && SPACE_OR_TAB(*s)) s++;
ff68c719 6457 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
599cee73 6458 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
dff6d3cd 6459 const char *brack = *s == '[' ? "[...]" : "{...}";
9014280d 6460 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 6461 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
6462 funny, dest, brack, funny, dest, brack);
6463 }
79072805 6464 bracket++;
a0be28da 6465 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
6466 return s;
6467 }
4e553d73
NIS
6468 }
6469 /* Handle extended ${^Foo} variables
2b92dfce
GS
6470 * 1999-02-27 mjd-perl-patch@plover.com */
6471 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
6472 && isALNUM(*s))
6473 {
6474 d++;
6475 while (isALNUM(*s) && d < e) {
6476 *d++ = *s++;
6477 }
6478 if (d >= e)
cea2e8a9 6479 Perl_croak(aTHX_ ident_too_long);
2b92dfce 6480 *d = '\0';
79072805
LW
6481 }
6482 if (*s == '}') {
6483 s++;
7df0d042 6484 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 6485 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
6486 PL_expect = XREF;
6487 }
748a9306
LW
6488 if (funny == '#')
6489 funny = '@';
d008e5eb 6490 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 6491 if (ckWARN(WARN_AMBIGUOUS) &&
864dbfa3 6492 (keyword(dest, d - dest) || get_cv(dest, FALSE)))
d008e5eb 6493 {
9014280d 6494 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
6495 "Ambiguous use of %c{%s} resolved to %c%s",
6496 funny, dest, funny, dest);
6497 }
6498 }
79072805
LW
6499 }
6500 else {
6501 s = bracket; /* let the parser handle it */
93a17b20 6502 *dest = '\0';
79072805
LW
6503 }
6504 }
3280af22
NIS
6505 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
6506 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
6507 return s;
6508}
6509
cea2e8a9 6510void
2b36a5a0 6511Perl_pmflag(pTHX_ U32* pmfl, int ch)
a0d0e21e 6512{
bbce6d69 6513 if (ch == 'i')
a0d0e21e 6514 *pmfl |= PMf_FOLD;
a0d0e21e
LW
6515 else if (ch == 'g')
6516 *pmfl |= PMf_GLOBAL;
c90c0ff4 6517 else if (ch == 'c')
6518 *pmfl |= PMf_CONTINUE;
a0d0e21e
LW
6519 else if (ch == 'o')
6520 *pmfl |= PMf_KEEP;
6521 else if (ch == 'm')
6522 *pmfl |= PMf_MULTILINE;
6523 else if (ch == 's')
6524 *pmfl |= PMf_SINGLELINE;
6525 else if (ch == 'x')
6526 *pmfl |= PMf_EXTENDED;
6527}
378cc40b 6528
76e3520e 6529STATIC char *
cea2e8a9 6530S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 6531{
79072805
LW
6532 PMOP *pm;
6533 char *s;
378cc40b 6534
09bef843 6535 s = scan_str(start,FALSE,FALSE);
37fd879b 6536 if (!s)
cea2e8a9 6537 Perl_croak(aTHX_ "Search pattern not terminated");
bbce6d69 6538
8782bef2 6539 pm = (PMOP*)newPMOP(type, 0);
3280af22 6540 if (PL_multi_open == '?')
79072805 6541 pm->op_pmflags |= PMf_ONCE;
8782bef2
GB
6542 if(type == OP_QR) {
6543 while (*s && strchr("iomsx", *s))
6544 pmflag(&pm->op_pmflags,*s++);
6545 }
6546 else {
6547 while (*s && strchr("iogcmsx", *s))
6548 pmflag(&pm->op_pmflags,*s++);
6549 }
4ac733c9
MJD
6550 /* issue a warning if /c is specified,but /g is not */
6551 if (ckWARN(WARN_REGEXP) &&
6552 (pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
6553 {
6554 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_without_g);
6555 }
6556
4633a7c4 6557 pm->op_pmpermflags = pm->op_pmflags;
bbce6d69 6558
3280af22 6559 PL_lex_op = (OP*)pm;
79072805 6560 yylval.ival = OP_MATCH;
378cc40b
LW
6561 return s;
6562}
6563
76e3520e 6564STATIC char *
cea2e8a9 6565S_scan_subst(pTHX_ char *start)
79072805 6566{
a0d0e21e 6567 register char *s;
79072805 6568 register PMOP *pm;
4fdae800 6569 I32 first_start;
79072805
LW
6570 I32 es = 0;
6571
79072805
LW
6572 yylval.ival = OP_NULL;
6573
09bef843 6574 s = scan_str(start,FALSE,FALSE);
79072805 6575
37fd879b 6576 if (!s)
cea2e8a9 6577 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 6578
3280af22 6579 if (s[-1] == PL_multi_open)
79072805
LW
6580 s--;
6581
3280af22 6582 first_start = PL_multi_start;
09bef843 6583 s = scan_str(s,FALSE,FALSE);
79072805 6584 if (!s) {
37fd879b 6585 if (PL_lex_stuff) {
3280af22 6586 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6587 PL_lex_stuff = Nullsv;
6588 }
cea2e8a9 6589 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 6590 }
3280af22 6591 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 6592
79072805 6593 pm = (PMOP*)newPMOP(OP_SUBST, 0);
48c036b1 6594 while (*s) {
a687059c
LW
6595 if (*s == 'e') {
6596 s++;
2f3197b3 6597 es++;
a687059c 6598 }
b3eb6a9b 6599 else if (strchr("iogcmsx", *s))
a0d0e21e 6600 pmflag(&pm->op_pmflags,*s++);
48c036b1
GS
6601 else
6602 break;
378cc40b 6603 }
79072805 6604
64e578a2
MJD
6605 /* /c is not meaningful with s/// */
6606 if (ckWARN(WARN_REGEXP) && (pm->op_pmflags & PMf_CONTINUE))
4ac733c9 6607 {
64e578a2 6608 Perl_warner(aTHX_ packWARN(WARN_REGEXP), c_in_subst);
4ac733c9
MJD
6609 }
6610
79072805
LW
6611 if (es) {
6612 SV *repl;
0244c3a4
GS
6613 PL_sublex_info.super_bufptr = s;
6614 PL_sublex_info.super_bufend = PL_bufend;
6615 PL_multi_end = 0;
79072805 6616 pm->op_pmflags |= PMf_EVAL;
79cb57f6 6617 repl = newSVpvn("",0);
463ee0b2 6618 while (es-- > 0)
a0d0e21e 6619 sv_catpv(repl, es ? "eval " : "do ");
79072805 6620 sv_catpvn(repl, "{ ", 2);
3280af22 6621 sv_catsv(repl, PL_lex_repl);
79072805 6622 sv_catpvn(repl, " };", 2);
25da4f38 6623 SvEVALED_on(repl);
3280af22
NIS
6624 SvREFCNT_dec(PL_lex_repl);
6625 PL_lex_repl = repl;
378cc40b 6626 }
79072805 6627
4633a7c4 6628 pm->op_pmpermflags = pm->op_pmflags;
3280af22 6629 PL_lex_op = (OP*)pm;
79072805 6630 yylval.ival = OP_SUBST;
378cc40b
LW
6631 return s;
6632}
6633
76e3520e 6634STATIC char *
cea2e8a9 6635S_scan_trans(pTHX_ char *start)
378cc40b 6636{
a0d0e21e 6637 register char* s;
11343788 6638 OP *o;
79072805
LW
6639 short *tbl;
6640 I32 squash;
a0ed51b3 6641 I32 del;
79072805
LW
6642 I32 complement;
6643
6644 yylval.ival = OP_NULL;
6645
09bef843 6646 s = scan_str(start,FALSE,FALSE);
37fd879b 6647 if (!s)
cea2e8a9 6648 Perl_croak(aTHX_ "Transliteration pattern not terminated");
3280af22 6649 if (s[-1] == PL_multi_open)
2f3197b3
LW
6650 s--;
6651
09bef843 6652 s = scan_str(s,FALSE,FALSE);
79072805 6653 if (!s) {
37fd879b 6654 if (PL_lex_stuff) {
3280af22 6655 SvREFCNT_dec(PL_lex_stuff);
37fd879b
HS
6656 PL_lex_stuff = Nullsv;
6657 }
cea2e8a9 6658 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 6659 }
79072805 6660
a0ed51b3 6661 complement = del = squash = 0;
6940069f 6662 while (strchr("cds", *s)) {
395c3793 6663 if (*s == 'c')
79072805 6664 complement = OPpTRANS_COMPLEMENT;
395c3793 6665 else if (*s == 'd')
a0ed51b3
LW
6666 del = OPpTRANS_DELETE;
6667 else if (*s == 's')
79072805 6668 squash = OPpTRANS_SQUASH;
395c3793
LW
6669 s++;
6670 }
8973db79
JH
6671
6672 New(803, tbl, complement&&!del?258:256, short);
6673 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
6674 o->op_private &= ~OPpTRANS_ALL;
6675 o->op_private |= del|squash|complement|
7948272d
NIS
6676 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
6677 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 6678
3280af22 6679 PL_lex_op = o;
79072805
LW
6680 yylval.ival = OP_TRANS;
6681 return s;
6682}
6683
76e3520e 6684STATIC char *
cea2e8a9 6685S_scan_heredoc(pTHX_ register char *s)
79072805
LW
6686{
6687 SV *herewas;
6688 I32 op_type = OP_SCALAR;
6689 I32 len;
6690 SV *tmpstr;
6691 char term;
6692 register char *d;
fc36a67e 6693 register char *e;
4633a7c4 6694 char *peek;
3280af22 6695 int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
79072805
LW
6696
6697 s += 2;
3280af22
NIS
6698 d = PL_tokenbuf;
6699 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 6700 if (!outer)
79072805 6701 *d++ = '\n';
bf4acbe4 6702 for (peek = s; SPACE_OR_TAB(*peek); peek++) ;
4633a7c4
LW
6703 if (*peek && strchr("`'\"",*peek)) {
6704 s = peek;
79072805 6705 term = *s++;
3280af22 6706 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 6707 d += len;
3280af22 6708 if (s < PL_bufend)
79072805 6709 s++;
79072805
LW
6710 }
6711 else {
6712 if (*s == '\\')
6713 s++, term = '\'';
6714 else
6715 term = '"';
7e2040f0 6716 if (!isALNUM_lazy_if(s,UTF))
12bcd1a6 6717 deprecate_old("bare << to mean <<\"\"");
7e2040f0 6718 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 6719 if (d < e)
6720 *d++ = *s;
6721 }
6722 }
3280af22 6723 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 6724 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
6725 *d++ = '\n';
6726 *d = '\0';
3280af22 6727 len = d - PL_tokenbuf;
6a27c188 6728#ifndef PERL_STRICT_CR
f63a84b2
LW
6729 d = strchr(s, '\r');
6730 if (d) {
6731 char *olds = s;
6732 s = d;
3280af22 6733 while (s < PL_bufend) {
f63a84b2
LW
6734 if (*s == '\r') {
6735 *d++ = '\n';
6736 if (*++s == '\n')
6737 s++;
6738 }
6739 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
6740 *d++ = *s++;
6741 s++;
6742 }
6743 else
6744 *d++ = *s++;
6745 }
6746 *d = '\0';
3280af22
NIS
6747 PL_bufend = d;
6748 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2
LW
6749 s = olds;
6750 }
6751#endif
79072805 6752 d = "\n";
3280af22 6753 if (outer || !(d=ninstr(s,PL_bufend,d,d+1)))
79cb57f6 6754 herewas = newSVpvn(s,PL_bufend-s);
79072805 6755 else
79cb57f6 6756 s--, herewas = newSVpvn(s,d-s);
79072805 6757 s += SvCUR(herewas);
748a9306 6758
8d6dde3e 6759 tmpstr = NEWSV(87,79);
748a9306
LW
6760 sv_upgrade(tmpstr, SVt_PVIV);
6761 if (term == '\'') {
79072805 6762 op_type = OP_CONST;
748a9306
LW
6763 SvIVX(tmpstr) = -1;
6764 }
6765 else if (term == '`') {
79072805 6766 op_type = OP_BACKTICK;
748a9306
LW
6767 SvIVX(tmpstr) = '\\';
6768 }
79072805
LW
6769
6770 CLINE;
57843af0 6771 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
6772 PL_multi_open = PL_multi_close = '<';
6773 term = *PL_tokenbuf;
0244c3a4
GS
6774 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6775 char *bufptr = PL_sublex_info.super_bufptr;
6776 char *bufend = PL_sublex_info.super_bufend;
6777 char *olds = s - SvCUR(herewas);
6778 s = strchr(bufptr, '\n');
6779 if (!s)
6780 s = bufend;
6781 d = s;
6782 while (s < bufend &&
6783 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
6784 if (*s++ == '\n')
57843af0 6785 CopLINE_inc(PL_curcop);
0244c3a4
GS
6786 }
6787 if (s >= bufend) {
eb160463 6788 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
6789 missingterm(PL_tokenbuf);
6790 }
6791 sv_setpvn(herewas,bufptr,d-bufptr+1);
6792 sv_setpvn(tmpstr,d+1,s-d);
6793 s += len - 1;
6794 sv_catpvn(herewas,s,bufend-s);
689badd5 6795 Copy(SvPVX(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
6796
6797 s = olds;
6798 goto retval;
6799 }
6800 else if (!outer) {
79072805 6801 d = s;
3280af22
NIS
6802 while (s < PL_bufend &&
6803 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 6804 if (*s++ == '\n')
57843af0 6805 CopLINE_inc(PL_curcop);
79072805 6806 }
3280af22 6807 if (s >= PL_bufend) {
eb160463 6808 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6809 missingterm(PL_tokenbuf);
79072805
LW
6810 }
6811 sv_setpvn(tmpstr,d+1,s-d);
6812 s += len - 1;
57843af0 6813 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 6814
3280af22
NIS
6815 sv_catpvn(herewas,s,PL_bufend-s);
6816 sv_setsv(PL_linestr,herewas);
6817 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
6818 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6819 PL_last_lop = PL_last_uni = Nullch;
79072805
LW
6820 }
6821 else
6822 sv_setpvn(tmpstr,"",0); /* avoid "uninitialized" warning */
3280af22 6823 while (s >= PL_bufend) { /* multiple line string? */
fd2d0953 6824 if (!outer ||
3280af22 6825 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
eb160463 6826 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 6827 missingterm(PL_tokenbuf);
79072805 6828 }
57843af0 6829 CopLINE_inc(PL_curcop);
3280af22 6830 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 6831 PL_last_lop = PL_last_uni = Nullch;
6a27c188 6832#ifndef PERL_STRICT_CR
3280af22 6833 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
6834 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
6835 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 6836 {
3280af22
NIS
6837 PL_bufend[-2] = '\n';
6838 PL_bufend--;
6839 SvCUR_set(PL_linestr, PL_bufend - SvPVX(PL_linestr));
f63a84b2 6840 }
3280af22
NIS
6841 else if (PL_bufend[-1] == '\r')
6842 PL_bufend[-1] = '\n';
f63a84b2 6843 }
3280af22
NIS
6844 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
6845 PL_bufend[-1] = '\n';
f63a84b2 6846#endif
3280af22 6847 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
6848 SV *sv = NEWSV(88,0);
6849
93a17b20 6850 sv_upgrade(sv, SVt_PVMG);
3280af22 6851 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
6852 (void)SvIOK_on(sv);
6853 SvIVX(sv) = 0;
57843af0 6854 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
79072805 6855 }
3280af22 6856 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
1de9afcd
RGS
6857 STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
6858 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
6859 sv_catsv(PL_linestr,herewas);
6860 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 6861 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
6862 }
6863 else {
3280af22
NIS
6864 s = PL_bufend;
6865 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
6866 }
6867 }
79072805 6868 s++;
0244c3a4 6869retval:
57843af0 6870 PL_multi_end = CopLINE(PL_curcop);
79072805
LW
6871 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
6872 SvLEN_set(tmpstr, SvCUR(tmpstr) + 1);
463ee0b2 6873 Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
79072805 6874 }
8990e307 6875 SvREFCNT_dec(herewas);
2f31ce75
JH
6876 if (!IN_BYTES) {
6877 if (UTF && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
6878 SvUTF8_on(tmpstr);
6879 else if (PL_encoding)
6880 sv_recode_to_utf8(tmpstr, PL_encoding);
6881 }
3280af22 6882 PL_lex_stuff = tmpstr;
79072805
LW
6883 yylval.ival = op_type;
6884 return s;
6885}
6886
02aa26ce
NT
6887/* scan_inputsymbol
6888 takes: current position in input buffer
6889 returns: new position in input buffer
6890 side-effects: yylval and lex_op are set.
6891
6892 This code handles:
6893
6894 <> read from ARGV
6895 <FH> read from filehandle
6896 <pkg::FH> read from package qualified filehandle
6897 <pkg'FH> read from package qualified filehandle
6898 <$fh> read from filehandle in $fh
6899 <*.h> filename glob
6900
6901*/
6902
76e3520e 6903STATIC char *
cea2e8a9 6904S_scan_inputsymbol(pTHX_ char *start)
79072805 6905{
02aa26ce 6906 register char *s = start; /* current position in buffer */
79072805 6907 register char *d;
fc36a67e 6908 register char *e;
1b420867 6909 char *end;
79072805
LW
6910 I32 len;
6911
3280af22
NIS
6912 d = PL_tokenbuf; /* start of temp holding space */
6913 e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
1b420867
GS
6914 end = strchr(s, '\n');
6915 if (!end)
6916 end = PL_bufend;
6917 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
6918
6919 /* die if we didn't have space for the contents of the <>,
1b420867 6920 or if it didn't end, or if we see a newline
02aa26ce
NT
6921 */
6922
3280af22 6923 if (len >= sizeof PL_tokenbuf)
cea2e8a9 6924 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 6925 if (s >= end)
cea2e8a9 6926 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 6927
fc36a67e 6928 s++;
02aa26ce
NT
6929
6930 /* check for <$fh>
6931 Remember, only scalar variables are interpreted as filehandles by
6932 this code. Anything more complex (e.g., <$fh{$num}>) will be
6933 treated as a glob() call.
6934 This code makes use of the fact that except for the $ at the front,
6935 a scalar variable and a filehandle look the same.
6936 */
4633a7c4 6937 if (*d == '$' && d[1]) d++;
02aa26ce
NT
6938
6939 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 6940 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 6941 d++;
02aa26ce
NT
6942
6943 /* If we've tried to read what we allow filehandles to look like, and
6944 there's still text left, then it must be a glob() and not a getline.
6945 Use scan_str to pull out the stuff between the <> and treat it
6946 as nothing more than a string.
6947 */
6948
3280af22 6949 if (d - PL_tokenbuf != len) {
79072805
LW
6950 yylval.ival = OP_GLOB;
6951 set_csh();
09bef843 6952 s = scan_str(start,FALSE,FALSE);
79072805 6953 if (!s)
cea2e8a9 6954 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
6955 return s;
6956 }
395c3793 6957 else {
9b3023bc
RGS
6958 bool readline_overriden = FALSE;
6959 GV *gv_readline = Nullgv;
6960 GV **gvp;
02aa26ce 6961 /* we're in a filehandle read situation */
3280af22 6962 d = PL_tokenbuf;
02aa26ce
NT
6963
6964 /* turn <> into <ARGV> */
79072805 6965 if (!len)
689badd5 6966 Copy("ARGV",d,5,char);
02aa26ce 6967
9b3023bc 6968 /* Check whether readline() is overriden */
ba979b31
NIS
6969 if (((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV))
6970 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 6971 ||
ba979b31 6972 ((gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE))
9b3023bc 6973 && (gv_readline = *gvp) != (GV*)&PL_sv_undef
ba979b31 6974 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
6975 readline_overriden = TRUE;
6976
02aa26ce
NT
6977 /* if <$fh>, create the ops to turn the variable into a
6978 filehandle
6979 */
79072805 6980 if (*d == '$') {
a0d0e21e 6981 I32 tmp;
02aa26ce
NT
6982
6983 /* try to find it in the pad for this block, otherwise find
6984 add symbol table ops
6985 */
11343788 6986 if ((tmp = pad_findmy(d)) != NOT_IN_PAD) {
dd2155a4
DM
6987 if (PAD_COMPNAME_FLAGS(tmp) & SVpad_OUR) {
6988 SV *sym = sv_2mortal(
6989 newSVpv(HvNAME(PAD_COMPNAME_OURSTASH(tmp)),0));
f558d5af
JH
6990 sv_catpvn(sym, "::", 2);
6991 sv_catpv(sym, d+1);
6992 d = SvPVX(sym);
6993 goto intro_sym;
6994 }
6995 else {
6996 OP *o = newOP(OP_PADSV, 0);
6997 o->op_targ = tmp;
9b3023bc
RGS
6998 PL_lex_op = readline_overriden
6999 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7000 append_elem(OP_LIST, o,
7001 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
7002 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 7003 }
a0d0e21e
LW
7004 }
7005 else {
f558d5af
JH
7006 GV *gv;
7007 ++d;
7008intro_sym:
7009 gv = gv_fetchpv(d,
7010 (PL_in_eval
7011 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 7012 : GV_ADDMULTI),
f558d5af 7013 SVt_PV);
9b3023bc
RGS
7014 PL_lex_op = readline_overriden
7015 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7016 append_elem(OP_LIST,
7017 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
7018 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
7019 : (OP*)newUNOP(OP_READLINE, 0,
7020 newUNOP(OP_RV2SV, 0,
7021 newGVOP(OP_GV, 0, gv)));
a0d0e21e 7022 }
7c6fadd6
RGS
7023 if (!readline_overriden)
7024 PL_lex_op->op_flags |= OPf_SPECIAL;
f5284f61 7025 /* we created the ops in PL_lex_op, so make yylval.ival a null op */
79072805
LW
7026 yylval.ival = OP_NULL;
7027 }
02aa26ce
NT
7028
7029 /* If it's none of the above, it must be a literal filehandle
7030 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 7031 else {
85e6fe83 7032 GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO);
9b3023bc
RGS
7033 PL_lex_op = readline_overriden
7034 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
7035 append_elem(OP_LIST,
7036 newGVOP(OP_GV, 0, gv),
7037 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
7038 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
79072805
LW
7039 yylval.ival = OP_NULL;
7040 }
7041 }
02aa26ce 7042
79072805
LW
7043 return s;
7044}
7045
02aa26ce
NT
7046
7047/* scan_str
7048 takes: start position in buffer
09bef843
SB
7049 keep_quoted preserve \ on the embedded delimiter(s)
7050 keep_delims preserve the delimiters around the string
02aa26ce
NT
7051 returns: position to continue reading from buffer
7052 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
7053 updates the read buffer.
7054
7055 This subroutine pulls a string out of the input. It is called for:
7056 q single quotes q(literal text)
7057 ' single quotes 'literal text'
7058 qq double quotes qq(interpolate $here please)
7059 " double quotes "interpolate $here please"
7060 qx backticks qx(/bin/ls -l)
7061 ` backticks `/bin/ls -l`
7062 qw quote words @EXPORT_OK = qw( func() $spam )
7063 m// regexp match m/this/
7064 s/// regexp substitute s/this/that/
7065 tr/// string transliterate tr/this/that/
7066 y/// string transliterate y/this/that/
7067 ($*@) sub prototypes sub foo ($)
09bef843 7068 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
7069 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
7070
7071 In most of these cases (all but <>, patterns and transliterate)
7072 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
7073 calls scan_str(). s/// makes yylex() call scan_subst() which calls
7074 scan_str(). tr/// and y/// make yylex() call scan_trans() which
7075 calls scan_str().
4e553d73 7076
02aa26ce
NT
7077 It skips whitespace before the string starts, and treats the first
7078 character as the delimiter. If the delimiter is one of ([{< then
7079 the corresponding "close" character )]}> is used as the closing
7080 delimiter. It allows quoting of delimiters, and if the string has
7081 balanced delimiters ([{<>}]) it allows nesting.
7082
37fd879b
HS
7083 On success, the SV with the resulting string is put into lex_stuff or,
7084 if that is already non-NULL, into lex_repl. The second case occurs only
7085 when parsing the RHS of the special constructs s/// and tr/// (y///).
7086 For convenience, the terminating delimiter character is stuffed into
7087 SvIVX of the SV.
02aa26ce
NT
7088*/
7089
76e3520e 7090STATIC char *
09bef843 7091S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 7092{
02aa26ce
NT
7093 SV *sv; /* scalar value: string */
7094 char *tmps; /* temp string, used for delimiter matching */
7095 register char *s = start; /* current position in the buffer */
7096 register char term; /* terminating character */
7097 register char *to; /* current position in the sv's data */
7098 I32 brackets = 1; /* bracket nesting level */
89491803 7099 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e
IH
7100 I32 termcode; /* terminating char. code */
7101 U8 termstr[UTF8_MAXLEN]; /* terminating string */
7102 STRLEN termlen; /* length of terminating string */
7103 char *last = NULL; /* last position for nesting bracket */
02aa26ce
NT
7104
7105 /* skip space before the delimiter */
fb73857a 7106 if (isSPACE(*s))
7107 s = skipspace(s);
02aa26ce
NT
7108
7109 /* mark where we are, in case we need to report errors */
79072805 7110 CLINE;
02aa26ce
NT
7111
7112 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 7113 term = *s;
220e2d4e
IH
7114 if (!UTF) {
7115 termcode = termstr[0] = term;
7116 termlen = 1;
7117 }
7118 else {
f3b9ce0f 7119 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
7120 Copy(s, termstr, termlen, U8);
7121 if (!UTF8_IS_INVARIANT(term))
7122 has_utf8 = TRUE;
7123 }
b1c7b182 7124
02aa26ce 7125 /* mark where we are */
57843af0 7126 PL_multi_start = CopLINE(PL_curcop);
3280af22 7127 PL_multi_open = term;
02aa26ce
NT
7128
7129 /* find corresponding closing delimiter */
93a17b20 7130 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
7131 termcode = termstr[0] = term = tmps[5];
7132
3280af22 7133 PL_multi_close = term;
79072805 7134
02aa26ce 7135 /* create a new SV to hold the contents. 87 is leak category, I'm
8d6dde3e
IZ
7136 assuming. 79 is the SV's initial length. What a random number. */
7137 sv = NEWSV(87,79);
ed6116ce 7138 sv_upgrade(sv, SVt_PVIV);
220e2d4e 7139 SvIVX(sv) = termcode;
a0d0e21e 7140 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
7141
7142 /* move past delimiter and try to read a complete string */
09bef843 7143 if (keep_delims)
220e2d4e
IH
7144 sv_catpvn(sv, s, termlen);
7145 s += termlen;
93a17b20 7146 for (;;) {
220e2d4e
IH
7147 if (PL_encoding && !UTF) {
7148 bool cont = TRUE;
7149
7150 while (cont) {
7151 int offset = s - SvPVX(PL_linestr);
7152 bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 7153 &offset, (char*)termstr, termlen);
220e2d4e
IH
7154 char *ns = SvPVX(PL_linestr) + offset;
7155 char *svlast = SvEND(sv) - 1;
7156
7157 for (; s < ns; s++) {
7158 if (*s == '\n' && !PL_rsfp)
7159 CopLINE_inc(PL_curcop);
7160 }
7161 if (!found)
7162 goto read_more_line;
7163 else {
7164 /* handle quoted delimiters */
52327caf 7165 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
220e2d4e
IH
7166 char *t;
7167 for (t = svlast-2; t >= SvPVX(sv) && *t == '\\';)
7168 t--;
7169 if ((svlast-1 - t) % 2) {
7170 if (!keep_quoted) {
7171 *(svlast-1) = term;
7172 *svlast = '\0';
7173 SvCUR_set(sv, SvCUR(sv) - 1);
7174 }
7175 continue;
7176 }
7177 }
7178 if (PL_multi_open == PL_multi_close) {
7179 cont = FALSE;
7180 }
7181 else {
7182 char *t, *w;
7183 if (!last)
7184 last = SvPVX(sv);
7185 for (w = t = last; t < svlast; w++, t++) {
7186 /* At here, all closes are "was quoted" one,
7187 so we don't check PL_multi_close. */
7188 if (*t == '\\') {
7189 if (!keep_quoted && *(t+1) == PL_multi_open)
7190 t++;
7191 else
7192 *w++ = *t++;
7193 }
7194 else if (*t == PL_multi_open)
7195 brackets++;
7196
7197 *w = *t;
7198 }
7199 if (w < t) {
7200 *w++ = term;
7201 *w = '\0';
7202 SvCUR_set(sv, w - SvPVX(sv));
7203 }
7204 last = w;
7205 if (--brackets <= 0)
7206 cont = FALSE;
7207 }
7208 }
7209 }
7210 if (!keep_delims) {
7211 SvCUR_set(sv, SvCUR(sv) - 1);
7212 *SvEND(sv) = '\0';
7213 }
7214 break;
7215 }
7216
02aa26ce 7217 /* extend sv if need be */
3280af22 7218 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 7219 /* set 'to' to the next character in the sv's string */
463ee0b2 7220 to = SvPVX(sv)+SvCUR(sv);
09bef843 7221
02aa26ce 7222 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
7223 if (PL_multi_open == PL_multi_close) {
7224 for (; s < PL_bufend; s++,to++) {
02aa26ce 7225 /* embedded newlines increment the current line number */
3280af22 7226 if (*s == '\n' && !PL_rsfp)
57843af0 7227 CopLINE_inc(PL_curcop);
02aa26ce 7228 /* handle quoted delimiters */
3280af22 7229 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 7230 if (!keep_quoted && s[1] == term)
a0d0e21e 7231 s++;
02aa26ce 7232 /* any other quotes are simply copied straight through */
a0d0e21e
LW
7233 else
7234 *to++ = *s++;
7235 }
02aa26ce
NT
7236 /* terminate when run out of buffer (the for() condition), or
7237 have found the terminator */
220e2d4e
IH
7238 else if (*s == term) {
7239 if (termlen == 1)
7240 break;
f3b9ce0f 7241 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
7242 break;
7243 }
63cd0674 7244 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 7245 has_utf8 = TRUE;
93a17b20
LW
7246 *to = *s;
7247 }
7248 }
02aa26ce
NT
7249
7250 /* if the terminator isn't the same as the start character (e.g.,
7251 matched brackets), we have to allow more in the quoting, and
7252 be prepared for nested brackets.
7253 */
93a17b20 7254 else {
02aa26ce 7255 /* read until we run out of string, or we find the terminator */
3280af22 7256 for (; s < PL_bufend; s++,to++) {
02aa26ce 7257 /* embedded newlines increment the line count */
3280af22 7258 if (*s == '\n' && !PL_rsfp)
57843af0 7259 CopLINE_inc(PL_curcop);
02aa26ce 7260 /* backslashes can escape the open or closing characters */
3280af22 7261 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
7262 if (!keep_quoted &&
7263 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
7264 s++;
7265 else
7266 *to++ = *s++;
7267 }
02aa26ce 7268 /* allow nested opens and closes */
3280af22 7269 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 7270 break;
3280af22 7271 else if (*s == PL_multi_open)
93a17b20 7272 brackets++;
63cd0674 7273 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 7274 has_utf8 = TRUE;
93a17b20
LW
7275 *to = *s;
7276 }
7277 }
02aa26ce 7278 /* terminate the copied string and update the sv's end-of-string */
93a17b20 7279 *to = '\0';
463ee0b2 7280 SvCUR_set(sv, to - SvPVX(sv));
93a17b20 7281
02aa26ce
NT
7282 /*
7283 * this next chunk reads more into the buffer if we're not done yet
7284 */
7285
b1c7b182
GS
7286 if (s < PL_bufend)
7287 break; /* handle case where we are done yet :-) */
79072805 7288
6a27c188 7289#ifndef PERL_STRICT_CR
f63a84b2 7290 if (to - SvPVX(sv) >= 2) {
c6f14548
GS
7291 if ((to[-2] == '\r' && to[-1] == '\n') ||
7292 (to[-2] == '\n' && to[-1] == '\r'))
7293 {
f63a84b2
LW
7294 to[-2] = '\n';
7295 to--;
7296 SvCUR_set(sv, to - SvPVX(sv));
7297 }
7298 else if (to[-1] == '\r')
7299 to[-1] = '\n';
7300 }
7301 else if (to - SvPVX(sv) == 1 && to[-1] == '\r')
7302 to[-1] = '\n';
7303#endif
7304
220e2d4e 7305 read_more_line:
02aa26ce
NT
7306 /* if we're out of file, or a read fails, bail and reset the current
7307 line marker so we can report where the unterminated string began
7308 */
3280af22
NIS
7309 if (!PL_rsfp ||
7310 !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
c07a80fd 7311 sv_free(sv);
eb160463 7312 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
79072805
LW
7313 return Nullch;
7314 }
02aa26ce 7315 /* we read a line, so increment our line counter */
57843af0 7316 CopLINE_inc(PL_curcop);
a0ed51b3 7317
02aa26ce 7318 /* update debugger info */
3280af22 7319 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805
LW
7320 SV *sv = NEWSV(88,0);
7321
93a17b20 7322 sv_upgrade(sv, SVt_PVMG);
3280af22 7323 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
7324 (void)SvIOK_on(sv);
7325 SvIVX(sv) = 0;
57843af0 7326 av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop), sv);
395c3793 7327 }
a0ed51b3 7328
3280af22
NIS
7329 /* having changed the buffer, we must update PL_bufend */
7330 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 7331 PL_last_lop = PL_last_uni = Nullch;
378cc40b 7332 }
4e553d73 7333
02aa26ce
NT
7334 /* at this point, we have successfully read the delimited string */
7335
220e2d4e
IH
7336 if (!PL_encoding || UTF) {
7337 if (keep_delims)
7338 sv_catpvn(sv, s, termlen);
7339 s += termlen;
7340 }
7341 if (has_utf8 || PL_encoding)
b1c7b182 7342 SvUTF8_on(sv);
d0063567 7343
57843af0 7344 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
7345
7346 /* if we allocated too much space, give some back */
93a17b20
LW
7347 if (SvCUR(sv) + 5 < SvLEN(sv)) {
7348 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 7349 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 7350 }
02aa26ce
NT
7351
7352 /* decide whether this is the first or second quoted string we've read
7353 for this op
7354 */
4e553d73 7355
3280af22
NIS
7356 if (PL_lex_stuff)
7357 PL_lex_repl = sv;
79072805 7358 else
3280af22 7359 PL_lex_stuff = sv;
378cc40b
LW
7360 return s;
7361}
7362
02aa26ce
NT
7363/*
7364 scan_num
7365 takes: pointer to position in buffer
7366 returns: pointer to new position in buffer
7367 side-effects: builds ops for the constant in yylval.op
7368
7369 Read a number in any of the formats that Perl accepts:
7370
7fd134d9
JH
7371 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
7372 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
7373 0b[01](_?[01])*
7374 0[0-7](_?[0-7])*
7375 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 7376
3280af22 7377 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
7378 thing it reads.
7379
7380 If it reads a number without a decimal point or an exponent, it will
7381 try converting the number to an integer and see if it can do so
7382 without loss of precision.
7383*/
4e553d73 7384
378cc40b 7385char *
b73d6f50 7386Perl_scan_num(pTHX_ char *start, YYSTYPE* lvalp)
378cc40b 7387{
02aa26ce
NT
7388 register char *s = start; /* current position in buffer */
7389 register char *d; /* destination in temp buffer */
7390 register char *e; /* end of temp buffer */
86554af2 7391 NV nv; /* number read, as a double */
a7cb1f99 7392 SV *sv = Nullsv; /* place to put the converted number */
a86a20aa 7393 bool floatit; /* boolean: int or float? */
02aa26ce 7394 char *lastub = 0; /* position of last underbar */
fc36a67e 7395 static char number_too_long[] = "Number too long";
378cc40b 7396
02aa26ce
NT
7397 /* We use the first character to decide what type of number this is */
7398
378cc40b 7399 switch (*s) {
79072805 7400 default:
cea2e8a9 7401 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 7402
02aa26ce 7403 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 7404 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
7405 case '0':
7406 {
02aa26ce
NT
7407 /* variables:
7408 u holds the "number so far"
4f19785b
WSI
7409 shift the power of 2 of the base
7410 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
7411 overflowed was the number more than we can hold?
7412
7413 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
7414 we in octal/hex/binary?" indicator to disallow hex characters
7415 when in octal mode.
02aa26ce 7416 */
9e24b6e2
JH
7417 NV n = 0.0;
7418 UV u = 0;
79072805 7419 I32 shift;
9e24b6e2 7420 bool overflowed = FALSE;
61f33854 7421 bool just_zero = TRUE; /* just plain 0 or binary number? */
9e24b6e2
JH
7422 static NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
7423 static char* bases[5] = { "", "binary", "", "octal",
7424 "hexadecimal" };
7425 static char* Bases[5] = { "", "Binary", "", "Octal",
7426 "Hexadecimal" };
7427 static char *maxima[5] = { "",
7428 "0b11111111111111111111111111111111",
7429 "",
893fe2c2 7430 "037777777777",
9e24b6e2
JH
7431 "0xffffffff" };
7432 char *base, *Base, *max;
378cc40b 7433
02aa26ce 7434 /* check for hex */
378cc40b
LW
7435 if (s[1] == 'x') {
7436 shift = 4;
7437 s += 2;
61f33854 7438 just_zero = FALSE;
4f19785b
WSI
7439 } else if (s[1] == 'b') {
7440 shift = 1;
7441 s += 2;
61f33854 7442 just_zero = FALSE;
378cc40b 7443 }
02aa26ce 7444 /* check for a decimal in disguise */
b78218b7 7445 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 7446 goto decimal;
02aa26ce 7447 /* so it must be octal */
928753ea 7448 else {
378cc40b 7449 shift = 3;
928753ea
JH
7450 s++;
7451 }
7452
7453 if (*s == '_') {
7454 if (ckWARN(WARN_SYNTAX))
9014280d 7455 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7456 "Misplaced _ in number");
7457 lastub = s++;
7458 }
9e24b6e2
JH
7459
7460 base = bases[shift];
7461 Base = Bases[shift];
7462 max = maxima[shift];
02aa26ce 7463
4f19785b 7464 /* read the rest of the number */
378cc40b 7465 for (;;) {
9e24b6e2 7466 /* x is used in the overflow test,
893fe2c2 7467 b is the digit we're adding on. */
9e24b6e2 7468 UV x, b;
55497cff 7469
378cc40b 7470 switch (*s) {
02aa26ce
NT
7471
7472 /* if we don't mention it, we're done */
378cc40b
LW
7473 default:
7474 goto out;
02aa26ce 7475
928753ea 7476 /* _ are ignored -- but warned about if consecutive */
de3bb511 7477 case '_':
928753ea 7478 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7479 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7480 "Misplaced _ in number");
7481 lastub = s++;
de3bb511 7482 break;
02aa26ce
NT
7483
7484 /* 8 and 9 are not octal */
378cc40b 7485 case '8': case '9':
4f19785b 7486 if (shift == 3)
cea2e8a9 7487 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 7488 /* FALL THROUGH */
02aa26ce
NT
7489
7490 /* octal digits */
4f19785b 7491 case '2': case '3': case '4':
378cc40b 7492 case '5': case '6': case '7':
4f19785b 7493 if (shift == 1)
cea2e8a9 7494 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
7495 /* FALL THROUGH */
7496
7497 case '0': case '1':
02aa26ce 7498 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 7499 goto digit;
02aa26ce
NT
7500
7501 /* hex digits */
378cc40b
LW
7502 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
7503 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 7504 /* make sure they said 0x */
378cc40b
LW
7505 if (shift != 4)
7506 goto out;
55497cff 7507 b = (*s++ & 7) + 9;
02aa26ce
NT
7508
7509 /* Prepare to put the digit we have onto the end
7510 of the number so far. We check for overflows.
7511 */
7512
55497cff 7513 digit:
61f33854 7514 just_zero = FALSE;
9e24b6e2
JH
7515 if (!overflowed) {
7516 x = u << shift; /* make room for the digit */
7517
7518 if ((x >> shift) != u
7519 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
7520 overflowed = TRUE;
7521 n = (NV) u;
767a6a26 7522 if (ckWARN_d(WARN_OVERFLOW))
9014280d 7523 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
9e24b6e2
JH
7524 "Integer overflow in %s number",
7525 base);
7526 } else
7527 u = x | b; /* add the digit to the end */
7528 }
7529 if (overflowed) {
7530 n *= nvshift[shift];
7531 /* If an NV has not enough bits in its
7532 * mantissa to represent an UV this summing of
7533 * small low-order numbers is a waste of time
7534 * (because the NV cannot preserve the
7535 * low-order bits anyway): we could just
7536 * remember when did we overflow and in the
7537 * end just multiply n by the right
7538 * amount. */
7539 n += (NV) b;
55497cff 7540 }
378cc40b
LW
7541 break;
7542 }
7543 }
02aa26ce
NT
7544
7545 /* if we get here, we had success: make a scalar value from
7546 the number.
7547 */
378cc40b 7548 out:
928753ea
JH
7549
7550 /* final misplaced underbar check */
7551 if (s[-1] == '_') {
7552 if (ckWARN(WARN_SYNTAX))
9014280d 7553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
7554 }
7555
79072805 7556 sv = NEWSV(92,0);
9e24b6e2 7557 if (overflowed) {
767a6a26 7558 if (ckWARN(WARN_PORTABLE) && n > 4294967295.0)
9014280d 7559 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7560 "%s number > %s non-portable",
7561 Base, max);
7562 sv_setnv(sv, n);
7563 }
7564 else {
15041a67 7565#if UVSIZE > 4
767a6a26 7566 if (ckWARN(WARN_PORTABLE) && u > 0xffffffff)
9014280d 7567 Perl_warner(aTHX_ packWARN(WARN_PORTABLE),
9e24b6e2
JH
7568 "%s number > %s non-portable",
7569 Base, max);
2cc4c2dc 7570#endif
9e24b6e2
JH
7571 sv_setuv(sv, u);
7572 }
61f33854
RGS
7573 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
7574 sv = new_constant(start, s - start, "integer",
7575 sv, Nullsv, NULL);
7576 else if (PL_hints & HINT_NEW_BINARY)
b3ac6de7 7577 sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
378cc40b
LW
7578 }
7579 break;
02aa26ce
NT
7580
7581 /*
7582 handle decimal numbers.
7583 we're also sent here when we read a 0 as the first digit
7584 */
378cc40b
LW
7585 case '1': case '2': case '3': case '4': case '5':
7586 case '6': case '7': case '8': case '9': case '.':
7587 decimal:
3280af22
NIS
7588 d = PL_tokenbuf;
7589 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 7590 floatit = FALSE;
02aa26ce
NT
7591
7592 /* read next group of digits and _ and copy into d */
de3bb511 7593 while (isDIGIT(*s) || *s == '_') {
4e553d73 7594 /* skip underscores, checking for misplaced ones
02aa26ce
NT
7595 if -w is on
7596 */
93a17b20 7597 if (*s == '_') {
928753ea 7598 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7599 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7600 "Misplaced _ in number");
7601 lastub = s++;
93a17b20 7602 }
fc36a67e 7603 else {
02aa26ce 7604 /* check for end of fixed-length buffer */
fc36a67e 7605 if (d >= e)
cea2e8a9 7606 Perl_croak(aTHX_ number_too_long);
02aa26ce 7607 /* if we're ok, copy the character */
378cc40b 7608 *d++ = *s++;
fc36a67e 7609 }
378cc40b 7610 }
02aa26ce
NT
7611
7612 /* final misplaced underbar check */
928753ea 7613 if (lastub && s == lastub + 1) {
d008e5eb 7614 if (ckWARN(WARN_SYNTAX))
9014280d 7615 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 7616 }
02aa26ce
NT
7617
7618 /* read a decimal portion if there is one. avoid
7619 3..5 being interpreted as the number 3. followed
7620 by .5
7621 */
2f3197b3 7622 if (*s == '.' && s[1] != '.') {
79072805 7623 floatit = TRUE;
378cc40b 7624 *d++ = *s++;
02aa26ce 7625
928753ea
JH
7626 if (*s == '_') {
7627 if (ckWARN(WARN_SYNTAX))
9014280d 7628 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7629 "Misplaced _ in number");
7630 lastub = s;
7631 }
7632
7633 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 7634 */
fc36a67e 7635 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 7636 /* fixed length buffer check */
fc36a67e 7637 if (d >= e)
cea2e8a9 7638 Perl_croak(aTHX_ number_too_long);
928753ea
JH
7639 if (*s == '_') {
7640 if (ckWARN(WARN_SYNTAX) && lastub && s == lastub + 1)
9014280d 7641 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7642 "Misplaced _ in number");
7643 lastub = s;
7644 }
7645 else
fc36a67e 7646 *d++ = *s;
378cc40b 7647 }
928753ea
JH
7648 /* fractional part ending in underbar? */
7649 if (s[-1] == '_') {
7650 if (ckWARN(WARN_SYNTAX))
9014280d 7651 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
7652 "Misplaced _ in number");
7653 }
dd629d5b
GS
7654 if (*s == '.' && isDIGIT(s[1])) {
7655 /* oops, it's really a v-string, but without the "v" */
f4758303 7656 s = start;
dd629d5b
GS
7657 goto vstring;
7658 }
378cc40b 7659 }
02aa26ce
NT
7660
7661 /* read exponent part, if present */
7fd134d9 7662 if (*s && strchr("eE",*s) && strchr("+-0123456789_", s[1])) {
79072805
LW
7663 floatit = TRUE;
7664 s++;
02aa26ce
NT
7665
7666 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 7667 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 7668
7fd134d9
JH
7669 /* stray preinitial _ */
7670 if (*s == '_') {
7671 if (ckWARN(WARN_SYNTAX))
9014280d 7672 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7673 "Misplaced _ in number");
7674 lastub = s++;
7675 }
7676
02aa26ce 7677 /* allow positive or negative exponent */
378cc40b
LW
7678 if (*s == '+' || *s == '-')
7679 *d++ = *s++;
02aa26ce 7680
7fd134d9
JH
7681 /* stray initial _ */
7682 if (*s == '_') {
7683 if (ckWARN(WARN_SYNTAX))
9014280d 7684 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9
JH
7685 "Misplaced _ in number");
7686 lastub = s++;
7687 }
7688
7fd134d9
JH
7689 /* read digits of exponent */
7690 while (isDIGIT(*s) || *s == '_') {
7691 if (isDIGIT(*s)) {
7692 if (d >= e)
7693 Perl_croak(aTHX_ number_too_long);
b3b48e3e 7694 *d++ = *s++;
7fd134d9
JH
7695 }
7696 else {
7697 if (ckWARN(WARN_SYNTAX) &&
7698 ((lastub && s == lastub + 1) ||
b3b48e3e 7699 (!isDIGIT(s[1]) && s[1] != '_')))
9014280d 7700 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7fd134d9 7701 "Misplaced _ in number");
b3b48e3e 7702 lastub = s++;
7fd134d9 7703 }
7fd134d9 7704 }
378cc40b 7705 }
02aa26ce 7706
02aa26ce
NT
7707
7708 /* make an sv from the string */
79072805 7709 sv = NEWSV(92,0);
097ee67d 7710
0b7fceb9 7711 /*
58bb9ec3
NC
7712 We try to do an integer conversion first if no characters
7713 indicating "float" have been found.
0b7fceb9
MU
7714 */
7715
7716 if (!floatit) {
58bb9ec3
NC
7717 UV uv;
7718 int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
7719
7720 if (flags == IS_NUMBER_IN_UV) {
7721 if (uv <= IV_MAX)
86554af2 7722 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 7723 else
c239479b 7724 sv_setuv(sv, uv);
58bb9ec3
NC
7725 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
7726 if (uv <= (UV) IV_MIN)
7727 sv_setiv(sv, -(IV)uv);
7728 else
7729 floatit = TRUE;
7730 } else
7731 floatit = TRUE;
7732 }
0b7fceb9 7733 if (floatit) {
58bb9ec3
NC
7734 /* terminate the string */
7735 *d = '\0';
86554af2
JH
7736 nv = Atof(PL_tokenbuf);
7737 sv_setnv(sv, nv);
7738 }
86554af2 7739
b8403495
JH
7740 if ( floatit ? (PL_hints & HINT_NEW_FLOAT) :
7741 (PL_hints & HINT_NEW_INTEGER) )
4e553d73 7742 sv = new_constant(PL_tokenbuf, d - PL_tokenbuf,
b8403495
JH
7743 (floatit ? "float" : "integer"),
7744 sv, Nullsv, NULL);
378cc40b 7745 break;
0b7fceb9 7746
e312add1 7747 /* if it starts with a v, it could be a v-string */
a7cb1f99 7748 case 'v':
dd629d5b 7749vstring:
f4758303 7750 sv = NEWSV(92,5); /* preallocate storage space */
b0f01acb 7751 s = scan_vstring(s,sv);
a7cb1f99 7752 break;
79072805 7753 }
a687059c 7754
02aa26ce
NT
7755 /* make the op for the constant and return */
7756
a86a20aa 7757 if (sv)
b73d6f50 7758 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 7759 else
b73d6f50 7760 lvalp->opval = Nullop;
a687059c 7761
378cc40b
LW
7762 return s;
7763}
7764
76e3520e 7765STATIC char *
cea2e8a9 7766S_scan_formline(pTHX_ register char *s)
378cc40b 7767{
79072805 7768 register char *eol;
378cc40b 7769 register char *t;
79cb57f6 7770 SV *stuff = newSVpvn("",0);
79072805 7771 bool needargs = FALSE;
c5ee2135 7772 bool eofmt = FALSE;
378cc40b 7773
79072805 7774 while (!needargs) {
a1b95068 7775 if (*s == '.') {
79072805 7776 /*SUPPRESS 530*/
51882d45 7777#ifdef PERL_STRICT_CR
bf4acbe4 7778 for (t = s+1;SPACE_OR_TAB(*t); t++) ;
51882d45 7779#else
bf4acbe4 7780 for (t = s+1;SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 7781#endif
c5ee2135
WL
7782 if (*t == '\n' || t == PL_bufend) {
7783 eofmt = TRUE;
79072805 7784 break;
c5ee2135 7785 }
79072805 7786 }
3280af22 7787 if (PL_in_eval && !PL_rsfp) {
a1b95068 7788 eol = memchr(s,'\n',PL_bufend-s);
0f85fab0 7789 if (!eol++)
3280af22 7790 eol = PL_bufend;
0f85fab0
LW
7791 }
7792 else
3280af22 7793 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 7794 if (*s != '#') {
a0d0e21e
LW
7795 for (t = s; t < eol; t++) {
7796 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
7797 needargs = FALSE;
7798 goto enough; /* ~~ must be first line in formline */
378cc40b 7799 }
a0d0e21e
LW
7800 if (*t == '@' || *t == '^')
7801 needargs = TRUE;
378cc40b 7802 }
7121b347
MG
7803 if (eol > s) {
7804 sv_catpvn(stuff, s, eol-s);
2dc4c65b 7805#ifndef PERL_STRICT_CR
7121b347
MG
7806 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
7807 char *end = SvPVX(stuff) + SvCUR(stuff);
7808 end[-2] = '\n';
7809 end[-1] = '\0';
7810 SvCUR(stuff)--;
7811 }
2dc4c65b 7812#endif
7121b347
MG
7813 }
7814 else
7815 break;
79072805
LW
7816 }
7817 s = eol;
3280af22
NIS
7818 if (PL_rsfp) {
7819 s = filter_gets(PL_linestr, PL_rsfp, 0);
7820 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
7821 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 7822 PL_last_lop = PL_last_uni = Nullch;
79072805 7823 if (!s) {
3280af22 7824 s = PL_bufptr;
378cc40b
LW
7825 break;
7826 }
378cc40b 7827 }
463ee0b2 7828 incline(s);
79072805 7829 }
a0d0e21e
LW
7830 enough:
7831 if (SvCUR(stuff)) {
3280af22 7832 PL_expect = XTERM;
79072805 7833 if (needargs) {
3280af22
NIS
7834 PL_lex_state = LEX_NORMAL;
7835 PL_nextval[PL_nexttoke].ival = 0;
79072805
LW
7836 force_next(',');
7837 }
a0d0e21e 7838 else
3280af22 7839 PL_lex_state = LEX_FORMLINE;
1bd51a4c
IH
7840 if (!IN_BYTES) {
7841 if (UTF && is_utf8_string((U8*)SvPVX(stuff), SvCUR(stuff)))
7842 SvUTF8_on(stuff);
7843 else if (PL_encoding)
7844 sv_recode_to_utf8(stuff, PL_encoding);
7845 }
3280af22 7846 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 7847 force_next(THING);
3280af22 7848 PL_nextval[PL_nexttoke].ival = OP_FORMLINE;
79072805 7849 force_next(LSTOP);
378cc40b 7850 }
79072805 7851 else {
8990e307 7852 SvREFCNT_dec(stuff);
c5ee2135
WL
7853 if (eofmt)
7854 PL_lex_formbrack = 0;
3280af22 7855 PL_bufptr = s;
79072805
LW
7856 }
7857 return s;
378cc40b 7858}
a687059c 7859
76e3520e 7860STATIC void
cea2e8a9 7861S_set_csh(pTHX)
a687059c 7862{
ae986130 7863#ifdef CSH
3280af22
NIS
7864 if (!PL_cshlen)
7865 PL_cshlen = strlen(PL_cshname);
ae986130 7866#endif
a687059c 7867}
463ee0b2 7868
ba6d6ac9 7869I32
864dbfa3 7870Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 7871{
3280af22
NIS
7872 I32 oldsavestack_ix = PL_savestack_ix;
7873 CV* outsidecv = PL_compcv;
8990e307 7874
3280af22
NIS
7875 if (PL_compcv) {
7876 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 7877 }
7766f137 7878 SAVEI32(PL_subline);
3280af22 7879 save_item(PL_subname);
3280af22 7880 SAVESPTR(PL_compcv);
3280af22
NIS
7881
7882 PL_compcv = (CV*)NEWSV(1104,0);
7883 sv_upgrade((SV *)PL_compcv, is_format ? SVt_PVFM : SVt_PVCV);
7884 CvFLAGS(PL_compcv) |= flags;
7885
57843af0 7886 PL_subline = CopLINE(PL_curcop);
dd2155a4 7887 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
3280af22 7888 CvOUTSIDE(PL_compcv) = (CV*)SvREFCNT_inc(outsidecv);
a3985cdc 7889 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 7890
8990e307
LW
7891 return oldsavestack_ix;
7892}
7893
084592ab
CN
7894#ifdef __SC__
7895#pragma segment Perl_yylex
7896#endif
8990e307 7897int
864dbfa3 7898Perl_yywarn(pTHX_ char *s)
8990e307 7899{
faef0170 7900 PL_in_eval |= EVAL_WARNONLY;
748a9306 7901 yyerror(s);
faef0170 7902 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 7903 return 0;
8990e307
LW
7904}
7905
7906int
864dbfa3 7907Perl_yyerror(pTHX_ char *s)
463ee0b2 7908{
68dc0745 7909 char *where = NULL;
7910 char *context = NULL;
7911 int contlen = -1;
46fc3d4c 7912 SV *msg;
463ee0b2 7913
3280af22 7914 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 7915 where = "at EOF";
3280af22
NIS
7916 else if (PL_bufptr > PL_oldoldbufptr && PL_bufptr - PL_oldoldbufptr < 200 &&
7917 PL_oldoldbufptr != PL_oldbufptr && PL_oldbufptr != PL_bufptr) {
f355267c
JH
7918 /*
7919 Only for NetWare:
7920 The code below is removed for NetWare because it abends/crashes on NetWare
7921 when the script has error such as not having the closing quotes like:
7922 if ($var eq "value)
7923 Checking of white spaces is anyway done in NetWare code.
7924 */
7925#ifndef NETWARE
3280af22
NIS
7926 while (isSPACE(*PL_oldoldbufptr))
7927 PL_oldoldbufptr++;
f355267c 7928#endif
3280af22
NIS
7929 context = PL_oldoldbufptr;
7930 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 7931 }
3280af22
NIS
7932 else if (PL_bufptr > PL_oldbufptr && PL_bufptr - PL_oldbufptr < 200 &&
7933 PL_oldbufptr != PL_bufptr) {
f355267c
JH
7934 /*
7935 Only for NetWare:
7936 The code below is removed for NetWare because it abends/crashes on NetWare
7937 when the script has error such as not having the closing quotes like:
7938 if ($var eq "value)
7939 Checking of white spaces is anyway done in NetWare code.
7940 */
7941#ifndef NETWARE
3280af22
NIS
7942 while (isSPACE(*PL_oldbufptr))
7943 PL_oldbufptr++;
f355267c 7944#endif
3280af22
NIS
7945 context = PL_oldbufptr;
7946 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
7947 }
7948 else if (yychar > 255)
68dc0745 7949 where = "next token ???";
12fbd33b 7950 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
7951 if (PL_lex_state == LEX_NORMAL ||
7952 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 7953 where = "at end of line";
3280af22 7954 else if (PL_lex_inpat)
68dc0745 7955 where = "within pattern";
463ee0b2 7956 else
68dc0745 7957 where = "within string";
463ee0b2 7958 }
46fc3d4c 7959 else {
79cb57f6 7960 SV *where_sv = sv_2mortal(newSVpvn("next char ", 10));
46fc3d4c 7961 if (yychar < 32)
cea2e8a9 7962 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
46fc3d4c 7963 else if (isPRINT_LC(yychar))
cea2e8a9 7964 Perl_sv_catpvf(aTHX_ where_sv, "%c", yychar);
463ee0b2 7965 else
cea2e8a9 7966 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
46fc3d4c 7967 where = SvPVX(where_sv);
463ee0b2 7968 }
46fc3d4c 7969 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 7970 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 7971 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 7972 if (context)
cea2e8a9 7973 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 7974 else
cea2e8a9 7975 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 7976 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 7977 Perl_sv_catpvf(aTHX_ msg,
57def98f 7978 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 7979 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 7980 PL_multi_end = 0;
a0d0e21e 7981 }
56da5a46
RGS
7982 if (PL_in_eval & EVAL_WARNONLY && ckWARN_d(WARN_SYNTAX))
7983 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, msg);
463ee0b2 7984 else
5a844595 7985 qerror(msg);
c7d6bfb2
GS
7986 if (PL_error_count >= 10) {
7987 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 7988 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
248c2a4d 7989 ERRSV, OutCopFILE(PL_curcop));
c7d6bfb2
GS
7990 else
7991 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 7992 OutCopFILE(PL_curcop));
c7d6bfb2 7993 }
3280af22
NIS
7994 PL_in_my = 0;
7995 PL_in_my_stash = Nullhv;
463ee0b2
LW
7996 return 0;
7997}
084592ab
CN
7998#ifdef __SC__
7999#pragma segment Main
8000#endif
4e35701f 8001
b250498f 8002STATIC char*
3ae08724 8003S_swallow_bom(pTHX_ U8 *s)
01ec43d0 8004{
b250498f
GS
8005 STRLEN slen;
8006 slen = SvCUR(PL_linestr);
7aa207d6 8007 switch (s[0]) {
4e553d73
NIS
8008 case 0xFF:
8009 if (s[1] == 0xFE) {
7aa207d6 8010 /* UTF-16 little-endian? (or UTF32-LE?) */
3ae08724 8011 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
7aa207d6 8012 Perl_croak(aTHX_ "Unsupported script encoding UTF32-LE");
01ec43d0 8013#ifndef PERL_NO_UTF16_FILTER
7aa207d6 8014 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF16-LE script encoding (BOM)\n");
3ae08724 8015 s += 2;
7aa207d6 8016 utf16le:
dea0fc0b
JH
8017 if (PL_bufend > (char*)s) {
8018 U8 *news;
8019 I32 newlen;
8020
8021 filter_add(utf16rev_textfilter, NULL);
8022 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
1de9afcd
RGS
8023 utf16_to_utf8_reversed(s, news,
8024 PL_bufend - (char*)s - 1,
8025 &newlen);
7aa207d6 8026 sv_setpvn(PL_linestr, (const char*)news, newlen);
dea0fc0b 8027 Safefree(news);
7aa207d6
JH
8028 SvUTF8_on(PL_linestr);
8029 s = (U8*)SvPVX(PL_linestr);
8030 PL_bufend = SvPVX(PL_linestr) + newlen;
dea0fc0b 8031 }
b250498f 8032#else
7aa207d6 8033 Perl_croak(aTHX_ "Unsupported script encoding UTF16-LE");
b250498f 8034#endif
01ec43d0
GS
8035 }
8036 break;
78ae23f5 8037 case 0xFE:
7aa207d6 8038 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 8039#ifndef PERL_NO_UTF16_FILTER
7aa207d6 8040 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b 8041 s += 2;
7aa207d6 8042 utf16be:
dea0fc0b
JH
8043 if (PL_bufend > (char *)s) {
8044 U8 *news;
8045 I32 newlen;
8046
8047 filter_add(utf16_textfilter, NULL);
8048 New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
1de9afcd
RGS
8049 utf16_to_utf8(s, news,
8050 PL_bufend - (char*)s,
8051 &newlen);
7aa207d6 8052 sv_setpvn(PL_linestr, (const char*)news, newlen);
dea0fc0b 8053 Safefree(news);
7aa207d6
JH
8054 SvUTF8_on(PL_linestr);
8055 s = (U8*)SvPVX(PL_linestr);
8056 PL_bufend = SvPVX(PL_linestr) + newlen;
dea0fc0b 8057 }
b250498f 8058#else
7aa207d6 8059 Perl_croak(aTHX_ "Unsupported script encoding UTF16-BE");
b250498f 8060#endif
01ec43d0
GS
8061 }
8062 break;
3ae08724
GS
8063 case 0xEF:
8064 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 8065 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
8066 s += 3; /* UTF-8 */
8067 }
8068 break;
8069 case 0:
7aa207d6
JH
8070 if (slen > 3) {
8071 if (s[1] == 0) {
8072 if (s[2] == 0xFE && s[3] == 0xFF) {
8073 /* UTF-32 big-endian */
8074 Perl_croak(aTHX_ "Unsupported script encoding UTF32-BE");
8075 }
8076 }
8077 else if (s[2] == 0 && s[3] != 0) {
8078 /* Leading bytes
8079 * 00 xx 00 xx
8080 * are a good indicator of UTF-16BE. */
8081 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
8082 goto utf16be;
8083 }
01ec43d0 8084 }
7aa207d6
JH
8085 default:
8086 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
8087 /* Leading bytes
8088 * xx 00 xx 00
8089 * are a good indicator of UTF-16LE. */
8090 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
8091 goto utf16le;
8092 }
01ec43d0 8093 }
b8f84bb2 8094 return (char*)s;
b250498f 8095}
4755096e 8096
4755096e
GS
8097/*
8098 * restore_rsfp
8099 * Restore a source filter.
8100 */
8101
8102static void
acfe0abc 8103restore_rsfp(pTHX_ void *f)
4755096e
GS
8104{
8105 PerlIO *fp = (PerlIO*)f;
8106
8107 if (PL_rsfp == PerlIO_stdin())
8108 PerlIO_clearerr(PL_rsfp);
8109 else if (PL_rsfp && (PL_rsfp != fp))
8110 PerlIO_close(PL_rsfp);
8111 PL_rsfp = fp;
8112}
6e3aabd6
GS
8113
8114#ifndef PERL_NO_UTF16_FILTER
8115static I32
acfe0abc 8116utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 8117{
1de9afcd 8118 STRLEN old = SvCUR(sv);
6e3aabd6 8119 I32 count = FILTER_READ(idx+1, sv, maxlen);
1de9afcd
RGS
8120 DEBUG_P(PerlIO_printf(Perl_debug_log,
8121 "utf16_textfilter(%p): %d %d (%d)\n",
8122 utf16_textfilter, idx, maxlen, count));
6e3aabd6
GS
8123 if (count) {
8124 U8* tmps;
dea0fc0b 8125 I32 newlen;
6e3aabd6 8126 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
1de9afcd
RGS
8127 Copy(SvPVX(sv), tmps, old, char);
8128 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
8129 SvCUR(sv) - old, &newlen);
8130 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
6e3aabd6 8131 }
1de9afcd
RGS
8132 DEBUG_P({sv_dump(sv);});
8133 return SvCUR(sv);
6e3aabd6
GS
8134}
8135
8136static I32
acfe0abc 8137utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 8138{
1de9afcd 8139 STRLEN old = SvCUR(sv);
6e3aabd6 8140 I32 count = FILTER_READ(idx+1, sv, maxlen);
1de9afcd
RGS
8141 DEBUG_P(PerlIO_printf(Perl_debug_log,
8142 "utf16rev_textfilter(%p): %d %d (%d)\n",
8143 utf16rev_textfilter, idx, maxlen, count));
6e3aabd6
GS
8144 if (count) {
8145 U8* tmps;
dea0fc0b 8146 I32 newlen;
6e3aabd6 8147 New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
1de9afcd
RGS
8148 Copy(SvPVX(sv), tmps, old, char);
8149 utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old,
8150 SvCUR(sv) - old, &newlen);
8151 sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old);
6e3aabd6 8152 }
1de9afcd 8153 DEBUG_P({ sv_dump(sv); });
6e3aabd6
GS
8154 return count;
8155}
8156#endif
9f4817db 8157
f333445c
JP
8158/*
8159Returns a pointer to the next character after the parsed
8160vstring, as well as updating the passed in sv.
8161
8162Function must be called like
8163
8164 sv = NEWSV(92,5);
8165 s = scan_vstring(s,sv);
8166
8167The sv should already be large enough to store the vstring
8168passed in, for performance reasons.
8169
8170*/
8171
8172char *
8173Perl_scan_vstring(pTHX_ char *s, SV *sv)
8174{
8175 char *pos = s;
8176 char *start = s;
8177 if (*pos == 'v') pos++; /* get past 'v' */
3e884cbf
JH
8178 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
8179 pos++;
f333445c
JP
8180 if ( *pos != '.') {
8181 /* this may not be a v-string if followed by => */
8fc7bb1c
SM
8182 char *next = pos;
8183 while (next < PL_bufend && isSPACE(*next))
8184 ++next;
8185 if ((PL_bufend - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
8186 /* return string not v-string */
8187 sv_setpvn(sv,(char *)s,pos-s);
8188 return pos;
8189 }
8190 }
8191
8192 if (!isALPHA(*pos)) {
8193 UV rev;
8194 U8 tmpbuf[UTF8_MAXLEN+1];
8195 U8 *tmpend;
8196
8197 if (*s == 'v') s++; /* get past 'v' */
8198
8199 sv_setpvn(sv, "", 0);
8200
8201 for (;;) {
8202 rev = 0;
8203 {
8204 /* this is atoi() that tolerates underscores */
8205 char *end = pos;
8206 UV mult = 1;
8207 while (--end >= s) {
8208 UV orev;
8209 if (*end == '_')
8210 continue;
8211 orev = rev;
8212 rev += (*end - '0') * mult;
8213 mult *= 10;
8214 if (orev > rev && ckWARN_d(WARN_OVERFLOW))
8215 Perl_warner(aTHX_ packWARN(WARN_OVERFLOW),
8216 "Integer overflow in decimal number");
8217 }
8218 }
8219#ifdef EBCDIC
8220 if (rev > 0x7FFFFFFF)
8221 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
8222#endif
8223 /* Append native character for the rev point */
8224 tmpend = uvchr_to_utf8(tmpbuf, rev);
8225 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
8226 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
8227 SvUTF8_on(sv);
3e884cbf 8228 if (pos + 1 < PL_bufend && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
8229 s = ++pos;
8230 else {
8231 s = pos;
8232 break;
8233 }
3e884cbf 8234 while (pos < PL_bufend && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
8235 pos++;
8236 }
8237 SvPOK_on(sv);
8238 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
8239 SvRMAGICAL_on(sv);
8240 }
8241 return s;
8242}
8243