This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Disable PERL_MALLOC and DEBUG_MSTATS in win32/makefile.mk by default
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a
PP
64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff
PP
66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
bbf60fe6
DM
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
79072805
LW
94#ifdef ff_next
95#undef ff_next
d48672a2
LW
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 136# define REPORT(retval) tokereport(s,(int)retval)
998054bd 137#else
bbf60fe6 138# define REPORT(retval) (retval)
998054bd
SC
139#endif
140
bbf60fe6
DM
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 161
a687059c
LW
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
a687059c 166 */
bbf60fe6
DM
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
6f33ba73 169 PL_expect = x, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
bbf60fe6
DM
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 178
bbf60fe6
DM
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
3280af22
NIS
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
79072805 186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
bbf60fe6
DM
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
9041c2e3 203{
bbf60fe6
DM
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 270
bbf60fe6
DM
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = NEWSV(0, 60);
279
280 Perl_sv_catpvf(aTHX_ report, "<== ");
281
282 for (p = debug_tokens; p->token; p++) {
283 if (p->token == (int)rv) {
284 name = p->name;
285 type = p->type;
286 break;
287 }
288 }
289 if (name)
290 Perl_sv_catpvf(aTHX_ report, "%s", name);
291 else if ((char)rv > ' ' && (char)rv < '~')
292 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
293 else if (!rv)
294 Perl_sv_catpvf(aTHX_ report, "EOF");
295 else
296 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
297 switch (type) {
298 case TOKENTYPE_NONE:
299 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
300 break;
301 case TOKENTYPE_IVAL:
302 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
303 break;
304 case TOKENTYPE_OPNUM:
305 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
306 PL_op_name[yylval.ival]);
307 break;
308 case TOKENTYPE_PVAL:
309 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
310 break;
311 case TOKENTYPE_OPVAL:
401441c0
RGS
312 if (yylval.opval)
313 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 314 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
315 else
316 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
317 break;
318 }
319 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
998054bd
SC
320 if (s - PL_bufptr > 0)
321 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
322 else {
323 if (PL_oldbufptr && *PL_oldbufptr)
324 sv_catpv(report, PL_tokenbuf);
325 }
bbf60fe6
DM
326 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
327 };
328 return (int)rv;
998054bd
SC
329}
330
8fa7f367
JH
331#endif
332
ffb4593c
NT
333/*
334 * S_ao
335 *
c963b151
BD
336 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
337 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
338 */
339
76e3520e 340STATIC int
cea2e8a9 341S_ao(pTHX_ int toketype)
a0d0e21e 342{
3280af22
NIS
343 if (*PL_bufptr == '=') {
344 PL_bufptr++;
a0d0e21e
LW
345 if (toketype == ANDAND)
346 yylval.ival = OP_ANDASSIGN;
347 else if (toketype == OROR)
348 yylval.ival = OP_ORASSIGN;
c963b151
BD
349 else if (toketype == DORDOR)
350 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
351 toketype = ASSIGNOP;
352 }
353 return toketype;
354}
355
ffb4593c
NT
356/*
357 * S_no_op
358 * When Perl expects an operator and finds something else, no_op
359 * prints the warning. It always prints "<something> found where
360 * operator expected. It prints "Missing semicolon on previous line?"
361 * if the surprise occurs at the start of the line. "do you need to
362 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
363 * where the compiler doesn't know if foo is a method call or a function.
364 * It prints "Missing operator before end of line" if there's nothing
365 * after the missing operator, or "... before <...>" if there is something
366 * after the missing operator.
367 */
368
76e3520e 369STATIC void
cea2e8a9 370S_no_op(pTHX_ char *what, char *s)
463ee0b2 371{
3280af22
NIS
372 char *oldbp = PL_bufptr;
373 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 374
1189a94a
GS
375 if (!s)
376 s = oldbp;
07c798fb 377 else
1189a94a 378 PL_bufptr = s;
cea2e8a9 379 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
380 if (ckWARN_d(WARN_SYNTAX)) {
381 if (is_first)
382 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
383 "\t(Missing semicolon on previous line?)\n");
384 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
385 char *t;
386 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
387 if (t < PL_bufptr && isSPACE(*t))
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Do you need to predeclare %.*s?)\n",
390 t - PL_oldoldbufptr, PL_oldoldbufptr);
391 }
392 else {
393 assert(s >= oldbp);
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
396 }
07c798fb 397 }
3280af22 398 PL_bufptr = oldbp;
8990e307
LW
399}
400
ffb4593c
NT
401/*
402 * S_missingterm
403 * Complain about missing quote/regexp/heredoc terminator.
404 * If it's called with (char *)NULL then it cauterizes the line buffer.
405 * If we're in a delimited string and the delimiter is a control
406 * character, it's reformatted into a two-char sequence like ^C.
407 * This is fatal.
408 */
409
76e3520e 410STATIC void
cea2e8a9 411S_missingterm(pTHX_ char *s)
8990e307
LW
412{
413 char tmpbuf[3];
414 char q;
415 if (s) {
416 char *nl = strrchr(s,'\n');
d2719217 417 if (nl)
8990e307
LW
418 *nl = '\0';
419 }
9d116dd7
JH
420 else if (
421#ifdef EBCDIC
422 iscntrl(PL_multi_close)
423#else
424 PL_multi_close < 32 || PL_multi_close == 127
425#endif
426 ) {
8990e307 427 *tmpbuf = '^';
3280af22 428 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
429 s = "\\n";
430 tmpbuf[2] = '\0';
431 s = tmpbuf;
432 }
433 else {
eb160463 434 *tmpbuf = (char)PL_multi_close;
8990e307
LW
435 tmpbuf[1] = '\0';
436 s = tmpbuf;
437 }
438 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 439 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 440}
79072805 441
ffb4593c
NT
442/*
443 * Perl_deprecate
ffb4593c
NT
444 */
445
79072805 446void
864dbfa3 447Perl_deprecate(pTHX_ char *s)
a0d0e21e 448{
599cee73 449 if (ckWARN(WARN_DEPRECATED))
9014280d 450 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
451}
452
12bcd1a6
PM
453void
454Perl_deprecate_old(pTHX_ char *s)
455{
456 /* This function should NOT be called for any new deprecated warnings */
457 /* Use Perl_deprecate instead */
458 /* */
459 /* It is here to maintain backward compatibility with the pre-5.8 */
460 /* warnings category hierarchy. The "deprecated" category used to */
461 /* live under the "syntax" category. It is now a top-level category */
462 /* in its own right. */
463
464 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
465 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
466 "Use of %s is deprecated", s);
467}
468
ffb4593c
NT
469/*
470 * depcom
9cbb5ea2 471 * Deprecate a comma-less variable list.
ffb4593c
NT
472 */
473
76e3520e 474STATIC void
cea2e8a9 475S_depcom(pTHX)
a0d0e21e 476{
12bcd1a6 477 deprecate_old("comma-less variable list");
a0d0e21e
LW
478}
479
ffb4593c 480/*
9cbb5ea2
GS
481 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
482 * utf16-to-utf8-reversed.
ffb4593c
NT
483 */
484
c39cd008
GS
485#ifdef PERL_CR_FILTER
486static void
487strip_return(SV *sv)
488{
489 register char *s = SvPVX(sv);
490 register char *e = s + SvCUR(sv);
491 /* outer loop optimized to do nothing if there are no CR-LFs */
492 while (s < e) {
493 if (*s++ == '\r' && *s == '\n') {
494 /* hit a CR-LF, need to copy the rest */
495 register char *d = s - 1;
496 *d++ = *s++;
497 while (s < e) {
498 if (*s == '\r' && s[1] == '\n')
499 s++;
500 *d++ = *s++;
501 }
502 SvCUR(sv) -= s - d;
503 return;
504 }
505 }
506}
a868473f 507
76e3520e 508STATIC I32
c39cd008 509S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 510{
c39cd008
GS
511 I32 count = FILTER_READ(idx+1, sv, maxlen);
512 if (count > 0 && !maxlen)
513 strip_return(sv);
514 return count;
a868473f
NIS
515}
516#endif
517
ffb4593c
NT
518/*
519 * Perl_lex_start
9cbb5ea2
GS
520 * Initialize variables. Uses the Perl save_stack to save its state (for
521 * recursive calls to the parser).
ffb4593c
NT
522 */
523
a0d0e21e 524void
864dbfa3 525Perl_lex_start(pTHX_ SV *line)
79072805 526{
8990e307
LW
527 char *s;
528 STRLEN len;
529
3280af22
NIS
530 SAVEI32(PL_lex_dojoin);
531 SAVEI32(PL_lex_brackets);
3280af22
NIS
532 SAVEI32(PL_lex_casemods);
533 SAVEI32(PL_lex_starts);
534 SAVEI32(PL_lex_state);
7766f137 535 SAVEVPTR(PL_lex_inpat);
3280af22 536 SAVEI32(PL_lex_inwhat);
18b09519
GS
537 if (PL_lex_state == LEX_KNOWNEXT) {
538 I32 toke = PL_nexttoke;
539 while (--toke >= 0) {
540 SAVEI32(PL_nexttype[toke]);
541 SAVEVPTR(PL_nextval[toke]);
542 }
543 SAVEI32(PL_nexttoke);
18b09519 544 }
57843af0 545 SAVECOPLINE(PL_curcop);
3280af22
NIS
546 SAVEPPTR(PL_bufptr);
547 SAVEPPTR(PL_bufend);
548 SAVEPPTR(PL_oldbufptr);
549 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
550 SAVEPPTR(PL_last_lop);
551 SAVEPPTR(PL_last_uni);
3280af22
NIS
552 SAVEPPTR(PL_linestart);
553 SAVESPTR(PL_linestr);
8edd5f42
RGS
554 SAVEGENERICPV(PL_lex_brackstack);
555 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 556 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
557 SAVESPTR(PL_lex_stuff);
558 SAVEI32(PL_lex_defer);
09bef843 559 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 560 SAVESPTR(PL_lex_repl);
bebdddfc
GS
561 SAVEINT(PL_expect);
562 SAVEINT(PL_lex_expect);
3280af22
NIS
563
564 PL_lex_state = LEX_NORMAL;
565 PL_lex_defer = 0;
566 PL_expect = XSTATE;
567 PL_lex_brackets = 0;
3280af22
NIS
568 New(899, PL_lex_brackstack, 120, char);
569 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
570 PL_lex_casemods = 0;
571 *PL_lex_casestack = '\0';
572 PL_lex_dojoin = 0;
573 PL_lex_starts = 0;
574 PL_lex_stuff = Nullsv;
575 PL_lex_repl = Nullsv;
576 PL_lex_inpat = 0;
76be56bc 577 PL_nexttoke = 0;
3280af22 578 PL_lex_inwhat = 0;
09bef843 579 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
580 PL_linestr = line;
581 if (SvREADONLY(PL_linestr))
582 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
583 s = SvPV(PL_linestr, len);
6f27f9a7 584 if (!len || s[len-1] != ';') {
3280af22
NIS
585 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
586 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
587 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 588 }
3280af22
NIS
589 SvTEMP_off(PL_linestr);
590 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
591 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 592 PL_last_lop = PL_last_uni = Nullch;
3280af22 593 PL_rsfp = 0;
79072805 594}
a687059c 595
ffb4593c
NT
596/*
597 * Perl_lex_end
9cbb5ea2
GS
598 * Finalizer for lexing operations. Must be called when the parser is
599 * done with the lexer.
ffb4593c
NT
600 */
601
463ee0b2 602void
864dbfa3 603Perl_lex_end(pTHX)
463ee0b2 604{
3280af22 605 PL_doextract = FALSE;
463ee0b2
LW
606}
607
ffb4593c
NT
608/*
609 * S_incline
610 * This subroutine has nothing to do with tilting, whether at windmills
611 * or pinball tables. Its name is short for "increment line". It
57843af0 612 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 613 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
614 * # line 500 "foo.pm"
615 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_incline(pTHX_ char *s)
463ee0b2
LW
620{
621 char *t;
622 char *n;
73659bf1 623 char *e;
463ee0b2 624 char ch;
463ee0b2 625
57843af0 626 CopLINE_inc(PL_curcop);
463ee0b2
LW
627 if (*s++ != '#')
628 return;
bf4acbe4 629 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
630 if (strnEQ(s, "line", 4))
631 s += 4;
632 else
633 return;
084592ab 634 if (SPACE_OR_TAB(*s))
73659bf1 635 s++;
4e553d73 636 else
73659bf1 637 return;
bf4acbe4 638 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
639 if (!isDIGIT(*s))
640 return;
641 n = s;
642 while (isDIGIT(*s))
643 s++;
bf4acbe4 644 while (SPACE_OR_TAB(*s))
463ee0b2 645 s++;
73659bf1 646 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 647 s++;
73659bf1
GS
648 e = t + 1;
649 }
463ee0b2 650 else {
463ee0b2 651 for (t = s; !isSPACE(*t); t++) ;
73659bf1 652 e = t;
463ee0b2 653 }
bf4acbe4 654 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
655 e++;
656 if (*e != '\n' && *e != '\0')
657 return; /* false alarm */
658
463ee0b2
LW
659 ch = *t;
660 *t = '\0';
f4dd75d9 661 if (t - s > 0) {
05ec9bb3 662 CopFILE_free(PL_curcop);
57843af0 663 CopFILE_set(PL_curcop, s);
f4dd75d9 664 }
463ee0b2 665 *t = ch;
57843af0 666 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
667}
668
ffb4593c
NT
669/*
670 * S_skipspace
671 * Called to gobble the appropriate amount and type of whitespace.
672 * Skips comments as well.
673 */
674
76e3520e 675STATIC char *
cea2e8a9 676S_skipspace(pTHX_ register char *s)
a687059c 677{
3280af22 678 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 679 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
680 s++;
681 return s;
682 }
683 for (;;) {
fd049845 684 STRLEN prevlen;
09bef843 685 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 686 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
687 while (s < PL_bufend && isSPACE(*s)) {
688 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
689 incline(s);
690 }
ffb4593c
NT
691
692 /* comment */
3280af22
NIS
693 if (s < PL_bufend && *s == '#') {
694 while (s < PL_bufend && *s != '\n')
463ee0b2 695 s++;
60e6418e 696 if (s < PL_bufend) {
463ee0b2 697 s++;
60e6418e
GS
698 if (PL_in_eval && !PL_rsfp) {
699 incline(s);
700 continue;
701 }
702 }
463ee0b2 703 }
ffb4593c
NT
704
705 /* only continue to recharge the buffer if we're at the end
706 * of the buffer, we're not reading from a source filter, and
707 * we're in normal lexing mode
708 */
09bef843
SB
709 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
710 PL_lex_state == LEX_FORMLINE)
463ee0b2 711 return s;
ffb4593c
NT
712
713 /* try to recharge the buffer */
9cbb5ea2
GS
714 if ((s = filter_gets(PL_linestr, PL_rsfp,
715 (prevlen = SvCUR(PL_linestr)))) == Nullch)
716 {
717 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
718 if (PL_minus_p) {
719 sv_setpv(PL_linestr,
720 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 721 PL_minus_n = PL_minus_p = 0;
a0d0e21e 722 }
01a19ab0
NC
723 else if (PL_minus_n) {
724 sv_setpvn(PL_linestr, ";}", 2);
725 PL_minus_n = 0;
726 }
a0d0e21e 727 else
4147a61b 728 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
729
730 /* reset variables for next time we lex */
9cbb5ea2
GS
731 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
732 = SvPVX(PL_linestr);
3280af22 733 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 734 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
735
736 /* Close the filehandle. Could be from -P preprocessor,
737 * STDIN, or a regular file. If we were reading code from
738 * STDIN (because the commandline held no -e or filename)
739 * then we don't close it, we reset it so the code can
740 * read from STDIN too.
741 */
742
3280af22
NIS
743 if (PL_preprocess && !PL_in_eval)
744 (void)PerlProc_pclose(PL_rsfp);
745 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
746 PerlIO_clearerr(PL_rsfp);
8990e307 747 else
3280af22
NIS
748 (void)PerlIO_close(PL_rsfp);
749 PL_rsfp = Nullfp;
463ee0b2
LW
750 return s;
751 }
ffb4593c
NT
752
753 /* not at end of file, so we only read another line */
09bef843
SB
754 /* make corresponding updates to old pointers, for yyerror() */
755 oldprevlen = PL_oldbufptr - PL_bufend;
756 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
757 if (PL_last_uni)
758 oldunilen = PL_last_uni - PL_bufend;
759 if (PL_last_lop)
760 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
761 PL_linestart = PL_bufptr = s + prevlen;
762 PL_bufend = s + SvCUR(PL_linestr);
763 s = PL_bufptr;
09bef843
SB
764 PL_oldbufptr = s + oldprevlen;
765 PL_oldoldbufptr = s + oldoldprevlen;
766 if (PL_last_uni)
767 PL_last_uni = s + oldunilen;
768 if (PL_last_lop)
769 PL_last_lop = s + oldloplen;
a0d0e21e 770 incline(s);
ffb4593c
NT
771
772 /* debugger active and we're not compiling the debugger code,
773 * so store the line into the debugger's array of lines
774 */
3280af22 775 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
776 SV *sv = NEWSV(85,0);
777
778 sv_upgrade(sv, SVt_PVMG);
3280af22 779 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
780 (void)SvIOK_on(sv);
781 SvIVX(sv) = 0;
57843af0 782 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 783 }
463ee0b2 784 }
a687059c 785}
378cc40b 786
ffb4593c
NT
787/*
788 * S_check_uni
789 * Check the unary operators to ensure there's no ambiguity in how they're
790 * used. An ambiguous piece of code would be:
791 * rand + 5
792 * This doesn't mean rand() + 5. Because rand() is a unary operator,
793 * the +5 is its argument.
794 */
795
76e3520e 796STATIC void
cea2e8a9 797S_check_uni(pTHX)
ba106d47 798{
2f3197b3 799 char *s;
a0d0e21e 800 char *t;
2f3197b3 801
3280af22 802 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 803 return;
3280af22
NIS
804 while (isSPACE(*PL_last_uni))
805 PL_last_uni++;
7e2040f0 806 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 807 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 808 return;
0453d815 809 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 810 char ch = *s;
0453d815 811 *s = '\0';
9014280d 812 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 813 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
814 PL_last_uni);
815 *s = ch;
816 }
2f3197b3
LW
817}
818
ffb4593c
NT
819/*
820 * LOP : macro to build a list operator. Its behaviour has been replaced
821 * with a subroutine, S_lop() for which LOP is just another name.
822 */
823
a0d0e21e
LW
824#define LOP(f,x) return lop(f,x,s)
825
ffb4593c
NT
826/*
827 * S_lop
828 * Build a list operator (or something that might be one). The rules:
829 * - if we have a next token, then it's a list operator [why?]
830 * - if the next thing is an opening paren, then it's a function
831 * - else it's a list operator
832 */
833
76e3520e 834STATIC I32
a0be28da 835S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 836{
79072805 837 yylval.ival = f;
35c8bce7 838 CLINE;
3280af22
NIS
839 PL_expect = x;
840 PL_bufptr = s;
841 PL_last_lop = PL_oldbufptr;
eb160463 842 PL_last_lop_op = (OPCODE)f;
3280af22 843 if (PL_nexttoke)
bbf60fe6 844 return REPORT(LSTOP);
79072805 845 if (*s == '(')
bbf60fe6 846 return REPORT(FUNC);
79072805
LW
847 s = skipspace(s);
848 if (*s == '(')
bbf60fe6 849 return REPORT(FUNC);
79072805 850 else
bbf60fe6 851 return REPORT(LSTOP);
79072805
LW
852}
853
ffb4593c
NT
854/*
855 * S_force_next
9cbb5ea2 856 * When the lexer realizes it knows the next token (for instance,
ffb4593c 857 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
858 * to know what token to return the next time the lexer is called. Caller
859 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
860 * handles the token correctly.
ffb4593c
NT
861 */
862
4e553d73 863STATIC void
cea2e8a9 864S_force_next(pTHX_ I32 type)
79072805 865{
3280af22
NIS
866 PL_nexttype[PL_nexttoke] = type;
867 PL_nexttoke++;
868 if (PL_lex_state != LEX_KNOWNEXT) {
869 PL_lex_defer = PL_lex_state;
870 PL_lex_expect = PL_expect;
871 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
872 }
873}
874
ffb4593c
NT
875/*
876 * S_force_word
877 * When the lexer knows the next thing is a word (for instance, it has
878 * just seen -> and it knows that the next char is a word char, then
879 * it calls S_force_word to stick the next word into the PL_next lookahead.
880 *
881 * Arguments:
b1b65b59 882 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
883 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
884 * int check_keyword : if true, Perl checks to make sure the word isn't
885 * a keyword (do this if the word is a label, e.g. goto FOO)
886 * int allow_pack : if true, : characters will also be allowed (require,
887 * use, etc. do this)
9cbb5ea2 888 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
889 */
890
76e3520e 891STATIC char *
cea2e8a9 892S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 893{
463ee0b2
LW
894 register char *s;
895 STRLEN len;
4e553d73 896
463ee0b2
LW
897 start = skipspace(start);
898 s = start;
7e2040f0 899 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 900 (allow_pack && *s == ':') ||
15f0808c 901 (allow_initial_tick && *s == '\'') )
a0d0e21e 902 {
3280af22
NIS
903 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
904 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
905 return start;
906 if (token == METHOD) {
907 s = skipspace(s);
908 if (*s == '(')
3280af22 909 PL_expect = XTERM;
463ee0b2 910 else {
3280af22 911 PL_expect = XOPERATOR;
463ee0b2 912 }
79072805 913 }
3280af22
NIS
914 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
915 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
5464dbd2
RGS
916 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
917 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
79072805
LW
918 force_next(token);
919 }
920 return s;
921}
922
ffb4593c
NT
923/*
924 * S_force_ident
9cbb5ea2 925 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
926 * text only contains the "foo" portion. The first argument is a pointer
927 * to the "foo", and the second argument is the type symbol to prefix.
928 * Forces the next token to be a "WORD".
9cbb5ea2 929 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
930 */
931
76e3520e 932STATIC void
cea2e8a9 933S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
934{
935 if (s && *s) {
11343788 936 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 937 PL_nextval[PL_nexttoke].opval = o;
79072805 938 force_next(WORD);
748a9306 939 if (kind) {
11343788 940 o->op_private = OPpCONST_ENTERED;
55497cff
PP
941 /* XXX see note in pp_entereval() for why we forgo typo
942 warnings if the symbol must be introduced in an eval.
943 GSAR 96-10-12 */
3280af22 944 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
945 kind == '$' ? SVt_PV :
946 kind == '@' ? SVt_PVAV :
947 kind == '%' ? SVt_PVHV :
948 SVt_PVGV
949 );
748a9306 950 }
79072805
LW
951 }
952}
953
1571675a
GS
954NV
955Perl_str_to_version(pTHX_ SV *sv)
956{
957 NV retval = 0.0;
958 NV nshift = 1.0;
959 STRLEN len;
960 char *start = SvPVx(sv,len);
3aa33fe5 961 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
962 char *end = start + len;
963 while (start < end) {
ba210ebe 964 STRLEN skip;
1571675a
GS
965 UV n;
966 if (utf)
9041c2e3 967 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
968 else {
969 n = *(U8*)start;
970 skip = 1;
971 }
972 retval += ((NV)n)/nshift;
973 start += skip;
974 nshift *= 1000;
975 }
976 return retval;
977}
978
4e553d73 979/*
ffb4593c
NT
980 * S_force_version
981 * Forces the next token to be a version number.
e759cc13
RGS
982 * If the next token appears to be an invalid version number, (e.g. "v2b"),
983 * and if "guessing" is TRUE, then no new token is created (and the caller
984 * must use an alternative parsing method).
ffb4593c
NT
985 */
986
76e3520e 987STATIC char *
e759cc13 988S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
989{
990 OP *version = Nullop;
44dcb63b 991 char *d;
89bfa8cd
PP
992
993 s = skipspace(s);
994
44dcb63b 995 d = s;
dd629d5b 996 if (*d == 'v')
44dcb63b 997 d++;
44dcb63b 998 if (isDIGIT(*d)) {
e759cc13
RGS
999 while (isDIGIT(*d) || *d == '_' || *d == '.')
1000 d++;
9f3d182e 1001 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1002 SV *ver;
b73d6f50 1003 s = scan_num(s, &yylval);
89bfa8cd 1004 version = yylval.opval;
dd629d5b
GS
1005 ver = cSVOPx(version)->op_sv;
1006 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1007 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
1008 SvNVX(ver) = str_to_version(ver);
1009 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1010 }
89bfa8cd 1011 }
e759cc13
RGS
1012 else if (guessing)
1013 return s;
89bfa8cd
PP
1014 }
1015
1016 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1017 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1018 force_next(WORD);
89bfa8cd 1019
e759cc13 1020 return s;
89bfa8cd
PP
1021}
1022
ffb4593c
NT
1023/*
1024 * S_tokeq
1025 * Tokenize a quoted string passed in as an SV. It finds the next
1026 * chunk, up to end of string or a backslash. It may make a new
1027 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1028 * turns \\ into \.
1029 */
1030
76e3520e 1031STATIC SV *
cea2e8a9 1032S_tokeq(pTHX_ SV *sv)
79072805
LW
1033{
1034 register char *s;
1035 register char *send;
1036 register char *d;
b3ac6de7
IZ
1037 STRLEN len = 0;
1038 SV *pv = sv;
79072805
LW
1039
1040 if (!SvLEN(sv))
b3ac6de7 1041 goto finish;
79072805 1042
a0d0e21e 1043 s = SvPV_force(sv, len);
21a311ee 1044 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1045 goto finish;
463ee0b2 1046 send = s + len;
79072805
LW
1047 while (s < send && *s != '\\')
1048 s++;
1049 if (s == send)
b3ac6de7 1050 goto finish;
79072805 1051 d = s;
be4731d2 1052 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1053 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1054 if (SvUTF8(sv))
1055 SvUTF8_on(pv);
1056 }
79072805
LW
1057 while (s < send) {
1058 if (*s == '\\') {
a0d0e21e 1059 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1060 s++; /* all that, just for this */
1061 }
1062 *d++ = *s++;
1063 }
1064 *d = '\0';
463ee0b2 1065 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1066 finish:
3280af22 1067 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1068 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1069 return sv;
1070}
1071
ffb4593c
NT
1072/*
1073 * Now come three functions related to double-quote context,
1074 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1075 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1076 * interact with PL_lex_state, and create fake ( ... ) argument lists
1077 * to handle functions and concatenation.
1078 * They assume that whoever calls them will be setting up a fake
1079 * join call, because each subthing puts a ',' after it. This lets
1080 * "lower \luPpEr"
1081 * become
1082 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1083 *
1084 * (I'm not sure whether the spurious commas at the end of lcfirst's
1085 * arguments and join's arguments are created or not).
1086 */
1087
1088/*
1089 * S_sublex_start
1090 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1091 *
1092 * Pattern matching will set PL_lex_op to the pattern-matching op to
1093 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1094 *
1095 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1096 *
1097 * Everything else becomes a FUNC.
1098 *
1099 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1100 * had an OP_CONST or OP_READLINE). This just sets us up for a
1101 * call to S_sublex_push().
1102 */
1103
76e3520e 1104STATIC I32
cea2e8a9 1105S_sublex_start(pTHX)
79072805
LW
1106{
1107 register I32 op_type = yylval.ival;
79072805
LW
1108
1109 if (op_type == OP_NULL) {
3280af22
NIS
1110 yylval.opval = PL_lex_op;
1111 PL_lex_op = Nullop;
79072805
LW
1112 return THING;
1113 }
1114 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1115 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1116
1117 if (SvTYPE(sv) == SVt_PVIV) {
1118 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1119 STRLEN len;
1120 char *p;
1121 SV *nsv;
1122
1123 p = SvPV(sv, len);
79cb57f6 1124 nsv = newSVpvn(p, len);
01ec43d0
GS
1125 if (SvUTF8(sv))
1126 SvUTF8_on(nsv);
b3ac6de7
IZ
1127 SvREFCNT_dec(sv);
1128 sv = nsv;
4e553d73 1129 }
b3ac6de7 1130 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1131 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1132 /* Allow <FH> // "foo" */
1133 if (op_type == OP_READLINE)
1134 PL_expect = XTERMORDORDOR;
79072805
LW
1135 return THING;
1136 }
1137
3280af22
NIS
1138 PL_sublex_info.super_state = PL_lex_state;
1139 PL_sublex_info.sub_inwhat = op_type;
1140 PL_sublex_info.sub_op = PL_lex_op;
1141 PL_lex_state = LEX_INTERPPUSH;
55497cff 1142
3280af22
NIS
1143 PL_expect = XTERM;
1144 if (PL_lex_op) {
1145 yylval.opval = PL_lex_op;
1146 PL_lex_op = Nullop;
55497cff
PP
1147 return PMFUNC;
1148 }
1149 else
1150 return FUNC;
1151}
1152
ffb4593c
NT
1153/*
1154 * S_sublex_push
1155 * Create a new scope to save the lexing state. The scope will be
1156 * ended in S_sublex_done. Returns a '(', starting the function arguments
1157 * to the uc, lc, etc. found before.
1158 * Sets PL_lex_state to LEX_INTERPCONCAT.
1159 */
1160
76e3520e 1161STATIC I32
cea2e8a9 1162S_sublex_push(pTHX)
55497cff 1163{
f46d017c 1164 ENTER;
55497cff 1165
3280af22
NIS
1166 PL_lex_state = PL_sublex_info.super_state;
1167 SAVEI32(PL_lex_dojoin);
1168 SAVEI32(PL_lex_brackets);
3280af22
NIS
1169 SAVEI32(PL_lex_casemods);
1170 SAVEI32(PL_lex_starts);
1171 SAVEI32(PL_lex_state);
7766f137 1172 SAVEVPTR(PL_lex_inpat);
3280af22 1173 SAVEI32(PL_lex_inwhat);
57843af0 1174 SAVECOPLINE(PL_curcop);
3280af22 1175 SAVEPPTR(PL_bufptr);
8452ff4b 1176 SAVEPPTR(PL_bufend);
3280af22
NIS
1177 SAVEPPTR(PL_oldbufptr);
1178 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1179 SAVEPPTR(PL_last_lop);
1180 SAVEPPTR(PL_last_uni);
3280af22
NIS
1181 SAVEPPTR(PL_linestart);
1182 SAVESPTR(PL_linestr);
8edd5f42
RGS
1183 SAVEGENERICPV(PL_lex_brackstack);
1184 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1185
1186 PL_linestr = PL_lex_stuff;
1187 PL_lex_stuff = Nullsv;
1188
9cbb5ea2
GS
1189 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1190 = SvPVX(PL_linestr);
3280af22 1191 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1192 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1193 SAVEFREESV(PL_linestr);
1194
1195 PL_lex_dojoin = FALSE;
1196 PL_lex_brackets = 0;
3280af22
NIS
1197 New(899, PL_lex_brackstack, 120, char);
1198 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1199 PL_lex_casemods = 0;
1200 *PL_lex_casestack = '\0';
1201 PL_lex_starts = 0;
1202 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1203 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1204
1205 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1206 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1207 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1208 else
3280af22 1209 PL_lex_inpat = Nullop;
79072805 1210
55497cff 1211 return '(';
79072805
LW
1212}
1213
ffb4593c
NT
1214/*
1215 * S_sublex_done
1216 * Restores lexer state after a S_sublex_push.
1217 */
1218
76e3520e 1219STATIC I32
cea2e8a9 1220S_sublex_done(pTHX)
79072805 1221{
3280af22 1222 if (!PL_lex_starts++) {
9aa983d2
JH
1223 SV *sv = newSVpvn("",0);
1224 if (SvUTF8(PL_linestr))
1225 SvUTF8_on(sv);
3280af22 1226 PL_expect = XOPERATOR;
9aa983d2 1227 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1228 return THING;
1229 }
1230
3280af22
NIS
1231 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1232 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1233 return yylex();
79072805
LW
1234 }
1235
ffb4593c 1236 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1237 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1238 PL_linestr = PL_lex_repl;
1239 PL_lex_inpat = 0;
1240 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1241 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1242 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1243 SAVEFREESV(PL_linestr);
1244 PL_lex_dojoin = FALSE;
1245 PL_lex_brackets = 0;
3280af22
NIS
1246 PL_lex_casemods = 0;
1247 *PL_lex_casestack = '\0';
1248 PL_lex_starts = 0;
25da4f38 1249 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1250 PL_lex_state = LEX_INTERPNORMAL;
1251 PL_lex_starts++;
e9fa98b2
HS
1252 /* we don't clear PL_lex_repl here, so that we can check later
1253 whether this is an evalled subst; that means we rely on the
1254 logic to ensure sublex_done() is called again only via the
1255 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1256 }
e9fa98b2 1257 else {
3280af22 1258 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1259 PL_lex_repl = Nullsv;
1260 }
79072805 1261 return ',';
ffed7fef
LW
1262 }
1263 else {
f46d017c 1264 LEAVE;
3280af22
NIS
1265 PL_bufend = SvPVX(PL_linestr);
1266 PL_bufend += SvCUR(PL_linestr);
1267 PL_expect = XOPERATOR;
09bef843 1268 PL_sublex_info.sub_inwhat = 0;
79072805 1269 return ')';
ffed7fef
LW
1270 }
1271}
1272
02aa26ce
NT
1273/*
1274 scan_const
1275
1276 Extracts a pattern, double-quoted string, or transliteration. This
1277 is terrifying code.
1278
3280af22
NIS
1279 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1280 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1281 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1282
9b599b2a
GS
1283 Returns a pointer to the character scanned up to. Iff this is
1284 advanced from the start pointer supplied (ie if anything was
1285 successfully parsed), will leave an OP for the substring scanned
1286 in yylval. Caller must intuit reason for not parsing further
1287 by looking at the next characters herself.
1288
02aa26ce
NT
1289 In patterns:
1290 backslashes:
1291 double-quoted style: \r and \n
1292 regexp special ones: \D \s
1293 constants: \x3
1294 backrefs: \1 (deprecated in substitution replacements)
1295 case and quoting: \U \Q \E
1296 stops on @ and $, but not for $ as tail anchor
1297
1298 In transliterations:
1299 characters are VERY literal, except for - not at the start or end
1300 of the string, which indicates a range. scan_const expands the
1301 range to the full set of intermediate characters.
1302
1303 In double-quoted strings:
1304 backslashes:
1305 double-quoted style: \r and \n
1306 constants: \x3
1307 backrefs: \1 (deprecated)
1308 case and quoting: \U \Q \E
1309 stops on @ and $
1310
1311 scan_const does *not* construct ops to handle interpolated strings.
1312 It stops processing as soon as it finds an embedded $ or @ variable
1313 and leaves it to the caller to work out what's going on.
1314
da6eedaa 1315 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1316
1317 $ in pattern could be $foo or could be tail anchor. Assumption:
1318 it's a tail anchor if $ is the last thing in the string, or if it's
1319 followed by one of ")| \n\t"
1320
1321 \1 (backreferences) are turned into $1
1322
1323 The structure of the code is
1324 while (there's a character to process) {
1325 handle transliteration ranges
1326 skip regexp comments
1327 skip # initiated comments in //x patterns
1328 check for embedded @foo
1329 check for embedded scalars
1330 if (backslash) {
1331 leave intact backslashes from leave (below)
1332 deprecate \1 in strings and sub replacements
1333 handle string-changing backslashes \l \U \Q \E, etc.
1334 switch (what was escaped) {
1335 handle - in a transliteration (becomes a literal -)
1336 handle \132 octal characters
1337 handle 0x15 hex characters
1338 handle \cV (control V)
1339 handle printf backslashes (\f, \r, \n, etc)
1340 } (end switch)
1341 } (end if backslash)
1342 } (end while character to read)
4e553d73 1343
02aa26ce
NT
1344*/
1345
76e3520e 1346STATIC char *
cea2e8a9 1347S_scan_const(pTHX_ char *start)
79072805 1348{
3280af22 1349 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1350 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1351 register char *s = start; /* start of the constant */
1352 register char *d = SvPVX(sv); /* destination for copies */
1353 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1354 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1355 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1356 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1357 UV uv;
1358
dff6d3cd 1359 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1360 PL_lex_inpat
b6d5fef8 1361 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1362 : "";
79072805 1363
2b9d42f0
NIS
1364 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1365 /* If we are doing a trans and we know we want UTF8 set expectation */
1366 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1367 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1368 }
1369
1370
79072805 1371 while (s < send || dorange) {
02aa26ce 1372 /* get transliterations out of the way (they're most literal) */
3280af22 1373 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1374 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1375 if (dorange) {
1ba5c669
JH
1376 I32 i; /* current expanded character */
1377 I32 min; /* first character in range */
1378 I32 max; /* last character in range */
02aa26ce 1379
2b9d42f0 1380 if (has_utf8) {
8973db79
JH
1381 char *c = (char*)utf8_hop((U8*)d, -1);
1382 char *e = d++;
1383 while (e-- > c)
1384 *(e + 1) = *e;
25716404 1385 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1386 /* mark the range as done, and continue */
1387 dorange = FALSE;
1388 didrange = TRUE;
1389 continue;
1390 }
2b9d42f0 1391
02aa26ce 1392 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1393 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1394 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1395 d -= 2; /* eat the first char and the - */
1396
8ada0baa
JH
1397 min = (U8)*d; /* first char in range */
1398 max = (U8)d[1]; /* last char in range */
1399
c2e66d9e 1400 if (min > max) {
01ec43d0 1401 Perl_croak(aTHX_
d1573ac7 1402 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1403 (char)min, (char)max);
c2e66d9e
GS
1404 }
1405
c7f1f016 1406#ifdef EBCDIC
8ada0baa
JH
1407 if ((isLOWER(min) && isLOWER(max)) ||
1408 (isUPPER(min) && isUPPER(max))) {
1409 if (isLOWER(min)) {
1410 for (i = min; i <= max; i++)
1411 if (isLOWER(i))
db42d148 1412 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1413 } else {
1414 for (i = min; i <= max; i++)
1415 if (isUPPER(i))
db42d148 1416 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1417 }
1418 }
1419 else
1420#endif
1421 for (i = min; i <= max; i++)
eb160463 1422 *d++ = (char)i;
02aa26ce
NT
1423
1424 /* mark the range as done, and continue */
79072805 1425 dorange = FALSE;
01ec43d0 1426 didrange = TRUE;
79072805 1427 continue;
4e553d73 1428 }
02aa26ce
NT
1429
1430 /* range begins (ignore - as first or last char) */
79072805 1431 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1432 if (didrange) {
1fafa243 1433 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1434 }
2b9d42f0 1435 if (has_utf8) {
25716404 1436 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1437 s++;
1438 continue;
1439 }
79072805
LW
1440 dorange = TRUE;
1441 s++;
01ec43d0
GS
1442 }
1443 else {
1444 didrange = FALSE;
1445 }
79072805 1446 }
02aa26ce
NT
1447
1448 /* if we get here, we're not doing a transliteration */
1449
0f5d15d6
IZ
1450 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1451 except for the last char, which will be done separately. */
3280af22 1452 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1453 if (s[2] == '#') {
e994fd66 1454 while (s+1 < send && *s != ')')
db42d148 1455 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1456 }
1457 else if (s[2] == '{' /* This should match regcomp.c */
1458 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1459 {
cc6b7395 1460 I32 count = 1;
0f5d15d6 1461 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1462 char c;
1463
d9f97599
GS
1464 while (count && (c = *regparse)) {
1465 if (c == '\\' && regparse[1])
1466 regparse++;
4e553d73 1467 else if (c == '{')
cc6b7395 1468 count++;
4e553d73 1469 else if (c == '}')
cc6b7395 1470 count--;
d9f97599 1471 regparse++;
cc6b7395 1472 }
e994fd66 1473 if (*regparse != ')')
5bdf89e7 1474 regparse--; /* Leave one char for continuation. */
0f5d15d6 1475 while (s < regparse)
db42d148 1476 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1477 }
748a9306 1478 }
02aa26ce
NT
1479
1480 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1481 else if (*s == '#' && PL_lex_inpat &&
1482 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1483 while (s+1 < send && *s != '\n')
db42d148 1484 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1485 }
02aa26ce 1486
5d1d4326 1487 /* check for embedded arrays
da6eedaa 1488 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1489 */
7e2040f0 1490 else if (*s == '@' && s[1]
5d1d4326 1491 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1492 break;
02aa26ce
NT
1493
1494 /* check for embedded scalars. only stop if we're sure it's a
1495 variable.
1496 */
79072805 1497 else if (*s == '$') {
3280af22 1498 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1499 break;
6002328a 1500 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1501 break; /* in regexp, $ might be tail anchor */
1502 }
02aa26ce 1503
2b9d42f0
NIS
1504 /* End of else if chain - OP_TRANS rejoin rest */
1505
02aa26ce 1506 /* backslashes */
79072805
LW
1507 if (*s == '\\' && s+1 < send) {
1508 s++;
02aa26ce
NT
1509
1510 /* some backslashes we leave behind */
c9f97d15 1511 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1512 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1513 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1514 continue;
1515 }
02aa26ce
NT
1516
1517 /* deprecate \1 in strings and substitution replacements */
3280af22 1518 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1519 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1520 {
599cee73 1521 if (ckWARN(WARN_SYNTAX))
9014280d 1522 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1523 *--s = '$';
1524 break;
1525 }
02aa26ce
NT
1526
1527 /* string-change backslash escapes */
3280af22 1528 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1529 --s;
1530 break;
1531 }
02aa26ce
NT
1532
1533 /* if we get here, it's either a quoted -, or a digit */
79072805 1534 switch (*s) {
02aa26ce
NT
1535
1536 /* quoted - in transliterations */
79072805 1537 case '-':
3280af22 1538 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1539 *d++ = *s++;
1540 continue;
1541 }
1542 /* FALL THROUGH */
1543 default:
11b8faa4 1544 {
707afd92
MS
1545 if (ckWARN(WARN_MISC) &&
1546 isALNUM(*s) &&
1547 *s != '_')
9014280d 1548 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1549 "Unrecognized escape \\%c passed through",
1550 *s);
1551 /* default action is to copy the quoted character */
f9a63242 1552 goto default_action;
11b8faa4 1553 }
02aa26ce
NT
1554
1555 /* \132 indicates an octal constant */
79072805
LW
1556 case '0': case '1': case '2': case '3':
1557 case '4': case '5': case '6': case '7':
ba210ebe 1558 {
53305cf1
NC
1559 I32 flags = 0;
1560 STRLEN len = 3;
1561 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1562 s += len;
1563 }
012bcf8d 1564 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1565
1566 /* \x24 indicates a hex constant */
79072805 1567 case 'x':
a0ed51b3
LW
1568 ++s;
1569 if (*s == '{') {
1570 char* e = strchr(s, '}');
a4c04bdc
NC
1571 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1572 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1573 STRLEN len;
355860ce 1574
53305cf1 1575 ++s;
adaeee49 1576 if (!e) {
a0ed51b3 1577 yyerror("Missing right brace on \\x{}");
355860ce 1578 continue;
ba210ebe 1579 }
53305cf1
NC
1580 len = e - s;
1581 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1582 s = e + 1;
a0ed51b3
LW
1583 }
1584 else {
ba210ebe 1585 {
53305cf1 1586 STRLEN len = 2;
a4c04bdc 1587 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1588 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1589 s += len;
1590 }
012bcf8d
GS
1591 }
1592
1593 NUM_ESCAPE_INSERT:
1594 /* Insert oct or hex escaped character.
301d3d20 1595 * There will always enough room in sv since such
db42d148 1596 * escapes will be longer than any UTF-8 sequence
301d3d20 1597 * they can end up as. */
ba7cea30 1598
c7f1f016
NIS
1599 /* We need to map to chars to ASCII before doing the tests
1600 to cover EBCDIC
1601 */
c4d5f83a 1602 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1603 if (!has_utf8 && uv > 255) {
301d3d20
JH
1604 /* Might need to recode whatever we have
1605 * accumulated so far if it contains any
1606 * hibit chars.
1607 *
1608 * (Can't we keep track of that and avoid
1609 * this rescan? --jhi)
012bcf8d 1610 */
c7f1f016 1611 int hicount = 0;
63cd0674
NIS
1612 U8 *c;
1613 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1614 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1615 hicount++;
db42d148 1616 }
012bcf8d 1617 }
63cd0674 1618 if (hicount) {
db42d148
NIS
1619 STRLEN offset = d - SvPVX(sv);
1620 U8 *src, *dst;
1621 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1622 src = (U8 *)d - 1;
1623 dst = src+hicount;
1624 d += hicount;
1625 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1626 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1627 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1628 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1629 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1630 }
1631 else {
63cd0674 1632 *dst-- = *src;
012bcf8d 1633 }
c7f1f016 1634 src--;
012bcf8d
GS
1635 }
1636 }
1637 }
1638
9aa983d2 1639 if (has_utf8 || uv > 255) {
9041c2e3 1640 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1641 has_utf8 = TRUE;
f9a63242
JH
1642 if (PL_lex_inwhat == OP_TRANS &&
1643 PL_sublex_info.sub_op) {
1644 PL_sublex_info.sub_op->op_private |=
1645 (PL_lex_repl ? OPpTRANS_FROM_UTF
1646 : OPpTRANS_TO_UTF);
f9a63242 1647 }
012bcf8d 1648 }
a0ed51b3 1649 else {
012bcf8d 1650 *d++ = (char)uv;
a0ed51b3 1651 }
012bcf8d
GS
1652 }
1653 else {
c4d5f83a 1654 *d++ = (char) uv;
a0ed51b3 1655 }
79072805 1656 continue;
02aa26ce 1657
b239daa5 1658 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1659 case 'N':
55eda711 1660 ++s;
423cee85
JH
1661 if (*s == '{') {
1662 char* e = strchr(s, '}');
155aba94 1663 SV *res;
423cee85
JH
1664 STRLEN len;
1665 char *str;
4e553d73 1666
423cee85 1667 if (!e) {
5777a3f7 1668 yyerror("Missing right brace on \\N{}");
423cee85
JH
1669 e = s - 1;
1670 goto cont_scan;
1671 }
dbc0d4f2
JH
1672 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1673 /* \N{U+...} */
1674 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1675 PERL_SCAN_DISALLOW_PREFIX;
1676 s += 3;
1677 len = e - s;
1678 uv = grok_hex(s, &len, &flags, NULL);
1679 s = e + 1;
1680 goto NUM_ESCAPE_INSERT;
1681 }
55eda711
JH
1682 res = newSVpvn(s + 1, e - s - 1);
1683 res = new_constant( Nullch, 0, "charnames",
1684 res, Nullsv, "\\N{...}" );
f9a63242
JH
1685 if (has_utf8)
1686 sv_utf8_upgrade(res);
423cee85 1687 str = SvPV(res,len);
1c47067b
JH
1688#ifdef EBCDIC_NEVER_MIND
1689 /* charnames uses pack U and that has been
1690 * recently changed to do the below uni->native
1691 * mapping, so this would be redundant (and wrong,
1692 * the code point would be doubly converted).
1693 * But leave this in just in case the pack U change
1694 * gets revoked, but the semantics is still
1695 * desireable for charnames. --jhi */
cddc7ef4
JH
1696 {
1697 UV uv = utf8_to_uvchr((U8*)str, 0);
1698
1699 if (uv < 0x100) {
1700 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1701
1702 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1703 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1704 str = SvPV(res, len);
1705 }
1706 }
1707#endif
89491803 1708 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1709 char *ostart = SvPVX(sv);
1710 SvCUR_set(sv, d - ostart);
1711 SvPOK_on(sv);
e4f3eed8 1712 *d = '\0';
f08d6ad9 1713 sv_utf8_upgrade(sv);
d2f449dd 1714 /* this just broke our allocation above... */
eb160463 1715 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1716 d = SvPVX(sv) + SvCUR(sv);
89491803 1717 has_utf8 = TRUE;
f08d6ad9 1718 }
eb160463 1719 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1720 char *odest = SvPVX(sv);
1721
8973db79 1722 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1723 d = SvPVX(sv) + (d - odest);
1724 }
1725 Copy(str, d, len, char);
1726 d += len;
1727 SvREFCNT_dec(res);
1728 cont_scan:
1729 s = e + 1;
1730 }
1731 else
5777a3f7 1732 yyerror("Missing braces on \\N{}");
423cee85
JH
1733 continue;
1734
02aa26ce 1735 /* \c is a control character */
79072805
LW
1736 case 'c':
1737 s++;
961ce445 1738 if (s < send) {
ba210ebe 1739 U8 c = *s++;
c7f1f016
NIS
1740#ifdef EBCDIC
1741 if (isLOWER(c))
1742 c = toUPPER(c);
1743#endif
db42d148 1744 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1745 }
961ce445
RGS
1746 else {
1747 yyerror("Missing control char name in \\c");
1748 }
79072805 1749 continue;
02aa26ce
NT
1750
1751 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1752 case 'b':
db42d148 1753 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1754 break;
1755 case 'n':
db42d148 1756 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1757 break;
1758 case 'r':
db42d148 1759 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1760 break;
1761 case 'f':
db42d148 1762 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1763 break;
1764 case 't':
db42d148 1765 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1766 break;
34a3fe2a 1767 case 'e':
db42d148 1768 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1769 break;
1770 case 'a':
db42d148 1771 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1772 break;
02aa26ce
NT
1773 } /* end switch */
1774
79072805
LW
1775 s++;
1776 continue;
02aa26ce
NT
1777 } /* end if (backslash) */
1778
f9a63242 1779 default_action:
2b9d42f0
NIS
1780 /* If we started with encoded form, or already know we want it
1781 and then encode the next character */
1782 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1783 STRLEN len = 1;
1784 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1785 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1786 s += len;
1787 if (need > len) {
1788 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1789 STRLEN off = d - SvPVX(sv);
1790 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1791 }
1792 d = (char*)uvchr_to_utf8((U8*)d, uv);
1793 has_utf8 = TRUE;
1794 }
1795 else {
1796 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1797 }
02aa26ce
NT
1798 } /* while loop to process each character */
1799
1800 /* terminate the string and set up the sv */
79072805 1801 *d = '\0';
463ee0b2 1802 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1803 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1804 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1805
79072805 1806 SvPOK_on(sv);
9f4817db 1807 if (PL_encoding && !has_utf8) {
d0063567
DK
1808 sv_recode_to_utf8(sv, PL_encoding);
1809 if (SvUTF8(sv))
1810 has_utf8 = TRUE;
9f4817db 1811 }
2b9d42f0 1812 if (has_utf8) {
7e2040f0 1813 SvUTF8_on(sv);
2b9d42f0 1814 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1815 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1816 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1817 }
1818 }
79072805 1819
02aa26ce 1820 /* shrink the sv if we allocated more than we used */
79072805
LW
1821 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1822 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1823 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1824 }
02aa26ce 1825
9b599b2a 1826 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1827 if (s > PL_bufptr) {
1828 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1829 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1830 sv, Nullsv,
4e553d73 1831 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1832 ? "tr"
3280af22 1833 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1834 ? "s"
1835 : "qq")));
79072805 1836 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1837 } else
8990e307 1838 SvREFCNT_dec(sv);
79072805
LW
1839 return s;
1840}
1841
ffb4593c
NT
1842/* S_intuit_more
1843 * Returns TRUE if there's more to the expression (e.g., a subscript),
1844 * FALSE otherwise.
ffb4593c
NT
1845 *
1846 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1847 *
1848 * ->[ and ->{ return TRUE
1849 * { and [ outside a pattern are always subscripts, so return TRUE
1850 * if we're outside a pattern and it's not { or [, then return FALSE
1851 * if we're in a pattern and the first char is a {
1852 * {4,5} (any digits around the comma) returns FALSE
1853 * if we're in a pattern and the first char is a [
1854 * [] returns FALSE
1855 * [SOMETHING] has a funky algorithm to decide whether it's a
1856 * character class or not. It has to deal with things like
1857 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1858 * anything else returns TRUE
1859 */
1860
9cbb5ea2
GS
1861/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1862
76e3520e 1863STATIC int
cea2e8a9 1864S_intuit_more(pTHX_ register char *s)
79072805 1865{
3280af22 1866 if (PL_lex_brackets)
79072805
LW
1867 return TRUE;
1868 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1869 return TRUE;
1870 if (*s != '{' && *s != '[')
1871 return FALSE;
3280af22 1872 if (!PL_lex_inpat)
79072805
LW
1873 return TRUE;
1874
1875 /* In a pattern, so maybe we have {n,m}. */
1876 if (*s == '{') {
1877 s++;
1878 if (!isDIGIT(*s))
1879 return TRUE;
1880 while (isDIGIT(*s))
1881 s++;
1882 if (*s == ',')
1883 s++;
1884 while (isDIGIT(*s))
1885 s++;
1886 if (*s == '}')
1887 return FALSE;
1888 return TRUE;
1889
1890 }
1891
1892 /* On the other hand, maybe we have a character class */
1893
1894 s++;
1895 if (*s == ']' || *s == '^')
1896 return FALSE;
1897 else {
ffb4593c 1898 /* this is terrifying, and it works */
79072805
LW
1899 int weight = 2; /* let's weigh the evidence */
1900 char seen[256];
f27ffc4a 1901 unsigned char un_char = 255, last_un_char;
93a17b20 1902 char *send = strchr(s,']');
3280af22 1903 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1904
1905 if (!send) /* has to be an expression */
1906 return TRUE;
1907
1908 Zero(seen,256,char);
1909 if (*s == '$')
1910 weight -= 3;
1911 else if (isDIGIT(*s)) {
1912 if (s[1] != ']') {
1913 if (isDIGIT(s[1]) && s[2] == ']')
1914 weight -= 10;
1915 }
1916 else
1917 weight -= 100;
1918 }
1919 for (; s < send; s++) {
1920 last_un_char = un_char;
1921 un_char = (unsigned char)*s;
1922 switch (*s) {
1923 case '@':
1924 case '&':
1925 case '$':
1926 weight -= seen[un_char] * 10;
7e2040f0 1927 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1928 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1929 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1930 weight -= 100;
1931 else
1932 weight -= 10;
1933 }
1934 else if (*s == '$' && s[1] &&
93a17b20
LW
1935 strchr("[#!%*<>()-=",s[1])) {
1936 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1937 weight -= 10;
1938 else
1939 weight -= 1;
1940 }
1941 break;
1942 case '\\':
1943 un_char = 254;
1944 if (s[1]) {
93a17b20 1945 if (strchr("wds]",s[1]))
79072805
LW
1946 weight += 100;
1947 else if (seen['\''] || seen['"'])
1948 weight += 1;
93a17b20 1949 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1950 weight += 40;
1951 else if (isDIGIT(s[1])) {
1952 weight += 40;
1953 while (s[1] && isDIGIT(s[1]))
1954 s++;
1955 }
1956 }
1957 else
1958 weight += 100;
1959 break;
1960 case '-':
1961 if (s[1] == '\\')
1962 weight += 50;
93a17b20 1963 if (strchr("aA01! ",last_un_char))
79072805 1964 weight += 30;
93a17b20 1965 if (strchr("zZ79~",s[1]))
79072805 1966 weight += 30;
f27ffc4a
GS
1967 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1968 weight -= 5; /* cope with negative subscript */
79072805
LW
1969 break;
1970 default:
3792a11b
NC
1971 if (!isALNUM(last_un_char)
1972 && !(last_un_char == '$' || last_un_char == '@'
1973 || last_un_char == '&')
1974 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1975 char *d = tmpbuf;
1976 while (isALPHA(*s))
1977 *d++ = *s++;
1978 *d = '\0';
1979 if (keyword(tmpbuf, d - tmpbuf))
1980 weight -= 150;
1981 }
1982 if (un_char == last_un_char + 1)
1983 weight += 5;
1984 weight -= seen[un_char];
1985 break;
1986 }
1987 seen[un_char]++;
1988 }
1989 if (weight >= 0) /* probably a character class */
1990 return FALSE;
1991 }
1992
1993 return TRUE;
1994}
ffed7fef 1995
ffb4593c
NT
1996/*
1997 * S_intuit_method
1998 *
1999 * Does all the checking to disambiguate
2000 * foo bar
2001 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2002 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2003 *
2004 * First argument is the stuff after the first token, e.g. "bar".
2005 *
2006 * Not a method if bar is a filehandle.
2007 * Not a method if foo is a subroutine prototyped to take a filehandle.
2008 * Not a method if it's really "Foo $bar"
2009 * Method if it's "foo $bar"
2010 * Not a method if it's really "print foo $bar"
2011 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2012 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2013 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2014 * =>
2015 */
2016
76e3520e 2017STATIC int
cea2e8a9 2018S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2019{
2020 char *s = start + (*start == '$');
3280af22 2021 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2022 STRLEN len;
2023 GV* indirgv;
2024
2025 if (gv) {
b6c543e3 2026 CV *cv;
a0d0e21e
LW
2027 if (GvIO(gv))
2028 return 0;
b6c543e3
IZ
2029 if ((cv = GvCVu(gv))) {
2030 char *proto = SvPVX(cv);
2031 if (proto) {
2032 if (*proto == ';')
2033 proto++;
2034 if (*proto == '*')
2035 return 0;
2036 }
2037 } else
a0d0e21e
LW
2038 gv = 0;
2039 }
8903cb82 2040 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2041 /* start is the beginning of the possible filehandle/object,
2042 * and s is the end of it
2043 * tmpbuf is a copy of it
2044 */
2045
a0d0e21e 2046 if (*start == '$') {
3280af22 2047 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2048 return 0;
2049 s = skipspace(s);
3280af22
NIS
2050 PL_bufptr = start;
2051 PL_expect = XREF;
a0d0e21e
LW
2052 return *s == '(' ? FUNCMETH : METHOD;
2053 }
2054 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2055 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2056 len -= 2;
2057 tmpbuf[len] = '\0';
2058 goto bare_package;
2059 }
2060 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2061 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2062 return 0;
2063 /* filehandle or package name makes it a method */
89bfa8cd 2064 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2065 s = skipspace(s);
3280af22 2066 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2067 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2068 bare_package:
3280af22 2069 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2070 newSVpvn(tmpbuf,len));
3280af22
NIS
2071 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2072 PL_expect = XTERM;
a0d0e21e 2073 force_next(WORD);
3280af22 2074 PL_bufptr = s;
a0d0e21e
LW
2075 return *s == '(' ? FUNCMETH : METHOD;
2076 }
2077 }
2078 return 0;
2079}
2080
ffb4593c
NT
2081/*
2082 * S_incl_perldb
2083 * Return a string of Perl code to load the debugger. If PERL5DB
2084 * is set, it will return the contents of that, otherwise a
2085 * compile-time require of perl5db.pl.
2086 */
2087
76e3520e 2088STATIC char*
cea2e8a9 2089S_incl_perldb(pTHX)
a0d0e21e 2090{
3280af22 2091 if (PL_perldb) {
76e3520e 2092 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2093
2094 if (pdb)
2095 return pdb;
93189314 2096 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2097 return "BEGIN { require 'perl5db.pl' }";
2098 }
2099 return "";
2100}
2101
2102
16d20bd9 2103/* Encoded script support. filter_add() effectively inserts a
4e553d73 2104 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2105 * Note that the filter function only applies to the current source file
2106 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2107 *
2108 * The datasv parameter (which may be NULL) can be used to pass
2109 * private data to this instance of the filter. The filter function
2110 * can recover the SV using the FILTER_DATA macro and use it to
2111 * store private buffers and state information.
2112 *
2113 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2114 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2115 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2116 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2117 * private use must be set using malloc'd pointers.
2118 */
16d20bd9
AD
2119
2120SV *
864dbfa3 2121Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2122{
f4c556ac
GS
2123 if (!funcp)
2124 return Nullsv;
2125
3280af22
NIS
2126 if (!PL_rsfp_filters)
2127 PL_rsfp_filters = newAV();
16d20bd9 2128 if (!datasv)
8c52afec 2129 datasv = NEWSV(255,0);
16d20bd9 2130 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2131 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2132 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2133 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2134 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2135 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2136 av_unshift(PL_rsfp_filters, 1);
2137 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2138 return(datasv);
2139}
4e553d73 2140
16d20bd9
AD
2141
2142/* Delete most recently added instance of this filter function. */
a0d0e21e 2143void
864dbfa3 2144Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2145{
e0c19803 2146 SV *datasv;
fe5a182c 2147 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2148 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2149 return;
2150 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2151 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2152 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2153 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2154 IoANY(datasv) = (void *)NULL;
3280af22 2155 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2156
16d20bd9
AD
2157 return;
2158 }
2159 /* we need to search for the correct entry and clear it */
cea2e8a9 2160 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2161}
2162
2163
1de9afcd
RGS
2164/* Invoke the idxth filter function for the current rsfp. */
2165/* maxlen 0 = read one text line */
16d20bd9 2166I32
864dbfa3 2167Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2168{
16d20bd9
AD
2169 filter_t funcp;
2170 SV *datasv = NULL;
e50aee73 2171
3280af22 2172 if (!PL_rsfp_filters)
16d20bd9 2173 return -1;
1de9afcd 2174 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2175 /* Provide a default input filter to make life easy. */
2176 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2177 DEBUG_P(PerlIO_printf(Perl_debug_log,
2178 "filter_read %d: from rsfp\n", idx));
4e553d73 2179 if (maxlen) {
16d20bd9
AD
2180 /* Want a block */
2181 int len ;
2182 int old_len = SvCUR(buf_sv) ;
2183
2184 /* ensure buf_sv is large enough */
eb160463 2185 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2186 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2187 if (PerlIO_error(PL_rsfp))
37120919
AD
2188 return -1; /* error */
2189 else
2190 return 0 ; /* end of file */
2191 }
16d20bd9
AD
2192 SvCUR_set(buf_sv, old_len + len) ;
2193 } else {
2194 /* Want a line */
3280af22
NIS
2195 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2196 if (PerlIO_error(PL_rsfp))
37120919
AD
2197 return -1; /* error */
2198 else
2199 return 0 ; /* end of file */
2200 }
16d20bd9
AD
2201 }
2202 return SvCUR(buf_sv);
2203 }
2204 /* Skip this filter slot if filter has been deleted */
1de9afcd 2205 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2206 DEBUG_P(PerlIO_printf(Perl_debug_log,
2207 "filter_read %d: skipped (filter deleted)\n",
2208 idx));
16d20bd9
AD
2209 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2210 }
2211 /* Get function pointer hidden within datasv */
4755096e 2212 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2213 DEBUG_P(PerlIO_printf(Perl_debug_log,
2214 "filter_read %d: via function %p (%s)\n",
fe5a182c 2215 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2216 /* Call function. The function is expected to */
2217 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2218 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2219 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2220}
2221
76e3520e 2222STATIC char *
cea2e8a9 2223S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2224{
c39cd008 2225#ifdef PERL_CR_FILTER
3280af22 2226 if (!PL_rsfp_filters) {
c39cd008 2227 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2228 }
2229#endif
3280af22 2230 if (PL_rsfp_filters) {
55497cff
PP
2231 if (!append)
2232 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2233 if (FILTER_READ(0, sv, 0) > 0)
2234 return ( SvPVX(sv) ) ;
2235 else
2236 return Nullch ;
2237 }
9d116dd7 2238 else
fd049845 2239 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2240}
2241
01ec43d0
GS
2242STATIC HV *
2243S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2244{
2245 GV *gv;
2246
01ec43d0 2247 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2248 return PL_curstash;
2249
2250 if (len > 2 &&
2251 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2252 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2253 {
2254 return GvHV(gv); /* Foo:: */
def3634b
GS
2255 }
2256
2257 /* use constant CLASS => 'MyClass' */
2258 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2259 SV *sv;
2260 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2261 pkgname = SvPV_nolen(sv);
2262 }
2263 }
2264
2265 return gv_stashpv(pkgname, FALSE);
2266}
a0d0e21e 2267
748a9306
LW
2268#ifdef DEBUGGING
2269 static char* exp_name[] =
09bef843 2270 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2271 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2272 };
748a9306 2273#endif
463ee0b2 2274
02aa26ce
NT
2275/*
2276 yylex
2277
2278 Works out what to call the token just pulled out of the input
2279 stream. The yacc parser takes care of taking the ops we return and
2280 stitching them into a tree.
2281
2282 Returns:
2283 PRIVATEREF
2284
2285 Structure:
2286 if read an identifier
2287 if we're in a my declaration
2288 croak if they tried to say my($foo::bar)
2289 build the ops for a my() declaration
2290 if it's an access to a my() variable
2291 are we in a sort block?
2292 croak if my($a); $a <=> $b
2293 build ops for access to a my() variable
2294 if in a dq string, and they've said @foo and we can't find @foo
2295 croak
2296 build ops for a bareword
2297 if we already built the token before, use it.
2298*/
2299
20141f0e 2300
dba4d153
JH
2301#ifdef __SC__
2302#pragma segment Perl_yylex
2303#endif
dba4d153 2304int
dba4d153 2305Perl_yylex(pTHX)
20141f0e 2306{
3afc138a 2307 register char *s = PL_bufptr;
378cc40b 2308 register char *d;
79072805 2309 register I32 tmp;
463ee0b2 2310 STRLEN len;
161b471a
NIS
2311 GV *gv = Nullgv;
2312 GV **gvp = 0;
aa7440fb 2313 bool bof = FALSE;
1d239bbb 2314 I32 orig_keyword = 0;
a687059c 2315
bbf60fe6
DM
2316 DEBUG_T( {
2317 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2318 lex_state_names[PL_lex_state]);
2319 } );
02aa26ce 2320 /* check if there's an identifier for us to look at */
ba979b31 2321 if (PL_pending_ident)
bbf60fe6 2322 return REPORT(S_pending_ident(aTHX));
bbce6d69 2323
02aa26ce
NT
2324 /* no identifier pending identification */
2325
3280af22 2326 switch (PL_lex_state) {
79072805
LW
2327#ifdef COMMENTARY
2328 case LEX_NORMAL: /* Some compilers will produce faster */
2329 case LEX_INTERPNORMAL: /* code if we comment these out. */
2330 break;
2331#endif
2332
09bef843 2333 /* when we've already built the next token, just pull it out of the queue */
79072805 2334 case LEX_KNOWNEXT:
3280af22
NIS
2335 PL_nexttoke--;
2336 yylval = PL_nextval[PL_nexttoke];
2337 if (!PL_nexttoke) {
2338 PL_lex_state = PL_lex_defer;
2339 PL_expect = PL_lex_expect;
2340 PL_lex_defer = LEX_NORMAL;
463ee0b2 2341 }
607df283 2342 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2343 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2344 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2345
bbf60fe6 2346 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2347
02aa26ce 2348 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2349 when we get here, PL_bufptr is at the \
02aa26ce 2350 */
79072805
LW
2351 case LEX_INTERPCASEMOD:
2352#ifdef DEBUGGING
3280af22 2353 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2354 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2355#endif
02aa26ce 2356 /* handle \E or end of string */
3280af22 2357 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2358 char oldmod;
02aa26ce
NT
2359
2360 /* if at a \E */
3280af22
NIS
2361 if (PL_lex_casemods) {
2362 oldmod = PL_lex_casestack[--PL_lex_casemods];
2363 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2364
3792a11b
NC
2365 if (PL_bufptr != PL_bufend
2366 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2367 PL_bufptr += 2;
2368 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2369 }
bbf60fe6 2370 return REPORT(')');
79072805 2371 }
3280af22
NIS
2372 if (PL_bufptr != PL_bufend)
2373 PL_bufptr += 2;
2374 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2375 return yylex();
79072805
LW
2376 }
2377 else {
607df283 2378 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2379 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2380 s = PL_bufptr + 1;
6e909404
JH
2381 if (s[1] == '\\' && s[2] == 'E') {
2382 PL_bufptr = s + 3;
2383 PL_lex_state = LEX_INTERPCONCAT;
2384 return yylex();
a0d0e21e 2385 }
6e909404
JH
2386 else {
2387 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2388 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2389 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2390 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2391 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2392 return REPORT(')');
6e909404
JH
2393 }
2394 if (PL_lex_casemods > 10)
2395 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2396 PL_lex_casestack[PL_lex_casemods++] = *s;
2397 PL_lex_casestack[PL_lex_casemods] = '\0';
2398 PL_lex_state = LEX_INTERPCONCAT;
2399 PL_nextval[PL_nexttoke].ival = 0;
2400 force_next('(');
2401 if (*s == 'l')
2402 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2403 else if (*s == 'u')
2404 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2405 else if (*s == 'L')
2406 PL_nextval[PL_nexttoke].ival = OP_LC;
2407 else if (*s == 'U')
2408 PL_nextval[PL_nexttoke].ival = OP_UC;
2409 else if (*s == 'Q')
2410 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2411 else
2412 Perl_croak(aTHX_ "panic: yylex");
2413 PL_bufptr = s + 1;
a0d0e21e 2414 }
79072805 2415 force_next(FUNC);
3280af22
NIS
2416 if (PL_lex_starts) {
2417 s = PL_bufptr;
2418 PL_lex_starts = 0;
131b3ad0
DM
2419 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2420 if (PL_lex_casemods == 1 && PL_lex_inpat)
2421 OPERATOR(',');
2422 else
2423 Aop(OP_CONCAT);
79072805
LW
2424 }
2425 else
cea2e8a9 2426 return yylex();
79072805
LW
2427 }
2428
55497cff 2429 case LEX_INTERPPUSH:
bbf60fe6 2430 return REPORT(sublex_push());
55497cff 2431
79072805 2432 case LEX_INTERPSTART:
3280af22 2433 if (PL_bufptr == PL_bufend)
bbf60fe6 2434 return REPORT(sublex_done());
607df283 2435 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2436 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2437 PL_expect = XTERM;
2438 PL_lex_dojoin = (*PL_bufptr == '@');
2439 PL_lex_state = LEX_INTERPNORMAL;
2440 if (PL_lex_dojoin) {
2441 PL_nextval[PL_nexttoke].ival = 0;
79072805 2442 force_next(',');
a0d0e21e 2443 force_ident("\"", '$');
3280af22 2444 PL_nextval[PL_nexttoke].ival = 0;
79072805 2445 force_next('$');
3280af22 2446 PL_nextval[PL_nexttoke].ival = 0;
79072805 2447 force_next('(');
3280af22 2448 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2449 force_next(FUNC);
2450 }
3280af22
NIS
2451 if (PL_lex_starts++) {
2452 s = PL_bufptr;
131b3ad0
DM
2453 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2454 if (!PL_lex_casemods && PL_lex_inpat)
2455 OPERATOR(',');
2456 else
2457 Aop(OP_CONCAT);
79072805 2458 }
cea2e8a9 2459 return yylex();
79072805
LW
2460
2461 case LEX_INTERPENDMAYBE:
3280af22
NIS
2462 if (intuit_more(PL_bufptr)) {
2463 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2464 break;
2465 }
2466 /* FALL THROUGH */
2467
2468 case LEX_INTERPEND:
3280af22
NIS
2469 if (PL_lex_dojoin) {
2470 PL_lex_dojoin = FALSE;
2471 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2472 return REPORT(')');
79072805 2473 }
43a16006 2474 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2475 && SvEVALED(PL_lex_repl))
43a16006 2476 {
e9fa98b2 2477 if (PL_bufptr != PL_bufend)
cea2e8a9 2478 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2479 PL_lex_repl = Nullsv;
2480 }
79072805
LW
2481 /* FALLTHROUGH */
2482 case LEX_INTERPCONCAT:
2483#ifdef DEBUGGING
3280af22 2484 if (PL_lex_brackets)
cea2e8a9 2485 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2486#endif
3280af22 2487 if (PL_bufptr == PL_bufend)
bbf60fe6 2488 return REPORT(sublex_done());
79072805 2489
3280af22
NIS
2490 if (SvIVX(PL_linestr) == '\'') {
2491 SV *sv = newSVsv(PL_linestr);
2492 if (!PL_lex_inpat)
76e3520e 2493 sv = tokeq(sv);
3280af22 2494 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2495 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2496 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2497 s = PL_bufend;
79072805
LW
2498 }
2499 else {
3280af22 2500 s = scan_const(PL_bufptr);
79072805 2501 if (*s == '\\')
3280af22 2502 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2503 else
3280af22 2504 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2505 }
2506
3280af22
NIS
2507 if (s != PL_bufptr) {
2508 PL_nextval[PL_nexttoke] = yylval;
2509 PL_expect = XTERM;
79072805 2510 force_next(THING);
131b3ad0
DM
2511 if (PL_lex_starts++) {
2512 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2513 if (!PL_lex_casemods && PL_lex_inpat)
2514 OPERATOR(',');
2515 else
2516 Aop(OP_CONCAT);
2517 }
79072805 2518 else {
3280af22 2519 PL_bufptr = s;
cea2e8a9 2520 return yylex();
79072805
LW
2521 }
2522 }
2523
cea2e8a9 2524 return yylex();
a0d0e21e 2525 case LEX_FORMLINE:
3280af22
NIS
2526 PL_lex_state = LEX_NORMAL;
2527 s = scan_formline(PL_bufptr);
2528 if (!PL_lex_formbrack)
a0d0e21e
LW
2529 goto rightbracket;
2530 OPERATOR(';');
79072805
LW
2531 }
2532
3280af22
NIS
2533 s = PL_bufptr;
2534 PL_oldoldbufptr = PL_oldbufptr;
2535 PL_oldbufptr = s;
607df283 2536 DEBUG_T( {
bbf60fe6 2537 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2538 exp_name[PL_expect], s);
5f80b19c 2539 } );
463ee0b2
LW
2540
2541 retry:
378cc40b
LW
2542 switch (*s) {
2543 default:
7e2040f0 2544 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2545 goto keylookup;
cea2e8a9 2546 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2547 case 4:
2548 case 26:
2549 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2550 case 0:
3280af22
NIS
2551 if (!PL_rsfp) {
2552 PL_last_uni = 0;
2553 PL_last_lop = 0;
c5ee2135
LW
2554 if (PL_lex_brackets) {
2555 if (PL_lex_formbrack)
2556 yyerror("Format not terminated");
2557 else
2558 yyerror("Missing right curly or square bracket");
2559 }
4e553d73 2560 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2561 "### Tokener got EOF\n");
5f80b19c 2562 } );
79072805 2563 TOKEN(0);
463ee0b2 2564 }
3280af22 2565 if (s++ < PL_bufend)
a687059c 2566 goto retry; /* ignore stray nulls */
3280af22
NIS
2567 PL_last_uni = 0;
2568 PL_last_lop = 0;
2569 if (!PL_in_eval && !PL_preambled) {
2570 PL_preambled = TRUE;
2571 sv_setpv(PL_linestr,incl_perldb());
2572 if (SvCUR(PL_linestr))
4147a61b 2573 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2574 if (PL_preambleav){
2575 while(AvFILLp(PL_preambleav) >= 0) {
2576 SV *tmpsv = av_shift(PL_preambleav);
2577 sv_catsv(PL_linestr, tmpsv);
4147a61b 2578 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2579 sv_free(tmpsv);
2580 }
3280af22
NIS
2581 sv_free((SV*)PL_preambleav);
2582 PL_preambleav = NULL;
91b7def8 2583 }
3280af22
NIS
2584 if (PL_minus_n || PL_minus_p) {
2585 sv_catpv(PL_linestr, "LINE: while (<>) {");
2586 if (PL_minus_l)
2587 sv_catpv(PL_linestr,"chomp;");
2588 if (PL_minus_a) {
3280af22 2589 if (PL_minus_F) {
3792a11b
NC
2590 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2591 || *PL_splitstr == '"')
3280af22 2592 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2593 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2594 else {
c8ef6a4b
NC
2595 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2596 bytes can be used as quoting characters. :-) */
48c4c863
NC
2597 /* The count here deliberately includes the NUL
2598 that terminates the C string constant. This
2599 embeds the opening NUL into the string. */
9a3fb652 2600 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2601 s = PL_splitstr;
2602 do {
2603 /* Need to \ \s */
2604 if (*s == '\\')
2605 sv_catpvn(PL_linestr, s, 1);
2606 sv_catpvn(PL_linestr, s, 1);
2607 } while (*s++);
2608 /* This loop will embed the trailing NUL of
2609 PL_linestr as the last thing it does before
2610 terminating. */
4fccd7c6 2611 sv_catpvn(PL_linestr, ");", 2);
54310121 2612 }
2304df62
AD
2613 }
2614 else
75c72d73 2615 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2616 }
79072805 2617 }
4147a61b 2618 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2619 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2620 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2621 PL_last_lop = PL_last_uni = Nullch;
3280af22 2622 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2623 SV *sv = NEWSV(85,0);
2624
2625 sv_upgrade(sv, SVt_PVMG);
3280af22 2626 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2627 (void)SvIOK_on(sv);
2628 SvIVX(sv) = 0;
57843af0 2629 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2630 }
79072805 2631 goto retry;
a687059c 2632 }
e929a76b 2633 do {
aa7440fb 2634 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2635 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2636 fake_eof:
2637 if (PL_rsfp) {
2638 if (PL_preprocess && !PL_in_eval)
2639 (void)PerlProc_pclose(PL_rsfp);
2640 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2641 PerlIO_clearerr(PL_rsfp);
2642 else
2643 (void)PerlIO_close(PL_rsfp);
2644 PL_rsfp = Nullfp;
2645 PL_doextract = FALSE;
2646 }
2647 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2648 sv_setpv(PL_linestr,PL_minus_p
2649 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2650 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2651 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2652 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2653 PL_minus_n = PL_minus_p = 0;
2654 goto retry;
2655 }
2656 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2657 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2658 sv_setpv(PL_linestr,"");
2659 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2660 }
7aa207d6
JH
2661 /* If it looks like the start of a BOM or raw UTF-16,
2662 * check if it in fact is. */
2663 else if (bof &&
2664 (*s == 0 ||
2665 *(U8*)s == 0xEF ||
2666 *(U8*)s >= 0xFE ||
2667 s[1] == 0)) {
226017aa 2668#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2669# ifdef __GNU_LIBRARY__
2670# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2671# define FTELL_FOR_PIPE_IS_BROKEN
2672# endif
e3f494f1
JH
2673# else
2674# ifdef __GLIBC__
2675# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2676# define FTELL_FOR_PIPE_IS_BROKEN
2677# endif
2678# endif
226017aa
DD
2679# endif
2680#endif
2681#ifdef FTELL_FOR_PIPE_IS_BROKEN
2682 /* This loses the possibility to detect the bof
2683 * situation on perl -P when the libc5 is being used.
2684 * Workaround? Maybe attach some extra state to PL_rsfp?
2685 */
2686 if (!PL_preprocess)
7e28d3af 2687 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2688#else
eb160463 2689 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2690#endif
7e28d3af 2691 if (bof) {
3280af22 2692 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2693 s = swallow_bom((U8*)s);
e929a76b 2694 }
378cc40b 2695 }
3280af22 2696 if (PL_doextract) {
a0d0e21e
LW
2697 /* Incest with pod. */
2698 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2699 sv_setpv(PL_linestr, "");
2700 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2701 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2702 PL_last_lop = PL_last_uni = Nullch;
3280af22 2703 PL_doextract = FALSE;
a0d0e21e 2704 }
4e553d73 2705 }
463ee0b2 2706 incline(s);
3280af22
NIS
2707 } while (PL_doextract);
2708 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2709 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2710 SV *sv = NEWSV(85,0);
a687059c 2711
93a17b20 2712 sv_upgrade(sv, SVt_PVMG);
3280af22 2713 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2714 (void)SvIOK_on(sv);
2715 SvIVX(sv) = 0;
57843af0 2716 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2717 }
3280af22 2718 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2719 PL_last_lop = PL_last_uni = Nullch;
57843af0 2720 if (CopLINE(PL_curcop) == 1) {
3280af22 2721 while (s < PL_bufend && isSPACE(*s))
79072805 2722 s++;
a0d0e21e 2723 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2724 s++;
44a8e56a 2725 d = Nullch;
3280af22 2726 if (!PL_in_eval) {
44a8e56a
PP
2727 if (*s == '#' && *(s+1) == '!')
2728 d = s + 2;
2729#ifdef ALTERNATE_SHEBANG
2730 else {
2731 static char as[] = ALTERNATE_SHEBANG;
2732 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2733 d = s + (sizeof(as) - 1);
2734 }
2735#endif /* ALTERNATE_SHEBANG */
2736 }
2737 if (d) {
b8378b72 2738 char *ipath;
774d564b 2739 char *ipathend;
b8378b72 2740
774d564b 2741 while (isSPACE(*d))
b8378b72
CS
2742 d++;
2743 ipath = d;
774d564b
PP
2744 while (*d && !isSPACE(*d))
2745 d++;
2746 ipathend = d;
2747
2748#ifdef ARG_ZERO_IS_SCRIPT
2749 if (ipathend > ipath) {
2750 /*
2751 * HP-UX (at least) sets argv[0] to the script name,
2752 * which makes $^X incorrect. And Digital UNIX and Linux,
2753 * at least, set argv[0] to the basename of the Perl
2754 * interpreter. So, having found "#!", we'll set it right.
2755 */
ee2f7564 2756 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2757 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2758 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2759 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2760 SvSETMAGIC(x);
2761 }
556c1dec
JH
2762 else {
2763 STRLEN blen;
2764 STRLEN llen;
2765 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2766 char *lstart = SvPV(x,llen);
2767 if (llen < blen) {
2768 bstart += blen - llen;
2769 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2770 sv_setpvn(x, ipath, ipathend - ipath);
2771 SvSETMAGIC(x);
2772 }
2773 }
2774 }
774d564b 2775 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2776 }
774d564b 2777#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2778
2779 /*
2780 * Look for options.
2781 */
748a9306 2782 d = instr(s,"perl -");
84e30d1a 2783 if (!d) {
748a9306 2784 d = instr(s,"perl");
84e30d1a
GS
2785#if defined(DOSISH)
2786 /* avoid getting into infinite loops when shebang
2787 * line contains "Perl" rather than "perl" */
2788 if (!d) {
2789 for (d = ipathend-4; d >= ipath; --d) {
2790 if ((*d == 'p' || *d == 'P')
2791 && !ibcmp(d, "perl", 4))
2792 {
2793 break;
2794 }
2795 }
2796 if (d < ipath)
2797 d = Nullch;
2798 }
2799#endif
2800 }
44a8e56a
PP
2801#ifdef ALTERNATE_SHEBANG
2802 /*
2803 * If the ALTERNATE_SHEBANG on this system starts with a
2804 * character that can be part of a Perl expression, then if
2805 * we see it but not "perl", we're probably looking at the
2806 * start of Perl code, not a request to hand off to some
2807 * other interpreter. Similarly, if "perl" is there, but
2808 * not in the first 'word' of the line, we assume the line
2809 * contains the start of the Perl program.
44a8e56a
PP
2810 */
2811 if (d && *s != '#') {
774d564b 2812 char *c = ipath;
44a8e56a
PP
2813 while (*c && !strchr("; \t\r\n\f\v#", *c))
2814 c++;
2815 if (c < d)
2816 d = Nullch; /* "perl" not in first word; ignore */
2817 else
2818 *s = '#'; /* Don't try to parse shebang line */
2819 }
774d564b 2820#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2821#ifndef MACOS_TRADITIONAL
748a9306 2822 if (!d &&
44a8e56a 2823 *s == '#' &&
774d564b 2824 ipathend > ipath &&
3280af22 2825 !PL_minus_c &&
748a9306 2826 !instr(s,"indir") &&
3280af22 2827 instr(PL_origargv[0],"perl"))
748a9306 2828 {
9f68db38 2829 char **newargv;
9f68db38 2830
774d564b
PP
2831 *ipathend = '\0';
2832 s = ipathend + 1;
3280af22 2833 while (s < PL_bufend && isSPACE(*s))
9f68db38 2834 s++;
3280af22
NIS
2835 if (s < PL_bufend) {
2836 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2837 newargv[1] = s;
3280af22 2838 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2839 s++;
2840 *s = '\0';
3280af22 2841 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2842 }
2843 else
3280af22 2844 newargv = PL_origargv;
774d564b 2845 newargv[0] = ipath;
b35112e7 2846 PERL_FPU_PRE_EXEC
b4748376 2847 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2848 PERL_FPU_POST_EXEC
cea2e8a9 2849 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2850 }
bf4acbe4 2851#endif
748a9306 2852 if (d) {
3280af22
NIS
2853 U32 oldpdb = PL_perldb;
2854 bool oldn = PL_minus_n;
2855 bool oldp = PL_minus_p;
748a9306
LW
2856
2857 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2858 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2859
2860 if (*d++ == '-') {
a11ec5a9 2861 bool switches_done = PL_doswitches;
8cc95fdb
PP
2862 do {
2863 if (*d == 'M' || *d == 'm') {
2864 char *m = d;
2865 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2866 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2867 (int)(d - m), m);
2868 }
2869 d = moreswitches(d);
2870 } while (d);
f0b2cf55
YST
2871 if (PL_doswitches && !switches_done) {
2872 int argc = PL_origargc;
2873 char **argv = PL_origargv;
2874 do {
2875 argc--,argv++;
2876 } while (argc && argv[0][0] == '-' && argv[0][1]);
2877 init_argv_symbols(argc,argv);
2878 }
155aba94
GS
2879 if ((PERLDB_LINE && !oldpdb) ||
2880 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2881 /* if we have already added "LINE: while (<>) {",
2882 we must not do it again */
748a9306 2883 {
3280af22
NIS
2884 sv_setpv(PL_linestr, "");
2885 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2886 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2887 PL_last_lop = PL_last_uni = Nullch;
3280af22 2888 PL_preambled = FALSE;
84902520 2889 if (PERLDB_LINE)
3280af22 2890 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2891 goto retry;
2892 }
a11ec5a9
RGS
2893 if (PL_doswitches && !switches_done) {
2894 int argc = PL_origargc;
2895 char **argv = PL_origargv;
2896 do {
2897 argc--,argv++;
2898 } while (argc && argv[0][0] == '-' && argv[0][1]);
2899 init_argv_symbols(argc,argv);
2900 }
a0d0e21e 2901 }
79072805 2902 }
9f68db38 2903 }
79072805 2904 }
3280af22
NIS
2905 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2906 PL_bufptr = s;
2907 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2908 return yylex();
ae986130 2909 }
378cc40b 2910 goto retry;
4fdae800 2911 case '\r':
6a27c188 2912#ifdef PERL_STRICT_CR
cea2e8a9 2913 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2914 Perl_croak(aTHX_
cc507455 2915 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2916#endif
4fdae800 2917 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2918#ifdef MACOS_TRADITIONAL
2919 case '\312':
2920#endif
378cc40b
LW
2921 s++;
2922 goto retry;
378cc40b 2923 case '#':
e929a76b 2924 case '\n':
3280af22 2925 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2926 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2927 /* handle eval qq[#line 1 "foo"\n ...] */
2928 CopLINE_dec(PL_curcop);
2929 incline(s);
2930 }
3280af22 2931 d = PL_bufend;
a687059c 2932 while (s < d && *s != '\n')
378cc40b 2933 s++;
0f85fab0 2934 if (s < d)
378cc40b 2935 s++;
78c267c1 2936 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2937 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2938 incline(s);
3280af22
NIS
2939 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2940 PL_bufptr = s;
2941 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2942 return yylex();
a687059c 2943 }
378cc40b 2944 }
a687059c 2945 else {
378cc40b 2946 *s = '\0';
3280af22 2947 PL_bufend = s;
a687059c 2948 }
378cc40b
LW
2949 goto retry;
2950 case '-':
79072805 2951 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2952 I32 ftst = 0;
2953
378cc40b 2954 s++;
3280af22 2955 PL_bufptr = s;
748a9306
LW
2956 tmp = *s++;
2957
bf4acbe4 2958 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2959 s++;
2960
2961 if (strnEQ(s,"=>",2)) {
3280af22 2962 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2963 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2964 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2965 } );
748a9306
LW
2966 OPERATOR('-'); /* unary minus */
2967 }
3280af22 2968 PL_last_uni = PL_oldbufptr;
748a9306 2969 switch (tmp) {
e5edeb50
JH
2970 case 'r': ftst = OP_FTEREAD; break;
2971 case 'w': ftst = OP_FTEWRITE; break;
2972 case 'x': ftst = OP_FTEEXEC; break;
2973 case 'o': ftst = OP_FTEOWNED; break;
2974 case 'R': ftst = OP_FTRREAD; break;
2975 case 'W': ftst = OP_FTRWRITE; break;
2976 case 'X': ftst = OP_FTREXEC; break;
2977 case 'O': ftst = OP_FTROWNED; break;
2978 case 'e': ftst = OP_FTIS; break;
2979 case 'z': ftst = OP_FTZERO; break;
2980 case 's': ftst = OP_FTSIZE; break;
2981 case 'f': ftst = OP_FTFILE; break;
2982 case 'd': ftst = OP_FTDIR; break;
2983 case 'l': ftst = OP_FTLINK; break;
2984 case 'p': ftst = OP_FTPIPE; break;
2985 case 'S': ftst = OP_FTSOCK; break;
2986 case 'u': ftst = OP_FTSUID; break;
2987 case 'g': ftst = OP_FTSGID; break;
2988 case 'k': ftst = OP_FTSVTX; break;
2989 case 'b': ftst = OP_FTBLK; break;
2990 case 'c': ftst = OP_FTCHR; break;
2991 case 't': ftst = OP_FTTTY; break;
2992 case 'T': ftst = OP_FTTEXT; break;
2993 case 'B': ftst = OP_FTBINARY; break;
2994 case 'M': case 'A': case 'C':
2995 gv_fetchpv("\024",TRUE, SVt_PV);
2996 switch (tmp) {
2997 case 'M': ftst = OP_FTMTIME; break;
2998 case 'A': ftst = OP_FTATIME; break;
2999 case 'C': ftst = OP_FTCTIME; break;
3000 default: break;
3001 }
3002 break;
378cc40b 3003 default:
378cc40b
LW
3004 break;
3005 }
e5edeb50 3006 if (ftst) {
eb160463 3007 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3008 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3009 "### Saw file test %c\n", (int)ftst);
5f80b19c 3010 } );
e5edeb50
JH
3011 FTST(ftst);
3012 }
3013 else {
3014 /* Assume it was a minus followed by a one-letter named
3015 * subroutine call (or a -bareword), then. */
95c31fe3 3016 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3017 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3018 (int) tmp);
5f80b19c 3019 } );
3cf7b4c4 3020 s = --PL_bufptr;
e5edeb50 3021 }
378cc40b 3022 }
a687059c
LW
3023 tmp = *s++;
3024 if (*s == tmp) {
3025 s++;
3280af22 3026 if (PL_expect == XOPERATOR)
79072805
LW
3027 TERM(POSTDEC);
3028 else
3029 OPERATOR(PREDEC);
3030 }
3031 else if (*s == '>') {
3032 s++;
3033 s = skipspace(s);
7e2040f0 3034 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3035 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3036 TOKEN(ARROW);
79072805 3037 }
748a9306
LW
3038 else if (*s == '$')
3039 OPERATOR(ARROW);
463ee0b2 3040 else
748a9306 3041 TERM(ARROW);
a687059c 3042 }
3280af22 3043 if (PL_expect == XOPERATOR)
79072805
LW
3044 Aop(OP_SUBTRACT);
3045 else {
3280af22 3046 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3047 check_uni();
79072805 3048 OPERATOR('-'); /* unary minus */
2f3197b3 3049 }
79072805 3050
378cc40b 3051 case '+':
a687059c
LW
3052 tmp = *s++;
3053 if (*s == tmp) {
378cc40b 3054 s++;
3280af22 3055 if (PL_expect == XOPERATOR)
79072805
LW
3056 TERM(POSTINC);
3057 else
3058 OPERATOR(PREINC);
378cc40b 3059 }
3280af22 3060 if (PL_expect == XOPERATOR)
79072805
LW
3061 Aop(OP_ADD);
3062 else {
3280af22 3063 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3064 check_uni();
a687059c 3065 OPERATOR('+');
2f3197b3 3066 }
a687059c 3067
378cc40b 3068 case '*':
3280af22
NIS
3069 if (PL_expect != XOPERATOR) {
3070 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3071 PL_expect = XOPERATOR;
3072 force_ident(PL_tokenbuf, '*');
3073 if (!*PL_tokenbuf)
a0d0e21e 3074 PREREF('*');
79072805 3075 TERM('*');
a687059c 3076 }
79072805
LW
3077 s++;
3078 if (*s == '*') {
a687059c 3079 s++;
79072805 3080 PWop(OP_POW);
a687059c 3081 }
79072805
LW
3082 Mop(OP_MULTIPLY);
3083
378cc40b 3084 case '%':
3280af22 3085 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3086 ++s;
3087 Mop(OP_MODULO);
a687059c 3088 }
3280af22
NIS
3089 PL_tokenbuf[0] = '%';
3090 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3091 if (!PL_tokenbuf[1]) {
bbce6d69 3092 PREREF('%');
a687059c 3093 }
3280af22 3094 PL_pending_ident = '%';
bbce6d69 3095 TERM('%');
a687059c 3096
378cc40b 3097 case '^':
79072805 3098 s++;
a0d0e21e 3099 BOop(OP_BIT_XOR);
79072805 3100 case '[':
3280af22 3101 PL_lex_brackets++;
79072805 3102 /* FALL THROUGH */
378cc40b 3103 case '~':
378cc40b 3104 case ',':
378cc40b
LW
3105 tmp = *s++;
3106 OPERATOR(tmp);
a0d0e21e
LW
3107 case ':':
3108 if (s[1] == ':') {
3109 len = 0;
3110 goto just_a_word;
3111 }
3112 s++;
09bef843
SB
3113 switch (PL_expect) {
3114 OP *attrs;
3115 case XOPERATOR:
3116 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3117 break;
3118 PL_bufptr = s; /* update in case we back off */
3119 goto grabattrs;
3120 case XATTRBLOCK:
3121 PL_expect = XBLOCK;
3122 goto grabattrs;
3123 case XATTRTERM:
3124 PL_expect = XTERMBLOCK;
3125 grabattrs:
3126 s = skipspace(s);
3127 attrs = Nullop;
7e2040f0 3128 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3129 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3130 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3131 if (tmp < 0) tmp = -tmp;
3132 switch (tmp) {
3133 case KEY_or:
3134 case KEY_and:
c963b151 3135 case KEY_err:
f9829d6b
GS
3136 case KEY_for:
3137 case KEY_unless:
3138 case KEY_if:
3139 case KEY_while:
3140 case KEY_until:
3141 goto got_attrs;
3142 default:
3143 break;
3144 }
3145 }
09bef843
SB
3146 if (*d == '(') {
3147 d = scan_str(d,TRUE,TRUE);
3148 if (!d) {
09bef843
SB
3149 /* MUST advance bufptr here to avoid bogus
3150 "at end of line" context messages from yyerror().
3151 */
3152 PL_bufptr = s + len;
3153 yyerror("Unterminated attribute parameter in attribute list");
3154 if (attrs)
3155 op_free(attrs);
bbf60fe6 3156 return REPORT(0); /* EOF indicator */
09bef843
SB
3157 }
3158 }
3159 if (PL_lex_stuff) {
3160 SV *sv = newSVpvn(s, len);
3161 sv_catsv(sv, PL_lex_stuff);
3162 attrs = append_elem(OP_LIST, attrs,
3163 newSVOP(OP_CONST, 0, sv));
3164 SvREFCNT_dec(PL_lex_stuff);
3165 PL_lex_stuff = Nullsv;
3166 }
3167 else {
371fce9b
DM
3168 if (len == 6 && strnEQ(s, "unique", len)) {
3169 if (PL_in_my == KEY_our)
3170#ifdef USE_ITHREADS
3171 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3172#else
3173 ; /* skip to avoid loading attributes.pm */
3174#endif
3175 else
3176 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3177 }
3178
d3cea301
SB
3179 /* NOTE: any CV attrs applied here need to be part of
3180 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3181 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3182 CvLVALUE_on(PL_compcv);
3183 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3184 CvLOCKED_on(PL_compcv);
3185 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3186 CvMETHOD_on(PL_compcv);
06492da6
SF
3187 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3188 CvASSERTION_on(PL_compcv);
78f9721b
SM
3189 /* After we've set the flags, it could be argued that
3190 we don't need to do the attributes.pm-based setting
3191 process, and shouldn't bother appending recognized
d3cea301
SB
3192 flags. To experiment with that, uncomment the
3193 following "else". (Note that's already been
3194 uncommented. That keeps the above-applied built-in
3195 attributes from being intercepted (and possibly
3196 rejected) by a package's attribute routines, but is
3197 justified by the performance win for the common case
3198 of applying only built-in attributes.) */
0256094b 3199 else
78f9721b
SM
3200 attrs = append_elem(OP_LIST, attrs,
3201 newSVOP(OP_CONST, 0,
3202 newSVpvn(s, len)));
09bef843
SB
3203 }
3204 s = skipspace(d);
0120eecf 3205 if (*s == ':' && s[1] != ':')
09bef843 3206 s = skipspace(s+1);
0120eecf
GS
3207 else if (s == d)
3208 break; /* require real whitespace or :'s */
09bef843 3209 }
f9829d6b 3210 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3211 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3212 char q = ((*s == '\'') ? '"' : '\'');
3213 /* If here for an expression, and parsed no attrs, back off. */
3214 if (tmp == '=' && !attrs) {
3215 s = PL_bufptr;
3216 break;
3217 }
3218 /* MUST advance bufptr here to avoid bogus "at end of line"
3219 context messages from yyerror().
3220 */
3221 PL_bufptr = s;
3222 if (!*s)
3223 yyerror("Unterminated attribute list");
3224 else
3225 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3226 q, *s, q));
3227 if (attrs)
3228 op_free(attrs);
3229 OPERATOR(':');
3230 }
f9829d6b 3231 got_attrs:
09bef843
SB
3232 if (attrs) {
3233 PL_nextval[PL_nexttoke].opval = attrs;
3234 force_next(THING);
3235 }
3236 TOKEN(COLONATTR);
3237 }
a0d0e21e 3238 OPERATOR(':');
8990e307
LW
3239 case '(':
3240 s++;
3280af22
NIS
3241 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3242 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3243 else
3280af22 3244 PL_expect = XTERM;
4a202259 3245 s = skipspace(s);
a0d0e21e 3246 TOKEN('(');
378cc40b 3247 case ';':
f4dd75d9 3248 CLINE;
378cc40b
LW
3249 tmp = *s++;
3250 OPERATOR(tmp);
3251 case ')':
378cc40b 3252 tmp = *s++;
16d20bd9
AD
3253 s = skipspace(s);
3254 if (*s == '{')
3255 PREBLOCK(tmp);
378cc40b 3256 TERM(tmp);
79072805
LW
3257 case ']':
3258 s++;
3280af22 3259 if (PL_lex_brackets <= 0)
d98d5fff 3260 yyerror("Unmatched right square bracket");
463ee0b2 3261 else
3280af22
NIS
3262 --PL_lex_brackets;
3263 if (PL_lex_state == LEX_INTERPNORMAL) {
3264 if (PL_lex_brackets == 0) {
a0d0e21e 3265 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3266 PL_lex_state = LEX_INTERPEND;
79072805
LW
3267 }
3268 }
4633a7c4 3269 TERM(']');
79072805
LW
3270 case '{':
3271 leftbracket:
79072805 3272 s++;
3280af22 3273 if (PL_lex_brackets > 100) {
8edd5f42 3274 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3275 }
3280af22 3276 switch (PL_expect) {
a0d0e21e 3277 case XTERM:
3280af22 3278 if (PL_lex_formbrack) {
a0d0e21e
LW
3279 s--;
3280 PRETERMBLOCK(DO);
3281 }
3280af22
NIS
3282 if (PL_oldoldbufptr == PL_last_lop)
3283 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3284 else
3280af22 3285 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3286 OPERATOR(HASHBRACK);
a0d0e21e 3287 case XOPERATOR:
bf4acbe4 3288 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3289 s++;
44a8e56a 3290 d = s;
3280af22
NIS
3291 PL_tokenbuf[0] = '\0';
3292 if (d < PL_bufend && *d == '-') {
3293 PL_tokenbuf[0] = '-';
44a8e56a 3294 d++;
bf4acbe4 3295 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3296 d++;
3297 }
7e2040f0 3298 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3299 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3300 FALSE, &len);
bf4acbe4 3301 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3302 d++;
3303 if (*d == '}') {
3280af22 3304 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3305 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3306 if (minus)
3307 force_next('-');
748a9306
LW
3308 }
3309 }
3310 /* FALL THROUGH */
09bef843 3311 case XATTRBLOCK:
748a9306 3312 case XBLOCK:
3280af22
NIS
3313 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3314 PL_expect = XSTATE;
a0d0e21e 3315 break;
09bef843 3316 case XATTRTERM:
a0d0e21e 3317 case XTERMBLOCK:
3280af22
NIS
3318 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3319 PL_expect = XSTATE;
a0d0e21e
LW
3320 break;
3321 default: {
3322 char *t;
3280af22
NIS
3323 if (PL_oldoldbufptr == PL_last_lop)
3324 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3325 else
3280af22 3326 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3327 s = skipspace(s);
8452ff4b
SB
3328 if (*s == '}') {
3329 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3330 PL_expect = XTERM;
3331 /* This hack is to get the ${} in the message. */
3332 PL_bufptr = s+1;
3333 yyerror("syntax error");
3334 break;
3335 }
a0d0e21e 3336 OPERATOR(HASHBRACK);
8452ff4b 3337 }
b8a4b1be
GS
3338 /* This hack serves to disambiguate a pair of curlies
3339 * as being a block or an anon hash. Normally, expectation
3340 * determines that, but in cases where we're not in a
3341 * position to expect anything in particular (like inside
3342 * eval"") we have to resolve the ambiguity. This code
3343 * covers the case where the first term in the curlies is a
3344 * quoted string. Most other cases need to be explicitly
3345 * disambiguated by prepending a `+' before the opening
3346 * curly in order to force resolution as an anon hash.
3347 *
3348 * XXX should probably propagate the outer expectation
3349 * into eval"" to rely less on this hack, but that could
3350 * potentially break current behavior of eval"".
3351 * GSAR 97-07-21
3352 */
3353 t = s;
3354 if (*s == '\'' || *s == '"' || *s == '`') {
3355 /* common case: get past first string, handling escapes */
3280af22 3356 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3357 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3358 t++;
3359 t++;
a0d0e21e 3360 }
b8a4b1be 3361 else if (*s == 'q') {
3280af22 3362 if (++t < PL_bufend
b8a4b1be 3363 && (!isALNUM(*t)
3280af22 3364 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3365 && !isALNUM(*t))))
3366 {
abc667d1 3367 /* skip q//-like construct */
b8a4b1be
GS
3368 char *tmps;
3369 char open, close, term;
3370 I32 brackets = 1;
3371
3280af22 3372 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3373 t++;
abc667d1
DM
3374 /* check for q => */
3375 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3376 OPERATOR(HASHBRACK);
3377 }
b8a4b1be
GS
3378 term = *t;
3379 open = term;
3380 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3381 term = tmps[5];
3382 close = term;
3383 if (open == close)
3280af22
NIS
3384 for (t++; t < PL_bufend; t++) {
3385 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3386 t++;
6d07e5e9 3387 else if (*t == open)
b8a4b1be
GS
3388 break;
3389 }
abc667d1 3390 else {
3280af22
NIS
3391 for (t++; t < PL_bufend; t++) {
3392 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3393 t++;
6d07e5e9 3394 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3395 break;
3396 else if (*t == open)
3397 brackets++;
3398 }
abc667d1
DM
3399 }
3400 t++;
b8a4b1be 3401 }
abc667d1
DM
3402 else
3403 /* skip plain q word */
3404 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3405 t += UTF8SKIP(t);
a0d0e21e 3406 }
7e2040f0 3407 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3408 t += UTF8SKIP(t);
7e2040f0 3409 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3410 t += UTF8SKIP(t);
a0d0e21e 3411 }
3280af22 3412 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3413 t++;
b8a4b1be
GS
3414 /* if comma follows first term, call it an anon hash */
3415 /* XXX it could be a comma expression with loop modifiers */
3280af22 3416 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3417 || (*t == '=' && t[1] == '>')))
a0d0e21e 3418 OPERATOR(HASHBRACK);
3280af22 3419 if (PL_expect == XREF)
4e4e412b 3420 PL_expect = XTERM;
a0d0e21e 3421 else {
3280af22
NIS
3422 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3423 PL_expect = XSTATE;
a0d0e21e 3424 }
8990e307 3425 }
a0d0e21e 3426 break;
463ee0b2 3427 }
57843af0 3428 yylval.ival = CopLINE(PL_curcop);
79072805 3429 if (isSPACE(*s) || *s == '#')
3280af22 3430 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3431 TOKEN('{');
378cc40b 3432 case '}':
79072805
LW
3433 rightbracket:
3434 s++;
3280af22 3435 if (PL_lex_brackets <= 0)
d98d5fff 3436 yyerror("Unmatched right curly bracket");
463ee0b2 3437 else
3280af22 3438 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3439 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3440 PL_lex_formbrack = 0;
3441 if (PL_lex_state == LEX_INTERPNORMAL) {
3442 if (PL_lex_brackets == 0) {
9059aa12
LW
3443 if (PL_expect & XFAKEBRACK) {
3444 PL_expect &= XENUMMASK;
3280af22
NIS
3445 PL_lex_state = LEX_INTERPEND;
3446 PL_bufptr = s;
cea2e8a9 3447 return yylex(); /* ignore fake brackets */
79072805 3448 }
fa83b5b6 3449 if (*s == '-' && s[1] == '>')
3280af22 3450 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3451 else if (*s != '[' && *s != '{')
3280af22 3452 PL_lex_state = LEX_INTERPEND;
79072805
LW
3453 }
3454 }
9059aa12
LW
3455 if (PL_expect & XFAKEBRACK) {
3456 PL_expect &= XENUMMASK;
3280af22 3457 PL_bufptr = s;
cea2e8a9 3458 return yylex(); /* ignore fake brackets */
748a9306 3459 }
79072805
LW
3460 force_next('}');
3461 TOKEN(';');
378cc40b
LW
3462 case '&':
3463 s++;
3464 tmp = *s++;
3465 if (tmp == '&')
a0d0e21e 3466 AOPERATOR(ANDAND);
378cc40b 3467 s--;
3280af22 3468 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3469 if (ckWARN(WARN_SEMICOLON)
3470 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3471 {
57843af0 3472 CopLINE_dec(PL_curcop);
9014280d 3473 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3474 CopLINE_inc(PL_curcop);
463ee0b2 3475 }
79072805 3476 BAop(OP_BIT_AND);
463ee0b2 3477 }
79072805 3478
3280af22
NIS
3479 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3480 if (*PL_tokenbuf) {
3481 PL_expect = XOPERATOR;
3482 force_ident(PL_tokenbuf, '&');
463ee0b2 3483 }
79072805
LW
3484 else
3485 PREREF('&');
c07a80fd 3486 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3487 TERM('&');
3488
378cc40b
LW
3489 case '|':
3490 s++;
3491 tmp = *s++;
3492 if (tmp == '|')
a0d0e21e 3493 AOPERATOR(OROR);
378cc40b 3494 s--;
79072805 3495 BOop(OP_BIT_OR);
378cc40b
LW
3496 case '=':
3497 s++;
3498 tmp = *s++;
3499 if (tmp == '=')
79072805
LW
3500 Eop(OP_EQ);
3501 if (tmp == '>')
3502 OPERATOR(',');
378cc40b 3503 if (tmp == '~')
79072805 3504 PMop(OP_MATCH);
599cee73 3505 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3506 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3507 s--;
3280af22
NIS
3508 if (PL_expect == XSTATE && isALPHA(tmp) &&
3509 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3510 {
3280af22
NIS
3511 if (PL_in_eval && !PL_rsfp) {
3512 d = PL_bufend;
a5f75d66
AD
3513 while (s < d) {
3514 if (*s++ == '\n') {
3515 incline(s);
3516 if (strnEQ(s,"=cut",4)) {
3517 s = strchr(s,'\n');
3518 if (s)
3519 s++;
3520 else
3521 s = d;
3522 incline(s);
3523 goto retry;
3524 }
3525 }
3526 }
3527 goto retry;
3528 }
3280af22
NIS
3529 s = PL_bufend;
3530 PL_doextract = TRUE;
a0d0e21e
LW
3531 goto retry;
3532 }
3280af22 3533 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3534 char *t;
51882d45 3535#ifdef PERL_STRICT_CR
bf4acbe4 3536 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3537#else
bf4acbe4 3538 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3539#endif
a0d0e21e
LW
3540 if (*t == '\n' || *t == '#') {
3541 s--;
3280af22 3542 PL_expect = XBLOCK;
a0d0e21e
LW
3543 goto leftbracket;
3544 }
79072805 3545 }
a0d0e21e
LW
3546 yylval.ival = 0;
3547 OPERATOR(ASSIGNOP);
378cc40b
LW
3548 case '!':
3549 s++;
3550 tmp = *s++;
984200d0 3551 if (tmp == '=') {
decca21c
YST
3552 /* was this !=~ where !~ was meant?
3553 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3554
984200d0
YST
3555 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3556 char *t = s+1;
3557
3558 while (t < PL_bufend && isSPACE(*t))
3559 ++t;
3560