This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Probably should be using *pvn rather than *pv forms for speed in
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a
PP
64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff
PP
66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
bbf60fe6
DM
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
79072805
LW
94#ifdef ff_next
95#undef ff_next
d48672a2
LW
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 136# define REPORT(retval) tokereport(s,(int)retval)
998054bd 137#else
bbf60fe6 138# define REPORT(retval) (retval)
998054bd
SC
139#endif
140
bbf60fe6
DM
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 161
a687059c
LW
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
a687059c 166 */
bbf60fe6
DM
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
6f33ba73 169 PL_expect = x, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
bbf60fe6
DM
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 178
bbf60fe6
DM
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
3280af22
NIS
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
79072805 186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
bbf60fe6
DM
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
9041c2e3 203{
bbf60fe6
DM
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 270
bbf60fe6
DM
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = NEWSV(0, 60);
279
280 Perl_sv_catpvf(aTHX_ report, "<== ");
281
282 for (p = debug_tokens; p->token; p++) {
283 if (p->token == (int)rv) {
284 name = p->name;
285 type = p->type;
286 break;
287 }
288 }
289 if (name)
290 Perl_sv_catpvf(aTHX_ report, "%s", name);
291 else if ((char)rv > ' ' && (char)rv < '~')
292 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
293 else if (!rv)
294 Perl_sv_catpvf(aTHX_ report, "EOF");
295 else
296 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
297 switch (type) {
298 case TOKENTYPE_NONE:
299 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
300 break;
301 case TOKENTYPE_IVAL:
302 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
303 break;
304 case TOKENTYPE_OPNUM:
305 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
306 PL_op_name[yylval.ival]);
307 break;
308 case TOKENTYPE_PVAL:
309 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
310 break;
311 case TOKENTYPE_OPVAL:
401441c0
RGS
312 if (yylval.opval)
313 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 314 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
315 else
316 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
317 break;
318 }
319 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
998054bd
SC
320 if (s - PL_bufptr > 0)
321 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
322 else {
323 if (PL_oldbufptr && *PL_oldbufptr)
324 sv_catpv(report, PL_tokenbuf);
325 }
bbf60fe6
DM
326 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
327 };
328 return (int)rv;
998054bd
SC
329}
330
8fa7f367
JH
331#endif
332
ffb4593c
NT
333/*
334 * S_ao
335 *
c963b151
BD
336 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
337 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
338 */
339
76e3520e 340STATIC int
cea2e8a9 341S_ao(pTHX_ int toketype)
a0d0e21e 342{
3280af22
NIS
343 if (*PL_bufptr == '=') {
344 PL_bufptr++;
a0d0e21e
LW
345 if (toketype == ANDAND)
346 yylval.ival = OP_ANDASSIGN;
347 else if (toketype == OROR)
348 yylval.ival = OP_ORASSIGN;
c963b151
BD
349 else if (toketype == DORDOR)
350 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
351 toketype = ASSIGNOP;
352 }
353 return toketype;
354}
355
ffb4593c
NT
356/*
357 * S_no_op
358 * When Perl expects an operator and finds something else, no_op
359 * prints the warning. It always prints "<something> found where
360 * operator expected. It prints "Missing semicolon on previous line?"
361 * if the surprise occurs at the start of the line. "do you need to
362 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
363 * where the compiler doesn't know if foo is a method call or a function.
364 * It prints "Missing operator before end of line" if there's nothing
365 * after the missing operator, or "... before <...>" if there is something
366 * after the missing operator.
367 */
368
76e3520e 369STATIC void
cea2e8a9 370S_no_op(pTHX_ char *what, char *s)
463ee0b2 371{
3280af22
NIS
372 char *oldbp = PL_bufptr;
373 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 374
1189a94a
GS
375 if (!s)
376 s = oldbp;
07c798fb 377 else
1189a94a 378 PL_bufptr = s;
cea2e8a9 379 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
380 if (ckWARN_d(WARN_SYNTAX)) {
381 if (is_first)
382 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
383 "\t(Missing semicolon on previous line?)\n");
384 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
385 char *t;
386 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
387 if (t < PL_bufptr && isSPACE(*t))
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Do you need to predeclare %.*s?)\n",
390 t - PL_oldoldbufptr, PL_oldoldbufptr);
391 }
392 else {
393 assert(s >= oldbp);
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
396 }
07c798fb 397 }
3280af22 398 PL_bufptr = oldbp;
8990e307
LW
399}
400
ffb4593c
NT
401/*
402 * S_missingterm
403 * Complain about missing quote/regexp/heredoc terminator.
404 * If it's called with (char *)NULL then it cauterizes the line buffer.
405 * If we're in a delimited string and the delimiter is a control
406 * character, it's reformatted into a two-char sequence like ^C.
407 * This is fatal.
408 */
409
76e3520e 410STATIC void
cea2e8a9 411S_missingterm(pTHX_ char *s)
8990e307
LW
412{
413 char tmpbuf[3];
414 char q;
415 if (s) {
416 char *nl = strrchr(s,'\n');
d2719217 417 if (nl)
8990e307
LW
418 *nl = '\0';
419 }
9d116dd7
JH
420 else if (
421#ifdef EBCDIC
422 iscntrl(PL_multi_close)
423#else
424 PL_multi_close < 32 || PL_multi_close == 127
425#endif
426 ) {
8990e307 427 *tmpbuf = '^';
3280af22 428 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
429 s = "\\n";
430 tmpbuf[2] = '\0';
431 s = tmpbuf;
432 }
433 else {
eb160463 434 *tmpbuf = (char)PL_multi_close;
8990e307
LW
435 tmpbuf[1] = '\0';
436 s = tmpbuf;
437 }
438 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 439 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 440}
79072805 441
ffb4593c
NT
442/*
443 * Perl_deprecate
ffb4593c
NT
444 */
445
79072805 446void
864dbfa3 447Perl_deprecate(pTHX_ char *s)
a0d0e21e 448{
599cee73 449 if (ckWARN(WARN_DEPRECATED))
9014280d 450 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
451}
452
12bcd1a6
PM
453void
454Perl_deprecate_old(pTHX_ char *s)
455{
456 /* This function should NOT be called for any new deprecated warnings */
457 /* Use Perl_deprecate instead */
458 /* */
459 /* It is here to maintain backward compatibility with the pre-5.8 */
460 /* warnings category hierarchy. The "deprecated" category used to */
461 /* live under the "syntax" category. It is now a top-level category */
462 /* in its own right. */
463
464 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
465 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
466 "Use of %s is deprecated", s);
467}
468
ffb4593c
NT
469/*
470 * depcom
9cbb5ea2 471 * Deprecate a comma-less variable list.
ffb4593c
NT
472 */
473
76e3520e 474STATIC void
cea2e8a9 475S_depcom(pTHX)
a0d0e21e 476{
12bcd1a6 477 deprecate_old("comma-less variable list");
a0d0e21e
LW
478}
479
ffb4593c 480/*
9cbb5ea2
GS
481 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
482 * utf16-to-utf8-reversed.
ffb4593c
NT
483 */
484
c39cd008
GS
485#ifdef PERL_CR_FILTER
486static void
487strip_return(SV *sv)
488{
489 register char *s = SvPVX(sv);
490 register char *e = s + SvCUR(sv);
491 /* outer loop optimized to do nothing if there are no CR-LFs */
492 while (s < e) {
493 if (*s++ == '\r' && *s == '\n') {
494 /* hit a CR-LF, need to copy the rest */
495 register char *d = s - 1;
496 *d++ = *s++;
497 while (s < e) {
498 if (*s == '\r' && s[1] == '\n')
499 s++;
500 *d++ = *s++;
501 }
502 SvCUR(sv) -= s - d;
503 return;
504 }
505 }
506}
a868473f 507
76e3520e 508STATIC I32
c39cd008 509S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 510{
c39cd008
GS
511 I32 count = FILTER_READ(idx+1, sv, maxlen);
512 if (count > 0 && !maxlen)
513 strip_return(sv);
514 return count;
a868473f
NIS
515}
516#endif
517
ffb4593c
NT
518/*
519 * Perl_lex_start
9cbb5ea2
GS
520 * Initialize variables. Uses the Perl save_stack to save its state (for
521 * recursive calls to the parser).
ffb4593c
NT
522 */
523
a0d0e21e 524void
864dbfa3 525Perl_lex_start(pTHX_ SV *line)
79072805 526{
8990e307
LW
527 char *s;
528 STRLEN len;
529
3280af22
NIS
530 SAVEI32(PL_lex_dojoin);
531 SAVEI32(PL_lex_brackets);
3280af22
NIS
532 SAVEI32(PL_lex_casemods);
533 SAVEI32(PL_lex_starts);
534 SAVEI32(PL_lex_state);
7766f137 535 SAVEVPTR(PL_lex_inpat);
3280af22 536 SAVEI32(PL_lex_inwhat);
18b09519
GS
537 if (PL_lex_state == LEX_KNOWNEXT) {
538 I32 toke = PL_nexttoke;
539 while (--toke >= 0) {
540 SAVEI32(PL_nexttype[toke]);
541 SAVEVPTR(PL_nextval[toke]);
542 }
543 SAVEI32(PL_nexttoke);
18b09519 544 }
57843af0 545 SAVECOPLINE(PL_curcop);
3280af22
NIS
546 SAVEPPTR(PL_bufptr);
547 SAVEPPTR(PL_bufend);
548 SAVEPPTR(PL_oldbufptr);
549 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
550 SAVEPPTR(PL_last_lop);
551 SAVEPPTR(PL_last_uni);
3280af22
NIS
552 SAVEPPTR(PL_linestart);
553 SAVESPTR(PL_linestr);
8edd5f42
RGS
554 SAVEGENERICPV(PL_lex_brackstack);
555 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 556 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
557 SAVESPTR(PL_lex_stuff);
558 SAVEI32(PL_lex_defer);
09bef843 559 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 560 SAVESPTR(PL_lex_repl);
bebdddfc
GS
561 SAVEINT(PL_expect);
562 SAVEINT(PL_lex_expect);
3280af22
NIS
563
564 PL_lex_state = LEX_NORMAL;
565 PL_lex_defer = 0;
566 PL_expect = XSTATE;
567 PL_lex_brackets = 0;
3280af22
NIS
568 New(899, PL_lex_brackstack, 120, char);
569 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
570 PL_lex_casemods = 0;
571 *PL_lex_casestack = '\0';
572 PL_lex_dojoin = 0;
573 PL_lex_starts = 0;
574 PL_lex_stuff = Nullsv;
575 PL_lex_repl = Nullsv;
576 PL_lex_inpat = 0;
76be56bc 577 PL_nexttoke = 0;
3280af22 578 PL_lex_inwhat = 0;
09bef843 579 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
580 PL_linestr = line;
581 if (SvREADONLY(PL_linestr))
582 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
583 s = SvPV(PL_linestr, len);
6f27f9a7 584 if (!len || s[len-1] != ';') {
3280af22
NIS
585 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
586 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
587 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 588 }
3280af22
NIS
589 SvTEMP_off(PL_linestr);
590 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
591 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 592 PL_last_lop = PL_last_uni = Nullch;
3280af22 593 PL_rsfp = 0;
79072805 594}
a687059c 595
ffb4593c
NT
596/*
597 * Perl_lex_end
9cbb5ea2
GS
598 * Finalizer for lexing operations. Must be called when the parser is
599 * done with the lexer.
ffb4593c
NT
600 */
601
463ee0b2 602void
864dbfa3 603Perl_lex_end(pTHX)
463ee0b2 604{
3280af22 605 PL_doextract = FALSE;
463ee0b2
LW
606}
607
ffb4593c
NT
608/*
609 * S_incline
610 * This subroutine has nothing to do with tilting, whether at windmills
611 * or pinball tables. Its name is short for "increment line". It
57843af0 612 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 613 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
614 * # line 500 "foo.pm"
615 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_incline(pTHX_ char *s)
463ee0b2
LW
620{
621 char *t;
622 char *n;
73659bf1 623 char *e;
463ee0b2 624 char ch;
463ee0b2 625
57843af0 626 CopLINE_inc(PL_curcop);
463ee0b2
LW
627 if (*s++ != '#')
628 return;
bf4acbe4 629 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
630 if (strnEQ(s, "line", 4))
631 s += 4;
632 else
633 return;
084592ab 634 if (SPACE_OR_TAB(*s))
73659bf1 635 s++;
4e553d73 636 else
73659bf1 637 return;
bf4acbe4 638 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
639 if (!isDIGIT(*s))
640 return;
641 n = s;
642 while (isDIGIT(*s))
643 s++;
bf4acbe4 644 while (SPACE_OR_TAB(*s))
463ee0b2 645 s++;
73659bf1 646 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 647 s++;
73659bf1
GS
648 e = t + 1;
649 }
463ee0b2 650 else {
463ee0b2 651 for (t = s; !isSPACE(*t); t++) ;
73659bf1 652 e = t;
463ee0b2 653 }
bf4acbe4 654 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
655 e++;
656 if (*e != '\n' && *e != '\0')
657 return; /* false alarm */
658
463ee0b2
LW
659 ch = *t;
660 *t = '\0';
f4dd75d9 661 if (t - s > 0) {
05ec9bb3 662 CopFILE_free(PL_curcop);
57843af0 663 CopFILE_set(PL_curcop, s);
f4dd75d9 664 }
463ee0b2 665 *t = ch;
57843af0 666 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
667}
668
ffb4593c
NT
669/*
670 * S_skipspace
671 * Called to gobble the appropriate amount and type of whitespace.
672 * Skips comments as well.
673 */
674
76e3520e 675STATIC char *
cea2e8a9 676S_skipspace(pTHX_ register char *s)
a687059c 677{
3280af22 678 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 679 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
680 s++;
681 return s;
682 }
683 for (;;) {
fd049845 684 STRLEN prevlen;
09bef843 685 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 686 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
687 while (s < PL_bufend && isSPACE(*s)) {
688 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
689 incline(s);
690 }
ffb4593c
NT
691
692 /* comment */
3280af22
NIS
693 if (s < PL_bufend && *s == '#') {
694 while (s < PL_bufend && *s != '\n')
463ee0b2 695 s++;
60e6418e 696 if (s < PL_bufend) {
463ee0b2 697 s++;
60e6418e
GS
698 if (PL_in_eval && !PL_rsfp) {
699 incline(s);
700 continue;
701 }
702 }
463ee0b2 703 }
ffb4593c
NT
704
705 /* only continue to recharge the buffer if we're at the end
706 * of the buffer, we're not reading from a source filter, and
707 * we're in normal lexing mode
708 */
09bef843
SB
709 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
710 PL_lex_state == LEX_FORMLINE)
463ee0b2 711 return s;
ffb4593c
NT
712
713 /* try to recharge the buffer */
9cbb5ea2
GS
714 if ((s = filter_gets(PL_linestr, PL_rsfp,
715 (prevlen = SvCUR(PL_linestr)))) == Nullch)
716 {
717 /* end of file. Add on the -p or -n magic */
3280af22
NIS
718 if (PL_minus_n || PL_minus_p) {
719 sv_setpv(PL_linestr,PL_minus_p ?
08e9d68e
DD
720 ";}continue{print or die qq(-p destination: $!\\n)" :
721 "");
3280af22
NIS
722 sv_catpv(PL_linestr,";}");
723 PL_minus_n = PL_minus_p = 0;
a0d0e21e
LW
724 }
725 else
4147a61b 726 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
727
728 /* reset variables for next time we lex */
9cbb5ea2
GS
729 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
730 = SvPVX(PL_linestr);
3280af22 731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 732 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
733
734 /* Close the filehandle. Could be from -P preprocessor,
735 * STDIN, or a regular file. If we were reading code from
736 * STDIN (because the commandline held no -e or filename)
737 * then we don't close it, we reset it so the code can
738 * read from STDIN too.
739 */
740
3280af22
NIS
741 if (PL_preprocess && !PL_in_eval)
742 (void)PerlProc_pclose(PL_rsfp);
743 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
744 PerlIO_clearerr(PL_rsfp);
8990e307 745 else
3280af22
NIS
746 (void)PerlIO_close(PL_rsfp);
747 PL_rsfp = Nullfp;
463ee0b2
LW
748 return s;
749 }
ffb4593c
NT
750
751 /* not at end of file, so we only read another line */
09bef843
SB
752 /* make corresponding updates to old pointers, for yyerror() */
753 oldprevlen = PL_oldbufptr - PL_bufend;
754 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
755 if (PL_last_uni)
756 oldunilen = PL_last_uni - PL_bufend;
757 if (PL_last_lop)
758 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
759 PL_linestart = PL_bufptr = s + prevlen;
760 PL_bufend = s + SvCUR(PL_linestr);
761 s = PL_bufptr;
09bef843
SB
762 PL_oldbufptr = s + oldprevlen;
763 PL_oldoldbufptr = s + oldoldprevlen;
764 if (PL_last_uni)
765 PL_last_uni = s + oldunilen;
766 if (PL_last_lop)
767 PL_last_lop = s + oldloplen;
a0d0e21e 768 incline(s);
ffb4593c
NT
769
770 /* debugger active and we're not compiling the debugger code,
771 * so store the line into the debugger's array of lines
772 */
3280af22 773 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
774 SV *sv = NEWSV(85,0);
775
776 sv_upgrade(sv, SVt_PVMG);
3280af22 777 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
778 (void)SvIOK_on(sv);
779 SvIVX(sv) = 0;
57843af0 780 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 781 }
463ee0b2 782 }
a687059c 783}
378cc40b 784
ffb4593c
NT
785/*
786 * S_check_uni
787 * Check the unary operators to ensure there's no ambiguity in how they're
788 * used. An ambiguous piece of code would be:
789 * rand + 5
790 * This doesn't mean rand() + 5. Because rand() is a unary operator,
791 * the +5 is its argument.
792 */
793
76e3520e 794STATIC void
cea2e8a9 795S_check_uni(pTHX)
ba106d47 796{
2f3197b3 797 char *s;
a0d0e21e 798 char *t;
2f3197b3 799
3280af22 800 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 801 return;
3280af22
NIS
802 while (isSPACE(*PL_last_uni))
803 PL_last_uni++;
7e2040f0 804 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 805 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 806 return;
0453d815 807 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 808 char ch = *s;
0453d815 809 *s = '\0';
9014280d 810 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 811 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
812 PL_last_uni);
813 *s = ch;
814 }
2f3197b3
LW
815}
816
ffb4593c
NT
817/*
818 * LOP : macro to build a list operator. Its behaviour has been replaced
819 * with a subroutine, S_lop() for which LOP is just another name.
820 */
821
a0d0e21e
LW
822#define LOP(f,x) return lop(f,x,s)
823
ffb4593c
NT
824/*
825 * S_lop
826 * Build a list operator (or something that might be one). The rules:
827 * - if we have a next token, then it's a list operator [why?]
828 * - if the next thing is an opening paren, then it's a function
829 * - else it's a list operator
830 */
831
76e3520e 832STATIC I32
a0be28da 833S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 834{
79072805 835 yylval.ival = f;
35c8bce7 836 CLINE;
3280af22
NIS
837 PL_expect = x;
838 PL_bufptr = s;
839 PL_last_lop = PL_oldbufptr;
eb160463 840 PL_last_lop_op = (OPCODE)f;
3280af22 841 if (PL_nexttoke)
bbf60fe6 842 return REPORT(LSTOP);
79072805 843 if (*s == '(')
bbf60fe6 844 return REPORT(FUNC);
79072805
LW
845 s = skipspace(s);
846 if (*s == '(')
bbf60fe6 847 return REPORT(FUNC);
79072805 848 else
bbf60fe6 849 return REPORT(LSTOP);
79072805
LW
850}
851
ffb4593c
NT
852/*
853 * S_force_next
9cbb5ea2 854 * When the lexer realizes it knows the next token (for instance,
ffb4593c 855 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
856 * to know what token to return the next time the lexer is called. Caller
857 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
858 * handles the token correctly.
ffb4593c
NT
859 */
860
4e553d73 861STATIC void
cea2e8a9 862S_force_next(pTHX_ I32 type)
79072805 863{
3280af22
NIS
864 PL_nexttype[PL_nexttoke] = type;
865 PL_nexttoke++;
866 if (PL_lex_state != LEX_KNOWNEXT) {
867 PL_lex_defer = PL_lex_state;
868 PL_lex_expect = PL_expect;
869 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
870 }
871}
872
ffb4593c
NT
873/*
874 * S_force_word
875 * When the lexer knows the next thing is a word (for instance, it has
876 * just seen -> and it knows that the next char is a word char, then
877 * it calls S_force_word to stick the next word into the PL_next lookahead.
878 *
879 * Arguments:
b1b65b59 880 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
881 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
882 * int check_keyword : if true, Perl checks to make sure the word isn't
883 * a keyword (do this if the word is a label, e.g. goto FOO)
884 * int allow_pack : if true, : characters will also be allowed (require,
885 * use, etc. do this)
9cbb5ea2 886 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
887 */
888
76e3520e 889STATIC char *
cea2e8a9 890S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 891{
463ee0b2
LW
892 register char *s;
893 STRLEN len;
4e553d73 894
463ee0b2
LW
895 start = skipspace(start);
896 s = start;
7e2040f0 897 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 898 (allow_pack && *s == ':') ||
15f0808c 899 (allow_initial_tick && *s == '\'') )
a0d0e21e 900 {
3280af22
NIS
901 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
902 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
903 return start;
904 if (token == METHOD) {
905 s = skipspace(s);
906 if (*s == '(')
3280af22 907 PL_expect = XTERM;
463ee0b2 908 else {
3280af22 909 PL_expect = XOPERATOR;
463ee0b2 910 }
79072805 911 }
3280af22
NIS
912 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST,0, newSVpv(PL_tokenbuf,0));
913 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
5464dbd2
RGS
914 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
915 SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke].opval)->op_sv);
79072805
LW
916 force_next(token);
917 }
918 return s;
919}
920
ffb4593c
NT
921/*
922 * S_force_ident
9cbb5ea2 923 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
924 * text only contains the "foo" portion. The first argument is a pointer
925 * to the "foo", and the second argument is the type symbol to prefix.
926 * Forces the next token to be a "WORD".
9cbb5ea2 927 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
928 */
929
76e3520e 930STATIC void
cea2e8a9 931S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
932{
933 if (s && *s) {
11343788 934 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 935 PL_nextval[PL_nexttoke].opval = o;
79072805 936 force_next(WORD);
748a9306 937 if (kind) {
11343788 938 o->op_private = OPpCONST_ENTERED;
55497cff
PP
939 /* XXX see note in pp_entereval() for why we forgo typo
940 warnings if the symbol must be introduced in an eval.
941 GSAR 96-10-12 */
3280af22 942 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
943 kind == '$' ? SVt_PV :
944 kind == '@' ? SVt_PVAV :
945 kind == '%' ? SVt_PVHV :
946 SVt_PVGV
947 );
748a9306 948 }
79072805
LW
949 }
950}
951
1571675a
GS
952NV
953Perl_str_to_version(pTHX_ SV *sv)
954{
955 NV retval = 0.0;
956 NV nshift = 1.0;
957 STRLEN len;
958 char *start = SvPVx(sv,len);
3aa33fe5 959 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
960 char *end = start + len;
961 while (start < end) {
ba210ebe 962 STRLEN skip;
1571675a
GS
963 UV n;
964 if (utf)
9041c2e3 965 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
966 else {
967 n = *(U8*)start;
968 skip = 1;
969 }
970 retval += ((NV)n)/nshift;
971 start += skip;
972 nshift *= 1000;
973 }
974 return retval;
975}
976
4e553d73 977/*
ffb4593c
NT
978 * S_force_version
979 * Forces the next token to be a version number.
e759cc13
RGS
980 * If the next token appears to be an invalid version number, (e.g. "v2b"),
981 * and if "guessing" is TRUE, then no new token is created (and the caller
982 * must use an alternative parsing method).
ffb4593c
NT
983 */
984
76e3520e 985STATIC char *
e759cc13 986S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
987{
988 OP *version = Nullop;
44dcb63b 989 char *d;
89bfa8cd
PP
990
991 s = skipspace(s);
992
44dcb63b 993 d = s;
dd629d5b 994 if (*d == 'v')
44dcb63b 995 d++;
44dcb63b 996 if (isDIGIT(*d)) {
e759cc13
RGS
997 while (isDIGIT(*d) || *d == '_' || *d == '.')
998 d++;
9f3d182e 999 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1000 SV *ver;
b73d6f50 1001 s = scan_num(s, &yylval);
89bfa8cd 1002 version = yylval.opval;
dd629d5b
GS
1003 ver = cSVOPx(version)->op_sv;
1004 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1005 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
1006 SvNVX(ver) = str_to_version(ver);
1007 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1008 }
89bfa8cd 1009 }
e759cc13
RGS
1010 else if (guessing)
1011 return s;
89bfa8cd
PP
1012 }
1013
1014 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1015 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1016 force_next(WORD);
89bfa8cd 1017
e759cc13 1018 return s;
89bfa8cd
PP
1019}
1020
ffb4593c
NT
1021/*
1022 * S_tokeq
1023 * Tokenize a quoted string passed in as an SV. It finds the next
1024 * chunk, up to end of string or a backslash. It may make a new
1025 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1026 * turns \\ into \.
1027 */
1028
76e3520e 1029STATIC SV *
cea2e8a9 1030S_tokeq(pTHX_ SV *sv)
79072805
LW
1031{
1032 register char *s;
1033 register char *send;
1034 register char *d;
b3ac6de7
IZ
1035 STRLEN len = 0;
1036 SV *pv = sv;
79072805
LW
1037
1038 if (!SvLEN(sv))
b3ac6de7 1039 goto finish;
79072805 1040
a0d0e21e 1041 s = SvPV_force(sv, len);
21a311ee 1042 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1043 goto finish;
463ee0b2 1044 send = s + len;
79072805
LW
1045 while (s < send && *s != '\\')
1046 s++;
1047 if (s == send)
b3ac6de7 1048 goto finish;
79072805 1049 d = s;
be4731d2 1050 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1051 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1052 if (SvUTF8(sv))
1053 SvUTF8_on(pv);
1054 }
79072805
LW
1055 while (s < send) {
1056 if (*s == '\\') {
a0d0e21e 1057 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1058 s++; /* all that, just for this */
1059 }
1060 *d++ = *s++;
1061 }
1062 *d = '\0';
463ee0b2 1063 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1064 finish:
3280af22 1065 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1066 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1067 return sv;
1068}
1069
ffb4593c
NT
1070/*
1071 * Now come three functions related to double-quote context,
1072 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1073 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1074 * interact with PL_lex_state, and create fake ( ... ) argument lists
1075 * to handle functions and concatenation.
1076 * They assume that whoever calls them will be setting up a fake
1077 * join call, because each subthing puts a ',' after it. This lets
1078 * "lower \luPpEr"
1079 * become
1080 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1081 *
1082 * (I'm not sure whether the spurious commas at the end of lcfirst's
1083 * arguments and join's arguments are created or not).
1084 */
1085
1086/*
1087 * S_sublex_start
1088 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1089 *
1090 * Pattern matching will set PL_lex_op to the pattern-matching op to
1091 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1092 *
1093 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1094 *
1095 * Everything else becomes a FUNC.
1096 *
1097 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1098 * had an OP_CONST or OP_READLINE). This just sets us up for a
1099 * call to S_sublex_push().
1100 */
1101
76e3520e 1102STATIC I32
cea2e8a9 1103S_sublex_start(pTHX)
79072805
LW
1104{
1105 register I32 op_type = yylval.ival;
79072805
LW
1106
1107 if (op_type == OP_NULL) {
3280af22
NIS
1108 yylval.opval = PL_lex_op;
1109 PL_lex_op = Nullop;
79072805
LW
1110 return THING;
1111 }
1112 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1113 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1114
1115 if (SvTYPE(sv) == SVt_PVIV) {
1116 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1117 STRLEN len;
1118 char *p;
1119 SV *nsv;
1120
1121 p = SvPV(sv, len);
79cb57f6 1122 nsv = newSVpvn(p, len);
01ec43d0
GS
1123 if (SvUTF8(sv))
1124 SvUTF8_on(nsv);
b3ac6de7
IZ
1125 SvREFCNT_dec(sv);
1126 sv = nsv;
4e553d73 1127 }
b3ac6de7 1128 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1129 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1130 /* Allow <FH> // "foo" */
1131 if (op_type == OP_READLINE)
1132 PL_expect = XTERMORDORDOR;
79072805
LW
1133 return THING;
1134 }
1135
3280af22
NIS
1136 PL_sublex_info.super_state = PL_lex_state;
1137 PL_sublex_info.sub_inwhat = op_type;
1138 PL_sublex_info.sub_op = PL_lex_op;
1139 PL_lex_state = LEX_INTERPPUSH;
55497cff 1140
3280af22
NIS
1141 PL_expect = XTERM;
1142 if (PL_lex_op) {
1143 yylval.opval = PL_lex_op;
1144 PL_lex_op = Nullop;
55497cff
PP
1145 return PMFUNC;
1146 }
1147 else
1148 return FUNC;
1149}
1150
ffb4593c
NT
1151/*
1152 * S_sublex_push
1153 * Create a new scope to save the lexing state. The scope will be
1154 * ended in S_sublex_done. Returns a '(', starting the function arguments
1155 * to the uc, lc, etc. found before.
1156 * Sets PL_lex_state to LEX_INTERPCONCAT.
1157 */
1158
76e3520e 1159STATIC I32
cea2e8a9 1160S_sublex_push(pTHX)
55497cff 1161{
f46d017c 1162 ENTER;
55497cff 1163
3280af22
NIS
1164 PL_lex_state = PL_sublex_info.super_state;
1165 SAVEI32(PL_lex_dojoin);
1166 SAVEI32(PL_lex_brackets);
3280af22
NIS
1167 SAVEI32(PL_lex_casemods);
1168 SAVEI32(PL_lex_starts);
1169 SAVEI32(PL_lex_state);
7766f137 1170 SAVEVPTR(PL_lex_inpat);
3280af22 1171 SAVEI32(PL_lex_inwhat);
57843af0 1172 SAVECOPLINE(PL_curcop);
3280af22 1173 SAVEPPTR(PL_bufptr);
8452ff4b 1174 SAVEPPTR(PL_bufend);
3280af22
NIS
1175 SAVEPPTR(PL_oldbufptr);
1176 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1177 SAVEPPTR(PL_last_lop);
1178 SAVEPPTR(PL_last_uni);
3280af22
NIS
1179 SAVEPPTR(PL_linestart);
1180 SAVESPTR(PL_linestr);
8edd5f42
RGS
1181 SAVEGENERICPV(PL_lex_brackstack);
1182 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1183
1184 PL_linestr = PL_lex_stuff;
1185 PL_lex_stuff = Nullsv;
1186
9cbb5ea2
GS
1187 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1188 = SvPVX(PL_linestr);
3280af22 1189 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1190 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1191 SAVEFREESV(PL_linestr);
1192
1193 PL_lex_dojoin = FALSE;
1194 PL_lex_brackets = 0;
3280af22
NIS
1195 New(899, PL_lex_brackstack, 120, char);
1196 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1197 PL_lex_casemods = 0;
1198 *PL_lex_casestack = '\0';
1199 PL_lex_starts = 0;
1200 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1201 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1202
1203 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1204 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1205 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1206 else
3280af22 1207 PL_lex_inpat = Nullop;
79072805 1208
55497cff 1209 return '(';
79072805
LW
1210}
1211
ffb4593c
NT
1212/*
1213 * S_sublex_done
1214 * Restores lexer state after a S_sublex_push.
1215 */
1216
76e3520e 1217STATIC I32
cea2e8a9 1218S_sublex_done(pTHX)
79072805 1219{
3280af22 1220 if (!PL_lex_starts++) {
9aa983d2
JH
1221 SV *sv = newSVpvn("",0);
1222 if (SvUTF8(PL_linestr))
1223 SvUTF8_on(sv);
3280af22 1224 PL_expect = XOPERATOR;
9aa983d2 1225 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1226 return THING;
1227 }
1228
3280af22
NIS
1229 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1230 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1231 return yylex();
79072805
LW
1232 }
1233
ffb4593c 1234 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1235 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1236 PL_linestr = PL_lex_repl;
1237 PL_lex_inpat = 0;
1238 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1239 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1240 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1241 SAVEFREESV(PL_linestr);
1242 PL_lex_dojoin = FALSE;
1243 PL_lex_brackets = 0;
3280af22
NIS
1244 PL_lex_casemods = 0;
1245 *PL_lex_casestack = '\0';
1246 PL_lex_starts = 0;
25da4f38 1247 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1248 PL_lex_state = LEX_INTERPNORMAL;
1249 PL_lex_starts++;
e9fa98b2
HS
1250 /* we don't clear PL_lex_repl here, so that we can check later
1251 whether this is an evalled subst; that means we rely on the
1252 logic to ensure sublex_done() is called again only via the
1253 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1254 }
e9fa98b2 1255 else {
3280af22 1256 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1257 PL_lex_repl = Nullsv;
1258 }
79072805 1259 return ',';
ffed7fef
LW
1260 }
1261 else {
f46d017c 1262 LEAVE;
3280af22
NIS
1263 PL_bufend = SvPVX(PL_linestr);
1264 PL_bufend += SvCUR(PL_linestr);
1265 PL_expect = XOPERATOR;
09bef843 1266 PL_sublex_info.sub_inwhat = 0;
79072805 1267 return ')';
ffed7fef
LW
1268 }
1269}
1270
02aa26ce
NT
1271/*
1272 scan_const
1273
1274 Extracts a pattern, double-quoted string, or transliteration. This
1275 is terrifying code.
1276
3280af22
NIS
1277 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1278 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1279 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1280
9b599b2a
GS
1281 Returns a pointer to the character scanned up to. Iff this is
1282 advanced from the start pointer supplied (ie if anything was
1283 successfully parsed), will leave an OP for the substring scanned
1284 in yylval. Caller must intuit reason for not parsing further
1285 by looking at the next characters herself.
1286
02aa26ce
NT
1287 In patterns:
1288 backslashes:
1289 double-quoted style: \r and \n
1290 regexp special ones: \D \s
1291 constants: \x3
1292 backrefs: \1 (deprecated in substitution replacements)
1293 case and quoting: \U \Q \E
1294 stops on @ and $, but not for $ as tail anchor
1295
1296 In transliterations:
1297 characters are VERY literal, except for - not at the start or end
1298 of the string, which indicates a range. scan_const expands the
1299 range to the full set of intermediate characters.
1300
1301 In double-quoted strings:
1302 backslashes:
1303 double-quoted style: \r and \n
1304 constants: \x3
1305 backrefs: \1 (deprecated)
1306 case and quoting: \U \Q \E
1307 stops on @ and $
1308
1309 scan_const does *not* construct ops to handle interpolated strings.
1310 It stops processing as soon as it finds an embedded $ or @ variable
1311 and leaves it to the caller to work out what's going on.
1312
da6eedaa 1313 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1314
1315 $ in pattern could be $foo or could be tail anchor. Assumption:
1316 it's a tail anchor if $ is the last thing in the string, or if it's
1317 followed by one of ")| \n\t"
1318
1319 \1 (backreferences) are turned into $1
1320
1321 The structure of the code is
1322 while (there's a character to process) {
1323 handle transliteration ranges
1324 skip regexp comments
1325 skip # initiated comments in //x patterns
1326 check for embedded @foo
1327 check for embedded scalars
1328 if (backslash) {
1329 leave intact backslashes from leave (below)
1330 deprecate \1 in strings and sub replacements
1331 handle string-changing backslashes \l \U \Q \E, etc.
1332 switch (what was escaped) {
1333 handle - in a transliteration (becomes a literal -)
1334 handle \132 octal characters
1335 handle 0x15 hex characters
1336 handle \cV (control V)
1337 handle printf backslashes (\f, \r, \n, etc)
1338 } (end switch)
1339 } (end if backslash)
1340 } (end while character to read)
4e553d73 1341
02aa26ce
NT
1342*/
1343
76e3520e 1344STATIC char *
cea2e8a9 1345S_scan_const(pTHX_ char *start)
79072805 1346{
3280af22 1347 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1348 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1349 register char *s = start; /* start of the constant */
1350 register char *d = SvPVX(sv); /* destination for copies */
1351 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1352 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1353 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1354 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1355 UV uv;
1356
dff6d3cd 1357 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1358 PL_lex_inpat
b6d5fef8 1359 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1360 : "";
79072805 1361
2b9d42f0
NIS
1362 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1363 /* If we are doing a trans and we know we want UTF8 set expectation */
1364 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1365 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1366 }
1367
1368
79072805 1369 while (s < send || dorange) {
02aa26ce 1370 /* get transliterations out of the way (they're most literal) */
3280af22 1371 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1372 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1373 if (dorange) {
1ba5c669
JH
1374 I32 i; /* current expanded character */
1375 I32 min; /* first character in range */
1376 I32 max; /* last character in range */
02aa26ce 1377
2b9d42f0 1378 if (has_utf8) {
8973db79
JH
1379 char *c = (char*)utf8_hop((U8*)d, -1);
1380 char *e = d++;
1381 while (e-- > c)
1382 *(e + 1) = *e;
25716404 1383 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1384 /* mark the range as done, and continue */
1385 dorange = FALSE;
1386 didrange = TRUE;
1387 continue;
1388 }
2b9d42f0 1389
02aa26ce 1390 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1391 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1392 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1393 d -= 2; /* eat the first char and the - */
1394
8ada0baa
JH
1395 min = (U8)*d; /* first char in range */
1396 max = (U8)d[1]; /* last char in range */
1397
c2e66d9e 1398 if (min > max) {
01ec43d0 1399 Perl_croak(aTHX_
d1573ac7 1400 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1401 (char)min, (char)max);
c2e66d9e
GS
1402 }
1403
c7f1f016 1404#ifdef EBCDIC
8ada0baa
JH
1405 if ((isLOWER(min) && isLOWER(max)) ||
1406 (isUPPER(min) && isUPPER(max))) {
1407 if (isLOWER(min)) {
1408 for (i = min; i <= max; i++)
1409 if (isLOWER(i))
db42d148 1410 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1411 } else {
1412 for (i = min; i <= max; i++)
1413 if (isUPPER(i))
db42d148 1414 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1415 }
1416 }
1417 else
1418#endif
1419 for (i = min; i <= max; i++)
eb160463 1420 *d++ = (char)i;
02aa26ce
NT
1421
1422 /* mark the range as done, and continue */
79072805 1423 dorange = FALSE;
01ec43d0 1424 didrange = TRUE;
79072805 1425 continue;
4e553d73 1426 }
02aa26ce
NT
1427
1428 /* range begins (ignore - as first or last char) */
79072805 1429 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1430 if (didrange) {
1fafa243 1431 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1432 }
2b9d42f0 1433 if (has_utf8) {
25716404 1434 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1435 s++;
1436 continue;
1437 }
79072805
LW
1438 dorange = TRUE;
1439 s++;
01ec43d0
GS
1440 }
1441 else {
1442 didrange = FALSE;
1443 }
79072805 1444 }
02aa26ce
NT
1445
1446 /* if we get here, we're not doing a transliteration */
1447
0f5d15d6
IZ
1448 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1449 except for the last char, which will be done separately. */
3280af22 1450 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1451 if (s[2] == '#') {
e994fd66 1452 while (s+1 < send && *s != ')')
db42d148 1453 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1454 }
1455 else if (s[2] == '{' /* This should match regcomp.c */
1456 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1457 {
cc6b7395 1458 I32 count = 1;
0f5d15d6 1459 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1460 char c;
1461
d9f97599
GS
1462 while (count && (c = *regparse)) {
1463 if (c == '\\' && regparse[1])
1464 regparse++;
4e553d73 1465 else if (c == '{')
cc6b7395 1466 count++;
4e553d73 1467 else if (c == '}')
cc6b7395 1468 count--;
d9f97599 1469 regparse++;
cc6b7395 1470 }
e994fd66 1471 if (*regparse != ')')
5bdf89e7 1472 regparse--; /* Leave one char for continuation. */
0f5d15d6 1473 while (s < regparse)
db42d148 1474 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1475 }
748a9306 1476 }
02aa26ce
NT
1477
1478 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1479 else if (*s == '#' && PL_lex_inpat &&
1480 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1481 while (s+1 < send && *s != '\n')
db42d148 1482 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1483 }
02aa26ce 1484
5d1d4326 1485 /* check for embedded arrays
da6eedaa 1486 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1487 */
7e2040f0 1488 else if (*s == '@' && s[1]
5d1d4326 1489 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1490 break;
02aa26ce
NT
1491
1492 /* check for embedded scalars. only stop if we're sure it's a
1493 variable.
1494 */
79072805 1495 else if (*s == '$') {
3280af22 1496 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1497 break;
6002328a 1498 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1499 break; /* in regexp, $ might be tail anchor */
1500 }
02aa26ce 1501
2b9d42f0
NIS
1502 /* End of else if chain - OP_TRANS rejoin rest */
1503
02aa26ce 1504 /* backslashes */
79072805
LW
1505 if (*s == '\\' && s+1 < send) {
1506 s++;
02aa26ce
NT
1507
1508 /* some backslashes we leave behind */
c9f97d15 1509 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1510 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1511 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1512 continue;
1513 }
02aa26ce
NT
1514
1515 /* deprecate \1 in strings and substitution replacements */
3280af22 1516 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1517 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1518 {
599cee73 1519 if (ckWARN(WARN_SYNTAX))
9014280d 1520 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1521 *--s = '$';
1522 break;
1523 }
02aa26ce
NT
1524
1525 /* string-change backslash escapes */
3280af22 1526 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1527 --s;
1528 break;
1529 }
02aa26ce
NT
1530
1531 /* if we get here, it's either a quoted -, or a digit */
79072805 1532 switch (*s) {
02aa26ce
NT
1533
1534 /* quoted - in transliterations */
79072805 1535 case '-':
3280af22 1536 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1537 *d++ = *s++;
1538 continue;
1539 }
1540 /* FALL THROUGH */
1541 default:
11b8faa4 1542 {
707afd92
MS
1543 if (ckWARN(WARN_MISC) &&
1544 isALNUM(*s) &&
1545 *s != '_')
9014280d 1546 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1547 "Unrecognized escape \\%c passed through",
1548 *s);
1549 /* default action is to copy the quoted character */
f9a63242 1550 goto default_action;
11b8faa4 1551 }
02aa26ce
NT
1552
1553 /* \132 indicates an octal constant */
79072805
LW
1554 case '0': case '1': case '2': case '3':
1555 case '4': case '5': case '6': case '7':
ba210ebe 1556 {
53305cf1
NC
1557 I32 flags = 0;
1558 STRLEN len = 3;
1559 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1560 s += len;
1561 }
012bcf8d 1562 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1563
1564 /* \x24 indicates a hex constant */
79072805 1565 case 'x':
a0ed51b3
LW
1566 ++s;
1567 if (*s == '{') {
1568 char* e = strchr(s, '}');
a4c04bdc
NC
1569 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1570 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1571 STRLEN len;
355860ce 1572
53305cf1 1573 ++s;
adaeee49 1574 if (!e) {
a0ed51b3 1575 yyerror("Missing right brace on \\x{}");
355860ce 1576 continue;
ba210ebe 1577 }
53305cf1
NC
1578 len = e - s;
1579 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1580 s = e + 1;
a0ed51b3
LW
1581 }
1582 else {
ba210ebe 1583 {
53305cf1 1584 STRLEN len = 2;
a4c04bdc 1585 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1586 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1587 s += len;
1588 }
012bcf8d
GS
1589 }
1590
1591 NUM_ESCAPE_INSERT:
1592 /* Insert oct or hex escaped character.
301d3d20 1593 * There will always enough room in sv since such
db42d148 1594 * escapes will be longer than any UTF-8 sequence
301d3d20 1595 * they can end up as. */
ba7cea30 1596
c7f1f016
NIS
1597 /* We need to map to chars to ASCII before doing the tests
1598 to cover EBCDIC
1599 */
c4d5f83a 1600 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1601 if (!has_utf8 && uv > 255) {
301d3d20
JH
1602 /* Might need to recode whatever we have
1603 * accumulated so far if it contains any
1604 * hibit chars.
1605 *
1606 * (Can't we keep track of that and avoid
1607 * this rescan? --jhi)
012bcf8d 1608 */
c7f1f016 1609 int hicount = 0;
63cd0674
NIS
1610 U8 *c;
1611 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1612 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1613 hicount++;
db42d148 1614 }
012bcf8d 1615 }
63cd0674 1616 if (hicount) {
db42d148
NIS
1617 STRLEN offset = d - SvPVX(sv);
1618 U8 *src, *dst;
1619 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1620 src = (U8 *)d - 1;
1621 dst = src+hicount;
1622 d += hicount;
1623 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1624 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1625 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1626 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1627 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1628 }
1629 else {
63cd0674 1630 *dst-- = *src;
012bcf8d 1631 }
c7f1f016 1632 src--;
012bcf8d
GS
1633 }
1634 }
1635 }
1636
9aa983d2 1637 if (has_utf8 || uv > 255) {
9041c2e3 1638 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1639 has_utf8 = TRUE;
f9a63242
JH
1640 if (PL_lex_inwhat == OP_TRANS &&
1641 PL_sublex_info.sub_op) {
1642 PL_sublex_info.sub_op->op_private |=
1643 (PL_lex_repl ? OPpTRANS_FROM_UTF
1644 : OPpTRANS_TO_UTF);
f9a63242 1645 }
012bcf8d 1646 }
a0ed51b3 1647 else {
012bcf8d 1648 *d++ = (char)uv;
a0ed51b3 1649 }
012bcf8d
GS
1650 }
1651 else {
c4d5f83a 1652 *d++ = (char) uv;
a0ed51b3 1653 }
79072805 1654 continue;
02aa26ce 1655
b239daa5 1656 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1657 case 'N':
55eda711 1658 ++s;
423cee85
JH
1659 if (*s == '{') {
1660 char* e = strchr(s, '}');
155aba94 1661 SV *res;
423cee85
JH
1662 STRLEN len;
1663 char *str;
4e553d73 1664
423cee85 1665 if (!e) {
5777a3f7 1666 yyerror("Missing right brace on \\N{}");
423cee85
JH
1667 e = s - 1;
1668 goto cont_scan;
1669 }
dbc0d4f2
JH
1670 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1671 /* \N{U+...} */
1672 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1673 PERL_SCAN_DISALLOW_PREFIX;
1674 s += 3;
1675 len = e - s;
1676 uv = grok_hex(s, &len, &flags, NULL);
1677 s = e + 1;
1678 goto NUM_ESCAPE_INSERT;
1679 }
55eda711
JH
1680 res = newSVpvn(s + 1, e - s - 1);
1681 res = new_constant( Nullch, 0, "charnames",
1682 res, Nullsv, "\\N{...}" );
f9a63242
JH
1683 if (has_utf8)
1684 sv_utf8_upgrade(res);
423cee85 1685 str = SvPV(res,len);
1c47067b
JH
1686#ifdef EBCDIC_NEVER_MIND
1687 /* charnames uses pack U and that has been
1688 * recently changed to do the below uni->native
1689 * mapping, so this would be redundant (and wrong,
1690 * the code point would be doubly converted).
1691 * But leave this in just in case the pack U change
1692 * gets revoked, but the semantics is still
1693 * desireable for charnames. --jhi */
cddc7ef4
JH
1694 {
1695 UV uv = utf8_to_uvchr((U8*)str, 0);
1696
1697 if (uv < 0x100) {
1698 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1699
1700 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1701 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1702 str = SvPV(res, len);
1703 }
1704 }
1705#endif
89491803 1706 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1707 char *ostart = SvPVX(sv);
1708 SvCUR_set(sv, d - ostart);
1709 SvPOK_on(sv);
e4f3eed8 1710 *d = '\0';
f08d6ad9 1711 sv_utf8_upgrade(sv);
d2f449dd 1712 /* this just broke our allocation above... */
eb160463 1713 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1714 d = SvPVX(sv) + SvCUR(sv);
89491803 1715 has_utf8 = TRUE;
f08d6ad9 1716 }
eb160463 1717 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1718 char *odest = SvPVX(sv);
1719
8973db79 1720 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1721 d = SvPVX(sv) + (d - odest);
1722 }
1723 Copy(str, d, len, char);
1724 d += len;
1725 SvREFCNT_dec(res);
1726 cont_scan:
1727 s = e + 1;
1728 }
1729 else
5777a3f7 1730 yyerror("Missing braces on \\N{}");
423cee85
JH
1731 continue;
1732
02aa26ce 1733 /* \c is a control character */
79072805
LW
1734 case 'c':
1735 s++;
961ce445 1736 if (s < send) {
ba210ebe 1737 U8 c = *s++;
c7f1f016
NIS
1738#ifdef EBCDIC
1739 if (isLOWER(c))
1740 c = toUPPER(c);
1741#endif
db42d148 1742 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1743 }
961ce445
RGS
1744 else {
1745 yyerror("Missing control char name in \\c");
1746 }
79072805 1747 continue;
02aa26ce
NT
1748
1749 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1750 case 'b':
db42d148 1751 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1752 break;
1753 case 'n':
db42d148 1754 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1755 break;
1756 case 'r':
db42d148 1757 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1758 break;
1759 case 'f':
db42d148 1760 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1761 break;
1762 case 't':
db42d148 1763 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1764 break;
34a3fe2a 1765 case 'e':
db42d148 1766 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1767 break;
1768 case 'a':
db42d148 1769 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1770 break;
02aa26ce
NT
1771 } /* end switch */
1772
79072805
LW
1773 s++;
1774 continue;
02aa26ce
NT
1775 } /* end if (backslash) */
1776
f9a63242 1777 default_action:
2b9d42f0
NIS
1778 /* If we started with encoded form, or already know we want it
1779 and then encode the next character */
1780 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1781 STRLEN len = 1;
1782 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1783 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1784 s += len;
1785 if (need > len) {
1786 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1787 STRLEN off = d - SvPVX(sv);
1788 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1789 }
1790 d = (char*)uvchr_to_utf8((U8*)d, uv);
1791 has_utf8 = TRUE;
1792 }
1793 else {
1794 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1795 }
02aa26ce
NT
1796 } /* while loop to process each character */
1797
1798 /* terminate the string and set up the sv */
79072805 1799 *d = '\0';
463ee0b2 1800 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1801 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1802 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1803
79072805 1804 SvPOK_on(sv);
9f4817db 1805 if (PL_encoding && !has_utf8) {
d0063567
DK
1806 sv_recode_to_utf8(sv, PL_encoding);
1807 if (SvUTF8(sv))
1808 has_utf8 = TRUE;
9f4817db 1809 }
2b9d42f0 1810 if (has_utf8) {
7e2040f0 1811 SvUTF8_on(sv);
2b9d42f0 1812 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1813 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1814 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1815 }
1816 }
79072805 1817
02aa26ce 1818 /* shrink the sv if we allocated more than we used */
79072805
LW
1819 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1820 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1821 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1822 }
02aa26ce 1823
9b599b2a 1824 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1825 if (s > PL_bufptr) {
1826 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1827 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1828 sv, Nullsv,
4e553d73 1829 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1830 ? "tr"
3280af22 1831 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1832 ? "s"
1833 : "qq")));
79072805 1834 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1835 } else
8990e307 1836 SvREFCNT_dec(sv);
79072805
LW
1837 return s;
1838}
1839
ffb4593c
NT
1840/* S_intuit_more
1841 * Returns TRUE if there's more to the expression (e.g., a subscript),
1842 * FALSE otherwise.
ffb4593c
NT
1843 *
1844 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1845 *
1846 * ->[ and ->{ return TRUE
1847 * { and [ outside a pattern are always subscripts, so return TRUE
1848 * if we're outside a pattern and it's not { or [, then return FALSE
1849 * if we're in a pattern and the first char is a {
1850 * {4,5} (any digits around the comma) returns FALSE
1851 * if we're in a pattern and the first char is a [
1852 * [] returns FALSE
1853 * [SOMETHING] has a funky algorithm to decide whether it's a
1854 * character class or not. It has to deal with things like
1855 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1856 * anything else returns TRUE
1857 */
1858
9cbb5ea2
GS
1859/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1860
76e3520e 1861STATIC int
cea2e8a9 1862S_intuit_more(pTHX_ register char *s)
79072805 1863{
3280af22 1864 if (PL_lex_brackets)
79072805
LW
1865 return TRUE;
1866 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1867 return TRUE;
1868 if (*s != '{' && *s != '[')
1869 return FALSE;
3280af22 1870 if (!PL_lex_inpat)
79072805
LW
1871 return TRUE;
1872
1873 /* In a pattern, so maybe we have {n,m}. */
1874 if (*s == '{') {
1875 s++;
1876 if (!isDIGIT(*s))
1877 return TRUE;
1878 while (isDIGIT(*s))
1879 s++;
1880 if (*s == ',')
1881 s++;
1882 while (isDIGIT(*s))
1883 s++;
1884 if (*s == '}')
1885 return FALSE;
1886 return TRUE;
1887
1888 }
1889
1890 /* On the other hand, maybe we have a character class */
1891
1892 s++;
1893 if (*s == ']' || *s == '^')
1894 return FALSE;
1895 else {
ffb4593c 1896 /* this is terrifying, and it works */
79072805
LW
1897 int weight = 2; /* let's weigh the evidence */
1898 char seen[256];
f27ffc4a 1899 unsigned char un_char = 255, last_un_char;
93a17b20 1900 char *send = strchr(s,']');
3280af22 1901 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1902
1903 if (!send) /* has to be an expression */
1904 return TRUE;
1905
1906 Zero(seen,256,char);
1907 if (*s == '$')
1908 weight -= 3;
1909 else if (isDIGIT(*s)) {
1910 if (s[1] != ']') {
1911 if (isDIGIT(s[1]) && s[2] == ']')
1912 weight -= 10;
1913 }
1914 else
1915 weight -= 100;
1916 }
1917 for (; s < send; s++) {
1918 last_un_char = un_char;
1919 un_char = (unsigned char)*s;
1920 switch (*s) {
1921 case '@':
1922 case '&':
1923 case '$':
1924 weight -= seen[un_char] * 10;
7e2040f0 1925 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1926 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1927 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1928 weight -= 100;
1929 else
1930 weight -= 10;
1931 }
1932 else if (*s == '$' && s[1] &&
93a17b20
LW
1933 strchr("[#!%*<>()-=",s[1])) {
1934 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1935 weight -= 10;
1936 else
1937 weight -= 1;
1938 }
1939 break;
1940 case '\\':
1941 un_char = 254;
1942 if (s[1]) {
93a17b20 1943 if (strchr("wds]",s[1]))
79072805
LW
1944 weight += 100;
1945 else if (seen['\''] || seen['"'])
1946 weight += 1;
93a17b20 1947 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1948 weight += 40;
1949 else if (isDIGIT(s[1])) {
1950 weight += 40;
1951 while (s[1] && isDIGIT(s[1]))
1952 s++;
1953 }
1954 }
1955 else
1956 weight += 100;
1957 break;
1958 case '-':
1959 if (s[1] == '\\')
1960 weight += 50;
93a17b20 1961 if (strchr("aA01! ",last_un_char))
79072805 1962 weight += 30;
93a17b20 1963 if (strchr("zZ79~",s[1]))
79072805 1964 weight += 30;
f27ffc4a
GS
1965 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1966 weight -= 5; /* cope with negative subscript */
79072805
LW
1967 break;
1968 default:
3792a11b
NC
1969 if (!isALNUM(last_un_char)
1970 && !(last_un_char == '$' || last_un_char == '@'
1971 || last_un_char == '&')
1972 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1973 char *d = tmpbuf;
1974 while (isALPHA(*s))
1975 *d++ = *s++;
1976 *d = '\0';
1977 if (keyword(tmpbuf, d - tmpbuf))
1978 weight -= 150;
1979 }
1980 if (un_char == last_un_char + 1)
1981 weight += 5;
1982 weight -= seen[un_char];
1983 break;
1984 }
1985 seen[un_char]++;
1986 }
1987 if (weight >= 0) /* probably a character class */
1988 return FALSE;
1989 }
1990
1991 return TRUE;
1992}
ffed7fef 1993
ffb4593c
NT
1994/*
1995 * S_intuit_method
1996 *
1997 * Does all the checking to disambiguate
1998 * foo bar
1999 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2000 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2001 *
2002 * First argument is the stuff after the first token, e.g. "bar".
2003 *
2004 * Not a method if bar is a filehandle.
2005 * Not a method if foo is a subroutine prototyped to take a filehandle.
2006 * Not a method if it's really "Foo $bar"
2007 * Method if it's "foo $bar"
2008 * Not a method if it's really "print foo $bar"
2009 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2010 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2011 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2012 * =>
2013 */
2014
76e3520e 2015STATIC int
cea2e8a9 2016S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2017{
2018 char *s = start + (*start == '$');
3280af22 2019 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2020 STRLEN len;
2021 GV* indirgv;
2022
2023 if (gv) {
b6c543e3 2024 CV *cv;
a0d0e21e
LW
2025 if (GvIO(gv))
2026 return 0;
b6c543e3
IZ
2027 if ((cv = GvCVu(gv))) {
2028 char *proto = SvPVX(cv);
2029 if (proto) {
2030 if (*proto == ';')
2031 proto++;
2032 if (*proto == '*')
2033 return 0;
2034 }
2035 } else
a0d0e21e
LW
2036 gv = 0;
2037 }
8903cb82 2038 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2039 /* start is the beginning of the possible filehandle/object,
2040 * and s is the end of it
2041 * tmpbuf is a copy of it
2042 */
2043
a0d0e21e 2044 if (*start == '$') {
3280af22 2045 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2046 return 0;
2047 s = skipspace(s);
3280af22
NIS
2048 PL_bufptr = start;
2049 PL_expect = XREF;
a0d0e21e
LW
2050 return *s == '(' ? FUNCMETH : METHOD;
2051 }
2052 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2053 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2054 len -= 2;
2055 tmpbuf[len] = '\0';
2056 goto bare_package;
2057 }
2058 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2059 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2060 return 0;
2061 /* filehandle or package name makes it a method */
89bfa8cd 2062 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2063 s = skipspace(s);
3280af22 2064 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2065 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2066 bare_package:
3280af22 2067 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2068 newSVpvn(tmpbuf,len));
3280af22
NIS
2069 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2070 PL_expect = XTERM;
a0d0e21e 2071 force_next(WORD);
3280af22 2072 PL_bufptr = s;
a0d0e21e
LW
2073 return *s == '(' ? FUNCMETH : METHOD;
2074 }
2075 }
2076 return 0;
2077}
2078
ffb4593c
NT
2079/*
2080 * S_incl_perldb
2081 * Return a string of Perl code to load the debugger. If PERL5DB
2082 * is set, it will return the contents of that, otherwise a
2083 * compile-time require of perl5db.pl.
2084 */
2085
76e3520e 2086STATIC char*
cea2e8a9 2087S_incl_perldb(pTHX)
a0d0e21e 2088{
3280af22 2089 if (PL_perldb) {
76e3520e 2090 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2091
2092 if (pdb)
2093 return pdb;
93189314 2094 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2095 return "BEGIN { require 'perl5db.pl' }";
2096 }
2097 return "";
2098}
2099
2100
16d20bd9 2101/* Encoded script support. filter_add() effectively inserts a
4e553d73 2102 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2103 * Note that the filter function only applies to the current source file
2104 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2105 *
2106 * The datasv parameter (which may be NULL) can be used to pass
2107 * private data to this instance of the filter. The filter function
2108 * can recover the SV using the FILTER_DATA macro and use it to
2109 * store private buffers and state information.
2110 *
2111 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2112 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2113 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2114 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2115 * private use must be set using malloc'd pointers.
2116 */
16d20bd9
AD
2117
2118SV *
864dbfa3 2119Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2120{
f4c556ac
GS
2121 if (!funcp)
2122 return Nullsv;
2123
3280af22
NIS
2124 if (!PL_rsfp_filters)
2125 PL_rsfp_filters = newAV();
16d20bd9 2126 if (!datasv)
8c52afec 2127 datasv = NEWSV(255,0);
16d20bd9 2128 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2129 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2130 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2131 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2132 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2133 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2134 av_unshift(PL_rsfp_filters, 1);
2135 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2136 return(datasv);
2137}
4e553d73 2138
16d20bd9
AD
2139
2140/* Delete most recently added instance of this filter function. */
a0d0e21e 2141void
864dbfa3 2142Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2143{
e0c19803 2144 SV *datasv;
fe5a182c 2145 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2146 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2147 return;
2148 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2149 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2150 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2151 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2152 IoANY(datasv) = (void *)NULL;
3280af22 2153 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2154
16d20bd9
AD
2155 return;
2156 }
2157 /* we need to search for the correct entry and clear it */
cea2e8a9 2158 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2159}
2160
2161
1de9afcd
RGS
2162/* Invoke the idxth filter function for the current rsfp. */
2163/* maxlen 0 = read one text line */
16d20bd9 2164I32
864dbfa3 2165Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2166{
16d20bd9
AD
2167 filter_t funcp;
2168 SV *datasv = NULL;
e50aee73 2169
3280af22 2170 if (!PL_rsfp_filters)
16d20bd9 2171 return -1;
1de9afcd 2172 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2173 /* Provide a default input filter to make life easy. */
2174 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2175 DEBUG_P(PerlIO_printf(Perl_debug_log,
2176 "filter_read %d: from rsfp\n", idx));
4e553d73 2177 if (maxlen) {
16d20bd9
AD
2178 /* Want a block */
2179 int len ;
2180 int old_len = SvCUR(buf_sv) ;
2181
2182 /* ensure buf_sv is large enough */
eb160463 2183 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2184 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2185 if (PerlIO_error(PL_rsfp))
37120919
AD
2186 return -1; /* error */
2187 else
2188 return 0 ; /* end of file */
2189 }
16d20bd9
AD
2190 SvCUR_set(buf_sv, old_len + len) ;
2191 } else {
2192 /* Want a line */
3280af22
NIS
2193 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2194 if (PerlIO_error(PL_rsfp))
37120919
AD
2195 return -1; /* error */
2196 else
2197 return 0 ; /* end of file */
2198 }
16d20bd9
AD
2199 }
2200 return SvCUR(buf_sv);
2201 }
2202 /* Skip this filter slot if filter has been deleted */
1de9afcd 2203 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2204 DEBUG_P(PerlIO_printf(Perl_debug_log,
2205 "filter_read %d: skipped (filter deleted)\n",
2206 idx));
16d20bd9
AD
2207 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2208 }
2209 /* Get function pointer hidden within datasv */
4755096e 2210 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2211 DEBUG_P(PerlIO_printf(Perl_debug_log,
2212 "filter_read %d: via function %p (%s)\n",
fe5a182c 2213 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2214 /* Call function. The function is expected to */
2215 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2216 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2217 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2218}
2219
76e3520e 2220STATIC char *
cea2e8a9 2221S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2222{
c39cd008 2223#ifdef PERL_CR_FILTER
3280af22 2224 if (!PL_rsfp_filters) {
c39cd008 2225 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2226 }
2227#endif
3280af22 2228 if (PL_rsfp_filters) {
55497cff
PP
2229 if (!append)
2230 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2231 if (FILTER_READ(0, sv, 0) > 0)
2232 return ( SvPVX(sv) ) ;
2233 else
2234 return Nullch ;
2235 }
9d116dd7 2236 else
fd049845 2237 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2238}
2239
01ec43d0
GS
2240STATIC HV *
2241S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2242{
2243 GV *gv;
2244
01ec43d0 2245 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2246 return PL_curstash;
2247
2248 if (len > 2 &&
2249 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2250 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2251 {
2252 return GvHV(gv); /* Foo:: */
def3634b
GS
2253 }
2254
2255 /* use constant CLASS => 'MyClass' */
2256 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2257 SV *sv;
2258 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2259 pkgname = SvPV_nolen(sv);
2260 }
2261 }
2262
2263 return gv_stashpv(pkgname, FALSE);
2264}
a0d0e21e 2265
748a9306
LW
2266#ifdef DEBUGGING
2267 static char* exp_name[] =
09bef843 2268 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2269 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2270 };
748a9306 2271#endif
463ee0b2 2272
02aa26ce
NT
2273/*
2274 yylex
2275
2276 Works out what to call the token just pulled out of the input
2277 stream. The yacc parser takes care of taking the ops we return and
2278 stitching them into a tree.
2279
2280 Returns:
2281 PRIVATEREF
2282
2283 Structure:
2284 if read an identifier
2285 if we're in a my declaration
2286 croak if they tried to say my($foo::bar)
2287 build the ops for a my() declaration
2288 if it's an access to a my() variable
2289 are we in a sort block?
2290 croak if my($a); $a <=> $b
2291 build ops for access to a my() variable
2292 if in a dq string, and they've said @foo and we can't find @foo
2293 croak
2294 build ops for a bareword
2295 if we already built the token before, use it.
2296*/
2297
20141f0e 2298
dba4d153
JH
2299#ifdef __SC__
2300#pragma segment Perl_yylex
2301#endif
dba4d153 2302int
dba4d153 2303Perl_yylex(pTHX)
20141f0e 2304{
3afc138a 2305 register char *s = PL_bufptr;
378cc40b 2306 register char *d;
79072805 2307 register I32 tmp;
463ee0b2 2308 STRLEN len;
161b471a
NIS
2309 GV *gv = Nullgv;
2310 GV **gvp = 0;
aa7440fb 2311 bool bof = FALSE;
1d239bbb 2312 I32 orig_keyword = 0;
a687059c 2313
bbf60fe6
DM
2314 DEBUG_T( {
2315 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2316 lex_state_names[PL_lex_state]);
2317 } );
02aa26ce 2318 /* check if there's an identifier for us to look at */
ba979b31 2319 if (PL_pending_ident)
bbf60fe6 2320 return REPORT(S_pending_ident(aTHX));
bbce6d69 2321
02aa26ce
NT
2322 /* no identifier pending identification */
2323
3280af22 2324 switch (PL_lex_state) {
79072805
LW
2325#ifdef COMMENTARY
2326 case LEX_NORMAL: /* Some compilers will produce faster */
2327 case LEX_INTERPNORMAL: /* code if we comment these out. */
2328 break;
2329#endif
2330
09bef843 2331 /* when we've already built the next token, just pull it out of the queue */
79072805 2332 case LEX_KNOWNEXT:
3280af22
NIS
2333 PL_nexttoke--;
2334 yylval = PL_nextval[PL_nexttoke];
2335 if (!PL_nexttoke) {
2336 PL_lex_state = PL_lex_defer;
2337 PL_expect = PL_lex_expect;
2338 PL_lex_defer = LEX_NORMAL;
463ee0b2 2339 }
607df283 2340 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2341 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2342 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2343
bbf60fe6 2344 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2345
02aa26ce 2346 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2347 when we get here, PL_bufptr is at the \
02aa26ce 2348 */
79072805
LW
2349 case LEX_INTERPCASEMOD:
2350#ifdef DEBUGGING
3280af22 2351 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2352 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2353#endif
02aa26ce 2354 /* handle \E or end of string */
3280af22 2355 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2356 char oldmod;
02aa26ce
NT
2357
2358 /* if at a \E */
3280af22
NIS
2359 if (PL_lex_casemods) {
2360 oldmod = PL_lex_casestack[--PL_lex_casemods];
2361 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2362
3792a11b
NC
2363 if (PL_bufptr != PL_bufend
2364 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2365 PL_bufptr += 2;
2366 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2367 }
bbf60fe6 2368 return REPORT(')');
79072805 2369 }
3280af22
NIS
2370 if (PL_bufptr != PL_bufend)
2371 PL_bufptr += 2;
2372 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2373 return yylex();
79072805
LW
2374 }
2375 else {
607df283 2376 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2377 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2378 s = PL_bufptr + 1;
6e909404
JH
2379 if (s[1] == '\\' && s[2] == 'E') {
2380 PL_bufptr = s + 3;
2381 PL_lex_state = LEX_INTERPCONCAT;
2382 return yylex();
a0d0e21e 2383 }
6e909404
JH
2384 else {
2385 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2386 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2387 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2388 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2389 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2390 return REPORT(')');
6e909404
JH
2391 }
2392 if (PL_lex_casemods > 10)
2393 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2394 PL_lex_casestack[PL_lex_casemods++] = *s;
2395 PL_lex_casestack[PL_lex_casemods] = '\0';
2396 PL_lex_state = LEX_INTERPCONCAT;
2397 PL_nextval[PL_nexttoke].ival = 0;
2398 force_next('(');
2399 if (*s == 'l')
2400 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2401 else if (*s == 'u')
2402 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2403 else if (*s == 'L')
2404 PL_nextval[PL_nexttoke].ival = OP_LC;
2405 else if (*s == 'U')
2406 PL_nextval[PL_nexttoke].ival = OP_UC;
2407 else if (*s == 'Q')
2408 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2409 else
2410 Perl_croak(aTHX_ "panic: yylex");
2411 PL_bufptr = s + 1;
a0d0e21e 2412 }
79072805 2413 force_next(FUNC);
3280af22
NIS
2414 if (PL_lex_starts) {
2415 s = PL_bufptr;
2416 PL_lex_starts = 0;
131b3ad0
DM
2417 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2418 if (PL_lex_casemods == 1 && PL_lex_inpat)
2419 OPERATOR(',');
2420 else
2421 Aop(OP_CONCAT);
79072805
LW
2422 }
2423 else
cea2e8a9 2424 return yylex();
79072805
LW
2425 }
2426
55497cff 2427 case LEX_INTERPPUSH:
bbf60fe6 2428 return REPORT(sublex_push());
55497cff 2429
79072805 2430 case LEX_INTERPSTART:
3280af22 2431 if (PL_bufptr == PL_bufend)
bbf60fe6 2432 return REPORT(sublex_done());
607df283 2433 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2434 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2435 PL_expect = XTERM;
2436 PL_lex_dojoin = (*PL_bufptr == '@');
2437 PL_lex_state = LEX_INTERPNORMAL;
2438 if (PL_lex_dojoin) {
2439 PL_nextval[PL_nexttoke].ival = 0;
79072805 2440 force_next(',');
a0d0e21e 2441 force_ident("\"", '$');
3280af22 2442 PL_nextval[PL_nexttoke].ival = 0;
79072805 2443 force_next('$');
3280af22 2444 PL_nextval[PL_nexttoke].ival = 0;
79072805 2445 force_next('(');
3280af22 2446 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2447 force_next(FUNC);
2448 }
3280af22
NIS
2449 if (PL_lex_starts++) {
2450 s = PL_bufptr;
131b3ad0
DM
2451 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2452 if (!PL_lex_casemods && PL_lex_inpat)
2453 OPERATOR(',');
2454 else
2455 Aop(OP_CONCAT);
79072805 2456 }
cea2e8a9 2457 return yylex();
79072805
LW
2458
2459 case LEX_INTERPENDMAYBE:
3280af22
NIS
2460 if (intuit_more(PL_bufptr)) {
2461 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2462 break;
2463 }
2464 /* FALL THROUGH */
2465
2466 case LEX_INTERPEND:
3280af22
NIS
2467 if (PL_lex_dojoin) {
2468 PL_lex_dojoin = FALSE;
2469 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2470 return REPORT(')');
79072805 2471 }
43a16006 2472 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2473 && SvEVALED(PL_lex_repl))
43a16006 2474 {
e9fa98b2 2475 if (PL_bufptr != PL_bufend)
cea2e8a9 2476 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2477 PL_lex_repl = Nullsv;
2478 }
79072805
LW
2479 /* FALLTHROUGH */
2480 case LEX_INTERPCONCAT:
2481#ifdef DEBUGGING
3280af22 2482 if (PL_lex_brackets)
cea2e8a9 2483 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2484#endif
3280af22 2485 if (PL_bufptr == PL_bufend)
bbf60fe6 2486 return REPORT(sublex_done());
79072805 2487
3280af22
NIS
2488 if (SvIVX(PL_linestr) == '\'') {
2489 SV *sv = newSVsv(PL_linestr);
2490 if (!PL_lex_inpat)
76e3520e 2491 sv = tokeq(sv);
3280af22 2492 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2493 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2494 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2495 s = PL_bufend;
79072805
LW
2496 }
2497 else {
3280af22 2498 s = scan_const(PL_bufptr);
79072805 2499 if (*s == '\\')
3280af22 2500 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2501 else
3280af22 2502 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2503 }
2504
3280af22
NIS
2505 if (s != PL_bufptr) {
2506 PL_nextval[PL_nexttoke] = yylval;
2507 PL_expect = XTERM;
79072805 2508 force_next(THING);
131b3ad0
DM
2509 if (PL_lex_starts++) {
2510 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2511 if (!PL_lex_casemods && PL_lex_inpat)
2512 OPERATOR(',');
2513 else
2514 Aop(OP_CONCAT);
2515 }
79072805 2516 else {
3280af22 2517 PL_bufptr = s;
cea2e8a9 2518 return yylex();
79072805
LW
2519 }
2520 }
2521
cea2e8a9 2522 return yylex();
a0d0e21e 2523 case LEX_FORMLINE:
3280af22
NIS
2524 PL_lex_state = LEX_NORMAL;
2525 s = scan_formline(PL_bufptr);
2526 if (!PL_lex_formbrack)
a0d0e21e
LW
2527 goto rightbracket;
2528 OPERATOR(';');
79072805
LW
2529 }
2530
3280af22
NIS
2531 s = PL_bufptr;
2532 PL_oldoldbufptr = PL_oldbufptr;
2533 PL_oldbufptr = s;
607df283 2534 DEBUG_T( {
bbf60fe6 2535 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2536 exp_name[PL_expect], s);
5f80b19c 2537 } );
463ee0b2
LW
2538
2539 retry:
378cc40b
LW
2540 switch (*s) {
2541 default:
7e2040f0 2542 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2543 goto keylookup;
cea2e8a9 2544 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2545 case 4:
2546 case 26:
2547 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2548 case 0:
3280af22
NIS
2549 if (!PL_rsfp) {
2550 PL_last_uni = 0;
2551 PL_last_lop = 0;
c5ee2135
LW
2552 if (PL_lex_brackets) {
2553 if (PL_lex_formbrack)
2554 yyerror("Format not terminated");
2555 else
2556 yyerror("Missing right curly or square bracket");
2557 }
4e553d73 2558 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2559 "### Tokener got EOF\n");
5f80b19c 2560 } );
79072805 2561 TOKEN(0);
463ee0b2 2562 }
3280af22 2563 if (s++ < PL_bufend)
a687059c 2564 goto retry; /* ignore stray nulls */
3280af22
NIS
2565 PL_last_uni = 0;
2566 PL_last_lop = 0;
2567 if (!PL_in_eval && !PL_preambled) {
2568 PL_preambled = TRUE;
2569 sv_setpv(PL_linestr,incl_perldb());
2570 if (SvCUR(PL_linestr))
4147a61b 2571 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2572 if (PL_preambleav){
2573 while(AvFILLp(PL_preambleav) >= 0) {
2574 SV *tmpsv = av_shift(PL_preambleav);
2575 sv_catsv(PL_linestr, tmpsv);
4147a61b 2576 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2577 sv_free(tmpsv);
2578 }
3280af22
NIS
2579 sv_free((SV*)PL_preambleav);
2580 PL_preambleav = NULL;
91b7def8 2581 }
3280af22
NIS
2582 if (PL_minus_n || PL_minus_p) {
2583 sv_catpv(PL_linestr, "LINE: while (<>) {");
2584 if (PL_minus_l)
2585 sv_catpv(PL_linestr,"chomp;");
2586 if (PL_minus_a) {
3280af22 2587 if (PL_minus_F) {
3792a11b
NC
2588 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2589 || *PL_splitstr == '"')
3280af22 2590 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2591 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121
PP
2592 else {
2593 char delim;
2594 s = "'~#\200\1'"; /* surely one char is unused...*/
3280af22 2595 while (s[1] && strchr(PL_splitstr, *s)) s++;
54310121 2596 delim = *s;
75c72d73 2597 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s%c",
46fc3d4c 2598 "q" + (delim == '\''), delim);
3280af22 2599 for (s = PL_splitstr; *s; s++) {
54310121 2600 if (*s == '\\')
3280af22
NIS
2601 sv_catpvn(PL_linestr, "\\", 1);
2602 sv_catpvn(PL_linestr, s, 1);
54310121 2603 }
cea2e8a9 2604 Perl_sv_catpvf(aTHX_ PL_linestr, "%c);", delim);
54310121 2605 }
2304df62
AD
2606 }
2607 else
75c72d73 2608 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2609 }
79072805 2610 }
4147a61b 2611 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2612 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2613 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2614 PL_last_lop = PL_last_uni = Nullch;
3280af22 2615 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2616 SV *sv = NEWSV(85,0);
2617
2618 sv_upgrade(sv, SVt_PVMG);
3280af22 2619 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2620 (void)SvIOK_on(sv);
2621 SvIVX(sv) = 0;
57843af0 2622 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2623 }
79072805 2624 goto retry;
a687059c 2625 }
e929a76b 2626 do {
aa7440fb 2627 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2628 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2629 fake_eof:
2630 if (PL_rsfp) {
2631 if (PL_preprocess && !PL_in_eval)
2632 (void)PerlProc_pclose(PL_rsfp);
2633 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2634 PerlIO_clearerr(PL_rsfp);
2635 else
2636 (void)PerlIO_close(PL_rsfp);
2637 PL_rsfp = Nullfp;
2638 PL_doextract = FALSE;
2639 }
2640 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
2641 sv_setpv(PL_linestr,PL_minus_p ? ";}continue{print" : "");
2642 sv_catpv(PL_linestr,";}");
2643 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2644 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2645 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2646 PL_minus_n = PL_minus_p = 0;
2647 goto retry;
2648 }
2649 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2650 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2651 sv_setpv(PL_linestr,"");
2652 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2653 }
7aa207d6
JH
2654 /* If it looks like the start of a BOM or raw UTF-16,
2655 * check if it in fact is. */
2656 else if (bof &&
2657 (*s == 0 ||
2658 *(U8*)s == 0xEF ||
2659 *(U8*)s >= 0xFE ||
2660 s[1] == 0)) {
226017aa 2661#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2662# ifdef __GNU_LIBRARY__
2663# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2664# define FTELL_FOR_PIPE_IS_BROKEN
2665# endif
e3f494f1
JH
2666# else
2667# ifdef __GLIBC__
2668# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2669# define FTELL_FOR_PIPE_IS_BROKEN
2670# endif
2671# endif
226017aa
DD
2672# endif
2673#endif
2674#ifdef FTELL_FOR_PIPE_IS_BROKEN
2675 /* This loses the possibility to detect the bof
2676 * situation on perl -P when the libc5 is being used.
2677 * Workaround? Maybe attach some extra state to PL_rsfp?
2678 */
2679 if (!PL_preprocess)
7e28d3af 2680 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2681#else
eb160463 2682 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2683#endif
7e28d3af 2684 if (bof) {
3280af22 2685 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2686 s = swallow_bom((U8*)s);
e929a76b 2687 }
378cc40b 2688 }
3280af22 2689 if (PL_doextract) {
a0d0e21e
LW
2690 /* Incest with pod. */
2691 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2692 sv_setpv(PL_linestr, "");
2693 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2694 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2695 PL_last_lop = PL_last_uni = Nullch;
3280af22 2696 PL_doextract = FALSE;
a0d0e21e 2697 }
4e553d73 2698 }
463ee0b2 2699 incline(s);
3280af22
NIS
2700 } while (PL_doextract);
2701 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2702 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2703 SV *sv = NEWSV(85,0);
a687059c 2704
93a17b20 2705 sv_upgrade(sv, SVt_PVMG);
3280af22 2706 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2707 (void)SvIOK_on(sv);
2708 SvIVX(sv) = 0;
57843af0 2709 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2710 }
3280af22 2711 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2712 PL_last_lop = PL_last_uni = Nullch;
57843af0 2713 if (CopLINE(PL_curcop) == 1) {
3280af22 2714 while (s < PL_bufend && isSPACE(*s))
79072805 2715 s++;
a0d0e21e 2716 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2717 s++;
44a8e56a 2718 d = Nullch;
3280af22 2719 if (!PL_in_eval) {
44a8e56a
PP
2720 if (*s == '#' && *(s+1) == '!')
2721 d = s + 2;
2722#ifdef ALTERNATE_SHEBANG
2723 else {
2724 static char as[] = ALTERNATE_SHEBANG;
2725 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2726 d = s + (sizeof(as) - 1);
2727 }
2728#endif /* ALTERNATE_SHEBANG */
2729 }
2730 if (d) {
b8378b72 2731 char *ipath;
774d564b 2732 char *ipathend;
b8378b72 2733
774d564b 2734 while (isSPACE(*d))
b8378b72
CS
2735 d++;
2736 ipath = d;
774d564b
PP
2737 while (*d && !isSPACE(*d))
2738 d++;
2739 ipathend = d;
2740
2741#ifdef ARG_ZERO_IS_SCRIPT
2742 if (ipathend > ipath) {
2743 /*
2744 * HP-UX (at least) sets argv[0] to the script name,
2745 * which makes $^X incorrect. And Digital UNIX and Linux,
2746 * at least, set argv[0] to the basename of the Perl
2747 * interpreter. So, having found "#!", we'll set it right.
2748 */
ee2f7564 2749 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2750 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2751 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2752 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2753 SvSETMAGIC(x);
2754 }
556c1dec
JH
2755 else {
2756 STRLEN blen;
2757 STRLEN llen;
2758 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2759 char *lstart = SvPV(x,llen);
2760 if (llen < blen) {
2761 bstart += blen - llen;
2762 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2763 sv_setpvn(x, ipath, ipathend - ipath);
2764 SvSETMAGIC(x);
2765 }
2766 }
2767 }
774d564b 2768 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2769 }
774d564b 2770#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2771
2772 /*
2773 * Look for options.
2774 */
748a9306 2775 d = instr(s,"perl -");
84e30d1a 2776 if (!d) {
748a9306 2777 d = instr(s,"perl");
84e30d1a
GS
2778#if defined(DOSISH)
2779 /* avoid getting into infinite loops when shebang
2780 * line contains "Perl" rather than "perl" */
2781 if (!d) {
2782 for (d = ipathend-4; d >= ipath; --d) {
2783 if ((*d == 'p' || *d == 'P')
2784 && !ibcmp(d, "perl", 4))
2785 {
2786 break;
2787 }
2788 }
2789 if (d < ipath)
2790 d = Nullch;
2791 }
2792#endif
2793 }
44a8e56a
PP
2794#ifdef ALTERNATE_SHEBANG
2795 /*
2796 * If the ALTERNATE_SHEBANG on this system starts with a
2797 * character that can be part of a Perl expression, then if
2798 * we see it but not "perl", we're probably looking at the
2799 * start of Perl code, not a request to hand off to some
2800 * other interpreter. Similarly, if "perl" is there, but
2801 * not in the first 'word' of the line, we assume the line
2802 * contains the start of the Perl program.
44a8e56a
PP
2803 */
2804 if (d && *s != '#') {
774d564b 2805 char *c = ipath;
44a8e56a
PP
2806 while (*c && !strchr("; \t\r\n\f\v#", *c))
2807 c++;
2808 if (c < d)
2809 d = Nullch; /* "perl" not in first word; ignore */
2810 else
2811 *s = '#'; /* Don't try to parse shebang line */
2812 }
774d564b 2813#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2814#ifndef MACOS_TRADITIONAL
748a9306 2815 if (!d &&
44a8e56a 2816 *s == '#' &&
774d564b 2817 ipathend > ipath &&
3280af22 2818 !PL_minus_c &&
748a9306 2819 !instr(s,"indir") &&
3280af22 2820 instr(PL_origargv[0],"perl"))
748a9306 2821 {
9f68db38 2822 char **newargv;
9f68db38 2823
774d564b
PP
2824 *ipathend = '\0';
2825 s = ipathend + 1;
3280af22 2826 while (s < PL_bufend && isSPACE(*s))
9f68db38 2827 s++;
3280af22
NIS
2828 if (s < PL_bufend) {
2829 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2830 newargv[1] = s;
3280af22 2831 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2832 s++;
2833 *s = '\0';
3280af22 2834 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2835 }
2836 else
3280af22 2837 newargv = PL_origargv;
774d564b 2838 newargv[0] = ipath;
b35112e7 2839 PERL_FPU_PRE_EXEC
b4748376 2840 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2841 PERL_FPU_POST_EXEC
cea2e8a9 2842 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2843 }
bf4acbe4 2844#endif
748a9306 2845 if (d) {
3280af22
NIS
2846 U32 oldpdb = PL_perldb;
2847 bool oldn = PL_minus_n;
2848 bool oldp = PL_minus_p;
748a9306
LW
2849
2850 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2851 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2852
2853 if (*d++ == '-') {
a11ec5a9 2854 bool switches_done = PL_doswitches;
8cc95fdb
PP
2855 do {
2856 if (*d == 'M' || *d == 'm') {
2857 char *m = d;
2858 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2859 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2860 (int)(d - m), m);
2861 }
2862 d = moreswitches(d);
2863 } while (d);
f0b2cf55
YST
2864 if (PL_doswitches && !switches_done) {
2865 int argc = PL_origargc;
2866 char **argv = PL_origargv;
2867 do {
2868 argc--,argv++;
2869 } while (argc && argv[0][0] == '-' && argv[0][1]);
2870 init_argv_symbols(argc,argv);
2871 }
155aba94
GS
2872 if ((PERLDB_LINE && !oldpdb) ||
2873 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2874 /* if we have already added "LINE: while (<>) {",
2875 we must not do it again */
748a9306 2876 {
3280af22
NIS
2877 sv_setpv(PL_linestr, "");
2878 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2879 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2880 PL_last_lop = PL_last_uni = Nullch;
3280af22 2881 PL_preambled = FALSE;
84902520 2882 if (PERLDB_LINE)
3280af22 2883 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2884 goto retry;
2885 }
a11ec5a9
RGS
2886 if (PL_doswitches && !switches_done) {
2887 int argc = PL_origargc;
2888 char **argv = PL_origargv;
2889 do {
2890 argc--,argv++;
2891 } while (argc && argv[0][0] == '-' && argv[0][1]);
2892 init_argv_symbols(argc,argv);
2893 }
a0d0e21e 2894 }
79072805 2895 }
9f68db38 2896 }
79072805 2897 }
3280af22
NIS
2898 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2899 PL_bufptr = s;
2900 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2901 return yylex();
ae986130 2902 }
378cc40b 2903 goto retry;
4fdae800 2904 case '\r':
6a27c188 2905#ifdef PERL_STRICT_CR
cea2e8a9 2906 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2907 Perl_croak(aTHX_
cc507455 2908 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2909#endif
4fdae800 2910 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2911#ifdef MACOS_TRADITIONAL
2912 case '\312':
2913#endif
378cc40b
LW
2914 s++;
2915 goto retry;
378cc40b 2916 case '#':
e929a76b 2917 case '\n':
3280af22 2918 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2919 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2920 /* handle eval qq[#line 1 "foo"\n ...] */
2921 CopLINE_dec(PL_curcop);
2922 incline(s);
2923 }
3280af22 2924 d = PL_bufend;
a687059c 2925 while (s < d && *s != '\n')
378cc40b 2926 s++;
0f85fab0 2927 if (s < d)
378cc40b 2928 s++;
78c267c1 2929 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2930 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2931 incline(s);
3280af22
NIS
2932 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2933 PL_bufptr = s;
2934 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2935 return yylex();
a687059c 2936 }
378cc40b 2937 }
a687059c 2938 else {
378cc40b 2939 *s = '\0';
3280af22 2940 PL_bufend = s;
a687059c 2941 }
378cc40b
LW
2942 goto retry;
2943 case '-':
79072805 2944 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2945 I32 ftst = 0;
2946
378cc40b 2947 s++;
3280af22 2948 PL_bufptr = s;
748a9306
LW
2949 tmp = *s++;
2950
bf4acbe4 2951 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2952 s++;
2953
2954 if (strnEQ(s,"=>",2)) {
3280af22 2955 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2956 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2957 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2958 } );
748a9306
LW
2959 OPERATOR('-'); /* unary minus */
2960 }
3280af22 2961 PL_last_uni = PL_oldbufptr;
748a9306 2962 switch (tmp) {
e5edeb50
JH
2963 case 'r': ftst = OP_FTEREAD; break;
2964 case 'w': ftst = OP_FTEWRITE; break;
2965 case 'x': ftst = OP_FTEEXEC; break;
2966 case 'o': ftst = OP_FTEOWNED; break;
2967 case 'R': ftst = OP_FTRREAD; break;
2968 case 'W': ftst = OP_FTRWRITE; break;
2969 case 'X': ftst = OP_FTREXEC; break;
2970 case 'O': ftst = OP_FTROWNED; break;
2971 case 'e': ftst = OP_FTIS; break;
2972 case 'z': ftst = OP_FTZERO; break;
2973 case 's': ftst = OP_FTSIZE; break;
2974 case 'f': ftst = OP_FTFILE; break;
2975 case 'd': ftst = OP_FTDIR; break;
2976 case 'l': ftst = OP_FTLINK; break;
2977 case 'p': ftst = OP_FTPIPE; break;
2978 case 'S': ftst = OP_FTSOCK; break;
2979 case 'u': ftst = OP_FTSUID; break;
2980 case 'g': ftst = OP_FTSGID; break;
2981 case 'k': ftst = OP_FTSVTX; break;
2982 case 'b': ftst = OP_FTBLK; break;
2983 case 'c': ftst = OP_FTCHR; break;
2984 case 't': ftst = OP_FTTTY; break;
2985 case 'T': ftst = OP_FTTEXT; break;
2986 case 'B': ftst = OP_FTBINARY; break;
2987 case 'M': case 'A': case 'C':
2988 gv_fetchpv("\024",TRUE, SVt_PV);
2989 switch (tmp) {
2990 case 'M': ftst = OP_FTMTIME; break;
2991 case 'A': ftst = OP_FTATIME; break;
2992 case 'C': ftst = OP_FTCTIME; break;
2993 default: break;
2994 }
2995 break;
378cc40b 2996 default:
378cc40b
LW
2997 break;
2998 }
e5edeb50 2999 if (ftst) {
eb160463 3000 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3001 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3002 "### Saw file test %c\n", (int)ftst);
5f80b19c 3003 } );
e5edeb50
JH
3004 FTST(ftst);
3005 }
3006 else {
3007 /* Assume it was a minus followed by a one-letter named
3008 * subroutine call (or a -bareword), then. */
95c31fe3 3009 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0
RGS
3010 "### '-%c' looked like a file test but was not\n",
3011 tmp);
5f80b19c 3012 } );
3cf7b4c4 3013 s = --PL_bufptr;
e5edeb50 3014 }
378cc40b 3015 }
a687059c
LW
3016 tmp = *s++;
3017 if (*s == tmp) {
3018 s++;
3280af22 3019 if (PL_expect == XOPERATOR)
79072805
LW
3020 TERM(POSTDEC);
3021 else
3022 OPERATOR(PREDEC);
3023 }
3024 else if (*s == '>') {
3025 s++;
3026 s = skipspace(s);
7e2040f0 3027 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3028 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3029 TOKEN(ARROW);
79072805 3030 }
748a9306
LW
3031 else if (*s == '$')
3032 OPERATOR(ARROW);
463ee0b2 3033 else
748a9306 3034 TERM(ARROW);
a687059c 3035 }
3280af22 3036 if (PL_expect == XOPERATOR)
79072805
LW
3037 Aop(OP_SUBTRACT);
3038 else {
3280af22 3039 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3040 check_uni();
79072805 3041 OPERATOR('-'); /* unary minus */
2f3197b3 3042 }
79072805 3043
378cc40b 3044 case '+':
a687059c
LW
3045 tmp = *s++;
3046 if (*s == tmp) {
378cc40b 3047 s++;
3280af22 3048 if (PL_expect == XOPERATOR)
79072805
LW
3049 TERM(POSTINC);
3050 else
3051 OPERATOR(PREINC);
378cc40b 3052 }
3280af22 3053 if (PL_expect == XOPERATOR)
79072805
LW
3054 Aop(OP_ADD);
3055 else {
3280af22 3056 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3057 check_uni();
a687059c 3058 OPERATOR('+');
2f3197b3 3059 }
a687059c 3060
378cc40b 3061 case '*':
3280af22
NIS
3062 if (PL_expect != XOPERATOR) {
3063 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3064 PL_expect = XOPERATOR;
3065 force_ident(PL_tokenbuf, '*');
3066 if (!*PL_tokenbuf)
a0d0e21e 3067 PREREF('*');
79072805 3068 TERM('*');
a687059c 3069 }
79072805
LW
3070 s++;
3071 if (*s == '*') {
a687059c 3072 s++;
79072805 3073 PWop(OP_POW);
a687059c 3074 }
79072805
LW
3075 Mop(OP_MULTIPLY);
3076
378cc40b 3077 case '%':
3280af22 3078 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3079 ++s;
3080 Mop(OP_MODULO);
a687059c 3081 }
3280af22
NIS
3082 PL_tokenbuf[0] = '%';
3083 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3084 if (!PL_tokenbuf[1]) {
bbce6d69 3085 PREREF('%');
a687059c 3086 }
3280af22 3087 PL_pending_ident = '%';
bbce6d69 3088 TERM('%');
a687059c 3089
378cc40b 3090 case '^':
79072805 3091 s++;
a0d0e21e 3092 BOop(OP_BIT_XOR);
79072805 3093 case '[':
3280af22 3094 PL_lex_brackets++;
79072805 3095 /* FALL THROUGH */
378cc40b 3096 case '~':
378cc40b 3097 case ',':
378cc40b
LW
3098 tmp = *s++;
3099 OPERATOR(tmp);
a0d0e21e
LW
3100 case ':':
3101 if (s[1] == ':') {
3102 len = 0;
3103 goto just_a_word;
3104 }
3105 s++;
09bef843
SB
3106 switch (PL_expect) {
3107 OP *attrs;
3108 case XOPERATOR:
3109 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3110 break;
3111 PL_bufptr = s; /* update in case we back off */
3112 goto grabattrs;
3113 case XATTRBLOCK:
3114 PL_expect = XBLOCK;
3115 goto grabattrs;
3116 case XATTRTERM:
3117 PL_expect = XTERMBLOCK;
3118 grabattrs:
3119 s = skipspace(s);
3120 attrs = Nullop;
7e2040f0 3121 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3122 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3123 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3124 if (tmp < 0) tmp = -tmp;
3125 switch (tmp) {
3126 case KEY_or:
3127 case KEY_and:
c963b151 3128 case KEY_err:
f9829d6b
GS
3129 case KEY_for:
3130 case KEY_unless:
3131 case KEY_if:
3132 case KEY_while:
3133 case KEY_until:
3134 goto got_attrs;
3135 default:
3136 break;
3137 }
3138 }
09bef843
SB
3139 if (*d == '(') {
3140 d = scan_str(d,TRUE,TRUE);
3141 if (!d) {
09bef843
SB
3142 /* MUST advance bufptr here to avoid bogus
3143 "at end of line" context messages from yyerror().
3144 */
3145 PL_bufptr = s + len;
3146 yyerror("Unterminated attribute parameter in attribute list");
3147 if (attrs)
3148 op_free(attrs);
bbf60fe6 3149 return REPORT(0); /* EOF indicator */
09bef843
SB
3150 }
3151 }
3152 if (PL_lex_stuff) {
3153 SV *sv = newSVpvn(s, len);
3154 sv_catsv(sv, PL_lex_stuff);
3155 attrs = append_elem(OP_LIST, attrs,
3156 newSVOP(OP_CONST, 0, sv));
3157 SvREFCNT_dec(PL_lex_stuff);
3158 PL_lex_stuff = Nullsv;
3159 }
3160 else {
371fce9b
DM
3161 if (len == 6 && strnEQ(s, "unique", len)) {
3162 if (PL_in_my == KEY_our)
3163#ifdef USE_ITHREADS
3164 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3165#else
3166 ; /* skip to avoid loading attributes.pm */
3167#endif
3168 else
3169 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3170 }
3171
d3cea301
SB
3172 /* NOTE: any CV attrs applied here need to be part of
3173 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3174 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3175 CvLVALUE_on(PL_compcv);
3176 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3177 CvLOCKED_on(PL_compcv);
3178 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3179 CvMETHOD_on(PL_compcv);
06492da6
SF
3180 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3181 CvASSERTION_on(PL_compcv);
78f9721b
SM
3182 /* After we've set the flags, it could be argued that
3183 we don't need to do the attributes.pm-based setting
3184 process, and shouldn't bother appending recognized
d3cea301
SB
3185 flags. To experiment with that, uncomment the
3186 following "else". (Note that's already been
3187 uncommented. That keeps the above-applied built-in
3188 attributes from being intercepted (and possibly
3189 rejected) by a package's attribute routines, but is
3190 justified by the performance win for the common case
3191 of applying only built-in attributes.) */
0256094b 3192 else
78f9721b
SM
3193 attrs = append_elem(OP_LIST, attrs,
3194 newSVOP(OP_CONST, 0,
3195 newSVpvn(s, len)));
09bef843
SB
3196 }
3197 s = skipspace(d);
0120eecf 3198 if (*s == ':' && s[1] != ':')
09bef843 3199 s = skipspace(s+1);
0120eecf
GS
3200 else if (s == d)
3201 break; /* require real whitespace or :'s */
09bef843 3202 }
f9829d6b 3203 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3204 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3205 char q = ((*s == '\'') ? '"' : '\'');
3206 /* If here for an expression, and parsed no attrs, back off. */
3207 if (tmp == '=' && !attrs) {
3208 s = PL_bufptr;
3209 break;
3210 }
3211 /* MUST advance bufptr here to avoid bogus "at end of line"
3212 context messages from yyerror().
3213 */
3214 PL_bufptr = s;
3215 if (!*s)
3216 yyerror("Unterminated attribute list");
3217 else
3218 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3219 q, *s, q));
3220 if (attrs)
3221 op_free(attrs);
3222 OPERATOR(':');
3223 }
f9829d6b 3224 got_attrs:
09bef843
SB
3225 if (attrs) {
3226 PL_nextval[PL_nexttoke].opval = attrs;
3227 force_next(THING);
3228 }
3229 TOKEN(COLONATTR);
3230 }
a0d0e21e 3231 OPERATOR(':');
8990e307
LW
3232 case '(':
3233 s++;
3280af22
NIS
3234 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3235 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3236 else
3280af22 3237 PL_expect = XTERM;
4a202259 3238 s = skipspace(s);
a0d0e21e 3239 TOKEN('(');
378cc40b 3240 case ';':
f4dd75d9 3241 CLINE;
378cc40b
LW
3242 tmp = *s++;
3243 OPERATOR(tmp);
3244 case ')':
378cc40b 3245 tmp = *s++;
16d20bd9
AD
3246 s = skipspace(s);
3247 if (*s == '{')
3248 PREBLOCK(tmp);
378cc40b 3249 TERM(tmp);
79072805
LW
3250 case ']':
3251 s++;
3280af22 3252 if (PL_lex_brackets <= 0)
d98d5fff 3253 yyerror("Unmatched right square bracket");
463ee0b2 3254 else
3280af22
NIS
3255 --PL_lex_brackets;
3256 if (PL_lex_state == LEX_INTERPNORMAL) {
3257 if (PL_lex_brackets == 0) {
a0d0e21e 3258 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3259 PL_lex_state = LEX_INTERPEND;
79072805
LW
3260 }
3261 }
4633a7c4 3262 TERM(']');
79072805
LW
3263 case '{':
3264 leftbracket:
79072805 3265 s++;
3280af22 3266 if (PL_lex_brackets > 100) {
8edd5f42 3267 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3268 }
3280af22 3269 switch (PL_expect) {
a0d0e21e 3270 case XTERM:
3280af22 3271 if (PL_lex_formbrack) {
a0d0e21e
LW
3272 s--;
3273 PRETERMBLOCK(DO);
3274 }
3280af22
NIS
3275 if (PL_oldoldbufptr == PL_last_lop)
3276 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3277 else
3280af22 3278 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3279 OPERATOR(HASHBRACK);
a0d0e21e 3280 case XOPERATOR:
bf4acbe4 3281 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3282 s++;
44a8e56a 3283 d = s;
3280af22
NIS
3284 PL_tokenbuf[0] = '\0';
3285 if (d < PL_bufend && *d == '-') {
3286 PL_tokenbuf[0] = '-';
44a8e56a 3287 d++;
bf4acbe4 3288 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3289 d++;
3290 }
7e2040f0 3291 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3292 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3293 FALSE, &len);
bf4acbe4 3294 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3295 d++;
3296 if (*d == '}') {
3280af22 3297 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3298 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3299 if (minus)
3300 force_next('-');
748a9306
LW
3301 }
3302 }
3303 /* FALL THROUGH */
09bef843 3304 case XATTRBLOCK:
748a9306 3305 case XBLOCK:
3280af22
NIS
3306 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3307 PL_expect = XSTATE;
a0d0e21e 3308 break;
09bef843 3309 case XATTRTERM:
a0d0e21e 3310 case XTERMBLOCK:
3280af22
NIS
3311 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3312 PL_expect = XSTATE;
a0d0e21e
LW
3313 break;
3314 default: {
3315 char *t;
3280af22
NIS
3316 if (PL_oldoldbufptr == PL_last_lop)
3317 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3318 else
3280af22 3319 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3320 s = skipspace(s);
8452ff4b
SB
3321 if (*s == '}') {
3322 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3323 PL_expect = XTERM;
3324 /* This hack is to get the ${} in the message. */
3325 PL_bufptr = s+1;
3326 yyerror("syntax error");
3327 break;
3328 }
a0d0e21e 3329 OPERATOR(HASHBRACK);
8452ff4b 3330 }
b8a4b1be
GS
3331 /* This hack serves to disambiguate a pair of curlies
3332 * as being a block or an anon hash. Normally, expectation
3333 * determines that, but in cases where we're not in a
3334 * position to expect anything in particular (like inside
3335 * eval"") we have to resolve the ambiguity. This code
3336 * covers the case where the first term in the curlies is a
3337 * quoted string. Most other cases need to be explicitly
3338 * disambiguated by prepending a `+' before the opening
3339 * curly in order to force resolution as an anon hash.
3340 *
3341 * XXX should probably propagate the outer expectation
3342 * into eval"" to rely less on this hack, but that could
3343 * potentially break current behavior of eval"".
3344 * GSAR 97-07-21
3345 */
3346 t = s;
3347 if (*s == '\'' || *s == '"' || *s == '`') {
3348 /* common case: get past first string, handling escapes */
3280af22 3349 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3350 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3351 t++;
3352 t++;
a0d0e21e 3353 }
b8a4b1be 3354 else if (*s == 'q') {
3280af22 3355 if (++t < PL_bufend
b8a4b1be 3356 && (!isALNUM(*t)
3280af22 3357 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3358 && !isALNUM(*t))))
3359 {
abc667d1 3360 /* skip q//-like construct */
b8a4b1be
GS
3361 char *tmps;
3362 char open, close, term;
3363 I32 brackets = 1;
3364
3280af22 3365 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3366 t++;
abc667d1
DM
3367 /* check for q => */
3368 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3369 OPERATOR(HASHBRACK);
3370 }
b8a4b1be
GS
3371 term = *t;
3372 open = term;
3373 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3374 term = tmps[5];
3375 close = term;
3376 if (open == close)
3280af22
NIS
3377 for (t++; t < PL_bufend; t++) {
3378 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3379 t++;
6d07e5e9 3380 else if (*t == open)
b8a4b1be
GS
3381 break;
3382 }
abc667d1 3383 else {
3280af22
NIS
3384 for (t++; t < PL_bufend; t++) {
3385 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3386 t++;
6d07e5e9 3387 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3388 break;
3389 else if (*t == open)
3390 brackets++;
3391 }
abc667d1
DM
3392 }
3393 t++;
b8a4b1be 3394 }
abc667d1
DM
3395 else
3396 /* skip plain q word */
3397 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3398 t += UTF8SKIP(t);
a0d0e21e 3399 }
7e2040f0 3400 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3401 t += UTF8SKIP(t);
7e2040f0 3402 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3403 t += UTF8SKIP(t);
a0d0e21e 3404 }
3280af22 3405 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3406 t++;
b8a4b1be
GS
3407 /* if comma follows first term, call it an anon hash */
3408 /* XXX it could be a comma expression with loop modifiers */
3280af22 3409 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3410 || (*t == '=' && t[1] == '>')))
a0d0e21e 3411 OPERATOR(HASHBRACK);
3280af22 3412 if (PL_expect == XREF)
4e4e412b 3413 PL_expect = XTERM;
a0d0e21e 3414 else {
3280af22
NIS
3415 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3416 PL_expect = XSTATE;
a0d0e21e 3417 }
8990e307 3418 }
a0d0e21e 3419 break;
463ee0b2 3420 }
57843af0 3421 yylval.ival = CopLINE(PL_curcop);
79072805 3422 if (isSPACE(*s) || *s == '#')
3280af22 3423 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3424 TOKEN('{');
378cc40b 3425 case '}':
79072805
LW
3426 rightbracket:
3427 s++;
3280af22 3428 if (PL_lex_brackets <= 0)
d98d5fff 3429 yyerror("Unmatched right curly bracket");
463ee0b2 3430 else
3280af22 3431 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3432 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3433 PL_lex_formbrack = 0;
3434 if (PL_lex_state == LEX_INTERPNORMAL) {
3435 if (PL_lex_brackets == 0) {
9059aa12
LW
3436 if (PL_expect & XFAKEBRACK) {
3437 PL_expect &= XENUMMASK;
3280af22
NIS
3438 PL_lex_state = LEX_INTERPEND;
3439 PL_bufptr = s;
cea2e8a9 3440 return yylex(); /* ignore fake brackets */
79072805 3441 }
fa83b5b6 3442 if (*s == '-' && s[1] == '>')
3280af22 3443 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3444 else if (*s != '[' && *s != '{')
3280af22 3445 PL_lex_state = LEX_INTERPEND;
79072805
LW
3446 }
3447 }
9059aa12
LW
3448 if (PL_expect & XFAKEBRACK) {
3449 PL_expect &= XENUMMASK;
3280af22 3450 PL_bufptr = s;
cea2e8a9 3451 return yylex(); /* ignore fake brackets */
748a9306 3452 }
79072805
LW
3453 force_next('}');
3454 TOKEN(';');
378cc40b
LW
3455 case '&':
3456 s++;
3457 tmp = *s++;
3458 if (tmp == '&')
a0d0e21e 3459 AOPERATOR(ANDAND);
378cc40b 3460 s--;
3280af22 3461 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3462 if (ckWARN(WARN_SEMICOLON)
3463 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3464 {
57843af0 3465 CopLINE_dec(PL_curcop);
9014280d 3466 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3467 CopLINE_inc(PL_curcop);
463ee0b2 3468 }
79072805 3469 BAop(OP_BIT_AND);
463ee0b2 3470 }
79072805 3471
3280af22
NIS
3472 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3473 if (*PL_tokenbuf) {
3474 PL_expect = XOPERATOR;
3475 force_ident(PL_tokenbuf, '&');
463ee0b2 3476 }
79072805
LW
3477 else
3478 PREREF('&');
c07a80fd 3479 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3480 TERM('&');
3481
378cc40b
LW
3482 case '|':
3483 s++;
3484 tmp = *s++;
3485 if (tmp == '|')
a0d0e21e 3486 AOPERATOR(OROR);
378cc40b 3487 s--;
79072805 3488 BOop(OP_BIT_OR);
378cc40b
LW
3489 case '=':
3490 s++;
3491 tmp = *s++;
3492 if (tmp == '=')
79072805
LW
3493 Eop(OP_EQ);
3494 if (tmp == '>')
3495 OPERATOR(',');
378cc40b 3496 if (tmp == '~')
79072805 3497 PMop(OP_MATCH);
599cee73 3498 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3499 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3500 s--;
3280af22
NIS
3501 if (PL_expect == XSTATE && isALPHA(tmp) &&
3502 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3503 {
3280af22
NIS
3504 if (PL_in_eval && !PL_rsfp) {
3505 d = PL_bufend;
a5f75d66
AD
3506 while (s < d) {
3507 if (*s++ == '\n') {
3508 incline(s);
3509 if (strnEQ(s,"=cut",4)) {
3510 s = strchr(s,'\n');
3511 if (s)
3512 s++;
3513 else
3514 s = d;
3515 incline(s);
3516 goto retry;
3517 }
3518 }
3519 }
3520 goto retry;
3521 }
3280af22
NIS
3522 s = PL_bufend;
3523 PL_doextract = TRUE;
a0d0e21e
LW
3524 goto retry;
3525 }
3280af22 3526 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3527 char *t;
51882d45 3528#ifdef PERL_STRICT_CR
bf4acbe4 3529 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3530#else
bf4acbe4 3531 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3532#endif
a0d0e21e
LW
3533 if (*t == '\n' || *t == '#') {
3534 s--;
3280af22 3535 PL_expect = XBLOCK;
a0d0e21e
LW
3536 goto leftbracket;
3537 }
79072805 3538 }
a0d0e21e
LW
3539 yylval.ival = 0;
3540 OPERATOR(ASSIGNOP);
378cc40b
LW
3541 case '!':
3542 s++;
3543 tmp = *s++;
984200d0 3544 if (tmp == '=') {
decca21c
YST
3545 /* was this !=~ where !~ was meant?
3546 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3547
984200d0
YST
3548 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
3549 char *t = s+1;
3550
3551 while (t < PL_bufend && isSPACE(*t))
3552 ++t;
3553
decca21c
YST
3554 if (*t == '/' || *t == '?' ||
3555 ((*t == 'm' || *t == 's' || *t == 'y') && !isALNUM(t[1])) ||
3556 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
984200d0
YST
3557 Perl_warner(aTHX_ packWA