This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Make Perl_keyword() another 30% faster.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a
PP
64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff
PP
66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
bbf60fe6
DM
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
79072805
LW
94#ifdef ff_next
95#undef ff_next
d48672a2
LW
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 136# define REPORT(retval) tokereport(s,(int)retval)
998054bd 137#else
bbf60fe6 138# define REPORT(retval) (retval)
998054bd
SC
139#endif
140
bbf60fe6
DM
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 161
a687059c
LW
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
a687059c 166 */
bbf60fe6
DM
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
6f33ba73 169 PL_expect = x, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
bbf60fe6
DM
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 178
bbf60fe6
DM
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
3280af22
NIS
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
79072805 186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
bbf60fe6
DM
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
9041c2e3 203{
bbf60fe6
DM
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 270
bbf60fe6
DM
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
f2b990bf 278 SV* report = newSVpvn("<== ", 4);
bbf60fe6
DM
279
280 for (p = debug_tokens; p->token; p++) {
281 if (p->token == (int)rv) {
282 name = p->name;
283 type = p->type;
284 break;
285 }
286 }
287 if (name)
54667de8 288 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
289 else if ((char)rv > ' ' && (char)rv < '~')
290 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
291 else if (!rv)
54667de8 292 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
293 else
294 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
295 switch (type) {
296 case TOKENTYPE_NONE:
297 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
298 break;
299 case TOKENTYPE_IVAL:
300 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
301 break;
302 case TOKENTYPE_OPNUM:
303 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
304 PL_op_name[yylval.ival]);
305 break;
306 case TOKENTYPE_PVAL:
307 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
308 break;
309 case TOKENTYPE_OPVAL:
401441c0
RGS
310 if (yylval.opval)
311 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 312 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
313 else
314 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
315 break;
316 }
317 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
998054bd
SC
318 if (s - PL_bufptr > 0)
319 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
320 else {
321 if (PL_oldbufptr && *PL_oldbufptr)
322 sv_catpv(report, PL_tokenbuf);
323 }
bbf60fe6
DM
324 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
325 };
326 return (int)rv;
998054bd
SC
327}
328
8fa7f367
JH
329#endif
330
ffb4593c
NT
331/*
332 * S_ao
333 *
c963b151
BD
334 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
335 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
336 */
337
76e3520e 338STATIC int
cea2e8a9 339S_ao(pTHX_ int toketype)
a0d0e21e 340{
3280af22
NIS
341 if (*PL_bufptr == '=') {
342 PL_bufptr++;
a0d0e21e
LW
343 if (toketype == ANDAND)
344 yylval.ival = OP_ANDASSIGN;
345 else if (toketype == OROR)
346 yylval.ival = OP_ORASSIGN;
c963b151
BD
347 else if (toketype == DORDOR)
348 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
349 toketype = ASSIGNOP;
350 }
351 return toketype;
352}
353
ffb4593c
NT
354/*
355 * S_no_op
356 * When Perl expects an operator and finds something else, no_op
357 * prints the warning. It always prints "<something> found where
358 * operator expected. It prints "Missing semicolon on previous line?"
359 * if the surprise occurs at the start of the line. "do you need to
360 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
361 * where the compiler doesn't know if foo is a method call or a function.
362 * It prints "Missing operator before end of line" if there's nothing
363 * after the missing operator, or "... before <...>" if there is something
364 * after the missing operator.
365 */
366
76e3520e 367STATIC void
cea2e8a9 368S_no_op(pTHX_ char *what, char *s)
463ee0b2 369{
3280af22
NIS
370 char *oldbp = PL_bufptr;
371 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 372
1189a94a
GS
373 if (!s)
374 s = oldbp;
07c798fb 375 else
1189a94a 376 PL_bufptr = s;
cea2e8a9 377 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
378 if (ckWARN_d(WARN_SYNTAX)) {
379 if (is_first)
380 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
381 "\t(Missing semicolon on previous line?)\n");
382 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
383 char *t;
384 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
385 if (t < PL_bufptr && isSPACE(*t))
386 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
387 "\t(Do you need to predeclare %.*s?)\n",
388 t - PL_oldoldbufptr, PL_oldoldbufptr);
389 }
390 else {
391 assert(s >= oldbp);
392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
393 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
394 }
07c798fb 395 }
3280af22 396 PL_bufptr = oldbp;
8990e307
LW
397}
398
ffb4593c
NT
399/*
400 * S_missingterm
401 * Complain about missing quote/regexp/heredoc terminator.
402 * If it's called with (char *)NULL then it cauterizes the line buffer.
403 * If we're in a delimited string and the delimiter is a control
404 * character, it's reformatted into a two-char sequence like ^C.
405 * This is fatal.
406 */
407
76e3520e 408STATIC void
cea2e8a9 409S_missingterm(pTHX_ char *s)
8990e307
LW
410{
411 char tmpbuf[3];
412 char q;
413 if (s) {
414 char *nl = strrchr(s,'\n');
d2719217 415 if (nl)
8990e307
LW
416 *nl = '\0';
417 }
9d116dd7
JH
418 else if (
419#ifdef EBCDIC
420 iscntrl(PL_multi_close)
421#else
422 PL_multi_close < 32 || PL_multi_close == 127
423#endif
424 ) {
8990e307 425 *tmpbuf = '^';
3280af22 426 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
427 s = "\\n";
428 tmpbuf[2] = '\0';
429 s = tmpbuf;
430 }
431 else {
eb160463 432 *tmpbuf = (char)PL_multi_close;
8990e307
LW
433 tmpbuf[1] = '\0';
434 s = tmpbuf;
435 }
436 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 437 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 438}
79072805 439
ffb4593c
NT
440/*
441 * Perl_deprecate
ffb4593c
NT
442 */
443
79072805 444void
864dbfa3 445Perl_deprecate(pTHX_ char *s)
a0d0e21e 446{
599cee73 447 if (ckWARN(WARN_DEPRECATED))
9014280d 448 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
449}
450
12bcd1a6
PM
451void
452Perl_deprecate_old(pTHX_ char *s)
453{
454 /* This function should NOT be called for any new deprecated warnings */
455 /* Use Perl_deprecate instead */
456 /* */
457 /* It is here to maintain backward compatibility with the pre-5.8 */
458 /* warnings category hierarchy. The "deprecated" category used to */
459 /* live under the "syntax" category. It is now a top-level category */
460 /* in its own right. */
461
462 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
463 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
464 "Use of %s is deprecated", s);
465}
466
ffb4593c
NT
467/*
468 * depcom
9cbb5ea2 469 * Deprecate a comma-less variable list.
ffb4593c
NT
470 */
471
76e3520e 472STATIC void
cea2e8a9 473S_depcom(pTHX)
a0d0e21e 474{
12bcd1a6 475 deprecate_old("comma-less variable list");
a0d0e21e
LW
476}
477
ffb4593c 478/*
9cbb5ea2
GS
479 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
480 * utf16-to-utf8-reversed.
ffb4593c
NT
481 */
482
c39cd008
GS
483#ifdef PERL_CR_FILTER
484static void
485strip_return(SV *sv)
486{
487 register char *s = SvPVX(sv);
488 register char *e = s + SvCUR(sv);
489 /* outer loop optimized to do nothing if there are no CR-LFs */
490 while (s < e) {
491 if (*s++ == '\r' && *s == '\n') {
492 /* hit a CR-LF, need to copy the rest */
493 register char *d = s - 1;
494 *d++ = *s++;
495 while (s < e) {
496 if (*s == '\r' && s[1] == '\n')
497 s++;
498 *d++ = *s++;
499 }
500 SvCUR(sv) -= s - d;
501 return;
502 }
503 }
504}
a868473f 505
76e3520e 506STATIC I32
c39cd008 507S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 508{
c39cd008
GS
509 I32 count = FILTER_READ(idx+1, sv, maxlen);
510 if (count > 0 && !maxlen)
511 strip_return(sv);
512 return count;
a868473f
NIS
513}
514#endif
515
ffb4593c
NT
516/*
517 * Perl_lex_start
9cbb5ea2
GS
518 * Initialize variables. Uses the Perl save_stack to save its state (for
519 * recursive calls to the parser).
ffb4593c
NT
520 */
521
a0d0e21e 522void
864dbfa3 523Perl_lex_start(pTHX_ SV *line)
79072805 524{
8990e307
LW
525 char *s;
526 STRLEN len;
527
3280af22
NIS
528 SAVEI32(PL_lex_dojoin);
529 SAVEI32(PL_lex_brackets);
3280af22
NIS
530 SAVEI32(PL_lex_casemods);
531 SAVEI32(PL_lex_starts);
532 SAVEI32(PL_lex_state);
7766f137 533 SAVEVPTR(PL_lex_inpat);
3280af22 534 SAVEI32(PL_lex_inwhat);
18b09519
GS
535 if (PL_lex_state == LEX_KNOWNEXT) {
536 I32 toke = PL_nexttoke;
537 while (--toke >= 0) {
538 SAVEI32(PL_nexttype[toke]);
539 SAVEVPTR(PL_nextval[toke]);
540 }
541 SAVEI32(PL_nexttoke);
18b09519 542 }
57843af0 543 SAVECOPLINE(PL_curcop);
3280af22
NIS
544 SAVEPPTR(PL_bufptr);
545 SAVEPPTR(PL_bufend);
546 SAVEPPTR(PL_oldbufptr);
547 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
548 SAVEPPTR(PL_last_lop);
549 SAVEPPTR(PL_last_uni);
3280af22
NIS
550 SAVEPPTR(PL_linestart);
551 SAVESPTR(PL_linestr);
8edd5f42
RGS
552 SAVEGENERICPV(PL_lex_brackstack);
553 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 554 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
555 SAVESPTR(PL_lex_stuff);
556 SAVEI32(PL_lex_defer);
09bef843 557 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 558 SAVESPTR(PL_lex_repl);
bebdddfc
GS
559 SAVEINT(PL_expect);
560 SAVEINT(PL_lex_expect);
3280af22
NIS
561
562 PL_lex_state = LEX_NORMAL;
563 PL_lex_defer = 0;
564 PL_expect = XSTATE;
565 PL_lex_brackets = 0;
3280af22
NIS
566 New(899, PL_lex_brackstack, 120, char);
567 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
568 PL_lex_casemods = 0;
569 *PL_lex_casestack = '\0';
570 PL_lex_dojoin = 0;
571 PL_lex_starts = 0;
572 PL_lex_stuff = Nullsv;
573 PL_lex_repl = Nullsv;
574 PL_lex_inpat = 0;
76be56bc 575 PL_nexttoke = 0;
3280af22 576 PL_lex_inwhat = 0;
09bef843 577 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
578 PL_linestr = line;
579 if (SvREADONLY(PL_linestr))
580 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
581 s = SvPV(PL_linestr, len);
6f27f9a7 582 if (!len || s[len-1] != ';') {
3280af22
NIS
583 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
584 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
585 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 586 }
3280af22
NIS
587 SvTEMP_off(PL_linestr);
588 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
589 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 590 PL_last_lop = PL_last_uni = Nullch;
3280af22 591 PL_rsfp = 0;
79072805 592}
a687059c 593
ffb4593c
NT
594/*
595 * Perl_lex_end
9cbb5ea2
GS
596 * Finalizer for lexing operations. Must be called when the parser is
597 * done with the lexer.
ffb4593c
NT
598 */
599
463ee0b2 600void
864dbfa3 601Perl_lex_end(pTHX)
463ee0b2 602{
3280af22 603 PL_doextract = FALSE;
463ee0b2
LW
604}
605
ffb4593c
NT
606/*
607 * S_incline
608 * This subroutine has nothing to do with tilting, whether at windmills
609 * or pinball tables. Its name is short for "increment line". It
57843af0 610 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 611 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
612 * # line 500 "foo.pm"
613 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
614 */
615
76e3520e 616STATIC void
cea2e8a9 617S_incline(pTHX_ char *s)
463ee0b2
LW
618{
619 char *t;
620 char *n;
73659bf1 621 char *e;
463ee0b2 622 char ch;
463ee0b2 623
57843af0 624 CopLINE_inc(PL_curcop);
463ee0b2
LW
625 if (*s++ != '#')
626 return;
bf4acbe4 627 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
628 if (strnEQ(s, "line", 4))
629 s += 4;
630 else
631 return;
084592ab 632 if (SPACE_OR_TAB(*s))
73659bf1 633 s++;
4e553d73 634 else
73659bf1 635 return;
bf4acbe4 636 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
637 if (!isDIGIT(*s))
638 return;
639 n = s;
640 while (isDIGIT(*s))
641 s++;
bf4acbe4 642 while (SPACE_OR_TAB(*s))
463ee0b2 643 s++;
73659bf1 644 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 645 s++;
73659bf1
GS
646 e = t + 1;
647 }
463ee0b2 648 else {
463ee0b2 649 for (t = s; !isSPACE(*t); t++) ;
73659bf1 650 e = t;
463ee0b2 651 }
bf4acbe4 652 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
653 e++;
654 if (*e != '\n' && *e != '\0')
655 return; /* false alarm */
656
463ee0b2
LW
657 ch = *t;
658 *t = '\0';
f4dd75d9 659 if (t - s > 0) {
05ec9bb3 660 CopFILE_free(PL_curcop);
57843af0 661 CopFILE_set(PL_curcop, s);
f4dd75d9 662 }
463ee0b2 663 *t = ch;
57843af0 664 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
665}
666
ffb4593c
NT
667/*
668 * S_skipspace
669 * Called to gobble the appropriate amount and type of whitespace.
670 * Skips comments as well.
671 */
672
76e3520e 673STATIC char *
cea2e8a9 674S_skipspace(pTHX_ register char *s)
a687059c 675{
3280af22 676 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 677 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
678 s++;
679 return s;
680 }
681 for (;;) {
fd049845 682 STRLEN prevlen;
09bef843 683 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 684 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
685 while (s < PL_bufend && isSPACE(*s)) {
686 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
687 incline(s);
688 }
ffb4593c
NT
689
690 /* comment */
3280af22
NIS
691 if (s < PL_bufend && *s == '#') {
692 while (s < PL_bufend && *s != '\n')
463ee0b2 693 s++;
60e6418e 694 if (s < PL_bufend) {
463ee0b2 695 s++;
60e6418e
GS
696 if (PL_in_eval && !PL_rsfp) {
697 incline(s);
698 continue;
699 }
700 }
463ee0b2 701 }
ffb4593c
NT
702
703 /* only continue to recharge the buffer if we're at the end
704 * of the buffer, we're not reading from a source filter, and
705 * we're in normal lexing mode
706 */
09bef843
SB
707 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
708 PL_lex_state == LEX_FORMLINE)
463ee0b2 709 return s;
ffb4593c
NT
710
711 /* try to recharge the buffer */
9cbb5ea2
GS
712 if ((s = filter_gets(PL_linestr, PL_rsfp,
713 (prevlen = SvCUR(PL_linestr)))) == Nullch)
714 {
715 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
716 if (PL_minus_p) {
717 sv_setpv(PL_linestr,
718 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 719 PL_minus_n = PL_minus_p = 0;
a0d0e21e 720 }
01a19ab0
NC
721 else if (PL_minus_n) {
722 sv_setpvn(PL_linestr, ";}", 2);
723 PL_minus_n = 0;
724 }
a0d0e21e 725 else
4147a61b 726 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
727
728 /* reset variables for next time we lex */
9cbb5ea2
GS
729 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
730 = SvPVX(PL_linestr);
3280af22 731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 732 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
733
734 /* Close the filehandle. Could be from -P preprocessor,
735 * STDIN, or a regular file. If we were reading code from
736 * STDIN (because the commandline held no -e or filename)
737 * then we don't close it, we reset it so the code can
738 * read from STDIN too.
739 */
740
3280af22
NIS
741 if (PL_preprocess && !PL_in_eval)
742 (void)PerlProc_pclose(PL_rsfp);
743 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
744 PerlIO_clearerr(PL_rsfp);
8990e307 745 else
3280af22
NIS
746 (void)PerlIO_close(PL_rsfp);
747 PL_rsfp = Nullfp;
463ee0b2
LW
748 return s;
749 }
ffb4593c
NT
750
751 /* not at end of file, so we only read another line */
09bef843
SB
752 /* make corresponding updates to old pointers, for yyerror() */
753 oldprevlen = PL_oldbufptr - PL_bufend;
754 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
755 if (PL_last_uni)
756 oldunilen = PL_last_uni - PL_bufend;
757 if (PL_last_lop)
758 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
759 PL_linestart = PL_bufptr = s + prevlen;
760 PL_bufend = s + SvCUR(PL_linestr);
761 s = PL_bufptr;
09bef843
SB
762 PL_oldbufptr = s + oldprevlen;
763 PL_oldoldbufptr = s + oldoldprevlen;
764 if (PL_last_uni)
765 PL_last_uni = s + oldunilen;
766 if (PL_last_lop)
767 PL_last_lop = s + oldloplen;
a0d0e21e 768 incline(s);
ffb4593c
NT
769
770 /* debugger active and we're not compiling the debugger code,
771 * so store the line into the debugger's array of lines
772 */
3280af22 773 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
774 SV *sv = NEWSV(85,0);
775
776 sv_upgrade(sv, SVt_PVMG);
3280af22 777 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
778 (void)SvIOK_on(sv);
779 SvIVX(sv) = 0;
57843af0 780 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 781 }
463ee0b2 782 }
a687059c 783}
378cc40b 784
ffb4593c
NT
785/*
786 * S_check_uni
787 * Check the unary operators to ensure there's no ambiguity in how they're
788 * used. An ambiguous piece of code would be:
789 * rand + 5
790 * This doesn't mean rand() + 5. Because rand() is a unary operator,
791 * the +5 is its argument.
792 */
793
76e3520e 794STATIC void
cea2e8a9 795S_check_uni(pTHX)
ba106d47 796{
2f3197b3 797 char *s;
a0d0e21e 798 char *t;
2f3197b3 799
3280af22 800 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 801 return;
3280af22
NIS
802 while (isSPACE(*PL_last_uni))
803 PL_last_uni++;
7e2040f0 804 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 805 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 806 return;
0453d815 807 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 808 char ch = *s;
0453d815 809 *s = '\0';
9014280d 810 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 811 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
812 PL_last_uni);
813 *s = ch;
814 }
2f3197b3
LW
815}
816
ffb4593c
NT
817/*
818 * LOP : macro to build a list operator. Its behaviour has been replaced
819 * with a subroutine, S_lop() for which LOP is just another name.
820 */
821
a0d0e21e
LW
822#define LOP(f,x) return lop(f,x,s)
823
ffb4593c
NT
824/*
825 * S_lop
826 * Build a list operator (or something that might be one). The rules:
827 * - if we have a next token, then it's a list operator [why?]
828 * - if the next thing is an opening paren, then it's a function
829 * - else it's a list operator
830 */
831
76e3520e 832STATIC I32
a0be28da 833S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 834{
79072805 835 yylval.ival = f;
35c8bce7 836 CLINE;
3280af22
NIS
837 PL_expect = x;
838 PL_bufptr = s;
839 PL_last_lop = PL_oldbufptr;
eb160463 840 PL_last_lop_op = (OPCODE)f;
3280af22 841 if (PL_nexttoke)
bbf60fe6 842 return REPORT(LSTOP);
79072805 843 if (*s == '(')
bbf60fe6 844 return REPORT(FUNC);
79072805
LW
845 s = skipspace(s);
846 if (*s == '(')
bbf60fe6 847 return REPORT(FUNC);
79072805 848 else
bbf60fe6 849 return REPORT(LSTOP);
79072805
LW
850}
851
ffb4593c
NT
852/*
853 * S_force_next
9cbb5ea2 854 * When the lexer realizes it knows the next token (for instance,
ffb4593c 855 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
856 * to know what token to return the next time the lexer is called. Caller
857 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
858 * handles the token correctly.
ffb4593c
NT
859 */
860
4e553d73 861STATIC void
cea2e8a9 862S_force_next(pTHX_ I32 type)
79072805 863{
3280af22
NIS
864 PL_nexttype[PL_nexttoke] = type;
865 PL_nexttoke++;
866 if (PL_lex_state != LEX_KNOWNEXT) {
867 PL_lex_defer = PL_lex_state;
868 PL_lex_expect = PL_expect;
869 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
870 }
871}
872
d0a148a6
NC
873STATIC SV *
874S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
875{
876 SV *sv = newSVpvn(start,len);
877 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
878 SvUTF8_on(sv);
879 return sv;
880}
881
ffb4593c
NT
882/*
883 * S_force_word
884 * When the lexer knows the next thing is a word (for instance, it has
885 * just seen -> and it knows that the next char is a word char, then
886 * it calls S_force_word to stick the next word into the PL_next lookahead.
887 *
888 * Arguments:
b1b65b59 889 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
890 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
891 * int check_keyword : if true, Perl checks to make sure the word isn't
892 * a keyword (do this if the word is a label, e.g. goto FOO)
893 * int allow_pack : if true, : characters will also be allowed (require,
894 * use, etc. do this)
9cbb5ea2 895 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
896 */
897
76e3520e 898STATIC char *
cea2e8a9 899S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 900{
463ee0b2
LW
901 register char *s;
902 STRLEN len;
4e553d73 903
463ee0b2
LW
904 start = skipspace(start);
905 s = start;
7e2040f0 906 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 907 (allow_pack && *s == ':') ||
15f0808c 908 (allow_initial_tick && *s == '\'') )
a0d0e21e 909 {
3280af22
NIS
910 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
911 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
912 return start;
913 if (token == METHOD) {
914 s = skipspace(s);
915 if (*s == '(')
3280af22 916 PL_expect = XTERM;
463ee0b2 917 else {
3280af22 918 PL_expect = XOPERATOR;
463ee0b2 919 }
79072805 920 }
d0a148a6
NC
921 PL_nextval[PL_nexttoke].opval
922 = (OP*)newSVOP(OP_CONST,0,
923 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 924 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
925 force_next(token);
926 }
927 return s;
928}
929
ffb4593c
NT
930/*
931 * S_force_ident
9cbb5ea2 932 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
933 * text only contains the "foo" portion. The first argument is a pointer
934 * to the "foo", and the second argument is the type symbol to prefix.
935 * Forces the next token to be a "WORD".
9cbb5ea2 936 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
937 */
938
76e3520e 939STATIC void
cea2e8a9 940S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
941{
942 if (s && *s) {
11343788 943 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 944 PL_nextval[PL_nexttoke].opval = o;
79072805 945 force_next(WORD);
748a9306 946 if (kind) {
11343788 947 o->op_private = OPpCONST_ENTERED;
55497cff
PP
948 /* XXX see note in pp_entereval() for why we forgo typo
949 warnings if the symbol must be introduced in an eval.
950 GSAR 96-10-12 */
3280af22 951 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
952 kind == '$' ? SVt_PV :
953 kind == '@' ? SVt_PVAV :
954 kind == '%' ? SVt_PVHV :
955 SVt_PVGV
956 );
748a9306 957 }
79072805
LW
958 }
959}
960
1571675a
GS
961NV
962Perl_str_to_version(pTHX_ SV *sv)
963{
964 NV retval = 0.0;
965 NV nshift = 1.0;
966 STRLEN len;
967 char *start = SvPVx(sv,len);
3aa33fe5 968 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
969 char *end = start + len;
970 while (start < end) {
ba210ebe 971 STRLEN skip;
1571675a
GS
972 UV n;
973 if (utf)
9041c2e3 974 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
975 else {
976 n = *(U8*)start;
977 skip = 1;
978 }
979 retval += ((NV)n)/nshift;
980 start += skip;
981 nshift *= 1000;
982 }
983 return retval;
984}
985
4e553d73 986/*
ffb4593c
NT
987 * S_force_version
988 * Forces the next token to be a version number.
e759cc13
RGS
989 * If the next token appears to be an invalid version number, (e.g. "v2b"),
990 * and if "guessing" is TRUE, then no new token is created (and the caller
991 * must use an alternative parsing method).
ffb4593c
NT
992 */
993
76e3520e 994STATIC char *
e759cc13 995S_force_version(pTHX_ char *s, int guessing)
89bfa8cd
PP
996{
997 OP *version = Nullop;
44dcb63b 998 char *d;
89bfa8cd
PP
999
1000 s = skipspace(s);
1001
44dcb63b 1002 d = s;
dd629d5b 1003 if (*d == 'v')
44dcb63b 1004 d++;
44dcb63b 1005 if (isDIGIT(*d)) {
e759cc13
RGS
1006 while (isDIGIT(*d) || *d == '_' || *d == '.')
1007 d++;
9f3d182e 1008 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1009 SV *ver;
b73d6f50 1010 s = scan_num(s, &yylval);
89bfa8cd 1011 version = yylval.opval;
dd629d5b
GS
1012 ver = cSVOPx(version)->op_sv;
1013 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1014 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
1015 SvNVX(ver) = str_to_version(ver);
1016 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1017 }
89bfa8cd 1018 }
e759cc13
RGS
1019 else if (guessing)
1020 return s;
89bfa8cd
PP
1021 }
1022
1023 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1024 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1025 force_next(WORD);
89bfa8cd 1026
e759cc13 1027 return s;
89bfa8cd
PP
1028}
1029
ffb4593c
NT
1030/*
1031 * S_tokeq
1032 * Tokenize a quoted string passed in as an SV. It finds the next
1033 * chunk, up to end of string or a backslash. It may make a new
1034 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1035 * turns \\ into \.
1036 */
1037
76e3520e 1038STATIC SV *
cea2e8a9 1039S_tokeq(pTHX_ SV *sv)
79072805
LW
1040{
1041 register char *s;
1042 register char *send;
1043 register char *d;
b3ac6de7
IZ
1044 STRLEN len = 0;
1045 SV *pv = sv;
79072805
LW
1046
1047 if (!SvLEN(sv))
b3ac6de7 1048 goto finish;
79072805 1049
a0d0e21e 1050 s = SvPV_force(sv, len);
21a311ee 1051 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1052 goto finish;
463ee0b2 1053 send = s + len;
79072805
LW
1054 while (s < send && *s != '\\')
1055 s++;
1056 if (s == send)
b3ac6de7 1057 goto finish;
79072805 1058 d = s;
be4731d2 1059 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1060 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1061 if (SvUTF8(sv))
1062 SvUTF8_on(pv);
1063 }
79072805
LW
1064 while (s < send) {
1065 if (*s == '\\') {
a0d0e21e 1066 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1067 s++; /* all that, just for this */
1068 }
1069 *d++ = *s++;
1070 }
1071 *d = '\0';
463ee0b2 1072 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1073 finish:
3280af22 1074 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1075 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1076 return sv;
1077}
1078
ffb4593c
NT
1079/*
1080 * Now come three functions related to double-quote context,
1081 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1082 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1083 * interact with PL_lex_state, and create fake ( ... ) argument lists
1084 * to handle functions and concatenation.
1085 * They assume that whoever calls them will be setting up a fake
1086 * join call, because each subthing puts a ',' after it. This lets
1087 * "lower \luPpEr"
1088 * become
1089 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1090 *
1091 * (I'm not sure whether the spurious commas at the end of lcfirst's
1092 * arguments and join's arguments are created or not).
1093 */
1094
1095/*
1096 * S_sublex_start
1097 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1098 *
1099 * Pattern matching will set PL_lex_op to the pattern-matching op to
1100 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1101 *
1102 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1103 *
1104 * Everything else becomes a FUNC.
1105 *
1106 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1107 * had an OP_CONST or OP_READLINE). This just sets us up for a
1108 * call to S_sublex_push().
1109 */
1110
76e3520e 1111STATIC I32
cea2e8a9 1112S_sublex_start(pTHX)
79072805
LW
1113{
1114 register I32 op_type = yylval.ival;
79072805
LW
1115
1116 if (op_type == OP_NULL) {
3280af22
NIS
1117 yylval.opval = PL_lex_op;
1118 PL_lex_op = Nullop;
79072805
LW
1119 return THING;
1120 }
1121 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1122 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1123
1124 if (SvTYPE(sv) == SVt_PVIV) {
1125 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1126 STRLEN len;
1127 char *p;
1128 SV *nsv;
1129
1130 p = SvPV(sv, len);
79cb57f6 1131 nsv = newSVpvn(p, len);
01ec43d0
GS
1132 if (SvUTF8(sv))
1133 SvUTF8_on(nsv);
b3ac6de7
IZ
1134 SvREFCNT_dec(sv);
1135 sv = nsv;
4e553d73 1136 }
b3ac6de7 1137 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1138 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1139 /* Allow <FH> // "foo" */
1140 if (op_type == OP_READLINE)
1141 PL_expect = XTERMORDORDOR;
79072805
LW
1142 return THING;
1143 }
1144
3280af22
NIS
1145 PL_sublex_info.super_state = PL_lex_state;
1146 PL_sublex_info.sub_inwhat = op_type;
1147 PL_sublex_info.sub_op = PL_lex_op;
1148 PL_lex_state = LEX_INTERPPUSH;
55497cff 1149
3280af22
NIS
1150 PL_expect = XTERM;
1151 if (PL_lex_op) {
1152 yylval.opval = PL_lex_op;
1153 PL_lex_op = Nullop;
55497cff
PP
1154 return PMFUNC;
1155 }
1156 else
1157 return FUNC;
1158}
1159
ffb4593c
NT
1160/*
1161 * S_sublex_push
1162 * Create a new scope to save the lexing state. The scope will be
1163 * ended in S_sublex_done. Returns a '(', starting the function arguments
1164 * to the uc, lc, etc. found before.
1165 * Sets PL_lex_state to LEX_INTERPCONCAT.
1166 */
1167
76e3520e 1168STATIC I32
cea2e8a9 1169S_sublex_push(pTHX)
55497cff 1170{
f46d017c 1171 ENTER;
55497cff 1172
3280af22
NIS
1173 PL_lex_state = PL_sublex_info.super_state;
1174 SAVEI32(PL_lex_dojoin);
1175 SAVEI32(PL_lex_brackets);
3280af22
NIS
1176 SAVEI32(PL_lex_casemods);
1177 SAVEI32(PL_lex_starts);
1178 SAVEI32(PL_lex_state);
7766f137 1179 SAVEVPTR(PL_lex_inpat);
3280af22 1180 SAVEI32(PL_lex_inwhat);
57843af0 1181 SAVECOPLINE(PL_curcop);
3280af22 1182 SAVEPPTR(PL_bufptr);
8452ff4b 1183 SAVEPPTR(PL_bufend);
3280af22
NIS
1184 SAVEPPTR(PL_oldbufptr);
1185 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1186 SAVEPPTR(PL_last_lop);
1187 SAVEPPTR(PL_last_uni);
3280af22
NIS
1188 SAVEPPTR(PL_linestart);
1189 SAVESPTR(PL_linestr);
8edd5f42
RGS
1190 SAVEGENERICPV(PL_lex_brackstack);
1191 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1192
1193 PL_linestr = PL_lex_stuff;
1194 PL_lex_stuff = Nullsv;
1195
9cbb5ea2
GS
1196 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1197 = SvPVX(PL_linestr);
3280af22 1198 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1199 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1200 SAVEFREESV(PL_linestr);
1201
1202 PL_lex_dojoin = FALSE;
1203 PL_lex_brackets = 0;
3280af22
NIS
1204 New(899, PL_lex_brackstack, 120, char);
1205 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1206 PL_lex_casemods = 0;
1207 *PL_lex_casestack = '\0';
1208 PL_lex_starts = 0;
1209 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1210 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1211
1212 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1213 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1214 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1215 else
3280af22 1216 PL_lex_inpat = Nullop;
79072805 1217
55497cff 1218 return '(';
79072805
LW
1219}
1220
ffb4593c
NT
1221/*
1222 * S_sublex_done
1223 * Restores lexer state after a S_sublex_push.
1224 */
1225
76e3520e 1226STATIC I32
cea2e8a9 1227S_sublex_done(pTHX)
79072805 1228{
3280af22 1229 if (!PL_lex_starts++) {
9aa983d2
JH
1230 SV *sv = newSVpvn("",0);
1231 if (SvUTF8(PL_linestr))
1232 SvUTF8_on(sv);
3280af22 1233 PL_expect = XOPERATOR;
9aa983d2 1234 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1235 return THING;
1236 }
1237
3280af22
NIS
1238 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1239 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1240 return yylex();
79072805
LW
1241 }
1242
ffb4593c 1243 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1244 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1245 PL_linestr = PL_lex_repl;
1246 PL_lex_inpat = 0;
1247 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1248 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1249 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1250 SAVEFREESV(PL_linestr);
1251 PL_lex_dojoin = FALSE;
1252 PL_lex_brackets = 0;
3280af22
NIS
1253 PL_lex_casemods = 0;
1254 *PL_lex_casestack = '\0';
1255 PL_lex_starts = 0;
25da4f38 1256 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1257 PL_lex_state = LEX_INTERPNORMAL;
1258 PL_lex_starts++;
e9fa98b2
HS
1259 /* we don't clear PL_lex_repl here, so that we can check later
1260 whether this is an evalled subst; that means we rely on the
1261 logic to ensure sublex_done() is called again only via the
1262 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1263 }
e9fa98b2 1264 else {
3280af22 1265 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1266 PL_lex_repl = Nullsv;
1267 }
79072805 1268 return ',';
ffed7fef
LW
1269 }
1270 else {
f46d017c 1271 LEAVE;
3280af22
NIS
1272 PL_bufend = SvPVX(PL_linestr);
1273 PL_bufend += SvCUR(PL_linestr);
1274 PL_expect = XOPERATOR;
09bef843 1275 PL_sublex_info.sub_inwhat = 0;
79072805 1276 return ')';
ffed7fef
LW
1277 }
1278}
1279
02aa26ce
NT
1280/*
1281 scan_const
1282
1283 Extracts a pattern, double-quoted string, or transliteration. This
1284 is terrifying code.
1285
3280af22
NIS
1286 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1287 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1288 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1289
9b599b2a
GS
1290 Returns a pointer to the character scanned up to. Iff this is
1291 advanced from the start pointer supplied (ie if anything was
1292 successfully parsed), will leave an OP for the substring scanned
1293 in yylval. Caller must intuit reason for not parsing further
1294 by looking at the next characters herself.
1295
02aa26ce
NT
1296 In patterns:
1297 backslashes:
1298 double-quoted style: \r and \n
1299 regexp special ones: \D \s
1300 constants: \x3
1301 backrefs: \1 (deprecated in substitution replacements)
1302 case and quoting: \U \Q \E
1303 stops on @ and $, but not for $ as tail anchor
1304
1305 In transliterations:
1306 characters are VERY literal, except for - not at the start or end
1307 of the string, which indicates a range. scan_const expands the
1308 range to the full set of intermediate characters.
1309
1310 In double-quoted strings:
1311 backslashes:
1312 double-quoted style: \r and \n
1313 constants: \x3
1314 backrefs: \1 (deprecated)
1315 case and quoting: \U \Q \E
1316 stops on @ and $
1317
1318 scan_const does *not* construct ops to handle interpolated strings.
1319 It stops processing as soon as it finds an embedded $ or @ variable
1320 and leaves it to the caller to work out what's going on.
1321
da6eedaa 1322 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1323
1324 $ in pattern could be $foo or could be tail anchor. Assumption:
1325 it's a tail anchor if $ is the last thing in the string, or if it's
1326 followed by one of ")| \n\t"
1327
1328 \1 (backreferences) are turned into $1
1329
1330 The structure of the code is
1331 while (there's a character to process) {
1332 handle transliteration ranges
1333 skip regexp comments
1334 skip # initiated comments in //x patterns
1335 check for embedded @foo
1336 check for embedded scalars
1337 if (backslash) {
1338 leave intact backslashes from leave (below)
1339 deprecate \1 in strings and sub replacements
1340 handle string-changing backslashes \l \U \Q \E, etc.
1341 switch (what was escaped) {
1342 handle - in a transliteration (becomes a literal -)
1343 handle \132 octal characters
1344 handle 0x15 hex characters
1345 handle \cV (control V)
1346 handle printf backslashes (\f, \r, \n, etc)
1347 } (end switch)
1348 } (end if backslash)
1349 } (end while character to read)
4e553d73 1350
02aa26ce
NT
1351*/
1352
76e3520e 1353STATIC char *
cea2e8a9 1354S_scan_const(pTHX_ char *start)
79072805 1355{
3280af22 1356 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1357 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1358 register char *s = start; /* start of the constant */
1359 register char *d = SvPVX(sv); /* destination for copies */
1360 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1361 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1362 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1363 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1364 UV uv;
1365
dff6d3cd 1366 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1367 PL_lex_inpat
b6d5fef8 1368 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1369 : "";
79072805 1370
2b9d42f0
NIS
1371 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1372 /* If we are doing a trans and we know we want UTF8 set expectation */
1373 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1374 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1375 }
1376
1377
79072805 1378 while (s < send || dorange) {
02aa26ce 1379 /* get transliterations out of the way (they're most literal) */
3280af22 1380 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1381 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1382 if (dorange) {
1ba5c669
JH
1383 I32 i; /* current expanded character */
1384 I32 min; /* first character in range */
1385 I32 max; /* last character in range */
02aa26ce 1386
2b9d42f0 1387 if (has_utf8) {
8973db79
JH
1388 char *c = (char*)utf8_hop((U8*)d, -1);
1389 char *e = d++;
1390 while (e-- > c)
1391 *(e + 1) = *e;
25716404 1392 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1393 /* mark the range as done, and continue */
1394 dorange = FALSE;
1395 didrange = TRUE;
1396 continue;
1397 }
2b9d42f0 1398
02aa26ce 1399 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1400 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1401 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1402 d -= 2; /* eat the first char and the - */
1403
8ada0baa
JH
1404 min = (U8)*d; /* first char in range */
1405 max = (U8)d[1]; /* last char in range */
1406
c2e66d9e 1407 if (min > max) {
01ec43d0 1408 Perl_croak(aTHX_
d1573ac7 1409 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1410 (char)min, (char)max);
c2e66d9e
GS
1411 }
1412
c7f1f016 1413#ifdef EBCDIC
8ada0baa
JH
1414 if ((isLOWER(min) && isLOWER(max)) ||
1415 (isUPPER(min) && isUPPER(max))) {
1416 if (isLOWER(min)) {
1417 for (i = min; i <= max; i++)
1418 if (isLOWER(i))
db42d148 1419 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1420 } else {
1421 for (i = min; i <= max; i++)
1422 if (isUPPER(i))
db42d148 1423 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1424 }
1425 }
1426 else
1427#endif
1428 for (i = min; i <= max; i++)
eb160463 1429 *d++ = (char)i;
02aa26ce
NT
1430
1431 /* mark the range as done, and continue */
79072805 1432 dorange = FALSE;
01ec43d0 1433 didrange = TRUE;
79072805 1434 continue;
4e553d73 1435 }
02aa26ce
NT
1436
1437 /* range begins (ignore - as first or last char) */
79072805 1438 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1439 if (didrange) {
1fafa243 1440 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1441 }
2b9d42f0 1442 if (has_utf8) {
25716404 1443 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1444 s++;
1445 continue;
1446 }
79072805
LW
1447 dorange = TRUE;
1448 s++;
01ec43d0
GS
1449 }
1450 else {
1451 didrange = FALSE;
1452 }
79072805 1453 }
02aa26ce
NT
1454
1455 /* if we get here, we're not doing a transliteration */
1456
0f5d15d6
IZ
1457 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1458 except for the last char, which will be done separately. */
3280af22 1459 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1460 if (s[2] == '#') {
e994fd66 1461 while (s+1 < send && *s != ')')
db42d148 1462 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1463 }
1464 else if (s[2] == '{' /* This should match regcomp.c */
1465 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1466 {
cc6b7395 1467 I32 count = 1;
0f5d15d6 1468 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1469 char c;
1470
d9f97599
GS
1471 while (count && (c = *regparse)) {
1472 if (c == '\\' && regparse[1])
1473 regparse++;
4e553d73 1474 else if (c == '{')
cc6b7395 1475 count++;
4e553d73 1476 else if (c == '}')
cc6b7395 1477 count--;
d9f97599 1478 regparse++;
cc6b7395 1479 }
e994fd66 1480 if (*regparse != ')')
5bdf89e7 1481 regparse--; /* Leave one char for continuation. */
0f5d15d6 1482 while (s < regparse)
db42d148 1483 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1484 }
748a9306 1485 }
02aa26ce
NT
1486
1487 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1488 else if (*s == '#' && PL_lex_inpat &&
1489 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1490 while (s+1 < send && *s != '\n')
db42d148 1491 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1492 }
02aa26ce 1493
5d1d4326 1494 /* check for embedded arrays
da6eedaa 1495 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1496 */
7e2040f0 1497 else if (*s == '@' && s[1]
5d1d4326 1498 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1499 break;
02aa26ce
NT
1500
1501 /* check for embedded scalars. only stop if we're sure it's a
1502 variable.
1503 */
79072805 1504 else if (*s == '$') {
3280af22 1505 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1506 break;
6002328a 1507 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1508 break; /* in regexp, $ might be tail anchor */
1509 }
02aa26ce 1510
2b9d42f0
NIS
1511 /* End of else if chain - OP_TRANS rejoin rest */
1512
02aa26ce 1513 /* backslashes */
79072805
LW
1514 if (*s == '\\' && s+1 < send) {
1515 s++;
02aa26ce
NT
1516
1517 /* some backslashes we leave behind */
c9f97d15 1518 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1519 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1520 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1521 continue;
1522 }
02aa26ce
NT
1523
1524 /* deprecate \1 in strings and substitution replacements */
3280af22 1525 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1526 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1527 {
599cee73 1528 if (ckWARN(WARN_SYNTAX))
9014280d 1529 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1530 *--s = '$';
1531 break;
1532 }
02aa26ce
NT
1533
1534 /* string-change backslash escapes */
3280af22 1535 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1536 --s;
1537 break;
1538 }
02aa26ce
NT
1539
1540 /* if we get here, it's either a quoted -, or a digit */
79072805 1541 switch (*s) {
02aa26ce
NT
1542
1543 /* quoted - in transliterations */
79072805 1544 case '-':
3280af22 1545 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1546 *d++ = *s++;
1547 continue;
1548 }
1549 /* FALL THROUGH */
1550 default:
11b8faa4 1551 {
707afd92
MS
1552 if (ckWARN(WARN_MISC) &&
1553 isALNUM(*s) &&
1554 *s != '_')
9014280d 1555 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1556 "Unrecognized escape \\%c passed through",
1557 *s);
1558 /* default action is to copy the quoted character */
f9a63242 1559 goto default_action;
11b8faa4 1560 }
02aa26ce
NT
1561
1562 /* \132 indicates an octal constant */
79072805
LW
1563 case '0': case '1': case '2': case '3':
1564 case '4': case '5': case '6': case '7':
ba210ebe 1565 {
53305cf1
NC
1566 I32 flags = 0;
1567 STRLEN len = 3;
1568 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1569 s += len;
1570 }
012bcf8d 1571 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1572
1573 /* \x24 indicates a hex constant */
79072805 1574 case 'x':
a0ed51b3
LW
1575 ++s;
1576 if (*s == '{') {
1577 char* e = strchr(s, '}');
a4c04bdc
NC
1578 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1579 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1580 STRLEN len;
355860ce 1581
53305cf1 1582 ++s;
adaeee49 1583 if (!e) {
a0ed51b3 1584 yyerror("Missing right brace on \\x{}");
355860ce 1585 continue;
ba210ebe 1586 }
53305cf1
NC
1587 len = e - s;
1588 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1589 s = e + 1;
a0ed51b3
LW
1590 }
1591 else {
ba210ebe 1592 {
53305cf1 1593 STRLEN len = 2;
a4c04bdc 1594 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1595 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1596 s += len;
1597 }
012bcf8d
GS
1598 }
1599
1600 NUM_ESCAPE_INSERT:
1601 /* Insert oct or hex escaped character.
301d3d20 1602 * There will always enough room in sv since such
db42d148 1603 * escapes will be longer than any UTF-8 sequence
301d3d20 1604 * they can end up as. */
ba7cea30 1605
c7f1f016
NIS
1606 /* We need to map to chars to ASCII before doing the tests
1607 to cover EBCDIC
1608 */
c4d5f83a 1609 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1610 if (!has_utf8 && uv > 255) {
301d3d20
JH
1611 /* Might need to recode whatever we have
1612 * accumulated so far if it contains any
1613 * hibit chars.
1614 *
1615 * (Can't we keep track of that and avoid
1616 * this rescan? --jhi)
012bcf8d 1617 */
c7f1f016 1618 int hicount = 0;
63cd0674
NIS
1619 U8 *c;
1620 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1621 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1622 hicount++;
db42d148 1623 }
012bcf8d 1624 }
63cd0674 1625 if (hicount) {
db42d148
NIS
1626 STRLEN offset = d - SvPVX(sv);
1627 U8 *src, *dst;
1628 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1629 src = (U8 *)d - 1;
1630 dst = src+hicount;
1631 d += hicount;
1632 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1633 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1634 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1635 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1636 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1637 }
1638 else {
63cd0674 1639 *dst-- = *src;
012bcf8d 1640 }
c7f1f016 1641 src--;
012bcf8d
GS
1642 }
1643 }
1644 }
1645
9aa983d2 1646 if (has_utf8 || uv > 255) {
9041c2e3 1647 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1648 has_utf8 = TRUE;
f9a63242
JH
1649 if (PL_lex_inwhat == OP_TRANS &&
1650 PL_sublex_info.sub_op) {
1651 PL_sublex_info.sub_op->op_private |=
1652 (PL_lex_repl ? OPpTRANS_FROM_UTF
1653 : OPpTRANS_TO_UTF);
f9a63242 1654 }
012bcf8d 1655 }
a0ed51b3 1656 else {
012bcf8d 1657 *d++ = (char)uv;
a0ed51b3 1658 }
012bcf8d
GS
1659 }
1660 else {
c4d5f83a 1661 *d++ = (char) uv;
a0ed51b3 1662 }
79072805 1663 continue;
02aa26ce 1664
b239daa5 1665 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1666 case 'N':
55eda711 1667 ++s;
423cee85
JH
1668 if (*s == '{') {
1669 char* e = strchr(s, '}');
155aba94 1670 SV *res;
423cee85
JH
1671 STRLEN len;
1672 char *str;
4e553d73 1673
423cee85 1674 if (!e) {
5777a3f7 1675 yyerror("Missing right brace on \\N{}");
423cee85
JH
1676 e = s - 1;
1677 goto cont_scan;
1678 }
dbc0d4f2
JH
1679 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1680 /* \N{U+...} */
1681 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1682 PERL_SCAN_DISALLOW_PREFIX;
1683 s += 3;
1684 len = e - s;
1685 uv = grok_hex(s, &len, &flags, NULL);
1686 s = e + 1;
1687 goto NUM_ESCAPE_INSERT;
1688 }
55eda711
JH
1689 res = newSVpvn(s + 1, e - s - 1);
1690 res = new_constant( Nullch, 0, "charnames",
1691 res, Nullsv, "\\N{...}" );
f9a63242
JH
1692 if (has_utf8)
1693 sv_utf8_upgrade(res);
423cee85 1694 str = SvPV(res,len);
1c47067b
JH
1695#ifdef EBCDIC_NEVER_MIND
1696 /* charnames uses pack U and that has been
1697 * recently changed to do the below uni->native
1698 * mapping, so this would be redundant (and wrong,
1699 * the code point would be doubly converted).
1700 * But leave this in just in case the pack U change
1701 * gets revoked, but the semantics is still
1702 * desireable for charnames. --jhi */
cddc7ef4
JH
1703 {
1704 UV uv = utf8_to_uvchr((U8*)str, 0);
1705
1706 if (uv < 0x100) {
89ebb4a3 1707 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1708
1709 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1710 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1711 str = SvPV(res, len);
1712 }
1713 }
1714#endif
89491803 1715 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1716 char *ostart = SvPVX(sv);
1717 SvCUR_set(sv, d - ostart);
1718 SvPOK_on(sv);
e4f3eed8 1719 *d = '\0';
f08d6ad9 1720 sv_utf8_upgrade(sv);
d2f449dd 1721 /* this just broke our allocation above... */
eb160463 1722 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1723 d = SvPVX(sv) + SvCUR(sv);
89491803 1724 has_utf8 = TRUE;
f08d6ad9 1725 }
eb160463 1726 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1727 char *odest = SvPVX(sv);
1728
8973db79 1729 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1730 d = SvPVX(sv) + (d - odest);
1731 }
1732 Copy(str, d, len, char);
1733 d += len;
1734 SvREFCNT_dec(res);
1735 cont_scan:
1736 s = e + 1;
1737 }
1738 else
5777a3f7 1739 yyerror("Missing braces on \\N{}");
423cee85
JH
1740 continue;
1741
02aa26ce 1742 /* \c is a control character */
79072805
LW
1743 case 'c':
1744 s++;
961ce445 1745 if (s < send) {
ba210ebe 1746 U8 c = *s++;
c7f1f016
NIS
1747#ifdef EBCDIC
1748 if (isLOWER(c))
1749 c = toUPPER(c);
1750#endif
db42d148 1751 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1752 }
961ce445
RGS
1753 else {
1754 yyerror("Missing control char name in \\c");
1755 }
79072805 1756 continue;
02aa26ce
NT
1757
1758 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1759 case 'b':
db42d148 1760 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1761 break;
1762 case 'n':
db42d148 1763 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1764 break;
1765 case 'r':
db42d148 1766 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1767 break;
1768 case 'f':
db42d148 1769 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1770 break;
1771 case 't':
db42d148 1772 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1773 break;
34a3fe2a 1774 case 'e':
db42d148 1775 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1776 break;
1777 case 'a':
db42d148 1778 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1779 break;
02aa26ce
NT
1780 } /* end switch */
1781
79072805
LW
1782 s++;
1783 continue;
02aa26ce
NT
1784 } /* end if (backslash) */
1785
f9a63242 1786 default_action:
2b9d42f0
NIS
1787 /* If we started with encoded form, or already know we want it
1788 and then encode the next character */
1789 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1790 STRLEN len = 1;
1791 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1792 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1793 s += len;
1794 if (need > len) {
1795 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1796 STRLEN off = d - SvPVX(sv);
1797 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1798 }
1799 d = (char*)uvchr_to_utf8((U8*)d, uv);
1800 has_utf8 = TRUE;
1801 }
1802 else {
1803 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1804 }
02aa26ce
NT
1805 } /* while loop to process each character */
1806
1807 /* terminate the string and set up the sv */
79072805 1808 *d = '\0';
463ee0b2 1809 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1810 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1811 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1812
79072805 1813 SvPOK_on(sv);
9f4817db 1814 if (PL_encoding && !has_utf8) {
d0063567
DK
1815 sv_recode_to_utf8(sv, PL_encoding);
1816 if (SvUTF8(sv))
1817 has_utf8 = TRUE;
9f4817db 1818 }
2b9d42f0 1819 if (has_utf8) {
7e2040f0 1820 SvUTF8_on(sv);
2b9d42f0 1821 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1822 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1823 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1824 }
1825 }
79072805 1826
02aa26ce 1827 /* shrink the sv if we allocated more than we used */
79072805
LW
1828 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1829 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1830 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1831 }
02aa26ce 1832
9b599b2a 1833 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1834 if (s > PL_bufptr) {
1835 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1836 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1837 sv, Nullsv,
4e553d73 1838 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1839 ? "tr"
3280af22 1840 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1841 ? "s"
1842 : "qq")));
79072805 1843 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1844 } else
8990e307 1845 SvREFCNT_dec(sv);
79072805
LW
1846 return s;
1847}
1848
ffb4593c
NT
1849/* S_intuit_more
1850 * Returns TRUE if there's more to the expression (e.g., a subscript),
1851 * FALSE otherwise.
ffb4593c
NT
1852 *
1853 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1854 *
1855 * ->[ and ->{ return TRUE
1856 * { and [ outside a pattern are always subscripts, so return TRUE
1857 * if we're outside a pattern and it's not { or [, then return FALSE
1858 * if we're in a pattern and the first char is a {
1859 * {4,5} (any digits around the comma) returns FALSE
1860 * if we're in a pattern and the first char is a [
1861 * [] returns FALSE
1862 * [SOMETHING] has a funky algorithm to decide whether it's a
1863 * character class or not. It has to deal with things like
1864 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1865 * anything else returns TRUE
1866 */
1867
9cbb5ea2
GS
1868/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1869
76e3520e 1870STATIC int
cea2e8a9 1871S_intuit_more(pTHX_ register char *s)
79072805 1872{
3280af22 1873 if (PL_lex_brackets)
79072805
LW
1874 return TRUE;
1875 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1876 return TRUE;
1877 if (*s != '{' && *s != '[')
1878 return FALSE;
3280af22 1879 if (!PL_lex_inpat)
79072805
LW
1880 return TRUE;
1881
1882 /* In a pattern, so maybe we have {n,m}. */
1883 if (*s == '{') {
1884 s++;
1885 if (!isDIGIT(*s))
1886 return TRUE;
1887 while (isDIGIT(*s))
1888 s++;
1889 if (*s == ',')
1890 s++;
1891 while (isDIGIT(*s))
1892 s++;
1893 if (*s == '}')
1894 return FALSE;
1895 return TRUE;
1896
1897 }
1898
1899 /* On the other hand, maybe we have a character class */
1900
1901 s++;
1902 if (*s == ']' || *s == '^')
1903 return FALSE;
1904 else {
ffb4593c 1905 /* this is terrifying, and it works */
79072805
LW
1906 int weight = 2; /* let's weigh the evidence */
1907 char seen[256];
f27ffc4a 1908 unsigned char un_char = 255, last_un_char;
93a17b20 1909 char *send = strchr(s,']');
3280af22 1910 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1911
1912 if (!send) /* has to be an expression */
1913 return TRUE;
1914
1915 Zero(seen,256,char);
1916 if (*s == '$')
1917 weight -= 3;
1918 else if (isDIGIT(*s)) {
1919 if (s[1] != ']') {
1920 if (isDIGIT(s[1]) && s[2] == ']')
1921 weight -= 10;
1922 }
1923 else
1924 weight -= 100;
1925 }
1926 for (; s < send; s++) {
1927 last_un_char = un_char;
1928 un_char = (unsigned char)*s;
1929 switch (*s) {
1930 case '@':
1931 case '&':
1932 case '$':
1933 weight -= seen[un_char] * 10;
7e2040f0 1934 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1935 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1936 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1937 weight -= 100;
1938 else
1939 weight -= 10;
1940 }
1941 else if (*s == '$' && s[1] &&
93a17b20
LW
1942 strchr("[#!%*<>()-=",s[1])) {
1943 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1944 weight -= 10;
1945 else
1946 weight -= 1;
1947 }
1948 break;
1949 case '\\':
1950 un_char = 254;
1951 if (s[1]) {
93a17b20 1952 if (strchr("wds]",s[1]))
79072805
LW
1953 weight += 100;
1954 else if (seen['\''] || seen['"'])
1955 weight += 1;
93a17b20 1956 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1957 weight += 40;
1958 else if (isDIGIT(s[1])) {
1959 weight += 40;
1960 while (s[1] && isDIGIT(s[1]))
1961 s++;
1962 }
1963 }
1964 else
1965 weight += 100;
1966 break;
1967 case '-':
1968 if (s[1] == '\\')
1969 weight += 50;
93a17b20 1970 if (strchr("aA01! ",last_un_char))
79072805 1971 weight += 30;
93a17b20 1972 if (strchr("zZ79~",s[1]))
79072805 1973 weight += 30;
f27ffc4a
GS
1974 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1975 weight -= 5; /* cope with negative subscript */
79072805
LW
1976 break;
1977 default:
3792a11b
NC
1978 if (!isALNUM(last_un_char)
1979 && !(last_un_char == '$' || last_un_char == '@'
1980 || last_un_char == '&')
1981 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1982 char *d = tmpbuf;
1983 while (isALPHA(*s))
1984 *d++ = *s++;
1985 *d = '\0';
1986 if (keyword(tmpbuf, d - tmpbuf))
1987 weight -= 150;
1988 }
1989 if (un_char == last_un_char + 1)
1990 weight += 5;
1991 weight -= seen[un_char];
1992 break;
1993 }
1994 seen[un_char]++;
1995 }
1996 if (weight >= 0) /* probably a character class */
1997 return FALSE;
1998 }
1999
2000 return TRUE;
2001}
ffed7fef 2002
ffb4593c
NT
2003/*
2004 * S_intuit_method
2005 *
2006 * Does all the checking to disambiguate
2007 * foo bar
2008 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2009 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2010 *
2011 * First argument is the stuff after the first token, e.g. "bar".
2012 *
2013 * Not a method if bar is a filehandle.
2014 * Not a method if foo is a subroutine prototyped to take a filehandle.
2015 * Not a method if it's really "Foo $bar"
2016 * Method if it's "foo $bar"
2017 * Not a method if it's really "print foo $bar"
2018 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2019 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2020 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2021 * =>
2022 */
2023
76e3520e 2024STATIC int
cea2e8a9 2025S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2026{
2027 char *s = start + (*start == '$');
3280af22 2028 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2029 STRLEN len;
2030 GV* indirgv;
2031
2032 if (gv) {
b6c543e3 2033 CV *cv;
a0d0e21e
LW
2034 if (GvIO(gv))
2035 return 0;
b6c543e3
IZ
2036 if ((cv = GvCVu(gv))) {
2037 char *proto = SvPVX(cv);
2038 if (proto) {
2039 if (*proto == ';')
2040 proto++;
2041 if (*proto == '*')
2042 return 0;
2043 }
2044 } else
a0d0e21e
LW
2045 gv = 0;
2046 }
8903cb82 2047 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2048 /* start is the beginning of the possible filehandle/object,
2049 * and s is the end of it
2050 * tmpbuf is a copy of it
2051 */
2052
a0d0e21e 2053 if (*start == '$') {
3280af22 2054 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2055 return 0;
2056 s = skipspace(s);
3280af22
NIS
2057 PL_bufptr = start;
2058 PL_expect = XREF;
a0d0e21e
LW
2059 return *s == '(' ? FUNCMETH : METHOD;
2060 }
2061 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2062 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2063 len -= 2;
2064 tmpbuf[len] = '\0';
2065 goto bare_package;
2066 }
2067 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2068 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2069 return 0;
2070 /* filehandle or package name makes it a method */
89bfa8cd 2071 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2072 s = skipspace(s);
3280af22 2073 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2074 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2075 bare_package:
3280af22 2076 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2077 newSVpvn(tmpbuf,len));
3280af22
NIS
2078 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2079 PL_expect = XTERM;
a0d0e21e 2080 force_next(WORD);
3280af22 2081 PL_bufptr = s;
a0d0e21e
LW
2082 return *s == '(' ? FUNCMETH : METHOD;
2083 }
2084 }
2085 return 0;
2086}
2087
ffb4593c
NT
2088/*
2089 * S_incl_perldb
2090 * Return a string of Perl code to load the debugger. If PERL5DB
2091 * is set, it will return the contents of that, otherwise a
2092 * compile-time require of perl5db.pl.
2093 */
2094
76e3520e 2095STATIC char*
cea2e8a9 2096S_incl_perldb(pTHX)
a0d0e21e 2097{
3280af22 2098 if (PL_perldb) {
76e3520e 2099 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2100
2101 if (pdb)
2102 return pdb;
93189314 2103 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2104 return "BEGIN { require 'perl5db.pl' }";
2105 }
2106 return "";
2107}
2108
2109
16d20bd9 2110/* Encoded script support. filter_add() effectively inserts a
4e553d73 2111 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2112 * Note that the filter function only applies to the current source file
2113 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2114 *
2115 * The datasv parameter (which may be NULL) can be used to pass
2116 * private data to this instance of the filter. The filter function
2117 * can recover the SV using the FILTER_DATA macro and use it to
2118 * store private buffers and state information.
2119 *
2120 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2121 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2122 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2123 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2124 * private use must be set using malloc'd pointers.
2125 */
16d20bd9
AD
2126
2127SV *
864dbfa3 2128Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2129{
f4c556ac
GS
2130 if (!funcp)
2131 return Nullsv;
2132
3280af22
NIS
2133 if (!PL_rsfp_filters)
2134 PL_rsfp_filters = newAV();
16d20bd9 2135 if (!datasv)
8c52afec 2136 datasv = NEWSV(255,0);
16d20bd9 2137 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2138 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2139 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2140 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2141 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2142 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2143 av_unshift(PL_rsfp_filters, 1);
2144 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2145 return(datasv);
2146}
4e553d73 2147
16d20bd9
AD
2148
2149/* Delete most recently added instance of this filter function. */
a0d0e21e 2150void
864dbfa3 2151Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2152{
e0c19803 2153 SV *datasv;
fe5a182c 2154 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2155 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2156 return;
2157 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2158 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2159 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2160 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2161 IoANY(datasv) = (void *)NULL;
3280af22 2162 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2163
16d20bd9
AD
2164 return;
2165 }
2166 /* we need to search for the correct entry and clear it */
cea2e8a9 2167 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2168}
2169
2170
1de9afcd
RGS
2171/* Invoke the idxth filter function for the current rsfp. */
2172/* maxlen 0 = read one text line */
16d20bd9 2173I32
864dbfa3 2174Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2175{
16d20bd9
AD
2176 filter_t funcp;
2177 SV *datasv = NULL;
e50aee73 2178
3280af22 2179 if (!PL_rsfp_filters)
16d20bd9 2180 return -1;
1de9afcd 2181 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2182 /* Provide a default input filter to make life easy. */
2183 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2184 DEBUG_P(PerlIO_printf(Perl_debug_log,
2185 "filter_read %d: from rsfp\n", idx));
4e553d73 2186 if (maxlen) {
16d20bd9
AD
2187 /* Want a block */
2188 int len ;
2189 int old_len = SvCUR(buf_sv) ;
2190
2191 /* ensure buf_sv is large enough */
eb160463 2192 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2193 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2194 if (PerlIO_error(PL_rsfp))
37120919
AD
2195 return -1; /* error */
2196 else
2197 return 0 ; /* end of file */
2198 }
16d20bd9
AD
2199 SvCUR_set(buf_sv, old_len + len) ;
2200 } else {
2201 /* Want a line */
3280af22
NIS
2202 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2203 if (PerlIO_error(PL_rsfp))
37120919
AD
2204 return -1; /* error */
2205 else
2206 return 0 ; /* end of file */
2207 }
16d20bd9
AD
2208 }
2209 return SvCUR(buf_sv);
2210 }
2211 /* Skip this filter slot if filter has been deleted */
1de9afcd 2212 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2213 DEBUG_P(PerlIO_printf(Perl_debug_log,
2214 "filter_read %d: skipped (filter deleted)\n",
2215 idx));
16d20bd9
AD
2216 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2217 }
2218 /* Get function pointer hidden within datasv */
4755096e 2219 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2220 DEBUG_P(PerlIO_printf(Perl_debug_log,
2221 "filter_read %d: via function %p (%s)\n",
fe5a182c 2222 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2223 /* Call function. The function is expected to */
2224 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2225 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2226 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2227}
2228
76e3520e 2229STATIC char *
cea2e8a9 2230S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2231{
c39cd008 2232#ifdef PERL_CR_FILTER
3280af22 2233 if (!PL_rsfp_filters) {
c39cd008 2234 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2235 }
2236#endif
3280af22 2237 if (PL_rsfp_filters) {
55497cff
PP
2238 if (!append)
2239 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2240 if (FILTER_READ(0, sv, 0) > 0)
2241 return ( SvPVX(sv) ) ;
2242 else
2243 return Nullch ;
2244 }
9d116dd7 2245 else
fd049845 2246 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2247}
2248
01ec43d0
GS
2249STATIC HV *
2250S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2251{
2252 GV *gv;
2253
01ec43d0 2254 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2255 return PL_curstash;
2256
2257 if (len > 2 &&
2258 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2259 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2260 {
2261 return GvHV(gv); /* Foo:: */
def3634b
GS
2262 }
2263
2264 /* use constant CLASS => 'MyClass' */
2265 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2266 SV *sv;
2267 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2268 pkgname = SvPV_nolen(sv);
2269 }
2270 }
2271
2272 return gv_stashpv(pkgname, FALSE);
2273}
a0d0e21e 2274
748a9306
LW
2275#ifdef DEBUGGING
2276 static char* exp_name[] =
09bef843 2277 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2278 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2279 };
748a9306 2280#endif
463ee0b2 2281
02aa26ce
NT
2282/*
2283 yylex
2284
2285 Works out what to call the token just pulled out of the input
2286 stream. The yacc parser takes care of taking the ops we return and
2287 stitching them into a tree.
2288
2289 Returns:
2290 PRIVATEREF
2291
2292 Structure:
2293 if read an identifier
2294 if we're in a my declaration
2295 croak if they tried to say my($foo::bar)
2296 build the ops for a my() declaration
2297 if it's an access to a my() variable
2298 are we in a sort block?
2299 croak if my($a); $a <=> $b
2300 build ops for access to a my() variable
2301 if in a dq string, and they've said @foo and we can't find @foo
2302 croak
2303 build ops for a bareword
2304 if we already built the token before, use it.
2305*/
2306
20141f0e 2307
dba4d153
JH
2308#ifdef __SC__
2309#pragma segment Perl_yylex
2310#endif
dba4d153 2311int
dba4d153 2312Perl_yylex(pTHX)
20141f0e 2313{
3afc138a 2314 register char *s = PL_bufptr;
378cc40b 2315 register char *d;
79072805 2316 register I32 tmp;
463ee0b2 2317 STRLEN len;
161b471a
NIS
2318 GV *gv = Nullgv;
2319 GV **gvp = 0;
aa7440fb 2320 bool bof = FALSE;
1d239bbb 2321 I32 orig_keyword = 0;
a687059c 2322
bbf60fe6
DM
2323 DEBUG_T( {
2324 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2325 lex_state_names[PL_lex_state]);
2326 } );
02aa26ce 2327 /* check if there's an identifier for us to look at */
ba979b31 2328 if (PL_pending_ident)
bbf60fe6 2329 return REPORT(S_pending_ident(aTHX));
bbce6d69 2330
02aa26ce
NT
2331 /* no identifier pending identification */
2332
3280af22 2333 switch (PL_lex_state) {
79072805
LW
2334#ifdef COMMENTARY
2335 case LEX_NORMAL: /* Some compilers will produce faster */
2336 case LEX_INTERPNORMAL: /* code if we comment these out. */
2337 break;
2338#endif
2339
09bef843 2340 /* when we've already built the next token, just pull it out of the queue */
79072805 2341 case LEX_KNOWNEXT:
3280af22
NIS
2342 PL_nexttoke--;
2343 yylval = PL_nextval[PL_nexttoke];
2344 if (!PL_nexttoke) {
2345 PL_lex_state = PL_lex_defer;
2346 PL_expect = PL_lex_expect;
2347 PL_lex_defer = LEX_NORMAL;
463ee0b2 2348 }
607df283 2349 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2350 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2351 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2352
bbf60fe6 2353 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2354
02aa26ce 2355 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2356 when we get here, PL_bufptr is at the \
02aa26ce 2357 */
79072805
LW
2358 case LEX_INTERPCASEMOD:
2359#ifdef DEBUGGING
3280af22 2360 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2361 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2362#endif
02aa26ce 2363 /* handle \E or end of string */
3280af22 2364 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2365 char oldmod;
02aa26ce
NT
2366
2367 /* if at a \E */
3280af22
NIS
2368 if (PL_lex_casemods) {
2369 oldmod = PL_lex_casestack[--PL_lex_casemods];
2370 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2371
3792a11b
NC
2372 if (PL_bufptr != PL_bufend
2373 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2374 PL_bufptr += 2;
2375 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2376 }
bbf60fe6 2377 return REPORT(')');
79072805 2378 }
3280af22
NIS
2379 if (PL_bufptr != PL_bufend)
2380 PL_bufptr += 2;
2381 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2382 return yylex();
79072805
LW
2383 }
2384 else {
607df283 2385 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2386 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2387 s = PL_bufptr + 1;
6e909404
JH
2388 if (s[1] == '\\' && s[2] == 'E') {
2389 PL_bufptr = s + 3;
2390 PL_lex_state = LEX_INTERPCONCAT;
2391 return yylex();
a0d0e21e 2392 }
6e909404
JH
2393 else {
2394 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2395 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2396 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2397 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2398 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2399 return REPORT(')');
6e909404
JH
2400 }
2401 if (PL_lex_casemods > 10)
2402 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2403 PL_lex_casestack[PL_lex_casemods++] = *s;
2404 PL_lex_casestack[PL_lex_casemods] = '\0';
2405 PL_lex_state = LEX_INTERPCONCAT;
2406 PL_nextval[PL_nexttoke].ival = 0;
2407 force_next('(');
2408 if (*s == 'l')
2409 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2410 else if (*s == 'u')
2411 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2412 else if (*s == 'L')
2413 PL_nextval[PL_nexttoke].ival = OP_LC;
2414 else if (*s == 'U')
2415 PL_nextval[PL_nexttoke].ival = OP_UC;
2416 else if (*s == 'Q')
2417 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2418 else
2419 Perl_croak(aTHX_ "panic: yylex");
2420 PL_bufptr = s + 1;
a0d0e21e 2421 }
79072805 2422 force_next(FUNC);
3280af22
NIS
2423 if (PL_lex_starts) {
2424 s = PL_bufptr;
2425 PL_lex_starts = 0;
131b3ad0
DM
2426 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2427 if (PL_lex_casemods == 1 && PL_lex_inpat)
2428 OPERATOR(',');
2429 else
2430 Aop(OP_CONCAT);
79072805
LW
2431 }
2432 else
cea2e8a9 2433 return yylex();
79072805
LW
2434 }
2435
55497cff 2436 case LEX_INTERPPUSH:
bbf60fe6 2437 return REPORT(sublex_push());
55497cff 2438
79072805 2439 case LEX_INTERPSTART:
3280af22 2440 if (PL_bufptr == PL_bufend)
bbf60fe6 2441 return REPORT(sublex_done());
607df283 2442 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2443 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2444 PL_expect = XTERM;
2445 PL_lex_dojoin = (*PL_bufptr == '@');
2446 PL_lex_state = LEX_INTERPNORMAL;
2447 if (PL_lex_dojoin) {
2448 PL_nextval[PL_nexttoke].ival = 0;
79072805 2449 force_next(',');
a0d0e21e 2450 force_ident("\"", '$');
3280af22 2451 PL_nextval[PL_nexttoke].ival = 0;
79072805 2452 force_next('$');
3280af22 2453 PL_nextval[PL_nexttoke].ival = 0;
79072805 2454 force_next('(');
3280af22 2455 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2456 force_next(FUNC);
2457 }
3280af22
NIS
2458 if (PL_lex_starts++) {
2459 s = PL_bufptr;
131b3ad0
DM
2460 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2461 if (!PL_lex_casemods && PL_lex_inpat)
2462 OPERATOR(',');
2463 else
2464 Aop(OP_CONCAT);
79072805 2465 }
cea2e8a9 2466 return yylex();
79072805
LW
2467
2468 case LEX_INTERPENDMAYBE:
3280af22
NIS
2469 if (intuit_more(PL_bufptr)) {
2470 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2471 break;
2472 }
2473 /* FALL THROUGH */
2474
2475 case LEX_INTERPEND:
3280af22
NIS
2476 if (PL_lex_dojoin) {
2477 PL_lex_dojoin = FALSE;
2478 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2479 return REPORT(')');
79072805 2480 }
43a16006 2481 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2482 && SvEVALED(PL_lex_repl))
43a16006 2483 {
e9fa98b2 2484 if (PL_bufptr != PL_bufend)
cea2e8a9 2485 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2486 PL_lex_repl = Nullsv;
2487 }
79072805
LW
2488 /* FALLTHROUGH */
2489 case LEX_INTERPCONCAT:
2490#ifdef DEBUGGING
3280af22 2491 if (PL_lex_brackets)
cea2e8a9 2492 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2493#endif
3280af22 2494 if (PL_bufptr == PL_bufend)
bbf60fe6 2495 return REPORT(sublex_done());
79072805 2496
3280af22
NIS
2497 if (SvIVX(PL_linestr) == '\'') {
2498 SV *sv = newSVsv(PL_linestr);
2499 if (!PL_lex_inpat)
76e3520e 2500 sv = tokeq(sv);
3280af22 2501 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2502 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2503 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2504 s = PL_bufend;
79072805
LW
2505 }
2506 else {
3280af22 2507 s = scan_const(PL_bufptr);
79072805 2508 if (*s == '\\')
3280af22 2509 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2510 else
3280af22 2511 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2512 }
2513
3280af22
NIS
2514 if (s != PL_bufptr) {
2515 PL_nextval[PL_nexttoke] = yylval;
2516 PL_expect = XTERM;
79072805 2517 force_next(THING);
131b3ad0
DM
2518 if (PL_lex_starts++) {
2519 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2520 if (!PL_lex_casemods && PL_lex_inpat)
2521 OPERATOR(',');
2522 else
2523 Aop(OP_CONCAT);
2524 }
79072805 2525 else {
3280af22 2526 PL_bufptr = s;
cea2e8a9 2527 return yylex();
79072805
LW
2528 }
2529 }
2530
cea2e8a9 2531 return yylex();
a0d0e21e 2532 case LEX_FORMLINE:
3280af22
NIS
2533 PL_lex_state = LEX_NORMAL;
2534 s = scan_formline(PL_bufptr);
2535 if (!PL_lex_formbrack)
a0d0e21e
LW
2536 goto rightbracket;
2537 OPERATOR(';');
79072805
LW
2538 }
2539
3280af22
NIS
2540 s = PL_bufptr;
2541 PL_oldoldbufptr = PL_oldbufptr;
2542 PL_oldbufptr = s;
607df283 2543 DEBUG_T( {
bbf60fe6 2544 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2545 exp_name[PL_expect], s);
5f80b19c 2546 } );
463ee0b2
LW
2547
2548 retry:
378cc40b
LW
2549 switch (*s) {
2550 default:
7e2040f0 2551 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2552 goto keylookup;
cea2e8a9 2553 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2554 case 4:
2555 case 26:
2556 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2557 case 0:
3280af22
NIS
2558 if (!PL_rsfp) {
2559 PL_last_uni = 0;
2560 PL_last_lop = 0;
c5ee2135
LW
2561 if (PL_lex_brackets) {
2562 if (PL_lex_formbrack)
2563 yyerror("Format not terminated");
2564 else
2565 yyerror("Missing right curly or square bracket");
2566 }
4e553d73 2567 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2568 "### Tokener got EOF\n");
5f80b19c 2569 } );
79072805 2570 TOKEN(0);
463ee0b2 2571 }
3280af22 2572 if (s++ < PL_bufend)
a687059c 2573 goto retry; /* ignore stray nulls */
3280af22
NIS
2574 PL_last_uni = 0;
2575 PL_last_lop = 0;
2576 if (!PL_in_eval && !PL_preambled) {
2577 PL_preambled = TRUE;
2578 sv_setpv(PL_linestr,incl_perldb());
2579 if (SvCUR(PL_linestr))
4147a61b 2580 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2581 if (PL_preambleav){
2582 while(AvFILLp(PL_preambleav) >= 0) {
2583 SV *tmpsv = av_shift(PL_preambleav);
2584 sv_catsv(PL_linestr, tmpsv);
4147a61b 2585 sv_catpvn(PL_linestr, ";", 1);
91b7def8
PP
2586 sv_free(tmpsv);
2587 }
3280af22
NIS
2588 sv_free((SV*)PL_preambleav);
2589 PL_preambleav = NULL;
91b7def8 2590 }
3280af22
NIS
2591 if (PL_minus_n || PL_minus_p) {
2592 sv_catpv(PL_linestr, "LINE: while (<>) {");
2593 if (PL_minus_l)
2594 sv_catpv(PL_linestr,"chomp;");
2595 if (PL_minus_a) {
3280af22 2596 if (PL_minus_F) {
3792a11b
NC
2597 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2598 || *PL_splitstr == '"')
3280af22 2599 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2600 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2601 else {
c8ef6a4b
NC
2602 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2603 bytes can be used as quoting characters. :-) */
48c4c863
NC
2604 /* The count here deliberately includes the NUL
2605 that terminates the C string constant. This
2606 embeds the opening NUL into the string. */
9a3fb652 2607 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2608 s = PL_splitstr;
2609 do {
2610 /* Need to \ \s */
2611 if (*s == '\\')
2612 sv_catpvn(PL_linestr, s, 1);
2613 sv_catpvn(PL_linestr, s, 1);
2614 } while (*s++);
2615 /* This loop will embed the trailing NUL of
2616 PL_linestr as the last thing it does before
2617 terminating. */
4fccd7c6 2618 sv_catpvn(PL_linestr, ");", 2);
54310121 2619 }
2304df62
AD
2620 }
2621 else
75c72d73 2622 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2623 }
79072805 2624 }
4147a61b 2625 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2626 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2627 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2628 PL_last_lop = PL_last_uni = Nullch;
3280af22 2629 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2630 SV *sv = NEWSV(85,0);
2631
2632 sv_upgrade(sv, SVt_PVMG);
3280af22 2633 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2634 (void)SvIOK_on(sv);
2635 SvIVX(sv) = 0;
57843af0 2636 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2637 }
79072805 2638 goto retry;
a687059c 2639 }
e929a76b 2640 do {
aa7440fb 2641 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2642 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2643 fake_eof:
2644 if (PL_rsfp) {
2645 if (PL_preprocess && !PL_in_eval)
2646 (void)PerlProc_pclose(PL_rsfp);
2647 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2648 PerlIO_clearerr(PL_rsfp);
2649 else
2650 (void)PerlIO_close(PL_rsfp);
2651 PL_rsfp = Nullfp;
2652 PL_doextract = FALSE;
2653 }
2654 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2655 sv_setpv(PL_linestr,PL_minus_p
2656 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2657 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2658 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2659 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2660 PL_minus_n = PL_minus_p = 0;
2661 goto retry;
2662 }
2663 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2664 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2665 sv_setpv(PL_linestr,"");
2666 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2667 }
7aa207d6
JH
2668 /* If it looks like the start of a BOM or raw UTF-16,
2669 * check if it in fact is. */
2670 else if (bof &&
2671 (*s == 0 ||
2672 *(U8*)s == 0xEF ||
2673 *(U8*)s >= 0xFE ||
2674 s[1] == 0)) {
226017aa 2675#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2676# ifdef __GNU_LIBRARY__
2677# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2678# define FTELL_FOR_PIPE_IS_BROKEN
2679# endif
e3f494f1
JH
2680# else
2681# ifdef __GLIBC__
2682# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2683# define FTELL_FOR_PIPE_IS_BROKEN
2684# endif
2685# endif
226017aa
DD
2686# endif
2687#endif
2688#ifdef FTELL_FOR_PIPE_IS_BROKEN
2689 /* This loses the possibility to detect the bof
2690 * situation on perl -P when the libc5 is being used.
2691 * Workaround? Maybe attach some extra state to PL_rsfp?
2692 */
2693 if (!PL_preprocess)
7e28d3af 2694 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2695#else
eb160463 2696 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2697#endif
7e28d3af 2698 if (bof) {
3280af22 2699 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2700 s = swallow_bom((U8*)s);
e929a76b 2701 }
378cc40b 2702 }
3280af22 2703 if (PL_doextract) {
a0d0e21e
LW
2704 /* Incest with pod. */
2705 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2706 sv_setpv(PL_linestr, "");
2707 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2708 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2709 PL_last_lop = PL_last_uni = Nullch;
3280af22 2710 PL_doextract = FALSE;
a0d0e21e 2711 }
4e553d73 2712 }
463ee0b2 2713 incline(s);
3280af22
NIS
2714 } while (PL_doextract);
2715 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2716 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2717 SV *sv = NEWSV(85,0);
a687059c 2718
93a17b20 2719 sv_upgrade(sv, SVt_PVMG);
3280af22 2720 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2721 (void)SvIOK_on(sv);
2722 SvIVX(sv) = 0;
57843af0 2723 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2724 }
3280af22 2725 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2726 PL_last_lop = PL_last_uni = Nullch;
57843af0 2727 if (CopLINE(PL_curcop) == 1) {
3280af22 2728 while (s < PL_bufend && isSPACE(*s))
79072805 2729 s++;
a0d0e21e 2730 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2731 s++;
44a8e56a 2732 d = Nullch;
3280af22 2733 if (!PL_in_eval) {
44a8e56a
PP
2734 if (*s == '#' && *(s+1) == '!')
2735 d = s + 2;
2736#ifdef ALTERNATE_SHEBANG
2737 else {
2738 static char as[] = ALTERNATE_SHEBANG;
2739 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2740 d = s + (sizeof(as) - 1);
2741 }
2742#endif /* ALTERNATE_SHEBANG */
2743 }
2744 if (d) {
b8378b72 2745 char *ipath;
774d564b 2746 char *ipathend;
b8378b72 2747
774d564b 2748 while (isSPACE(*d))
b8378b72
CS
2749 d++;
2750 ipath = d;
774d564b
PP
2751 while (*d && !isSPACE(*d))
2752 d++;
2753 ipathend = d;
2754
2755#ifdef ARG_ZERO_IS_SCRIPT
2756 if (ipathend > ipath) {
2757 /*
2758 * HP-UX (at least) sets argv[0] to the script name,
2759 * which makes $^X incorrect. And Digital UNIX and Linux,
2760 * at least, set argv[0] to the basename of the Perl
2761 * interpreter. So, having found "#!", we'll set it right.
2762 */
ee2f7564 2763 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2764 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2765 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2766 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2767 SvSETMAGIC(x);
2768 }
556c1dec
JH
2769 else {
2770 STRLEN blen;
2771 STRLEN llen;
2772 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2773 char *lstart = SvPV(x,llen);
2774 if (llen < blen) {
2775 bstart += blen - llen;
2776 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2777 sv_setpvn(x, ipath, ipathend - ipath);
2778 SvSETMAGIC(x);
2779 }
2780 }
2781 }
774d564b 2782 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2783 }
774d564b 2784#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2785
2786 /*
2787 * Look for options.
2788 */
748a9306 2789 d = instr(s,"perl -");
84e30d1a 2790 if (!d) {
748a9306 2791 d = instr(s,"perl");
84e30d1a
GS
2792#if defined(DOSISH)
2793 /* avoid getting into infinite loops when shebang
2794 * line contains "Perl" rather than "perl" */
2795 if (!d) {
2796 for (d = ipathend-4; d >= ipath; --d) {
2797 if ((*d == 'p' || *d == 'P')
2798 && !ibcmp(d, "perl", 4))
2799 {
2800 break;
2801 }
2802 }
2803 if (d < ipath)
2804 d = Nullch;
2805 }
2806#endif
2807 }
44a8e56a
PP
2808#ifdef ALTERNATE_SHEBANG
2809 /*
2810 * If the ALTERNATE_SHEBANG on this system starts with a
2811 * character that can be part of a Perl expression, then if
2812 * we see it but not "perl", we're probably looking at the
2813 * start of Perl code, not a request to hand off to some
2814 * other interpreter. Similarly, if "perl" is there, but
2815 * not in the first 'word' of the line, we assume the line
2816 * contains the start of the Perl program.
44a8e56a
PP
2817 */
2818 if (d && *s != '#') {
774d564b 2819 char *c = ipath;
44a8e56a
PP
2820 while (*c && !strchr("; \t\r\n\f\v#", *c))
2821 c++;
2822 if (c < d)
2823 d = Nullch; /* "perl" not in first word; ignore */
2824 else
2825 *s = '#'; /* Don't try to parse shebang line */
2826 }
774d564b 2827#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2828#ifndef MACOS_TRADITIONAL
748a9306 2829 if (!d &&
44a8e56a 2830 *s == '#' &&
774d564b 2831 ipathend > ipath &&
3280af22 2832 !PL_minus_c &&
748a9306 2833 !instr(s,"indir") &&
3280af22 2834 instr(PL_origargv[0],"perl"))
748a9306 2835 {
9f68db38 2836 char **newargv;
9f68db38 2837
774d564b
PP
2838 *ipathend = '\0';
2839 s = ipathend + 1;
3280af22 2840 while (s < PL_bufend && isSPACE(*s))
9f68db38 2841 s++;
3280af22
NIS
2842 if (s < PL_bufend) {
2843 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2844 newargv[1] = s;
3280af22 2845 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2846 s++;
2847 *s = '\0';
3280af22 2848 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2849 }
2850 else
3280af22 2851 newargv = PL_origargv;
774d564b 2852 newargv[0] = ipath;
b35112e7 2853 PERL_FPU_PRE_EXEC
b4748376 2854 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2855 PERL_FPU_POST_EXEC
cea2e8a9 2856 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2857 }
bf4acbe4 2858#endif
748a9306 2859 if (d) {
3280af22
NIS
2860 U32 oldpdb = PL_perldb;
2861 bool oldn = PL_minus_n;
2862 bool oldp = PL_minus_p;
748a9306
LW
2863
2864 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2865 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2866
2867 if (*d++ == '-') {
a11ec5a9 2868 bool switches_done = PL_doswitches;
8cc95fdb
PP
2869 do {
2870 if (*d == 'M' || *d == 'm') {
2871 char *m = d;
2872 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2873 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2874 (int)(d - m), m);
2875 }
2876 d = moreswitches(d);
2877 } while (d);
f0b2cf55
YST
2878 if (PL_doswitches && !switches_done) {
2879 int argc = PL_origargc;
2880 char **argv = PL_origargv;
2881 do {
2882 argc--,argv++;
2883 } while (argc && argv[0][0] == '-' && argv[0][1]);
2884 init_argv_symbols(argc,argv);
2885 }
155aba94
GS
2886 if ((PERLDB_LINE && !oldpdb) ||
2887 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
2888 /* if we have already added "LINE: while (<>) {",
2889 we must not do it again */
748a9306 2890 {
3280af22
NIS
2891 sv_setpv(PL_linestr, "");
2892 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2893 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2894 PL_last_lop = PL_last_uni = Nullch;
3280af22 2895 PL_preambled = FALSE;
84902520 2896 if (PERLDB_LINE)
3280af22 2897 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2898 goto retry;
2899 }
a11ec5a9
RGS
2900 if (PL_doswitches && !switches_done) {
2901 int argc = PL_origargc;
2902 char **argv = PL_origargv;
2903 do {
2904 argc--,argv++;
2905 } while (argc && argv[0][0] == '-' && argv[0][1]);
2906 init_argv_symbols(argc,argv);
2907 }
a0d0e21e 2908 }
79072805 2909 }
9f68db38 2910 }
79072805 2911 }
3280af22
NIS
2912 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2913 PL_bufptr = s;
2914 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2915 return yylex();
ae986130 2916 }
378cc40b 2917 goto retry;
4fdae800 2918 case '\r':
6a27c188 2919#ifdef PERL_STRICT_CR
cea2e8a9 2920 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2921 Perl_croak(aTHX_
cc507455 2922 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2923#endif
4fdae800 2924 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2925#ifdef MACOS_TRADITIONAL
2926 case '\312':
2927#endif
378cc40b
LW
2928 s++;
2929 goto retry;
378cc40b 2930 case '#':
e929a76b 2931 case '\n':
3280af22 2932 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2933 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2934 /* handle eval qq[#line 1 "foo"\n ...] */
2935 CopLINE_dec(PL_curcop);
2936 incline(s);
2937 }
3280af22 2938 d = PL_bufend;
a687059c 2939 while (s < d && *s != '\n')
378cc40b 2940 s++;
0f85fab0 2941 if (s < d)
378cc40b 2942 s++;
78c267c1 2943 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2944 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2945 incline(s);
3280af22
NIS
2946 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2947 PL_bufptr = s;
2948 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2949 return yylex();
a687059c 2950 }
378cc40b 2951 }
a687059c 2952 else {
378cc40b 2953 *s = '\0';
3280af22 2954 PL_bufend = s;
a687059c 2955 }
378cc40b
LW
2956 goto retry;
2957 case '-':
79072805 2958 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2959 I32 ftst = 0;
2960
378cc40b 2961 s++;
3280af22 2962 PL_bufptr = s;
748a9306
LW
2963 tmp = *s++;
2964
bf4acbe4 2965 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2966 s++;
2967
2968 if (strnEQ(s,"=>",2)) {
3280af22 2969 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2970 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2971 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2972 } );
748a9306
LW
2973 OPERATOR('-'); /* unary minus */
2974 }
3280af22 2975 PL_last_uni = PL_oldbufptr;
748a9306 2976 switch (tmp) {
e5edeb50
JH
2977 case 'r': ftst = OP_FTEREAD; break;
2978 case 'w': ftst = OP_FTEWRITE; break;
2979 case 'x': ftst = OP_FTEEXEC; break;
2980 case 'o': ftst = OP_FTEOWNED; break;
2981 case 'R': ftst = OP_FTRREAD; break;
2982 case 'W': ftst = OP_FTRWRITE; break;
2983 case 'X': ftst = OP_FTREXEC; break;
2984 case 'O': ftst = OP_FTROWNED; break;
2985 case 'e': ftst = OP_FTIS; break;
2986 case 'z': ftst = OP_FTZERO; break;
2987 case 's': ftst = OP_FTSIZE; break;
2988 case 'f': ftst = OP_FTFILE; break;
2989 case 'd': ftst = OP_FTDIR; break;
2990 case 'l': ftst = OP_FTLINK; break;
2991 case 'p': ftst = OP_FTPIPE; break;
2992 case 'S': ftst = OP_FTSOCK; break;
2993 case 'u': ftst = OP_FTSUID; break;
2994 case 'g': ftst = OP_FTSGID; break;
2995 case 'k': ftst = OP_FTSVTX; break;
2996 case 'b': ftst = OP_FTBLK; break;
2997 case 'c': ftst = OP_FTCHR; break;
2998 case 't': ftst = OP_FTTTY; break;
2999 case 'T': ftst = OP_FTTEXT; break;
3000 case 'B': ftst = OP_FTBINARY; break;
3001 case 'M': case 'A': case 'C':
3002 gv_fetchpv("\024",TRUE, SVt_PV);
3003 switch (tmp) {
3004 case 'M': ftst = OP_FTMTIME; break;
3005 case 'A': ftst = OP_FTATIME; break;
3006 case 'C': ftst = OP_FTCTIME; break;
3007 default: break;
3008 }
3009 break;
378cc40b 3010 default:
378cc40b
LW
3011 break;
3012 }
e5edeb50 3013 if (ftst) {
eb160463 3014 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3015 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3016 "### Saw file test %c\n", (int)ftst);
5f80b19c 3017 } );
e5edeb50
JH
3018 FTST(ftst);
3019 }
3020 else {
3021 /* Assume it was a minus followed by a one-letter named
3022 * subroutine call (or a -bareword), then. */
95c31fe3 3023 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3024 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3025 (int) tmp);
5f80b19c 3026 } );
3cf7b4c4 3027 s = --PL_bufptr;
e5edeb50 3028 }
378cc40b 3029 }
a687059c
LW
3030 tmp = *s++;
3031 if (*s == tmp) {
3032 s++;
3280af22 3033 if (PL_expect == XOPERATOR)
79072805
LW
3034 TERM(POSTDEC);
3035 else
3036 OPERATOR(PREDEC);
3037 }
3038 else if (*s == '>') {
3039 s++;
3040 s = skipspace(s);
7e2040f0 3041 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3042 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3043 TOKEN(ARROW);
79072805 3044 }
748a9306
LW
3045 else if (*s == '$')
3046 OPERATOR(ARROW);
463ee0b2 3047 else
748a9306 3048 TERM(ARROW);
a687059c 3049 }
3280af22 3050 if (PL_expect == XOPERATOR)
79072805
LW
3051 Aop(OP_SUBTRACT);
3052 else {
3280af22 3053 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3054 check_uni();
79072805 3055 OPERATOR('-'); /* unary minus */
2f3197b3 3056 }
79072805 3057
378cc40b 3058 case '+':
a687059c
LW
3059 tmp = *s++;
3060 if (*s == tmp) {
378cc40b 3061 s++;
3280af22 3062 if (PL_expect == XOPERATOR)
79072805
LW
3063 TERM(POSTINC);
3064 else
3065 OPERATOR(PREINC);
378cc40b 3066 }
3280af22 3067 if (PL_expect == XOPERATOR)
79072805
LW
3068 Aop(OP_ADD);
3069 else {
3280af22 3070 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3071 check_uni();
a687059c 3072 OPERATOR('+');
2f3197b3 3073 }
a687059c 3074
378cc40b 3075 case '*':
3280af22
NIS
3076 if (PL_expect != XOPERATOR) {
3077 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3078 PL_expect = XOPERATOR;
3079 force_ident(PL_tokenbuf, '*');
3080 if (!*PL_tokenbuf)
a0d0e21e 3081 PREREF('*');
79072805 3082 TERM('*');
a687059c 3083 }
79072805
LW
3084 s++;
3085 if (*s == '*') {
a687059c 3086 s++;
79072805 3087 PWop(OP_POW);
a687059c 3088 }
79072805
LW
3089 Mop(OP_MULTIPLY);
3090
378cc40b 3091 case '%':
3280af22 3092 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3093 ++s;
3094 Mop(OP_MODULO);
a687059c 3095 }
3280af22
NIS
3096 PL_tokenbuf[0] = '%';
3097 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3098 if (!PL_tokenbuf[1]) {
bbce6d69 3099 PREREF('%');
a687059c 3100 }
3280af22 3101 PL_pending_ident = '%';
bbce6d69 3102 TERM('%');
a687059c 3103
378cc40b 3104 case '^':
79072805 3105 s++;
a0d0e21e 3106 BOop(OP_BIT_XOR);
79072805 3107 case '[':
3280af22 3108 PL_lex_brackets++;
79072805 3109 /* FALL THROUGH */
378cc40b 3110 case '~':
378cc40b 3111 case ',':
378cc40b
LW
3112 tmp = *s++;
3113 OPERATOR(tmp);
a0d0e21e
LW
3114 case ':':
3115 if (s[1] == ':') {
3116 len = 0;
3117 goto just_a_word;
3118 }
3119 s++;
09bef843
SB
3120 switch (PL_expect) {
3121 OP *attrs;
3122 case XOPERATOR:
3123 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3124 break;
3125 PL_bufptr = s; /* update in case we back off */
3126 goto grabattrs;
3127 case XATTRBLOCK:
3128 PL_expect = XBLOCK;
3129 goto grabattrs;
3130 case XATTRTERM:
3131 PL_expect = XTERMBLOCK;
3132 grabattrs:
3133 s = skipspace(s);
3134 attrs = Nullop;
7e2040f0 3135 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3136 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3137 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3138 if (tmp < 0) tmp = -tmp;
3139 switch (tmp) {
3140 case KEY_or:
3141 case KEY_and:
c963b151 3142 case KEY_err:
f9829d6b
GS
3143 case KEY_for:
3144 case KEY_unless:
3145 case KEY_if:
3146 case KEY_while:
3147 case KEY_until:
3148 goto got_attrs;
3149 default:
3150 break;
3151 }
3152 }
09bef843
SB
3153 if (*d == '(') {
3154 d = scan_str(d,TRUE,TRUE);
3155 if (!d) {
09bef843
SB
3156 /* MUST advance bufptr here to avoid bogus
3157 "at end of line" context messages from yyerror().
3158 */
3159 PL_bufptr = s + len;
3160 yyerror("Unterminated attribute parameter in attribute list");
3161 if (attrs)
3162 op_free(attrs);
bbf60fe6 3163 return REPORT(0); /* EOF indicator */
09bef843
SB
3164 }
3165 }
3166 if (PL_lex_stuff) {
3167 SV *sv = newSVpvn(s, len);
3168 sv_catsv(sv, PL_lex_stuff);
3169 attrs = append_elem(OP_LIST, attrs,
3170 newSVOP(OP_CONST, 0, sv));
3171 SvREFCNT_dec(PL_lex_stuff);
3172 PL_lex_stuff = Nullsv;
3173 }
3174 else {
371fce9b
DM
3175 if (len == 6 && strnEQ(s, "unique", len)) {
3176 if (PL_in_my == KEY_our)
3177#ifdef USE_ITHREADS
3178 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3179#else
3180 ; /* skip to avoid loading attributes.pm */
3181#endif
3182 else
3183 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3184 }
3185
d3cea301
SB
3186 /* NOTE: any CV attrs applied here need to be part of
3187 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3188 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3189 CvLVALUE_on(PL_compcv);
3190 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3191 CvLOCKED_on(PL_compcv);
3192 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3193 CvMETHOD_on(PL_compcv);
06492da6
SF
3194 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3195 CvASSERTION_on(PL_compcv);
78f9721b
SM
3196 /* After we've set the flags, it could be argued that
3197 we don't need to do the attributes.pm-based setting
3198 process, and shouldn't bother appending recognized
d3cea301
SB
3199 flags. To experiment with that, uncomment the
3200 following "else". (Note that's already been
3201 uncommented. That keeps the above-applied built-in
3202 attributes from being intercepted (and possibly
3203 rejected) by a package's attribute routines, but is
3204 justified by the performance win for the common case
3205 of applying only built-in attributes.) */
0256094b 3206 else
78f9721b
SM
3207 attrs = append_elem(OP_LIST, attrs,
3208 newSVOP(OP_CONST, 0,
3209 newSVpvn(s, len)));
09bef843
SB
3210 }
3211 s = skipspace(d);
0120eecf 3212 if (*s == ':' && s[1] != ':')
09bef843 3213 s = skipspace(s+1);
0120eecf
GS
3214 else if (s == d)
3215 break; /* require real whitespace or :'s */
09bef843 3216 }
f9829d6b 3217 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3218 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3219 char q = ((*s == '\'') ? '"' : '\'');
3220 /* If here for an expression, and parsed no attrs, back off. */
3221 if (tmp == '=' && !attrs) {
3222 s = PL_bufptr;
3223 break;
3224 }
3225 /* MUST advance bufptr here to avoid bogus "at end of line"
3226 context messages from yyerror().
3227 */
3228 PL_bufptr = s;
3229 if (!*s)
3230 yyerror("Unterminated attribute list");
3231 else
3232 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3233 q, *s, q));
3234 if (attrs)
3235 op_free(attrs);
3236 OPERATOR(':');
3237 }
f9829d6b 3238 got_attrs:
09bef843
SB
3239 if (attrs) {
3240 PL_nextval[PL_nexttoke].opval = attrs;
3241 force_next(THING);
3242 }
3243 TOKEN(COLONATTR);
3244 }
a0d0e21e 3245 OPERATOR(':');
8990e307
LW
3246 case '(':
3247 s++;
3280af22
NIS
3248 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3249 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3250 else
3280af22 3251 PL_expect = XTERM;
4a202259 3252 s = skipspace(s);
a0d0e21e 3253 TOKEN('(');
378cc40b 3254 case ';':
f4dd75d9 3255 CLINE;
378cc40b
LW
3256 tmp = *s++;
3257 OPERATOR(tmp);
3258 case ')':
378cc40b 3259 tmp = *s++;
16d20bd9
AD
3260 s = skipspace(s);
3261 if (*s == '{')
3262 PREBLOCK(tmp);
378cc40b 3263 TERM(tmp);
79072805
LW
3264 case ']':
3265 s++;
3280af22 3266 if (PL_lex_brackets <= 0)
d98d5fff 3267 yyerror("Unmatched right square bracket");
463ee0b2 3268 else
3280af22
NIS
3269 --PL_lex_brackets;
3270 if (PL_lex_state == LEX_INTERPNORMAL) {
3271 if (PL_lex_brackets == 0) {
a0d0e21e 3272 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3273 PL_lex_state = LEX_INTERPEND;
79072805
LW
3274 }
3275 }
4633a7c4 3276 TERM(']');
79072805
LW
3277 case '{':
3278 leftbracket:
79072805 3279 s++;
3280af22 3280 if (PL_lex_brackets > 100) {
8edd5f42 3281 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3282 }
3280af22 3283 switch (PL_expect) {
a0d0e21e 3284 case XTERM:
3280af22 3285 if (PL_lex_formbrack) {
a0d0e21e
LW
3286 s--;
3287 PRETERMBLOCK(DO);
3288 }
3280af22
NIS
3289 if (PL_oldoldbufptr == PL_last_lop)
3290 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3291 else
3280af22 3292 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3293 OPERATOR(HASHBRACK);
a0d0e21e 3294 case XOPERATOR:
bf4acbe4 3295 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3296 s++;
44a8e56a 3297 d = s;
3280af22
NIS
3298 PL_tokenbuf[0] = '\0';
3299 if (d < PL_bufend && *d == '-') {
3300 PL_tokenbuf[0] = '-';
44a8e56a 3301 d++;
bf4acbe4 3302 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3303 d++;
3304 }
7e2040f0 3305 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3306 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3307 FALSE, &len);
bf4acbe4 3308 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3309 d++;
3310 if (*d == '}') {
3280af22 3311 char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3312 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3313 if (minus)
3314 force_next('-');
748a9306
LW
3315 }
3316 }
3317 /* FALL THROUGH */
09bef843 3318 case XATTRBLOCK:
748a9306 3319 case XBLOCK:
3280af22
NIS
3320 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3321 PL_expect = XSTATE;
a0d0e21e 3322 break;
09bef843 3323 case XATTRTERM:
a0d0e21e 3324 case XTERMBLOCK:
3280af22
NIS
3325 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3326 PL_expect = XSTATE;
a0d0e21e
LW
3327 break;
3328 default: {
3329 char *t;
3280af22
NIS
3330 if (PL_oldoldbufptr == PL_last_lop)
3331 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3332 else
3280af22 3333 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3334 s = skipspace(s);
8452ff4b
SB
3335 if (*s == '}') {
3336 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3337 PL_expect = XTERM;
3338 /* This hack is to get the ${} in the message. */
3339 PL_bufptr = s+1;
3340 yyerror("syntax error");
3341 break;
3342 }
a0d0e21e 3343 OPERATOR(HASHBRACK);
8452ff4b 3344 }
b8a4b1be
GS
3345 /* This hack serves to disambiguate a pair of curlies
3346 * as being a block or an anon hash. Normally, expectation
3347 * determines that, but in cases where we're not in a
3348 * position to expect anything in particular (like inside
3349 * eval"") we have to resolve the ambiguity. This code
3350 * covers the case where the first term in the curlies is a
3351 * quoted string. Most other cases need to be explicitly
3352 * disambiguated by prepending a `+' before the opening
3353 * curly in order to force resolution as an anon hash.
3354 *
3355 * XXX should probably propagate the outer expectation
3356 * into eval"" to rely less on this hack, but that could
3357 * potentially break current behavior of eval"".
3358 * GSAR 97-07-21
3359 */
3360 t = s;
3361 if (*s == '\'' || *s == '"' || *s == '`') {
3362 /* common case: get past first string, handling escapes */
3280af22 3363 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3364 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3365 t++;
3366 t++;
a0d0e21e 3367 }
b8a4b1be 3368 else if (*s == 'q') {
3280af22 3369 if (++t < PL_bufend
b8a4b1be 3370 && (!isALNUM(*t)
3280af22 3371 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3372 && !isALNUM(*t))))
3373 {
abc667d1 3374 /* skip q//-like construct */
b8a4b1be
GS
3375 char *tmps;
3376 char open, close, term;
3377 I32 brackets = 1;
3378
3280af22 3379 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3380 t++;
abc667d1
DM
3381 /* check for q => */
3382 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3383 OPERATOR(HASHBRACK);
3384 }
b8a4b1be
GS
3385 term = *t;
3386 open = term;
3387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3388 term = tmps[5];
3389 close = term;
3390 if (open == close)
3280af22
NIS
3391 for (t++; t < PL_bufend; t++) {
3392 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3393 t++;
6d07e5e9 3394 else if (*t == open)
b8a4b1be
GS
3395 break;
3396 }
abc667d1 3397 else {
3280af22
NIS
3398 for (t++; t < PL_bufend; t++) {
3399 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3400 t++;
6d07e5e9 3401 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3402 break;
3403 else if (*t == open)
3404 brackets++;
3405 }
abc667d1
DM
3406 }
3407 t++;
b8a4b1be 3408 }
abc667d1
DM
3409 else
3410 /* skip plain q word */
3411 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3412 t += UTF8SKIP(t);
a0d0e21e 3413 }
7e2040f0 3414 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3415 t += UTF8SKIP(t);
7e2040f0 3416 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3417 t += UTF8SKIP(t);
a0d0e21e 3418 }
3280af22 3419 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3420 t++;
b8a4b1be
GS
3421 /* if comma follows first term, call it an anon hash */
3422 /* XXX it could be a comma expression with loop modifiers */
3280af22 3423 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3424 || (*t == '=' && t[1] == '>')))
a0d0e21e 3425 OPERATOR(HASHBRACK);
3280af22 3426 if (PL_expect == XREF)
4e4e412b 3427 PL_expect = XTERM;
a0d0e21e 3428 else {
3280af22
NIS
3429 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3430 PL_expect = XSTATE;
a0d0e21e 3431 }
8990e307 3432 }
a0d0e21e 3433 break;
463ee0b2 3434 }
57843af0 3435 yylval.ival = CopLINE(PL_curcop);
79072805 3436 if (isSPACE(*s) || *s == '#')
3280af22 3437 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3438 TOKEN('{');
378cc40b 3439 case '}':
79072805
LW
3440 rightbracket:
3441 s++;
3280af22 3442 if (PL_lex_brackets <= 0)
d98d5fff 3443 yyerror("Unmatched right curly bracket");
463ee0b2 3444 else
3280af22 3445 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3446 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3447 PL_lex_formbrack = 0;
3448 if (PL_lex_state == LEX_INTERPNORMAL) {
3449 if (PL_lex_brackets == 0) {
9059aa12
LW
3450 if (PL_expect & XFAKEBRACK) {
3451 PL_expect &= XENUMMASK;
3280af22
NIS
3452 PL_lex_state = LEX_INTERPEND;
3453 PL_bufptr = s;
cea2e8a9 3454 return yylex(); /* ignore fake brackets */
79072805 3455 }
fa83b5b6 3456 if (*s == '-' && s[1] == '>')
3280af22 3457 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3458 else if (*s != '[' && *s != '{')
3280af22 3459 PL_lex_state = LEX_INTERPEND;
79072805
LW
3460 }
3461 }
9059aa12
LW
3462 if (PL_expect & XFAKEBRACK) {
3463 PL_expect &= XENUMMASK;
3280af22 3464 PL_bufptr = s;
cea2e8a9 3465 return yylex(); /* ignore fake brackets */
748a9306 3466 }
79072805
LW
3467 force_next('}');
3468 TOKEN(';');
378cc40b
LW
3469 case '&':
3470 s++;
3471 tmp = *s++;
3472 if (tmp == '&')
a0d0e21e 3473 AOPERATOR(ANDAND);
378cc40b 3474 s--;
3280af22 3475 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3476 if (ckWARN(WARN_SEMICOLON)
3477 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3478 {
57843af0 3479 CopLINE_dec(PL_curcop);
9014280d 3480 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3481 CopLINE_inc(PL_curcop);
463ee0b2 3482 }
79072805 3483 BAop(OP_BIT_AND);
463ee0b2 3484 }
79072805 3485
3280af22
NIS
3486 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3487 if (*PL_tokenbuf) {
3488 PL_expect = XOPERATOR;
3489 force_ident(PL_tokenbuf, '&');
463ee0b2 3490 }
79072805
LW
3491 else
3492 PREREF('&');
c07a80fd 3493 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3494 TERM('&');
3495
378cc40b
LW
3496 case '|':
3497 s++;
3498 tmp = *s++;
3499 if (tmp == '|')
a0d0e21e 3500 AOPERATOR(OROR);
378cc40b 3501 s--;
79072805 3502 BOop(OP_BIT_OR);
378cc40b
LW
3503 case '=':
3504 s++;
3505 tmp = *s++;
3506 if (tmp == '=')
79072805
LW
3507 Eop(OP_EQ);
3508 if (tmp == '>')
3509 OPERATOR(',');
378cc40b 3510 if (tmp == '~')
79072805 3511 PMop(OP_MATCH);
599cee73 3512 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3513 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3514 s--;
3280af22
NIS
3515 if (PL_expect == XSTATE && isALPHA(tmp) &&
3516 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3517 {
3280af22
NIS
3518 if (PL_in_eval && !PL_rsfp) {
3519 d = PL_bufend;
a5f75d66
AD
3520 while (s < d) {
3521 if (*s++ == '\n') {
3522 incline(s);
3523 if (strnEQ(s,"=cut",4)) {
3524 s = strchr(s,'\n');
3525 if (s)
3526 s++;
3527 else
3528 s = d;
3529 incline(s);
3530 goto retry;
3531 }
3532 }
3533 }
3534 goto retry;
3535 }
3280af22
NIS
3536 s = PL_bufend;
3537 PL_doextract = TRUE;
a0d0e21e
LW
3538 goto retry;
3539 }
3280af22 3540 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3541 char *t;
51882d45 3542#ifdef PERL_STRICT_CR
bf4acbe4 3543 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3544#else
bf4acbe4 3545 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3546#endif
a0d0e21e
LW
3547 if (*t == '\n' || *t == '#') {
3548 s--;
3280af22 3549 PL_expect = XBLOCK;
a0d0e21e
LW
3550 goto leftbracket;
3551 }
79072805 3552 }
a0d0e21e
LW
3553 yylval.ival = 0;
3554 OPERATOR(ASSIGNOP);
378cc40b
LW
3555 case '!':