This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Pull the am-I-utf8-or-not logic into one place (S_newSV_maybe_utf8)
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a 64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff 66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
bbf60fe6
DM
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
79072805
LW
94#ifdef ff_next
95#undef ff_next
d48672a2
LW
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 136# define REPORT(retval) tokereport(s,(int)retval)
998054bd 137#else
bbf60fe6 138# define REPORT(retval) (retval)
998054bd
SC
139#endif
140
bbf60fe6
DM
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 161
a687059c
LW
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
a687059c 166 */
bbf60fe6
DM
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
6f33ba73 169 PL_expect = x, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
bbf60fe6
DM
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 178
bbf60fe6
DM
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
3280af22
NIS
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
79072805 186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
bbf60fe6
DM
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
9041c2e3 203{
bbf60fe6
DM
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 270
bbf60fe6
DM
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
278 SV* report = NEWSV(0, 60);
279
280 Perl_sv_catpvf(aTHX_ report, "<== ");
281
282 for (p = debug_tokens; p->token; p++) {
283 if (p->token == (int)rv) {
284 name = p->name;
285 type = p->type;
286 break;
287 }
288 }
289 if (name)
290 Perl_sv_catpvf(aTHX_ report, "%s", name);
291 else if ((char)rv > ' ' && (char)rv < '~')
292 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
293 else if (!rv)
294 Perl_sv_catpvf(aTHX_ report, "EOF");
295 else
296 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
297 switch (type) {
298 case TOKENTYPE_NONE:
299 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
300 break;
301 case TOKENTYPE_IVAL:
302 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
303 break;
304 case TOKENTYPE_OPNUM:
305 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
306 PL_op_name[yylval.ival]);
307 break;
308 case TOKENTYPE_PVAL:
309 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
310 break;
311 case TOKENTYPE_OPVAL:
401441c0
RGS
312 if (yylval.opval)
313 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 314 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
315 else
316 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
317 break;
318 }
319 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
998054bd
SC
320 if (s - PL_bufptr > 0)
321 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
322 else {
323 if (PL_oldbufptr && *PL_oldbufptr)
324 sv_catpv(report, PL_tokenbuf);
325 }
bbf60fe6
DM
326 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
327 };
328 return (int)rv;
998054bd
SC
329}
330
8fa7f367
JH
331#endif
332
ffb4593c
NT
333/*
334 * S_ao
335 *
c963b151
BD
336 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
337 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
338 */
339
76e3520e 340STATIC int
cea2e8a9 341S_ao(pTHX_ int toketype)
a0d0e21e 342{
3280af22
NIS
343 if (*PL_bufptr == '=') {
344 PL_bufptr++;
a0d0e21e
LW
345 if (toketype == ANDAND)
346 yylval.ival = OP_ANDASSIGN;
347 else if (toketype == OROR)
348 yylval.ival = OP_ORASSIGN;
c963b151
BD
349 else if (toketype == DORDOR)
350 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
351 toketype = ASSIGNOP;
352 }
353 return toketype;
354}
355
ffb4593c
NT
356/*
357 * S_no_op
358 * When Perl expects an operator and finds something else, no_op
359 * prints the warning. It always prints "<something> found where
360 * operator expected. It prints "Missing semicolon on previous line?"
361 * if the surprise occurs at the start of the line. "do you need to
362 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
363 * where the compiler doesn't know if foo is a method call or a function.
364 * It prints "Missing operator before end of line" if there's nothing
365 * after the missing operator, or "... before <...>" if there is something
366 * after the missing operator.
367 */
368
76e3520e 369STATIC void
cea2e8a9 370S_no_op(pTHX_ char *what, char *s)
463ee0b2 371{
3280af22
NIS
372 char *oldbp = PL_bufptr;
373 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 374
1189a94a
GS
375 if (!s)
376 s = oldbp;
07c798fb 377 else
1189a94a 378 PL_bufptr = s;
cea2e8a9 379 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
380 if (ckWARN_d(WARN_SYNTAX)) {
381 if (is_first)
382 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
383 "\t(Missing semicolon on previous line?)\n");
384 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
385 char *t;
386 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
387 if (t < PL_bufptr && isSPACE(*t))
388 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
389 "\t(Do you need to predeclare %.*s?)\n",
390 t - PL_oldoldbufptr, PL_oldoldbufptr);
391 }
392 else {
393 assert(s >= oldbp);
394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
395 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
396 }
07c798fb 397 }
3280af22 398 PL_bufptr = oldbp;
8990e307
LW
399}
400
ffb4593c
NT
401/*
402 * S_missingterm
403 * Complain about missing quote/regexp/heredoc terminator.
404 * If it's called with (char *)NULL then it cauterizes the line buffer.
405 * If we're in a delimited string and the delimiter is a control
406 * character, it's reformatted into a two-char sequence like ^C.
407 * This is fatal.
408 */
409
76e3520e 410STATIC void
cea2e8a9 411S_missingterm(pTHX_ char *s)
8990e307
LW
412{
413 char tmpbuf[3];
414 char q;
415 if (s) {
416 char *nl = strrchr(s,'\n');
d2719217 417 if (nl)
8990e307
LW
418 *nl = '\0';
419 }
9d116dd7
JH
420 else if (
421#ifdef EBCDIC
422 iscntrl(PL_multi_close)
423#else
424 PL_multi_close < 32 || PL_multi_close == 127
425#endif
426 ) {
8990e307 427 *tmpbuf = '^';
3280af22 428 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
429 s = "\\n";
430 tmpbuf[2] = '\0';
431 s = tmpbuf;
432 }
433 else {
eb160463 434 *tmpbuf = (char)PL_multi_close;
8990e307
LW
435 tmpbuf[1] = '\0';
436 s = tmpbuf;
437 }
438 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 439 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 440}
79072805 441
ffb4593c
NT
442/*
443 * Perl_deprecate
ffb4593c
NT
444 */
445
79072805 446void
864dbfa3 447Perl_deprecate(pTHX_ char *s)
a0d0e21e 448{
599cee73 449 if (ckWARN(WARN_DEPRECATED))
9014280d 450 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
451}
452
12bcd1a6
PM
453void
454Perl_deprecate_old(pTHX_ char *s)
455{
456 /* This function should NOT be called for any new deprecated warnings */
457 /* Use Perl_deprecate instead */
458 /* */
459 /* It is here to maintain backward compatibility with the pre-5.8 */
460 /* warnings category hierarchy. The "deprecated" category used to */
461 /* live under the "syntax" category. It is now a top-level category */
462 /* in its own right. */
463
464 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
465 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
466 "Use of %s is deprecated", s);
467}
468
ffb4593c
NT
469/*
470 * depcom
9cbb5ea2 471 * Deprecate a comma-less variable list.
ffb4593c
NT
472 */
473
76e3520e 474STATIC void
cea2e8a9 475S_depcom(pTHX)
a0d0e21e 476{
12bcd1a6 477 deprecate_old("comma-less variable list");
a0d0e21e
LW
478}
479
ffb4593c 480/*
9cbb5ea2
GS
481 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
482 * utf16-to-utf8-reversed.
ffb4593c
NT
483 */
484
c39cd008
GS
485#ifdef PERL_CR_FILTER
486static void
487strip_return(SV *sv)
488{
489 register char *s = SvPVX(sv);
490 register char *e = s + SvCUR(sv);
491 /* outer loop optimized to do nothing if there are no CR-LFs */
492 while (s < e) {
493 if (*s++ == '\r' && *s == '\n') {
494 /* hit a CR-LF, need to copy the rest */
495 register char *d = s - 1;
496 *d++ = *s++;
497 while (s < e) {
498 if (*s == '\r' && s[1] == '\n')
499 s++;
500 *d++ = *s++;
501 }
502 SvCUR(sv) -= s - d;
503 return;
504 }
505 }
506}
a868473f 507
76e3520e 508STATIC I32
c39cd008 509S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 510{
c39cd008
GS
511 I32 count = FILTER_READ(idx+1, sv, maxlen);
512 if (count > 0 && !maxlen)
513 strip_return(sv);
514 return count;
a868473f
NIS
515}
516#endif
517
ffb4593c
NT
518/*
519 * Perl_lex_start
9cbb5ea2
GS
520 * Initialize variables. Uses the Perl save_stack to save its state (for
521 * recursive calls to the parser).
ffb4593c
NT
522 */
523
a0d0e21e 524void
864dbfa3 525Perl_lex_start(pTHX_ SV *line)
79072805 526{
8990e307
LW
527 char *s;
528 STRLEN len;
529
3280af22
NIS
530 SAVEI32(PL_lex_dojoin);
531 SAVEI32(PL_lex_brackets);
3280af22
NIS
532 SAVEI32(PL_lex_casemods);
533 SAVEI32(PL_lex_starts);
534 SAVEI32(PL_lex_state);
7766f137 535 SAVEVPTR(PL_lex_inpat);
3280af22 536 SAVEI32(PL_lex_inwhat);
18b09519
GS
537 if (PL_lex_state == LEX_KNOWNEXT) {
538 I32 toke = PL_nexttoke;
539 while (--toke >= 0) {
540 SAVEI32(PL_nexttype[toke]);
541 SAVEVPTR(PL_nextval[toke]);
542 }
543 SAVEI32(PL_nexttoke);
18b09519 544 }
57843af0 545 SAVECOPLINE(PL_curcop);
3280af22
NIS
546 SAVEPPTR(PL_bufptr);
547 SAVEPPTR(PL_bufend);
548 SAVEPPTR(PL_oldbufptr);
549 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
550 SAVEPPTR(PL_last_lop);
551 SAVEPPTR(PL_last_uni);
3280af22
NIS
552 SAVEPPTR(PL_linestart);
553 SAVESPTR(PL_linestr);
8edd5f42
RGS
554 SAVEGENERICPV(PL_lex_brackstack);
555 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 556 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
557 SAVESPTR(PL_lex_stuff);
558 SAVEI32(PL_lex_defer);
09bef843 559 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 560 SAVESPTR(PL_lex_repl);
bebdddfc
GS
561 SAVEINT(PL_expect);
562 SAVEINT(PL_lex_expect);
3280af22
NIS
563
564 PL_lex_state = LEX_NORMAL;
565 PL_lex_defer = 0;
566 PL_expect = XSTATE;
567 PL_lex_brackets = 0;
3280af22
NIS
568 New(899, PL_lex_brackstack, 120, char);
569 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
570 PL_lex_casemods = 0;
571 *PL_lex_casestack = '\0';
572 PL_lex_dojoin = 0;
573 PL_lex_starts = 0;
574 PL_lex_stuff = Nullsv;
575 PL_lex_repl = Nullsv;
576 PL_lex_inpat = 0;
76be56bc 577 PL_nexttoke = 0;
3280af22 578 PL_lex_inwhat = 0;
09bef843 579 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
580 PL_linestr = line;
581 if (SvREADONLY(PL_linestr))
582 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
583 s = SvPV(PL_linestr, len);
6f27f9a7 584 if (!len || s[len-1] != ';') {
3280af22
NIS
585 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
586 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
587 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 588 }
3280af22
NIS
589 SvTEMP_off(PL_linestr);
590 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
591 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 592 PL_last_lop = PL_last_uni = Nullch;
3280af22 593 PL_rsfp = 0;
79072805 594}
a687059c 595
ffb4593c
NT
596/*
597 * Perl_lex_end
9cbb5ea2
GS
598 * Finalizer for lexing operations. Must be called when the parser is
599 * done with the lexer.
ffb4593c
NT
600 */
601
463ee0b2 602void
864dbfa3 603Perl_lex_end(pTHX)
463ee0b2 604{
3280af22 605 PL_doextract = FALSE;
463ee0b2
LW
606}
607
ffb4593c
NT
608/*
609 * S_incline
610 * This subroutine has nothing to do with tilting, whether at windmills
611 * or pinball tables. Its name is short for "increment line". It
57843af0 612 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 613 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
614 * # line 500 "foo.pm"
615 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
616 */
617
76e3520e 618STATIC void
cea2e8a9 619S_incline(pTHX_ char *s)
463ee0b2
LW
620{
621 char *t;
622 char *n;
73659bf1 623 char *e;
463ee0b2 624 char ch;
463ee0b2 625
57843af0 626 CopLINE_inc(PL_curcop);
463ee0b2
LW
627 if (*s++ != '#')
628 return;
bf4acbe4 629 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
630 if (strnEQ(s, "line", 4))
631 s += 4;
632 else
633 return;
084592ab 634 if (SPACE_OR_TAB(*s))
73659bf1 635 s++;
4e553d73 636 else
73659bf1 637 return;
bf4acbe4 638 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
639 if (!isDIGIT(*s))
640 return;
641 n = s;
642 while (isDIGIT(*s))
643 s++;
bf4acbe4 644 while (SPACE_OR_TAB(*s))
463ee0b2 645 s++;
73659bf1 646 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 647 s++;
73659bf1
GS
648 e = t + 1;
649 }
463ee0b2 650 else {
463ee0b2 651 for (t = s; !isSPACE(*t); t++) ;
73659bf1 652 e = t;
463ee0b2 653 }
bf4acbe4 654 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
655 e++;
656 if (*e != '\n' && *e != '\0')
657 return; /* false alarm */
658
463ee0b2
LW
659 ch = *t;
660 *t = '\0';
f4dd75d9 661 if (t - s > 0) {
05ec9bb3 662 CopFILE_free(PL_curcop);
57843af0 663 CopFILE_set(PL_curcop, s);
f4dd75d9 664 }
463ee0b2 665 *t = ch;
57843af0 666 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
667}
668
ffb4593c
NT
669/*
670 * S_skipspace
671 * Called to gobble the appropriate amount and type of whitespace.
672 * Skips comments as well.
673 */
674
76e3520e 675STATIC char *
cea2e8a9 676S_skipspace(pTHX_ register char *s)
a687059c 677{
3280af22 678 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 679 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
680 s++;
681 return s;
682 }
683 for (;;) {
fd049845 684 STRLEN prevlen;
09bef843 685 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 686 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
687 while (s < PL_bufend && isSPACE(*s)) {
688 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
689 incline(s);
690 }
ffb4593c
NT
691
692 /* comment */
3280af22
NIS
693 if (s < PL_bufend && *s == '#') {
694 while (s < PL_bufend && *s != '\n')
463ee0b2 695 s++;
60e6418e 696 if (s < PL_bufend) {
463ee0b2 697 s++;
60e6418e
GS
698 if (PL_in_eval && !PL_rsfp) {
699 incline(s);
700 continue;
701 }
702 }
463ee0b2 703 }
ffb4593c
NT
704
705 /* only continue to recharge the buffer if we're at the end
706 * of the buffer, we're not reading from a source filter, and
707 * we're in normal lexing mode
708 */
09bef843
SB
709 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
710 PL_lex_state == LEX_FORMLINE)
463ee0b2 711 return s;
ffb4593c
NT
712
713 /* try to recharge the buffer */
9cbb5ea2
GS
714 if ((s = filter_gets(PL_linestr, PL_rsfp,
715 (prevlen = SvCUR(PL_linestr)))) == Nullch)
716 {
717 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
718 if (PL_minus_p) {
719 sv_setpv(PL_linestr,
720 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 721 PL_minus_n = PL_minus_p = 0;
a0d0e21e 722 }
01a19ab0
NC
723 else if (PL_minus_n) {
724 sv_setpvn(PL_linestr, ";}", 2);
725 PL_minus_n = 0;
726 }
a0d0e21e 727 else
4147a61b 728 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
729
730 /* reset variables for next time we lex */
9cbb5ea2
GS
731 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
732 = SvPVX(PL_linestr);
3280af22 733 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 734 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
735
736 /* Close the filehandle. Could be from -P preprocessor,
737 * STDIN, or a regular file. If we were reading code from
738 * STDIN (because the commandline held no -e or filename)
739 * then we don't close it, we reset it so the code can
740 * read from STDIN too.
741 */
742
3280af22
NIS
743 if (PL_preprocess && !PL_in_eval)
744 (void)PerlProc_pclose(PL_rsfp);
745 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
746 PerlIO_clearerr(PL_rsfp);
8990e307 747 else
3280af22
NIS
748 (void)PerlIO_close(PL_rsfp);
749 PL_rsfp = Nullfp;
463ee0b2
LW
750 return s;
751 }
ffb4593c
NT
752
753 /* not at end of file, so we only read another line */
09bef843
SB
754 /* make corresponding updates to old pointers, for yyerror() */
755 oldprevlen = PL_oldbufptr - PL_bufend;
756 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
757 if (PL_last_uni)
758 oldunilen = PL_last_uni - PL_bufend;
759 if (PL_last_lop)
760 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
761 PL_linestart = PL_bufptr = s + prevlen;
762 PL_bufend = s + SvCUR(PL_linestr);
763 s = PL_bufptr;
09bef843
SB
764 PL_oldbufptr = s + oldprevlen;
765 PL_oldoldbufptr = s + oldoldprevlen;
766 if (PL_last_uni)
767 PL_last_uni = s + oldunilen;
768 if (PL_last_lop)
769 PL_last_lop = s + oldloplen;
a0d0e21e 770 incline(s);
ffb4593c
NT
771
772 /* debugger active and we're not compiling the debugger code,
773 * so store the line into the debugger's array of lines
774 */
3280af22 775 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
776 SV *sv = NEWSV(85,0);
777
778 sv_upgrade(sv, SVt_PVMG);
3280af22 779 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
780 (void)SvIOK_on(sv);
781 SvIVX(sv) = 0;
57843af0 782 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 783 }
463ee0b2 784 }
a687059c 785}
378cc40b 786
ffb4593c
NT
787/*
788 * S_check_uni
789 * Check the unary operators to ensure there's no ambiguity in how they're
790 * used. An ambiguous piece of code would be:
791 * rand + 5
792 * This doesn't mean rand() + 5. Because rand() is a unary operator,
793 * the +5 is its argument.
794 */
795
76e3520e 796STATIC void
cea2e8a9 797S_check_uni(pTHX)
ba106d47 798{
2f3197b3 799 char *s;
a0d0e21e 800 char *t;
2f3197b3 801
3280af22 802 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 803 return;
3280af22
NIS
804 while (isSPACE(*PL_last_uni))
805 PL_last_uni++;
7e2040f0 806 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 807 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 808 return;
0453d815 809 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 810 char ch = *s;
0453d815 811 *s = '\0';
9014280d 812 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 813 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
814 PL_last_uni);
815 *s = ch;
816 }
2f3197b3
LW
817}
818
ffb4593c
NT
819/*
820 * LOP : macro to build a list operator. Its behaviour has been replaced
821 * with a subroutine, S_lop() for which LOP is just another name.
822 */
823
a0d0e21e
LW
824#define LOP(f,x) return lop(f,x,s)
825
ffb4593c
NT
826/*
827 * S_lop
828 * Build a list operator (or something that might be one). The rules:
829 * - if we have a next token, then it's a list operator [why?]
830 * - if the next thing is an opening paren, then it's a function
831 * - else it's a list operator
832 */
833
76e3520e 834STATIC I32
a0be28da 835S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 836{
79072805 837 yylval.ival = f;
35c8bce7 838 CLINE;
3280af22
NIS
839 PL_expect = x;
840 PL_bufptr = s;
841 PL_last_lop = PL_oldbufptr;
eb160463 842 PL_last_lop_op = (OPCODE)f;
3280af22 843 if (PL_nexttoke)
bbf60fe6 844 return REPORT(LSTOP);
79072805 845 if (*s == '(')
bbf60fe6 846 return REPORT(FUNC);
79072805
LW
847 s = skipspace(s);
848 if (*s == '(')
bbf60fe6 849 return REPORT(FUNC);
79072805 850 else
bbf60fe6 851 return REPORT(LSTOP);
79072805
LW
852}
853
ffb4593c
NT
854/*
855 * S_force_next
9cbb5ea2 856 * When the lexer realizes it knows the next token (for instance,
ffb4593c 857 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
858 * to know what token to return the next time the lexer is called. Caller
859 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
860 * handles the token correctly.
ffb4593c
NT
861 */
862
4e553d73 863STATIC void
cea2e8a9 864S_force_next(pTHX_ I32 type)
79072805 865{
3280af22
NIS
866 PL_nexttype[PL_nexttoke] = type;
867 PL_nexttoke++;
868 if (PL_lex_state != LEX_KNOWNEXT) {
869 PL_lex_defer = PL_lex_state;
870 PL_lex_expect = PL_expect;
871 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
872 }
873}
874
d0a148a6
NC
875STATIC SV *
876S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
877{
878 SV *sv = newSVpvn(start,len);
879 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
880 SvUTF8_on(sv);
881 return sv;
882}
883
ffb4593c
NT
884/*
885 * S_force_word
886 * When the lexer knows the next thing is a word (for instance, it has
887 * just seen -> and it knows that the next char is a word char, then
888 * it calls S_force_word to stick the next word into the PL_next lookahead.
889 *
890 * Arguments:
b1b65b59 891 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
892 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
893 * int check_keyword : if true, Perl checks to make sure the word isn't
894 * a keyword (do this if the word is a label, e.g. goto FOO)
895 * int allow_pack : if true, : characters will also be allowed (require,
896 * use, etc. do this)
9cbb5ea2 897 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
898 */
899
76e3520e 900STATIC char *
cea2e8a9 901S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 902{
463ee0b2
LW
903 register char *s;
904 STRLEN len;
4e553d73 905
463ee0b2
LW
906 start = skipspace(start);
907 s = start;
7e2040f0 908 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 909 (allow_pack && *s == ':') ||
15f0808c 910 (allow_initial_tick && *s == '\'') )
a0d0e21e 911 {
3280af22
NIS
912 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
913 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
914 return start;
915 if (token == METHOD) {
916 s = skipspace(s);
917 if (*s == '(')
3280af22 918 PL_expect = XTERM;
463ee0b2 919 else {
3280af22 920 PL_expect = XOPERATOR;
463ee0b2 921 }
79072805 922 }
d0a148a6
NC
923 PL_nextval[PL_nexttoke].opval
924 = (OP*)newSVOP(OP_CONST,0,
925 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 926 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
927 force_next(token);
928 }
929 return s;
930}
931
ffb4593c
NT
932/*
933 * S_force_ident
9cbb5ea2 934 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
935 * text only contains the "foo" portion. The first argument is a pointer
936 * to the "foo", and the second argument is the type symbol to prefix.
937 * Forces the next token to be a "WORD".
9cbb5ea2 938 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
939 */
940
76e3520e 941STATIC void
cea2e8a9 942S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
943{
944 if (s && *s) {
11343788 945 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 946 PL_nextval[PL_nexttoke].opval = o;
79072805 947 force_next(WORD);
748a9306 948 if (kind) {
11343788 949 o->op_private = OPpCONST_ENTERED;
55497cff 950 /* XXX see note in pp_entereval() for why we forgo typo
951 warnings if the symbol must be introduced in an eval.
952 GSAR 96-10-12 */
3280af22 953 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
954 kind == '$' ? SVt_PV :
955 kind == '@' ? SVt_PVAV :
956 kind == '%' ? SVt_PVHV :
957 SVt_PVGV
958 );
748a9306 959 }
79072805
LW
960 }
961}
962
1571675a
GS
963NV
964Perl_str_to_version(pTHX_ SV *sv)
965{
966 NV retval = 0.0;
967 NV nshift = 1.0;
968 STRLEN len;
969 char *start = SvPVx(sv,len);
3aa33fe5 970 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
971 char *end = start + len;
972 while (start < end) {
ba210ebe 973 STRLEN skip;
1571675a
GS
974 UV n;
975 if (utf)
9041c2e3 976 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
977 else {
978 n = *(U8*)start;
979 skip = 1;
980 }
981 retval += ((NV)n)/nshift;
982 start += skip;
983 nshift *= 1000;
984 }
985 return retval;
986}
987
4e553d73 988/*
ffb4593c
NT
989 * S_force_version
990 * Forces the next token to be a version number.
e759cc13
RGS
991 * If the next token appears to be an invalid version number, (e.g. "v2b"),
992 * and if "guessing" is TRUE, then no new token is created (and the caller
993 * must use an alternative parsing method).
ffb4593c
NT
994 */
995
76e3520e 996STATIC char *
e759cc13 997S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 998{
999 OP *version = Nullop;
44dcb63b 1000 char *d;
89bfa8cd 1001
1002 s = skipspace(s);
1003
44dcb63b 1004 d = s;
dd629d5b 1005 if (*d == 'v')
44dcb63b 1006 d++;
44dcb63b 1007 if (isDIGIT(*d)) {
e759cc13
RGS
1008 while (isDIGIT(*d) || *d == '_' || *d == '.')
1009 d++;
9f3d182e 1010 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1011 SV *ver;
b73d6f50 1012 s = scan_num(s, &yylval);
89bfa8cd 1013 version = yylval.opval;
dd629d5b
GS
1014 ver = cSVOPx(version)->op_sv;
1015 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1016 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
1017 SvNVX(ver) = str_to_version(ver);
1018 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1019 }
89bfa8cd 1020 }
e759cc13
RGS
1021 else if (guessing)
1022 return s;
89bfa8cd 1023 }
1024
1025 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1026 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1027 force_next(WORD);
89bfa8cd 1028
e759cc13 1029 return s;
89bfa8cd 1030}
1031
ffb4593c
NT
1032/*
1033 * S_tokeq
1034 * Tokenize a quoted string passed in as an SV. It finds the next
1035 * chunk, up to end of string or a backslash. It may make a new
1036 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1037 * turns \\ into \.
1038 */
1039
76e3520e 1040STATIC SV *
cea2e8a9 1041S_tokeq(pTHX_ SV *sv)
79072805
LW
1042{
1043 register char *s;
1044 register char *send;
1045 register char *d;
b3ac6de7
IZ
1046 STRLEN len = 0;
1047 SV *pv = sv;
79072805
LW
1048
1049 if (!SvLEN(sv))
b3ac6de7 1050 goto finish;
79072805 1051
a0d0e21e 1052 s = SvPV_force(sv, len);
21a311ee 1053 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1054 goto finish;
463ee0b2 1055 send = s + len;
79072805
LW
1056 while (s < send && *s != '\\')
1057 s++;
1058 if (s == send)
b3ac6de7 1059 goto finish;
79072805 1060 d = s;
be4731d2 1061 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1062 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1063 if (SvUTF8(sv))
1064 SvUTF8_on(pv);
1065 }
79072805
LW
1066 while (s < send) {
1067 if (*s == '\\') {
a0d0e21e 1068 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1069 s++; /* all that, just for this */
1070 }
1071 *d++ = *s++;
1072 }
1073 *d = '\0';
463ee0b2 1074 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1075 finish:
3280af22 1076 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1077 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1078 return sv;
1079}
1080
ffb4593c
NT
1081/*
1082 * Now come three functions related to double-quote context,
1083 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1084 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1085 * interact with PL_lex_state, and create fake ( ... ) argument lists
1086 * to handle functions and concatenation.
1087 * They assume that whoever calls them will be setting up a fake
1088 * join call, because each subthing puts a ',' after it. This lets
1089 * "lower \luPpEr"
1090 * become
1091 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1092 *
1093 * (I'm not sure whether the spurious commas at the end of lcfirst's
1094 * arguments and join's arguments are created or not).
1095 */
1096
1097/*
1098 * S_sublex_start
1099 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1100 *
1101 * Pattern matching will set PL_lex_op to the pattern-matching op to
1102 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1103 *
1104 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1105 *
1106 * Everything else becomes a FUNC.
1107 *
1108 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1109 * had an OP_CONST or OP_READLINE). This just sets us up for a
1110 * call to S_sublex_push().
1111 */
1112
76e3520e 1113STATIC I32
cea2e8a9 1114S_sublex_start(pTHX)
79072805
LW
1115{
1116 register I32 op_type = yylval.ival;
79072805
LW
1117
1118 if (op_type == OP_NULL) {
3280af22
NIS
1119 yylval.opval = PL_lex_op;
1120 PL_lex_op = Nullop;
79072805
LW
1121 return THING;
1122 }
1123 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1124 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1125
1126 if (SvTYPE(sv) == SVt_PVIV) {
1127 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1128 STRLEN len;
1129 char *p;
1130 SV *nsv;
1131
1132 p = SvPV(sv, len);
79cb57f6 1133 nsv = newSVpvn(p, len);
01ec43d0
GS
1134 if (SvUTF8(sv))
1135 SvUTF8_on(nsv);
b3ac6de7
IZ
1136 SvREFCNT_dec(sv);
1137 sv = nsv;
4e553d73 1138 }
b3ac6de7 1139 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1140 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1141 /* Allow <FH> // "foo" */
1142 if (op_type == OP_READLINE)
1143 PL_expect = XTERMORDORDOR;
79072805
LW
1144 return THING;
1145 }
1146
3280af22
NIS
1147 PL_sublex_info.super_state = PL_lex_state;
1148 PL_sublex_info.sub_inwhat = op_type;
1149 PL_sublex_info.sub_op = PL_lex_op;
1150 PL_lex_state = LEX_INTERPPUSH;
55497cff 1151
3280af22
NIS
1152 PL_expect = XTERM;
1153 if (PL_lex_op) {
1154 yylval.opval = PL_lex_op;
1155 PL_lex_op = Nullop;
55497cff 1156 return PMFUNC;
1157 }
1158 else
1159 return FUNC;
1160}
1161
ffb4593c
NT
1162/*
1163 * S_sublex_push
1164 * Create a new scope to save the lexing state. The scope will be
1165 * ended in S_sublex_done. Returns a '(', starting the function arguments
1166 * to the uc, lc, etc. found before.
1167 * Sets PL_lex_state to LEX_INTERPCONCAT.
1168 */
1169
76e3520e 1170STATIC I32
cea2e8a9 1171S_sublex_push(pTHX)
55497cff 1172{
f46d017c 1173 ENTER;
55497cff 1174
3280af22
NIS
1175 PL_lex_state = PL_sublex_info.super_state;
1176 SAVEI32(PL_lex_dojoin);
1177 SAVEI32(PL_lex_brackets);
3280af22
NIS
1178 SAVEI32(PL_lex_casemods);
1179 SAVEI32(PL_lex_starts);
1180 SAVEI32(PL_lex_state);
7766f137 1181 SAVEVPTR(PL_lex_inpat);
3280af22 1182 SAVEI32(PL_lex_inwhat);
57843af0 1183 SAVECOPLINE(PL_curcop);
3280af22 1184 SAVEPPTR(PL_bufptr);
8452ff4b 1185 SAVEPPTR(PL_bufend);
3280af22
NIS
1186 SAVEPPTR(PL_oldbufptr);
1187 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1188 SAVEPPTR(PL_last_lop);
1189 SAVEPPTR(PL_last_uni);
3280af22
NIS
1190 SAVEPPTR(PL_linestart);
1191 SAVESPTR(PL_linestr);
8edd5f42
RGS
1192 SAVEGENERICPV(PL_lex_brackstack);
1193 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1194
1195 PL_linestr = PL_lex_stuff;
1196 PL_lex_stuff = Nullsv;
1197
9cbb5ea2
GS
1198 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1199 = SvPVX(PL_linestr);
3280af22 1200 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1201 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1202 SAVEFREESV(PL_linestr);
1203
1204 PL_lex_dojoin = FALSE;
1205 PL_lex_brackets = 0;
3280af22
NIS
1206 New(899, PL_lex_brackstack, 120, char);
1207 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1208 PL_lex_casemods = 0;
1209 *PL_lex_casestack = '\0';
1210 PL_lex_starts = 0;
1211 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1212 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1213
1214 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1215 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1216 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1217 else
3280af22 1218 PL_lex_inpat = Nullop;
79072805 1219
55497cff 1220 return '(';
79072805
LW
1221}
1222
ffb4593c
NT
1223/*
1224 * S_sublex_done
1225 * Restores lexer state after a S_sublex_push.
1226 */
1227
76e3520e 1228STATIC I32
cea2e8a9 1229S_sublex_done(pTHX)
79072805 1230{
3280af22 1231 if (!PL_lex_starts++) {
9aa983d2
JH
1232 SV *sv = newSVpvn("",0);
1233 if (SvUTF8(PL_linestr))
1234 SvUTF8_on(sv);
3280af22 1235 PL_expect = XOPERATOR;
9aa983d2 1236 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1237 return THING;
1238 }
1239
3280af22
NIS
1240 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1241 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1242 return yylex();
79072805
LW
1243 }
1244
ffb4593c 1245 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1246 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1247 PL_linestr = PL_lex_repl;
1248 PL_lex_inpat = 0;
1249 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1250 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1251 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1252 SAVEFREESV(PL_linestr);
1253 PL_lex_dojoin = FALSE;
1254 PL_lex_brackets = 0;
3280af22
NIS
1255 PL_lex_casemods = 0;
1256 *PL_lex_casestack = '\0';
1257 PL_lex_starts = 0;
25da4f38 1258 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1259 PL_lex_state = LEX_INTERPNORMAL;
1260 PL_lex_starts++;
e9fa98b2
HS
1261 /* we don't clear PL_lex_repl here, so that we can check later
1262 whether this is an evalled subst; that means we rely on the
1263 logic to ensure sublex_done() is called again only via the
1264 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1265 }
e9fa98b2 1266 else {
3280af22 1267 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1268 PL_lex_repl = Nullsv;
1269 }
79072805 1270 return ',';
ffed7fef
LW
1271 }
1272 else {
f46d017c 1273 LEAVE;
3280af22
NIS
1274 PL_bufend = SvPVX(PL_linestr);
1275 PL_bufend += SvCUR(PL_linestr);
1276 PL_expect = XOPERATOR;
09bef843 1277 PL_sublex_info.sub_inwhat = 0;
79072805 1278 return ')';
ffed7fef
LW
1279 }
1280}
1281
02aa26ce
NT
1282/*
1283 scan_const
1284
1285 Extracts a pattern, double-quoted string, or transliteration. This
1286 is terrifying code.
1287
3280af22
NIS
1288 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1289 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1290 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1291
9b599b2a
GS
1292 Returns a pointer to the character scanned up to. Iff this is
1293 advanced from the start pointer supplied (ie if anything was
1294 successfully parsed), will leave an OP for the substring scanned
1295 in yylval. Caller must intuit reason for not parsing further
1296 by looking at the next characters herself.
1297
02aa26ce
NT
1298 In patterns:
1299 backslashes:
1300 double-quoted style: \r and \n
1301 regexp special ones: \D \s
1302 constants: \x3
1303 backrefs: \1 (deprecated in substitution replacements)
1304 case and quoting: \U \Q \E
1305 stops on @ and $, but not for $ as tail anchor
1306
1307 In transliterations:
1308 characters are VERY literal, except for - not at the start or end
1309 of the string, which indicates a range. scan_const expands the
1310 range to the full set of intermediate characters.
1311
1312 In double-quoted strings:
1313 backslashes:
1314 double-quoted style: \r and \n
1315 constants: \x3
1316 backrefs: \1 (deprecated)
1317 case and quoting: \U \Q \E
1318 stops on @ and $
1319
1320 scan_const does *not* construct ops to handle interpolated strings.
1321 It stops processing as soon as it finds an embedded $ or @ variable
1322 and leaves it to the caller to work out what's going on.
1323
da6eedaa 1324 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1325
1326 $ in pattern could be $foo or could be tail anchor. Assumption:
1327 it's a tail anchor if $ is the last thing in the string, or if it's
1328 followed by one of ")| \n\t"
1329
1330 \1 (backreferences) are turned into $1
1331
1332 The structure of the code is
1333 while (there's a character to process) {
1334 handle transliteration ranges
1335 skip regexp comments
1336 skip # initiated comments in //x patterns
1337 check for embedded @foo
1338 check for embedded scalars
1339 if (backslash) {
1340 leave intact backslashes from leave (below)
1341 deprecate \1 in strings and sub replacements
1342 handle string-changing backslashes \l \U \Q \E, etc.
1343 switch (what was escaped) {
1344 handle - in a transliteration (becomes a literal -)
1345 handle \132 octal characters
1346 handle 0x15 hex characters
1347 handle \cV (control V)
1348 handle printf backslashes (\f, \r, \n, etc)
1349 } (end switch)
1350 } (end if backslash)
1351 } (end while character to read)
4e553d73 1352
02aa26ce
NT
1353*/
1354
76e3520e 1355STATIC char *
cea2e8a9 1356S_scan_const(pTHX_ char *start)
79072805 1357{
3280af22 1358 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1359 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1360 register char *s = start; /* start of the constant */
1361 register char *d = SvPVX(sv); /* destination for copies */
1362 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1363 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1364 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1365 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1366 UV uv;
1367
dff6d3cd 1368 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1369 PL_lex_inpat
b6d5fef8 1370 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1371 : "";
79072805 1372
2b9d42f0
NIS
1373 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1374 /* If we are doing a trans and we know we want UTF8 set expectation */
1375 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1376 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1377 }
1378
1379
79072805 1380 while (s < send || dorange) {
02aa26ce 1381 /* get transliterations out of the way (they're most literal) */
3280af22 1382 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1383 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1384 if (dorange) {
1ba5c669
JH
1385 I32 i; /* current expanded character */
1386 I32 min; /* first character in range */
1387 I32 max; /* last character in range */
02aa26ce 1388
2b9d42f0 1389 if (has_utf8) {
8973db79
JH
1390 char *c = (char*)utf8_hop((U8*)d, -1);
1391 char *e = d++;
1392 while (e-- > c)
1393 *(e + 1) = *e;
25716404 1394 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1395 /* mark the range as done, and continue */
1396 dorange = FALSE;
1397 didrange = TRUE;
1398 continue;
1399 }
2b9d42f0 1400
02aa26ce 1401 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1402 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1403 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1404 d -= 2; /* eat the first char and the - */
1405
8ada0baa
JH
1406 min = (U8)*d; /* first char in range */
1407 max = (U8)d[1]; /* last char in range */
1408
c2e66d9e 1409 if (min > max) {
01ec43d0 1410 Perl_croak(aTHX_
d1573ac7 1411 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1412 (char)min, (char)max);
c2e66d9e
GS
1413 }
1414
c7f1f016 1415#ifdef EBCDIC
8ada0baa
JH
1416 if ((isLOWER(min) && isLOWER(max)) ||
1417 (isUPPER(min) && isUPPER(max))) {
1418 if (isLOWER(min)) {
1419 for (i = min; i <= max; i++)
1420 if (isLOWER(i))
db42d148 1421 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1422 } else {
1423 for (i = min; i <= max; i++)
1424 if (isUPPER(i))
db42d148 1425 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1426 }
1427 }
1428 else
1429#endif
1430 for (i = min; i <= max; i++)
eb160463 1431 *d++ = (char)i;
02aa26ce
NT
1432
1433 /* mark the range as done, and continue */
79072805 1434 dorange = FALSE;
01ec43d0 1435 didrange = TRUE;
79072805 1436 continue;
4e553d73 1437 }
02aa26ce
NT
1438
1439 /* range begins (ignore - as first or last char) */
79072805 1440 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1441 if (didrange) {
1fafa243 1442 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1443 }
2b9d42f0 1444 if (has_utf8) {
25716404 1445 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1446 s++;
1447 continue;
1448 }
79072805
LW
1449 dorange = TRUE;
1450 s++;
01ec43d0
GS
1451 }
1452 else {
1453 didrange = FALSE;
1454 }
79072805 1455 }
02aa26ce
NT
1456
1457 /* if we get here, we're not doing a transliteration */
1458
0f5d15d6
IZ
1459 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1460 except for the last char, which will be done separately. */
3280af22 1461 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1462 if (s[2] == '#') {
e994fd66 1463 while (s+1 < send && *s != ')')
db42d148 1464 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1465 }
1466 else if (s[2] == '{' /* This should match regcomp.c */
1467 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1468 {
cc6b7395 1469 I32 count = 1;
0f5d15d6 1470 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1471 char c;
1472
d9f97599
GS
1473 while (count && (c = *regparse)) {
1474 if (c == '\\' && regparse[1])
1475 regparse++;
4e553d73 1476 else if (c == '{')
cc6b7395 1477 count++;
4e553d73 1478 else if (c == '}')
cc6b7395 1479 count--;
d9f97599 1480 regparse++;
cc6b7395 1481 }
e994fd66 1482 if (*regparse != ')')
5bdf89e7 1483 regparse--; /* Leave one char for continuation. */
0f5d15d6 1484 while (s < regparse)
db42d148 1485 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1486 }
748a9306 1487 }
02aa26ce
NT
1488
1489 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1490 else if (*s == '#' && PL_lex_inpat &&
1491 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1492 while (s+1 < send && *s != '\n')
db42d148 1493 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1494 }
02aa26ce 1495
5d1d4326 1496 /* check for embedded arrays
da6eedaa 1497 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1498 */
7e2040f0 1499 else if (*s == '@' && s[1]
5d1d4326 1500 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1501 break;
02aa26ce
NT
1502
1503 /* check for embedded scalars. only stop if we're sure it's a
1504 variable.
1505 */
79072805 1506 else if (*s == '$') {
3280af22 1507 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1508 break;
6002328a 1509 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1510 break; /* in regexp, $ might be tail anchor */
1511 }
02aa26ce 1512
2b9d42f0
NIS
1513 /* End of else if chain - OP_TRANS rejoin rest */
1514
02aa26ce 1515 /* backslashes */
79072805
LW
1516 if (*s == '\\' && s+1 < send) {
1517 s++;
02aa26ce
NT
1518
1519 /* some backslashes we leave behind */
c9f97d15 1520 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1521 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1522 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1523 continue;
1524 }
02aa26ce
NT
1525
1526 /* deprecate \1 in strings and substitution replacements */
3280af22 1527 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1528 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1529 {
599cee73 1530 if (ckWARN(WARN_SYNTAX))
9014280d 1531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1532 *--s = '$';
1533 break;
1534 }
02aa26ce
NT
1535
1536 /* string-change backslash escapes */
3280af22 1537 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1538 --s;
1539 break;
1540 }
02aa26ce
NT
1541
1542 /* if we get here, it's either a quoted -, or a digit */
79072805 1543 switch (*s) {
02aa26ce
NT
1544
1545 /* quoted - in transliterations */
79072805 1546 case '-':
3280af22 1547 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1548 *d++ = *s++;
1549 continue;
1550 }
1551 /* FALL THROUGH */
1552 default:
11b8faa4 1553 {
707afd92
MS
1554 if (ckWARN(WARN_MISC) &&
1555 isALNUM(*s) &&
1556 *s != '_')
9014280d 1557 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1558 "Unrecognized escape \\%c passed through",
1559 *s);
1560 /* default action is to copy the quoted character */
f9a63242 1561 goto default_action;
11b8faa4 1562 }
02aa26ce
NT
1563
1564 /* \132 indicates an octal constant */
79072805
LW
1565 case '0': case '1': case '2': case '3':
1566 case '4': case '5': case '6': case '7':
ba210ebe 1567 {
53305cf1
NC
1568 I32 flags = 0;
1569 STRLEN len = 3;
1570 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1571 s += len;
1572 }
012bcf8d 1573 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1574
1575 /* \x24 indicates a hex constant */
79072805 1576 case 'x':
a0ed51b3
LW
1577 ++s;
1578 if (*s == '{') {
1579 char* e = strchr(s, '}');
a4c04bdc
NC
1580 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1581 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1582 STRLEN len;
355860ce 1583
53305cf1 1584 ++s;
adaeee49 1585 if (!e) {
a0ed51b3 1586 yyerror("Missing right brace on \\x{}");
355860ce 1587 continue;
ba210ebe 1588 }
53305cf1
NC
1589 len = e - s;
1590 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1591 s = e + 1;
a0ed51b3
LW
1592 }
1593 else {
ba210ebe 1594 {
53305cf1 1595 STRLEN len = 2;
a4c04bdc 1596 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1597 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1598 s += len;
1599 }
012bcf8d
GS
1600 }
1601
1602 NUM_ESCAPE_INSERT:
1603 /* Insert oct or hex escaped character.
301d3d20 1604 * There will always enough room in sv since such
db42d148 1605 * escapes will be longer than any UTF-8 sequence
301d3d20 1606 * they can end up as. */
ba7cea30 1607
c7f1f016
NIS
1608 /* We need to map to chars to ASCII before doing the tests
1609 to cover EBCDIC
1610 */
c4d5f83a 1611 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1612 if (!has_utf8 && uv > 255) {
301d3d20
JH
1613 /* Might need to recode whatever we have
1614 * accumulated so far if it contains any
1615 * hibit chars.
1616 *
1617 * (Can't we keep track of that and avoid
1618 * this rescan? --jhi)
012bcf8d 1619 */
c7f1f016 1620 int hicount = 0;
63cd0674
NIS
1621 U8 *c;
1622 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1623 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1624 hicount++;
db42d148 1625 }
012bcf8d 1626 }
63cd0674 1627 if (hicount) {
db42d148
NIS
1628 STRLEN offset = d - SvPVX(sv);
1629 U8 *src, *dst;
1630 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1631 src = (U8 *)d - 1;
1632 dst = src+hicount;
1633 d += hicount;
1634 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1635 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1636 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1637 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1638 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1639 }
1640 else {
63cd0674 1641 *dst-- = *src;
012bcf8d 1642 }
c7f1f016 1643 src--;
012bcf8d
GS
1644 }
1645 }
1646 }
1647
9aa983d2 1648 if (has_utf8 || uv > 255) {
9041c2e3 1649 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1650 has_utf8 = TRUE;
f9a63242
JH
1651 if (PL_lex_inwhat == OP_TRANS &&
1652 PL_sublex_info.sub_op) {
1653 PL_sublex_info.sub_op->op_private |=
1654 (PL_lex_repl ? OPpTRANS_FROM_UTF
1655 : OPpTRANS_TO_UTF);
f9a63242 1656 }
012bcf8d 1657 }
a0ed51b3 1658 else {
012bcf8d 1659 *d++ = (char)uv;
a0ed51b3 1660 }
012bcf8d
GS
1661 }
1662 else {
c4d5f83a 1663 *d++ = (char) uv;
a0ed51b3 1664 }
79072805 1665 continue;
02aa26ce 1666
b239daa5 1667 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1668 case 'N':
55eda711 1669 ++s;
423cee85
JH
1670 if (*s == '{') {
1671 char* e = strchr(s, '}');
155aba94 1672 SV *res;
423cee85
JH
1673 STRLEN len;
1674 char *str;
4e553d73 1675
423cee85 1676 if (!e) {
5777a3f7 1677 yyerror("Missing right brace on \\N{}");
423cee85
JH
1678 e = s - 1;
1679 goto cont_scan;
1680 }
dbc0d4f2
JH
1681 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1682 /* \N{U+...} */
1683 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1684 PERL_SCAN_DISALLOW_PREFIX;
1685 s += 3;
1686 len = e - s;
1687 uv = grok_hex(s, &len, &flags, NULL);
1688 s = e + 1;
1689 goto NUM_ESCAPE_INSERT;
1690 }
55eda711
JH
1691 res = newSVpvn(s + 1, e - s - 1);
1692 res = new_constant( Nullch, 0, "charnames",
1693 res, Nullsv, "\\N{...}" );
f9a63242
JH
1694 if (has_utf8)
1695 sv_utf8_upgrade(res);
423cee85 1696 str = SvPV(res,len);
1c47067b
JH
1697#ifdef EBCDIC_NEVER_MIND
1698 /* charnames uses pack U and that has been
1699 * recently changed to do the below uni->native
1700 * mapping, so this would be redundant (and wrong,
1701 * the code point would be doubly converted).
1702 * But leave this in just in case the pack U change
1703 * gets revoked, but the semantics is still
1704 * desireable for charnames. --jhi */
cddc7ef4
JH
1705 {
1706 UV uv = utf8_to_uvchr((U8*)str, 0);
1707
1708 if (uv < 0x100) {
1709 U8 tmpbuf[UTF8_MAXLEN+1], *d;
1710
1711 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1712 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1713 str = SvPV(res, len);
1714 }
1715 }
1716#endif
89491803 1717 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1718 char *ostart = SvPVX(sv);
1719 SvCUR_set(sv, d - ostart);
1720 SvPOK_on(sv);
e4f3eed8 1721 *d = '\0';
f08d6ad9 1722 sv_utf8_upgrade(sv);
d2f449dd 1723 /* this just broke our allocation above... */
eb160463 1724 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1725 d = SvPVX(sv) + SvCUR(sv);
89491803 1726 has_utf8 = TRUE;
f08d6ad9 1727 }
eb160463 1728 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1729 char *odest = SvPVX(sv);
1730
8973db79 1731 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1732 d = SvPVX(sv) + (d - odest);
1733 }
1734 Copy(str, d, len, char);
1735 d += len;
1736 SvREFCNT_dec(res);
1737 cont_scan:
1738 s = e + 1;
1739 }
1740 else
5777a3f7 1741 yyerror("Missing braces on \\N{}");
423cee85
JH
1742 continue;
1743
02aa26ce 1744 /* \c is a control character */
79072805
LW
1745 case 'c':
1746 s++;
961ce445 1747 if (s < send) {
ba210ebe 1748 U8 c = *s++;
c7f1f016
NIS
1749#ifdef EBCDIC
1750 if (isLOWER(c))
1751 c = toUPPER(c);
1752#endif
db42d148 1753 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1754 }
961ce445
RGS
1755 else {
1756 yyerror("Missing control char name in \\c");
1757 }
79072805 1758 continue;
02aa26ce
NT
1759
1760 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1761 case 'b':
db42d148 1762 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1763 break;
1764 case 'n':
db42d148 1765 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1766 break;
1767 case 'r':
db42d148 1768 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1769 break;
1770 case 'f':
db42d148 1771 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1772 break;
1773 case 't':
db42d148 1774 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1775 break;
34a3fe2a 1776 case 'e':
db42d148 1777 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1778 break;
1779 case 'a':
db42d148 1780 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1781 break;
02aa26ce
NT
1782 } /* end switch */
1783
79072805
LW
1784 s++;
1785 continue;
02aa26ce
NT
1786 } /* end if (backslash) */
1787
f9a63242 1788 default_action:
2b9d42f0
NIS
1789 /* If we started with encoded form, or already know we want it
1790 and then encode the next character */
1791 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1792 STRLEN len = 1;
1793 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1794 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1795 s += len;
1796 if (need > len) {
1797 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1798 STRLEN off = d - SvPVX(sv);
1799 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1800 }
1801 d = (char*)uvchr_to_utf8((U8*)d, uv);
1802 has_utf8 = TRUE;
1803 }
1804 else {
1805 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1806 }
02aa26ce
NT
1807 } /* while loop to process each character */
1808
1809 /* terminate the string and set up the sv */
79072805 1810 *d = '\0';
463ee0b2 1811 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1812 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1813 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1814
79072805 1815 SvPOK_on(sv);
9f4817db 1816 if (PL_encoding && !has_utf8) {
d0063567
DK
1817 sv_recode_to_utf8(sv, PL_encoding);
1818 if (SvUTF8(sv))
1819 has_utf8 = TRUE;
9f4817db 1820 }
2b9d42f0 1821 if (has_utf8) {
7e2040f0 1822 SvUTF8_on(sv);
2b9d42f0 1823 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1824 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1825 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1826 }
1827 }
79072805 1828
02aa26ce 1829 /* shrink the sv if we allocated more than we used */
79072805
LW
1830 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1831 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1832 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1833 }
02aa26ce 1834
9b599b2a 1835 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1836 if (s > PL_bufptr) {
1837 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1838 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1839 sv, Nullsv,
4e553d73 1840 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1841 ? "tr"
3280af22 1842 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1843 ? "s"
1844 : "qq")));
79072805 1845 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1846 } else
8990e307 1847 SvREFCNT_dec(sv);
79072805
LW
1848 return s;
1849}
1850
ffb4593c
NT
1851/* S_intuit_more
1852 * Returns TRUE if there's more to the expression (e.g., a subscript),
1853 * FALSE otherwise.
ffb4593c
NT
1854 *
1855 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1856 *
1857 * ->[ and ->{ return TRUE
1858 * { and [ outside a pattern are always subscripts, so return TRUE
1859 * if we're outside a pattern and it's not { or [, then return FALSE
1860 * if we're in a pattern and the first char is a {
1861 * {4,5} (any digits around the comma) returns FALSE
1862 * if we're in a pattern and the first char is a [
1863 * [] returns FALSE
1864 * [SOMETHING] has a funky algorithm to decide whether it's a
1865 * character class or not. It has to deal with things like
1866 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1867 * anything else returns TRUE
1868 */
1869
9cbb5ea2
GS
1870/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1871
76e3520e 1872STATIC int
cea2e8a9 1873S_intuit_more(pTHX_ register char *s)
79072805 1874{
3280af22 1875 if (PL_lex_brackets)
79072805
LW
1876 return TRUE;
1877 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1878 return TRUE;
1879 if (*s != '{' && *s != '[')
1880 return FALSE;
3280af22 1881 if (!PL_lex_inpat)
79072805
LW
1882 return TRUE;
1883
1884 /* In a pattern, so maybe we have {n,m}. */
1885 if (*s == '{') {
1886 s++;
1887 if (!isDIGIT(*s))
1888 return TRUE;
1889 while (isDIGIT(*s))
1890 s++;
1891 if (*s == ',')
1892 s++;
1893 while (isDIGIT(*s))
1894 s++;
1895 if (*s == '}')
1896 return FALSE;
1897 return TRUE;
1898
1899 }
1900
1901 /* On the other hand, maybe we have a character class */
1902
1903 s++;
1904 if (*s == ']' || *s == '^')
1905 return FALSE;
1906 else {
ffb4593c 1907 /* this is terrifying, and it works */
79072805
LW
1908 int weight = 2; /* let's weigh the evidence */
1909 char seen[256];
f27ffc4a 1910 unsigned char un_char = 255, last_un_char;
93a17b20 1911 char *send = strchr(s,']');
3280af22 1912 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1913
1914 if (!send) /* has to be an expression */
1915 return TRUE;
1916
1917 Zero(seen,256,char);
1918 if (*s == '$')
1919 weight -= 3;
1920 else if (isDIGIT(*s)) {
1921 if (s[1] != ']') {
1922 if (isDIGIT(s[1]) && s[2] == ']')
1923 weight -= 10;
1924 }
1925 else
1926 weight -= 100;
1927 }
1928 for (; s < send; s++) {
1929 last_un_char = un_char;
1930 un_char = (unsigned char)*s;
1931 switch (*s) {
1932 case '@':
1933 case '&':
1934 case '$':
1935 weight -= seen[un_char] * 10;
7e2040f0 1936 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1937 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1938 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1939 weight -= 100;
1940 else
1941 weight -= 10;
1942 }
1943 else if (*s == '$' && s[1] &&
93a17b20
LW
1944 strchr("[#!%*<>()-=",s[1])) {
1945 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1946 weight -= 10;
1947 else
1948 weight -= 1;
1949 }
1950 break;
1951 case '\\':
1952 un_char = 254;
1953 if (s[1]) {
93a17b20 1954 if (strchr("wds]",s[1]))
79072805
LW
1955 weight += 100;
1956 else if (seen['\''] || seen['"'])
1957 weight += 1;
93a17b20 1958 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1959 weight += 40;
1960 else if (isDIGIT(s[1])) {
1961 weight += 40;
1962 while (s[1] && isDIGIT(s[1]))
1963 s++;
1964 }
1965 }
1966 else
1967 weight += 100;
1968 break;
1969 case '-':
1970 if (s[1] == '\\')
1971 weight += 50;
93a17b20 1972 if (strchr("aA01! ",last_un_char))
79072805 1973 weight += 30;
93a17b20 1974 if (strchr("zZ79~",s[1]))
79072805 1975 weight += 30;
f27ffc4a
GS
1976 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1977 weight -= 5; /* cope with negative subscript */
79072805
LW
1978 break;
1979 default:
3792a11b
NC
1980 if (!isALNUM(last_un_char)
1981 && !(last_un_char == '$' || last_un_char == '@'
1982 || last_un_char == '&')
1983 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1984 char *d = tmpbuf;
1985 while (isALPHA(*s))
1986 *d++ = *s++;
1987 *d = '\0';
1988 if (keyword(tmpbuf, d - tmpbuf))
1989 weight -= 150;
1990 }
1991 if (un_char == last_un_char + 1)
1992 weight += 5;
1993 weight -= seen[un_char];
1994 break;
1995 }
1996 seen[un_char]++;
1997 }
1998 if (weight >= 0) /* probably a character class */
1999 return FALSE;
2000 }
2001
2002 return TRUE;
2003}
ffed7fef 2004
ffb4593c
NT
2005/*
2006 * S_intuit_method
2007 *
2008 * Does all the checking to disambiguate
2009 * foo bar
2010 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2011 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2012 *
2013 * First argument is the stuff after the first token, e.g. "bar".
2014 *
2015 * Not a method if bar is a filehandle.
2016 * Not a method if foo is a subroutine prototyped to take a filehandle.
2017 * Not a method if it's really "Foo $bar"
2018 * Method if it's "foo $bar"
2019 * Not a method if it's really "print foo $bar"
2020 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2021 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2022 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2023 * =>
2024 */
2025
76e3520e 2026STATIC int
cea2e8a9 2027S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2028{
2029 char *s = start + (*start == '$');
3280af22 2030 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2031 STRLEN len;
2032 GV* indirgv;
2033
2034 if (gv) {
b6c543e3 2035 CV *cv;
a0d0e21e
LW
2036 if (GvIO(gv))
2037 return 0;
b6c543e3
IZ
2038 if ((cv = GvCVu(gv))) {
2039 char *proto = SvPVX(cv);
2040 if (proto) {
2041 if (*proto == ';')
2042 proto++;
2043 if (*proto == '*')
2044 return 0;
2045 }
2046 } else
a0d0e21e
LW
2047 gv = 0;
2048 }
8903cb82 2049 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2050 /* start is the beginning of the possible filehandle/object,
2051 * and s is the end of it
2052 * tmpbuf is a copy of it
2053 */
2054
a0d0e21e 2055 if (*start == '$') {
3280af22 2056 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2057 return 0;
2058 s = skipspace(s);
3280af22
NIS
2059 PL_bufptr = start;
2060 PL_expect = XREF;
a0d0e21e
LW
2061 return *s == '(' ? FUNCMETH : METHOD;
2062 }
2063 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2064 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2065 len -= 2;
2066 tmpbuf[len] = '\0';
2067 goto bare_package;
2068 }
2069 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2070 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2071 return 0;
2072 /* filehandle or package name makes it a method */
89bfa8cd 2073 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2074 s = skipspace(s);
3280af22 2075 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2076 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2077 bare_package:
3280af22 2078 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2079 newSVpvn(tmpbuf,len));
3280af22
NIS
2080 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2081 PL_expect = XTERM;
a0d0e21e 2082 force_next(WORD);
3280af22 2083 PL_bufptr = s;
a0d0e21e
LW
2084 return *s == '(' ? FUNCMETH : METHOD;
2085 }
2086 }
2087 return 0;
2088}
2089
ffb4593c
NT
2090/*
2091 * S_incl_perldb
2092 * Return a string of Perl code to load the debugger. If PERL5DB
2093 * is set, it will return the contents of that, otherwise a
2094 * compile-time require of perl5db.pl.
2095 */
2096
76e3520e 2097STATIC char*
cea2e8a9 2098S_incl_perldb(pTHX)
a0d0e21e 2099{
3280af22 2100 if (PL_perldb) {
76e3520e 2101 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2102
2103 if (pdb)
2104 return pdb;
93189314 2105 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2106 return "BEGIN { require 'perl5db.pl' }";
2107 }
2108 return "";
2109}
2110
2111
16d20bd9 2112/* Encoded script support. filter_add() effectively inserts a
4e553d73 2113 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2114 * Note that the filter function only applies to the current source file
2115 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2116 *
2117 * The datasv parameter (which may be NULL) can be used to pass
2118 * private data to this instance of the filter. The filter function
2119 * can recover the SV using the FILTER_DATA macro and use it to
2120 * store private buffers and state information.
2121 *
2122 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2123 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2124 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2125 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2126 * private use must be set using malloc'd pointers.
2127 */
16d20bd9
AD
2128
2129SV *
864dbfa3 2130Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2131{
f4c556ac
GS
2132 if (!funcp)
2133 return Nullsv;
2134
3280af22
NIS
2135 if (!PL_rsfp_filters)
2136 PL_rsfp_filters = newAV();
16d20bd9 2137 if (!datasv)
8c52afec 2138 datasv = NEWSV(255,0);
16d20bd9 2139 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2140 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2141 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2142 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2143 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2144 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2145 av_unshift(PL_rsfp_filters, 1);
2146 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2147 return(datasv);
2148}
4e553d73 2149
16d20bd9
AD
2150
2151/* Delete most recently added instance of this filter function. */
a0d0e21e 2152void
864dbfa3 2153Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2154{
e0c19803 2155 SV *datasv;
fe5a182c 2156 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2157 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2158 return;
2159 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2160 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2161 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2162 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2163 IoANY(datasv) = (void *)NULL;
3280af22 2164 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2165
16d20bd9
AD
2166 return;
2167 }
2168 /* we need to search for the correct entry and clear it */
cea2e8a9 2169 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2170}
2171
2172
1de9afcd
RGS
2173/* Invoke the idxth filter function for the current rsfp. */
2174/* maxlen 0 = read one text line */
16d20bd9 2175I32
864dbfa3 2176Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2177{
16d20bd9
AD
2178 filter_t funcp;
2179 SV *datasv = NULL;
e50aee73 2180
3280af22 2181 if (!PL_rsfp_filters)
16d20bd9 2182 return -1;
1de9afcd 2183 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2184 /* Provide a default input filter to make life easy. */
2185 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2186 DEBUG_P(PerlIO_printf(Perl_debug_log,
2187 "filter_read %d: from rsfp\n", idx));
4e553d73 2188 if (maxlen) {
16d20bd9
AD
2189 /* Want a block */
2190 int len ;
2191 int old_len = SvCUR(buf_sv) ;
2192
2193 /* ensure buf_sv is large enough */
eb160463 2194 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2195 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2196 if (PerlIO_error(PL_rsfp))
37120919
AD
2197 return -1; /* error */
2198 else
2199 return 0 ; /* end of file */
2200 }
16d20bd9
AD
2201 SvCUR_set(buf_sv, old_len + len) ;
2202 } else {
2203 /* Want a line */
3280af22
NIS
2204 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2205 if (PerlIO_error(PL_rsfp))
37120919
AD
2206 return -1; /* error */
2207 else
2208 return 0 ; /* end of file */
2209 }
16d20bd9
AD
2210 }
2211 return SvCUR(buf_sv);
2212 }
2213 /* Skip this filter slot if filter has been deleted */
1de9afcd 2214 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2215 DEBUG_P(PerlIO_printf(Perl_debug_log,
2216 "filter_read %d: skipped (filter deleted)\n",
2217 idx));
16d20bd9
AD
2218 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2219 }
2220 /* Get function pointer hidden within datasv */
4755096e 2221 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2222 DEBUG_P(PerlIO_printf(Perl_debug_log,
2223 "filter_read %d: via function %p (%s)\n",
fe5a182c 2224 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2225 /* Call function. The function is expected to */
2226 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2227 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2228 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2229}
2230
76e3520e 2231STATIC char *
cea2e8a9 2232S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2233{
c39cd008 2234#ifdef PERL_CR_FILTER
3280af22 2235 if (!PL_rsfp_filters) {
c39cd008 2236 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2237 }
2238#endif
3280af22 2239 if (PL_rsfp_filters) {
55497cff 2240 if (!append)
2241 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2242 if (FILTER_READ(0, sv, 0) > 0)
2243 return ( SvPVX(sv) ) ;
2244 else
2245 return Nullch ;
2246 }
9d116dd7 2247 else
fd049845 2248 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2249}
2250
01ec43d0
GS
2251STATIC HV *
2252S_find_in_my_stash(pTHX_ char *pkgname, I32 len)
def3634b
GS
2253{
2254 GV *gv;
2255
01ec43d0 2256 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2257 return PL_curstash;
2258
2259 if (len > 2 &&
2260 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2261 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2262 {
2263 return GvHV(gv); /* Foo:: */
def3634b
GS
2264 }
2265
2266 /* use constant CLASS => 'MyClass' */
2267 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2268 SV *sv;
2269 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2270 pkgname = SvPV_nolen(sv);
2271 }
2272 }
2273
2274 return gv_stashpv(pkgname, FALSE);
2275}
a0d0e21e 2276
748a9306
LW
2277#ifdef DEBUGGING
2278 static char* exp_name[] =
09bef843 2279 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2280 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2281 };
748a9306 2282#endif
463ee0b2 2283
02aa26ce
NT
2284/*
2285 yylex
2286
2287 Works out what to call the token just pulled out of the input
2288 stream. The yacc parser takes care of taking the ops we return and
2289 stitching them into a tree.
2290
2291 Returns:
2292 PRIVATEREF
2293
2294 Structure:
2295 if read an identifier
2296 if we're in a my declaration
2297 croak if they tried to say my($foo::bar)
2298 build the ops for a my() declaration
2299 if it's an access to a my() variable
2300 are we in a sort block?
2301 croak if my($a); $a <=> $b
2302 build ops for access to a my() variable
2303 if in a dq string, and they've said @foo and we can't find @foo
2304 croak
2305 build ops for a bareword
2306 if we already built the token before, use it.
2307*/
2308
20141f0e 2309
dba4d153
JH
2310#ifdef __SC__
2311#pragma segment Perl_yylex
2312#endif
dba4d153 2313int
dba4d153 2314Perl_yylex(pTHX)
20141f0e 2315{
3afc138a 2316 register char *s = PL_bufptr;
378cc40b 2317 register char *d;
79072805 2318 register I32 tmp;
463ee0b2 2319 STRLEN len;
161b471a
NIS
2320 GV *gv = Nullgv;
2321 GV **gvp = 0;
aa7440fb 2322 bool bof = FALSE;
1d239bbb 2323 I32 orig_keyword = 0;
a687059c 2324
bbf60fe6
DM
2325 DEBUG_T( {
2326 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2327 lex_state_names[PL_lex_state]);
2328 } );
02aa26ce 2329 /* check if there's an identifier for us to look at */
ba979b31 2330 if (PL_pending_ident)
bbf60fe6 2331 return REPORT(S_pending_ident(aTHX));
bbce6d69 2332
02aa26ce
NT
2333 /* no identifier pending identification */
2334
3280af22 2335 switch (PL_lex_state) {
79072805
LW
2336#ifdef COMMENTARY
2337 case LEX_NORMAL: /* Some compilers will produce faster */
2338 case LEX_INTERPNORMAL: /* code if we comment these out. */
2339 break;
2340#endif
2341
09bef843 2342 /* when we've already built the next token, just pull it out of the queue */
79072805 2343 case LEX_KNOWNEXT:
3280af22
NIS
2344 PL_nexttoke--;
2345 yylval = PL_nextval[PL_nexttoke];
2346 if (!PL_nexttoke) {
2347 PL_lex_state = PL_lex_defer;
2348 PL_expect = PL_lex_expect;
2349 PL_lex_defer = LEX_NORMAL;
463ee0b2 2350 }
607df283 2351 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2352 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2353 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2354
bbf60fe6 2355 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2356
02aa26ce 2357 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2358 when we get here, PL_bufptr is at the \
02aa26ce 2359 */
79072805
LW
2360 case LEX_INTERPCASEMOD:
2361#ifdef DEBUGGING
3280af22 2362 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2363 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2364#endif
02aa26ce 2365 /* handle \E or end of string */
3280af22 2366 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2367 char oldmod;
02aa26ce
NT
2368
2369 /* if at a \E */
3280af22
NIS
2370 if (PL_lex_casemods) {
2371 oldmod = PL_lex_casestack[--PL_lex_casemods];
2372 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2373
3792a11b
NC
2374 if (PL_bufptr != PL_bufend
2375 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2376 PL_bufptr += 2;
2377 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2378 }
bbf60fe6 2379 return REPORT(')');
79072805 2380 }
3280af22
NIS
2381 if (PL_bufptr != PL_bufend)
2382 PL_bufptr += 2;
2383 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2384 return yylex();
79072805
LW
2385 }
2386 else {
607df283 2387 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2388 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2389 s = PL_bufptr + 1;
6e909404
JH
2390 if (s[1] == '\\' && s[2] == 'E') {
2391 PL_bufptr = s + 3;
2392 PL_lex_state = LEX_INTERPCONCAT;
2393 return yylex();
a0d0e21e 2394 }
6e909404
JH
2395 else {
2396 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2397 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2398 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2399 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2400 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2401 return REPORT(')');
6e909404
JH
2402 }
2403 if (PL_lex_casemods > 10)
2404 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2405 PL_lex_casestack[PL_lex_casemods++] = *s;
2406 PL_lex_casestack[PL_lex_casemods] = '\0';
2407 PL_lex_state = LEX_INTERPCONCAT;
2408 PL_nextval[PL_nexttoke].ival = 0;
2409 force_next('(');
2410 if (*s == 'l')
2411 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2412 else if (*s == 'u')
2413 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2414 else if (*s == 'L')
2415 PL_nextval[PL_nexttoke].ival = OP_LC;
2416 else if (*s == 'U')
2417 PL_nextval[PL_nexttoke].ival = OP_UC;
2418 else if (*s == 'Q')
2419 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2420 else
2421 Perl_croak(aTHX_ "panic: yylex");
2422 PL_bufptr = s + 1;
a0d0e21e 2423 }
79072805 2424 force_next(FUNC);
3280af22
NIS
2425 if (PL_lex_starts) {
2426 s = PL_bufptr;
2427 PL_lex_starts = 0;
131b3ad0
DM
2428 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2429 if (PL_lex_casemods == 1 && PL_lex_inpat)
2430 OPERATOR(',');
2431 else
2432 Aop(OP_CONCAT);
79072805
LW
2433 }
2434 else
cea2e8a9 2435 return yylex();
79072805
LW
2436 }
2437
55497cff 2438 case LEX_INTERPPUSH:
bbf60fe6 2439 return REPORT(sublex_push());
55497cff 2440
79072805 2441 case LEX_INTERPSTART:
3280af22 2442 if (PL_bufptr == PL_bufend)
bbf60fe6 2443 return REPORT(sublex_done());
607df283 2444 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2445 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2446 PL_expect = XTERM;
2447 PL_lex_dojoin = (*PL_bufptr == '@');
2448 PL_lex_state = LEX_INTERPNORMAL;
2449 if (PL_lex_dojoin) {
2450 PL_nextval[PL_nexttoke].ival = 0;
79072805 2451 force_next(',');
a0d0e21e 2452 force_ident("\"", '$');
3280af22 2453 PL_nextval[PL_nexttoke].ival = 0;
79072805 2454 force_next('$');
3280af22 2455 PL_nextval[PL_nexttoke].ival = 0;
79072805 2456 force_next('(');
3280af22 2457 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2458 force_next(FUNC);
2459 }
3280af22
NIS
2460 if (PL_lex_starts++) {
2461 s = PL_bufptr;
131b3ad0
DM
2462 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2463 if (!PL_lex_casemods && PL_lex_inpat)
2464 OPERATOR(',');
2465 else
2466 Aop(OP_CONCAT);
79072805 2467 }
cea2e8a9 2468 return yylex();
79072805
LW
2469
2470 case LEX_INTERPENDMAYBE:
3280af22
NIS
2471 if (intuit_more(PL_bufptr)) {
2472 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2473 break;
2474 }
2475 /* FALL THROUGH */
2476
2477 case LEX_INTERPEND:
3280af22
NIS
2478 if (PL_lex_dojoin) {
2479 PL_lex_dojoin = FALSE;
2480 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2481 return REPORT(')');
79072805 2482 }
43a16006 2483 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2484 && SvEVALED(PL_lex_repl))
43a16006 2485 {
e9fa98b2 2486 if (PL_bufptr != PL_bufend)
cea2e8a9 2487 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2488 PL_lex_repl = Nullsv;
2489 }
79072805
LW
2490 /* FALLTHROUGH */
2491 case LEX_INTERPCONCAT:
2492#ifdef DEBUGGING
3280af22 2493 if (PL_lex_brackets)
cea2e8a9 2494 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2495#endif
3280af22 2496 if (PL_bufptr == PL_bufend)
bbf60fe6 2497 return REPORT(sublex_done());
79072805 2498
3280af22
NIS
2499 if (SvIVX(PL_linestr) == '\'') {
2500 SV *sv = newSVsv(PL_linestr);
2501 if (!PL_lex_inpat)
76e3520e 2502 sv = tokeq(sv);
3280af22 2503 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2504 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2505 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2506 s = PL_bufend;
79072805
LW
2507 }
2508 else {
3280af22 2509 s = scan_const(PL_bufptr);
79072805 2510 if (*s == '\\')
3280af22 2511 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2512 else
3280af22 2513 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2514 }
2515
3280af22
NIS
2516 if (s != PL_bufptr) {
2517 PL_nextval[PL_nexttoke] = yylval;
2518 PL_expect = XTERM;
79072805 2519 force_next(THING);
131b3ad0
DM
2520 if (PL_lex_starts++) {
2521 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2522 if (!PL_lex_casemods && PL_lex_inpat)
2523 OPERATOR(',');
2524 else
2525 Aop(OP_CONCAT);
2526 }
79072805 2527 else {
3280af22 2528 PL_bufptr = s;
cea2e8a9 2529 return yylex();
79072805
LW
2530 }
2531 }
2532
cea2e8a9 2533 return yylex();
a0d0e21e 2534 case LEX_FORMLINE:
3280af22
NIS
2535 PL_lex_state = LEX_NORMAL;
2536 s = scan_formline(PL_bufptr);
2537 if (!PL_lex_formbrack)
a0d0e21e
LW
2538 goto rightbracket;
2539 OPERATOR(';');
79072805
LW
2540 }
2541
3280af22
NIS
2542 s = PL_bufptr;
2543 PL_oldoldbufptr = PL_oldbufptr;
2544 PL_oldbufptr = s;
607df283 2545 DEBUG_T( {
bbf60fe6 2546 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2547 exp_name[PL_expect], s);
5f80b19c 2548 } );
463ee0b2
LW
2549
2550 retry:
378cc40b
LW
2551 switch (*s) {
2552 default:
7e2040f0 2553 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2554 goto keylookup;
cea2e8a9 2555 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2556 case 4:
2557 case 26:
2558 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2559 case 0:
3280af22
NIS
2560 if (!PL_rsfp) {
2561 PL_last_uni = 0;
2562 PL_last_lop = 0;
c5ee2135
WL
2563 if (PL_lex_brackets) {
2564 if (PL_lex_formbrack)
2565 yyerror("Format not terminated");
2566 else
2567 yyerror("Missing right curly or square bracket");
2568 }
4e553d73 2569 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2570 "### Tokener got EOF\n");
5f80b19c 2571 } );
79072805 2572 TOKEN(0);
463ee0b2 2573 }
3280af22 2574 if (s++ < PL_bufend)
a687059c 2575 goto retry; /* ignore stray nulls */
3280af22
NIS
2576 PL_last_uni = 0;
2577 PL_last_lop = 0;
2578 if (!PL_in_eval && !PL_preambled) {
2579 PL_preambled = TRUE;
2580 sv_setpv(PL_linestr,incl_perldb());
2581 if (SvCUR(PL_linestr))
4147a61b 2582 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2583 if (PL_preambleav){
2584 while(AvFILLp(PL_preambleav) >= 0) {
2585 SV *tmpsv = av_shift(PL_preambleav);
2586 sv_catsv(PL_linestr, tmpsv);
4147a61b 2587 sv_catpvn(PL_linestr, ";", 1);
91b7def8 2588 sv_free(tmpsv);
2589 }
3280af22
NIS
2590 sv_free((SV*)PL_preambleav);
2591 PL_preambleav = NULL;
91b7def8 2592 }
3280af22
NIS
2593 if (PL_minus_n || PL_minus_p) {
2594 sv_catpv(PL_linestr, "LINE: while (<>) {");
2595 if (PL_minus_l)
2596 sv_catpv(PL_linestr,"chomp;");
2597 if (PL_minus_a) {
3280af22 2598 if (PL_minus_F) {
3792a11b
NC
2599 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2600 || *PL_splitstr == '"')
3280af22 2601 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2602 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2603 else {
c8ef6a4b
NC
2604 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2605 bytes can be used as quoting characters. :-) */
48c4c863
NC
2606 /* The count here deliberately includes the NUL
2607 that terminates the C string constant. This
2608 embeds the opening NUL into the string. */
9a3fb652 2609 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2610 s = PL_splitstr;
2611 do {
2612 /* Need to \ \s */
2613 if (*s == '\\')
2614 sv_catpvn(PL_linestr, s, 1);
2615 sv_catpvn(PL_linestr, s, 1);
2616 } while (*s++);
2617 /* This loop will embed the trailing NUL of
2618 PL_linestr as the last thing it does before
2619 terminating. */
4fccd7c6 2620 sv_catpvn(PL_linestr, ");", 2);
54310121 2621 }
2304df62
AD
2622 }
2623 else
75c72d73 2624 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2625 }
79072805 2626 }
4147a61b 2627 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2628 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2629 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2630 PL_last_lop = PL_last_uni = Nullch;
3280af22 2631 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2632 SV *sv = NEWSV(85,0);
2633
2634 sv_upgrade(sv, SVt_PVMG);
3280af22 2635 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2636 (void)SvIOK_on(sv);
2637 SvIVX(sv) = 0;
57843af0 2638 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2639 }
79072805 2640 goto retry;
a687059c 2641 }
e929a76b 2642 do {
aa7440fb 2643 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2644 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2645 fake_eof:
2646 if (PL_rsfp) {
2647 if (PL_preprocess && !PL_in_eval)
2648 (void)PerlProc_pclose(PL_rsfp);
2649 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2650 PerlIO_clearerr(PL_rsfp);
2651 else
2652 (void)PerlIO_close(PL_rsfp);
2653 PL_rsfp = Nullfp;
2654 PL_doextract = FALSE;
2655 }
2656 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2657 sv_setpv(PL_linestr,PL_minus_p
2658 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2659 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2660 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2661 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2662 PL_minus_n = PL_minus_p = 0;
2663 goto retry;
2664 }
2665 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2666 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2667 sv_setpv(PL_linestr,"");
2668 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2669 }
7aa207d6
JH
2670 /* If it looks like the start of a BOM or raw UTF-16,
2671 * check if it in fact is. */
2672 else if (bof &&
2673 (*s == 0 ||
2674 *(U8*)s == 0xEF ||
2675 *(U8*)s >= 0xFE ||
2676 s[1] == 0)) {
226017aa 2677#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2678# ifdef __GNU_LIBRARY__
2679# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2680# define FTELL_FOR_PIPE_IS_BROKEN
2681# endif
e3f494f1
JH
2682# else
2683# ifdef __GLIBC__
2684# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2685# define FTELL_FOR_PIPE_IS_BROKEN
2686# endif
2687# endif
226017aa
DD
2688# endif
2689#endif
2690#ifdef FTELL_FOR_PIPE_IS_BROKEN
2691 /* This loses the possibility to detect the bof
2692 * situation on perl -P when the libc5 is being used.
2693 * Workaround? Maybe attach some extra state to PL_rsfp?
2694 */
2695 if (!PL_preprocess)
7e28d3af 2696 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2697#else
eb160463 2698 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2699#endif
7e28d3af 2700 if (bof) {
3280af22 2701 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2702 s = swallow_bom((U8*)s);
e929a76b 2703 }
378cc40b 2704 }
3280af22 2705 if (PL_doextract) {
a0d0e21e
LW
2706 /* Incest with pod. */
2707 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2708 sv_setpv(PL_linestr, "");
2709 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2710 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2711 PL_last_lop = PL_last_uni = Nullch;
3280af22 2712 PL_doextract = FALSE;
a0d0e21e 2713 }
4e553d73 2714 }
463ee0b2 2715 incline(s);
3280af22
NIS
2716 } while (PL_doextract);
2717 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2718 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2719 SV *sv = NEWSV(85,0);
a687059c 2720
93a17b20 2721 sv_upgrade(sv, SVt_PVMG);
3280af22 2722 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2723 (void)SvIOK_on(sv);
2724 SvIVX(sv) = 0;
57843af0 2725 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2726 }
3280af22 2727 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2728 PL_last_lop = PL_last_uni = Nullch;
57843af0 2729 if (CopLINE(PL_curcop) == 1) {
3280af22 2730 while (s < PL_bufend && isSPACE(*s))
79072805 2731 s++;
a0d0e21e 2732 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2733 s++;
44a8e56a 2734 d = Nullch;
3280af22 2735 if (!PL_in_eval) {
44a8e56a 2736 if (*s == '#' && *(s+1) == '!')
2737 d = s + 2;
2738#ifdef ALTERNATE_SHEBANG
2739 else {
2740 static char as[] = ALTERNATE_SHEBANG;
2741 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2742 d = s + (sizeof(as) - 1);
2743 }
2744#endif /* ALTERNATE_SHEBANG */
2745 }
2746 if (d) {
b8378b72 2747 char *ipath;
774d564b 2748 char *ipathend;
b8378b72 2749
774d564b 2750 while (isSPACE(*d))
b8378b72
CS
2751 d++;
2752 ipath = d;
774d564b 2753 while (*d && !isSPACE(*d))
2754 d++;
2755 ipathend = d;
2756
2757#ifdef ARG_ZERO_IS_SCRIPT
2758 if (ipathend > ipath) {
2759 /*
2760 * HP-UX (at least) sets argv[0] to the script name,
2761 * which makes $^X incorrect. And Digital UNIX and Linux,
2762 * at least, set argv[0] to the basename of the Perl
2763 * interpreter. So, having found "#!", we'll set it right.
2764 */
ee2f7564 2765 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2766 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2767 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2768 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2769 SvSETMAGIC(x);
2770 }
556c1dec
JH
2771 else {
2772 STRLEN blen;
2773 STRLEN llen;
2774 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2775 char *lstart = SvPV(x,llen);
2776 if (llen < blen) {
2777 bstart += blen - llen;
2778 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2779 sv_setpvn(x, ipath, ipathend - ipath);
2780 SvSETMAGIC(x);
2781 }
2782 }
2783 }
774d564b 2784 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2785 }
774d564b 2786#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2787
2788 /*
2789 * Look for options.
2790 */
748a9306 2791 d = instr(s,"perl -");
84e30d1a 2792 if (!d) {
748a9306 2793 d = instr(s,"perl");
84e30d1a
GS
2794#if defined(DOSISH)
2795 /* avoid getting into infinite loops when shebang
2796 * line contains "Perl" rather than "perl" */
2797 if (!d) {
2798 for (d = ipathend-4; d >= ipath; --d) {
2799 if ((*d == 'p' || *d == 'P')
2800 && !ibcmp(d, "perl", 4))
2801 {
2802 break;
2803 }
2804 }
2805 if (d < ipath)
2806 d = Nullch;
2807 }
2808#endif
2809 }
44a8e56a 2810#ifdef ALTERNATE_SHEBANG
2811 /*
2812 * If the ALTERNATE_SHEBANG on this system starts with a
2813 * character that can be part of a Perl expression, then if
2814 * we see it but not "perl", we're probably looking at the
2815 * start of Perl code, not a request to hand off to some
2816 * other interpreter. Similarly, if "perl" is there, but
2817 * not in the first 'word' of the line, we assume the line
2818 * contains the start of the Perl program.
44a8e56a 2819 */
2820 if (d && *s != '#') {
774d564b 2821 char *c = ipath;
44a8e56a 2822 while (*c && !strchr("; \t\r\n\f\v#", *c))
2823 c++;
2824 if (c < d)
2825 d = Nullch; /* "perl" not in first word; ignore */
2826 else
2827 *s = '#'; /* Don't try to parse shebang line */
2828 }
774d564b 2829#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2830#ifndef MACOS_TRADITIONAL
748a9306 2831 if (!d &&
44a8e56a 2832 *s == '#' &&
774d564b 2833 ipathend > ipath &&
3280af22 2834 !PL_minus_c &&
748a9306 2835 !instr(s,"indir") &&
3280af22 2836 instr(PL_origargv[0],"perl"))
748a9306 2837 {
9f68db38 2838 char **newargv;
9f68db38 2839
774d564b 2840 *ipathend = '\0';
2841 s = ipathend + 1;
3280af22 2842 while (s < PL_bufend && isSPACE(*s))
9f68db38 2843 s++;
3280af22
NIS
2844 if (s < PL_bufend) {
2845 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2846 newargv[1] = s;
3280af22 2847 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2848 s++;
2849 *s = '\0';
3280af22 2850 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2851 }
2852 else
3280af22 2853 newargv = PL_origargv;
774d564b 2854 newargv[0] = ipath;
b35112e7 2855 PERL_FPU_PRE_EXEC
b4748376 2856 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2857 PERL_FPU_POST_EXEC
cea2e8a9 2858 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2859 }
bf4acbe4 2860#endif
748a9306 2861 if (d) {
3280af22
NIS
2862 U32 oldpdb = PL_perldb;
2863 bool oldn = PL_minus_n;
2864 bool oldp = PL_minus_p;
748a9306
LW
2865
2866 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2867 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2868
2869 if (*d++ == '-') {
a11ec5a9 2870 bool switches_done = PL_doswitches;
8cc95fdb 2871 do {
2872 if (*d == 'M' || *d == 'm') {
2873 char *m = d;
2874 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2875 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2876 (int)(d - m), m);
2877 }
2878 d = moreswitches(d);
2879 } while (d);
f0b2cf55
YST
2880 if (PL_doswitches && !switches_done) {
2881 int argc = PL_origargc;
2882 char **argv = PL_origargv;
2883 do {
2884 argc--,argv++;
2885 } while (argc && argv[0][0] == '-' && argv[0][1]);
2886 init_argv_symbols(argc,argv);
2887 }
155aba94
GS
2888 if ((PERLDB_LINE && !oldpdb) ||
2889 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2890 /* if we have already added "LINE: while (<>) {",
2891 we must not do it again */
748a9306 2892 {
3280af22
NIS
2893 sv_setpv(PL_linestr, "");
2894 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2895 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2896 PL_last_lop = PL_last_uni = Nullch;
3280af22 2897 PL_preambled = FALSE;
84902520 2898 if (PERLDB_LINE)
3280af22 2899 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2900 goto retry;
2901 }
a11ec5a9
RGS
2902 if (PL_doswitches && !switches_done) {
2903 int argc = PL_origargc;
2904 char **argv = PL_origargv;
2905 do {
2906 argc--,argv++;
2907 } while (argc && argv[0][0] == '-' && argv[0][1]);
2908 init_argv_symbols(argc,argv);
2909 }
a0d0e21e 2910 }
79072805 2911 }
9f68db38 2912 }
79072805 2913 }
3280af22
NIS
2914 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2915 PL_bufptr = s;
2916 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2917 return yylex();
ae986130 2918 }
378cc40b 2919 goto retry;
4fdae800 2920 case '\r':
6a27c188 2921#ifdef PERL_STRICT_CR
cea2e8a9 2922 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2923 Perl_croak(aTHX_
cc507455 2924 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2925#endif
4fdae800 2926 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2927#ifdef MACOS_TRADITIONAL
2928 case '\312':
2929#endif
378cc40b
LW
2930 s++;
2931 goto retry;
378cc40b 2932 case '#':
e929a76b 2933 case '\n':
3280af22 2934 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2935 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2936 /* handle eval qq[#line 1 "foo"\n ...] */
2937 CopLINE_dec(PL_curcop);
2938 incline(s);
2939 }
3280af22 2940 d = PL_bufend;
a687059c 2941 while (s < d && *s != '\n')
378cc40b 2942 s++;
0f85fab0 2943 if (s < d)
378cc40b 2944 s++;
78c267c1 2945 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2946 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2947 incline(s);
3280af22
NIS
2948 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2949 PL_bufptr = s;
2950 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2951 return yylex();
a687059c 2952 }
378cc40b 2953 }
a687059c 2954 else {
378cc40b 2955 *s = '\0';
3280af22 2956 PL_bufend = s;
a687059c 2957 }
378cc40b
LW
2958 goto retry;
2959 case '-':
79072805 2960 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2961 I32 ftst = 0;
2962
378cc40b 2963 s++;
3280af22 2964 PL_bufptr = s;
748a9306
LW
2965 tmp = *s++;
2966
bf4acbe4 2967 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2968 s++;
2969
2970 if (strnEQ(s,"=>",2)) {
3280af22 2971 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2972 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2973 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2974 } );
748a9306
LW
2975 OPERATOR('-'); /* unary minus */
2976 }
3280af22 2977 PL_last_uni = PL_oldbufptr;
748a9306 2978 switch (tmp) {
e5edeb50
JH
2979 case 'r': ftst = OP_FTEREAD; break;
2980 case 'w': ftst = OP_FTEWRITE; break;
2981 case 'x': ftst = OP_FTEEXEC; break;
2982 case 'o': ftst = OP_FTEOWNED; break;
2983 case 'R': ftst = OP_FTRREAD; break;
2984 case 'W': ftst = OP_FTRWRITE; break;
2985 case 'X': ftst = OP_FTREXEC; break;
2986 case 'O': ftst = OP_FTROWNED; break;
2987 case 'e': ftst = OP_FTIS; break;
2988 case 'z': ftst = OP_FTZERO; break;
2989 case 's': ftst = OP_FTSIZE; break;
2990 case 'f': ftst = OP_FTFILE; break;
2991 case 'd': ftst = OP_FTDIR; break;
2992 case 'l': ftst = OP_FTLINK; break;
2993 case 'p': ftst = OP_FTPIPE; break;
2994 case 'S': ftst = OP_FTSOCK; break;
2995 case 'u': ftst = OP_FTSUID; break;
2996 case 'g': ftst = OP_FTSGID; break;
2997 case 'k': ftst = OP_FTSVTX; break;
2998 case 'b': ftst = OP_FTBLK; break;
2999 case 'c': ftst = OP_FTCHR; break;
3000 case 't': ftst = OP_FTTTY; break;
3001 case 'T': ftst = OP_FTTEXT; break;
3002 case 'B': ftst = OP_FTBINARY; break;
3003 case 'M': case 'A': case 'C':
3004 gv_fetchpv("\024",TRUE, SVt_PV);
3005 switch (tmp) {
3006 case 'M': ftst = OP_FTMTIME; break;
3007 case 'A': ftst = OP_FTATIME; break;
3008 case 'C': ftst = OP_FTCTIME; break;
3009 default: break;
3010 }
3011 break;
378cc40b 3012 default:
378cc40b
LW
3013 break;
3014 }
e5edeb50 3015 if (ftst) {
eb160463 3016 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3017 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3018 "### Saw file test %c\n", (int)ftst);
5f80b19c 3019 } );
e5edeb50
JH
3020 FTST(ftst);
3021 }
3022 else {
3023 /* Assume it was a minus followed by a one-letter named
3024 * subroutine call (or a -bareword), then. */
95c31fe3 3025 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3026 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3027 (int) tmp);
5f80b19c 3028 } );
3cf7b4c4 3029 s = --PL_bufptr;
e5edeb50 3030 }
378cc40b 3031 }
a687059c
LW
3032 tmp = *s++;
3033 if (*s == tmp) {
3034 s++;
3280af22 3035 if (PL_expect == XOPERATOR)
79072805
LW
3036 TERM(POSTDEC);
3037 else
3038 OPERATOR(PREDEC);
3039 }
3040 else if (*s == '>') {
3041 s++;
3042 s = skipspace(s);
7e2040f0 3043 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3044 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3045 TOKEN(ARROW);
79072805 3046 }
748a9306
LW
3047 else if (*s == '$')
3048 OPERATOR(ARROW);
463ee0b2 3049 else
748a9306 3050 TERM(ARROW);
a687059c 3051 }
3280af22 3052 if (PL_expect == XOPERATOR)
79072805
LW
3053 Aop(OP_SUBTRACT);
3054 else {
3280af22 3055 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3056 check_uni();
79072805 3057 OPERATOR('-'); /* unary minus */
2f3197b3 3058 }
79072805 3059
378cc40b 3060 case '+':
a687059c
LW
3061 tmp = *s++;
3062 if (*s == tmp) {
378cc40b 3063 s++;
3280af22 3064 if (PL_expect == XOPERATOR)
79072805
LW
3065 TERM(POSTINC);
3066 else
3067 OPERATOR(PREINC);
378cc40b 3068 }
3280af22 3069 if (PL_expect == XOPERATOR)
79072805
LW
3070 Aop(OP_ADD);
3071 else {
3280af22 3072 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3073 check_uni();
a687059c 3074 OPERATOR('+');
2f3197b3 3075 }
a687059c 3076
378cc40b 3077 case '*':
3280af22
NIS
3078 if (PL_expect != XOPERATOR) {
3079 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3080 PL_expect = XOPERATOR;
3081 force_ident(PL_tokenbuf, '*');
3082 if (!*PL_tokenbuf)
a0d0e21e 3083 PREREF('*');
79072805 3084 TERM('*');
a687059c 3085 }
79072805
LW
3086 s++;
3087 if (*s == '*') {
a687059c 3088 s++;
79072805 3089 PWop(OP_POW);
a687059c 3090 }
79072805
LW
3091 Mop(OP_MULTIPLY);
3092
378cc40b 3093 case '%':
3280af22 3094 if (PL_expect == XOPERATOR) {
bbce6d69 3095 ++s;
3096 Mop(OP_MODULO);
a687059c 3097 }
3280af22
NIS
3098 PL_tokenbuf[0] = '%';
3099 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3100 if (!PL_tokenbuf[1]) {
bbce6d69 3101 PREREF('%');
a687059c 3102 }
3280af22 3103 PL_pending_ident = '%';
bbce6d69 3104 TERM('%');
a687059c 3105
378cc40b 3106 case '^':
79072805 3107 s++;
a0d0e21e 3108 BOop(OP_BIT_XOR);
79072805 3109 case '[':
3280af22 3110 PL_lex_brackets++;
79072805 3111 /* FALL THROUGH */
378cc40b 3112 case '~':
378cc40b 3113 case ',':
378cc40b
LW
3114 tmp = *s++;
3115 OPERATOR(tmp);
a0d0e21e
LW
3116 case ':':
3117 if (s[1] == ':') {
3118 len = 0;
3119 goto just_a_word;
3120 }
3121 s++;
09bef843
SB
3122 switch (PL_expect) {
3123 OP *attrs;
3124 case XOPERATOR:
3125 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3126 break;
3127 PL_bufptr = s; /* update in case we back off */
3128 goto grabattrs;
3129 case XATTRBLOCK:
3130 PL_expect = XBLOCK;
3131 goto grabattrs;
3132 case XATTRTERM:
3133 PL_expect = XTERMBLOCK;
3134 grabattrs:
3135 s = skipspace(s);
3136 attrs = Nullop;
7e2040f0 3137 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3138 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3139 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3140 if (tmp < 0) tmp = -tmp;
3141 switch (tmp) {
3142 case KEY_or:
3143 case KEY_and:
c963b151 3144 case KEY_err:
f9829d6b
GS
3145 case KEY_for:
3146 case KEY_unless:
3147 case KEY_if:
3148 case KEY_while:
3149 case KEY_until:
3150 goto got_attrs;
3151 default:
3152 break;
3153 }
3154 }
09bef843
SB
3155 if (*d == '(') {
3156 d = scan_str(d,TRUE,TRUE);
3157 if (!d) {
09bef843
SB
3158 /* MUST advance bufptr here to avoid bogus
3159 "at end of line" context messages from yyerror().
3160 */
3161 PL_bufptr = s + len;
3162 yyerror("Unterminated attribute parameter in attribute list");
3163 if (attrs)
3164 op_free(attrs);
bbf60fe6 3165 return REPORT(0); /* EOF indicator */
09bef843
SB
3166 }
3167 }
3168 if (PL_lex_stuff) {
3169 SV *sv = newSVpvn(s, len);
3170 sv_catsv(sv, PL_lex_stuff);
3171 attrs = append_elem(OP_LIST, attrs,
3172 newSVOP(OP_CONST, 0, sv));
3173 SvREFCNT_dec(PL_lex_stuff);
3174 PL_lex_stuff = Nullsv;
3175 }
3176 else {
371fce9b
DM
3177 if (len == 6 && strnEQ(s, "unique", len)) {
3178 if (PL_in_my == KEY_our)
3179#ifdef USE_ITHREADS
3180 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3181#else
3182 ; /* skip to avoid loading attributes.pm */
3183#endif
3184 else
3185 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3186 }
3187
d3cea301
SB
3188 /* NOTE: any CV attrs applied here need to be part of
3189 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3190 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3191 CvLVALUE_on(PL_compcv);
3192 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3193 CvLOCKED_on(PL_compcv);
3194 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3195 CvMETHOD_on(PL_compcv);
06492da6
SF
3196 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3197 CvASSERTION_on(PL_compcv);
78f9721b
SM
3198 /* After we've set the flags, it could be argued that
3199 we don't need to do the attributes.pm-based setting
3200 process, and shouldn't bother appending recognized
d3cea301
SB
3201 flags. To experiment with that, uncomment the
3202 following "else". (Note that's already been
3203 uncommented. That keeps the above-applied built-in
3204 attributes from being intercepted (and possibly
3205 rejected) by a package's attribute routines, but is
3206 justified by the performance win for the common case
3207 of applying only built-in attributes.) */
0256094b 3208 else
78f9721b
SM
3209 attrs = append_elem(OP_LIST, attrs,
3210 newSVOP(OP_CONST, 0,
3211 newSVpvn(s, len)));
09bef843
SB
3212 }
3213 s = skipspace(d);
0120eecf 3214 if (*s == ':' && s[1] != ':')
09bef843 3215 s = skipspace(s+1);
0120eecf
GS
3216 else if (s == d)
3217 break; /* require real whitespace or :'s */
09bef843 3218 }
f9829d6b 3219 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3220 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3221 char q = ((*s == '\'') ? '"' : '\'');
3222 /* If here for an expression, and parsed no attrs, back off. */
3223 if (tmp == '=' && !attrs) {
3224 s = PL_bufptr;
3225 break;
3226 }
3227 /* MUST advance bufptr here to avoid bogus "at end of line"
3228 context messages from yyerror().
3229 */
3230 PL_bufptr = s;
3231 if (!*s)
3232 yyerror("Unterminated attribute list");
3233 else
3234 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3235 q, *s, q));
3236 if (attrs)
3237 op_free(attrs);
3238 OPERATOR(':');
3239 }
f9829d6b 3240 got_attrs:
09bef843
SB
3241 if (attrs) {
3242 PL_nextval[PL_nexttoke].opval = attrs;
3243 force_next(THING);
3244 }
3245 TOKEN(COLONATTR);
3246 }
a0d0e21e 3247 OPERATOR(':');
8990e307
LW
3248 case '(':
3249 s++;
3280af22
NIS
3250 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3251 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3252 else
3280af22 3253 PL_expect = XTERM;
4a202259 3254 s = skipspace(s);
a0d0e21e 3255 TOKEN('(');
378cc40b 3256 case ';':
f4dd75d9 3257 CLINE;
378cc40b
LW
3258 tmp = *s++;
3259 OPERATOR(tmp);
3260 case ')':
378cc40b 3261 tmp = *s++;
16d20bd9
AD
3262 s = skipspace(s);
3263 if (*s == '{')
3264 PREBLOCK(tmp);
378cc40b 3265 TERM(tmp);
79072805
LW
3266 case ']':
3267 s++;
3280af22 3268 if (PL_lex_brackets <= 0)
d98d5fff 3269 yyerror("Unmatched right square bracket");
463ee0b2 3270 else
3280af22
NIS
3271 --PL_lex_brackets;
3272 if (PL_lex_state == LEX_INTERPNORMAL) {
3273 if (PL_lex_brackets == 0) {
a0d0e21e 3274 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3275 PL_lex_state = LEX_INTERPEND;
79072805
LW
3276 }
3277 }
4633a7c4 3278 TERM(']');
79072805
LW
3279 case '{':
3280 leftbracket:
79072805 3281 s++;
3280af22 3282 if (PL_lex_brackets > 100) {
8edd5f42 3283 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3284 }
3280af22 3285 switch (PL_expect) {
a0d0e21e 3286 case XTERM:
3280af22 3287 if (PL_lex_formbrack) {
a0d0e21e
LW
3288 s--;
3289 PRETERMBLOCK(DO);
3290 }
3280af22
NIS
3291 if (PL_oldoldbufptr == PL_last_lop)
3292 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3293 else
3280af22 3294 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3295 OPERATOR(HASHBRACK);
a0d0e21e 3296 case XOPERATOR:
bf4acbe4 3297 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3298 s++;
44a8e56a 3299 d = s;
3280af22
NIS
3300 PL_tokenbuf[0] = '\0';
3301 if (d < PL_bufend && *d == '-') {
3302 PL_tokenbuf[0] = '-';
44a8e56a 3303 d++;
bf4acbe4 3304 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3305 d++;
3306 }
7e2040f0 3307 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3308 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3309 FALSE, &len);
bf4acbe4 3310 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3311 d++;
3312 if (*d == '}') {
3280af22 3313 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3314 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3315 if (minus)
3316 force_next('-');
748a9306
LW
3317 }
3318 }
3319 /* FALL THROUGH */
09bef843 3320 case XATTRBLOCK:
748a9306 3321 case XBLOCK:
3280af22
NIS
3322 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3323 PL_expect = XSTATE;
a0d0e21e 3324 break;
09bef843 3325 case XATTRTERM:
a0d0e21e 3326 case XTERMBLOCK:
3280af22
NIS
3327 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3328 PL_expect = XSTATE;
a0d0e21e
LW
3329 break;
3330 default: {
3331 char *t;
3280af22
NIS
3332 if (PL_oldoldbufptr == PL_last_lop)
3333 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3334 else
3280af22 3335 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3336 s = skipspace(s);
8452ff4b
SB
3337 if (*s == '}') {
3338 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3339 PL_expect = XTERM;
3340 /* This hack is to get the ${} in the message. */
3341 PL_bufptr = s+1;
3342 yyerror("syntax error");
3343 break;
3344 }
a0d0e21e 3345 OPERATOR(HASHBRACK);
8452ff4b 3346 }
b8a4b1be
GS
3347 /* This hack serves to disambiguate a pair of curlies
3348 * as being a block or an anon hash. Normally, expectation
3349 * determines that, but in cases where we're not in a
3350 * position to expect anything in particular (like inside
3351 * eval"") we have to resolve the ambiguity. This code
3352 * covers the case where the first term in the curlies is a
3353 * quoted string. Most other cases need to be explicitly
3354 * disambiguated by prepending a `+' before the opening
3355 * curly in order to force resolution as an anon hash.
3356 *
3357 * XXX should probably propagate the outer expectation
3358 * into eval"" to rely less on this hack, but that could
3359 * potentially break current behavior of eval"".
3360 * GSAR 97-07-21
3361 */
3362 t = s;
3363 if (*s == '\'' || *s == '"' || *s == '`') {
3364 /* common case: get past first string, handling escapes */
3280af22 3365 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3366 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3367 t++;
3368 t++;
a0d0e21e 3369 }
b8a4b1be 3370 else if (*s == 'q') {
3280af22 3371 if (++t < PL_bufend
b8a4b1be 3372 && (!isALNUM(*t)
3280af22 3373 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3374 && !isALNUM(*t))))
3375 {
abc667d1 3376 /* skip q//-like construct */
b8a4b1be
GS
3377 char *tmps;
3378 char open, close, term;
3379 I32 brackets = 1;
3380
3280af22 3381 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3382 t++;
abc667d1
DM
3383 /* check for q => */
3384 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3385 OPERATOR(HASHBRACK);
3386 }
b8a4b1be
GS
3387 term = *t;
3388 open = term;
3389 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3390 term = tmps[5];
3391 close = term;
3392 if (open == close)
3280af22
NIS
3393 for (t++; t < PL_bufend; t++) {
3394 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3395 t++;
6d07e5e9 3396 else if (*t == open)
b8a4b1be
GS
3397 break;
3398 }
abc667d1 3399 else {
3280af22
NIS
3400 for (t++; t < PL_bufend; t++) {
3401 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3402 t++;
6d07e5e9 3403 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3404 break;
3405 else if (*t == open)
3406 brackets++;
3407 }
abc667d1
DM
3408 }
3409 t++;
b8a4b1be 3410 }
abc667d1
DM
3411 else
3412 /* skip plain q word */
3413 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3414 t += UTF8SKIP(t);
a0d0e21e 3415 }
7e2040f0 3416 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3417 t += UTF8SKIP(t);
7e2040f0 3418 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3419 t += UTF8SKIP(t);
a0d0e21e 3420 }
3280af22 3421 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3422 t++;
b8a4b1be
GS
3423 /* if comma follows first term, call it an anon hash */
3424 /* XXX it could be a comma expression with loop modifiers */
3280af22 3425 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3426 || (*t == '=' && t[1] == '>')))
a0d0e21e 3427 OPERATOR(HASHBRACK);
3280af22 3428 if (PL_expect == XREF)
4e4e412b 3429 PL_expect = XTERM;
a0d0e21e 3430 else {
3280af22
NIS
3431 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3432 PL_expect = XSTATE;
a0d0e21e 3433 }
8990e307 3434 }
a0d0e21e 3435 break;
463ee0b2 3436 }
57843af0 3437 yylval.ival = CopLINE(PL_curcop);
79072805 3438 if (isSPACE(*s) || *s == '#')
3280af22 3439 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3440 TOKEN('{');
378cc40b 3441 case '}':
79072805
LW
3442 rightbracket:
3443 s++;
3280af22 3444 if (PL_lex_brackets <= 0)
d98d5fff 3445 yyerror("Unmatched right curly bracket");
463ee0b2 3446 else
3280af22 3447 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3448 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3449 PL_lex_formbrack = 0;
3450 if (PL_lex_state == LEX_INTERPNORMAL) {
3451 if (PL_lex_brackets == 0) {
9059aa12
LW
3452 if (PL_expect & XFAKEBRACK) {
3453 PL_expect &= XENUMMASK;
3280af22
NIS
3454 PL_lex_state = LEX_INTERPEND;
3455 PL_bufptr = s;
cea2e8a9 3456 return yylex(); /* ignore fake brackets */
79072805 3457 }
fa83b5b6 3458 if (*s == '-' && s[1] == '>')
3280af22 3459 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3460 else if (*s != '[' && *s != '{')
3280af22 3461 PL_lex_state = LEX_INTERPEND;
79072805
LW
3462 }
3463 }
9059aa12
LW
3464 if (PL_expect & XFAKEBRACK) {
3465 PL_expect &= XENUMMASK;
3280af22 3466 PL_bufptr = s;
cea2e8a9 3467 return yylex(); /* ignore fake brackets */
748a9306 3468 }
79072805
LW
3469 force_next('}');
3470 TOKEN(';');
378cc40b
LW
3471 case '&':
3472 s++;
3473 tmp = *s++;
3474 if (tmp == '&')
a0d0e21e 3475 AOPERATOR(ANDAND);
378cc40b 3476 s--;
3280af22 3477 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3478 if (ckWARN(WARN_SEMICOLON)
3479 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3480 {
57843af0 3481 CopLINE_dec(PL_curcop);
9014280d 3482 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3483 CopLINE_inc(PL_curcop);
463ee0b2 3484 }
79072805 3485 BAop(OP_BIT_AND);
463ee0b2 3486 }
79072805 3487
3280af22
NIS
3488 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3489 if (*PL_tokenbuf) {
3490 PL_expect = XOPERATOR;
3491 force_ident(PL_tokenbuf, '&');
463ee0b2 3492 }
79072805
LW
3493 else
3494 PREREF('&');
c07a80fd 3495 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3496 TERM('&');
3497
378cc40b
LW
3498 case '|':
3499 s++;
3500 tmp = *s++;
3501 if (tmp == '|')
a0d0e21e 3502 AOPERATOR(OROR);
378cc40b 3503 s--;
79072805 3504 BOop(OP_BIT_OR);
378cc40b
LW
3505 case '=':
3506 s++;
3507 tmp = *s++;
3508 if (tmp == '=')
79072805
LW
3509 Eop(OP_EQ);
3510 if (tmp == '>')
3511 OPERATOR(',');
378cc40b 3512 if (tmp == '~')
79072805 3513 PMop(OP_MATCH);
599cee73 3514 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3515 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3516 s--;
3280af22
NIS
3517 if (PL_expect == XSTATE && isALPHA(tmp) &&
3518 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3519 {
3280af22
NIS
3520 if (PL_in_eval && !PL_rsfp) {
3521 d = PL_bufend;
a5f75d66
AD
3522 while (s < d) {
3523 if (*s++ == '\n') {
3524 incline(s);
3525 if (strnEQ(s,"=cut",4)) {
3526 s = strchr(s,'\n');
3527 if (s)
3528 s++;
3529 else
3530 s = d;
3531 incline(s);
3532 goto retry;
3533 }
3534 }
3535 }
3536 goto retry;
3537 }
3280af22
NIS
3538 s = PL_bufend;
3539 PL_doextract = TRUE;
a0d0e21e
LW
3540 goto retry;
3541 }
3280af22 3542 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3543 char *t;
51882d45 3544#ifdef PERL_STRICT_CR
bf4acbe4 3545 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3546#else
bf4acbe4 3547 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3548#endif
a0d0e21e
LW
3549 if (*t == '\n' || *t == '#') {
3550 s--;
3280af22 3551 PL_expect = XBLOCK;
a0d0e21e
LW
3552 goto leftbracket;
3553 }
79072805 3554 }
a0d0e21e
LW
3555 yylval.ival = 0;
3556 OPERATOR(ASSIGNOP);
378cc40b
LW
3557 case '!':
3558 s++;
3559 tmp = *s++;
984200d0 3560 if (tmp == '=') {
decca21c
YST
3561 /* was this !=~ where !~ was meant?
3562 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
3563