This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Math::BigInt 1.75, by Tels
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
fc36a67e 29static char ident_too_long[] = "Identifier too long";
4ac733c9 30static char c_without_g[] = "Use of /c modifier is meaningless without /g";
64e578a2 31static char c_in_subst[] = "Use of /c modifier is meaningless in s///";
8903cb82 32
acfe0abc 33static void restore_rsfp(pTHX_ void *f);
6e3aabd6 34#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
35static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
36static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 37#endif
51371543 38
9059aa12
LW
39#define XFAKEBRACK 128
40#define XENUMMASK 127
41
39e02b42
JH
42#ifdef USE_UTF8_SCRIPTS
43# define UTF (!IN_BYTES)
2b9d42f0 44#else
746b446a 45# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 46#endif
a0ed51b3 47
61f0cdd9 48/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
49 * 1999-02-27 mjd-perl-patch@plover.com */
50#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
51
bf4acbe4
GS
52/* On MacOS, respect nonbreaking spaces */
53#ifdef MACOS_TRADITIONAL
54#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
55#else
56#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
57#endif
58
ffb4593c
NT
59/* LEX_* are values for PL_lex_state, the state of the lexer.
60 * They are arranged oddly so that the guard on the switch statement
79072805
LW
61 * can get by with a single comparison (if the compiler is smart enough).
62 */
63
fb73857a 64/* #define LEX_NOTPARSING 11 is done in perl.h. */
65
55497cff 66#define LEX_NORMAL 10
67#define LEX_INTERPNORMAL 9
68#define LEX_INTERPCASEMOD 8
69#define LEX_INTERPPUSH 7
70#define LEX_INTERPSTART 6
71#define LEX_INTERPEND 5
72#define LEX_INTERPENDMAYBE 4
73#define LEX_INTERPCONCAT 3
74#define LEX_INTERPCONST 2
75#define LEX_FORMLINE 1
76#define LEX_KNOWNEXT 0
79072805 77
bbf60fe6
DM
78#ifdef DEBUGGING
79static char* lex_state_names[] = {
80 "KNOWNEXT",
81 "FORMLINE",
82 "INTERPCONST",
83 "INTERPCONCAT",
84 "INTERPENDMAYBE",
85 "INTERPEND",
86 "INTERPSTART",
87 "INTERPPUSH",
88 "INTERPCASEMOD",
89 "INTERPNORMAL",
90 "NORMAL"
91};
92#endif
93
79072805
LW
94#ifdef ff_next
95#undef ff_next
d48672a2
LW
96#endif
97
79072805 98#include "keywords.h"
fe14fcc3 99
ffb4593c
NT
100/* CLINE is a macro that ensures PL_copline has a sane value */
101
ae986130
LW
102#ifdef CLINE
103#undef CLINE
104#endif
57843af0 105#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 106
ffb4593c
NT
107/*
108 * Convenience functions to return different tokens and prime the
9cbb5ea2 109 * lexer for the next token. They all take an argument.
ffb4593c
NT
110 *
111 * TOKEN : generic token (used for '(', DOLSHARP, etc)
112 * OPERATOR : generic operator
113 * AOPERATOR : assignment operator
114 * PREBLOCK : beginning the block after an if, while, foreach, ...
115 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
116 * PREREF : *EXPR where EXPR is not a simple identifier
117 * TERM : expression term
118 * LOOPX : loop exiting command (goto, last, dump, etc)
119 * FTST : file test operator
120 * FUN0 : zero-argument function
2d2e263d 121 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
122 * BOop : bitwise or or xor
123 * BAop : bitwise and
124 * SHop : shift operator
125 * PWop : power operator
9cbb5ea2 126 * PMop : pattern-matching operator
ffb4593c
NT
127 * Aop : addition-level operator
128 * Mop : multiplication-level operator
129 * Eop : equality-testing operator
e5edeb50 130 * Rop : relational operator <= != gt
ffb4593c
NT
131 *
132 * Also see LOP and lop() below.
133 */
134
998054bd 135#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 136# define REPORT(retval) tokereport(s,(int)retval)
998054bd 137#else
bbf60fe6 138# define REPORT(retval) (retval)
998054bd
SC
139#endif
140
bbf60fe6
DM
141#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
142#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
143#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
144#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
145#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
146#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
147#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
148#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
149#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
150#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
151#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
152#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
153#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
154#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
155#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
156#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
157#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
158#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
159#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
160#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 161
a687059c
LW
162/* This bit of chicanery makes a unary function followed by
163 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
164 * The UNIDOR macro is for unary functions that can be followed by the //
165 * operator (such as C<shift // 0>).
a687059c 166 */
bbf60fe6
DM
167#define UNI2(f,x) return ( \
168 yylval.ival = f, \
6f33ba73 169 PL_expect = x, \
3280af22
NIS
170 PL_bufptr = s, \
171 PL_last_uni = PL_oldbufptr, \
172 PL_last_lop_op = f, \
bbf60fe6
DM
173 REPORT( \
174 (*s == '(' || (s = skipspace(s), *s == '(') \
175 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
176#define UNI(f) UNI2(f,XTERM)
177#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 178
bbf60fe6
DM
179#define UNIBRACK(f) return ( \
180 yylval.ival = f, \
3280af22
NIS
181 PL_bufptr = s, \
182 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
183 REPORT( \
184 (*s == '(' || (s = skipspace(s), *s == '(') \
185 ? (int)FUNC1 : (int)UNIOP)))
79072805 186
9f68db38 187/* grandfather return to old style */
3280af22 188#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 189
8fa7f367
JH
190#ifdef DEBUGGING
191
bbf60fe6
DM
192/* how to interpret the yylval associated with the token */
193enum token_type {
194 TOKENTYPE_NONE,
195 TOKENTYPE_IVAL,
196 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
197 TOKENTYPE_PVAL,
198 TOKENTYPE_OPVAL,
199 TOKENTYPE_GVVAL
200};
201
202static struct debug_tokens { int token, type; char *name;} debug_tokens[] =
9041c2e3 203{
bbf60fe6
DM
204 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
205 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
206 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
207 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
208 { ARROW, TOKENTYPE_NONE, "ARROW" },
209 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
210 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
211 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
212 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
213 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
214 { DO, TOKENTYPE_NONE, "DO" },
215 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
216 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
217 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
218 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
219 { ELSE, TOKENTYPE_NONE, "ELSE" },
220 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
221 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
222 { FOR, TOKENTYPE_IVAL, "FOR" },
223 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
224 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
225 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
226 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
227 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
228 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
229 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
230 { IF, TOKENTYPE_IVAL, "IF" },
231 { LABEL, TOKENTYPE_PVAL, "LABEL" },
232 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
233 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
234 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
235 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
236 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
237 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
238 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
239 { MY, TOKENTYPE_IVAL, "MY" },
240 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
241 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
242 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
243 { OROP, TOKENTYPE_IVAL, "OROP" },
244 { OROR, TOKENTYPE_NONE, "OROR" },
245 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
246 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
247 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
248 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
249 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
250 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
251 { PREINC, TOKENTYPE_NONE, "PREINC" },
252 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
253 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
254 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
255 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
256 { SUB, TOKENTYPE_NONE, "SUB" },
257 { THING, TOKENTYPE_OPVAL, "THING" },
258 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
259 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
260 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
261 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
262 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
263 { USE, TOKENTYPE_IVAL, "USE" },
264 { WHILE, TOKENTYPE_IVAL, "WHILE" },
265 { WORD, TOKENTYPE_OPVAL, "WORD" },
266 { 0, TOKENTYPE_NONE, 0 }
267};
268
269/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 270
bbf60fe6
DM
271STATIC int
272S_tokereport(pTHX_ char* s, I32 rv)
273{
274 if (DEBUG_T_TEST) {
275 char *name = Nullch;
276 enum token_type type = TOKENTYPE_NONE;
277 struct debug_tokens *p;
f2b990bf 278 SV* report = newSVpvn("<== ", 4);
bbf60fe6
DM
279
280 for (p = debug_tokens; p->token; p++) {
281 if (p->token == (int)rv) {
282 name = p->name;
283 type = p->type;
284 break;
285 }
286 }
287 if (name)
54667de8 288 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
289 else if ((char)rv > ' ' && (char)rv < '~')
290 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
291 else if (!rv)
54667de8 292 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
293 else
294 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
295 switch (type) {
296 case TOKENTYPE_NONE:
297 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
298 break;
299 case TOKENTYPE_IVAL:
300 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", yylval.ival);
301 break;
302 case TOKENTYPE_OPNUM:
303 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
304 PL_op_name[yylval.ival]);
305 break;
306 case TOKENTYPE_PVAL:
307 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
308 break;
309 case TOKENTYPE_OPVAL:
401441c0
RGS
310 if (yylval.opval)
311 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 312 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
313 else
314 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
315 break;
316 }
317 Perl_sv_catpvf(aTHX_ report, " at line %d [", CopLINE(PL_curcop));
998054bd
SC
318 if (s - PL_bufptr > 0)
319 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
320 else {
321 if (PL_oldbufptr && *PL_oldbufptr)
322 sv_catpv(report, PL_tokenbuf);
323 }
bbf60fe6
DM
324 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
325 };
326 return (int)rv;
998054bd
SC
327}
328
8fa7f367
JH
329#endif
330
ffb4593c
NT
331/*
332 * S_ao
333 *
c963b151
BD
334 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
335 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
336 */
337
76e3520e 338STATIC int
cea2e8a9 339S_ao(pTHX_ int toketype)
a0d0e21e 340{
3280af22
NIS
341 if (*PL_bufptr == '=') {
342 PL_bufptr++;
a0d0e21e
LW
343 if (toketype == ANDAND)
344 yylval.ival = OP_ANDASSIGN;
345 else if (toketype == OROR)
346 yylval.ival = OP_ORASSIGN;
c963b151
BD
347 else if (toketype == DORDOR)
348 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
349 toketype = ASSIGNOP;
350 }
351 return toketype;
352}
353
ffb4593c
NT
354/*
355 * S_no_op
356 * When Perl expects an operator and finds something else, no_op
357 * prints the warning. It always prints "<something> found where
358 * operator expected. It prints "Missing semicolon on previous line?"
359 * if the surprise occurs at the start of the line. "do you need to
360 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
361 * where the compiler doesn't know if foo is a method call or a function.
362 * It prints "Missing operator before end of line" if there's nothing
363 * after the missing operator, or "... before <...>" if there is something
364 * after the missing operator.
365 */
366
76e3520e 367STATIC void
cea2e8a9 368S_no_op(pTHX_ char *what, char *s)
463ee0b2 369{
3280af22
NIS
370 char *oldbp = PL_bufptr;
371 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 372
1189a94a
GS
373 if (!s)
374 s = oldbp;
07c798fb 375 else
1189a94a 376 PL_bufptr = s;
cea2e8a9 377 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
378 if (ckWARN_d(WARN_SYNTAX)) {
379 if (is_first)
380 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
381 "\t(Missing semicolon on previous line?)\n");
382 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
383 char *t;
384 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
385 if (t < PL_bufptr && isSPACE(*t))
386 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
387 "\t(Do you need to predeclare %.*s?)\n",
388 t - PL_oldoldbufptr, PL_oldoldbufptr);
389 }
390 else {
391 assert(s >= oldbp);
392 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
393 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
394 }
07c798fb 395 }
3280af22 396 PL_bufptr = oldbp;
8990e307
LW
397}
398
ffb4593c
NT
399/*
400 * S_missingterm
401 * Complain about missing quote/regexp/heredoc terminator.
402 * If it's called with (char *)NULL then it cauterizes the line buffer.
403 * If we're in a delimited string and the delimiter is a control
404 * character, it's reformatted into a two-char sequence like ^C.
405 * This is fatal.
406 */
407
76e3520e 408STATIC void
cea2e8a9 409S_missingterm(pTHX_ char *s)
8990e307
LW
410{
411 char tmpbuf[3];
412 char q;
413 if (s) {
414 char *nl = strrchr(s,'\n');
d2719217 415 if (nl)
8990e307
LW
416 *nl = '\0';
417 }
9d116dd7
JH
418 else if (
419#ifdef EBCDIC
420 iscntrl(PL_multi_close)
421#else
422 PL_multi_close < 32 || PL_multi_close == 127
423#endif
424 ) {
8990e307 425 *tmpbuf = '^';
3280af22 426 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
427 tmpbuf[2] = '\0';
428 s = tmpbuf;
429 }
430 else {
eb160463 431 *tmpbuf = (char)PL_multi_close;
8990e307
LW
432 tmpbuf[1] = '\0';
433 s = tmpbuf;
434 }
435 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 436 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 437}
79072805 438
ffb4593c
NT
439/*
440 * Perl_deprecate
ffb4593c
NT
441 */
442
79072805 443void
864dbfa3 444Perl_deprecate(pTHX_ char *s)
a0d0e21e 445{
599cee73 446 if (ckWARN(WARN_DEPRECATED))
9014280d 447 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
448}
449
12bcd1a6
PM
450void
451Perl_deprecate_old(pTHX_ char *s)
452{
453 /* This function should NOT be called for any new deprecated warnings */
454 /* Use Perl_deprecate instead */
455 /* */
456 /* It is here to maintain backward compatibility with the pre-5.8 */
457 /* warnings category hierarchy. The "deprecated" category used to */
458 /* live under the "syntax" category. It is now a top-level category */
459 /* in its own right. */
460
461 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
462 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
463 "Use of %s is deprecated", s);
464}
465
ffb4593c
NT
466/*
467 * depcom
9cbb5ea2 468 * Deprecate a comma-less variable list.
ffb4593c
NT
469 */
470
76e3520e 471STATIC void
cea2e8a9 472S_depcom(pTHX)
a0d0e21e 473{
12bcd1a6 474 deprecate_old("comma-less variable list");
a0d0e21e
LW
475}
476
ffb4593c 477/*
9cbb5ea2
GS
478 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
479 * utf16-to-utf8-reversed.
ffb4593c
NT
480 */
481
c39cd008
GS
482#ifdef PERL_CR_FILTER
483static void
484strip_return(SV *sv)
485{
486 register char *s = SvPVX(sv);
487 register char *e = s + SvCUR(sv);
488 /* outer loop optimized to do nothing if there are no CR-LFs */
489 while (s < e) {
490 if (*s++ == '\r' && *s == '\n') {
491 /* hit a CR-LF, need to copy the rest */
492 register char *d = s - 1;
493 *d++ = *s++;
494 while (s < e) {
495 if (*s == '\r' && s[1] == '\n')
496 s++;
497 *d++ = *s++;
498 }
499 SvCUR(sv) -= s - d;
500 return;
501 }
502 }
503}
a868473f 504
76e3520e 505STATIC I32
c39cd008 506S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 507{
c39cd008
GS
508 I32 count = FILTER_READ(idx+1, sv, maxlen);
509 if (count > 0 && !maxlen)
510 strip_return(sv);
511 return count;
a868473f
NIS
512}
513#endif
514
ffb4593c
NT
515/*
516 * Perl_lex_start
9cbb5ea2
GS
517 * Initialize variables. Uses the Perl save_stack to save its state (for
518 * recursive calls to the parser).
ffb4593c
NT
519 */
520
a0d0e21e 521void
864dbfa3 522Perl_lex_start(pTHX_ SV *line)
79072805 523{
8990e307
LW
524 char *s;
525 STRLEN len;
526
3280af22
NIS
527 SAVEI32(PL_lex_dojoin);
528 SAVEI32(PL_lex_brackets);
3280af22
NIS
529 SAVEI32(PL_lex_casemods);
530 SAVEI32(PL_lex_starts);
531 SAVEI32(PL_lex_state);
7766f137 532 SAVEVPTR(PL_lex_inpat);
3280af22 533 SAVEI32(PL_lex_inwhat);
18b09519
GS
534 if (PL_lex_state == LEX_KNOWNEXT) {
535 I32 toke = PL_nexttoke;
536 while (--toke >= 0) {
537 SAVEI32(PL_nexttype[toke]);
538 SAVEVPTR(PL_nextval[toke]);
539 }
540 SAVEI32(PL_nexttoke);
18b09519 541 }
57843af0 542 SAVECOPLINE(PL_curcop);
3280af22
NIS
543 SAVEPPTR(PL_bufptr);
544 SAVEPPTR(PL_bufend);
545 SAVEPPTR(PL_oldbufptr);
546 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
547 SAVEPPTR(PL_last_lop);
548 SAVEPPTR(PL_last_uni);
3280af22
NIS
549 SAVEPPTR(PL_linestart);
550 SAVESPTR(PL_linestr);
8edd5f42
RGS
551 SAVEGENERICPV(PL_lex_brackstack);
552 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 553 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
554 SAVESPTR(PL_lex_stuff);
555 SAVEI32(PL_lex_defer);
09bef843 556 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 557 SAVESPTR(PL_lex_repl);
bebdddfc
GS
558 SAVEINT(PL_expect);
559 SAVEINT(PL_lex_expect);
3280af22
NIS
560
561 PL_lex_state = LEX_NORMAL;
562 PL_lex_defer = 0;
563 PL_expect = XSTATE;
564 PL_lex_brackets = 0;
3280af22
NIS
565 New(899, PL_lex_brackstack, 120, char);
566 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
567 PL_lex_casemods = 0;
568 *PL_lex_casestack = '\0';
569 PL_lex_dojoin = 0;
570 PL_lex_starts = 0;
571 PL_lex_stuff = Nullsv;
572 PL_lex_repl = Nullsv;
573 PL_lex_inpat = 0;
76be56bc 574 PL_nexttoke = 0;
3280af22 575 PL_lex_inwhat = 0;
09bef843 576 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
577 PL_linestr = line;
578 if (SvREADONLY(PL_linestr))
579 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
580 s = SvPV(PL_linestr, len);
6f27f9a7 581 if (!len || s[len-1] != ';') {
3280af22
NIS
582 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
583 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
584 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 585 }
3280af22
NIS
586 SvTEMP_off(PL_linestr);
587 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
588 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 589 PL_last_lop = PL_last_uni = Nullch;
3280af22 590 PL_rsfp = 0;
79072805 591}
a687059c 592
ffb4593c
NT
593/*
594 * Perl_lex_end
9cbb5ea2
GS
595 * Finalizer for lexing operations. Must be called when the parser is
596 * done with the lexer.
ffb4593c
NT
597 */
598
463ee0b2 599void
864dbfa3 600Perl_lex_end(pTHX)
463ee0b2 601{
3280af22 602 PL_doextract = FALSE;
463ee0b2
LW
603}
604
ffb4593c
NT
605/*
606 * S_incline
607 * This subroutine has nothing to do with tilting, whether at windmills
608 * or pinball tables. Its name is short for "increment line". It
57843af0 609 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 610 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
611 * # line 500 "foo.pm"
612 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
613 */
614
76e3520e 615STATIC void
cea2e8a9 616S_incline(pTHX_ char *s)
463ee0b2
LW
617{
618 char *t;
619 char *n;
73659bf1 620 char *e;
463ee0b2 621 char ch;
463ee0b2 622
57843af0 623 CopLINE_inc(PL_curcop);
463ee0b2
LW
624 if (*s++ != '#')
625 return;
bf4acbe4 626 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
627 if (strnEQ(s, "line", 4))
628 s += 4;
629 else
630 return;
084592ab 631 if (SPACE_OR_TAB(*s))
73659bf1 632 s++;
4e553d73 633 else
73659bf1 634 return;
bf4acbe4 635 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
636 if (!isDIGIT(*s))
637 return;
638 n = s;
639 while (isDIGIT(*s))
640 s++;
bf4acbe4 641 while (SPACE_OR_TAB(*s))
463ee0b2 642 s++;
73659bf1 643 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 644 s++;
73659bf1
GS
645 e = t + 1;
646 }
463ee0b2 647 else {
463ee0b2 648 for (t = s; !isSPACE(*t); t++) ;
73659bf1 649 e = t;
463ee0b2 650 }
bf4acbe4 651 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
652 e++;
653 if (*e != '\n' && *e != '\0')
654 return; /* false alarm */
655
463ee0b2
LW
656 ch = *t;
657 *t = '\0';
f4dd75d9 658 if (t - s > 0) {
05ec9bb3 659 CopFILE_free(PL_curcop);
57843af0 660 CopFILE_set(PL_curcop, s);
f4dd75d9 661 }
463ee0b2 662 *t = ch;
57843af0 663 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
664}
665
ffb4593c
NT
666/*
667 * S_skipspace
668 * Called to gobble the appropriate amount and type of whitespace.
669 * Skips comments as well.
670 */
671
76e3520e 672STATIC char *
cea2e8a9 673S_skipspace(pTHX_ register char *s)
a687059c 674{
3280af22 675 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 676 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
677 s++;
678 return s;
679 }
680 for (;;) {
fd049845 681 STRLEN prevlen;
09bef843 682 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 683 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
684 while (s < PL_bufend && isSPACE(*s)) {
685 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
686 incline(s);
687 }
ffb4593c
NT
688
689 /* comment */
3280af22
NIS
690 if (s < PL_bufend && *s == '#') {
691 while (s < PL_bufend && *s != '\n')
463ee0b2 692 s++;
60e6418e 693 if (s < PL_bufend) {
463ee0b2 694 s++;
60e6418e
GS
695 if (PL_in_eval && !PL_rsfp) {
696 incline(s);
697 continue;
698 }
699 }
463ee0b2 700 }
ffb4593c
NT
701
702 /* only continue to recharge the buffer if we're at the end
703 * of the buffer, we're not reading from a source filter, and
704 * we're in normal lexing mode
705 */
09bef843
SB
706 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
707 PL_lex_state == LEX_FORMLINE)
463ee0b2 708 return s;
ffb4593c
NT
709
710 /* try to recharge the buffer */
9cbb5ea2
GS
711 if ((s = filter_gets(PL_linestr, PL_rsfp,
712 (prevlen = SvCUR(PL_linestr)))) == Nullch)
713 {
714 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
715 if (PL_minus_p) {
716 sv_setpv(PL_linestr,
717 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 718 PL_minus_n = PL_minus_p = 0;
a0d0e21e 719 }
01a19ab0
NC
720 else if (PL_minus_n) {
721 sv_setpvn(PL_linestr, ";}", 2);
722 PL_minus_n = 0;
723 }
a0d0e21e 724 else
4147a61b 725 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
726
727 /* reset variables for next time we lex */
9cbb5ea2
GS
728 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
729 = SvPVX(PL_linestr);
3280af22 730 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 731 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
732
733 /* Close the filehandle. Could be from -P preprocessor,
734 * STDIN, or a regular file. If we were reading code from
735 * STDIN (because the commandline held no -e or filename)
736 * then we don't close it, we reset it so the code can
737 * read from STDIN too.
738 */
739
3280af22
NIS
740 if (PL_preprocess && !PL_in_eval)
741 (void)PerlProc_pclose(PL_rsfp);
742 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
743 PerlIO_clearerr(PL_rsfp);
8990e307 744 else
3280af22
NIS
745 (void)PerlIO_close(PL_rsfp);
746 PL_rsfp = Nullfp;
463ee0b2
LW
747 return s;
748 }
ffb4593c
NT
749
750 /* not at end of file, so we only read another line */
09bef843
SB
751 /* make corresponding updates to old pointers, for yyerror() */
752 oldprevlen = PL_oldbufptr - PL_bufend;
753 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
754 if (PL_last_uni)
755 oldunilen = PL_last_uni - PL_bufend;
756 if (PL_last_lop)
757 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
758 PL_linestart = PL_bufptr = s + prevlen;
759 PL_bufend = s + SvCUR(PL_linestr);
760 s = PL_bufptr;
09bef843
SB
761 PL_oldbufptr = s + oldprevlen;
762 PL_oldoldbufptr = s + oldoldprevlen;
763 if (PL_last_uni)
764 PL_last_uni = s + oldunilen;
765 if (PL_last_lop)
766 PL_last_lop = s + oldloplen;
a0d0e21e 767 incline(s);
ffb4593c
NT
768
769 /* debugger active and we're not compiling the debugger code,
770 * so store the line into the debugger's array of lines
771 */
3280af22 772 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
773 SV *sv = NEWSV(85,0);
774
775 sv_upgrade(sv, SVt_PVMG);
3280af22 776 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a
MJD
777 (void)SvIOK_on(sv);
778 SvIVX(sv) = 0;
57843af0 779 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 780 }
463ee0b2 781 }
a687059c 782}
378cc40b 783
ffb4593c
NT
784/*
785 * S_check_uni
786 * Check the unary operators to ensure there's no ambiguity in how they're
787 * used. An ambiguous piece of code would be:
788 * rand + 5
789 * This doesn't mean rand() + 5. Because rand() is a unary operator,
790 * the +5 is its argument.
791 */
792
76e3520e 793STATIC void
cea2e8a9 794S_check_uni(pTHX)
ba106d47 795{
2f3197b3 796 char *s;
a0d0e21e 797 char *t;
2f3197b3 798
3280af22 799 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 800 return;
3280af22
NIS
801 while (isSPACE(*PL_last_uni))
802 PL_last_uni++;
7e2040f0 803 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 804 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 805 return;
0453d815 806 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 807 char ch = *s;
0453d815 808 *s = '\0';
9014280d 809 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 810 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
811 PL_last_uni);
812 *s = ch;
813 }
2f3197b3
LW
814}
815
ffb4593c
NT
816/*
817 * LOP : macro to build a list operator. Its behaviour has been replaced
818 * with a subroutine, S_lop() for which LOP is just another name.
819 */
820
a0d0e21e
LW
821#define LOP(f,x) return lop(f,x,s)
822
ffb4593c
NT
823/*
824 * S_lop
825 * Build a list operator (or something that might be one). The rules:
826 * - if we have a next token, then it's a list operator [why?]
827 * - if the next thing is an opening paren, then it's a function
828 * - else it's a list operator
829 */
830
76e3520e 831STATIC I32
a0be28da 832S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 833{
79072805 834 yylval.ival = f;
35c8bce7 835 CLINE;
3280af22
NIS
836 PL_expect = x;
837 PL_bufptr = s;
838 PL_last_lop = PL_oldbufptr;
eb160463 839 PL_last_lop_op = (OPCODE)f;
3280af22 840 if (PL_nexttoke)
bbf60fe6 841 return REPORT(LSTOP);
79072805 842 if (*s == '(')
bbf60fe6 843 return REPORT(FUNC);
79072805
LW
844 s = skipspace(s);
845 if (*s == '(')
bbf60fe6 846 return REPORT(FUNC);
79072805 847 else
bbf60fe6 848 return REPORT(LSTOP);
79072805
LW
849}
850
ffb4593c
NT
851/*
852 * S_force_next
9cbb5ea2 853 * When the lexer realizes it knows the next token (for instance,
ffb4593c 854 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
855 * to know what token to return the next time the lexer is called. Caller
856 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
857 * handles the token correctly.
ffb4593c
NT
858 */
859
4e553d73 860STATIC void
cea2e8a9 861S_force_next(pTHX_ I32 type)
79072805 862{
3280af22
NIS
863 PL_nexttype[PL_nexttoke] = type;
864 PL_nexttoke++;
865 if (PL_lex_state != LEX_KNOWNEXT) {
866 PL_lex_defer = PL_lex_state;
867 PL_lex_expect = PL_expect;
868 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
869 }
870}
871
d0a148a6
NC
872STATIC SV *
873S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
874{
875 SV *sv = newSVpvn(start,len);
876 if (UTF && !IN_BYTES && is_utf8_string((U8*)start, len))
877 SvUTF8_on(sv);
878 return sv;
879}
880
ffb4593c
NT
881/*
882 * S_force_word
883 * When the lexer knows the next thing is a word (for instance, it has
884 * just seen -> and it knows that the next char is a word char, then
885 * it calls S_force_word to stick the next word into the PL_next lookahead.
886 *
887 * Arguments:
b1b65b59 888 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
889 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
890 * int check_keyword : if true, Perl checks to make sure the word isn't
891 * a keyword (do this if the word is a label, e.g. goto FOO)
892 * int allow_pack : if true, : characters will also be allowed (require,
893 * use, etc. do this)
9cbb5ea2 894 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
895 */
896
76e3520e 897STATIC char *
cea2e8a9 898S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 899{
463ee0b2
LW
900 register char *s;
901 STRLEN len;
4e553d73 902
463ee0b2
LW
903 start = skipspace(start);
904 s = start;
7e2040f0 905 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 906 (allow_pack && *s == ':') ||
15f0808c 907 (allow_initial_tick && *s == '\'') )
a0d0e21e 908 {
3280af22
NIS
909 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
910 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
911 return start;
912 if (token == METHOD) {
913 s = skipspace(s);
914 if (*s == '(')
3280af22 915 PL_expect = XTERM;
463ee0b2 916 else {
3280af22 917 PL_expect = XOPERATOR;
463ee0b2 918 }
79072805 919 }
d0a148a6
NC
920 PL_nextval[PL_nexttoke].opval
921 = (OP*)newSVOP(OP_CONST,0,
922 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 923 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
924 force_next(token);
925 }
926 return s;
927}
928
ffb4593c
NT
929/*
930 * S_force_ident
9cbb5ea2 931 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
932 * text only contains the "foo" portion. The first argument is a pointer
933 * to the "foo", and the second argument is the type symbol to prefix.
934 * Forces the next token to be a "WORD".
9cbb5ea2 935 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
936 */
937
76e3520e 938STATIC void
cea2e8a9 939S_force_ident(pTHX_ register char *s, int kind)
79072805
LW
940{
941 if (s && *s) {
11343788 942 OP* o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 943 PL_nextval[PL_nexttoke].opval = o;
79072805 944 force_next(WORD);
748a9306 945 if (kind) {
11343788 946 o->op_private = OPpCONST_ENTERED;
55497cff 947 /* XXX see note in pp_entereval() for why we forgo typo
948 warnings if the symbol must be introduced in an eval.
949 GSAR 96-10-12 */
3280af22 950 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
951 kind == '$' ? SVt_PV :
952 kind == '@' ? SVt_PVAV :
953 kind == '%' ? SVt_PVHV :
954 SVt_PVGV
955 );
748a9306 956 }
79072805
LW
957 }
958}
959
1571675a
GS
960NV
961Perl_str_to_version(pTHX_ SV *sv)
962{
963 NV retval = 0.0;
964 NV nshift = 1.0;
965 STRLEN len;
966 char *start = SvPVx(sv,len);
3aa33fe5 967 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
968 char *end = start + len;
969 while (start < end) {
ba210ebe 970 STRLEN skip;
1571675a
GS
971 UV n;
972 if (utf)
9041c2e3 973 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
974 else {
975 n = *(U8*)start;
976 skip = 1;
977 }
978 retval += ((NV)n)/nshift;
979 start += skip;
980 nshift *= 1000;
981 }
982 return retval;
983}
984
4e553d73 985/*
ffb4593c
NT
986 * S_force_version
987 * Forces the next token to be a version number.
e759cc13
RGS
988 * If the next token appears to be an invalid version number, (e.g. "v2b"),
989 * and if "guessing" is TRUE, then no new token is created (and the caller
990 * must use an alternative parsing method).
ffb4593c
NT
991 */
992
76e3520e 993STATIC char *
e759cc13 994S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 995{
996 OP *version = Nullop;
44dcb63b 997 char *d;
89bfa8cd 998
999 s = skipspace(s);
1000
44dcb63b 1001 d = s;
dd629d5b 1002 if (*d == 'v')
44dcb63b 1003 d++;
44dcb63b 1004 if (isDIGIT(*d)) {
e759cc13
RGS
1005 while (isDIGIT(*d) || *d == '_' || *d == '.')
1006 d++;
9f3d182e 1007 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1008 SV *ver;
b73d6f50 1009 s = scan_num(s, &yylval);
89bfa8cd 1010 version = yylval.opval;
dd629d5b
GS
1011 ver = cSVOPx(version)->op_sv;
1012 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1013 (void)SvUPGRADE(ver, SVt_PVNV);
1571675a
GS
1014 SvNVX(ver) = str_to_version(ver);
1015 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1016 }
89bfa8cd 1017 }
e759cc13
RGS
1018 else if (guessing)
1019 return s;
89bfa8cd 1020 }
1021
1022 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1023 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1024 force_next(WORD);
89bfa8cd 1025
e759cc13 1026 return s;
89bfa8cd 1027}
1028
ffb4593c
NT
1029/*
1030 * S_tokeq
1031 * Tokenize a quoted string passed in as an SV. It finds the next
1032 * chunk, up to end of string or a backslash. It may make a new
1033 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1034 * turns \\ into \.
1035 */
1036
76e3520e 1037STATIC SV *
cea2e8a9 1038S_tokeq(pTHX_ SV *sv)
79072805
LW
1039{
1040 register char *s;
1041 register char *send;
1042 register char *d;
b3ac6de7
IZ
1043 STRLEN len = 0;
1044 SV *pv = sv;
79072805
LW
1045
1046 if (!SvLEN(sv))
b3ac6de7 1047 goto finish;
79072805 1048
a0d0e21e 1049 s = SvPV_force(sv, len);
21a311ee 1050 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1051 goto finish;
463ee0b2 1052 send = s + len;
79072805
LW
1053 while (s < send && *s != '\\')
1054 s++;
1055 if (s == send)
b3ac6de7 1056 goto finish;
79072805 1057 d = s;
be4731d2 1058 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1059 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1060 if (SvUTF8(sv))
1061 SvUTF8_on(pv);
1062 }
79072805
LW
1063 while (s < send) {
1064 if (*s == '\\') {
a0d0e21e 1065 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1066 s++; /* all that, just for this */
1067 }
1068 *d++ = *s++;
1069 }
1070 *d = '\0';
463ee0b2 1071 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1072 finish:
3280af22 1073 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1074 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1075 return sv;
1076}
1077
ffb4593c
NT
1078/*
1079 * Now come three functions related to double-quote context,
1080 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1081 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1082 * interact with PL_lex_state, and create fake ( ... ) argument lists
1083 * to handle functions and concatenation.
1084 * They assume that whoever calls them will be setting up a fake
1085 * join call, because each subthing puts a ',' after it. This lets
1086 * "lower \luPpEr"
1087 * become
1088 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1089 *
1090 * (I'm not sure whether the spurious commas at the end of lcfirst's
1091 * arguments and join's arguments are created or not).
1092 */
1093
1094/*
1095 * S_sublex_start
1096 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1097 *
1098 * Pattern matching will set PL_lex_op to the pattern-matching op to
1099 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1100 *
1101 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1102 *
1103 * Everything else becomes a FUNC.
1104 *
1105 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1106 * had an OP_CONST or OP_READLINE). This just sets us up for a
1107 * call to S_sublex_push().
1108 */
1109
76e3520e 1110STATIC I32
cea2e8a9 1111S_sublex_start(pTHX)
79072805
LW
1112{
1113 register I32 op_type = yylval.ival;
79072805
LW
1114
1115 if (op_type == OP_NULL) {
3280af22
NIS
1116 yylval.opval = PL_lex_op;
1117 PL_lex_op = Nullop;
79072805
LW
1118 return THING;
1119 }
1120 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1121 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1122
1123 if (SvTYPE(sv) == SVt_PVIV) {
1124 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1125 STRLEN len;
1126 char *p;
1127 SV *nsv;
1128
1129 p = SvPV(sv, len);
79cb57f6 1130 nsv = newSVpvn(p, len);
01ec43d0
GS
1131 if (SvUTF8(sv))
1132 SvUTF8_on(nsv);
b3ac6de7
IZ
1133 SvREFCNT_dec(sv);
1134 sv = nsv;
4e553d73 1135 }
b3ac6de7 1136 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1137 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1138 /* Allow <FH> // "foo" */
1139 if (op_type == OP_READLINE)
1140 PL_expect = XTERMORDORDOR;
79072805
LW
1141 return THING;
1142 }
1143
3280af22
NIS
1144 PL_sublex_info.super_state = PL_lex_state;
1145 PL_sublex_info.sub_inwhat = op_type;
1146 PL_sublex_info.sub_op = PL_lex_op;
1147 PL_lex_state = LEX_INTERPPUSH;
55497cff 1148
3280af22
NIS
1149 PL_expect = XTERM;
1150 if (PL_lex_op) {
1151 yylval.opval = PL_lex_op;
1152 PL_lex_op = Nullop;
55497cff 1153 return PMFUNC;
1154 }
1155 else
1156 return FUNC;
1157}
1158
ffb4593c
NT
1159/*
1160 * S_sublex_push
1161 * Create a new scope to save the lexing state. The scope will be
1162 * ended in S_sublex_done. Returns a '(', starting the function arguments
1163 * to the uc, lc, etc. found before.
1164 * Sets PL_lex_state to LEX_INTERPCONCAT.
1165 */
1166
76e3520e 1167STATIC I32
cea2e8a9 1168S_sublex_push(pTHX)
55497cff 1169{
f46d017c 1170 ENTER;
55497cff 1171
3280af22
NIS
1172 PL_lex_state = PL_sublex_info.super_state;
1173 SAVEI32(PL_lex_dojoin);
1174 SAVEI32(PL_lex_brackets);
3280af22
NIS
1175 SAVEI32(PL_lex_casemods);
1176 SAVEI32(PL_lex_starts);
1177 SAVEI32(PL_lex_state);
7766f137 1178 SAVEVPTR(PL_lex_inpat);
3280af22 1179 SAVEI32(PL_lex_inwhat);
57843af0 1180 SAVECOPLINE(PL_curcop);
3280af22 1181 SAVEPPTR(PL_bufptr);
8452ff4b 1182 SAVEPPTR(PL_bufend);
3280af22
NIS
1183 SAVEPPTR(PL_oldbufptr);
1184 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1185 SAVEPPTR(PL_last_lop);
1186 SAVEPPTR(PL_last_uni);
3280af22
NIS
1187 SAVEPPTR(PL_linestart);
1188 SAVESPTR(PL_linestr);
8edd5f42
RGS
1189 SAVEGENERICPV(PL_lex_brackstack);
1190 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1191
1192 PL_linestr = PL_lex_stuff;
1193 PL_lex_stuff = Nullsv;
1194
9cbb5ea2
GS
1195 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1196 = SvPVX(PL_linestr);
3280af22 1197 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1198 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1199 SAVEFREESV(PL_linestr);
1200
1201 PL_lex_dojoin = FALSE;
1202 PL_lex_brackets = 0;
3280af22
NIS
1203 New(899, PL_lex_brackstack, 120, char);
1204 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1205 PL_lex_casemods = 0;
1206 *PL_lex_casestack = '\0';
1207 PL_lex_starts = 0;
1208 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1209 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1210
1211 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1212 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1213 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1214 else
3280af22 1215 PL_lex_inpat = Nullop;
79072805 1216
55497cff 1217 return '(';
79072805
LW
1218}
1219
ffb4593c
NT
1220/*
1221 * S_sublex_done
1222 * Restores lexer state after a S_sublex_push.
1223 */
1224
76e3520e 1225STATIC I32
cea2e8a9 1226S_sublex_done(pTHX)
79072805 1227{
3280af22 1228 if (!PL_lex_starts++) {
9aa983d2
JH
1229 SV *sv = newSVpvn("",0);
1230 if (SvUTF8(PL_linestr))
1231 SvUTF8_on(sv);
3280af22 1232 PL_expect = XOPERATOR;
9aa983d2 1233 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1234 return THING;
1235 }
1236
3280af22
NIS
1237 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1238 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1239 return yylex();
79072805
LW
1240 }
1241
ffb4593c 1242 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1243 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1244 PL_linestr = PL_lex_repl;
1245 PL_lex_inpat = 0;
1246 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1247 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1248 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1249 SAVEFREESV(PL_linestr);
1250 PL_lex_dojoin = FALSE;
1251 PL_lex_brackets = 0;
3280af22
NIS
1252 PL_lex_casemods = 0;
1253 *PL_lex_casestack = '\0';
1254 PL_lex_starts = 0;
25da4f38 1255 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1256 PL_lex_state = LEX_INTERPNORMAL;
1257 PL_lex_starts++;
e9fa98b2
HS
1258 /* we don't clear PL_lex_repl here, so that we can check later
1259 whether this is an evalled subst; that means we rely on the
1260 logic to ensure sublex_done() is called again only via the
1261 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1262 }
e9fa98b2 1263 else {
3280af22 1264 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1265 PL_lex_repl = Nullsv;
1266 }
79072805 1267 return ',';
ffed7fef
LW
1268 }
1269 else {
f46d017c 1270 LEAVE;
3280af22
NIS
1271 PL_bufend = SvPVX(PL_linestr);
1272 PL_bufend += SvCUR(PL_linestr);
1273 PL_expect = XOPERATOR;
09bef843 1274 PL_sublex_info.sub_inwhat = 0;
79072805 1275 return ')';
ffed7fef
LW
1276 }
1277}
1278
02aa26ce
NT
1279/*
1280 scan_const
1281
1282 Extracts a pattern, double-quoted string, or transliteration. This
1283 is terrifying code.
1284
3280af22
NIS
1285 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1286 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1287 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1288
9b599b2a
GS
1289 Returns a pointer to the character scanned up to. Iff this is
1290 advanced from the start pointer supplied (ie if anything was
1291 successfully parsed), will leave an OP for the substring scanned
1292 in yylval. Caller must intuit reason for not parsing further
1293 by looking at the next characters herself.
1294
02aa26ce
NT
1295 In patterns:
1296 backslashes:
1297 double-quoted style: \r and \n
1298 regexp special ones: \D \s
1299 constants: \x3
1300 backrefs: \1 (deprecated in substitution replacements)
1301 case and quoting: \U \Q \E
1302 stops on @ and $, but not for $ as tail anchor
1303
1304 In transliterations:
1305 characters are VERY literal, except for - not at the start or end
1306 of the string, which indicates a range. scan_const expands the
1307 range to the full set of intermediate characters.
1308
1309 In double-quoted strings:
1310 backslashes:
1311 double-quoted style: \r and \n
1312 constants: \x3
1313 backrefs: \1 (deprecated)
1314 case and quoting: \U \Q \E
1315 stops on @ and $
1316
1317 scan_const does *not* construct ops to handle interpolated strings.
1318 It stops processing as soon as it finds an embedded $ or @ variable
1319 and leaves it to the caller to work out what's going on.
1320
da6eedaa 1321 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1322
1323 $ in pattern could be $foo or could be tail anchor. Assumption:
1324 it's a tail anchor if $ is the last thing in the string, or if it's
1325 followed by one of ")| \n\t"
1326
1327 \1 (backreferences) are turned into $1
1328
1329 The structure of the code is
1330 while (there's a character to process) {
1331 handle transliteration ranges
1332 skip regexp comments
1333 skip # initiated comments in //x patterns
1334 check for embedded @foo
1335 check for embedded scalars
1336 if (backslash) {
1337 leave intact backslashes from leave (below)
1338 deprecate \1 in strings and sub replacements
1339 handle string-changing backslashes \l \U \Q \E, etc.
1340 switch (what was escaped) {
1341 handle - in a transliteration (becomes a literal -)
1342 handle \132 octal characters
1343 handle 0x15 hex characters
1344 handle \cV (control V)
1345 handle printf backslashes (\f, \r, \n, etc)
1346 } (end switch)
1347 } (end if backslash)
1348 } (end while character to read)
4e553d73 1349
02aa26ce
NT
1350*/
1351
76e3520e 1352STATIC char *
cea2e8a9 1353S_scan_const(pTHX_ char *start)
79072805 1354{
3280af22 1355 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1356 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1357 register char *s = start; /* start of the constant */
1358 register char *d = SvPVX(sv); /* destination for copies */
1359 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1360 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1361 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1362 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1363 UV uv;
1364
dff6d3cd 1365 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1366 PL_lex_inpat
b6d5fef8 1367 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1368 : "";
79072805 1369
2b9d42f0
NIS
1370 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1371 /* If we are doing a trans and we know we want UTF8 set expectation */
1372 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1373 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1374 }
1375
1376
79072805 1377 while (s < send || dorange) {
02aa26ce 1378 /* get transliterations out of the way (they're most literal) */
3280af22 1379 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1380 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1381 if (dorange) {
1ba5c669
JH
1382 I32 i; /* current expanded character */
1383 I32 min; /* first character in range */
1384 I32 max; /* last character in range */
02aa26ce 1385
2b9d42f0 1386 if (has_utf8) {
8973db79
JH
1387 char *c = (char*)utf8_hop((U8*)d, -1);
1388 char *e = d++;
1389 while (e-- > c)
1390 *(e + 1) = *e;
25716404 1391 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1392 /* mark the range as done, and continue */
1393 dorange = FALSE;
1394 didrange = TRUE;
1395 continue;
1396 }
2b9d42f0 1397
02aa26ce 1398 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1399 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1400 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1401 d -= 2; /* eat the first char and the - */
1402
8ada0baa
JH
1403 min = (U8)*d; /* first char in range */
1404 max = (U8)d[1]; /* last char in range */
1405
c2e66d9e 1406 if (min > max) {
01ec43d0 1407 Perl_croak(aTHX_
d1573ac7 1408 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1409 (char)min, (char)max);
c2e66d9e
GS
1410 }
1411
c7f1f016 1412#ifdef EBCDIC
8ada0baa
JH
1413 if ((isLOWER(min) && isLOWER(max)) ||
1414 (isUPPER(min) && isUPPER(max))) {
1415 if (isLOWER(min)) {
1416 for (i = min; i <= max; i++)
1417 if (isLOWER(i))
db42d148 1418 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1419 } else {
1420 for (i = min; i <= max; i++)
1421 if (isUPPER(i))
db42d148 1422 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1423 }
1424 }
1425 else
1426#endif
1427 for (i = min; i <= max; i++)
eb160463 1428 *d++ = (char)i;
02aa26ce
NT
1429
1430 /* mark the range as done, and continue */
79072805 1431 dorange = FALSE;
01ec43d0 1432 didrange = TRUE;
79072805 1433 continue;
4e553d73 1434 }
02aa26ce
NT
1435
1436 /* range begins (ignore - as first or last char) */
79072805 1437 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1438 if (didrange) {
1fafa243 1439 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1440 }
2b9d42f0 1441 if (has_utf8) {
25716404 1442 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1443 s++;
1444 continue;
1445 }
79072805
LW
1446 dorange = TRUE;
1447 s++;
01ec43d0
GS
1448 }
1449 else {
1450 didrange = FALSE;
1451 }
79072805 1452 }
02aa26ce
NT
1453
1454 /* if we get here, we're not doing a transliteration */
1455
0f5d15d6
IZ
1456 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1457 except for the last char, which will be done separately. */
3280af22 1458 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1459 if (s[2] == '#') {
e994fd66 1460 while (s+1 < send && *s != ')')
db42d148 1461 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1462 }
1463 else if (s[2] == '{' /* This should match regcomp.c */
1464 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1465 {
cc6b7395 1466 I32 count = 1;
0f5d15d6 1467 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1468 char c;
1469
d9f97599
GS
1470 while (count && (c = *regparse)) {
1471 if (c == '\\' && regparse[1])
1472 regparse++;
4e553d73 1473 else if (c == '{')
cc6b7395 1474 count++;
4e553d73 1475 else if (c == '}')
cc6b7395 1476 count--;
d9f97599 1477 regparse++;
cc6b7395 1478 }
e994fd66 1479 if (*regparse != ')')
5bdf89e7 1480 regparse--; /* Leave one char for continuation. */
0f5d15d6 1481 while (s < regparse)
db42d148 1482 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1483 }
748a9306 1484 }
02aa26ce
NT
1485
1486 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1487 else if (*s == '#' && PL_lex_inpat &&
1488 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1489 while (s+1 < send && *s != '\n')
db42d148 1490 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1491 }
02aa26ce 1492
5d1d4326 1493 /* check for embedded arrays
da6eedaa 1494 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1495 */
7e2040f0 1496 else if (*s == '@' && s[1]
5d1d4326 1497 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1498 break;
02aa26ce
NT
1499
1500 /* check for embedded scalars. only stop if we're sure it's a
1501 variable.
1502 */
79072805 1503 else if (*s == '$') {
3280af22 1504 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1505 break;
6002328a 1506 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1507 break; /* in regexp, $ might be tail anchor */
1508 }
02aa26ce 1509
2b9d42f0
NIS
1510 /* End of else if chain - OP_TRANS rejoin rest */
1511
02aa26ce 1512 /* backslashes */
79072805
LW
1513 if (*s == '\\' && s+1 < send) {
1514 s++;
02aa26ce
NT
1515
1516 /* some backslashes we leave behind */
c9f97d15 1517 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1518 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1519 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1520 continue;
1521 }
02aa26ce
NT
1522
1523 /* deprecate \1 in strings and substitution replacements */
3280af22 1524 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1525 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1526 {
599cee73 1527 if (ckWARN(WARN_SYNTAX))
9014280d 1528 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1529 *--s = '$';
1530 break;
1531 }
02aa26ce
NT
1532
1533 /* string-change backslash escapes */
3280af22 1534 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1535 --s;
1536 break;
1537 }
02aa26ce
NT
1538
1539 /* if we get here, it's either a quoted -, or a digit */
79072805 1540 switch (*s) {
02aa26ce
NT
1541
1542 /* quoted - in transliterations */
79072805 1543 case '-':
3280af22 1544 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1545 *d++ = *s++;
1546 continue;
1547 }
1548 /* FALL THROUGH */
1549 default:
11b8faa4 1550 {
707afd92
MS
1551 if (ckWARN(WARN_MISC) &&
1552 isALNUM(*s) &&
1553 *s != '_')
9014280d 1554 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1555 "Unrecognized escape \\%c passed through",
1556 *s);
1557 /* default action is to copy the quoted character */
f9a63242 1558 goto default_action;
11b8faa4 1559 }
02aa26ce
NT
1560
1561 /* \132 indicates an octal constant */
79072805
LW
1562 case '0': case '1': case '2': case '3':
1563 case '4': case '5': case '6': case '7':
ba210ebe 1564 {
53305cf1
NC
1565 I32 flags = 0;
1566 STRLEN len = 3;
1567 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1568 s += len;
1569 }
012bcf8d 1570 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1571
1572 /* \x24 indicates a hex constant */
79072805 1573 case 'x':
a0ed51b3
LW
1574 ++s;
1575 if (*s == '{') {
1576 char* e = strchr(s, '}');
a4c04bdc
NC
1577 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1578 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1579 STRLEN len;
355860ce 1580
53305cf1 1581 ++s;
adaeee49 1582 if (!e) {
a0ed51b3 1583 yyerror("Missing right brace on \\x{}");
355860ce 1584 continue;
ba210ebe 1585 }
53305cf1
NC
1586 len = e - s;
1587 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1588 s = e + 1;
a0ed51b3
LW
1589 }
1590 else {
ba210ebe 1591 {
53305cf1 1592 STRLEN len = 2;
a4c04bdc 1593 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1594 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1595 s += len;
1596 }
012bcf8d
GS
1597 }
1598
1599 NUM_ESCAPE_INSERT:
1600 /* Insert oct or hex escaped character.
301d3d20 1601 * There will always enough room in sv since such
db42d148 1602 * escapes will be longer than any UTF-8 sequence
301d3d20 1603 * they can end up as. */
ba7cea30 1604
c7f1f016
NIS
1605 /* We need to map to chars to ASCII before doing the tests
1606 to cover EBCDIC
1607 */
c4d5f83a 1608 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1609 if (!has_utf8 && uv > 255) {
301d3d20
JH
1610 /* Might need to recode whatever we have
1611 * accumulated so far if it contains any
1612 * hibit chars.
1613 *
1614 * (Can't we keep track of that and avoid
1615 * this rescan? --jhi)
012bcf8d 1616 */
c7f1f016 1617 int hicount = 0;
63cd0674
NIS
1618 U8 *c;
1619 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1620 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1621 hicount++;
db42d148 1622 }
012bcf8d 1623 }
63cd0674 1624 if (hicount) {
db42d148
NIS
1625 STRLEN offset = d - SvPVX(sv);
1626 U8 *src, *dst;
1627 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1628 src = (U8 *)d - 1;
1629 dst = src+hicount;
1630 d += hicount;
1631 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1632 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1633 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1634 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1635 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1636 }
1637 else {
63cd0674 1638 *dst-- = *src;
012bcf8d 1639 }
c7f1f016 1640 src--;
012bcf8d
GS
1641 }
1642 }
1643 }
1644
9aa983d2 1645 if (has_utf8 || uv > 255) {
9041c2e3 1646 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1647 has_utf8 = TRUE;
f9a63242
JH
1648 if (PL_lex_inwhat == OP_TRANS &&
1649 PL_sublex_info.sub_op) {
1650 PL_sublex_info.sub_op->op_private |=
1651 (PL_lex_repl ? OPpTRANS_FROM_UTF
1652 : OPpTRANS_TO_UTF);
f9a63242 1653 }
012bcf8d 1654 }
a0ed51b3 1655 else {
012bcf8d 1656 *d++ = (char)uv;
a0ed51b3 1657 }
012bcf8d
GS
1658 }
1659 else {
c4d5f83a 1660 *d++ = (char) uv;
a0ed51b3 1661 }
79072805 1662 continue;
02aa26ce 1663
b239daa5 1664 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1665 case 'N':
55eda711 1666 ++s;
423cee85
JH
1667 if (*s == '{') {
1668 char* e = strchr(s, '}');
155aba94 1669 SV *res;
423cee85
JH
1670 STRLEN len;
1671 char *str;
4e553d73 1672
423cee85 1673 if (!e) {
5777a3f7 1674 yyerror("Missing right brace on \\N{}");
423cee85
JH
1675 e = s - 1;
1676 goto cont_scan;
1677 }
dbc0d4f2
JH
1678 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1679 /* \N{U+...} */
1680 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1681 PERL_SCAN_DISALLOW_PREFIX;
1682 s += 3;
1683 len = e - s;
1684 uv = grok_hex(s, &len, &flags, NULL);
1685 s = e + 1;
1686 goto NUM_ESCAPE_INSERT;
1687 }
55eda711
JH
1688 res = newSVpvn(s + 1, e - s - 1);
1689 res = new_constant( Nullch, 0, "charnames",
1690 res, Nullsv, "\\N{...}" );
f9a63242
JH
1691 if (has_utf8)
1692 sv_utf8_upgrade(res);
423cee85 1693 str = SvPV(res,len);
1c47067b
JH
1694#ifdef EBCDIC_NEVER_MIND
1695 /* charnames uses pack U and that has been
1696 * recently changed to do the below uni->native
1697 * mapping, so this would be redundant (and wrong,
1698 * the code point would be doubly converted).
1699 * But leave this in just in case the pack U change
1700 * gets revoked, but the semantics is still
1701 * desireable for charnames. --jhi */
cddc7ef4
JH
1702 {
1703 UV uv = utf8_to_uvchr((U8*)str, 0);
1704
1705 if (uv < 0x100) {
89ebb4a3 1706 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1707
1708 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1709 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1710 str = SvPV(res, len);
1711 }
1712 }
1713#endif
89491803 1714 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1715 char *ostart = SvPVX(sv);
1716 SvCUR_set(sv, d - ostart);
1717 SvPOK_on(sv);
e4f3eed8 1718 *d = '\0';
f08d6ad9 1719 sv_utf8_upgrade(sv);
d2f449dd 1720 /* this just broke our allocation above... */
eb160463 1721 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1722 d = SvPVX(sv) + SvCUR(sv);
89491803 1723 has_utf8 = TRUE;
f08d6ad9 1724 }
eb160463 1725 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1726 char *odest = SvPVX(sv);
1727
8973db79 1728 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1729 d = SvPVX(sv) + (d - odest);
1730 }
1731 Copy(str, d, len, char);
1732 d += len;
1733 SvREFCNT_dec(res);
1734 cont_scan:
1735 s = e + 1;
1736 }
1737 else
5777a3f7 1738 yyerror("Missing braces on \\N{}");
423cee85
JH
1739 continue;
1740
02aa26ce 1741 /* \c is a control character */
79072805
LW
1742 case 'c':
1743 s++;
961ce445 1744 if (s < send) {
ba210ebe 1745 U8 c = *s++;
c7f1f016
NIS
1746#ifdef EBCDIC
1747 if (isLOWER(c))
1748 c = toUPPER(c);
1749#endif
db42d148 1750 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1751 }
961ce445
RGS
1752 else {
1753 yyerror("Missing control char name in \\c");
1754 }
79072805 1755 continue;
02aa26ce
NT
1756
1757 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1758 case 'b':
db42d148 1759 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1760 break;
1761 case 'n':
db42d148 1762 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1763 break;
1764 case 'r':
db42d148 1765 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1766 break;
1767 case 'f':
db42d148 1768 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1769 break;
1770 case 't':
db42d148 1771 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1772 break;
34a3fe2a 1773 case 'e':
db42d148 1774 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1775 break;
1776 case 'a':
db42d148 1777 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1778 break;
02aa26ce
NT
1779 } /* end switch */
1780
79072805
LW
1781 s++;
1782 continue;
02aa26ce
NT
1783 } /* end if (backslash) */
1784
f9a63242 1785 default_action:
2b9d42f0
NIS
1786 /* If we started with encoded form, or already know we want it
1787 and then encode the next character */
1788 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1789 STRLEN len = 1;
1790 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1791 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1792 s += len;
1793 if (need > len) {
1794 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1795 STRLEN off = d - SvPVX(sv);
1796 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1797 }
1798 d = (char*)uvchr_to_utf8((U8*)d, uv);
1799 has_utf8 = TRUE;
1800 }
1801 else {
1802 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1803 }
02aa26ce
NT
1804 } /* while loop to process each character */
1805
1806 /* terminate the string and set up the sv */
79072805 1807 *d = '\0';
463ee0b2 1808 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1809 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1810 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1811
79072805 1812 SvPOK_on(sv);
9f4817db 1813 if (PL_encoding && !has_utf8) {
d0063567
DK
1814 sv_recode_to_utf8(sv, PL_encoding);
1815 if (SvUTF8(sv))
1816 has_utf8 = TRUE;
9f4817db 1817 }
2b9d42f0 1818 if (has_utf8) {
7e2040f0 1819 SvUTF8_on(sv);
2b9d42f0 1820 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1821 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1822 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1823 }
1824 }
79072805 1825
02aa26ce 1826 /* shrink the sv if we allocated more than we used */
79072805
LW
1827 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1828 SvLEN_set(sv, SvCUR(sv) + 1);
463ee0b2 1829 Renew(SvPVX(sv), SvLEN(sv), char);
79072805 1830 }
02aa26ce 1831
9b599b2a 1832 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1833 if (s > PL_bufptr) {
1834 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1835 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1836 sv, Nullsv,
4e553d73 1837 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1838 ? "tr"
3280af22 1839 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1840 ? "s"
1841 : "qq")));
79072805 1842 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1843 } else
8990e307 1844 SvREFCNT_dec(sv);
79072805
LW
1845 return s;
1846}
1847
ffb4593c
NT
1848/* S_intuit_more
1849 * Returns TRUE if there's more to the expression (e.g., a subscript),
1850 * FALSE otherwise.
ffb4593c
NT
1851 *
1852 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1853 *
1854 * ->[ and ->{ return TRUE
1855 * { and [ outside a pattern are always subscripts, so return TRUE
1856 * if we're outside a pattern and it's not { or [, then return FALSE
1857 * if we're in a pattern and the first char is a {
1858 * {4,5} (any digits around the comma) returns FALSE
1859 * if we're in a pattern and the first char is a [
1860 * [] returns FALSE
1861 * [SOMETHING] has a funky algorithm to decide whether it's a
1862 * character class or not. It has to deal with things like
1863 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1864 * anything else returns TRUE
1865 */
1866
9cbb5ea2
GS
1867/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1868
76e3520e 1869STATIC int
cea2e8a9 1870S_intuit_more(pTHX_ register char *s)
79072805 1871{
3280af22 1872 if (PL_lex_brackets)
79072805
LW
1873 return TRUE;
1874 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1875 return TRUE;
1876 if (*s != '{' && *s != '[')
1877 return FALSE;
3280af22 1878 if (!PL_lex_inpat)
79072805
LW
1879 return TRUE;
1880
1881 /* In a pattern, so maybe we have {n,m}. */
1882 if (*s == '{') {
1883 s++;
1884 if (!isDIGIT(*s))
1885 return TRUE;
1886 while (isDIGIT(*s))
1887 s++;
1888 if (*s == ',')
1889 s++;
1890 while (isDIGIT(*s))
1891 s++;
1892 if (*s == '}')
1893 return FALSE;
1894 return TRUE;
1895
1896 }
1897
1898 /* On the other hand, maybe we have a character class */
1899
1900 s++;
1901 if (*s == ']' || *s == '^')
1902 return FALSE;
1903 else {
ffb4593c 1904 /* this is terrifying, and it works */
79072805
LW
1905 int weight = 2; /* let's weigh the evidence */
1906 char seen[256];
f27ffc4a 1907 unsigned char un_char = 255, last_un_char;
93a17b20 1908 char *send = strchr(s,']');
3280af22 1909 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1910
1911 if (!send) /* has to be an expression */
1912 return TRUE;
1913
1914 Zero(seen,256,char);
1915 if (*s == '$')
1916 weight -= 3;
1917 else if (isDIGIT(*s)) {
1918 if (s[1] != ']') {
1919 if (isDIGIT(s[1]) && s[2] == ']')
1920 weight -= 10;
1921 }
1922 else
1923 weight -= 100;
1924 }
1925 for (; s < send; s++) {
1926 last_un_char = un_char;
1927 un_char = (unsigned char)*s;
1928 switch (*s) {
1929 case '@':
1930 case '&':
1931 case '$':
1932 weight -= seen[un_char] * 10;
7e2040f0 1933 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1934 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1935 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1936 weight -= 100;
1937 else
1938 weight -= 10;
1939 }
1940 else if (*s == '$' && s[1] &&
93a17b20
LW
1941 strchr("[#!%*<>()-=",s[1])) {
1942 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1943 weight -= 10;
1944 else
1945 weight -= 1;
1946 }
1947 break;
1948 case '\\':
1949 un_char = 254;
1950 if (s[1]) {
93a17b20 1951 if (strchr("wds]",s[1]))
79072805
LW
1952 weight += 100;
1953 else if (seen['\''] || seen['"'])
1954 weight += 1;
93a17b20 1955 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1956 weight += 40;
1957 else if (isDIGIT(s[1])) {
1958 weight += 40;
1959 while (s[1] && isDIGIT(s[1]))
1960 s++;
1961 }
1962 }
1963 else
1964 weight += 100;
1965 break;
1966 case '-':
1967 if (s[1] == '\\')
1968 weight += 50;
93a17b20 1969 if (strchr("aA01! ",last_un_char))
79072805 1970 weight += 30;
93a17b20 1971 if (strchr("zZ79~",s[1]))
79072805 1972 weight += 30;
f27ffc4a
GS
1973 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1974 weight -= 5; /* cope with negative subscript */
79072805
LW
1975 break;
1976 default:
3792a11b
NC
1977 if (!isALNUM(last_un_char)
1978 && !(last_un_char == '$' || last_un_char == '@'
1979 || last_un_char == '&')
1980 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1981 char *d = tmpbuf;
1982 while (isALPHA(*s))
1983 *d++ = *s++;
1984 *d = '\0';
1985 if (keyword(tmpbuf, d - tmpbuf))
1986 weight -= 150;
1987 }
1988 if (un_char == last_un_char + 1)
1989 weight += 5;
1990 weight -= seen[un_char];
1991 break;
1992 }
1993 seen[un_char]++;
1994 }
1995 if (weight >= 0) /* probably a character class */
1996 return FALSE;
1997 }
1998
1999 return TRUE;
2000}
ffed7fef 2001
ffb4593c
NT
2002/*
2003 * S_intuit_method
2004 *
2005 * Does all the checking to disambiguate
2006 * foo bar
2007 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2008 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2009 *
2010 * First argument is the stuff after the first token, e.g. "bar".
2011 *
2012 * Not a method if bar is a filehandle.
2013 * Not a method if foo is a subroutine prototyped to take a filehandle.
2014 * Not a method if it's really "Foo $bar"
2015 * Method if it's "foo $bar"
2016 * Not a method if it's really "print foo $bar"
2017 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2018 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2019 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2020 * =>
2021 */
2022
76e3520e 2023STATIC int
cea2e8a9 2024S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2025{
2026 char *s = start + (*start == '$');
3280af22 2027 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2028 STRLEN len;
2029 GV* indirgv;
2030
2031 if (gv) {
b6c543e3 2032 CV *cv;
a0d0e21e
LW
2033 if (GvIO(gv))
2034 return 0;
b6c543e3
IZ
2035 if ((cv = GvCVu(gv))) {
2036 char *proto = SvPVX(cv);
2037 if (proto) {
2038 if (*proto == ';')
2039 proto++;
2040 if (*proto == '*')
2041 return 0;
2042 }
2043 } else
a0d0e21e
LW
2044 gv = 0;
2045 }
8903cb82 2046 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2047 /* start is the beginning of the possible filehandle/object,
2048 * and s is the end of it
2049 * tmpbuf is a copy of it
2050 */
2051
a0d0e21e 2052 if (*start == '$') {
3280af22 2053 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2054 return 0;
2055 s = skipspace(s);
3280af22
NIS
2056 PL_bufptr = start;
2057 PL_expect = XREF;
a0d0e21e
LW
2058 return *s == '(' ? FUNCMETH : METHOD;
2059 }
2060 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2061 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2062 len -= 2;
2063 tmpbuf[len] = '\0';
2064 goto bare_package;
2065 }
2066 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2067 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2068 return 0;
2069 /* filehandle or package name makes it a method */
89bfa8cd 2070 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2071 s = skipspace(s);
3280af22 2072 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2073 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2074 bare_package:
3280af22 2075 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2076 newSVpvn(tmpbuf,len));
3280af22
NIS
2077 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2078 PL_expect = XTERM;
a0d0e21e 2079 force_next(WORD);
3280af22 2080 PL_bufptr = s;
a0d0e21e
LW
2081 return *s == '(' ? FUNCMETH : METHOD;
2082 }
2083 }
2084 return 0;
2085}
2086
ffb4593c
NT
2087/*
2088 * S_incl_perldb
2089 * Return a string of Perl code to load the debugger. If PERL5DB
2090 * is set, it will return the contents of that, otherwise a
2091 * compile-time require of perl5db.pl.
2092 */
2093
76e3520e 2094STATIC char*
cea2e8a9 2095S_incl_perldb(pTHX)
a0d0e21e 2096{
3280af22 2097 if (PL_perldb) {
76e3520e 2098 char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2099
2100 if (pdb)
2101 return pdb;
93189314 2102 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2103 return "BEGIN { require 'perl5db.pl' }";
2104 }
2105 return "";
2106}
2107
2108
16d20bd9 2109/* Encoded script support. filter_add() effectively inserts a
4e553d73 2110 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2111 * Note that the filter function only applies to the current source file
2112 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2113 *
2114 * The datasv parameter (which may be NULL) can be used to pass
2115 * private data to this instance of the filter. The filter function
2116 * can recover the SV using the FILTER_DATA macro and use it to
2117 * store private buffers and state information.
2118 *
2119 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2120 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2121 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2122 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2123 * private use must be set using malloc'd pointers.
2124 */
16d20bd9
AD
2125
2126SV *
864dbfa3 2127Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2128{
f4c556ac
GS
2129 if (!funcp)
2130 return Nullsv;
2131
3280af22
NIS
2132 if (!PL_rsfp_filters)
2133 PL_rsfp_filters = newAV();
16d20bd9 2134 if (!datasv)
8c52afec 2135 datasv = NEWSV(255,0);
16d20bd9 2136 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2137 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2138 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2139 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2140 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2141 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2142 av_unshift(PL_rsfp_filters, 1);
2143 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2144 return(datasv);
2145}
4e553d73 2146
16d20bd9
AD
2147
2148/* Delete most recently added instance of this filter function. */
a0d0e21e 2149void
864dbfa3 2150Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2151{
e0c19803 2152 SV *datasv;
fe5a182c 2153 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2154 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2155 return;
2156 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2157 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2158 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2159 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2160 IoANY(datasv) = (void *)NULL;
3280af22 2161 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2162
16d20bd9
AD
2163 return;
2164 }
2165 /* we need to search for the correct entry and clear it */
cea2e8a9 2166 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2167}
2168
2169
1de9afcd
RGS
2170/* Invoke the idxth filter function for the current rsfp. */
2171/* maxlen 0 = read one text line */
16d20bd9 2172I32
864dbfa3 2173Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2174{
16d20bd9
AD
2175 filter_t funcp;
2176 SV *datasv = NULL;
e50aee73 2177
3280af22 2178 if (!PL_rsfp_filters)
16d20bd9 2179 return -1;
1de9afcd 2180 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2181 /* Provide a default input filter to make life easy. */
2182 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2183 DEBUG_P(PerlIO_printf(Perl_debug_log,
2184 "filter_read %d: from rsfp\n", idx));
4e553d73 2185 if (maxlen) {
16d20bd9
AD
2186 /* Want a block */
2187 int len ;
2188 int old_len = SvCUR(buf_sv) ;
2189
2190 /* ensure buf_sv is large enough */
eb160463 2191 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2192 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2193 if (PerlIO_error(PL_rsfp))
37120919
AD
2194 return -1; /* error */
2195 else
2196 return 0 ; /* end of file */
2197 }
16d20bd9
AD
2198 SvCUR_set(buf_sv, old_len + len) ;
2199 } else {
2200 /* Want a line */
3280af22
NIS
2201 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2202 if (PerlIO_error(PL_rsfp))
37120919
AD
2203 return -1; /* error */
2204 else
2205 return 0 ; /* end of file */
2206 }
16d20bd9
AD
2207 }
2208 return SvCUR(buf_sv);
2209 }
2210 /* Skip this filter slot if filter has been deleted */
1de9afcd 2211 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2212 DEBUG_P(PerlIO_printf(Perl_debug_log,
2213 "filter_read %d: skipped (filter deleted)\n",
2214 idx));
16d20bd9
AD
2215 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2216 }
2217 /* Get function pointer hidden within datasv */
4755096e 2218 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2219 DEBUG_P(PerlIO_printf(Perl_debug_log,
2220 "filter_read %d: via function %p (%s)\n",
fe5a182c 2221 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2222 /* Call function. The function is expected to */
2223 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2224 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2225 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2226}
2227
76e3520e 2228STATIC char *
cea2e8a9 2229S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2230{
c39cd008 2231#ifdef PERL_CR_FILTER
3280af22 2232 if (!PL_rsfp_filters) {
c39cd008 2233 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2234 }
2235#endif
3280af22 2236 if (PL_rsfp_filters) {
55497cff 2237 if (!append)
2238 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2239 if (FILTER_READ(0, sv, 0) > 0)
2240 return ( SvPVX(sv) ) ;
2241 else
2242 return Nullch ;
2243 }
9d116dd7 2244 else
fd049845 2245 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2246}
2247
01ec43d0 2248STATIC HV *
7fc63493 2249S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2250{
2251 GV *gv;
2252
01ec43d0 2253 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2254 return PL_curstash;
2255
2256 if (len > 2 &&
2257 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2258 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2259 {
2260 return GvHV(gv); /* Foo:: */
def3634b
GS
2261 }
2262
2263 /* use constant CLASS => 'MyClass' */
2264 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2265 SV *sv;
2266 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2267 pkgname = SvPV_nolen(sv);
2268 }
2269 }
2270
2271 return gv_stashpv(pkgname, FALSE);
2272}
a0d0e21e 2273
748a9306
LW
2274#ifdef DEBUGGING
2275 static char* exp_name[] =
09bef843 2276 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2277 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2278 };
748a9306 2279#endif
463ee0b2 2280
02aa26ce
NT
2281/*
2282 yylex
2283
2284 Works out what to call the token just pulled out of the input
2285 stream. The yacc parser takes care of taking the ops we return and
2286 stitching them into a tree.
2287
2288 Returns:
2289 PRIVATEREF
2290
2291 Structure:
2292 if read an identifier
2293 if we're in a my declaration
2294 croak if they tried to say my($foo::bar)
2295 build the ops for a my() declaration
2296 if it's an access to a my() variable
2297 are we in a sort block?
2298 croak if my($a); $a <=> $b
2299 build ops for access to a my() variable
2300 if in a dq string, and they've said @foo and we can't find @foo
2301 croak
2302 build ops for a bareword
2303 if we already built the token before, use it.
2304*/
2305
20141f0e 2306
dba4d153
JH
2307#ifdef __SC__
2308#pragma segment Perl_yylex
2309#endif
dba4d153 2310int
dba4d153 2311Perl_yylex(pTHX)
20141f0e 2312{
3afc138a 2313 register char *s = PL_bufptr;
378cc40b 2314 register char *d;
79072805 2315 register I32 tmp;
463ee0b2 2316 STRLEN len;
161b471a
NIS
2317 GV *gv = Nullgv;
2318 GV **gvp = 0;
aa7440fb 2319 bool bof = FALSE;
1d239bbb 2320 I32 orig_keyword = 0;
a687059c 2321
bbf60fe6
DM
2322 DEBUG_T( {
2323 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2324 lex_state_names[PL_lex_state]);
2325 } );
02aa26ce 2326 /* check if there's an identifier for us to look at */
ba979b31 2327 if (PL_pending_ident)
bbf60fe6 2328 return REPORT(S_pending_ident(aTHX));
bbce6d69 2329
02aa26ce
NT
2330 /* no identifier pending identification */
2331
3280af22 2332 switch (PL_lex_state) {
79072805
LW
2333#ifdef COMMENTARY
2334 case LEX_NORMAL: /* Some compilers will produce faster */
2335 case LEX_INTERPNORMAL: /* code if we comment these out. */
2336 break;
2337#endif
2338
09bef843 2339 /* when we've already built the next token, just pull it out of the queue */
79072805 2340 case LEX_KNOWNEXT:
3280af22
NIS
2341 PL_nexttoke--;
2342 yylval = PL_nextval[PL_nexttoke];
2343 if (!PL_nexttoke) {
2344 PL_lex_state = PL_lex_defer;
2345 PL_expect = PL_lex_expect;
2346 PL_lex_defer = LEX_NORMAL;
463ee0b2 2347 }
607df283 2348 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2349 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2350 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2351
bbf60fe6 2352 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2353
02aa26ce 2354 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2355 when we get here, PL_bufptr is at the \
02aa26ce 2356 */
79072805
LW
2357 case LEX_INTERPCASEMOD:
2358#ifdef DEBUGGING
3280af22 2359 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2360 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2361#endif
02aa26ce 2362 /* handle \E or end of string */
3280af22 2363 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2364 char oldmod;
02aa26ce
NT
2365
2366 /* if at a \E */
3280af22
NIS
2367 if (PL_lex_casemods) {
2368 oldmod = PL_lex_casestack[--PL_lex_casemods];
2369 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2370
3792a11b
NC
2371 if (PL_bufptr != PL_bufend
2372 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2373 PL_bufptr += 2;
2374 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2375 }
bbf60fe6 2376 return REPORT(')');
79072805 2377 }
3280af22
NIS
2378 if (PL_bufptr != PL_bufend)
2379 PL_bufptr += 2;
2380 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2381 return yylex();
79072805
LW
2382 }
2383 else {
607df283 2384 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2385 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2386 s = PL_bufptr + 1;
6e909404
JH
2387 if (s[1] == '\\' && s[2] == 'E') {
2388 PL_bufptr = s + 3;
2389 PL_lex_state = LEX_INTERPCONCAT;
2390 return yylex();
a0d0e21e 2391 }
6e909404
JH
2392 else {
2393 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2394 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2395 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2396 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2397 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2398 return REPORT(')');
6e909404
JH
2399 }
2400 if (PL_lex_casemods > 10)
2401 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2402 PL_lex_casestack[PL_lex_casemods++] = *s;
2403 PL_lex_casestack[PL_lex_casemods] = '\0';
2404 PL_lex_state = LEX_INTERPCONCAT;
2405 PL_nextval[PL_nexttoke].ival = 0;
2406 force_next('(');
2407 if (*s == 'l')
2408 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2409 else if (*s == 'u')
2410 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2411 else if (*s == 'L')
2412 PL_nextval[PL_nexttoke].ival = OP_LC;
2413 else if (*s == 'U')
2414 PL_nextval[PL_nexttoke].ival = OP_UC;
2415 else if (*s == 'Q')
2416 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2417 else
2418 Perl_croak(aTHX_ "panic: yylex");
2419 PL_bufptr = s + 1;
a0d0e21e 2420 }
79072805 2421 force_next(FUNC);
3280af22
NIS
2422 if (PL_lex_starts) {
2423 s = PL_bufptr;
2424 PL_lex_starts = 0;
131b3ad0
DM
2425 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2426 if (PL_lex_casemods == 1 && PL_lex_inpat)
2427 OPERATOR(',');
2428 else
2429 Aop(OP_CONCAT);
79072805
LW
2430 }
2431 else
cea2e8a9 2432 return yylex();
79072805
LW
2433 }
2434
55497cff 2435 case LEX_INTERPPUSH:
bbf60fe6 2436 return REPORT(sublex_push());
55497cff 2437
79072805 2438 case LEX_INTERPSTART:
3280af22 2439 if (PL_bufptr == PL_bufend)
bbf60fe6 2440 return REPORT(sublex_done());
607df283 2441 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2442 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2443 PL_expect = XTERM;
2444 PL_lex_dojoin = (*PL_bufptr == '@');
2445 PL_lex_state = LEX_INTERPNORMAL;
2446 if (PL_lex_dojoin) {
2447 PL_nextval[PL_nexttoke].ival = 0;
79072805 2448 force_next(',');
a0d0e21e 2449 force_ident("\"", '$');
3280af22 2450 PL_nextval[PL_nexttoke].ival = 0;
79072805 2451 force_next('$');
3280af22 2452 PL_nextval[PL_nexttoke].ival = 0;
79072805 2453 force_next('(');
3280af22 2454 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2455 force_next(FUNC);
2456 }
3280af22
NIS
2457 if (PL_lex_starts++) {
2458 s = PL_bufptr;
131b3ad0
DM
2459 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2460 if (!PL_lex_casemods && PL_lex_inpat)
2461 OPERATOR(',');
2462 else
2463 Aop(OP_CONCAT);
79072805 2464 }
cea2e8a9 2465 return yylex();
79072805
LW
2466
2467 case LEX_INTERPENDMAYBE:
3280af22
NIS
2468 if (intuit_more(PL_bufptr)) {
2469 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2470 break;
2471 }
2472 /* FALL THROUGH */
2473
2474 case LEX_INTERPEND:
3280af22
NIS
2475 if (PL_lex_dojoin) {
2476 PL_lex_dojoin = FALSE;
2477 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2478 return REPORT(')');
79072805 2479 }
43a16006 2480 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2481 && SvEVALED(PL_lex_repl))
43a16006 2482 {
e9fa98b2 2483 if (PL_bufptr != PL_bufend)
cea2e8a9 2484 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2485 PL_lex_repl = Nullsv;
2486 }
79072805
LW
2487 /* FALLTHROUGH */
2488 case LEX_INTERPCONCAT:
2489#ifdef DEBUGGING
3280af22 2490 if (PL_lex_brackets)
cea2e8a9 2491 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2492#endif
3280af22 2493 if (PL_bufptr == PL_bufend)
bbf60fe6 2494 return REPORT(sublex_done());
79072805 2495
3280af22
NIS
2496 if (SvIVX(PL_linestr) == '\'') {
2497 SV *sv = newSVsv(PL_linestr);
2498 if (!PL_lex_inpat)
76e3520e 2499 sv = tokeq(sv);
3280af22 2500 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2501 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2502 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2503 s = PL_bufend;
79072805
LW
2504 }
2505 else {
3280af22 2506 s = scan_const(PL_bufptr);
79072805 2507 if (*s == '\\')
3280af22 2508 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2509 else
3280af22 2510 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2511 }
2512
3280af22
NIS
2513 if (s != PL_bufptr) {
2514 PL_nextval[PL_nexttoke] = yylval;
2515 PL_expect = XTERM;
79072805 2516 force_next(THING);
131b3ad0
DM
2517 if (PL_lex_starts++) {
2518 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2519 if (!PL_lex_casemods && PL_lex_inpat)
2520 OPERATOR(',');
2521 else
2522 Aop(OP_CONCAT);
2523 }
79072805 2524 else {
3280af22 2525 PL_bufptr = s;
cea2e8a9 2526 return yylex();
79072805
LW
2527 }
2528 }
2529
cea2e8a9 2530 return yylex();
a0d0e21e 2531 case LEX_FORMLINE:
3280af22
NIS
2532 PL_lex_state = LEX_NORMAL;
2533 s = scan_formline(PL_bufptr);
2534 if (!PL_lex_formbrack)
a0d0e21e
LW
2535 goto rightbracket;
2536 OPERATOR(';');
79072805
LW
2537 }
2538
3280af22
NIS
2539 s = PL_bufptr;
2540 PL_oldoldbufptr = PL_oldbufptr;
2541 PL_oldbufptr = s;
607df283 2542 DEBUG_T( {
bbf60fe6 2543 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2544 exp_name[PL_expect], s);
5f80b19c 2545 } );
463ee0b2
LW
2546
2547 retry:
378cc40b
LW
2548 switch (*s) {
2549 default:
7e2040f0 2550 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2551 goto keylookup;
cea2e8a9 2552 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2553 case 4:
2554 case 26:
2555 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2556 case 0:
3280af22
NIS
2557 if (!PL_rsfp) {
2558 PL_last_uni = 0;
2559 PL_last_lop = 0;
c5ee2135
WL
2560 if (PL_lex_brackets) {
2561 if (PL_lex_formbrack)
2562 yyerror("Format not terminated");
2563 else
2564 yyerror("Missing right curly or square bracket");
2565 }
4e553d73 2566 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2567 "### Tokener got EOF\n");
5f80b19c 2568 } );
79072805 2569 TOKEN(0);
463ee0b2 2570 }
3280af22 2571 if (s++ < PL_bufend)
a687059c 2572 goto retry; /* ignore stray nulls */
3280af22
NIS
2573 PL_last_uni = 0;
2574 PL_last_lop = 0;
2575 if (!PL_in_eval && !PL_preambled) {
2576 PL_preambled = TRUE;
2577 sv_setpv(PL_linestr,incl_perldb());
2578 if (SvCUR(PL_linestr))
4147a61b 2579 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2580 if (PL_preambleav){
2581 while(AvFILLp(PL_preambleav) >= 0) {
2582 SV *tmpsv = av_shift(PL_preambleav);
2583 sv_catsv(PL_linestr, tmpsv);
4147a61b 2584 sv_catpvn(PL_linestr, ";", 1);
91b7def8 2585 sv_free(tmpsv);
2586 }
3280af22
NIS
2587 sv_free((SV*)PL_preambleav);
2588 PL_preambleav = NULL;
91b7def8 2589 }
3280af22
NIS
2590 if (PL_minus_n || PL_minus_p) {
2591 sv_catpv(PL_linestr, "LINE: while (<>) {");
2592 if (PL_minus_l)
2593 sv_catpv(PL_linestr,"chomp;");
2594 if (PL_minus_a) {
3280af22 2595 if (PL_minus_F) {
3792a11b
NC
2596 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2597 || *PL_splitstr == '"')
3280af22 2598 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2599 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2600 else {
c8ef6a4b
NC
2601 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2602 bytes can be used as quoting characters. :-) */
48c4c863
NC
2603 /* The count here deliberately includes the NUL
2604 that terminates the C string constant. This
2605 embeds the opening NUL into the string. */
9a3fb652 2606 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2607 s = PL_splitstr;
2608 do {
2609 /* Need to \ \s */
2610 if (*s == '\\')
2611 sv_catpvn(PL_linestr, s, 1);
2612 sv_catpvn(PL_linestr, s, 1);
2613 } while (*s++);
2614 /* This loop will embed the trailing NUL of
2615 PL_linestr as the last thing it does before
2616 terminating. */
4fccd7c6 2617 sv_catpvn(PL_linestr, ");", 2);
54310121 2618 }
2304df62
AD
2619 }
2620 else
75c72d73 2621 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2622 }
79072805 2623 }
4147a61b 2624 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2625 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2626 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2627 PL_last_lop = PL_last_uni = Nullch;
3280af22 2628 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2629 SV *sv = NEWSV(85,0);
2630
2631 sv_upgrade(sv, SVt_PVMG);
3280af22 2632 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2633 (void)SvIOK_on(sv);
2634 SvIVX(sv) = 0;
57843af0 2635 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2636 }
79072805 2637 goto retry;
a687059c 2638 }
e929a76b 2639 do {
aa7440fb 2640 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2641 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2642 fake_eof:
2643 if (PL_rsfp) {
2644 if (PL_preprocess && !PL_in_eval)
2645 (void)PerlProc_pclose(PL_rsfp);
2646 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2647 PerlIO_clearerr(PL_rsfp);
2648 else
2649 (void)PerlIO_close(PL_rsfp);
2650 PL_rsfp = Nullfp;
2651 PL_doextract = FALSE;
2652 }
2653 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2654 sv_setpv(PL_linestr,PL_minus_p
2655 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2656 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2658 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2659 PL_minus_n = PL_minus_p = 0;
2660 goto retry;
2661 }
2662 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2663 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2664 sv_setpv(PL_linestr,"");
2665 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2666 }
7aa207d6
JH
2667 /* If it looks like the start of a BOM or raw UTF-16,
2668 * check if it in fact is. */
2669 else if (bof &&
2670 (*s == 0 ||
2671 *(U8*)s == 0xEF ||
2672 *(U8*)s >= 0xFE ||
2673 s[1] == 0)) {
226017aa 2674#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2675# ifdef __GNU_LIBRARY__
2676# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2677# define FTELL_FOR_PIPE_IS_BROKEN
2678# endif
e3f494f1
JH
2679# else
2680# ifdef __GLIBC__
2681# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2682# define FTELL_FOR_PIPE_IS_BROKEN
2683# endif
2684# endif
226017aa
DD
2685# endif
2686#endif
2687#ifdef FTELL_FOR_PIPE_IS_BROKEN
2688 /* This loses the possibility to detect the bof
2689 * situation on perl -P when the libc5 is being used.
2690 * Workaround? Maybe attach some extra state to PL_rsfp?
2691 */
2692 if (!PL_preprocess)
7e28d3af 2693 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2694#else
eb160463 2695 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2696#endif
7e28d3af 2697 if (bof) {
3280af22 2698 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2699 s = swallow_bom((U8*)s);
e929a76b 2700 }
378cc40b 2701 }
3280af22 2702 if (PL_doextract) {
a0d0e21e
LW
2703 /* Incest with pod. */
2704 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2705 sv_setpv(PL_linestr, "");
2706 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2707 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2708 PL_last_lop = PL_last_uni = Nullch;
3280af22 2709 PL_doextract = FALSE;
a0d0e21e 2710 }
4e553d73 2711 }
463ee0b2 2712 incline(s);
3280af22
NIS
2713 } while (PL_doextract);
2714 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2715 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2716 SV *sv = NEWSV(85,0);
a687059c 2717
93a17b20 2718 sv_upgrade(sv, SVt_PVMG);
3280af22 2719 sv_setsv(sv,PL_linestr);
0ac0412a
MJD
2720 (void)SvIOK_on(sv);
2721 SvIVX(sv) = 0;
57843af0 2722 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2723 }
3280af22 2724 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2725 PL_last_lop = PL_last_uni = Nullch;
57843af0 2726 if (CopLINE(PL_curcop) == 1) {
3280af22 2727 while (s < PL_bufend && isSPACE(*s))
79072805 2728 s++;
a0d0e21e 2729 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2730 s++;
44a8e56a 2731 d = Nullch;
3280af22 2732 if (!PL_in_eval) {
44a8e56a 2733 if (*s == '#' && *(s+1) == '!')
2734 d = s + 2;
2735#ifdef ALTERNATE_SHEBANG
2736 else {
2737 static char as[] = ALTERNATE_SHEBANG;
2738 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2739 d = s + (sizeof(as) - 1);
2740 }
2741#endif /* ALTERNATE_SHEBANG */
2742 }
2743 if (d) {
b8378b72 2744 char *ipath;
774d564b 2745 char *ipathend;
b8378b72 2746
774d564b 2747 while (isSPACE(*d))
b8378b72
CS
2748 d++;
2749 ipath = d;
774d564b 2750 while (*d && !isSPACE(*d))
2751 d++;
2752 ipathend = d;
2753
2754#ifdef ARG_ZERO_IS_SCRIPT
2755 if (ipathend > ipath) {
2756 /*
2757 * HP-UX (at least) sets argv[0] to the script name,
2758 * which makes $^X incorrect. And Digital UNIX and Linux,
2759 * at least, set argv[0] to the basename of the Perl
2760 * interpreter. So, having found "#!", we'll set it right.
2761 */
ee2f7564 2762 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2763 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2764 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2765 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2766 SvSETMAGIC(x);
2767 }
556c1dec
JH
2768 else {
2769 STRLEN blen;
2770 STRLEN llen;
2771 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2772 char *lstart = SvPV(x,llen);
2773 if (llen < blen) {
2774 bstart += blen - llen;
2775 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2776 sv_setpvn(x, ipath, ipathend - ipath);
2777 SvSETMAGIC(x);
2778 }
2779 }
2780 }
774d564b 2781 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2782 }
774d564b 2783#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2784
2785 /*
2786 * Look for options.
2787 */
748a9306 2788 d = instr(s,"perl -");
84e30d1a 2789 if (!d) {
748a9306 2790 d = instr(s,"perl");
84e30d1a
GS
2791#if defined(DOSISH)
2792 /* avoid getting into infinite loops when shebang
2793 * line contains "Perl" rather than "perl" */
2794 if (!d) {
2795 for (d = ipathend-4; d >= ipath; --d) {
2796 if ((*d == 'p' || *d == 'P')
2797 && !ibcmp(d, "perl", 4))
2798 {
2799 break;
2800 }
2801 }
2802 if (d < ipath)
2803 d = Nullch;
2804 }
2805#endif
2806 }
44a8e56a 2807#ifdef ALTERNATE_SHEBANG
2808 /*
2809 * If the ALTERNATE_SHEBANG on this system starts with a
2810 * character that can be part of a Perl expression, then if
2811 * we see it but not "perl", we're probably looking at the
2812 * start of Perl code, not a request to hand off to some
2813 * other interpreter. Similarly, if "perl" is there, but
2814 * not in the first 'word' of the line, we assume the line
2815 * contains the start of the Perl program.
44a8e56a 2816 */
2817 if (d && *s != '#') {
774d564b 2818 char *c = ipath;
44a8e56a 2819 while (*c && !strchr("; \t\r\n\f\v#", *c))
2820 c++;
2821 if (c < d)
2822 d = Nullch; /* "perl" not in first word; ignore */
2823 else
2824 *s = '#'; /* Don't try to parse shebang line */
2825 }
774d564b 2826#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2827#ifndef MACOS_TRADITIONAL
748a9306 2828 if (!d &&
44a8e56a 2829 *s == '#' &&
774d564b 2830 ipathend > ipath &&
3280af22 2831 !PL_minus_c &&
748a9306 2832 !instr(s,"indir") &&
3280af22 2833 instr(PL_origargv[0],"perl"))
748a9306 2834 {
9f68db38 2835 char **newargv;
9f68db38 2836
774d564b 2837 *ipathend = '\0';
2838 s = ipathend + 1;
3280af22 2839 while (s < PL_bufend && isSPACE(*s))
9f68db38 2840 s++;
3280af22
NIS
2841 if (s < PL_bufend) {
2842 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2843 newargv[1] = s;
3280af22 2844 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2845 s++;
2846 *s = '\0';
3280af22 2847 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2848 }
2849 else
3280af22 2850 newargv = PL_origargv;
774d564b 2851 newargv[0] = ipath;
b35112e7 2852 PERL_FPU_PRE_EXEC
b4748376 2853 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2854 PERL_FPU_POST_EXEC
cea2e8a9 2855 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2856 }
bf4acbe4 2857#endif
748a9306 2858 if (d) {
3280af22
NIS
2859 U32 oldpdb = PL_perldb;
2860 bool oldn = PL_minus_n;
2861 bool oldp = PL_minus_p;
748a9306
LW
2862
2863 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2864 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2865
2866 if (*d++ == '-') {
a11ec5a9 2867 bool switches_done = PL_doswitches;
8cc95fdb 2868 do {
2869 if (*d == 'M' || *d == 'm') {
2870 char *m = d;
2871 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2872 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2873 (int)(d - m), m);
2874 }
2875 d = moreswitches(d);
2876 } while (d);
f0b2cf55
YST
2877 if (PL_doswitches && !switches_done) {
2878 int argc = PL_origargc;
2879 char **argv = PL_origargv;
2880 do {
2881 argc--,argv++;
2882 } while (argc && argv[0][0] == '-' && argv[0][1]);
2883 init_argv_symbols(argc,argv);
2884 }
155aba94
GS
2885 if ((PERLDB_LINE && !oldpdb) ||
2886 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2887 /* if we have already added "LINE: while (<>) {",
2888 we must not do it again */
748a9306 2889 {
3280af22
NIS
2890 sv_setpv(PL_linestr, "");
2891 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2892 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2893 PL_last_lop = PL_last_uni = Nullch;
3280af22 2894 PL_preambled = FALSE;
84902520 2895 if (PERLDB_LINE)
3280af22 2896 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2897 goto retry;
2898 }
a11ec5a9
RGS
2899 if (PL_doswitches && !switches_done) {
2900 int argc = PL_origargc;
2901 char **argv = PL_origargv;
2902 do {
2903 argc--,argv++;
2904 } while (argc && argv[0][0] == '-' && argv[0][1]);
2905 init_argv_symbols(argc,argv);
2906 }
a0d0e21e 2907 }
79072805 2908 }
9f68db38 2909 }
79072805 2910 }
3280af22
NIS
2911 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2912 PL_bufptr = s;
2913 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2914 return yylex();
ae986130 2915 }
378cc40b 2916 goto retry;
4fdae800 2917 case '\r':
6a27c188 2918#ifdef PERL_STRICT_CR
cea2e8a9 2919 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2920 Perl_croak(aTHX_
cc507455 2921 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2922#endif
4fdae800 2923 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2924#ifdef MACOS_TRADITIONAL
2925 case '\312':
2926#endif
378cc40b
LW
2927 s++;
2928 goto retry;
378cc40b 2929 case '#':
e929a76b 2930 case '\n':
3280af22 2931 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2932 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2933 /* handle eval qq[#line 1 "foo"\n ...] */
2934 CopLINE_dec(PL_curcop);
2935 incline(s);
2936 }
3280af22 2937 d = PL_bufend;
a687059c 2938 while (s < d && *s != '\n')
378cc40b 2939 s++;
0f85fab0 2940 if (s < d)
378cc40b 2941 s++;
78c267c1 2942 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2943 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2944 incline(s);
3280af22
NIS
2945 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2946 PL_bufptr = s;
2947 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2948 return yylex();
a687059c 2949 }
378cc40b 2950 }
a687059c 2951 else {
378cc40b 2952 *s = '\0';
3280af22 2953 PL_bufend = s;
a687059c 2954 }
378cc40b
LW
2955 goto retry;
2956 case '-':
79072805 2957 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2958 I32 ftst = 0;
2959
378cc40b 2960 s++;
3280af22 2961 PL_bufptr = s;
748a9306
LW
2962 tmp = *s++;
2963
bf4acbe4 2964 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2965 s++;
2966
2967 if (strnEQ(s,"=>",2)) {
3280af22 2968 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2969 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2970 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2971 } );
748a9306
LW
2972 OPERATOR('-'); /* unary minus */
2973 }
3280af22 2974 PL_last_uni = PL_oldbufptr;
748a9306 2975 switch (tmp) {
e5edeb50
JH
2976 case 'r': ftst = OP_FTEREAD; break;
2977 case 'w': ftst = OP_FTEWRITE; break;
2978 case 'x': ftst = OP_FTEEXEC; break;
2979 case 'o': ftst = OP_FTEOWNED; break;
2980 case 'R': ftst = OP_FTRREAD; break;
2981 case 'W': ftst = OP_FTRWRITE; break;
2982 case 'X': ftst = OP_FTREXEC; break;
2983 case 'O': ftst = OP_FTROWNED; break;
2984 case 'e': ftst = OP_FTIS; break;
2985 case 'z': ftst = OP_FTZERO; break;
2986 case 's': ftst = OP_FTSIZE; break;
2987 case 'f': ftst = OP_FTFILE; break;
2988 case 'd': ftst = OP_FTDIR; break;
2989 case 'l': ftst = OP_FTLINK; break;
2990 case 'p': ftst = OP_FTPIPE; break;
2991 case 'S': ftst = OP_FTSOCK; break;
2992 case 'u': ftst = OP_FTSUID; break;
2993 case 'g': ftst = OP_FTSGID; break;
2994 case 'k': ftst = OP_FTSVTX; break;
2995 case 'b': ftst = OP_FTBLK; break;
2996 case 'c': ftst = OP_FTCHR; break;
2997 case 't': ftst = OP_FTTTY; break;
2998 case 'T': ftst = OP_FTTEXT; break;
2999 case 'B': ftst = OP_FTBINARY; break;
3000 case 'M': case 'A': case 'C':
3001 gv_fetchpv("\024",TRUE, SVt_PV);
3002 switch (tmp) {
3003 case 'M': ftst = OP_FTMTIME; break;
3004 case 'A': ftst = OP_FTATIME; break;
3005 case 'C': ftst = OP_FTCTIME; break;
3006 default: break;
3007 }
3008 break;
378cc40b 3009 default:
378cc40b
LW
3010 break;
3011 }
e5edeb50 3012 if (ftst) {
eb160463 3013 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3014 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3015 "### Saw file test %c\n", (int)ftst);
5f80b19c 3016 } );
e5edeb50
JH
3017 FTST(ftst);
3018 }
3019 else {
3020 /* Assume it was a minus followed by a one-letter named
3021 * subroutine call (or a -bareword), then. */
95c31fe3 3022 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3023 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3024 (int) tmp);
5f80b19c 3025 } );
3cf7b4c4 3026 s = --PL_bufptr;
e5edeb50 3027 }
378cc40b 3028 }
a687059c
LW
3029 tmp = *s++;
3030 if (*s == tmp) {
3031 s++;
3280af22 3032 if (PL_expect == XOPERATOR)
79072805
LW
3033 TERM(POSTDEC);
3034 else
3035 OPERATOR(PREDEC);
3036 }
3037 else if (*s == '>') {
3038 s++;
3039 s = skipspace(s);
7e2040f0 3040 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3041 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3042 TOKEN(ARROW);
79072805 3043 }
748a9306
LW
3044 else if (*s == '$')
3045 OPERATOR(ARROW);
463ee0b2 3046 else
748a9306 3047 TERM(ARROW);
a687059c 3048 }
3280af22 3049 if (PL_expect == XOPERATOR)
79072805
LW
3050 Aop(OP_SUBTRACT);
3051 else {
3280af22 3052 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3053 check_uni();
79072805 3054 OPERATOR('-'); /* unary minus */
2f3197b3 3055 }
79072805 3056
378cc40b 3057 case '+':
a687059c
LW
3058 tmp = *s++;
3059 if (*s == tmp) {
378cc40b 3060 s++;
3280af22 3061 if (PL_expect == XOPERATOR)
79072805
LW
3062 TERM(POSTINC);
3063 else
3064 OPERATOR(PREINC);
378cc40b 3065 }
3280af22 3066 if (PL_expect == XOPERATOR)
79072805
LW
3067 Aop(OP_ADD);
3068 else {
3280af22 3069 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3070 check_uni();
a687059c 3071 OPERATOR('+');
2f3197b3 3072 }
a687059c 3073
378cc40b 3074 case '*':
3280af22
NIS
3075 if (PL_expect != XOPERATOR) {
3076 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3077 PL_expect = XOPERATOR;
3078 force_ident(PL_tokenbuf, '*');
3079 if (!*PL_tokenbuf)
a0d0e21e 3080 PREREF('*');
79072805 3081 TERM('*');
a687059c 3082 }
79072805
LW
3083 s++;
3084 if (*s == '*') {
a687059c 3085 s++;
79072805 3086 PWop(OP_POW);
a687059c 3087 }
79072805
LW
3088 Mop(OP_MULTIPLY);
3089
378cc40b 3090 case '%':
3280af22 3091 if (PL_expect == XOPERATOR) {
bbce6d69 3092 ++s;
3093 Mop(OP_MODULO);
a687059c 3094 }
3280af22
NIS
3095 PL_tokenbuf[0] = '%';
3096 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3097 if (!PL_tokenbuf[1]) {
bbce6d69 3098 PREREF('%');
a687059c 3099 }
3280af22 3100 PL_pending_ident = '%';
bbce6d69 3101 TERM('%');
a687059c 3102
378cc40b 3103 case '^':
79072805 3104 s++;
a0d0e21e 3105 BOop(OP_BIT_XOR);
79072805 3106 case '[':
3280af22 3107 PL_lex_brackets++;
79072805 3108 /* FALL THROUGH */
378cc40b 3109 case '~':
378cc40b 3110 case ',':
378cc40b
LW
3111 tmp = *s++;
3112 OPERATOR(tmp);
a0d0e21e
LW
3113 case ':':
3114 if (s[1] == ':') {
3115 len = 0;
3116 goto just_a_word;
3117 }
3118 s++;
09bef843
SB
3119 switch (PL_expect) {
3120 OP *attrs;
3121 case XOPERATOR:
3122 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3123 break;
3124 PL_bufptr = s; /* update in case we back off */
3125 goto grabattrs;
3126 case XATTRBLOCK:
3127 PL_expect = XBLOCK;
3128 goto grabattrs;
3129 case XATTRTERM:
3130 PL_expect = XTERMBLOCK;
3131 grabattrs:
3132 s = skipspace(s);
3133 attrs = Nullop;
7e2040f0 3134 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3135 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3136 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3137 if (tmp < 0) tmp = -tmp;
3138 switch (tmp) {
3139 case KEY_or:
3140 case KEY_and:
c963b151 3141 case KEY_err:
f9829d6b
GS
3142 case KEY_for:
3143 case KEY_unless:
3144 case KEY_if:
3145 case KEY_while:
3146 case KEY_until:
3147 goto got_attrs;
3148 default:
3149 break;
3150 }
3151 }
09bef843
SB
3152 if (*d == '(') {
3153 d = scan_str(d,TRUE,TRUE);
3154 if (!d) {
09bef843
SB
3155 /* MUST advance bufptr here to avoid bogus
3156 "at end of line" context messages from yyerror().
3157 */
3158 PL_bufptr = s + len;
3159 yyerror("Unterminated attribute parameter in attribute list");
3160 if (attrs)
3161 op_free(attrs);
bbf60fe6 3162 return REPORT(0); /* EOF indicator */
09bef843
SB
3163 }
3164 }
3165 if (PL_lex_stuff) {
3166 SV *sv = newSVpvn(s, len);
3167 sv_catsv(sv, PL_lex_stuff);
3168 attrs = append_elem(OP_LIST, attrs,
3169 newSVOP(OP_CONST, 0, sv));
3170 SvREFCNT_dec(PL_lex_stuff);
3171 PL_lex_stuff = Nullsv;
3172 }
3173 else {
371fce9b
DM
3174 if (len == 6 && strnEQ(s, "unique", len)) {
3175 if (PL_in_my == KEY_our)
3176#ifdef USE_ITHREADS
3177 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3178#else
3179 ; /* skip to avoid loading attributes.pm */
3180#endif
3181 else
3182 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3183 }
3184
d3cea301
SB
3185 /* NOTE: any CV attrs applied here need to be part of
3186 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3187 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3188 CvLVALUE_on(PL_compcv);
3189 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3190 CvLOCKED_on(PL_compcv);
3191 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3192 CvMETHOD_on(PL_compcv);
06492da6
SF
3193 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3194 CvASSERTION_on(PL_compcv);
78f9721b
SM
3195 /* After we've set the flags, it could be argued that
3196 we don't need to do the attributes.pm-based setting
3197 process, and shouldn't bother appending recognized
d3cea301
SB
3198 flags. To experiment with that, uncomment the
3199 following "else". (Note that's already been
3200 uncommented. That keeps the above-applied built-in
3201 attributes from being intercepted (and possibly
3202 rejected) by a package's attribute routines, but is
3203 justified by the performance win for the common case
3204 of applying only built-in attributes.) */
0256094b 3205 else
78f9721b
SM
3206 attrs = append_elem(OP_LIST, attrs,
3207 newSVOP(OP_CONST, 0,
3208 newSVpvn(s, len)));
09bef843
SB
3209 }
3210 s = skipspace(d);
0120eecf 3211 if (*s == ':' && s[1] != ':')
09bef843 3212 s = skipspace(s+1);
0120eecf
GS
3213 else if (s == d)
3214 break; /* require real whitespace or :'s */
09bef843 3215 }
f9829d6b 3216 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3217 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3218 char q = ((*s == '\'') ? '"' : '\'');
3219 /* If here for an expression, and parsed no attrs, back off. */
3220 if (tmp == '=' && !attrs) {
3221 s = PL_bufptr;
3222 break;
3223 }
3224 /* MUST advance bufptr here to avoid bogus "at end of line"
3225 context messages from yyerror().
3226 */
3227 PL_bufptr = s;
3228 if (!*s)
3229 yyerror("Unterminated attribute list");
3230 else
3231 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3232 q, *s, q));
3233 if (attrs)
3234 op_free(attrs);
3235 OPERATOR(':');
3236 }
f9829d6b 3237 got_attrs:
09bef843
SB
3238 if (attrs) {
3239 PL_nextval[PL_nexttoke].opval = attrs;
3240 force_next(THING);
3241 }
3242 TOKEN(COLONATTR);
3243 }
a0d0e21e 3244 OPERATOR(':');
8990e307
LW
3245 case '(':
3246 s++;
3280af22
NIS
3247 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3248 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3249 else
3280af22 3250 PL_expect = XTERM;
4a202259 3251 s = skipspace(s);
a0d0e21e 3252 TOKEN('(');
378cc40b 3253 case ';':
f4dd75d9 3254 CLINE;
378cc40b
LW
3255 tmp = *s++;
3256 OPERATOR(tmp);
3257 case ')':
378cc40b 3258 tmp = *s++;
16d20bd9
AD
3259 s = skipspace(s);
3260 if (*s == '{')
3261 PREBLOCK(tmp);
378cc40b 3262 TERM(tmp);
79072805
LW
3263 case ']':
3264 s++;
3280af22 3265 if (PL_lex_brackets <= 0)
d98d5fff 3266 yyerror("Unmatched right square bracket");
463ee0b2 3267 else
3280af22
NIS
3268 --PL_lex_brackets;
3269 if (PL_lex_state == LEX_INTERPNORMAL) {
3270 if (PL_lex_brackets == 0) {
a0d0e21e 3271 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3272 PL_lex_state = LEX_INTERPEND;
79072805
LW
3273 }
3274 }
4633a7c4 3275 TERM(']');
79072805
LW
3276 case '{':
3277 leftbracket:
79072805 3278 s++;
3280af22 3279 if (PL_lex_brackets > 100) {
8edd5f42 3280 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3281 }
3280af22 3282 switch (PL_expect) {
a0d0e21e 3283 case XTERM:
3280af22 3284 if (PL_lex_formbrack) {
a0d0e21e
LW
3285 s--;
3286 PRETERMBLOCK(DO);
3287 }
3280af22
NIS
3288 if (PL_oldoldbufptr == PL_last_lop)
3289 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3290 else
3280af22 3291 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3292 OPERATOR(HASHBRACK);
a0d0e21e 3293 case XOPERATOR:
bf4acbe4 3294 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3295 s++;
44a8e56a 3296 d = s;
3280af22
NIS
3297 PL_tokenbuf[0] = '\0';
3298 if (d < PL_bufend && *d == '-') {
3299 PL_tokenbuf[0] = '-';
44a8e56a 3300 d++;
bf4acbe4 3301 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3302 d++;
3303 }
7e2040f0 3304 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3305 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3306 FALSE, &len);
bf4acbe4 3307 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3308 d++;
3309 if (*d == '}') {
3280af22 3310 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3311 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3312 if (minus)
3313 force_next('-');
748a9306
LW
3314 }
3315 }
3316 /* FALL THROUGH */
09bef843 3317 case XATTRBLOCK:
748a9306 3318 case XBLOCK:
3280af22
NIS
3319 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3320 PL_expect = XSTATE;
a0d0e21e 3321 break;
09bef843 3322 case XATTRTERM:
a0d0e21e 3323 case XTERMBLOCK:
3280af22
NIS
3324 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3325 PL_expect = XSTATE;
a0d0e21e
LW
3326 break;
3327 default: {
3328 char *t;
3280af22
NIS
3329 if (PL_oldoldbufptr == PL_last_lop)
3330 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3331 else
3280af22 3332 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3333 s = skipspace(s);
8452ff4b
SB
3334 if (*s == '}') {
3335 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3336 PL_expect = XTERM;
3337 /* This hack is to get the ${} in the message. */
3338 PL_bufptr = s+1;
3339 yyerror("syntax error");
3340 break;
3341 }
a0d0e21e 3342 OPERATOR(HASHBRACK);
8452ff4b 3343 }
b8a4b1be
GS
3344 /* This hack serves to disambiguate a pair of curlies
3345 * as being a block or an anon hash. Normally, expectation
3346 * determines that, but in cases where we're not in a
3347 * position to expect anything in particular (like inside
3348 * eval"") we have to resolve the ambiguity. This code
3349 * covers the case where the first term in the curlies is a
3350 * quoted string. Most other cases need to be explicitly
3351 * disambiguated by prepending a `+' before the opening
3352 * curly in order to force resolution as an anon hash.
3353 *
3354 * XXX should probably propagate the outer expectation
3355 * into eval"" to rely less on this hack, but that could
3356 * potentially break current behavior of eval"".
3357 * GSAR 97-07-21
3358 */
3359 t = s;
3360 if (*s == '\'' || *s == '"' || *s == '`') {
3361 /* common case: get past first string, handling escapes */
3280af22 3362 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3363 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3364 t++;
3365 t++;
a0d0e21e 3366 }
b8a4b1be 3367 else if (*s == 'q') {
3280af22 3368 if (++t < PL_bufend
b8a4b1be 3369 && (!isALNUM(*t)
3280af22 3370 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3371 && !isALNUM(*t))))
3372 {
abc667d1 3373 /* skip q//-like construct */
b8a4b1be
GS
3374 char *tmps;
3375 char open, close, term;
3376 I32 brackets = 1;
3377
3280af22 3378 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3379 t++;
abc667d1
DM
3380 /* check for q => */
3381 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3382 OPERATOR(HASHBRACK);
3383 }
b8a4b1be
GS
3384 term = *t;
3385 open = term;
3386 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3387 term = tmps[5];
3388 close = term;
3389 if (open == close)
3280af22
NIS
3390 for (t++; t < PL_bufend; t++) {
3391 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3392 t++;
6d07e5e9 3393 else if (*t == open)
b8a4b1be
GS
3394 break;
3395 }
abc667d1 3396 else {
3280af22
NIS
3397 for (t++; t < PL_bufend; t++) {
3398 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3399 t++;
6d07e5e9 3400 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3401 break;
3402 else if (*t == open)
3403 brackets++;
3404 }
abc667d1
DM
3405 }
3406 t++;
b8a4b1be 3407 }
abc667d1
DM
3408 else
3409 /* skip plain q word */
3410 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3411 t += UTF8SKIP(t);
a0d0e21e 3412 }
7e2040f0 3413 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3414 t += UTF8SKIP(t);
7e2040f0 3415 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3416 t += UTF8SKIP(t);
a0d0e21e 3417 }
3280af22 3418 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3419 t++;
b8a4b1be
GS
3420 /* if comma follows first term, call it an anon hash */
3421 /* XXX it could be a comma expression with loop modifiers */
3280af22 3422 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3423 || (*t == '=' && t[1] == '>')))
a0d0e21e 3424 OPERATOR(HASHBRACK);
3280af22 3425 if (PL_expect == XREF)
4e4e412b 3426 PL_expect = XTERM;
a0d0e21e 3427 else {
3280af22
NIS
3428 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3429 PL_expect = XSTATE;
a0d0e21e 3430 }
8990e307 3431 }
a0d0e21e 3432 break;
463ee0b2 3433 }
57843af0 3434 yylval.ival = CopLINE(PL_curcop);
79072805 3435 if (isSPACE(*s) || *s == '#')
3280af22 3436 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3437 TOKEN('{');
378cc40b 3438 case '}':
79072805
LW
3439 rightbracket:
3440 s++;
3280af22 3441 if (PL_lex_brackets <= 0)
d98d5fff 3442 yyerror("Unmatched right curly bracket");
463ee0b2 3443 else
3280af22 3444 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3445 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3446 PL_lex_formbrack = 0;
3447 if (PL_lex_state == LEX_INTERPNORMAL) {
3448 if (PL_lex_brackets == 0) {
9059aa12
LW
3449 if (PL_expect & XFAKEBRACK) {
3450 PL_expect &= XENUMMASK;
3280af22
NIS
3451 PL_lex_state = LEX_INTERPEND;
3452 PL_bufptr = s;
cea2e8a9 3453 return yylex(); /* ignore fake brackets */
79072805 3454 }
fa83b5b6 3455 if (*s == '-' && s[1] == '>')
3280af22 3456 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3457 else if (*s != '[' && *s != '{')
3280af22 3458 PL_lex_state = LEX_INTERPEND;
79072805
LW
3459 }
3460 }
9059aa12
LW
3461 if (PL_expect & XFAKEBRACK) {
3462 PL_expect &= XENUMMASK;
3280af22 3463 PL_bufptr = s;
cea2e8a9 3464 return yylex(); /* ignore fake brackets */
748a9306 3465 }
79072805
LW
3466 force_next('}');
3467 TOKEN(';');
378cc40b
LW
3468 case '&':
3469 s++;
3470 tmp = *s++;
3471 if (tmp == '&')
a0d0e21e 3472 AOPERATOR(ANDAND);
378cc40b 3473 s--;
3280af22 3474 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3475 if (ckWARN(WARN_SEMICOLON)
3476 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3477 {
57843af0 3478 CopLINE_dec(PL_curcop);
9014280d 3479 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3480 CopLINE_inc(PL_curcop);
463ee0b2 3481 }
79072805 3482 BAop(OP_BIT_AND);
463ee0b2 3483 }
79072805 3484
3280af22
NIS
3485 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3486 if (*PL_tokenbuf) {
3487 PL_expect = XOPERATOR;
3488 force_ident(PL_tokenbuf, '&');
463ee0b2 3489 }
79072805
LW
3490 else
3491 PREREF('&');
c07a80fd 3492 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3493 TERM('&');
3494
378cc40b
LW
3495 case '|':
3496 s++;
3497 tmp = *s++;
3498 if (tmp == '|')
a0d0e21e 3499 AOPERATOR(OROR);
378cc40b 3500 s--;
79072805 3501 BOop(OP_BIT_OR);
378cc40b
LW
3502 case '=':
3503 s++;
3504 tmp = *s++;
3505 if (tmp == '=')
79072805
LW
3506 Eop(OP_EQ);
3507 if (tmp == '>')
3508 OPERATOR(',');
378cc40b 3509 if (tmp == '~')
79072805 3510 PMop(OP_MATCH);
599cee73 3511 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3512 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3513 s--;
3280af22
NIS
3514 if (PL_expect == XSTATE && isALPHA(tmp) &&
3515 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3516 {
3280af22
NIS
3517 if (PL_in_eval && !PL_rsfp) {
3518 d = PL_bufend;
a5f75d66
AD
3519 while (s < d) {
3520 if (*s++ == '\n') {
3521 incline(s);
3522 if (strnEQ(s,"=cut",4)) {
3523 s = strchr(s,'\n');
3524 if (s)
3525 s++;
3526 else
3527 s = d;
3528 incline(s);
3529 goto retry;
3530 }
3531 }
3532 }
3533 goto retry;
3534 }
3280af22
NIS
3535 s = PL_bufend;
3536 PL_doextract = TRUE;
a0d0e21e
LW
3537 goto retry;
3538 }
3280af22 3539 if (PL_lex_brackets < PL_lex_formbrack) {
a0d0e21e 3540 char *t;
51882d45 3541#ifdef PERL_STRICT_CR
bf4acbe4 3542 for (t = s; SPACE_OR_TAB(*t); t++) ;
51882d45 3543#else
bf4acbe4 3544 for (t = s; SPACE_OR_TAB(*t) || *t == '\r'; t++) ;
51882d45 3545#endif
a0d0e21e
LW
3546 if (*t == '\n' || *t == '#') {
3547 s--;
3280af22 3548 PL_expect = XBLOCK;
a0d0e21e
LW
3549 goto leftbracket;
3550 }
79072805 3551 }
a0d0e21e
LW
3552 yylval.ival = 0;
3553 OPERATOR(ASSIGNOP);
378cc40b
LW
3554 case '!':
3555 s++;
3556 tmp = *s++;
984200d0 3557 if (tmp == '=') {