This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix typo (bug #35368)
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
770526c1 4 * 2000, 2001, 2002, 2003, 2004, 2005, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
27da23d5
JH
29static const char ident_too_long[] =
30 "Identifier too long";
31static const char c_without_g[] =
32 "Use of /c modifier is meaningless without /g";
33static const char c_in_subst[] =
34 "Use of /c modifier is meaningless in s///";
8903cb82 35
acfe0abc 36static void restore_rsfp(pTHX_ void *f);
6e3aabd6 37#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
38static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
39static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 40#endif
51371543 41
9059aa12
LW
42#define XFAKEBRACK 128
43#define XENUMMASK 127
44
39e02b42
JH
45#ifdef USE_UTF8_SCRIPTS
46# define UTF (!IN_BYTES)
2b9d42f0 47#else
746b446a 48# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 49#endif
a0ed51b3 50
61f0cdd9 51/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
52 * 1999-02-27 mjd-perl-patch@plover.com */
53#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
54
bf4acbe4
GS
55/* On MacOS, respect nonbreaking spaces */
56#ifdef MACOS_TRADITIONAL
57#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
58#else
59#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
60#endif
61
ffb4593c
NT
62/* LEX_* are values for PL_lex_state, the state of the lexer.
63 * They are arranged oddly so that the guard on the switch statement
79072805
LW
64 * can get by with a single comparison (if the compiler is smart enough).
65 */
66
fb73857a 67/* #define LEX_NOTPARSING 11 is done in perl.h. */
68
55497cff 69#define LEX_NORMAL 10
70#define LEX_INTERPNORMAL 9
71#define LEX_INTERPCASEMOD 8
72#define LEX_INTERPPUSH 7
73#define LEX_INTERPSTART 6
74#define LEX_INTERPEND 5
75#define LEX_INTERPENDMAYBE 4
76#define LEX_INTERPCONCAT 3
77#define LEX_INTERPCONST 2
78#define LEX_FORMLINE 1
79#define LEX_KNOWNEXT 0
79072805 80
bbf60fe6 81#ifdef DEBUGGING
27da23d5 82static const char* const lex_state_names[] = {
bbf60fe6
DM
83 "KNOWNEXT",
84 "FORMLINE",
85 "INTERPCONST",
86 "INTERPCONCAT",
87 "INTERPENDMAYBE",
88 "INTERPEND",
89 "INTERPSTART",
90 "INTERPPUSH",
91 "INTERPCASEMOD",
92 "INTERPNORMAL",
93 "NORMAL"
94};
95#endif
96
79072805
LW
97#ifdef ff_next
98#undef ff_next
d48672a2
LW
99#endif
100
79072805 101#include "keywords.h"
fe14fcc3 102
ffb4593c
NT
103/* CLINE is a macro that ensures PL_copline has a sane value */
104
ae986130
LW
105#ifdef CLINE
106#undef CLINE
107#endif
57843af0 108#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 109
ffb4593c
NT
110/*
111 * Convenience functions to return different tokens and prime the
9cbb5ea2 112 * lexer for the next token. They all take an argument.
ffb4593c
NT
113 *
114 * TOKEN : generic token (used for '(', DOLSHARP, etc)
115 * OPERATOR : generic operator
116 * AOPERATOR : assignment operator
117 * PREBLOCK : beginning the block after an if, while, foreach, ...
118 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
119 * PREREF : *EXPR where EXPR is not a simple identifier
120 * TERM : expression term
121 * LOOPX : loop exiting command (goto, last, dump, etc)
122 * FTST : file test operator
123 * FUN0 : zero-argument function
2d2e263d 124 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
125 * BOop : bitwise or or xor
126 * BAop : bitwise and
127 * SHop : shift operator
128 * PWop : power operator
9cbb5ea2 129 * PMop : pattern-matching operator
ffb4593c
NT
130 * Aop : addition-level operator
131 * Mop : multiplication-level operator
132 * Eop : equality-testing operator
e5edeb50 133 * Rop : relational operator <= != gt
ffb4593c
NT
134 *
135 * Also see LOP and lop() below.
136 */
137
998054bd 138#ifdef DEBUGGING /* Serve -DT. */
bbf60fe6 139# define REPORT(retval) tokereport(s,(int)retval)
998054bd 140#else
bbf60fe6 141# define REPORT(retval) (retval)
998054bd
SC
142#endif
143
bbf60fe6
DM
144#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
145#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
146#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
147#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
148#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
149#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
150#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
151#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
152#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
153#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
154#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
155#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
156#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
157#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
158#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
159#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
160#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
161#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
162#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
163#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 164
a687059c
LW
165/* This bit of chicanery makes a unary function followed by
166 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
167 * The UNIDOR macro is for unary functions that can be followed by the //
168 * operator (such as C<shift // 0>).
a687059c 169 */
bbf60fe6
DM
170#define UNI2(f,x) return ( \
171 yylval.ival = f, \
6f33ba73 172 PL_expect = x, \
3280af22
NIS
173 PL_bufptr = s, \
174 PL_last_uni = PL_oldbufptr, \
175 PL_last_lop_op = f, \
bbf60fe6
DM
176 REPORT( \
177 (*s == '(' || (s = skipspace(s), *s == '(') \
178 ? (int)FUNC1 : (int)UNIOP)))
6f33ba73
RGS
179#define UNI(f) UNI2(f,XTERM)
180#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 181
bbf60fe6
DM
182#define UNIBRACK(f) return ( \
183 yylval.ival = f, \
3280af22
NIS
184 PL_bufptr = s, \
185 PL_last_uni = PL_oldbufptr, \
bbf60fe6
DM
186 REPORT( \
187 (*s == '(' || (s = skipspace(s), *s == '(') \
188 ? (int)FUNC1 : (int)UNIOP)))
79072805 189
9f68db38 190/* grandfather return to old style */
3280af22 191#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 192
8fa7f367
JH
193#ifdef DEBUGGING
194
bbf60fe6
DM
195/* how to interpret the yylval associated with the token */
196enum token_type {
197 TOKENTYPE_NONE,
198 TOKENTYPE_IVAL,
199 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
200 TOKENTYPE_PVAL,
201 TOKENTYPE_OPVAL,
202 TOKENTYPE_GVVAL
203};
204
27da23d5
JH
205static struct debug_tokens { const int token, type; const char *name; }
206 const debug_tokens[] =
9041c2e3 207{
bbf60fe6
DM
208 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
209 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
210 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
211 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
212 { ARROW, TOKENTYPE_NONE, "ARROW" },
213 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
214 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
215 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
216 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
217 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
218 { DO, TOKENTYPE_NONE, "DO" },
219 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
220 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
221 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
222 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
223 { ELSE, TOKENTYPE_NONE, "ELSE" },
224 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
225 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
226 { FOR, TOKENTYPE_IVAL, "FOR" },
227 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
228 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
229 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
230 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
231 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
232 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
233 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
234 { IF, TOKENTYPE_IVAL, "IF" },
235 { LABEL, TOKENTYPE_PVAL, "LABEL" },
236 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
237 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
238 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
239 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
240 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
241 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
242 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
243 { MY, TOKENTYPE_IVAL, "MY" },
244 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
245 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
246 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
247 { OROP, TOKENTYPE_IVAL, "OROP" },
248 { OROR, TOKENTYPE_NONE, "OROR" },
249 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
250 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
251 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
252 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
253 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
254 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
255 { PREINC, TOKENTYPE_NONE, "PREINC" },
256 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
257 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
258 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
259 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
260 { SUB, TOKENTYPE_NONE, "SUB" },
261 { THING, TOKENTYPE_OPVAL, "THING" },
262 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
263 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
264 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
265 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
266 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
267 { USE, TOKENTYPE_IVAL, "USE" },
268 { WHILE, TOKENTYPE_IVAL, "WHILE" },
269 { WORD, TOKENTYPE_OPVAL, "WORD" },
270 { 0, TOKENTYPE_NONE, 0 }
271};
272
273/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 274
bbf60fe6 275STATIC int
bfed75c6 276S_tokereport(pTHX_ const char* s, I32 rv)
bbf60fe6
DM
277{
278 if (DEBUG_T_TEST) {
bfed75c6 279 const char *name = Nullch;
bbf60fe6
DM
280 enum token_type type = TOKENTYPE_NONE;
281 struct debug_tokens *p;
bfed75c6 282 SV* report = newSVpvn("<== ", 4);
bbf60fe6 283
aadb217d 284 for (p = (struct debug_tokens *)debug_tokens; p->token; p++) {
bbf60fe6
DM
285 if (p->token == (int)rv) {
286 name = p->name;
287 type = p->type;
288 break;
289 }
290 }
291 if (name)
54667de8 292 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
293 else if ((char)rv > ' ' && (char)rv < '~')
294 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
295 else if (!rv)
54667de8 296 Perl_sv_catpv(aTHX_ report, "EOF");
bbf60fe6
DM
297 else
298 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
299 switch (type) {
300 case TOKENTYPE_NONE:
301 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
302 break;
303 case TOKENTYPE_IVAL:
e4584336 304 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
305 break;
306 case TOKENTYPE_OPNUM:
307 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
308 PL_op_name[yylval.ival]);
309 break;
310 case TOKENTYPE_PVAL:
311 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
312 break;
313 case TOKENTYPE_OPVAL:
401441c0
RGS
314 if (yylval.opval)
315 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 316 PL_op_name[yylval.opval->op_type]);
401441c0
RGS
317 else
318 Perl_sv_catpv(aTHX_ report, "(opval=null)");
bbf60fe6
DM
319 break;
320 }
e4584336 321 Perl_sv_catpvf(aTHX_ report, " at line %"IVdf" [", (IV)CopLINE(PL_curcop));
998054bd
SC
322 if (s - PL_bufptr > 0)
323 sv_catpvn(report, PL_bufptr, s - PL_bufptr);
324 else {
325 if (PL_oldbufptr && *PL_oldbufptr)
326 sv_catpv(report, PL_tokenbuf);
327 }
bbf60fe6
DM
328 PerlIO_printf(Perl_debug_log, "### %s]\n", SvPV_nolen(report));
329 };
330 return (int)rv;
998054bd
SC
331}
332
8fa7f367
JH
333#endif
334
ffb4593c
NT
335/*
336 * S_ao
337 *
c963b151
BD
338 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
339 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
340 */
341
76e3520e 342STATIC int
cea2e8a9 343S_ao(pTHX_ int toketype)
a0d0e21e 344{
3280af22
NIS
345 if (*PL_bufptr == '=') {
346 PL_bufptr++;
a0d0e21e
LW
347 if (toketype == ANDAND)
348 yylval.ival = OP_ANDASSIGN;
349 else if (toketype == OROR)
350 yylval.ival = OP_ORASSIGN;
c963b151
BD
351 else if (toketype == DORDOR)
352 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
353 toketype = ASSIGNOP;
354 }
355 return toketype;
356}
357
ffb4593c
NT
358/*
359 * S_no_op
360 * When Perl expects an operator and finds something else, no_op
361 * prints the warning. It always prints "<something> found where
362 * operator expected. It prints "Missing semicolon on previous line?"
363 * if the surprise occurs at the start of the line. "do you need to
364 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
365 * where the compiler doesn't know if foo is a method call or a function.
366 * It prints "Missing operator before end of line" if there's nothing
367 * after the missing operator, or "... before <...>" if there is something
368 * after the missing operator.
369 */
370
76e3520e 371STATIC void
bfed75c6 372S_no_op(pTHX_ const char *what, char *s)
463ee0b2 373{
3280af22
NIS
374 char *oldbp = PL_bufptr;
375 bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 376
1189a94a
GS
377 if (!s)
378 s = oldbp;
07c798fb 379 else
1189a94a 380 PL_bufptr = s;
cea2e8a9 381 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
382 if (ckWARN_d(WARN_SYNTAX)) {
383 if (is_first)
384 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
385 "\t(Missing semicolon on previous line?)\n");
386 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
387 char *t;
388 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
389 if (t < PL_bufptr && isSPACE(*t))
390 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
391 "\t(Do you need to predeclare %.*s?)\n",
392 t - PL_oldoldbufptr, PL_oldoldbufptr);
393 }
394 else {
395 assert(s >= oldbp);
396 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
397 "\t(Missing operator before %.*s?)\n", s - oldbp, oldbp);
398 }
07c798fb 399 }
3280af22 400 PL_bufptr = oldbp;
8990e307
LW
401}
402
ffb4593c
NT
403/*
404 * S_missingterm
405 * Complain about missing quote/regexp/heredoc terminator.
406 * If it's called with (char *)NULL then it cauterizes the line buffer.
407 * If we're in a delimited string and the delimiter is a control
408 * character, it's reformatted into a two-char sequence like ^C.
409 * This is fatal.
410 */
411
76e3520e 412STATIC void
cea2e8a9 413S_missingterm(pTHX_ char *s)
8990e307
LW
414{
415 char tmpbuf[3];
416 char q;
417 if (s) {
418 char *nl = strrchr(s,'\n');
d2719217 419 if (nl)
8990e307
LW
420 *nl = '\0';
421 }
9d116dd7
JH
422 else if (
423#ifdef EBCDIC
424 iscntrl(PL_multi_close)
425#else
426 PL_multi_close < 32 || PL_multi_close == 127
427#endif
428 ) {
8990e307 429 *tmpbuf = '^';
3280af22 430 tmpbuf[1] = toCTRL(PL_multi_close);
8990e307
LW
431 tmpbuf[2] = '\0';
432 s = tmpbuf;
433 }
434 else {
eb160463 435 *tmpbuf = (char)PL_multi_close;
8990e307
LW
436 tmpbuf[1] = '\0';
437 s = tmpbuf;
438 }
439 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 440 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 441}
79072805 442
ffb4593c
NT
443/*
444 * Perl_deprecate
ffb4593c
NT
445 */
446
79072805 447void
bfed75c6 448Perl_deprecate(pTHX_ const char *s)
a0d0e21e 449{
599cee73 450 if (ckWARN(WARN_DEPRECATED))
9014280d 451 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
452}
453
12bcd1a6 454void
bfed75c6 455Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
456{
457 /* This function should NOT be called for any new deprecated warnings */
458 /* Use Perl_deprecate instead */
459 /* */
460 /* It is here to maintain backward compatibility with the pre-5.8 */
461 /* warnings category hierarchy. The "deprecated" category used to */
462 /* live under the "syntax" category. It is now a top-level category */
463 /* in its own right. */
464
465 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 466 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
467 "Use of %s is deprecated", s);
468}
469
ffb4593c
NT
470/*
471 * depcom
9cbb5ea2 472 * Deprecate a comma-less variable list.
ffb4593c
NT
473 */
474
76e3520e 475STATIC void
cea2e8a9 476S_depcom(pTHX)
a0d0e21e 477{
12bcd1a6 478 deprecate_old("comma-less variable list");
a0d0e21e
LW
479}
480
ffb4593c 481/*
9cbb5ea2
GS
482 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
483 * utf16-to-utf8-reversed.
ffb4593c
NT
484 */
485
c39cd008
GS
486#ifdef PERL_CR_FILTER
487static void
488strip_return(SV *sv)
489{
490 register char *s = SvPVX(sv);
491 register char *e = s + SvCUR(sv);
492 /* outer loop optimized to do nothing if there are no CR-LFs */
493 while (s < e) {
494 if (*s++ == '\r' && *s == '\n') {
495 /* hit a CR-LF, need to copy the rest */
496 register char *d = s - 1;
497 *d++ = *s++;
498 while (s < e) {
499 if (*s == '\r' && s[1] == '\n')
500 s++;
501 *d++ = *s++;
502 }
503 SvCUR(sv) -= s - d;
504 return;
505 }
506 }
507}
a868473f 508
76e3520e 509STATIC I32
c39cd008 510S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 511{
c39cd008
GS
512 I32 count = FILTER_READ(idx+1, sv, maxlen);
513 if (count > 0 && !maxlen)
514 strip_return(sv);
515 return count;
a868473f
NIS
516}
517#endif
518
ffb4593c
NT
519/*
520 * Perl_lex_start
9cbb5ea2
GS
521 * Initialize variables. Uses the Perl save_stack to save its state (for
522 * recursive calls to the parser).
ffb4593c
NT
523 */
524
a0d0e21e 525void
864dbfa3 526Perl_lex_start(pTHX_ SV *line)
79072805 527{
8990e307
LW
528 char *s;
529 STRLEN len;
530
3280af22
NIS
531 SAVEI32(PL_lex_dojoin);
532 SAVEI32(PL_lex_brackets);
3280af22
NIS
533 SAVEI32(PL_lex_casemods);
534 SAVEI32(PL_lex_starts);
535 SAVEI32(PL_lex_state);
7766f137 536 SAVEVPTR(PL_lex_inpat);
3280af22 537 SAVEI32(PL_lex_inwhat);
18b09519
GS
538 if (PL_lex_state == LEX_KNOWNEXT) {
539 I32 toke = PL_nexttoke;
540 while (--toke >= 0) {
541 SAVEI32(PL_nexttype[toke]);
542 SAVEVPTR(PL_nextval[toke]);
543 }
544 SAVEI32(PL_nexttoke);
18b09519 545 }
57843af0 546 SAVECOPLINE(PL_curcop);
3280af22
NIS
547 SAVEPPTR(PL_bufptr);
548 SAVEPPTR(PL_bufend);
549 SAVEPPTR(PL_oldbufptr);
550 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
551 SAVEPPTR(PL_last_lop);
552 SAVEPPTR(PL_last_uni);
3280af22
NIS
553 SAVEPPTR(PL_linestart);
554 SAVESPTR(PL_linestr);
8edd5f42
RGS
555 SAVEGENERICPV(PL_lex_brackstack);
556 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 557 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
558 SAVESPTR(PL_lex_stuff);
559 SAVEI32(PL_lex_defer);
09bef843 560 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 561 SAVESPTR(PL_lex_repl);
bebdddfc
GS
562 SAVEINT(PL_expect);
563 SAVEINT(PL_lex_expect);
3280af22
NIS
564
565 PL_lex_state = LEX_NORMAL;
566 PL_lex_defer = 0;
567 PL_expect = XSTATE;
568 PL_lex_brackets = 0;
3280af22
NIS
569 New(899, PL_lex_brackstack, 120, char);
570 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
571 PL_lex_casemods = 0;
572 *PL_lex_casestack = '\0';
573 PL_lex_dojoin = 0;
574 PL_lex_starts = 0;
575 PL_lex_stuff = Nullsv;
576 PL_lex_repl = Nullsv;
577 PL_lex_inpat = 0;
76be56bc 578 PL_nexttoke = 0;
3280af22 579 PL_lex_inwhat = 0;
09bef843 580 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
581 PL_linestr = line;
582 if (SvREADONLY(PL_linestr))
583 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
584 s = SvPV(PL_linestr, len);
6f27f9a7 585 if (!len || s[len-1] != ';') {
3280af22
NIS
586 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
587 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
588 sv_catpvn(PL_linestr, "\n;", 2);
8990e307 589 }
3280af22
NIS
590 SvTEMP_off(PL_linestr);
591 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
592 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 593 PL_last_lop = PL_last_uni = Nullch;
3280af22 594 PL_rsfp = 0;
79072805 595}
a687059c 596
ffb4593c
NT
597/*
598 * Perl_lex_end
9cbb5ea2
GS
599 * Finalizer for lexing operations. Must be called when the parser is
600 * done with the lexer.
ffb4593c
NT
601 */
602
463ee0b2 603void
864dbfa3 604Perl_lex_end(pTHX)
463ee0b2 605{
3280af22 606 PL_doextract = FALSE;
463ee0b2
LW
607}
608
ffb4593c
NT
609/*
610 * S_incline
611 * This subroutine has nothing to do with tilting, whether at windmills
612 * or pinball tables. Its name is short for "increment line". It
57843af0 613 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 614 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
615 * # line 500 "foo.pm"
616 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
617 */
618
76e3520e 619STATIC void
cea2e8a9 620S_incline(pTHX_ char *s)
463ee0b2
LW
621{
622 char *t;
623 char *n;
73659bf1 624 char *e;
463ee0b2 625 char ch;
463ee0b2 626
57843af0 627 CopLINE_inc(PL_curcop);
463ee0b2
LW
628 if (*s++ != '#')
629 return;
bf4acbe4 630 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
631 if (strnEQ(s, "line", 4))
632 s += 4;
633 else
634 return;
084592ab 635 if (SPACE_OR_TAB(*s))
73659bf1 636 s++;
4e553d73 637 else
73659bf1 638 return;
bf4acbe4 639 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
640 if (!isDIGIT(*s))
641 return;
642 n = s;
643 while (isDIGIT(*s))
644 s++;
bf4acbe4 645 while (SPACE_OR_TAB(*s))
463ee0b2 646 s++;
73659bf1 647 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 648 s++;
73659bf1
GS
649 e = t + 1;
650 }
463ee0b2 651 else {
463ee0b2 652 for (t = s; !isSPACE(*t); t++) ;
73659bf1 653 e = t;
463ee0b2 654 }
bf4acbe4 655 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
656 e++;
657 if (*e != '\n' && *e != '\0')
658 return; /* false alarm */
659
463ee0b2
LW
660 ch = *t;
661 *t = '\0';
f4dd75d9 662 if (t - s > 0) {
05ec9bb3 663 CopFILE_free(PL_curcop);
57843af0 664 CopFILE_set(PL_curcop, s);
f4dd75d9 665 }
463ee0b2 666 *t = ch;
57843af0 667 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
668}
669
ffb4593c
NT
670/*
671 * S_skipspace
672 * Called to gobble the appropriate amount and type of whitespace.
673 * Skips comments as well.
674 */
675
76e3520e 676STATIC char *
cea2e8a9 677S_skipspace(pTHX_ register char *s)
a687059c 678{
3280af22 679 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 680 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
681 s++;
682 return s;
683 }
684 for (;;) {
fd049845 685 STRLEN prevlen;
09bef843 686 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 687 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
688 while (s < PL_bufend && isSPACE(*s)) {
689 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
690 incline(s);
691 }
ffb4593c
NT
692
693 /* comment */
3280af22
NIS
694 if (s < PL_bufend && *s == '#') {
695 while (s < PL_bufend && *s != '\n')
463ee0b2 696 s++;
60e6418e 697 if (s < PL_bufend) {
463ee0b2 698 s++;
60e6418e
GS
699 if (PL_in_eval && !PL_rsfp) {
700 incline(s);
701 continue;
702 }
703 }
463ee0b2 704 }
ffb4593c
NT
705
706 /* only continue to recharge the buffer if we're at the end
707 * of the buffer, we're not reading from a source filter, and
708 * we're in normal lexing mode
709 */
09bef843
SB
710 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
711 PL_lex_state == LEX_FORMLINE)
463ee0b2 712 return s;
ffb4593c
NT
713
714 /* try to recharge the buffer */
9cbb5ea2
GS
715 if ((s = filter_gets(PL_linestr, PL_rsfp,
716 (prevlen = SvCUR(PL_linestr)))) == Nullch)
717 {
718 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
719 if (PL_minus_p) {
720 sv_setpv(PL_linestr,
721 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 722 PL_minus_n = PL_minus_p = 0;
a0d0e21e 723 }
01a19ab0
NC
724 else if (PL_minus_n) {
725 sv_setpvn(PL_linestr, ";}", 2);
726 PL_minus_n = 0;
727 }
a0d0e21e 728 else
4147a61b 729 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
730
731 /* reset variables for next time we lex */
9cbb5ea2
GS
732 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
733 = SvPVX(PL_linestr);
3280af22 734 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 735 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
736
737 /* Close the filehandle. Could be from -P preprocessor,
738 * STDIN, or a regular file. If we were reading code from
739 * STDIN (because the commandline held no -e or filename)
740 * then we don't close it, we reset it so the code can
741 * read from STDIN too.
742 */
743
3280af22
NIS
744 if (PL_preprocess && !PL_in_eval)
745 (void)PerlProc_pclose(PL_rsfp);
746 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
747 PerlIO_clearerr(PL_rsfp);
8990e307 748 else
3280af22
NIS
749 (void)PerlIO_close(PL_rsfp);
750 PL_rsfp = Nullfp;
463ee0b2
LW
751 return s;
752 }
ffb4593c
NT
753
754 /* not at end of file, so we only read another line */
09bef843
SB
755 /* make corresponding updates to old pointers, for yyerror() */
756 oldprevlen = PL_oldbufptr - PL_bufend;
757 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
758 if (PL_last_uni)
759 oldunilen = PL_last_uni - PL_bufend;
760 if (PL_last_lop)
761 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
762 PL_linestart = PL_bufptr = s + prevlen;
763 PL_bufend = s + SvCUR(PL_linestr);
764 s = PL_bufptr;
09bef843
SB
765 PL_oldbufptr = s + oldprevlen;
766 PL_oldoldbufptr = s + oldoldprevlen;
767 if (PL_last_uni)
768 PL_last_uni = s + oldunilen;
769 if (PL_last_lop)
770 PL_last_lop = s + oldloplen;
a0d0e21e 771 incline(s);
ffb4593c
NT
772
773 /* debugger active and we're not compiling the debugger code,
774 * so store the line into the debugger's array of lines
775 */
3280af22 776 if (PERLDB_LINE && PL_curstash != PL_debstash) {
8990e307
LW
777 SV *sv = NEWSV(85,0);
778
779 sv_upgrade(sv, SVt_PVMG);
3280af22 780 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 781 (void)SvIOK_on(sv);
45977657 782 SvIV_set(sv, 0);
57843af0 783 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 784 }
463ee0b2 785 }
a687059c 786}
378cc40b 787
ffb4593c
NT
788/*
789 * S_check_uni
790 * Check the unary operators to ensure there's no ambiguity in how they're
791 * used. An ambiguous piece of code would be:
792 * rand + 5
793 * This doesn't mean rand() + 5. Because rand() is a unary operator,
794 * the +5 is its argument.
795 */
796
76e3520e 797STATIC void
cea2e8a9 798S_check_uni(pTHX)
ba106d47 799{
2f3197b3 800 char *s;
a0d0e21e 801 char *t;
2f3197b3 802
3280af22 803 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 804 return;
3280af22
NIS
805 while (isSPACE(*PL_last_uni))
806 PL_last_uni++;
7e2040f0 807 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 808 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 809 return;
0453d815 810 if (ckWARN_d(WARN_AMBIGUOUS)){
f248d071 811 char ch = *s;
0453d815 812 *s = '\0';
9014280d 813 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 814 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
815 PL_last_uni);
816 *s = ch;
817 }
2f3197b3
LW
818}
819
ffb4593c
NT
820/*
821 * LOP : macro to build a list operator. Its behaviour has been replaced
822 * with a subroutine, S_lop() for which LOP is just another name.
823 */
824
a0d0e21e
LW
825#define LOP(f,x) return lop(f,x,s)
826
ffb4593c
NT
827/*
828 * S_lop
829 * Build a list operator (or something that might be one). The rules:
830 * - if we have a next token, then it's a list operator [why?]
831 * - if the next thing is an opening paren, then it's a function
832 * - else it's a list operator
833 */
834
76e3520e 835STATIC I32
a0be28da 836S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 837{
79072805 838 yylval.ival = f;
35c8bce7 839 CLINE;
3280af22
NIS
840 PL_expect = x;
841 PL_bufptr = s;
842 PL_last_lop = PL_oldbufptr;
eb160463 843 PL_last_lop_op = (OPCODE)f;
3280af22 844 if (PL_nexttoke)
bbf60fe6 845 return REPORT(LSTOP);
79072805 846 if (*s == '(')
bbf60fe6 847 return REPORT(FUNC);
79072805
LW
848 s = skipspace(s);
849 if (*s == '(')
bbf60fe6 850 return REPORT(FUNC);
79072805 851 else
bbf60fe6 852 return REPORT(LSTOP);
79072805
LW
853}
854
ffb4593c
NT
855/*
856 * S_force_next
9cbb5ea2 857 * When the lexer realizes it knows the next token (for instance,
ffb4593c 858 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
859 * to know what token to return the next time the lexer is called. Caller
860 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
861 * handles the token correctly.
ffb4593c
NT
862 */
863
4e553d73 864STATIC void
cea2e8a9 865S_force_next(pTHX_ I32 type)
79072805 866{
3280af22
NIS
867 PL_nexttype[PL_nexttoke] = type;
868 PL_nexttoke++;
869 if (PL_lex_state != LEX_KNOWNEXT) {
870 PL_lex_defer = PL_lex_state;
871 PL_lex_expect = PL_expect;
872 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
873 }
874}
875
d0a148a6
NC
876STATIC SV *
877S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
878{
879 SV *sv = newSVpvn(start,len);
bfed75c6 880 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
881 SvUTF8_on(sv);
882 return sv;
883}
884
ffb4593c
NT
885/*
886 * S_force_word
887 * When the lexer knows the next thing is a word (for instance, it has
888 * just seen -> and it knows that the next char is a word char, then
889 * it calls S_force_word to stick the next word into the PL_next lookahead.
890 *
891 * Arguments:
b1b65b59 892 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
893 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
894 * int check_keyword : if true, Perl checks to make sure the word isn't
895 * a keyword (do this if the word is a label, e.g. goto FOO)
896 * int allow_pack : if true, : characters will also be allowed (require,
897 * use, etc. do this)
9cbb5ea2 898 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
899 */
900
76e3520e 901STATIC char *
cea2e8a9 902S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 903{
463ee0b2
LW
904 register char *s;
905 STRLEN len;
4e553d73 906
463ee0b2
LW
907 start = skipspace(start);
908 s = start;
7e2040f0 909 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 910 (allow_pack && *s == ':') ||
15f0808c 911 (allow_initial_tick && *s == '\'') )
a0d0e21e 912 {
3280af22
NIS
913 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
914 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
915 return start;
916 if (token == METHOD) {
917 s = skipspace(s);
918 if (*s == '(')
3280af22 919 PL_expect = XTERM;
463ee0b2 920 else {
3280af22 921 PL_expect = XOPERATOR;
463ee0b2 922 }
79072805 923 }
d0a148a6
NC
924 PL_nextval[PL_nexttoke].opval
925 = (OP*)newSVOP(OP_CONST,0,
926 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 927 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
928 force_next(token);
929 }
930 return s;
931}
932
ffb4593c
NT
933/*
934 * S_force_ident
9cbb5ea2 935 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
936 * text only contains the "foo" portion. The first argument is a pointer
937 * to the "foo", and the second argument is the type symbol to prefix.
938 * Forces the next token to be a "WORD".
9cbb5ea2 939 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
940 */
941
76e3520e 942STATIC void
bfed75c6 943S_force_ident(pTHX_ register const char *s, int kind)
79072805
LW
944{
945 if (s && *s) {
bfed75c6 946 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 947 PL_nextval[PL_nexttoke].opval = o;
79072805 948 force_next(WORD);
748a9306 949 if (kind) {
11343788 950 o->op_private = OPpCONST_ENTERED;
55497cff 951 /* XXX see note in pp_entereval() for why we forgo typo
952 warnings if the symbol must be introduced in an eval.
953 GSAR 96-10-12 */
3280af22 954 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : TRUE,
a0d0e21e
LW
955 kind == '$' ? SVt_PV :
956 kind == '@' ? SVt_PVAV :
957 kind == '%' ? SVt_PVHV :
958 SVt_PVGV
959 );
748a9306 960 }
79072805
LW
961 }
962}
963
1571675a
GS
964NV
965Perl_str_to_version(pTHX_ SV *sv)
966{
967 NV retval = 0.0;
968 NV nshift = 1.0;
969 STRLEN len;
970 char *start = SvPVx(sv,len);
3aa33fe5 971 bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a
GS
972 char *end = start + len;
973 while (start < end) {
ba210ebe 974 STRLEN skip;
1571675a
GS
975 UV n;
976 if (utf)
9041c2e3 977 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
978 else {
979 n = *(U8*)start;
980 skip = 1;
981 }
982 retval += ((NV)n)/nshift;
983 start += skip;
984 nshift *= 1000;
985 }
986 return retval;
987}
988
4e553d73 989/*
ffb4593c
NT
990 * S_force_version
991 * Forces the next token to be a version number.
e759cc13
RGS
992 * If the next token appears to be an invalid version number, (e.g. "v2b"),
993 * and if "guessing" is TRUE, then no new token is created (and the caller
994 * must use an alternative parsing method).
ffb4593c
NT
995 */
996
76e3520e 997STATIC char *
e759cc13 998S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 999{
1000 OP *version = Nullop;
44dcb63b 1001 char *d;
89bfa8cd 1002
1003 s = skipspace(s);
1004
44dcb63b 1005 d = s;
dd629d5b 1006 if (*d == 'v')
44dcb63b 1007 d++;
44dcb63b 1008 if (isDIGIT(*d)) {
e759cc13
RGS
1009 while (isDIGIT(*d) || *d == '_' || *d == '.')
1010 d++;
9f3d182e 1011 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1012 SV *ver;
b73d6f50 1013 s = scan_num(s, &yylval);
89bfa8cd 1014 version = yylval.opval;
dd629d5b
GS
1015 ver = cSVOPx(version)->op_sv;
1016 if (SvPOK(ver) && !SvNIOK(ver)) {
155aba94 1017 (void)SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1018 SvNV_set(ver, str_to_version(ver));
1571675a 1019 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1020 }
89bfa8cd 1021 }
e759cc13
RGS
1022 else if (guessing)
1023 return s;
89bfa8cd 1024 }
1025
1026 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1027 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1028 force_next(WORD);
89bfa8cd 1029
e759cc13 1030 return s;
89bfa8cd 1031}
1032
ffb4593c
NT
1033/*
1034 * S_tokeq
1035 * Tokenize a quoted string passed in as an SV. It finds the next
1036 * chunk, up to end of string or a backslash. It may make a new
1037 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1038 * turns \\ into \.
1039 */
1040
76e3520e 1041STATIC SV *
cea2e8a9 1042S_tokeq(pTHX_ SV *sv)
79072805
LW
1043{
1044 register char *s;
1045 register char *send;
1046 register char *d;
b3ac6de7
IZ
1047 STRLEN len = 0;
1048 SV *pv = sv;
79072805
LW
1049
1050 if (!SvLEN(sv))
b3ac6de7 1051 goto finish;
79072805 1052
a0d0e21e 1053 s = SvPV_force(sv, len);
21a311ee 1054 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1055 goto finish;
463ee0b2 1056 send = s + len;
79072805
LW
1057 while (s < send && *s != '\\')
1058 s++;
1059 if (s == send)
b3ac6de7 1060 goto finish;
79072805 1061 d = s;
be4731d2 1062 if ( PL_hints & HINT_NEW_STRING ) {
79cb57f6 1063 pv = sv_2mortal(newSVpvn(SvPVX(pv), len));
be4731d2
NIS
1064 if (SvUTF8(sv))
1065 SvUTF8_on(pv);
1066 }
79072805
LW
1067 while (s < send) {
1068 if (*s == '\\') {
a0d0e21e 1069 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1070 s++; /* all that, just for this */
1071 }
1072 *d++ = *s++;
1073 }
1074 *d = '\0';
463ee0b2 1075 SvCUR_set(sv, d - SvPVX(sv));
b3ac6de7 1076 finish:
3280af22 1077 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1078 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1079 return sv;
1080}
1081
ffb4593c
NT
1082/*
1083 * Now come three functions related to double-quote context,
1084 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1085 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1086 * interact with PL_lex_state, and create fake ( ... ) argument lists
1087 * to handle functions and concatenation.
1088 * They assume that whoever calls them will be setting up a fake
1089 * join call, because each subthing puts a ',' after it. This lets
1090 * "lower \luPpEr"
1091 * become
1092 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1093 *
1094 * (I'm not sure whether the spurious commas at the end of lcfirst's
1095 * arguments and join's arguments are created or not).
1096 */
1097
1098/*
1099 * S_sublex_start
1100 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1101 *
1102 * Pattern matching will set PL_lex_op to the pattern-matching op to
1103 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1104 *
1105 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1106 *
1107 * Everything else becomes a FUNC.
1108 *
1109 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1110 * had an OP_CONST or OP_READLINE). This just sets us up for a
1111 * call to S_sublex_push().
1112 */
1113
76e3520e 1114STATIC I32
cea2e8a9 1115S_sublex_start(pTHX)
79072805
LW
1116{
1117 register I32 op_type = yylval.ival;
79072805
LW
1118
1119 if (op_type == OP_NULL) {
3280af22
NIS
1120 yylval.opval = PL_lex_op;
1121 PL_lex_op = Nullop;
79072805
LW
1122 return THING;
1123 }
1124 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1125 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1126
1127 if (SvTYPE(sv) == SVt_PVIV) {
1128 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1129 STRLEN len;
1130 char *p;
1131 SV *nsv;
1132
1133 p = SvPV(sv, len);
79cb57f6 1134 nsv = newSVpvn(p, len);
01ec43d0
GS
1135 if (SvUTF8(sv))
1136 SvUTF8_on(nsv);
b3ac6de7
IZ
1137 SvREFCNT_dec(sv);
1138 sv = nsv;
4e553d73 1139 }
b3ac6de7 1140 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1141 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1142 /* Allow <FH> // "foo" */
1143 if (op_type == OP_READLINE)
1144 PL_expect = XTERMORDORDOR;
79072805
LW
1145 return THING;
1146 }
1147
3280af22
NIS
1148 PL_sublex_info.super_state = PL_lex_state;
1149 PL_sublex_info.sub_inwhat = op_type;
1150 PL_sublex_info.sub_op = PL_lex_op;
1151 PL_lex_state = LEX_INTERPPUSH;
55497cff 1152
3280af22
NIS
1153 PL_expect = XTERM;
1154 if (PL_lex_op) {
1155 yylval.opval = PL_lex_op;
1156 PL_lex_op = Nullop;
55497cff 1157 return PMFUNC;
1158 }
1159 else
1160 return FUNC;
1161}
1162
ffb4593c
NT
1163/*
1164 * S_sublex_push
1165 * Create a new scope to save the lexing state. The scope will be
1166 * ended in S_sublex_done. Returns a '(', starting the function arguments
1167 * to the uc, lc, etc. found before.
1168 * Sets PL_lex_state to LEX_INTERPCONCAT.
1169 */
1170
76e3520e 1171STATIC I32
cea2e8a9 1172S_sublex_push(pTHX)
55497cff 1173{
27da23d5 1174 dVAR;
f46d017c 1175 ENTER;
55497cff 1176
3280af22
NIS
1177 PL_lex_state = PL_sublex_info.super_state;
1178 SAVEI32(PL_lex_dojoin);
1179 SAVEI32(PL_lex_brackets);
3280af22
NIS
1180 SAVEI32(PL_lex_casemods);
1181 SAVEI32(PL_lex_starts);
1182 SAVEI32(PL_lex_state);
7766f137 1183 SAVEVPTR(PL_lex_inpat);
3280af22 1184 SAVEI32(PL_lex_inwhat);
57843af0 1185 SAVECOPLINE(PL_curcop);
3280af22 1186 SAVEPPTR(PL_bufptr);
8452ff4b 1187 SAVEPPTR(PL_bufend);
3280af22
NIS
1188 SAVEPPTR(PL_oldbufptr);
1189 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1190 SAVEPPTR(PL_last_lop);
1191 SAVEPPTR(PL_last_uni);
3280af22
NIS
1192 SAVEPPTR(PL_linestart);
1193 SAVESPTR(PL_linestr);
8edd5f42
RGS
1194 SAVEGENERICPV(PL_lex_brackstack);
1195 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1196
1197 PL_linestr = PL_lex_stuff;
1198 PL_lex_stuff = Nullsv;
1199
9cbb5ea2
GS
1200 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1201 = SvPVX(PL_linestr);
3280af22 1202 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1203 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1204 SAVEFREESV(PL_linestr);
1205
1206 PL_lex_dojoin = FALSE;
1207 PL_lex_brackets = 0;
3280af22
NIS
1208 New(899, PL_lex_brackstack, 120, char);
1209 New(899, PL_lex_casestack, 12, char);
3280af22
NIS
1210 PL_lex_casemods = 0;
1211 *PL_lex_casestack = '\0';
1212 PL_lex_starts = 0;
1213 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1214 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1215
1216 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1217 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1218 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1219 else
3280af22 1220 PL_lex_inpat = Nullop;
79072805 1221
55497cff 1222 return '(';
79072805
LW
1223}
1224
ffb4593c
NT
1225/*
1226 * S_sublex_done
1227 * Restores lexer state after a S_sublex_push.
1228 */
1229
76e3520e 1230STATIC I32
cea2e8a9 1231S_sublex_done(pTHX)
79072805 1232{
27da23d5 1233 dVAR;
3280af22 1234 if (!PL_lex_starts++) {
9aa983d2
JH
1235 SV *sv = newSVpvn("",0);
1236 if (SvUTF8(PL_linestr))
1237 SvUTF8_on(sv);
3280af22 1238 PL_expect = XOPERATOR;
9aa983d2 1239 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1240 return THING;
1241 }
1242
3280af22
NIS
1243 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1244 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1245 return yylex();
79072805
LW
1246 }
1247
ffb4593c 1248 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1249 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1250 PL_linestr = PL_lex_repl;
1251 PL_lex_inpat = 0;
1252 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1253 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1254 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1255 SAVEFREESV(PL_linestr);
1256 PL_lex_dojoin = FALSE;
1257 PL_lex_brackets = 0;
3280af22
NIS
1258 PL_lex_casemods = 0;
1259 *PL_lex_casestack = '\0';
1260 PL_lex_starts = 0;
25da4f38 1261 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1262 PL_lex_state = LEX_INTERPNORMAL;
1263 PL_lex_starts++;
e9fa98b2
HS
1264 /* we don't clear PL_lex_repl here, so that we can check later
1265 whether this is an evalled subst; that means we rely on the
1266 logic to ensure sublex_done() is called again only via the
1267 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1268 }
e9fa98b2 1269 else {
3280af22 1270 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1271 PL_lex_repl = Nullsv;
1272 }
79072805 1273 return ',';
ffed7fef
LW
1274 }
1275 else {
f46d017c 1276 LEAVE;
3280af22
NIS
1277 PL_bufend = SvPVX(PL_linestr);
1278 PL_bufend += SvCUR(PL_linestr);
1279 PL_expect = XOPERATOR;
09bef843 1280 PL_sublex_info.sub_inwhat = 0;
79072805 1281 return ')';
ffed7fef
LW
1282 }
1283}
1284
02aa26ce
NT
1285/*
1286 scan_const
1287
1288 Extracts a pattern, double-quoted string, or transliteration. This
1289 is terrifying code.
1290
3280af22
NIS
1291 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1292 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1293 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1294
9b599b2a
GS
1295 Returns a pointer to the character scanned up to. Iff this is
1296 advanced from the start pointer supplied (ie if anything was
1297 successfully parsed), will leave an OP for the substring scanned
1298 in yylval. Caller must intuit reason for not parsing further
1299 by looking at the next characters herself.
1300
02aa26ce
NT
1301 In patterns:
1302 backslashes:
1303 double-quoted style: \r and \n
1304 regexp special ones: \D \s
1305 constants: \x3
1306 backrefs: \1 (deprecated in substitution replacements)
1307 case and quoting: \U \Q \E
1308 stops on @ and $, but not for $ as tail anchor
1309
1310 In transliterations:
1311 characters are VERY literal, except for - not at the start or end
1312 of the string, which indicates a range. scan_const expands the
1313 range to the full set of intermediate characters.
1314
1315 In double-quoted strings:
1316 backslashes:
1317 double-quoted style: \r and \n
1318 constants: \x3
1319 backrefs: \1 (deprecated)
1320 case and quoting: \U \Q \E
1321 stops on @ and $
1322
1323 scan_const does *not* construct ops to handle interpolated strings.
1324 It stops processing as soon as it finds an embedded $ or @ variable
1325 and leaves it to the caller to work out what's going on.
1326
da6eedaa 1327 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1328
1329 $ in pattern could be $foo or could be tail anchor. Assumption:
1330 it's a tail anchor if $ is the last thing in the string, or if it's
1331 followed by one of ")| \n\t"
1332
1333 \1 (backreferences) are turned into $1
1334
1335 The structure of the code is
1336 while (there's a character to process) {
1337 handle transliteration ranges
1338 skip regexp comments
1339 skip # initiated comments in //x patterns
1340 check for embedded @foo
1341 check for embedded scalars
1342 if (backslash) {
1343 leave intact backslashes from leave (below)
1344 deprecate \1 in strings and sub replacements
1345 handle string-changing backslashes \l \U \Q \E, etc.
1346 switch (what was escaped) {
1347 handle - in a transliteration (becomes a literal -)
1348 handle \132 octal characters
1349 handle 0x15 hex characters
1350 handle \cV (control V)
1351 handle printf backslashes (\f, \r, \n, etc)
1352 } (end switch)
1353 } (end if backslash)
1354 } (end while character to read)
4e553d73 1355
02aa26ce
NT
1356*/
1357
76e3520e 1358STATIC char *
cea2e8a9 1359S_scan_const(pTHX_ char *start)
79072805 1360{
3280af22 1361 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1362 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1363 register char *s = start; /* start of the constant */
1364 register char *d = SvPVX(sv); /* destination for copies */
1365 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1366 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1367 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1368 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d
GS
1369 UV uv;
1370
dff6d3cd 1371 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1372 PL_lex_inpat
b6d5fef8 1373 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1374 : "";
79072805 1375
2b9d42f0
NIS
1376 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1377 /* If we are doing a trans and we know we want UTF8 set expectation */
1378 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1379 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1380 }
1381
1382
79072805 1383 while (s < send || dorange) {
02aa26ce 1384 /* get transliterations out of the way (they're most literal) */
3280af22 1385 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1386 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1387 if (dorange) {
1ba5c669
JH
1388 I32 i; /* current expanded character */
1389 I32 min; /* first character in range */
1390 I32 max; /* last character in range */
02aa26ce 1391
2b9d42f0 1392 if (has_utf8) {
8973db79
JH
1393 char *c = (char*)utf8_hop((U8*)d, -1);
1394 char *e = d++;
1395 while (e-- > c)
1396 *(e + 1) = *e;
25716404 1397 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1398 /* mark the range as done, and continue */
1399 dorange = FALSE;
1400 didrange = TRUE;
1401 continue;
1402 }
2b9d42f0 1403
02aa26ce 1404 i = d - SvPVX(sv); /* remember current offset */
9cbb5ea2
GS
1405 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1406 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1407 d -= 2; /* eat the first char and the - */
1408
8ada0baa
JH
1409 min = (U8)*d; /* first char in range */
1410 max = (U8)d[1]; /* last char in range */
1411
c2e66d9e 1412 if (min > max) {
01ec43d0 1413 Perl_croak(aTHX_
d1573ac7 1414 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1415 (char)min, (char)max);
c2e66d9e
GS
1416 }
1417
c7f1f016 1418#ifdef EBCDIC
8ada0baa
JH
1419 if ((isLOWER(min) && isLOWER(max)) ||
1420 (isUPPER(min) && isUPPER(max))) {
1421 if (isLOWER(min)) {
1422 for (i = min; i <= max; i++)
1423 if (isLOWER(i))
db42d148 1424 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1425 } else {
1426 for (i = min; i <= max; i++)
1427 if (isUPPER(i))
db42d148 1428 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1429 }
1430 }
1431 else
1432#endif
1433 for (i = min; i <= max; i++)
eb160463 1434 *d++ = (char)i;
02aa26ce
NT
1435
1436 /* mark the range as done, and continue */
79072805 1437 dorange = FALSE;
01ec43d0 1438 didrange = TRUE;
79072805 1439 continue;
4e553d73 1440 }
02aa26ce
NT
1441
1442 /* range begins (ignore - as first or last char) */
79072805 1443 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1444 if (didrange) {
1fafa243 1445 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1446 }
2b9d42f0 1447 if (has_utf8) {
25716404 1448 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1449 s++;
1450 continue;
1451 }
79072805
LW
1452 dorange = TRUE;
1453 s++;
01ec43d0
GS
1454 }
1455 else {
1456 didrange = FALSE;
1457 }
79072805 1458 }
02aa26ce
NT
1459
1460 /* if we get here, we're not doing a transliteration */
1461
0f5d15d6
IZ
1462 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1463 except for the last char, which will be done separately. */
3280af22 1464 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1465 if (s[2] == '#') {
e994fd66 1466 while (s+1 < send && *s != ')')
db42d148 1467 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1468 }
1469 else if (s[2] == '{' /* This should match regcomp.c */
1470 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1471 {
cc6b7395 1472 I32 count = 1;
0f5d15d6 1473 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1474 char c;
1475
d9f97599
GS
1476 while (count && (c = *regparse)) {
1477 if (c == '\\' && regparse[1])
1478 regparse++;
4e553d73 1479 else if (c == '{')
cc6b7395 1480 count++;
4e553d73 1481 else if (c == '}')
cc6b7395 1482 count--;
d9f97599 1483 regparse++;
cc6b7395 1484 }
e994fd66 1485 if (*regparse != ')')
5bdf89e7 1486 regparse--; /* Leave one char for continuation. */
0f5d15d6 1487 while (s < regparse)
db42d148 1488 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1489 }
748a9306 1490 }
02aa26ce
NT
1491
1492 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1493 else if (*s == '#' && PL_lex_inpat &&
1494 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1495 while (s+1 < send && *s != '\n')
db42d148 1496 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1497 }
02aa26ce 1498
5d1d4326 1499 /* check for embedded arrays
da6eedaa 1500 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1501 */
7e2040f0 1502 else if (*s == '@' && s[1]
5d1d4326 1503 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1504 break;
02aa26ce
NT
1505
1506 /* check for embedded scalars. only stop if we're sure it's a
1507 variable.
1508 */
79072805 1509 else if (*s == '$') {
3280af22 1510 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1511 break;
6002328a 1512 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1513 break; /* in regexp, $ might be tail anchor */
1514 }
02aa26ce 1515
2b9d42f0
NIS
1516 /* End of else if chain - OP_TRANS rejoin rest */
1517
02aa26ce 1518 /* backslashes */
79072805
LW
1519 if (*s == '\\' && s+1 < send) {
1520 s++;
02aa26ce
NT
1521
1522 /* some backslashes we leave behind */
c9f97d15 1523 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1524 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1525 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1526 continue;
1527 }
02aa26ce
NT
1528
1529 /* deprecate \1 in strings and substitution replacements */
3280af22 1530 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1531 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1532 {
599cee73 1533 if (ckWARN(WARN_SYNTAX))
9014280d 1534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1535 *--s = '$';
1536 break;
1537 }
02aa26ce
NT
1538
1539 /* string-change backslash escapes */
3280af22 1540 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1541 --s;
1542 break;
1543 }
02aa26ce
NT
1544
1545 /* if we get here, it's either a quoted -, or a digit */
79072805 1546 switch (*s) {
02aa26ce
NT
1547
1548 /* quoted - in transliterations */
79072805 1549 case '-':
3280af22 1550 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1551 *d++ = *s++;
1552 continue;
1553 }
1554 /* FALL THROUGH */
1555 default:
11b8faa4 1556 {
707afd92 1557 if (ckWARN(WARN_MISC) &&
bfed75c6 1558 isALNUM(*s) &&
707afd92 1559 *s != '_')
9014280d 1560 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1561 "Unrecognized escape \\%c passed through",
1562 *s);
1563 /* default action is to copy the quoted character */
f9a63242 1564 goto default_action;
11b8faa4 1565 }
02aa26ce
NT
1566
1567 /* \132 indicates an octal constant */
79072805
LW
1568 case '0': case '1': case '2': case '3':
1569 case '4': case '5': case '6': case '7':
ba210ebe 1570 {
53305cf1
NC
1571 I32 flags = 0;
1572 STRLEN len = 3;
1573 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1574 s += len;
1575 }
012bcf8d 1576 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1577
1578 /* \x24 indicates a hex constant */
79072805 1579 case 'x':
a0ed51b3
LW
1580 ++s;
1581 if (*s == '{') {
1582 char* e = strchr(s, '}');
a4c04bdc
NC
1583 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1584 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1585 STRLEN len;
355860ce 1586
53305cf1 1587 ++s;
adaeee49 1588 if (!e) {
a0ed51b3 1589 yyerror("Missing right brace on \\x{}");
355860ce 1590 continue;
ba210ebe 1591 }
53305cf1
NC
1592 len = e - s;
1593 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1594 s = e + 1;
a0ed51b3
LW
1595 }
1596 else {
ba210ebe 1597 {
53305cf1 1598 STRLEN len = 2;
a4c04bdc 1599 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1600 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1601 s += len;
1602 }
012bcf8d
GS
1603 }
1604
1605 NUM_ESCAPE_INSERT:
1606 /* Insert oct or hex escaped character.
301d3d20 1607 * There will always enough room in sv since such
db42d148 1608 * escapes will be longer than any UTF-8 sequence
301d3d20 1609 * they can end up as. */
ba7cea30 1610
c7f1f016
NIS
1611 /* We need to map to chars to ASCII before doing the tests
1612 to cover EBCDIC
1613 */
c4d5f83a 1614 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1615 if (!has_utf8 && uv > 255) {
301d3d20
JH
1616 /* Might need to recode whatever we have
1617 * accumulated so far if it contains any
1618 * hibit chars.
1619 *
1620 * (Can't we keep track of that and avoid
1621 * this rescan? --jhi)
012bcf8d 1622 */
c7f1f016 1623 int hicount = 0;
63cd0674
NIS
1624 U8 *c;
1625 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1626 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1627 hicount++;
db42d148 1628 }
012bcf8d 1629 }
63cd0674 1630 if (hicount) {
db42d148
NIS
1631 STRLEN offset = d - SvPVX(sv);
1632 U8 *src, *dst;
1633 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1634 src = (U8 *)d - 1;
1635 dst = src+hicount;
1636 d += hicount;
1637 while (src >= (U8 *)SvPVX(sv)) {
c4d5f83a 1638 if (!NATIVE_IS_INVARIANT(*src)) {
63cd0674 1639 U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1640 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1641 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1642 }
1643 else {
63cd0674 1644 *dst-- = *src;
012bcf8d 1645 }
c7f1f016 1646 src--;
012bcf8d
GS
1647 }
1648 }
1649 }
1650
9aa983d2 1651 if (has_utf8 || uv > 255) {
9041c2e3 1652 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1653 has_utf8 = TRUE;
f9a63242
JH
1654 if (PL_lex_inwhat == OP_TRANS &&
1655 PL_sublex_info.sub_op) {
1656 PL_sublex_info.sub_op->op_private |=
1657 (PL_lex_repl ? OPpTRANS_FROM_UTF
1658 : OPpTRANS_TO_UTF);
f9a63242 1659 }
012bcf8d 1660 }
a0ed51b3 1661 else {
012bcf8d 1662 *d++ = (char)uv;
a0ed51b3 1663 }
012bcf8d
GS
1664 }
1665 else {
c4d5f83a 1666 *d++ = (char) uv;
a0ed51b3 1667 }
79072805 1668 continue;
02aa26ce 1669
b239daa5 1670 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1671 case 'N':
55eda711 1672 ++s;
423cee85
JH
1673 if (*s == '{') {
1674 char* e = strchr(s, '}');
155aba94 1675 SV *res;
423cee85
JH
1676 STRLEN len;
1677 char *str;
4e553d73 1678
423cee85 1679 if (!e) {
5777a3f7 1680 yyerror("Missing right brace on \\N{}");
423cee85
JH
1681 e = s - 1;
1682 goto cont_scan;
1683 }
dbc0d4f2
JH
1684 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1685 /* \N{U+...} */
1686 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1687 PERL_SCAN_DISALLOW_PREFIX;
1688 s += 3;
1689 len = e - s;
1690 uv = grok_hex(s, &len, &flags, NULL);
1691 s = e + 1;
1692 goto NUM_ESCAPE_INSERT;
1693 }
55eda711
JH
1694 res = newSVpvn(s + 1, e - s - 1);
1695 res = new_constant( Nullch, 0, "charnames",
1696 res, Nullsv, "\\N{...}" );
f9a63242
JH
1697 if (has_utf8)
1698 sv_utf8_upgrade(res);
423cee85 1699 str = SvPV(res,len);
1c47067b
JH
1700#ifdef EBCDIC_NEVER_MIND
1701 /* charnames uses pack U and that has been
1702 * recently changed to do the below uni->native
1703 * mapping, so this would be redundant (and wrong,
1704 * the code point would be doubly converted).
1705 * But leave this in just in case the pack U change
1706 * gets revoked, but the semantics is still
1707 * desireable for charnames. --jhi */
cddc7ef4
JH
1708 {
1709 UV uv = utf8_to_uvchr((U8*)str, 0);
1710
1711 if (uv < 0x100) {
89ebb4a3 1712 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1713
1714 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1715 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
1716 str = SvPV(res, len);
1717 }
1718 }
1719#endif
89491803 1720 if (!has_utf8 && SvUTF8(res)) {
f08d6ad9
GS
1721 char *ostart = SvPVX(sv);
1722 SvCUR_set(sv, d - ostart);
1723 SvPOK_on(sv);
e4f3eed8 1724 *d = '\0';
f08d6ad9 1725 sv_utf8_upgrade(sv);
d2f449dd 1726 /* this just broke our allocation above... */
eb160463 1727 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1728 d = SvPVX(sv) + SvCUR(sv);
89491803 1729 has_utf8 = TRUE;
f08d6ad9 1730 }
eb160463 1731 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85
JH
1732 char *odest = SvPVX(sv);
1733
8973db79 1734 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1735 d = SvPVX(sv) + (d - odest);
1736 }
1737 Copy(str, d, len, char);
1738 d += len;
1739 SvREFCNT_dec(res);
1740 cont_scan:
1741 s = e + 1;
1742 }
1743 else
5777a3f7 1744 yyerror("Missing braces on \\N{}");
423cee85
JH
1745 continue;
1746
02aa26ce 1747 /* \c is a control character */
79072805
LW
1748 case 'c':
1749 s++;
961ce445 1750 if (s < send) {
ba210ebe 1751 U8 c = *s++;
c7f1f016
NIS
1752#ifdef EBCDIC
1753 if (isLOWER(c))
1754 c = toUPPER(c);
1755#endif
db42d148 1756 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1757 }
961ce445
RGS
1758 else {
1759 yyerror("Missing control char name in \\c");
1760 }
79072805 1761 continue;
02aa26ce
NT
1762
1763 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1764 case 'b':
db42d148 1765 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1766 break;
1767 case 'n':
db42d148 1768 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1769 break;
1770 case 'r':
db42d148 1771 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1772 break;
1773 case 'f':
db42d148 1774 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1775 break;
1776 case 't':
db42d148 1777 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1778 break;
34a3fe2a 1779 case 'e':
db42d148 1780 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1781 break;
1782 case 'a':
db42d148 1783 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1784 break;
02aa26ce
NT
1785 } /* end switch */
1786
79072805
LW
1787 s++;
1788 continue;
02aa26ce
NT
1789 } /* end if (backslash) */
1790
f9a63242 1791 default_action:
2b9d42f0
NIS
1792 /* If we started with encoded form, or already know we want it
1793 and then encode the next character */
1794 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1795 STRLEN len = 1;
1796 UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1797 STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
1798 s += len;
1799 if (need > len) {
1800 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
1801 STRLEN off = d - SvPVX(sv);
1802 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1803 }
1804 d = (char*)uvchr_to_utf8((U8*)d, uv);
1805 has_utf8 = TRUE;
1806 }
1807 else {
1808 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1809 }
02aa26ce
NT
1810 } /* while loop to process each character */
1811
1812 /* terminate the string and set up the sv */
79072805 1813 *d = '\0';
463ee0b2 1814 SvCUR_set(sv, d - SvPVX(sv));
2b9d42f0 1815 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1816 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1817
79072805 1818 SvPOK_on(sv);
9f4817db 1819 if (PL_encoding && !has_utf8) {
d0063567
DK
1820 sv_recode_to_utf8(sv, PL_encoding);
1821 if (SvUTF8(sv))
1822 has_utf8 = TRUE;
9f4817db 1823 }
2b9d42f0 1824 if (has_utf8) {
7e2040f0 1825 SvUTF8_on(sv);
2b9d42f0 1826 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1827 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1828 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1829 }
1830 }
79072805 1831
02aa26ce 1832 /* shrink the sv if we allocated more than we used */
79072805 1833 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1834 SvPV_shrink_to_cur(sv);
79072805 1835 }
02aa26ce 1836
9b599b2a 1837 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1838 if (s > PL_bufptr) {
1839 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1840 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1841 sv, Nullsv,
4e553d73 1842 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1843 ? "tr"
3280af22 1844 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1845 ? "s"
1846 : "qq")));
79072805 1847 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1848 } else
8990e307 1849 SvREFCNT_dec(sv);
79072805
LW
1850 return s;
1851}
1852
ffb4593c
NT
1853/* S_intuit_more
1854 * Returns TRUE if there's more to the expression (e.g., a subscript),
1855 * FALSE otherwise.
ffb4593c
NT
1856 *
1857 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1858 *
1859 * ->[ and ->{ return TRUE
1860 * { and [ outside a pattern are always subscripts, so return TRUE
1861 * if we're outside a pattern and it's not { or [, then return FALSE
1862 * if we're in a pattern and the first char is a {
1863 * {4,5} (any digits around the comma) returns FALSE
1864 * if we're in a pattern and the first char is a [
1865 * [] returns FALSE
1866 * [SOMETHING] has a funky algorithm to decide whether it's a
1867 * character class or not. It has to deal with things like
1868 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1869 * anything else returns TRUE
1870 */
1871
9cbb5ea2
GS
1872/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1873
76e3520e 1874STATIC int
cea2e8a9 1875S_intuit_more(pTHX_ register char *s)
79072805 1876{
3280af22 1877 if (PL_lex_brackets)
79072805
LW
1878 return TRUE;
1879 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1880 return TRUE;
1881 if (*s != '{' && *s != '[')
1882 return FALSE;
3280af22 1883 if (!PL_lex_inpat)
79072805
LW
1884 return TRUE;
1885
1886 /* In a pattern, so maybe we have {n,m}. */
1887 if (*s == '{') {
1888 s++;
1889 if (!isDIGIT(*s))
1890 return TRUE;
1891 while (isDIGIT(*s))
1892 s++;
1893 if (*s == ',')
1894 s++;
1895 while (isDIGIT(*s))
1896 s++;
1897 if (*s == '}')
1898 return FALSE;
1899 return TRUE;
1900
1901 }
1902
1903 /* On the other hand, maybe we have a character class */
1904
1905 s++;
1906 if (*s == ']' || *s == '^')
1907 return FALSE;
1908 else {
ffb4593c 1909 /* this is terrifying, and it works */
79072805
LW
1910 int weight = 2; /* let's weigh the evidence */
1911 char seen[256];
f27ffc4a 1912 unsigned char un_char = 255, last_un_char;
93a17b20 1913 char *send = strchr(s,']');
3280af22 1914 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
1915
1916 if (!send) /* has to be an expression */
1917 return TRUE;
1918
1919 Zero(seen,256,char);
1920 if (*s == '$')
1921 weight -= 3;
1922 else if (isDIGIT(*s)) {
1923 if (s[1] != ']') {
1924 if (isDIGIT(s[1]) && s[2] == ']')
1925 weight -= 10;
1926 }
1927 else
1928 weight -= 100;
1929 }
1930 for (; s < send; s++) {
1931 last_un_char = un_char;
1932 un_char = (unsigned char)*s;
1933 switch (*s) {
1934 case '@':
1935 case '&':
1936 case '$':
1937 weight -= seen[un_char] * 10;
7e2040f0 1938 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 1939 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
a0d0e21e 1940 if ((int)strlen(tmpbuf) > 1 && gv_fetchpv(tmpbuf,FALSE, SVt_PV))
79072805
LW
1941 weight -= 100;
1942 else
1943 weight -= 10;
1944 }
1945 else if (*s == '$' && s[1] &&
93a17b20
LW
1946 strchr("[#!%*<>()-=",s[1])) {
1947 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
1948 weight -= 10;
1949 else
1950 weight -= 1;
1951 }
1952 break;
1953 case '\\':
1954 un_char = 254;
1955 if (s[1]) {
93a17b20 1956 if (strchr("wds]",s[1]))
79072805
LW
1957 weight += 100;
1958 else if (seen['\''] || seen['"'])
1959 weight += 1;
93a17b20 1960 else if (strchr("rnftbxcav",s[1]))
79072805
LW
1961 weight += 40;
1962 else if (isDIGIT(s[1])) {
1963 weight += 40;
1964 while (s[1] && isDIGIT(s[1]))
1965 s++;
1966 }
1967 }
1968 else
1969 weight += 100;
1970 break;
1971 case '-':
1972 if (s[1] == '\\')
1973 weight += 50;
93a17b20 1974 if (strchr("aA01! ",last_un_char))
79072805 1975 weight += 30;
93a17b20 1976 if (strchr("zZ79~",s[1]))
79072805 1977 weight += 30;
f27ffc4a
GS
1978 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
1979 weight -= 5; /* cope with negative subscript */
79072805
LW
1980 break;
1981 default:
3792a11b
NC
1982 if (!isALNUM(last_un_char)
1983 && !(last_un_char == '$' || last_un_char == '@'
1984 || last_un_char == '&')
1985 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
1986 char *d = tmpbuf;
1987 while (isALPHA(*s))
1988 *d++ = *s++;
1989 *d = '\0';
1990 if (keyword(tmpbuf, d - tmpbuf))
1991 weight -= 150;
1992 }
1993 if (un_char == last_un_char + 1)
1994 weight += 5;
1995 weight -= seen[un_char];
1996 break;
1997 }
1998 seen[un_char]++;
1999 }
2000 if (weight >= 0) /* probably a character class */
2001 return FALSE;
2002 }
2003
2004 return TRUE;
2005}
ffed7fef 2006
ffb4593c
NT
2007/*
2008 * S_intuit_method
2009 *
2010 * Does all the checking to disambiguate
2011 * foo bar
2012 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2013 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2014 *
2015 * First argument is the stuff after the first token, e.g. "bar".
2016 *
2017 * Not a method if bar is a filehandle.
2018 * Not a method if foo is a subroutine prototyped to take a filehandle.
2019 * Not a method if it's really "Foo $bar"
2020 * Method if it's "foo $bar"
2021 * Not a method if it's really "print foo $bar"
2022 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2023 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2024 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2025 * =>
2026 */
2027
76e3520e 2028STATIC int
cea2e8a9 2029S_intuit_method(pTHX_ char *start, GV *gv)
a0d0e21e
LW
2030{
2031 char *s = start + (*start == '$');
3280af22 2032 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2033 STRLEN len;
2034 GV* indirgv;
2035
2036 if (gv) {
b6c543e3 2037 CV *cv;
a0d0e21e
LW
2038 if (GvIO(gv))
2039 return 0;
b6c543e3
IZ
2040 if ((cv = GvCVu(gv))) {
2041 char *proto = SvPVX(cv);
2042 if (proto) {
2043 if (*proto == ';')
2044 proto++;
2045 if (*proto == '*')
2046 return 0;
2047 }
2048 } else
a0d0e21e
LW
2049 gv = 0;
2050 }
8903cb82 2051 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2052 /* start is the beginning of the possible filehandle/object,
2053 * and s is the end of it
2054 * tmpbuf is a copy of it
2055 */
2056
a0d0e21e 2057 if (*start == '$') {
3280af22 2058 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2059 return 0;
2060 s = skipspace(s);
3280af22
NIS
2061 PL_bufptr = start;
2062 PL_expect = XREF;
a0d0e21e
LW
2063 return *s == '(' ? FUNCMETH : METHOD;
2064 }
2065 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2066 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2067 len -= 2;
2068 tmpbuf[len] = '\0';
2069 goto bare_package;
2070 }
2071 indirgv = gv_fetchpv(tmpbuf, FALSE, SVt_PVCV);
8ebc5c01 2072 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2073 return 0;
2074 /* filehandle or package name makes it a method */
89bfa8cd 2075 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2076 s = skipspace(s);
3280af22 2077 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2078 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2079 bare_package:
3280af22 2080 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2081 newSVpvn(tmpbuf,len));
3280af22
NIS
2082 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2083 PL_expect = XTERM;
a0d0e21e 2084 force_next(WORD);
3280af22 2085 PL_bufptr = s;
a0d0e21e
LW
2086 return *s == '(' ? FUNCMETH : METHOD;
2087 }
2088 }
2089 return 0;
2090}
2091
ffb4593c
NT
2092/*
2093 * S_incl_perldb
2094 * Return a string of Perl code to load the debugger. If PERL5DB
2095 * is set, it will return the contents of that, otherwise a
2096 * compile-time require of perl5db.pl.
2097 */
2098
bfed75c6 2099STATIC const char*
cea2e8a9 2100S_incl_perldb(pTHX)
a0d0e21e 2101{
3280af22 2102 if (PL_perldb) {
bfed75c6 2103 const char *pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2104
2105 if (pdb)
2106 return pdb;
93189314 2107 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2108 return "BEGIN { require 'perl5db.pl' }";
2109 }
2110 return "";
2111}
2112
2113
16d20bd9 2114/* Encoded script support. filter_add() effectively inserts a
4e553d73 2115 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2116 * Note that the filter function only applies to the current source file
2117 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2118 *
2119 * The datasv parameter (which may be NULL) can be used to pass
2120 * private data to this instance of the filter. The filter function
2121 * can recover the SV using the FILTER_DATA macro and use it to
2122 * store private buffers and state information.
2123 *
2124 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2125 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2126 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2127 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2128 * private use must be set using malloc'd pointers.
2129 */
16d20bd9
AD
2130
2131SV *
864dbfa3 2132Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2133{
f4c556ac
GS
2134 if (!funcp)
2135 return Nullsv;
2136
3280af22
NIS
2137 if (!PL_rsfp_filters)
2138 PL_rsfp_filters = newAV();
16d20bd9 2139 if (!datasv)
8c52afec 2140 datasv = NEWSV(255,0);
16d20bd9 2141 if (!SvUPGRADE(datasv, SVt_PVIO))
cea2e8a9 2142 Perl_die(aTHX_ "Can't upgrade filter_add data to SVt_PVIO");
4755096e 2143 IoANY(datasv) = (void *)funcp; /* stash funcp into spare field */
e0c19803 2144 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2145 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
fe5a182c 2146 (void*)funcp, SvPV_nolen(datasv)));
3280af22
NIS
2147 av_unshift(PL_rsfp_filters, 1);
2148 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2149 return(datasv);
2150}
4e553d73 2151
16d20bd9
AD
2152
2153/* Delete most recently added instance of this filter function. */
a0d0e21e 2154void
864dbfa3 2155Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2156{
e0c19803 2157 SV *datasv;
fe5a182c 2158 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", (void*)funcp));
3280af22 2159 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2160 return;
2161 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2162 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4755096e 2163 if (IoANY(datasv) == (void *)funcp) {
e0c19803 2164 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2165 IoANY(datasv) = (void *)NULL;
3280af22 2166 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2167
16d20bd9
AD
2168 return;
2169 }
2170 /* we need to search for the correct entry and clear it */
cea2e8a9 2171 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2172}
2173
2174
1de9afcd
RGS
2175/* Invoke the idxth filter function for the current rsfp. */
2176/* maxlen 0 = read one text line */
16d20bd9 2177I32
864dbfa3 2178Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2179{
16d20bd9
AD
2180 filter_t funcp;
2181 SV *datasv = NULL;
e50aee73 2182
3280af22 2183 if (!PL_rsfp_filters)
16d20bd9 2184 return -1;
1de9afcd 2185 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2186 /* Provide a default input filter to make life easy. */
2187 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2188 DEBUG_P(PerlIO_printf(Perl_debug_log,
2189 "filter_read %d: from rsfp\n", idx));
4e553d73 2190 if (maxlen) {
16d20bd9
AD
2191 /* Want a block */
2192 int len ;
2193 int old_len = SvCUR(buf_sv) ;
2194
2195 /* ensure buf_sv is large enough */
eb160463 2196 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2197 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2198 if (PerlIO_error(PL_rsfp))
37120919
AD
2199 return -1; /* error */
2200 else
2201 return 0 ; /* end of file */
2202 }
16d20bd9
AD
2203 SvCUR_set(buf_sv, old_len + len) ;
2204 } else {
2205 /* Want a line */
3280af22
NIS
2206 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2207 if (PerlIO_error(PL_rsfp))
37120919
AD
2208 return -1; /* error */
2209 else
2210 return 0 ; /* end of file */
2211 }
16d20bd9
AD
2212 }
2213 return SvCUR(buf_sv);
2214 }
2215 /* Skip this filter slot if filter has been deleted */
1de9afcd 2216 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2217 DEBUG_P(PerlIO_printf(Perl_debug_log,
2218 "filter_read %d: skipped (filter deleted)\n",
2219 idx));
16d20bd9
AD
2220 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2221 }
2222 /* Get function pointer hidden within datasv */
4755096e 2223 funcp = (filter_t)IoANY(datasv);
f4c556ac
GS
2224 DEBUG_P(PerlIO_printf(Perl_debug_log,
2225 "filter_read %d: via function %p (%s)\n",
fe5a182c 2226 idx, (void*)funcp, SvPV_nolen(datasv)));
16d20bd9
AD
2227 /* Call function. The function is expected to */
2228 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2229 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2230 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2231}
2232
76e3520e 2233STATIC char *
cea2e8a9 2234S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2235{
c39cd008 2236#ifdef PERL_CR_FILTER
3280af22 2237 if (!PL_rsfp_filters) {
c39cd008 2238 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2239 }
2240#endif
3280af22 2241 if (PL_rsfp_filters) {
55497cff 2242 if (!append)
2243 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2244 if (FILTER_READ(0, sv, 0) > 0)
2245 return ( SvPVX(sv) ) ;
2246 else
2247 return Nullch ;
2248 }
9d116dd7 2249 else
fd049845 2250 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2251}
2252
01ec43d0 2253STATIC HV *
7fc63493 2254S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b
GS
2255{
2256 GV *gv;
2257
01ec43d0 2258 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2259 return PL_curstash;
2260
2261 if (len > 2 &&
2262 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
01ec43d0
GS
2263 (gv = gv_fetchpv(pkgname, FALSE, SVt_PVHV)))
2264 {
2265 return GvHV(gv); /* Foo:: */
def3634b
GS
2266 }
2267
2268 /* use constant CLASS => 'MyClass' */
2269 if ((gv = gv_fetchpv(pkgname, FALSE, SVt_PVCV))) {
2270 SV *sv;
2271 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
2272 pkgname = SvPV_nolen(sv);
2273 }
2274 }
2275
2276 return gv_stashpv(pkgname, FALSE);
2277}
a0d0e21e 2278
748a9306 2279#ifdef DEBUGGING
27da23d5 2280 static const char* const exp_name[] =
09bef843 2281 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2282 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2283 };
748a9306 2284#endif
463ee0b2 2285
02aa26ce
NT
2286/*
2287 yylex
2288
2289 Works out what to call the token just pulled out of the input
2290 stream. The yacc parser takes care of taking the ops we return and
2291 stitching them into a tree.
2292
2293 Returns:
2294 PRIVATEREF
2295
2296 Structure:
2297 if read an identifier
2298 if we're in a my declaration
2299 croak if they tried to say my($foo::bar)
2300 build the ops for a my() declaration
2301 if it's an access to a my() variable
2302 are we in a sort block?
2303 croak if my($a); $a <=> $b
2304 build ops for access to a my() variable
2305 if in a dq string, and they've said @foo and we can't find @foo
2306 croak
2307 build ops for a bareword
2308 if we already built the token before, use it.
2309*/
2310
20141f0e 2311
dba4d153
JH
2312#ifdef __SC__
2313#pragma segment Perl_yylex
2314#endif
dba4d153 2315int
dba4d153 2316Perl_yylex(pTHX)
20141f0e 2317{
3afc138a 2318 register char *s = PL_bufptr;
378cc40b 2319 register char *d;
79072805 2320 register I32 tmp;
463ee0b2 2321 STRLEN len;
161b471a
NIS
2322 GV *gv = Nullgv;
2323 GV **gvp = 0;
aa7440fb 2324 bool bof = FALSE;
1d239bbb 2325 I32 orig_keyword = 0;
a687059c 2326
bbf60fe6
DM
2327 DEBUG_T( {
2328 PerlIO_printf(Perl_debug_log, "### LEX_%s\n",
2329 lex_state_names[PL_lex_state]);
2330 } );
02aa26ce 2331 /* check if there's an identifier for us to look at */
ba979b31 2332 if (PL_pending_ident)
bbf60fe6 2333 return REPORT(S_pending_ident(aTHX));
bbce6d69 2334
02aa26ce
NT
2335 /* no identifier pending identification */
2336
3280af22 2337 switch (PL_lex_state) {
79072805
LW
2338#ifdef COMMENTARY
2339 case LEX_NORMAL: /* Some compilers will produce faster */
2340 case LEX_INTERPNORMAL: /* code if we comment these out. */
2341 break;
2342#endif
2343
09bef843 2344 /* when we've already built the next token, just pull it out of the queue */
79072805 2345 case LEX_KNOWNEXT:
3280af22
NIS
2346 PL_nexttoke--;
2347 yylval = PL_nextval[PL_nexttoke];
2348 if (!PL_nexttoke) {
2349 PL_lex_state = PL_lex_defer;
2350 PL_expect = PL_lex_expect;
2351 PL_lex_defer = LEX_NORMAL;
463ee0b2 2352 }
607df283 2353 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4659c93f 2354 "### Next token after '%s' was known, type %"IVdf"\n", PL_bufptr,
5f80b19c 2355 (IV)PL_nexttype[PL_nexttoke]); });
607df283 2356
bbf60fe6 2357 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2358
02aa26ce 2359 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2360 when we get here, PL_bufptr is at the \
02aa26ce 2361 */
79072805
LW
2362 case LEX_INTERPCASEMOD:
2363#ifdef DEBUGGING
3280af22 2364 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2365 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2366#endif
02aa26ce 2367 /* handle \E or end of string */
3280af22 2368 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
a0d0e21e 2369 char oldmod;
02aa26ce
NT
2370
2371 /* if at a \E */
3280af22
NIS
2372 if (PL_lex_casemods) {
2373 oldmod = PL_lex_casestack[--PL_lex_casemods];
2374 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2375
3792a11b
NC
2376 if (PL_bufptr != PL_bufend
2377 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2378 PL_bufptr += 2;
2379 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2380 }
bbf60fe6 2381 return REPORT(')');
79072805 2382 }
3280af22
NIS
2383 if (PL_bufptr != PL_bufend)
2384 PL_bufptr += 2;
2385 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2386 return yylex();
79072805
LW
2387 }
2388 else {
607df283 2389 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2390 "### Saw case modifier at '%s'\n", PL_bufptr); });
3280af22 2391 s = PL_bufptr + 1;
6e909404
JH
2392 if (s[1] == '\\' && s[2] == 'E') {
2393 PL_bufptr = s + 3;
2394 PL_lex_state = LEX_INTERPCONCAT;
2395 return yylex();
a0d0e21e 2396 }
6e909404
JH
2397 else {
2398 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2399 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2400 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2401 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2402 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2403 return REPORT(')');
6e909404
JH
2404 }
2405 if (PL_lex_casemods > 10)
2406 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2407 PL_lex_casestack[PL_lex_casemods++] = *s;
2408 PL_lex_casestack[PL_lex_casemods] = '\0';
2409 PL_lex_state = LEX_INTERPCONCAT;
2410 PL_nextval[PL_nexttoke].ival = 0;
2411 force_next('(');
2412 if (*s == 'l')
2413 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2414 else if (*s == 'u')
2415 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2416 else if (*s == 'L')
2417 PL_nextval[PL_nexttoke].ival = OP_LC;
2418 else if (*s == 'U')
2419 PL_nextval[PL_nexttoke].ival = OP_UC;
2420 else if (*s == 'Q')
2421 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2422 else
2423 Perl_croak(aTHX_ "panic: yylex");
2424 PL_bufptr = s + 1;
a0d0e21e 2425 }
79072805 2426 force_next(FUNC);
3280af22
NIS
2427 if (PL_lex_starts) {
2428 s = PL_bufptr;
2429 PL_lex_starts = 0;
131b3ad0
DM
2430 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2431 if (PL_lex_casemods == 1 && PL_lex_inpat)
2432 OPERATOR(',');
2433 else
2434 Aop(OP_CONCAT);
79072805
LW
2435 }
2436 else
cea2e8a9 2437 return yylex();
79072805
LW
2438 }
2439
55497cff 2440 case LEX_INTERPPUSH:
bbf60fe6 2441 return REPORT(sublex_push());
55497cff 2442
79072805 2443 case LEX_INTERPSTART:
3280af22 2444 if (PL_bufptr == PL_bufend)
bbf60fe6 2445 return REPORT(sublex_done());
607df283 2446 DEBUG_T({ PerlIO_printf(Perl_debug_log,
5f80b19c 2447 "### Interpolated variable at '%s'\n", PL_bufptr); });
3280af22
NIS
2448 PL_expect = XTERM;
2449 PL_lex_dojoin = (*PL_bufptr == '@');
2450 PL_lex_state = LEX_INTERPNORMAL;
2451 if (PL_lex_dojoin) {
2452 PL_nextval[PL_nexttoke].ival = 0;
79072805 2453 force_next(',');
a0d0e21e 2454 force_ident("\"", '$');
3280af22 2455 PL_nextval[PL_nexttoke].ival = 0;
79072805 2456 force_next('$');
3280af22 2457 PL_nextval[PL_nexttoke].ival = 0;
79072805 2458 force_next('(');
3280af22 2459 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2460 force_next(FUNC);
2461 }
3280af22
NIS
2462 if (PL_lex_starts++) {
2463 s = PL_bufptr;
131b3ad0
DM
2464 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2465 if (!PL_lex_casemods && PL_lex_inpat)
2466 OPERATOR(',');
2467 else
2468 Aop(OP_CONCAT);
79072805 2469 }
cea2e8a9 2470 return yylex();
79072805
LW
2471
2472 case LEX_INTERPENDMAYBE:
3280af22
NIS
2473 if (intuit_more(PL_bufptr)) {
2474 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2475 break;
2476 }
2477 /* FALL THROUGH */
2478
2479 case LEX_INTERPEND:
3280af22
NIS
2480 if (PL_lex_dojoin) {
2481 PL_lex_dojoin = FALSE;
2482 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2483 return REPORT(')');
79072805 2484 }
43a16006 2485 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2486 && SvEVALED(PL_lex_repl))
43a16006 2487 {
e9fa98b2 2488 if (PL_bufptr != PL_bufend)
cea2e8a9 2489 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2490 PL_lex_repl = Nullsv;
2491 }
79072805
LW
2492 /* FALLTHROUGH */
2493 case LEX_INTERPCONCAT:
2494#ifdef DEBUGGING
3280af22 2495 if (PL_lex_brackets)
cea2e8a9 2496 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2497#endif
3280af22 2498 if (PL_bufptr == PL_bufend)
bbf60fe6 2499 return REPORT(sublex_done());
79072805 2500
3280af22
NIS
2501 if (SvIVX(PL_linestr) == '\'') {
2502 SV *sv = newSVsv(PL_linestr);
2503 if (!PL_lex_inpat)
76e3520e 2504 sv = tokeq(sv);
3280af22 2505 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2506 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2507 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2508 s = PL_bufend;
79072805
LW
2509 }
2510 else {
3280af22 2511 s = scan_const(PL_bufptr);
79072805 2512 if (*s == '\\')
3280af22 2513 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2514 else
3280af22 2515 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2516 }
2517
3280af22
NIS
2518 if (s != PL_bufptr) {
2519 PL_nextval[PL_nexttoke] = yylval;
2520 PL_expect = XTERM;
79072805 2521 force_next(THING);
131b3ad0
DM
2522 if (PL_lex_starts++) {
2523 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2524 if (!PL_lex_casemods && PL_lex_inpat)
2525 OPERATOR(',');
2526 else
2527 Aop(OP_CONCAT);
2528 }
79072805 2529 else {
3280af22 2530 PL_bufptr = s;
cea2e8a9 2531 return yylex();
79072805
LW
2532 }
2533 }
2534
cea2e8a9 2535 return yylex();
a0d0e21e 2536 case LEX_FORMLINE:
3280af22
NIS
2537 PL_lex_state = LEX_NORMAL;
2538 s = scan_formline(PL_bufptr);
2539 if (!PL_lex_formbrack)
a0d0e21e
LW
2540 goto rightbracket;
2541 OPERATOR(';');
79072805
LW
2542 }
2543
3280af22
NIS
2544 s = PL_bufptr;
2545 PL_oldoldbufptr = PL_oldbufptr;
2546 PL_oldbufptr = s;
607df283 2547 DEBUG_T( {
bbf60fe6 2548 PerlIO_printf(Perl_debug_log, "### Tokener expecting %s at [%s]\n",
bf49b057 2549 exp_name[PL_expect], s);
5f80b19c 2550 } );
463ee0b2
LW
2551
2552 retry:
378cc40b
LW
2553 switch (*s) {
2554 default:
7e2040f0 2555 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2556 goto keylookup;
cea2e8a9 2557 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2558 case 4:
2559 case 26:
2560 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2561 case 0:
3280af22
NIS
2562 if (!PL_rsfp) {
2563 PL_last_uni = 0;
2564 PL_last_lop = 0;
c5ee2135
WL
2565 if (PL_lex_brackets) {
2566 if (PL_lex_formbrack)
2567 yyerror("Format not terminated");
2568 else
2569 yyerror("Missing right curly or square bracket");
2570 }
4e553d73 2571 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2572 "### Tokener got EOF\n");
5f80b19c 2573 } );
79072805 2574 TOKEN(0);
463ee0b2 2575 }
3280af22 2576 if (s++ < PL_bufend)
a687059c 2577 goto retry; /* ignore stray nulls */
3280af22
NIS
2578 PL_last_uni = 0;
2579 PL_last_lop = 0;
2580 if (!PL_in_eval && !PL_preambled) {
2581 PL_preambled = TRUE;
2582 sv_setpv(PL_linestr,incl_perldb());
2583 if (SvCUR(PL_linestr))
4147a61b 2584 sv_catpvn(PL_linestr,";", 1);
3280af22
NIS
2585 if (PL_preambleav){
2586 while(AvFILLp(PL_preambleav) >= 0) {
2587 SV *tmpsv = av_shift(PL_preambleav);
2588 sv_catsv(PL_linestr, tmpsv);
4147a61b 2589 sv_catpvn(PL_linestr, ";", 1);
91b7def8 2590 sv_free(tmpsv);
2591 }
3280af22
NIS
2592 sv_free((SV*)PL_preambleav);
2593 PL_preambleav = NULL;
91b7def8 2594 }
3280af22
NIS
2595 if (PL_minus_n || PL_minus_p) {
2596 sv_catpv(PL_linestr, "LINE: while (<>) {");
2597 if (PL_minus_l)
2598 sv_catpv(PL_linestr,"chomp;");
2599 if (PL_minus_a) {
3280af22 2600 if (PL_minus_F) {
3792a11b
NC
2601 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2602 || *PL_splitstr == '"')
3280af22 2603 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2604 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2605 else {
c8ef6a4b
NC
2606 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2607 bytes can be used as quoting characters. :-) */
48c4c863
NC
2608 /* The count here deliberately includes the NUL
2609 that terminates the C string constant. This
2610 embeds the opening NUL into the string. */
dd374669 2611 const char *splits = PL_splitstr;
9a3fb652 2612 sv_catpvn(PL_linestr, "our @F=split(q", 15);
48c4c863
NC
2613 do {
2614 /* Need to \ \s */
dd374669
AL
2615 if (*splits == '\\')
2616 sv_catpvn(PL_linestr, splits, 1);
2617 sv_catpvn(PL_linestr, splits, 1);
2618 } while (*splits++);
48c4c863
NC
2619 /* This loop will embed the trailing NUL of
2620 PL_linestr as the last thing it does before
2621 terminating. */
4fccd7c6 2622 sv_catpvn(PL_linestr, ");", 2);
54310121 2623 }
2304df62
AD
2624 }
2625 else
75c72d73 2626 sv_catpv(PL_linestr,"our @F=split(' ');");
2304df62 2627 }
79072805 2628 }
4147a61b 2629 sv_catpvn(PL_linestr, "\n", 1);
3280af22
NIS
2630 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2631 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2632 PL_last_lop = PL_last_uni = Nullch;
3280af22 2633 if (PERLDB_LINE && PL_curstash != PL_debstash) {
a0d0e21e
LW
2634 SV *sv = NEWSV(85,0);
2635
2636 sv_upgrade(sv, SVt_PVMG);
3280af22 2637 sv_setsv(sv,PL_linestr);
0ac0412a 2638 (void)SvIOK_on(sv);
45977657 2639 SvIV_set(sv, 0);
57843af0 2640 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2641 }
79072805 2642 goto retry;
a687059c 2643 }
e929a76b 2644 do {
aa7440fb 2645 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2646 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2647 fake_eof:
2648 if (PL_rsfp) {
2649 if (PL_preprocess && !PL_in_eval)
2650 (void)PerlProc_pclose(PL_rsfp);
2651 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2652 PerlIO_clearerr(PL_rsfp);
2653 else
2654 (void)PerlIO_close(PL_rsfp);
2655 PL_rsfp = Nullfp;
2656 PL_doextract = FALSE;
2657 }
2658 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2659 sv_setpv(PL_linestr,PL_minus_p
2660 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2661 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2662 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2663 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2664 PL_minus_n = PL_minus_p = 0;
2665 goto retry;
2666 }
2667 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2668 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2669 sv_setpv(PL_linestr,"");
2670 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2671 }
7aa207d6
JH
2672 /* If it looks like the start of a BOM or raw UTF-16,
2673 * check if it in fact is. */
2674 else if (bof &&
2675 (*s == 0 ||
2676 *(U8*)s == 0xEF ||
2677 *(U8*)s >= 0xFE ||
2678 s[1] == 0)) {
226017aa 2679#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2680# ifdef __GNU_LIBRARY__
2681# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2682# define FTELL_FOR_PIPE_IS_BROKEN
2683# endif
e3f494f1
JH
2684# else
2685# ifdef __GLIBC__
2686# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2687# define FTELL_FOR_PIPE_IS_BROKEN
2688# endif
2689# endif
226017aa
DD
2690# endif
2691#endif
2692#ifdef FTELL_FOR_PIPE_IS_BROKEN
2693 /* This loses the possibility to detect the bof
2694 * situation on perl -P when the libc5 is being used.
2695 * Workaround? Maybe attach some extra state to PL_rsfp?
2696 */
2697 if (!PL_preprocess)
7e28d3af 2698 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2699#else
eb160463 2700 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2701#endif
7e28d3af 2702 if (bof) {
3280af22 2703 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2704 s = swallow_bom((U8*)s);
e929a76b 2705 }
378cc40b 2706 }
3280af22 2707 if (PL_doextract) {
a0d0e21e
LW
2708 /* Incest with pod. */
2709 if (*s == '=' && strnEQ(s, "=cut", 4)) {
3280af22
NIS
2710 sv_setpv(PL_linestr, "");
2711 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2712 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2713 PL_last_lop = PL_last_uni = Nullch;
3280af22 2714 PL_doextract = FALSE;
a0d0e21e 2715 }
4e553d73 2716 }
463ee0b2 2717 incline(s);
3280af22
NIS
2718 } while (PL_doextract);
2719 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2720 if (PERLDB_LINE && PL_curstash != PL_debstash) {
79072805 2721 SV *sv = NEWSV(85,0);
a687059c 2722
93a17b20 2723 sv_upgrade(sv, SVt_PVMG);
3280af22 2724 sv_setsv(sv,PL_linestr);
0ac0412a 2725 (void)SvIOK_on(sv);
45977657 2726 SvIV_set(sv, 0);
57843af0 2727 av_store(CopFILEAV(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2728 }
3280af22 2729 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2730 PL_last_lop = PL_last_uni = Nullch;
57843af0 2731 if (CopLINE(PL_curcop) == 1) {
3280af22 2732 while (s < PL_bufend && isSPACE(*s))
79072805 2733 s++;
a0d0e21e 2734 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2735 s++;
44a8e56a 2736 d = Nullch;
3280af22 2737 if (!PL_in_eval) {
44a8e56a 2738 if (*s == '#' && *(s+1) == '!')
2739 d = s + 2;
2740#ifdef ALTERNATE_SHEBANG
2741 else {
bfed75c6 2742 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 2743 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2744 d = s + (sizeof(as) - 1);
2745 }
2746#endif /* ALTERNATE_SHEBANG */
2747 }
2748 if (d) {
b8378b72 2749 char *ipath;
774d564b 2750 char *ipathend;
b8378b72 2751
774d564b 2752 while (isSPACE(*d))
b8378b72
CS
2753 d++;
2754 ipath = d;
774d564b 2755 while (*d && !isSPACE(*d))
2756 d++;
2757 ipathend = d;
2758
2759#ifdef ARG_ZERO_IS_SCRIPT
2760 if (ipathend > ipath) {
2761 /*
2762 * HP-UX (at least) sets argv[0] to the script name,
2763 * which makes $^X incorrect. And Digital UNIX and Linux,
2764 * at least, set argv[0] to the basename of the Perl
2765 * interpreter. So, having found "#!", we'll set it right.
2766 */
ee2f7564 2767 SV *x = GvSV(gv_fetchpv("\030", TRUE, SVt_PV)); /* $^X */
774d564b 2768 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2769 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2770 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2771 SvSETMAGIC(x);
2772 }
556c1dec
JH
2773 else {
2774 STRLEN blen;
2775 STRLEN llen;
2776 char *bstart = SvPV(CopFILESV(PL_curcop),blen);
2777 char *lstart = SvPV(x,llen);
2778 if (llen < blen) {
2779 bstart += blen - llen;
2780 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2781 sv_setpvn(x, ipath, ipathend - ipath);
2782 SvSETMAGIC(x);
2783 }
2784 }
2785 }
774d564b 2786 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2787 }
774d564b 2788#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2789
2790 /*
2791 * Look for options.
2792 */
748a9306 2793 d = instr(s,"perl -");
84e30d1a 2794 if (!d) {
748a9306 2795 d = instr(s,"perl");
84e30d1a
GS
2796#if defined(DOSISH)
2797 /* avoid getting into infinite loops when shebang
2798 * line contains "Perl" rather than "perl" */
2799 if (!d) {
2800 for (d = ipathend-4; d >= ipath; --d) {
2801 if ((*d == 'p' || *d == 'P')
2802 && !ibcmp(d, "perl", 4))
2803 {
2804 break;
2805 }
2806 }
2807 if (d < ipath)
2808 d = Nullch;
2809 }
2810#endif
2811 }
44a8e56a 2812#ifdef ALTERNATE_SHEBANG
2813 /*
2814 * If the ALTERNATE_SHEBANG on this system starts with a
2815 * character that can be part of a Perl expression, then if
2816 * we see it but not "perl", we're probably looking at the
2817 * start of Perl code, not a request to hand off to some
2818 * other interpreter. Similarly, if "perl" is there, but
2819 * not in the first 'word' of the line, we assume the line
2820 * contains the start of the Perl program.
44a8e56a 2821 */
2822 if (d && *s != '#') {
774d564b 2823 char *c = ipath;
44a8e56a 2824 while (*c && !strchr("; \t\r\n\f\v#", *c))
2825 c++;
2826 if (c < d)
2827 d = Nullch; /* "perl" not in first word; ignore */
2828 else
2829 *s = '#'; /* Don't try to parse shebang line */
2830 }
774d564b 2831#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2832#ifndef MACOS_TRADITIONAL
748a9306 2833 if (!d &&
44a8e56a 2834 *s == '#' &&
774d564b 2835 ipathend > ipath &&
3280af22 2836 !PL_minus_c &&
748a9306 2837 !instr(s,"indir") &&
3280af22 2838 instr(PL_origargv[0],"perl"))
748a9306 2839 {
27da23d5 2840 dVAR;
9f68db38 2841 char **newargv;
9f68db38 2842
774d564b 2843 *ipathend = '\0';
2844 s = ipathend + 1;
3280af22 2845 while (s < PL_bufend && isSPACE(*s))
9f68db38 2846 s++;
3280af22
NIS
2847 if (s < PL_bufend) {
2848 Newz(899,newargv,PL_origargc+3,char*);
9f68db38 2849 newargv[1] = s;
3280af22 2850 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2851 s++;
2852 *s = '\0';
3280af22 2853 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2854 }
2855 else
3280af22 2856 newargv = PL_origargv;
774d564b 2857 newargv[0] = ipath;
b35112e7 2858 PERL_FPU_PRE_EXEC
b4748376 2859 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2860 PERL_FPU_POST_EXEC
cea2e8a9 2861 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2862 }
bf4acbe4 2863#endif
748a9306 2864 if (d) {
3280af22
NIS
2865 U32 oldpdb = PL_perldb;
2866 bool oldn = PL_minus_n;
2867 bool oldp = PL_minus_p;
748a9306
LW
2868
2869 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2870 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2871
2872 if (*d++ == '-') {
a11ec5a9 2873 bool switches_done = PL_doswitches;
8cc95fdb 2874 do {
3ffe3ee4 2875 if (*d == 'M' || *d == 'm' || *d == 'C') {
8cc95fdb 2876 char *m = d;
2877 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2878 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 2879 (int)(d - m), m);
2880 }
2881 d = moreswitches(d);
2882 } while (d);
f0b2cf55
YST
2883 if (PL_doswitches && !switches_done) {
2884 int argc = PL_origargc;
2885 char **argv = PL_origargv;
2886 do {
2887 argc--,argv++;
2888 } while (argc && argv[0][0] == '-' && argv[0][1]);
2889 init_argv_symbols(argc,argv);
2890 }
155aba94
GS
2891 if ((PERLDB_LINE && !oldpdb) ||
2892 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 2893 /* if we have already added "LINE: while (<>) {",
2894 we must not do it again */
748a9306 2895 {
3280af22
NIS
2896 sv_setpv(PL_linestr, "");
2897 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2898 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2899 PL_last_lop = PL_last_uni = Nullch;
3280af22 2900 PL_preambled = FALSE;
84902520 2901 if (PERLDB_LINE)
3280af22 2902 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
2903 goto retry;
2904 }
a11ec5a9
RGS
2905 if (PL_doswitches && !switches_done) {
2906 int argc = PL_origargc;
2907 char **argv = PL_origargv;
2908 do {
2909 argc--,argv++;
2910 } while (argc && argv[0][0] == '-' && argv[0][1]);
2911 init_argv_symbols(argc,argv);
2912 }
a0d0e21e 2913 }
79072805 2914 }
9f68db38 2915 }
79072805 2916 }
3280af22
NIS
2917 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2918 PL_bufptr = s;
2919 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2920 return yylex();
ae986130 2921 }
378cc40b 2922 goto retry;
4fdae800 2923 case '\r':
6a27c188 2924#ifdef PERL_STRICT_CR
cea2e8a9 2925 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 2926 Perl_croak(aTHX_
cc507455 2927 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 2928#endif
4fdae800 2929 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
2930#ifdef MACOS_TRADITIONAL
2931 case '\312':
2932#endif
378cc40b
LW
2933 s++;
2934 goto retry;
378cc40b 2935 case '#':
e929a76b 2936 case '\n':
3280af22 2937 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
2938 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
2939 /* handle eval qq[#line 1 "foo"\n ...] */
2940 CopLINE_dec(PL_curcop);
2941 incline(s);
2942 }
3280af22 2943 d = PL_bufend;
a687059c 2944 while (s < d && *s != '\n')
378cc40b 2945 s++;
0f85fab0 2946 if (s < d)
378cc40b 2947 s++;
78c267c1 2948 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 2949 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 2950 incline(s);
3280af22
NIS
2951 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2952 PL_bufptr = s;
2953 PL_lex_state = LEX_FORMLINE;
cea2e8a9 2954 return yylex();
a687059c 2955 }
378cc40b 2956 }
a687059c 2957 else {
378cc40b 2958 *s = '\0';
3280af22 2959 PL_bufend = s;
a687059c 2960 }
378cc40b
LW
2961 goto retry;
2962 case '-':
79072805 2963 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50
JH
2964 I32 ftst = 0;
2965
378cc40b 2966 s++;
3280af22 2967 PL_bufptr = s;
748a9306
LW
2968 tmp = *s++;
2969
bf4acbe4 2970 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
2971 s++;
2972
2973 if (strnEQ(s,"=>",2)) {
3280af22 2974 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
4e553d73 2975 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2976 "### Saw unary minus before =>, forcing word '%s'\n", s);
5f80b19c 2977 } );
748a9306
LW
2978 OPERATOR('-'); /* unary minus */
2979 }
3280af22 2980 PL_last_uni = PL_oldbufptr;
748a9306 2981 switch (tmp) {
e5edeb50
JH
2982 case 'r': ftst = OP_FTEREAD; break;
2983 case 'w': ftst = OP_FTEWRITE; break;
2984 case 'x': ftst = OP_FTEEXEC; break;
2985 case 'o': ftst = OP_FTEOWNED; break;
2986 case 'R': ftst = OP_FTRREAD; break;
2987 case 'W': ftst = OP_FTRWRITE; break;
2988 case 'X': ftst = OP_FTREXEC; break;
2989 case 'O': ftst = OP_FTROWNED; break;
2990 case 'e': ftst = OP_FTIS; break;
2991 case 'z': ftst = OP_FTZERO; break;
2992 case 's': ftst = OP_FTSIZE; break;
2993 case 'f': ftst = OP_FTFILE; break;
2994 case 'd': ftst = OP_FTDIR; break;
2995 case 'l': ftst = OP_FTLINK; break;
2996 case 'p': ftst = OP_FTPIPE; break;
2997 case 'S': ftst = OP_FTSOCK; break;
2998 case 'u': ftst = OP_FTSUID; break;
2999 case 'g': ftst = OP_FTSGID; break;
3000 case 'k': ftst = OP_FTSVTX; break;
3001 case 'b': ftst = OP_FTBLK; break;
3002 case 'c': ftst = OP_FTCHR; break;
3003 case 't': ftst = OP_FTTTY; break;
3004 case 'T': ftst = OP_FTTEXT; break;
3005 case 'B': ftst = OP_FTBINARY; break;
3006 case 'M': case 'A': case 'C':
3007 gv_fetchpv("\024",TRUE, SVt_PV);
3008 switch (tmp) {
3009 case 'M': ftst = OP_FTMTIME; break;
3010 case 'A': ftst = OP_FTATIME; break;
3011 case 'C': ftst = OP_FTCTIME; break;
3012 default: break;
3013 }
3014 break;
378cc40b 3015 default:
378cc40b
LW
3016 break;
3017 }
e5edeb50 3018 if (ftst) {
eb160463 3019 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3020 DEBUG_T( { PerlIO_printf(Perl_debug_log,
0844c848 3021 "### Saw file test %c\n", (int)ftst);
5f80b19c 3022 } );
e5edeb50
JH
3023 FTST(ftst);
3024 }
3025 else {
3026 /* Assume it was a minus followed by a one-letter named
3027 * subroutine call (or a -bareword), then. */
95c31fe3 3028 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3029 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3030 (int) tmp);
5f80b19c 3031 } );
3cf7b4c4 3032 s = --PL_bufptr;
e5edeb50 3033 }
378cc40b 3034 }
a687059c
LW
3035 tmp = *s++;
3036 if (*s == tmp) {
3037 s++;
3280af22 3038 if (PL_expect == XOPERATOR)
79072805
LW
3039 TERM(POSTDEC);
3040 else
3041 OPERATOR(PREDEC);
3042 }
3043 else if (*s == '>') {
3044 s++;
3045 s = skipspace(s);
7e2040f0 3046 if (isIDFIRST_lazy_if(s,UTF)) {
a0d0e21e 3047 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
463ee0b2 3048 TOKEN(ARROW);
79072805 3049 }
748a9306
LW
3050 else if (*s == '$')
3051 OPERATOR(ARROW);
463ee0b2 3052 else
748a9306 3053 TERM(ARROW);
a687059c 3054 }
3280af22 3055 if (PL_expect == XOPERATOR)
79072805
LW
3056 Aop(OP_SUBTRACT);
3057 else {
3280af22 3058 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3059 check_uni();
79072805 3060 OPERATOR('-'); /* unary minus */
2f3197b3 3061 }
79072805 3062
378cc40b 3063 case '+':
a687059c
LW
3064 tmp = *s++;
3065 if (*s == tmp) {
378cc40b 3066 s++;
3280af22 3067 if (PL_expect == XOPERATOR)
79072805
LW
3068 TERM(POSTINC);
3069 else
3070 OPERATOR(PREINC);
378cc40b 3071 }
3280af22 3072 if (PL_expect == XOPERATOR)
79072805
LW
3073 Aop(OP_ADD);
3074 else {
3280af22 3075 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
2f3197b3 3076 check_uni();
a687059c 3077 OPERATOR('+');
2f3197b3 3078 }
a687059c 3079
378cc40b 3080 case '*':
3280af22
NIS
3081 if (PL_expect != XOPERATOR) {
3082 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3083 PL_expect = XOPERATOR;
3084 force_ident(PL_tokenbuf, '*');
3085 if (!*PL_tokenbuf)
a0d0e21e 3086 PREREF('*');
79072805 3087 TERM('*');
a687059c 3088 }
79072805
LW
3089 s++;
3090 if (*s == '*') {
a687059c 3091 s++;
79072805 3092 PWop(OP_POW);
a687059c 3093 }
79072805
LW
3094 Mop(OP_MULTIPLY);
3095
378cc40b 3096 case '%':
3280af22 3097 if (PL_expect == XOPERATOR) {
bbce6d69 3098 ++s;
3099 Mop(OP_MODULO);
a687059c 3100 }
3280af22
NIS
3101 PL_tokenbuf[0] = '%';
3102 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3103 if (!PL_tokenbuf[1]) {
bbce6d69 3104 PREREF('%');
a687059c 3105 }
3280af22 3106 PL_pending_ident = '%';
bbce6d69 3107 TERM('%');
a687059c 3108
378cc40b 3109 case '^':
79072805 3110 s++;
a0d0e21e 3111 BOop(OP_BIT_XOR);
79072805 3112 case '[':
3280af22 3113 PL_lex_brackets++;
79072805 3114 /* FALL THROUGH */
378cc40b 3115 case '~':
378cc40b 3116 case ',':
378cc40b
LW
3117 tmp = *s++;
3118 OPERATOR(tmp);
a0d0e21e
LW
3119 case ':':
3120 if (s[1] == ':') {
3121 len = 0;
3122 goto just_a_word;
3123 }
3124 s++;
09bef843
SB
3125 switch (PL_expect) {
3126 OP *attrs;
3127 case XOPERATOR:
3128 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3129 break;
3130 PL_bufptr = s; /* update in case we back off */
3131 goto grabattrs;
3132 case XATTRBLOCK:
3133 PL_expect = XBLOCK;
3134 goto grabattrs;
3135 case XATTRTERM:
3136 PL_expect = XTERMBLOCK;
3137 grabattrs:
3138 s = skipspace(s);
3139 attrs = Nullop;
7e2040f0 3140 while (isIDFIRST_lazy_if(s,UTF)) {
09bef843 3141 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3142 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3143 if (tmp < 0) tmp = -tmp;
3144 switch (tmp) {
3145 case KEY_or:
3146 case KEY_and:
c963b151 3147 case KEY_err:
f9829d6b
GS
3148 case KEY_for:
3149 case KEY_unless:
3150 case KEY_if:
3151 case KEY_while:
3152 case KEY_until:
3153 goto got_attrs;
3154 default:
3155 break;
3156 }
3157 }
09bef843
SB
3158 if (*d == '(') {
3159 d = scan_str(d,TRUE,TRUE);
3160 if (!d) {
09bef843
SB
3161 /* MUST advance bufptr here to avoid bogus
3162 "at end of line" context messages from yyerror().
3163 */
3164 PL_bufptr = s + len;
3165 yyerror("Unterminated attribute parameter in attribute list");
3166 if (attrs)
3167 op_free(attrs);
bbf60fe6 3168 return REPORT(0); /* EOF indicator */
09bef843
SB
3169 }
3170 }
3171 if (PL_lex_stuff) {
3172 SV *sv = newSVpvn(s, len);
3173 sv_catsv(sv, PL_lex_stuff);
3174 attrs = append_elem(OP_LIST, attrs,
3175 newSVOP(OP_CONST, 0, sv));
3176 SvREFCNT_dec(PL_lex_stuff);
3177 PL_lex_stuff = Nullsv;
3178 }
3179 else {
371fce9b
DM
3180 if (len == 6 && strnEQ(s, "unique", len)) {
3181 if (PL_in_my == KEY_our)
3182#ifdef USE_ITHREADS
3183 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3184#else
3185 ; /* skip to avoid loading attributes.pm */
3186#endif
bfed75c6 3187 else
371fce9b
DM
3188 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3189 }
3190
d3cea301
SB
3191 /* NOTE: any CV attrs applied here need to be part of
3192 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3193 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3194 CvLVALUE_on(PL_compcv);
3195 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3196 CvLOCKED_on(PL_compcv);
3197 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3198 CvMETHOD_on(PL_compcv);
06492da6
SF
3199 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3200 CvASSERTION_on(PL_compcv);
78f9721b
SM
3201 /* After we've set the flags, it could be argued that
3202 we don't need to do the attributes.pm-based setting
3203 process, and shouldn't bother appending recognized
d3cea301
SB
3204 flags. To experiment with that, uncomment the
3205 following "else". (Note that's already been
3206 uncommented. That keeps the above-applied built-in
3207 attributes from being intercepted (and possibly
3208 rejected) by a package's attribute routines, but is
3209 justified by the performance win for the common case
3210 of applying only built-in attributes.) */
0256094b 3211 else
78f9721b
SM
3212 attrs = append_elem(OP_LIST, attrs,
3213 newSVOP(OP_CONST, 0,
3214 newSVpvn(s, len)));
09bef843
SB
3215 }
3216 s = skipspace(d);
0120eecf 3217 if (*s == ':' && s[1] != ':')
09bef843 3218 s = skipspace(s+1);
0120eecf
GS
3219 else if (s == d)
3220 break; /* require real whitespace or :'s */
09bef843 3221 }
f9829d6b 3222 tmp = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
8e7ae056 3223 if (*s != ';' && *s != '}' && *s != tmp && (tmp != '=' || *s != ')')) {
09bef843
SB
3224 char q = ((*s == '\'') ? '"' : '\'');
3225 /* If here for an expression, and parsed no attrs, back off. */
3226 if (tmp == '=' && !attrs) {
3227 s = PL_bufptr;
3228 break;
3229 }
3230 /* MUST advance bufptr here to avoid bogus "at end of line"
3231 context messages from yyerror().
3232 */
3233 PL_bufptr = s;
3234 if (!*s)
3235 yyerror("Unterminated attribute list");
3236 else
3237 yyerror(Perl_form(aTHX_ "Invalid separator character %c%c%c in attribute list",
3238 q, *s, q));
3239 if (attrs)
3240 op_free(attrs);
3241 OPERATOR(':');
3242 }
f9829d6b 3243 got_attrs:
09bef843
SB
3244 if (attrs) {
3245 PL_nextval[PL_nexttoke].opval = attrs;
3246 force_next(THING);
3247 }
3248 TOKEN(COLONATTR);
3249 }
a0d0e21e 3250 OPERATOR(':');
8990e307
LW
3251 case '(':
3252 s++;
3280af22
NIS
3253 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3254 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3255 else
3280af22 3256 PL_expect = XTERM;
4a202259 3257 s = skipspace(s);
a0d0e21e 3258 TOKEN('(');
378cc40b 3259 case ';':
f4dd75d9 3260 CLINE;
378cc40b
LW
3261 tmp = *s++;
3262 OPERATOR(tmp);
3263 case ')':
378cc40b 3264 tmp = *s++;
16d20bd9
AD
3265 s = skipspace(s);
3266 if (*s == '{')
3267 PREBLOCK(tmp);
378cc40b 3268 TERM(tmp);
79072805
LW
3269 case ']':
3270 s++;
3280af22 3271 if (PL_lex_brackets <= 0)
d98d5fff 3272 yyerror("Unmatched right square bracket");
463ee0b2 3273 else
3280af22
NIS
3274 --PL_lex_brackets;
3275 if (PL_lex_state == LEX_INTERPNORMAL) {
3276 if (PL_lex_brackets == 0) {
a0d0e21e 3277 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3278 PL_lex_state = LEX_INTERPEND;
79072805
LW
3279 }
3280 }
4633a7c4 3281 TERM(']');
79072805
LW
3282 case '{':
3283 leftbracket:
79072805 3284 s++;
3280af22 3285 if (PL_lex_brackets > 100) {
8edd5f42 3286 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3287 }
3280af22 3288 switch (PL_expect) {
a0d0e21e 3289 case XTERM:
3280af22 3290 if (PL_lex_formbrack) {
a0d0e21e
LW
3291 s--;
3292 PRETERMBLOCK(DO);
3293 }
3280af22
NIS
3294 if (PL_oldoldbufptr == PL_last_lop)
3295 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3296 else
3280af22 3297 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3298 OPERATOR(HASHBRACK);
a0d0e21e 3299 case XOPERATOR:
bf4acbe4 3300 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3301 s++;
44a8e56a 3302 d = s;
3280af22
NIS
3303 PL_tokenbuf[0] = '\0';
3304 if (d < PL_bufend && *d == '-') {
3305 PL_tokenbuf[0] = '-';
44a8e56a 3306 d++;
bf4acbe4 3307 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3308 d++;
3309 }
7e2040f0 3310 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3311 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3312 FALSE, &len);
bf4acbe4 3313 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3314 d++;
3315 if (*d == '}') {
3280af22 3316 char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3317 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3318 if (minus)
3319 force_next('-');
748a9306
LW
3320 }
3321 }
3322 /* FALL THROUGH */
09bef843 3323 case XATTRBLOCK:
748a9306 3324 case XBLOCK:
3280af22
NIS
3325 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3326 PL_expect = XSTATE;
a0d0e21e 3327 break;
09bef843 3328 case XATTRTERM:
a0d0e21e 3329 case XTERMBLOCK:
3280af22
NIS
3330 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3331 PL_expect = XSTATE;
a0d0e21e
LW
3332 break;
3333 default: {
3334 char *t;
3280af22
NIS
3335 if (PL_oldoldbufptr == PL_last_lop)
3336 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3337 else
3280af22 3338 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3339 s = skipspace(s);
8452ff4b
SB
3340 if (*s == '}') {
3341 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3342 PL_expect = XTERM;
3343 /* This hack is to get the ${} in the message. */
3344 PL_bufptr = s+1;
3345 yyerror("syntax error");
3346 break;
3347 }
a0d0e21e 3348 OPERATOR(HASHBRACK);
8452ff4b 3349 }
b8a4b1be
GS
3350 /* This hack serves to disambiguate a pair of curlies
3351 * as being a block or an anon hash. Normally, expectation
3352 * determines that, but in cases where we're not in a
3353 * position to expect anything in particular (like inside
3354 * eval"") we have to resolve the ambiguity. This code
3355 * covers the case where the first term in the curlies is a
3356 * quoted string. Most other cases need to be explicitly
3357 * disambiguated by prepending a `+' before the opening
3358 * curly in order to force resolution as an anon hash.
3359 *
3360 * XXX should probably propagate the outer expectation
3361 * into eval"" to rely less on this hack, but that could
3362 * potentially break current behavior of eval"".
3363 * GSAR 97-07-21
3364 */
3365 t = s;
3366 if (*s == '\'' || *s == '"' || *s == '`') {
3367 /* common case: get past first string, handling escapes */
3280af22 3368 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3369 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3370 t++;
3371 t++;
a0d0e21e 3372 }
b8a4b1be 3373 else if (*s == 'q') {
3280af22 3374 if (++t < PL_bufend
b8a4b1be 3375 && (!isALNUM(*t)
3280af22 3376 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3377 && !isALNUM(*t))))
3378 {
abc667d1 3379 /* skip q//-like construct */
b8a4b1be
GS
3380 char *tmps;
3381 char open, close, term;
3382 I32 brackets = 1;
3383
3280af22 3384 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3385 t++;
abc667d1
DM
3386 /* check for q => */
3387 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3388 OPERATOR(HASHBRACK);
3389 }
b8a4b1be
GS
3390 term = *t;
3391 open = term;
3392 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
3393 term = tmps[5];
3394 close = term;
3395 if (open == close)
3280af22
NIS
3396 for (t++; t < PL_bufend; t++) {
3397 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 3398 t++;
6d07e5e9 3399 else if (*t == open)
b8a4b1be
GS
3400 break;
3401 }
abc667d1 3402 else {
3280af22
NIS
3403 for (t++; t < PL_bufend; t++) {
3404 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 3405 t++;
6d07e5e9 3406 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
3407 break;
3408 else if (*t == open)
3409 brackets++;
3410 }
abc667d1
DM
3411 }
3412 t++;
b8a4b1be 3413 }
abc667d1
DM
3414 else
3415 /* skip plain q word */
3416 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
3417 t += UTF8SKIP(t);
a0d0e21e 3418 }
7e2040f0 3419 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 3420 t += UTF8SKIP(t);
7e2040f0 3421 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 3422 t += UTF8SKIP(t);
a0d0e21e 3423 }
3280af22 3424 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 3425 t++;
b8a4b1be
GS
3426 /* if comma follows first term, call it an anon hash */
3427 /* XXX it could be a comma expression with loop modifiers */
3280af22 3428 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 3429 || (*t == '=' && t[1] == '>')))
a0d0e21e 3430 OPERATOR(HASHBRACK);
3280af22 3431 if (PL_expect == XREF)
4e4e412b 3432 PL_expect = XTERM;
a0d0e21e 3433 else {
3280af22
NIS
3434 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
3435 PL_expect = XSTATE;
a0d0e21e 3436 }
8990e307 3437 }
a0d0e21e 3438 break;
463ee0b2 3439 }
57843af0 3440 yylval.ival = CopLINE(PL_curcop);
79072805 3441 if (isSPACE(*s) || *s == '#')
3280af22 3442 PL_copline = NOLINE; /* invalidate current command line number */
79072805 3443 TOKEN('{');
378cc40b 3444 case '}':
79072805
LW
3445 rightbracket:
3446 s++;
3280af22 3447 if (PL_lex_brackets <= 0)
d98d5fff 3448 yyerror("Unmatched right curly bracket");
463ee0b2 3449 else
3280af22 3450 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 3451 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
3452 PL_lex_formbrack = 0;
3453 if (PL_lex_state == LEX_INTERPNORMAL) {
3454 if (PL_lex_brackets == 0) {
9059aa12
LW
3455 if (PL_expect & XFAKEBRACK) {
3456 PL_expect &= XENUMMASK;
3280af22
NIS
3457 PL_lex_state = LEX_INTERPEND;
3458 PL_bufptr = s;
cea2e8a9 3459 return yylex(); /* ignore fake brackets */
79072805 3460 }
fa83b5b6 3461 if (*s == '-' && s[1] == '>')
3280af22 3462 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 3463 else if (*s != '[' && *s != '{')
3280af22 3464 PL_lex_state = LEX_INTERPEND;
79072805
LW
3465 }
3466 }
9059aa12
LW
3467 if (PL_expect & XFAKEBRACK) {
3468 PL_expect &= XENUMMASK;
3280af22 3469 PL_bufptr = s;
cea2e8a9 3470 return yylex(); /* ignore fake brackets */
748a9306 3471 }
79072805
LW
3472 force_next('}');
3473 TOKEN(';');
378cc40b
LW
3474 case '&':
3475 s++;
3476 tmp = *s++;
3477 if (tmp == '&')
a0d0e21e 3478 AOPERATOR(ANDAND);
378cc40b 3479 s--;
3280af22 3480 if (PL_expect == XOPERATOR) {
7e2040f0
GS
3481 if (ckWARN(WARN_SEMICOLON)
3482 && isIDFIRST_lazy_if(s,UTF) && PL_bufptr == PL_linestart)
3483 {
57843af0 3484 CopLINE_dec(PL_curcop);
9014280d 3485 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), PL_warn_nosemi);
57843af0 3486 CopLINE_inc(PL_curcop);
463ee0b2 3487 }
79072805 3488 BAop(OP_BIT_AND);
463ee0b2 3489 }
79072805 3490
3280af22
NIS
3491 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3492 if (*PL_tokenbuf) {
3493 PL_expect = XOPERATOR;
3494 force_ident(PL_tokenbuf, '&');
463ee0b2 3495 }
79072805
LW
3496 else
3497 PREREF('&');
c07a80fd 3498 yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
3499 TERM('&');
3500
378cc40b
LW
3501 case '|':
3502 s++;
3503 tmp = *s++;
3504 if (tmp == '|')
a0d0e21e 3505 AOPERATOR(OROR);
378cc40b 3506 s--;
79072805 3507 BOop(OP_BIT_OR);
378cc40b
LW
3508 case '=':
3509 s++;
3510 tmp = *s++;
3511 if (tmp == '=')
79072805
LW
3512 Eop(OP_EQ);
3513 if (tmp == '>')
3514 OPERATOR(',');
378cc40b 3515 if (tmp == '~')
79072805 3516 PMop(OP_MATCH);
599cee73 3517 if (ckWARN(WARN_SYNTAX) && tmp && isSPACE(*s) && strchr("+-*/%.^&|<",tmp))
9014280d 3518 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Reversed %c= operator",(int)tmp);
378cc40b 3519 s--;
3280af22
NIS
3520 if (PL_expect == XSTATE && isALPHA(tmp) &&
3521 (s == PL_linestart+1 || s[-2] == '\n') )
748a9306 3522 {
3280af22
NIS
3523 if (PL_in_eval && !PL_rsfp) {
3524 d = PL_bufend;
a5f75d66
AD
3525 while (s < d) {
3526 if (*s++ == '\n') {
3527 incline(s);
3528 if (strnEQ(s,"=cut",4)) {
3529 s = strchr(s,'\n');
3530 if (s)
3531 s++;
3532 else
3533 s = d;
3534 incline(s);
3535 goto retry;
3536 }
3537 }
3538 }
3539 goto retry;
3540 }
3280af22
NIS
3541 s = PL_bufend;
3542 PL_doextract = TRUE;