This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Remove stale date.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a
PP
63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
b6007c36
DM
65#define LEX_NORMAL 10 /* normal code (ie not within "...") */
66#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69#define LEX_INTERPSTART 6 /* expecting the start of a $var */
70
71 /* at end of code, eg "$x" followed by: */
72#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74
75#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77#define LEX_INTERPCONST 2 /* NOT USED */
78#define LEX_FORMLINE 1 /* expecting a format line */
79#define LEX_KNOWNEXT 0 /* next token known; just return it */
80
79072805 81
bbf60fe6 82#ifdef DEBUGGING
27da23d5 83static const char* const lex_state_names[] = {
bbf60fe6
DM
84 "KNOWNEXT",
85 "FORMLINE",
86 "INTERPCONST",
87 "INTERPCONCAT",
88 "INTERPENDMAYBE",
89 "INTERPEND",
90 "INTERPSTART",
91 "INTERPPUSH",
92 "INTERPCASEMOD",
93 "INTERPNORMAL",
94 "NORMAL"
95};
96#endif
97
79072805
LW
98#ifdef ff_next
99#undef ff_next
d48672a2
LW
100#endif
101
79072805 102#include "keywords.h"
fe14fcc3 103
ffb4593c
NT
104/* CLINE is a macro that ensures PL_copline has a sane value */
105
ae986130
LW
106#ifdef CLINE
107#undef CLINE
108#endif
57843af0 109#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 110
ffb4593c
NT
111/*
112 * Convenience functions to return different tokens and prime the
9cbb5ea2 113 * lexer for the next token. They all take an argument.
ffb4593c
NT
114 *
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
2d2e263d 125 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
126 * BOop : bitwise or or xor
127 * BAop : bitwise and
128 * SHop : shift operator
129 * PWop : power operator
9cbb5ea2 130 * PMop : pattern-matching operator
ffb4593c
NT
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
e5edeb50 134 * Rop : relational operator <= != gt
ffb4593c
NT
135 *
136 * Also see LOP and lop() below.
137 */
138
998054bd 139#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 140# define REPORT(retval) tokereport((I32)retval)
998054bd 141#else
bbf60fe6 142# define REPORT(retval) (retval)
998054bd
SC
143#endif
144
bbf60fe6
DM
145#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 165
a687059c
LW
166/* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
a687059c 170 */
376fcdbf
AL
171#define UNI2(f,x) { \
172 yylval.ival = f; \
173 PL_expect = x; \
174 PL_bufptr = s; \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
177 if (*s == '(') \
178 return REPORT( (int)FUNC1 ); \
179 s = skipspace(s); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 }
6f33ba73
RGS
182#define UNI(f) UNI2(f,XTERM)
183#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 184
376fcdbf
AL
185#define UNIBRACK(f) { \
186 yylval.ival = f; \
187 PL_bufptr = s; \
188 PL_last_uni = PL_oldbufptr; \
189 if (*s == '(') \
190 return REPORT( (int)FUNC1 ); \
191 s = skipspace(s); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
193 }
79072805 194
9f68db38 195/* grandfather return to old style */
3280af22 196#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 197
8fa7f367
JH
198#ifdef DEBUGGING
199
bbf60fe6
DM
200/* how to interpret the yylval associated with the token */
201enum token_type {
202 TOKENTYPE_NONE,
203 TOKENTYPE_IVAL,
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
205 TOKENTYPE_PVAL,
206 TOKENTYPE_OPVAL,
207 TOKENTYPE_GVVAL
208};
209
27da23d5
JH
210static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
9041c2e3 212{
bbf60fe6
DM
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
279};
280
281/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 282
bbf60fe6 283STATIC int
f5bd084c 284S_tokereport(pTHX_ I32 rv)
bbf60fe6 285{
97aff369 286 dVAR;
bbf60fe6 287 if (DEBUG_T_TEST) {
bfed75c6 288 const char *name = Nullch;
bbf60fe6 289 enum token_type type = TOKENTYPE_NONE;
f54cb97a 290 const struct debug_tokens *p;
396482e1 291 SV* const report = newSVpvs("<== ");
bbf60fe6 292
f54cb97a 293 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
294 if (p->token == (int)rv) {
295 name = p->name;
296 type = p->type;
297 break;
298 }
299 }
300 if (name)
54667de8 301 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
304 else if (!rv)
396482e1 305 sv_catpvs(report, "EOF");
bbf60fe6
DM
306 else
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
308 switch (type) {
309 case TOKENTYPE_NONE:
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
311 break;
312 case TOKENTYPE_IVAL:
e4584336 313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
314 break;
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
318 break;
319 case TOKENTYPE_PVAL:
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
321 break;
322 case TOKENTYPE_OPVAL:
b6007c36 323 if (yylval.opval) {
401441c0 324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 325 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
329 }
330
331 }
401441c0 332 else
396482e1 333 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
334 break;
335 }
b6007c36 336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
337 };
338 return (int)rv;
998054bd
SC
339}
340
b6007c36
DM
341
342/* print the buffer with suitable escapes */
343
344STATIC void
345S_printbuf(pTHX_ const char* fmt, const char* s)
346{
396482e1 347 SV* const tmp = newSVpvs("");
b6007c36
DM
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
349 SvREFCNT_dec(tmp);
350}
351
8fa7f367
JH
352#endif
353
ffb4593c
NT
354/*
355 * S_ao
356 *
c963b151
BD
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
359 */
360
76e3520e 361STATIC int
cea2e8a9 362S_ao(pTHX_ int toketype)
a0d0e21e 363{
97aff369 364 dVAR;
3280af22
NIS
365 if (*PL_bufptr == '=') {
366 PL_bufptr++;
a0d0e21e
LW
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
c963b151
BD
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
373 toketype = ASSIGNOP;
374 }
375 return toketype;
376}
377
ffb4593c
NT
378/*
379 * S_no_op
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
389 */
390
76e3520e 391STATIC void
bfed75c6 392S_no_op(pTHX_ const char *what, char *s)
463ee0b2 393{
97aff369 394 dVAR;
9d4ba2ae
AL
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 397
1189a94a
GS
398 if (!s)
399 s = oldbp;
07c798fb 400 else
1189a94a 401 PL_bufptr = s;
cea2e8a9 402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
403 if (ckWARN_d(WARN_SYNTAX)) {
404 if (is_first)
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 408 const char *t;
56da5a46
RGS
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
551405c4 413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
414 }
415 else {
416 assert(s >= oldbp);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 419 }
07c798fb 420 }
3280af22 421 PL_bufptr = oldbp;
8990e307
LW
422}
423
ffb4593c
NT
424/*
425 * S_missingterm
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
430 * This is fatal.
431 */
432
76e3520e 433STATIC void
cea2e8a9 434S_missingterm(pTHX_ char *s)
8990e307 435{
97aff369 436 dVAR;
8990e307
LW
437 char tmpbuf[3];
438 char q;
439 if (s) {
9d4ba2ae 440 char * const nl = strrchr(s,'\n');
d2719217 441 if (nl)
8990e307
LW
442 *nl = '\0';
443 }
9d116dd7
JH
444 else if (
445#ifdef EBCDIC
446 iscntrl(PL_multi_close)
447#else
448 PL_multi_close < 32 || PL_multi_close == 127
449#endif
450 ) {
8990e307 451 *tmpbuf = '^';
585ec06d 452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
453 tmpbuf[2] = '\0';
454 s = tmpbuf;
455 }
456 else {
eb160463 457 *tmpbuf = (char)PL_multi_close;
8990e307
LW
458 tmpbuf[1] = '\0';
459 s = tmpbuf;
460 }
461 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 463}
79072805 464
ef89dcc3 465#define FEATURE_IS_ENABLED(name) \
0d863452 466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
468/*
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
471 */
472STATIC bool
473S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
474{
97aff369 475 dVAR;
0d863452 476 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
479
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
481}
482
ffb4593c
NT
483/*
484 * Perl_deprecate
ffb4593c
NT
485 */
486
79072805 487void
bfed75c6 488Perl_deprecate(pTHX_ const char *s)
a0d0e21e 489{
599cee73 490 if (ckWARN(WARN_DEPRECATED))
9014280d 491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
492}
493
12bcd1a6 494void
bfed75c6 495Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
496{
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
499 /* */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
504
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
507 "Use of %s is deprecated", s);
508}
509
ffb4593c 510/*
9cbb5ea2
GS
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
ffb4593c
NT
513 */
514
c39cd008
GS
515#ifdef PERL_CR_FILTER
516static void
517strip_return(SV *sv)
518{
95a20fc0 519 register const char *s = SvPVX_const(sv);
9d4ba2ae 520 register const char * const e = s + SvCUR(sv);
c39cd008
GS
521 /* outer loop optimized to do nothing if there are no CR-LFs */
522 while (s < e) {
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
526 *d++ = *s++;
527 while (s < e) {
528 if (*s == '\r' && s[1] == '\n')
529 s++;
530 *d++ = *s++;
531 }
532 SvCUR(sv) -= s - d;
533 return;
534 }
535 }
536}
a868473f 537
76e3520e 538STATIC I32
c39cd008 539S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 540{
f54cb97a 541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
542 if (count > 0 && !maxlen)
543 strip_return(sv);
544 return count;
a868473f
NIS
545}
546#endif
547
ffb4593c
NT
548/*
549 * Perl_lex_start
9cbb5ea2
GS
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
ffb4593c
NT
552 */
553
a0d0e21e 554void
864dbfa3 555Perl_lex_start(pTHX_ SV *line)
79072805 556{
97aff369 557 dVAR;
cfd0369c 558 const char *s;
8990e307
LW
559 STRLEN len;
560
3280af22
NIS
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
3280af22
NIS
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
7766f137 566 SAVEVPTR(PL_lex_inpat);
3280af22 567 SAVEI32(PL_lex_inwhat);
18b09519
GS
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
573 }
574 SAVEI32(PL_nexttoke);
18b09519 575 }
57843af0 576 SAVECOPLINE(PL_curcop);
3280af22
NIS
577 SAVEPPTR(PL_bufptr);
578 SAVEPPTR(PL_bufend);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
3280af22
NIS
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
8edd5f42
RGS
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
09bef843 590 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 591 SAVESPTR(PL_lex_repl);
bebdddfc
GS
592 SAVEINT(PL_expect);
593 SAVEINT(PL_lex_expect);
3280af22
NIS
594
595 PL_lex_state = LEX_NORMAL;
596 PL_lex_defer = 0;
597 PL_expect = XSTATE;
598 PL_lex_brackets = 0;
a02a5408
JC
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
601 PL_lex_casemods = 0;
602 *PL_lex_casestack = '\0';
603 PL_lex_dojoin = 0;
604 PL_lex_starts = 0;
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
607 PL_lex_inpat = 0;
76be56bc 608 PL_nexttoke = 0;
3280af22 609 PL_lex_inwhat = 0;
09bef843 610 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
611 PL_linestr = line;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 614 s = SvPV_const(PL_linestr, len);
6f27f9a7 615 if (!len || s[len-1] != ';') {
3280af22
NIS
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 618 sv_catpvs(PL_linestr, "\n;");
8990e307 619 }
3280af22
NIS
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 623 PL_last_lop = PL_last_uni = Nullch;
3280af22 624 PL_rsfp = 0;
79072805 625}
a687059c 626
ffb4593c
NT
627/*
628 * Perl_lex_end
9cbb5ea2
GS
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
ffb4593c
NT
631 */
632
463ee0b2 633void
864dbfa3 634Perl_lex_end(pTHX)
463ee0b2 635{
97aff369 636 dVAR;
3280af22 637 PL_doextract = FALSE;
463ee0b2
LW
638}
639
ffb4593c
NT
640/*
641 * S_incline
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
57843af0 644 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 645 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
648 */
649
76e3520e 650STATIC void
cea2e8a9 651S_incline(pTHX_ char *s)
463ee0b2 652{
97aff369 653 dVAR;
463ee0b2
LW
654 char *t;
655 char *n;
73659bf1 656 char *e;
463ee0b2 657 char ch;
463ee0b2 658
57843af0 659 CopLINE_inc(PL_curcop);
463ee0b2
LW
660 if (*s++ != '#')
661 return;
bf4acbe4 662 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
663 if (strnEQ(s, "line", 4))
664 s += 4;
665 else
666 return;
084592ab 667 if (SPACE_OR_TAB(*s))
73659bf1 668 s++;
4e553d73 669 else
73659bf1 670 return;
bf4acbe4 671 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
672 if (!isDIGIT(*s))
673 return;
674 n = s;
675 while (isDIGIT(*s))
676 s++;
bf4acbe4 677 while (SPACE_OR_TAB(*s))
463ee0b2 678 s++;
73659bf1 679 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 680 s++;
73659bf1
GS
681 e = t + 1;
682 }
463ee0b2 683 else {
463ee0b2 684 for (t = s; !isSPACE(*t); t++) ;
73659bf1 685 e = t;
463ee0b2 686 }
bf4acbe4 687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
688 e++;
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
691
463ee0b2
LW
692 ch = *t;
693 *t = '\0';
f4dd75d9 694 if (t - s > 0) {
8a5ee598 695#ifndef USE_ITHREADS
c4420975 696 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
8a5ee598 703 GV **gvp, *gv2;
e66cf94c
RGS
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
706 tmpbuf = smallbuf;
707 else
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
710 tmpbuf2 = smallbuf2;
711 else
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
717 ++tmplen; ++tmplen2;
8a5ee598
RGS
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
719 if (gvp) {
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
721 if (!isGV(gv2))
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
727 }
e66cf94c
RGS
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
730 }
8a5ee598 731#endif
05ec9bb3 732 CopFILE_free(PL_curcop);
57843af0 733 CopFILE_set(PL_curcop, s);
f4dd75d9 734 }
463ee0b2 735 *t = ch;
57843af0 736 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
737}
738
ffb4593c
NT
739/*
740 * S_skipspace
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
743 */
744
76e3520e 745STATIC char *
cea2e8a9 746S_skipspace(pTHX_ register char *s)
a687059c 747{
97aff369 748 dVAR;
3280af22 749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 750 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
751 s++;
752 return s;
753 }
754 for (;;) {
fd049845 755 STRLEN prevlen;
09bef843 756 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 757 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
760 incline(s);
761 }
ffb4593c
NT
762
763 /* comment */
3280af22
NIS
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
463ee0b2 766 s++;
60e6418e 767 if (s < PL_bufend) {
463ee0b2 768 s++;
60e6418e
GS
769 if (PL_in_eval && !PL_rsfp) {
770 incline(s);
771 continue;
772 }
773 }
463ee0b2 774 }
ffb4593c
NT
775
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
779 */
09bef843
SB
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
463ee0b2 782 return s;
ffb4593c
NT
783
784 /* try to recharge the buffer */
9cbb5ea2
GS
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
787 {
788 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
789 if (PL_minus_p) {
790 sv_setpv(PL_linestr,
791 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 792 PL_minus_n = PL_minus_p = 0;
a0d0e21e 793 }
01a19ab0
NC
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
796 PL_minus_n = 0;
797 }
a0d0e21e 798 else
4147a61b 799 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
800
801 /* reset variables for next time we lex */
9cbb5ea2
GS
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
803 = SvPVX(PL_linestr);
3280af22 804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 805 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
806
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
812 */
813
3280af22
NIS
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
8990e307 818 else
3280af22
NIS
819 (void)PerlIO_close(PL_rsfp);
820 PL_rsfp = Nullfp;
463ee0b2
LW
821 return s;
822 }
ffb4593c
NT
823
824 /* not at end of file, so we only read another line */
09bef843
SB
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
828 if (PL_last_uni)
829 oldunilen = PL_last_uni - PL_bufend;
830 if (PL_last_lop)
831 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
834 s = PL_bufptr;
09bef843
SB
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
837 if (PL_last_uni)
838 PL_last_uni = s + oldunilen;
839 if (PL_last_lop)
840 PL_last_lop = s + oldloplen;
a0d0e21e 841 incline(s);
ffb4593c
NT
842
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
845 */
3280af22 846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 847 SV * const sv = NEWSV(85,0);
8990e307
LW
848
849 sv_upgrade(sv, SVt_PVMG);
3280af22 850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 851 (void)SvIOK_on(sv);
45977657 852 SvIV_set(sv, 0);
36c7798d 853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 854 }
463ee0b2 855 }
a687059c 856}
378cc40b 857
ffb4593c
NT
858/*
859 * S_check_uni
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
862 * rand + 5
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
865 */
866
76e3520e 867STATIC void
cea2e8a9 868S_check_uni(pTHX)
ba106d47 869{
97aff369 870 dVAR;
2f3197b3 871 char *s;
a0d0e21e 872 char *t;
2f3197b3 873
3280af22 874 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 875 return;
3280af22
NIS
876 while (isSPACE(*PL_last_uni))
877 PL_last_uni++;
7e2040f0 878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 879 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 880 return;
0453d815 881 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 882 const char ch = *s;
0453d815 883 *s = '\0';
9014280d 884 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 885 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
886 PL_last_uni);
887 *s = ch;
888 }
2f3197b3
LW
889}
890
ffb4593c
NT
891/*
892 * LOP : macro to build a list operator. Its behaviour has been replaced
893 * with a subroutine, S_lop() for which LOP is just another name.
894 */
895
a0d0e21e
LW
896#define LOP(f,x) return lop(f,x,s)
897
ffb4593c
NT
898/*
899 * S_lop
900 * Build a list operator (or something that might be one). The rules:
901 * - if we have a next token, then it's a list operator [why?]
902 * - if the next thing is an opening paren, then it's a function
903 * - else it's a list operator
904 */
905
76e3520e 906STATIC I32
a0be28da 907S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 908{
97aff369 909 dVAR;
79072805 910 yylval.ival = f;
35c8bce7 911 CLINE;
3280af22
NIS
912 PL_expect = x;
913 PL_bufptr = s;
914 PL_last_lop = PL_oldbufptr;
eb160463 915 PL_last_lop_op = (OPCODE)f;
3280af22 916 if (PL_nexttoke)
bbf60fe6 917 return REPORT(LSTOP);
79072805 918 if (*s == '(')
bbf60fe6 919 return REPORT(FUNC);
79072805
LW
920 s = skipspace(s);
921 if (*s == '(')
bbf60fe6 922 return REPORT(FUNC);
79072805 923 else
bbf60fe6 924 return REPORT(LSTOP);
79072805
LW
925}
926
ffb4593c
NT
927/*
928 * S_force_next
9cbb5ea2 929 * When the lexer realizes it knows the next token (for instance,
ffb4593c 930 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
931 * to know what token to return the next time the lexer is called. Caller
932 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
933 * handles the token correctly.
ffb4593c
NT
934 */
935
4e553d73 936STATIC void
cea2e8a9 937S_force_next(pTHX_ I32 type)
79072805 938{
97aff369 939 dVAR;
3280af22
NIS
940 PL_nexttype[PL_nexttoke] = type;
941 PL_nexttoke++;
942 if (PL_lex_state != LEX_KNOWNEXT) {
943 PL_lex_defer = PL_lex_state;
944 PL_lex_expect = PL_expect;
945 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
946 }
947}
948
d0a148a6
NC
949STATIC SV *
950S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
951{
97aff369 952 dVAR;
9d4ba2ae 953 SV * const sv = newSVpvn(start,len);
bfed75c6 954 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
955 SvUTF8_on(sv);
956 return sv;
957}
958
ffb4593c
NT
959/*
960 * S_force_word
961 * When the lexer knows the next thing is a word (for instance, it has
962 * just seen -> and it knows that the next char is a word char, then
963 * it calls S_force_word to stick the next word into the PL_next lookahead.
964 *
965 * Arguments:
b1b65b59 966 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
967 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
968 * int check_keyword : if true, Perl checks to make sure the word isn't
969 * a keyword (do this if the word is a label, e.g. goto FOO)
970 * int allow_pack : if true, : characters will also be allowed (require,
971 * use, etc. do this)
9cbb5ea2 972 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
973 */
974
76e3520e 975STATIC char *
cea2e8a9 976S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 977{
97aff369 978 dVAR;
463ee0b2
LW
979 register char *s;
980 STRLEN len;
4e553d73 981
463ee0b2
LW
982 start = skipspace(start);
983 s = start;
7e2040f0 984 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 985 (allow_pack && *s == ':') ||
15f0808c 986 (allow_initial_tick && *s == '\'') )
a0d0e21e 987 {
3280af22
NIS
988 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
989 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
990 return start;
991 if (token == METHOD) {
992 s = skipspace(s);
993 if (*s == '(')
3280af22 994 PL_expect = XTERM;
463ee0b2 995 else {
3280af22 996 PL_expect = XOPERATOR;
463ee0b2 997 }
79072805 998 }
d0a148a6
NC
999 PL_nextval[PL_nexttoke].opval
1000 = (OP*)newSVOP(OP_CONST,0,
1001 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 1002 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
1003 force_next(token);
1004 }
1005 return s;
1006}
1007
ffb4593c
NT
1008/*
1009 * S_force_ident
9cbb5ea2 1010 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1011 * text only contains the "foo" portion. The first argument is a pointer
1012 * to the "foo", and the second argument is the type symbol to prefix.
1013 * Forces the next token to be a "WORD".
9cbb5ea2 1014 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1015 */
1016
76e3520e 1017STATIC void
bfed75c6 1018S_force_ident(pTHX_ register const char *s, int kind)
79072805 1019{
97aff369 1020 dVAR;
79072805 1021 if (s && *s) {
bfed75c6 1022 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpv(s,0));
3280af22 1023 PL_nextval[PL_nexttoke].opval = o;
79072805 1024 force_next(WORD);
748a9306 1025 if (kind) {
11343788 1026 o->op_private = OPpCONST_ENTERED;
55497cff
PP
1027 /* XXX see note in pp_entereval() for why we forgo typo
1028 warnings if the symbol must be introduced in an eval.
1029 GSAR 96-10-12 */
f776e3cd 1030 gv_fetchpv(s, PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
a0d0e21e
LW
1031 kind == '$' ? SVt_PV :
1032 kind == '@' ? SVt_PVAV :
1033 kind == '%' ? SVt_PVHV :
1034 SVt_PVGV
1035 );
748a9306 1036 }
79072805
LW
1037 }
1038}
1039
1571675a
GS
1040NV
1041Perl_str_to_version(pTHX_ SV *sv)
1042{
1043 NV retval = 0.0;
1044 NV nshift = 1.0;
1045 STRLEN len;
cfd0369c 1046 const char *start = SvPV_const(sv,len);
9d4ba2ae 1047 const char * const end = start + len;
504618e9 1048 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1049 while (start < end) {
ba210ebe 1050 STRLEN skip;
1571675a
GS
1051 UV n;
1052 if (utf)
9041c2e3 1053 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1054 else {
1055 n = *(U8*)start;
1056 skip = 1;
1057 }
1058 retval += ((NV)n)/nshift;
1059 start += skip;
1060 nshift *= 1000;
1061 }
1062 return retval;
1063}
1064
4e553d73 1065/*
ffb4593c
NT
1066 * S_force_version
1067 * Forces the next token to be a version number.
e759cc13
RGS
1068 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1069 * and if "guessing" is TRUE, then no new token is created (and the caller
1070 * must use an alternative parsing method).
ffb4593c
NT
1071 */
1072
76e3520e 1073STATIC char *
e759cc13 1074S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1075{
97aff369 1076 dVAR;
89bfa8cd 1077 OP *version = Nullop;
44dcb63b 1078 char *d;
89bfa8cd
PP
1079
1080 s = skipspace(s);
1081
44dcb63b 1082 d = s;
dd629d5b 1083 if (*d == 'v')
44dcb63b 1084 d++;
44dcb63b 1085 if (isDIGIT(*d)) {
e759cc13
RGS
1086 while (isDIGIT(*d) || *d == '_' || *d == '.')
1087 d++;
9f3d182e 1088 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1089 SV *ver;
b73d6f50 1090 s = scan_num(s, &yylval);
89bfa8cd 1091 version = yylval.opval;
dd629d5b
GS
1092 ver = cSVOPx(version)->op_sv;
1093 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1094 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1095 SvNV_set(ver, str_to_version(ver));
1571675a 1096 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1097 }
89bfa8cd 1098 }
e759cc13
RGS
1099 else if (guessing)
1100 return s;
89bfa8cd
PP
1101 }
1102
1103 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1104 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1105 force_next(WORD);
89bfa8cd 1106
e759cc13 1107 return s;
89bfa8cd
PP
1108}
1109
ffb4593c
NT
1110/*
1111 * S_tokeq
1112 * Tokenize a quoted string passed in as an SV. It finds the next
1113 * chunk, up to end of string or a backslash. It may make a new
1114 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1115 * turns \\ into \.
1116 */
1117
76e3520e 1118STATIC SV *
cea2e8a9 1119S_tokeq(pTHX_ SV *sv)
79072805 1120{
97aff369 1121 dVAR;
79072805
LW
1122 register char *s;
1123 register char *send;
1124 register char *d;
b3ac6de7
IZ
1125 STRLEN len = 0;
1126 SV *pv = sv;
79072805
LW
1127
1128 if (!SvLEN(sv))
b3ac6de7 1129 goto finish;
79072805 1130
a0d0e21e 1131 s = SvPV_force(sv, len);
21a311ee 1132 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1133 goto finish;
463ee0b2 1134 send = s + len;
79072805
LW
1135 while (s < send && *s != '\\')
1136 s++;
1137 if (s == send)
b3ac6de7 1138 goto finish;
79072805 1139 d = s;
be4731d2 1140 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1141 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1142 if (SvUTF8(sv))
1143 SvUTF8_on(pv);
1144 }
79072805
LW
1145 while (s < send) {
1146 if (*s == '\\') {
a0d0e21e 1147 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1148 s++; /* all that, just for this */
1149 }
1150 *d++ = *s++;
1151 }
1152 *d = '\0';
95a20fc0 1153 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1154 finish:
3280af22 1155 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1156 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1157 return sv;
1158}
1159
ffb4593c
NT
1160/*
1161 * Now come three functions related to double-quote context,
1162 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1163 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1164 * interact with PL_lex_state, and create fake ( ... ) argument lists
1165 * to handle functions and concatenation.
1166 * They assume that whoever calls them will be setting up a fake
1167 * join call, because each subthing puts a ',' after it. This lets
1168 * "lower \luPpEr"
1169 * become
1170 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1171 *
1172 * (I'm not sure whether the spurious commas at the end of lcfirst's
1173 * arguments and join's arguments are created or not).
1174 */
1175
1176/*
1177 * S_sublex_start
1178 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1179 *
1180 * Pattern matching will set PL_lex_op to the pattern-matching op to
1181 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1182 *
1183 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1184 *
1185 * Everything else becomes a FUNC.
1186 *
1187 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1188 * had an OP_CONST or OP_READLINE). This just sets us up for a
1189 * call to S_sublex_push().
1190 */
1191
76e3520e 1192STATIC I32
cea2e8a9 1193S_sublex_start(pTHX)
79072805 1194{
97aff369 1195 dVAR;
0d46e09a 1196 register const I32 op_type = yylval.ival;
79072805
LW
1197
1198 if (op_type == OP_NULL) {
3280af22
NIS
1199 yylval.opval = PL_lex_op;
1200 PL_lex_op = Nullop;
79072805
LW
1201 return THING;
1202 }
1203 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1204 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1205
1206 if (SvTYPE(sv) == SVt_PVIV) {
1207 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1208 STRLEN len;
cfd0369c 1209 const char *p = SvPV_const(sv, len);
f54cb97a 1210 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1211 if (SvUTF8(sv))
1212 SvUTF8_on(nsv);
b3ac6de7
IZ
1213 SvREFCNT_dec(sv);
1214 sv = nsv;
4e553d73 1215 }
b3ac6de7 1216 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1217 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1218 /* Allow <FH> // "foo" */
1219 if (op_type == OP_READLINE)
1220 PL_expect = XTERMORDORDOR;
79072805
LW
1221 return THING;
1222 }
1223
3280af22
NIS
1224 PL_sublex_info.super_state = PL_lex_state;
1225 PL_sublex_info.sub_inwhat = op_type;
1226 PL_sublex_info.sub_op = PL_lex_op;
1227 PL_lex_state = LEX_INTERPPUSH;
55497cff 1228
3280af22
NIS
1229 PL_expect = XTERM;
1230 if (PL_lex_op) {
1231 yylval.opval = PL_lex_op;
1232 PL_lex_op = Nullop;
55497cff
PP
1233 return PMFUNC;
1234 }
1235 else
1236 return FUNC;
1237}
1238
ffb4593c
NT
1239/*
1240 * S_sublex_push
1241 * Create a new scope to save the lexing state. The scope will be
1242 * ended in S_sublex_done. Returns a '(', starting the function arguments
1243 * to the uc, lc, etc. found before.
1244 * Sets PL_lex_state to LEX_INTERPCONCAT.
1245 */
1246
76e3520e 1247STATIC I32
cea2e8a9 1248S_sublex_push(pTHX)
55497cff 1249{
27da23d5 1250 dVAR;
f46d017c 1251 ENTER;
55497cff 1252
3280af22
NIS
1253 PL_lex_state = PL_sublex_info.super_state;
1254 SAVEI32(PL_lex_dojoin);
1255 SAVEI32(PL_lex_brackets);
3280af22
NIS
1256 SAVEI32(PL_lex_casemods);
1257 SAVEI32(PL_lex_starts);
1258 SAVEI32(PL_lex_state);
7766f137 1259 SAVEVPTR(PL_lex_inpat);
3280af22 1260 SAVEI32(PL_lex_inwhat);
57843af0 1261 SAVECOPLINE(PL_curcop);
3280af22 1262 SAVEPPTR(PL_bufptr);
8452ff4b 1263 SAVEPPTR(PL_bufend);
3280af22
NIS
1264 SAVEPPTR(PL_oldbufptr);
1265 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1266 SAVEPPTR(PL_last_lop);
1267 SAVEPPTR(PL_last_uni);
3280af22
NIS
1268 SAVEPPTR(PL_linestart);
1269 SAVESPTR(PL_linestr);
8edd5f42
RGS
1270 SAVEGENERICPV(PL_lex_brackstack);
1271 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1272
1273 PL_linestr = PL_lex_stuff;
1274 PL_lex_stuff = Nullsv;
1275
9cbb5ea2
GS
1276 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1277 = SvPVX(PL_linestr);
3280af22 1278 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1279 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1280 SAVEFREESV(PL_linestr);
1281
1282 PL_lex_dojoin = FALSE;
1283 PL_lex_brackets = 0;
a02a5408
JC
1284 Newx(PL_lex_brackstack, 120, char);
1285 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1286 PL_lex_casemods = 0;
1287 *PL_lex_casestack = '\0';
1288 PL_lex_starts = 0;
1289 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1290 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1291
1292 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1293 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1294 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1295 else
3280af22 1296 PL_lex_inpat = Nullop;
79072805 1297
55497cff 1298 return '(';
79072805
LW
1299}
1300
ffb4593c
NT
1301/*
1302 * S_sublex_done
1303 * Restores lexer state after a S_sublex_push.
1304 */
1305
76e3520e 1306STATIC I32
cea2e8a9 1307S_sublex_done(pTHX)
79072805 1308{
27da23d5 1309 dVAR;
3280af22 1310 if (!PL_lex_starts++) {
396482e1 1311 SV * const sv = newSVpvs("");
9aa983d2
JH
1312 if (SvUTF8(PL_linestr))
1313 SvUTF8_on(sv);
3280af22 1314 PL_expect = XOPERATOR;
9aa983d2 1315 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1316 return THING;
1317 }
1318
3280af22
NIS
1319 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1320 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1321 return yylex();
79072805
LW
1322 }
1323
ffb4593c 1324 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1325 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1326 PL_linestr = PL_lex_repl;
1327 PL_lex_inpat = 0;
1328 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1329 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1330 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1331 SAVEFREESV(PL_linestr);
1332 PL_lex_dojoin = FALSE;
1333 PL_lex_brackets = 0;
3280af22
NIS
1334 PL_lex_casemods = 0;
1335 *PL_lex_casestack = '\0';
1336 PL_lex_starts = 0;
25da4f38 1337 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1338 PL_lex_state = LEX_INTERPNORMAL;
1339 PL_lex_starts++;
e9fa98b2
HS
1340 /* we don't clear PL_lex_repl here, so that we can check later
1341 whether this is an evalled subst; that means we rely on the
1342 logic to ensure sublex_done() is called again only via the
1343 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1344 }
e9fa98b2 1345 else {
3280af22 1346 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1347 PL_lex_repl = Nullsv;
1348 }
79072805 1349 return ',';
ffed7fef
LW
1350 }
1351 else {
f46d017c 1352 LEAVE;
3280af22
NIS
1353 PL_bufend = SvPVX(PL_linestr);
1354 PL_bufend += SvCUR(PL_linestr);
1355 PL_expect = XOPERATOR;
09bef843 1356 PL_sublex_info.sub_inwhat = 0;
79072805 1357 return ')';
ffed7fef
LW
1358 }
1359}
1360
02aa26ce
NT
1361/*
1362 scan_const
1363
1364 Extracts a pattern, double-quoted string, or transliteration. This
1365 is terrifying code.
1366
3280af22
NIS
1367 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1368 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1369 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1370
9b599b2a
GS
1371 Returns a pointer to the character scanned up to. Iff this is
1372 advanced from the start pointer supplied (ie if anything was
1373 successfully parsed), will leave an OP for the substring scanned
1374 in yylval. Caller must intuit reason for not parsing further
1375 by looking at the next characters herself.
1376
02aa26ce
NT
1377 In patterns:
1378 backslashes:
1379 double-quoted style: \r and \n
1380 regexp special ones: \D \s
1381 constants: \x3
1382 backrefs: \1 (deprecated in substitution replacements)
1383 case and quoting: \U \Q \E
1384 stops on @ and $, but not for $ as tail anchor
1385
1386 In transliterations:
1387 characters are VERY literal, except for - not at the start or end
1388 of the string, which indicates a range. scan_const expands the
1389 range to the full set of intermediate characters.
1390
1391 In double-quoted strings:
1392 backslashes:
1393 double-quoted style: \r and \n
1394 constants: \x3
1395 backrefs: \1 (deprecated)
1396 case and quoting: \U \Q \E
1397 stops on @ and $
1398
1399 scan_const does *not* construct ops to handle interpolated strings.
1400 It stops processing as soon as it finds an embedded $ or @ variable
1401 and leaves it to the caller to work out what's going on.
1402
da6eedaa 1403 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1404
1405 $ in pattern could be $foo or could be tail anchor. Assumption:
1406 it's a tail anchor if $ is the last thing in the string, or if it's
1407 followed by one of ")| \n\t"
1408
1409 \1 (backreferences) are turned into $1
1410
1411 The structure of the code is
1412 while (there's a character to process) {
1413 handle transliteration ranges
1414 skip regexp comments
1415 skip # initiated comments in //x patterns
1416 check for embedded @foo
1417 check for embedded scalars
1418 if (backslash) {
1419 leave intact backslashes from leave (below)
1420 deprecate \1 in strings and sub replacements
1421 handle string-changing backslashes \l \U \Q \E, etc.
1422 switch (what was escaped) {
1423 handle - in a transliteration (becomes a literal -)
1424 handle \132 octal characters
1425 handle 0x15 hex characters
1426 handle \cV (control V)
1427 handle printf backslashes (\f, \r, \n, etc)
1428 } (end switch)
1429 } (end if backslash)
1430 } (end while character to read)
4e553d73 1431
02aa26ce
NT
1432*/
1433
76e3520e 1434STATIC char *
cea2e8a9 1435S_scan_const(pTHX_ char *start)
79072805 1436{
97aff369 1437 dVAR;
3280af22 1438 register char *send = PL_bufend; /* end of the constant */
02aa26ce
NT
1439 SV *sv = NEWSV(93, send - start); /* sv for the constant */
1440 register char *s = start; /* start of the constant */
1441 register char *d = SvPVX(sv); /* destination for copies */
1442 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1443 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1444 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1445 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1446 UV uv;
4c3a8340
ST
1447#ifdef EBCDIC
1448 UV literal_endpoint = 0;
1449#endif
012bcf8d 1450
dff6d3cd 1451 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1452 PL_lex_inpat
b6d5fef8 1453 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1454 : "";
79072805 1455
2b9d42f0
NIS
1456 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1457 /* If we are doing a trans and we know we want UTF8 set expectation */
1458 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1459 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1460 }
1461
1462
79072805 1463 while (s < send || dorange) {
02aa26ce 1464 /* get transliterations out of the way (they're most literal) */
3280af22 1465 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1466 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1467 if (dorange) {
1ba5c669
JH
1468 I32 i; /* current expanded character */
1469 I32 min; /* first character in range */
1470 I32 max; /* last character in range */
02aa26ce 1471
2b9d42f0 1472 if (has_utf8) {
9d4ba2ae 1473 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1474 char *e = d++;
1475 while (e-- > c)
1476 *(e + 1) = *e;
25716404 1477 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1478 /* mark the range as done, and continue */
1479 dorange = FALSE;
1480 didrange = TRUE;
1481 continue;
1482 }
2b9d42f0 1483
95a20fc0 1484 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1485 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1486 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1487 d -= 2; /* eat the first char and the - */
1488
8ada0baa
JH
1489 min = (U8)*d; /* first char in range */
1490 max = (U8)d[1]; /* last char in range */
1491
c2e66d9e 1492 if (min > max) {
01ec43d0 1493 Perl_croak(aTHX_
d1573ac7 1494 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1495 (char)min, (char)max);
c2e66d9e
GS
1496 }
1497
c7f1f016 1498#ifdef EBCDIC
4c3a8340
ST
1499 if (literal_endpoint == 2 &&
1500 ((isLOWER(min) && isLOWER(max)) ||
1501 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1502 if (isLOWER(min)) {
1503 for (i = min; i <= max; i++)
1504 if (isLOWER(i))
db42d148 1505 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1506 } else {
1507 for (i = min; i <= max; i++)
1508 if (isUPPER(i))
db42d148 1509 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1510 }
1511 }
1512 else
1513#endif
1514 for (i = min; i <= max; i++)
eb160463 1515 *d++ = (char)i;
02aa26ce
NT
1516
1517 /* mark the range as done, and continue */
79072805 1518 dorange = FALSE;
01ec43d0 1519 didrange = TRUE;
4c3a8340
ST
1520#ifdef EBCDIC
1521 literal_endpoint = 0;
1522#endif
79072805 1523 continue;
4e553d73 1524 }
02aa26ce
NT
1525
1526 /* range begins (ignore - as first or last char) */
79072805 1527 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1528 if (didrange) {
1fafa243 1529 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1530 }
2b9d42f0 1531 if (has_utf8) {
25716404 1532 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1533 s++;
1534 continue;
1535 }
79072805
LW
1536 dorange = TRUE;
1537 s++;
01ec43d0
GS
1538 }
1539 else {
1540 didrange = FALSE;
4c3a8340
ST
1541#ifdef EBCDIC
1542 literal_endpoint = 0;
1543#endif
01ec43d0 1544 }
79072805 1545 }
02aa26ce
NT
1546
1547 /* if we get here, we're not doing a transliteration */
1548
0f5d15d6
IZ
1549 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1550 except for the last char, which will be done separately. */
3280af22 1551 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1552 if (s[2] == '#') {
e994fd66 1553 while (s+1 < send && *s != ')')
db42d148 1554 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1555 }
1556 else if (s[2] == '{' /* This should match regcomp.c */
1557 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1558 {
cc6b7395 1559 I32 count = 1;
0f5d15d6 1560 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1561 char c;
1562
d9f97599
GS
1563 while (count && (c = *regparse)) {
1564 if (c == '\\' && regparse[1])
1565 regparse++;
4e553d73 1566 else if (c == '{')
cc6b7395 1567 count++;
4e553d73 1568 else if (c == '}')
cc6b7395 1569 count--;
d9f97599 1570 regparse++;
cc6b7395 1571 }
e994fd66 1572 if (*regparse != ')')
5bdf89e7 1573 regparse--; /* Leave one char for continuation. */
0f5d15d6 1574 while (s < regparse)
db42d148 1575 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1576 }
748a9306 1577 }
02aa26ce
NT
1578
1579 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1580 else if (*s == '#' && PL_lex_inpat &&
1581 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1582 while (s+1 < send && *s != '\n')
db42d148 1583 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1584 }
02aa26ce 1585
5d1d4326 1586 /* check for embedded arrays
da6eedaa 1587 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1588 */
7e2040f0 1589 else if (*s == '@' && s[1]
5d1d4326 1590 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1591 break;
02aa26ce
NT
1592
1593 /* check for embedded scalars. only stop if we're sure it's a
1594 variable.
1595 */
79072805 1596 else if (*s == '$') {
3280af22 1597 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1598 break;
6002328a 1599 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1600 break; /* in regexp, $ might be tail anchor */
1601 }
02aa26ce 1602
2b9d42f0
NIS
1603 /* End of else if chain - OP_TRANS rejoin rest */
1604
02aa26ce 1605 /* backslashes */
79072805
LW
1606 if (*s == '\\' && s+1 < send) {
1607 s++;
02aa26ce
NT
1608
1609 /* some backslashes we leave behind */
c9f97d15 1610 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1611 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1612 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1613 continue;
1614 }
02aa26ce
NT
1615
1616 /* deprecate \1 in strings and substitution replacements */
3280af22 1617 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1618 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1619 {
599cee73 1620 if (ckWARN(WARN_SYNTAX))
9014280d 1621 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1622 *--s = '$';
1623 break;
1624 }
02aa26ce
NT
1625
1626 /* string-change backslash escapes */
3280af22 1627 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1628 --s;
1629 break;
1630 }
02aa26ce
NT
1631
1632 /* if we get here, it's either a quoted -, or a digit */
79072805 1633 switch (*s) {
02aa26ce
NT
1634
1635 /* quoted - in transliterations */
79072805 1636 case '-':
3280af22 1637 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1638 *d++ = *s++;
1639 continue;
1640 }
1641 /* FALL THROUGH */
1642 default:
11b8faa4 1643 {
041457d9
DM
1644 if (isALNUM(*s) &&
1645 *s != '_' &&
1646 ckWARN(WARN_MISC))
9014280d 1647 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1648 "Unrecognized escape \\%c passed through",
1649 *s);
1650 /* default action is to copy the quoted character */
f9a63242 1651 goto default_action;
11b8faa4 1652 }
02aa26ce
NT
1653
1654 /* \132 indicates an octal constant */
79072805
LW
1655 case '0': case '1': case '2': case '3':
1656 case '4': case '5': case '6': case '7':
ba210ebe 1657 {
53305cf1
NC
1658 I32 flags = 0;
1659 STRLEN len = 3;
1660 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1661 s += len;
1662 }
012bcf8d 1663 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1664
1665 /* \x24 indicates a hex constant */
79072805 1666 case 'x':
a0ed51b3
LW
1667 ++s;
1668 if (*s == '{') {
9d4ba2ae 1669 char* const e = strchr(s, '}');
a4c04bdc
NC
1670 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1671 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1672 STRLEN len;
355860ce 1673
53305cf1 1674 ++s;
adaeee49 1675 if (!e) {
a0ed51b3 1676 yyerror("Missing right brace on \\x{}");
355860ce 1677 continue;
ba210ebe 1678 }
53305cf1
NC
1679 len = e - s;
1680 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1681 s = e + 1;
a0ed51b3
LW
1682 }
1683 else {
ba210ebe 1684 {
53305cf1 1685 STRLEN len = 2;
a4c04bdc 1686 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1687 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1688 s += len;
1689 }
012bcf8d
GS
1690 }
1691
1692 NUM_ESCAPE_INSERT:
1693 /* Insert oct or hex escaped character.
301d3d20 1694 * There will always enough room in sv since such
db42d148 1695 * escapes will be longer than any UTF-8 sequence
301d3d20 1696 * they can end up as. */
ba7cea30 1697
c7f1f016
NIS
1698 /* We need to map to chars to ASCII before doing the tests
1699 to cover EBCDIC
1700 */
c4d5f83a 1701 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1702 if (!has_utf8 && uv > 255) {
301d3d20
JH
1703 /* Might need to recode whatever we have
1704 * accumulated so far if it contains any
1705 * hibit chars.
1706 *
1707 * (Can't we keep track of that and avoid
1708 * this rescan? --jhi)
012bcf8d 1709 */
c7f1f016 1710 int hicount = 0;
63cd0674
NIS
1711 U8 *c;
1712 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1713 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1714 hicount++;
db42d148 1715 }
012bcf8d 1716 }
63cd0674 1717 if (hicount) {
9d4ba2ae 1718 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1719 U8 *src, *dst;
1720 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1721 src = (U8 *)d - 1;
1722 dst = src+hicount;
1723 d += hicount;
cfd0369c 1724 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1725 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1726 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1727 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1728 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1729 }
1730 else {
63cd0674 1731 *dst-- = *src;
012bcf8d 1732 }
c7f1f016 1733 src--;
012bcf8d
GS
1734 }
1735 }
1736 }
1737
9aa983d2 1738 if (has_utf8 || uv > 255) {
9041c2e3 1739 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1740 has_utf8 = TRUE;
f9a63242
JH
1741 if (PL_lex_inwhat == OP_TRANS &&
1742 PL_sublex_info.sub_op) {
1743 PL_sublex_info.sub_op->op_private |=
1744 (PL_lex_repl ? OPpTRANS_FROM_UTF
1745 : OPpTRANS_TO_UTF);
f9a63242 1746 }
012bcf8d 1747 }
a0ed51b3 1748 else {
012bcf8d 1749 *d++ = (char)uv;
a0ed51b3 1750 }
012bcf8d
GS
1751 }
1752 else {
c4d5f83a 1753 *d++ = (char) uv;
a0ed51b3 1754 }
79072805 1755 continue;
02aa26ce 1756
b239daa5 1757 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1758 case 'N':
55eda711 1759 ++s;
423cee85
JH
1760 if (*s == '{') {
1761 char* e = strchr(s, '}');
155aba94 1762 SV *res;
423cee85 1763 STRLEN len;
cfd0369c 1764 const char *str;
4e553d73 1765
423cee85 1766 if (!e) {
5777a3f7 1767 yyerror("Missing right brace on \\N{}");
423cee85
JH
1768 e = s - 1;
1769 goto cont_scan;
1770 }
dbc0d4f2
JH
1771 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1772 /* \N{U+...} */
1773 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1774 PERL_SCAN_DISALLOW_PREFIX;
1775 s += 3;
1776 len = e - s;
1777 uv = grok_hex(s, &len, &flags, NULL);
1778 s = e + 1;
1779 goto NUM_ESCAPE_INSERT;
1780 }
55eda711
JH
1781 res = newSVpvn(s + 1, e - s - 1);
1782 res = new_constant( Nullch, 0, "charnames",
1783 res, Nullsv, "\\N{...}" );
f9a63242
JH
1784 if (has_utf8)
1785 sv_utf8_upgrade(res);
cfd0369c 1786 str = SvPV_const(res,len);
1c47067b
JH
1787#ifdef EBCDIC_NEVER_MIND
1788 /* charnames uses pack U and that has been
1789 * recently changed to do the below uni->native
1790 * mapping, so this would be redundant (and wrong,
1791 * the code point would be doubly converted).
1792 * But leave this in just in case the pack U change
1793 * gets revoked, but the semantics is still
1794 * desireable for charnames. --jhi */
cddc7ef4 1795 {
cfd0369c 1796 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1797
1798 if (uv < 0x100) {
89ebb4a3 1799 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1800
1801 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1802 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1803 str = SvPV_const(res, len);
cddc7ef4
JH
1804 }
1805 }
1806#endif
89491803 1807 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1808 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1809 SvCUR_set(sv, d - ostart);
1810 SvPOK_on(sv);
e4f3eed8 1811 *d = '\0';
f08d6ad9 1812 sv_utf8_upgrade(sv);
d2f449dd 1813 /* this just broke our allocation above... */
eb160463 1814 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1815 d = SvPVX(sv) + SvCUR(sv);
89491803 1816 has_utf8 = TRUE;
f08d6ad9 1817 }
eb160463 1818 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1819 const char * const odest = SvPVX_const(sv);
423cee85 1820
8973db79 1821 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1822 d = SvPVX(sv) + (d - odest);
1823 }
1824 Copy(str, d, len, char);
1825 d += len;
1826 SvREFCNT_dec(res);
1827 cont_scan:
1828 s = e + 1;
1829 }
1830 else
5777a3f7 1831 yyerror("Missing braces on \\N{}");
423cee85
JH
1832 continue;
1833
02aa26ce 1834 /* \c is a control character */
79072805
LW
1835 case 'c':
1836 s++;
961ce445 1837 if (s < send) {
ba210ebe 1838 U8 c = *s++;
c7f1f016
NIS
1839#ifdef EBCDIC
1840 if (isLOWER(c))
1841 c = toUPPER(c);
1842#endif
db42d148 1843 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1844 }
961ce445
RGS
1845 else {
1846 yyerror("Missing control char name in \\c");
1847 }
79072805 1848 continue;
02aa26ce
NT
1849
1850 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1851 case 'b':
db42d148 1852 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1853 break;
1854 case 'n':
db42d148 1855 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1856 break;
1857 case 'r':
db42d148 1858 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1859 break;
1860 case 'f':
db42d148 1861 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1862 break;
1863 case 't':
db42d148 1864 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1865 break;
34a3fe2a 1866 case 'e':
db42d148 1867 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1868 break;
1869 case 'a':
db42d148 1870 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1871 break;
02aa26ce
NT
1872 } /* end switch */
1873
79072805
LW
1874 s++;
1875 continue;
02aa26ce 1876 } /* end if (backslash) */
4c3a8340
ST
1877#ifdef EBCDIC
1878 else
1879 literal_endpoint++;
1880#endif
02aa26ce 1881
f9a63242 1882 default_action:
2b9d42f0
NIS
1883 /* If we started with encoded form, or already know we want it
1884 and then encode the next character */
1885 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1886 STRLEN len = 1;
9d4ba2ae
AL
1887 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1888 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1889 s += len;
1890 if (need > len) {
1891 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1892 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1893 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1894 }
1895 d = (char*)uvchr_to_utf8((U8*)d, uv);
1896 has_utf8 = TRUE;
1897 }
1898 else {
1899 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1900 }
02aa26ce
NT
1901 } /* while loop to process each character */
1902
1903 /* terminate the string and set up the sv */
79072805 1904 *d = '\0';
95a20fc0 1905 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1906 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1907 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1908
79072805 1909 SvPOK_on(sv);
9f4817db 1910 if (PL_encoding && !has_utf8) {
d0063567
DK
1911 sv_recode_to_utf8(sv, PL_encoding);
1912 if (SvUTF8(sv))
1913 has_utf8 = TRUE;
9f4817db 1914 }
2b9d42f0 1915 if (has_utf8) {
7e2040f0 1916 SvUTF8_on(sv);
2b9d42f0 1917 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1918 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1919 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1920 }
1921 }
79072805 1922
02aa26ce 1923 /* shrink the sv if we allocated more than we used */
79072805 1924 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1925 SvPV_shrink_to_cur(sv);
79072805 1926 }
02aa26ce 1927
9b599b2a 1928 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1929 if (s > PL_bufptr) {
1930 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1931 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1932 sv, Nullsv,
4e553d73 1933 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1934 ? "tr"
3280af22 1935 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1936 ? "s"
1937 : "qq")));
79072805 1938 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1939 } else
8990e307 1940 SvREFCNT_dec(sv);
79072805
LW
1941 return s;
1942}
1943
ffb4593c
NT
1944/* S_intuit_more
1945 * Returns TRUE if there's more to the expression (e.g., a subscript),
1946 * FALSE otherwise.
ffb4593c
NT
1947 *
1948 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1949 *
1950 * ->[ and ->{ return TRUE
1951 * { and [ outside a pattern are always subscripts, so return TRUE
1952 * if we're outside a pattern and it's not { or [, then return FALSE
1953 * if we're in a pattern and the first char is a {
1954 * {4,5} (any digits around the comma) returns FALSE
1955 * if we're in a pattern and the first char is a [
1956 * [] returns FALSE
1957 * [SOMETHING] has a funky algorithm to decide whether it's a
1958 * character class or not. It has to deal with things like
1959 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1960 * anything else returns TRUE
1961 */
1962
9cbb5ea2
GS
1963/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1964
76e3520e 1965STATIC int
cea2e8a9 1966S_intuit_more(pTHX_ register char *s)
79072805 1967{
97aff369 1968 dVAR;
3280af22 1969 if (PL_lex_brackets)
79072805
LW
1970 return TRUE;
1971 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1972 return TRUE;
1973 if (*s != '{' && *s != '[')
1974 return FALSE;
3280af22 1975 if (!PL_lex_inpat)
79072805
LW
1976 return TRUE;
1977
1978 /* In a pattern, so maybe we have {n,m}. */
1979 if (*s == '{') {
1980 s++;
1981 if (!isDIGIT(*s))
1982 return TRUE;
1983 while (isDIGIT(*s))
1984 s++;
1985 if (*s == ',')
1986 s++;
1987 while (isDIGIT(*s))
1988 s++;
1989 if (*s == '}')
1990 return FALSE;
1991 return TRUE;
1992
1993 }
1994
1995 /* On the other hand, maybe we have a character class */
1996
1997 s++;
1998 if (*s == ']' || *s == '^')
1999 return FALSE;
2000 else {
ffb4593c 2001 /* this is terrifying, and it works */
79072805
LW
2002 int weight = 2; /* let's weigh the evidence */
2003 char seen[256];
f27ffc4a 2004 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2005 const char * const send = strchr(s,']');
3280af22 2006 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2007
2008 if (!send) /* has to be an expression */
2009 return TRUE;
2010
2011 Zero(seen,256,char);
2012 if (*s == '$')
2013 weight -= 3;
2014 else if (isDIGIT(*s)) {
2015 if (s[1] != ']') {
2016 if (isDIGIT(s[1]) && s[2] == ']')
2017 weight -= 10;
2018 }
2019 else
2020 weight -= 100;
2021 }
2022 for (; s < send; s++) {
2023 last_un_char = un_char;
2024 un_char = (unsigned char)*s;
2025 switch (*s) {
2026 case '@':
2027 case '&':
2028 case '$':
2029 weight -= seen[un_char] * 10;
7e2040f0 2030 if (isALNUM_lazy_if(s+1,UTF)) {
8903cb82 2031 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
f776e3cd
NC
2032 if ((int)strlen(tmpbuf) > 1
2033 && gv_fetchpv(tmpbuf, 0, SVt_PV))
79072805
LW
2034 weight -= 100;
2035 else
2036 weight -= 10;
2037 }
2038 else if (*s == '$' && s[1] &&
93a17b20
LW
2039 strchr("[#!%*<>()-=",s[1])) {
2040 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2041 weight -= 10;
2042 else
2043 weight -= 1;
2044 }
2045 break;
2046 case '\\':
2047 un_char = 254;
2048 if (s[1]) {
93a17b20 2049 if (strchr("wds]",s[1]))
79072805
LW
2050 weight += 100;
2051 else if (seen['\''] || seen['"'])
2052 weight += 1;
93a17b20 2053 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2054 weight += 40;
2055 else if (isDIGIT(s[1])) {
2056 weight += 40;
2057 while (s[1] && isDIGIT(s[1]))
2058 s++;
2059 }
2060 }
2061 else
2062 weight += 100;
2063 break;
2064 case '-':
2065 if (s[1] == '\\')
2066 weight += 50;
93a17b20 2067 if (strchr("aA01! ",last_un_char))
79072805 2068 weight += 30;
93a17b20 2069 if (strchr("zZ79~",s[1]))
79072805 2070 weight += 30;
f27ffc4a
GS
2071 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2072 weight -= 5; /* cope with negative subscript */
79072805
LW
2073 break;
2074 default:
3792a11b
NC
2075 if (!isALNUM(last_un_char)
2076 && !(last_un_char == '$' || last_un_char == '@'
2077 || last_un_char == '&')
2078 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2079 char *d = tmpbuf;
2080 while (isALPHA(*s))
2081 *d++ = *s++;
2082 *d = '\0';
2083 if (keyword(tmpbuf, d - tmpbuf))
2084 weight -= 150;
2085 }
2086 if (un_char == last_un_char + 1)
2087 weight += 5;
2088 weight -= seen[un_char];
2089 break;
2090 }
2091 seen[un_char]++;
2092 }
2093 if (weight >= 0) /* probably a character class */
2094 return FALSE;
2095 }
2096
2097 return TRUE;
2098}
ffed7fef 2099
ffb4593c
NT
2100/*
2101 * S_intuit_method
2102 *
2103 * Does all the checking to disambiguate
2104 * foo bar
2105 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2106 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2107 *
2108 * First argument is the stuff after the first token, e.g. "bar".
2109 *
2110 * Not a method if bar is a filehandle.
2111 * Not a method if foo is a subroutine prototyped to take a filehandle.
2112 * Not a method if it's really "Foo $bar"
2113 * Method if it's "foo $bar"
2114 * Not a method if it's really "print foo $bar"
2115 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2116 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2117 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2118 * =>
2119 */
2120
76e3520e 2121STATIC int
62d55b22 2122S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2123{
97aff369 2124 dVAR;
a0d0e21e 2125 char *s = start + (*start == '$');
3280af22 2126 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2127 STRLEN len;
2128 GV* indirgv;
2129
2130 if (gv) {
62d55b22 2131 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2132 return 0;
62d55b22
NC
2133 if (cv) {
2134 if (SvPOK(cv)) {
2135 const char *proto = SvPVX_const(cv);
2136 if (proto) {
2137 if (*proto == ';')
2138 proto++;
2139 if (*proto == '*')
2140 return 0;
2141 }
b6c543e3
IZ
2142 }
2143 } else
a0d0e21e
LW
2144 gv = 0;
2145 }
8903cb82 2146 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2147 /* start is the beginning of the possible filehandle/object,
2148 * and s is the end of it
2149 * tmpbuf is a copy of it
2150 */
2151
a0d0e21e 2152 if (*start == '$') {
3280af22 2153 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2154 return 0;
2155 s = skipspace(s);
3280af22
NIS
2156 PL_bufptr = start;
2157 PL_expect = XREF;
a0d0e21e
LW
2158 return *s == '(' ? FUNCMETH : METHOD;
2159 }
2160 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2161 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2162 len -= 2;
2163 tmpbuf[len] = '\0';
2164 goto bare_package;
2165 }
f776e3cd 2166 indirgv = gv_fetchpv(tmpbuf, 0, SVt_PVCV);
8ebc5c01 2167 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2168 return 0;
2169 /* filehandle or package name makes it a method */
89bfa8cd 2170 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2171 s = skipspace(s);
3280af22 2172 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2173 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2174 bare_package:
3280af22 2175 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2176 newSVpvn(tmpbuf,len));
3280af22
NIS
2177 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2178 PL_expect = XTERM;
a0d0e21e 2179 force_next(WORD);
3280af22 2180 PL_bufptr = s;
a0d0e21e
LW
2181 return *s == '(' ? FUNCMETH : METHOD;
2182 }
2183 }
2184 return 0;
2185}
2186
ffb4593c
NT
2187/*
2188 * S_incl_perldb
2189 * Return a string of Perl code to load the debugger. If PERL5DB
2190 * is set, it will return the contents of that, otherwise a
2191 * compile-time require of perl5db.pl.
2192 */
2193
bfed75c6 2194STATIC const char*
cea2e8a9 2195S_incl_perldb(pTHX)
a0d0e21e 2196{
97aff369 2197 dVAR;
3280af22 2198 if (PL_perldb) {
9d4ba2ae 2199 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2200
2201 if (pdb)
2202 return pdb;
93189314 2203 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2204 return "BEGIN { require 'perl5db.pl' }";
2205 }
2206 return "";
2207}
2208
2209
16d20bd9 2210/* Encoded script support. filter_add() effectively inserts a
4e553d73 2211 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2212 * Note that the filter function only applies to the current source file
2213 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2214 *
2215 * The datasv parameter (which may be NULL) can be used to pass
2216 * private data to this instance of the filter. The filter function
2217 * can recover the SV using the FILTER_DATA macro and use it to
2218 * store private buffers and state information.
2219 *
2220 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2221 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2222 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2223 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2224 * private use must be set using malloc'd pointers.
2225 */
16d20bd9
AD
2226
2227SV *
864dbfa3 2228Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2229{
97aff369 2230 dVAR;
f4c556ac
GS
2231 if (!funcp)
2232 return Nullsv;
2233
3280af22
NIS
2234 if (!PL_rsfp_filters)
2235 PL_rsfp_filters = newAV();
16d20bd9 2236 if (!datasv)
8c52afec 2237 datasv = NEWSV(255,0);
862a34c6 2238 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2239 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2240 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2241 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2242 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2243 av_unshift(PL_rsfp_filters, 1);
2244 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2245 return(datasv);
2246}
4e553d73 2247
16d20bd9
AD
2248
2249/* Delete most recently added instance of this filter function. */
a0d0e21e 2250void
864dbfa3 2251Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2252{
97aff369 2253 dVAR;
e0c19803 2254 SV *datasv;
24801a4b 2255
33073adb 2256#ifdef DEBUGGING
8141890a 2257 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2258#endif
3280af22 2259 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2260 return;
2261 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2262 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2263 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2264 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2265 IoANY(datasv) = (void *)NULL;
3280af22 2266 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2267
16d20bd9
AD
2268 return;
2269 }
2270 /* we need to search for the correct entry and clear it */
cea2e8a9 2271 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2272}
2273
2274
1de9afcd
RGS
2275/* Invoke the idxth filter function for the current rsfp. */
2276/* maxlen 0 = read one text line */
16d20bd9 2277I32
864dbfa3 2278Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2279{
97aff369 2280 dVAR;
16d20bd9
AD
2281 filter_t funcp;
2282 SV *datasv = NULL;
e50aee73 2283
3280af22 2284 if (!PL_rsfp_filters)
16d20bd9 2285 return -1;
1de9afcd 2286 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2287 /* Provide a default input filter to make life easy. */
2288 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2289 DEBUG_P(PerlIO_printf(Perl_debug_log,
2290 "filter_read %d: from rsfp\n", idx));
4e553d73 2291 if (maxlen) {
16d20bd9
AD
2292 /* Want a block */
2293 int len ;
f54cb97a 2294 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2295
2296 /* ensure buf_sv is large enough */
eb160463 2297 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2298 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2299 if (PerlIO_error(PL_rsfp))
37120919
AD
2300 return -1; /* error */
2301 else
2302 return 0 ; /* end of file */
2303 }
16d20bd9
AD
2304 SvCUR_set(buf_sv, old_len + len) ;
2305 } else {
2306 /* Want a line */
3280af22
NIS
2307 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2308 if (PerlIO_error(PL_rsfp))
37120919
AD
2309 return -1; /* error */
2310 else
2311 return 0 ; /* end of file */
2312 }
16d20bd9
AD
2313 }
2314 return SvCUR(buf_sv);
2315 }
2316 /* Skip this filter slot if filter has been deleted */
1de9afcd 2317 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2318 DEBUG_P(PerlIO_printf(Perl_debug_log,
2319 "filter_read %d: skipped (filter deleted)\n",
2320 idx));
16d20bd9
AD
2321 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2322 }
2323 /* Get function pointer hidden within datasv */
8141890a 2324 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2325 DEBUG_P(PerlIO_printf(Perl_debug_log,
2326 "filter_read %d: via function %p (%s)\n",
cfd0369c 2327 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2328 /* Call function. The function is expected to */
2329 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2330 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2331 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2332}
2333
76e3520e 2334STATIC char *
cea2e8a9 2335S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2336{
97aff369 2337 dVAR;
c39cd008 2338#ifdef PERL_CR_FILTER
3280af22 2339 if (!PL_rsfp_filters) {
c39cd008 2340 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2341 }
2342#endif
3280af22 2343 if (PL_rsfp_filters) {
55497cff
PP
2344 if (!append)
2345 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2346 if (FILTER_READ(0, sv, 0) > 0)
2347 return ( SvPVX(sv) ) ;
2348 else
2349 return Nullch ;
2350 }
9d116dd7 2351 else
fd049845 2352 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2353}
2354
01ec43d0 2355STATIC HV *
7fc63493 2356S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2357{
97aff369 2358 dVAR;
def3634b
GS
2359 GV *gv;
2360
01ec43d0 2361 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2362 return PL_curstash;
2363
2364 if (len > 2 &&
2365 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
f776e3cd 2366 (gv = gv_fetchpv(pkgname, 0, SVt_PVHV)))
01ec43d0
GS
2367 {
2368 return GvHV(gv); /* Foo:: */
def3634b
GS
2369 }
2370
2371 /* use constant CLASS => 'MyClass' */
f776e3cd 2372 if ((gv = gv_fetchpv(pkgname, 0, SVt_PVCV))) {
def3634b
GS
2373 SV *sv;
2374 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2375 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2376 }
2377 }
2378
2379 return gv_stashpv(pkgname, FALSE);
2380}
a0d0e21e 2381
468aa647 2382STATIC char *
cc6ed77d 2383S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2384 dVAR;
468aa647
RGS
2385 if (PL_expect != XSTATE)
2386 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2387 is_use ? "use" : "no"));
2388 s = skipspace(s);
2389 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2390 s = force_version(s, TRUE);
2391 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2392 PL_nextval[PL_nexttoke].opval = Nullop;
2393 force_next(WORD);
2394 }
2395 else if (*s == 'v') {
2396 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2397 s = force_version(s, FALSE);
2398 }
2399 }
2400 else {
2401 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2402 s = force_version(s, FALSE);
2403 }
2404 yylval.ival = is_use;
2405 return s;
2406}
748a9306 2407#ifdef DEBUGGING
27da23d5 2408 static const char* const exp_name[] =
09bef843 2409 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2410 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2411 };
748a9306 2412#endif
463ee0b2 2413
02aa26ce
NT
2414/*
2415 yylex
2416
2417 Works out what to call the token just pulled out of the input
2418 stream. The yacc parser takes care of taking the ops we return and
2419 stitching them into a tree.
2420
2421 Returns:
2422 PRIVATEREF
2423
2424 Structure:
2425 if read an identifier
2426 if we're in a my declaration
2427 croak if they tried to say my($foo::bar)
2428 build the ops for a my() declaration
2429 if it's an access to a my() variable
2430 are we in a sort block?
2431 croak if my($a); $a <=> $b
2432 build ops for access to a my() variable
2433 if in a dq string, and they've said @foo and we can't find @foo
2434 croak
2435 build ops for a bareword
2436 if we already built the token before, use it.
2437*/
2438
20141f0e 2439
dba4d153
JH
2440#ifdef __SC__
2441#pragma segment Perl_yylex
2442#endif
dba4d153 2443int
dba4d153 2444Perl_yylex(pTHX)
20141f0e 2445{
97aff369 2446 dVAR;
3afc138a 2447 register char *s = PL_bufptr;
378cc40b 2448 register char *d;
463ee0b2 2449 STRLEN len;
aa7440fb 2450 bool bof = FALSE;
a687059c 2451
bbf60fe6 2452 DEBUG_T( {
396482e1 2453 SV* tmp = newSVpvs("");
b6007c36
DM
2454 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2455 (IV)CopLINE(PL_curcop),
2456 lex_state_names[PL_lex_state],
2457 exp_name[PL_expect],
2458 pv_display(tmp, s, strlen(s), 0, 60));
2459 SvREFCNT_dec(tmp);
bbf60fe6 2460 } );
02aa26ce 2461 /* check if there's an identifier for us to look at */
ba979b31 2462 if (PL_pending_ident)
bbf60fe6 2463 return REPORT(S_pending_ident(aTHX));
bbce6d69 2464
02aa26ce
NT
2465 /* no identifier pending identification */
2466
3280af22 2467 switch (PL_lex_state) {
79072805
LW
2468#ifdef COMMENTARY
2469 case LEX_NORMAL: /* Some compilers will produce faster */
2470 case LEX_INTERPNORMAL: /* code if we comment these out. */
2471 break;
2472#endif
2473
09bef843 2474 /* when we've already built the next token, just pull it out of the queue */
79072805 2475 case LEX_KNOWNEXT:
3280af22
NIS
2476 PL_nexttoke--;
2477 yylval = PL_nextval[PL_nexttoke];
2478 if (!PL_nexttoke) {
2479 PL_lex_state = PL_lex_defer;
2480 PL_expect = PL_lex_expect;
2481 PL_lex_defer = LEX_NORMAL;
463ee0b2 2482 }
bbf60fe6 2483 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2484
02aa26ce 2485 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2486 when we get here, PL_bufptr is at the \
02aa26ce 2487 */
79072805
LW
2488 case LEX_INTERPCASEMOD:
2489#ifdef DEBUGGING
3280af22 2490 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2491 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2492#endif
02aa26ce 2493 /* handle \E or end of string */
3280af22 2494 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2495 /* if at a \E */
3280af22 2496 if (PL_lex_casemods) {
f54cb97a 2497 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2498 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2499
3792a11b
NC
2500 if (PL_bufptr != PL_bufend
2501 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2502 PL_bufptr += 2;
2503 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2504 }
bbf60fe6 2505 return REPORT(')');
79072805 2506 }
3280af22
NIS
2507 if (PL_bufptr != PL_bufend)
2508 PL_bufptr += 2;
2509 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2510 return yylex();
79072805
LW
2511 }
2512 else {
607df283 2513 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2514 "### Saw case modifier\n"); });
3280af22 2515 s = PL_bufptr + 1;
6e909404
JH
2516 if (s[1] == '\\' && s[2] == 'E') {
2517 PL_bufptr = s + 3;
2518 PL_lex_state = LEX_INTERPCONCAT;
2519 return yylex();
a0d0e21e 2520 }
6e909404 2521 else {
90771dc0 2522 I32 tmp;
6e909404
JH
2523 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2524 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2525 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2526 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2527 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2528 return REPORT(')');
6e909404
JH
2529 }
2530 if (PL_lex_casemods > 10)
2531 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2532 PL_lex_casestack[PL_lex_casemods++] = *s;
2533 PL_lex_casestack[PL_lex_casemods] = '\0';
2534 PL_lex_state = LEX_INTERPCONCAT;
2535 PL_nextval[PL_nexttoke].ival = 0;
2536 force_next('(');
2537 if (*s == 'l')
2538 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2539 else if (*s == 'u')
2540 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2541 else if (*s == 'L')
2542 PL_nextval[PL_nexttoke].ival = OP_LC;
2543 else if (*s == 'U')
2544 PL_nextval[PL_nexttoke].ival = OP_UC;
2545 else if (*s == 'Q')
2546 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2547 else
2548 Perl_croak(aTHX_ "panic: yylex");
2549 PL_bufptr = s + 1;
a0d0e21e 2550 }
79072805 2551 force_next(FUNC);
3280af22
NIS
2552 if (PL_lex_starts) {
2553 s = PL_bufptr;
2554 PL_lex_starts = 0;
131b3ad0
DM
2555 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2556 if (PL_lex_casemods == 1 && PL_lex_inpat)
2557 OPERATOR(',');
2558 else
2559 Aop(OP_CONCAT);
79072805
LW
2560 }
2561 else
cea2e8a9 2562 return yylex();
79072805
LW
2563 }
2564
55497cff 2565 case LEX_INTERPPUSH:
bbf60fe6 2566 return REPORT(sublex_push());
55497cff 2567
79072805 2568 case LEX_INTERPSTART:
3280af22 2569 if (PL_bufptr == PL_bufend)
bbf60fe6 2570 return REPORT(sublex_done());
607df283 2571 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2572 "### Interpolated variable\n"); });
3280af22
NIS
2573 PL_expect = XTERM;
2574 PL_lex_dojoin = (*PL_bufptr == '@');
2575 PL_lex_state = LEX_INTERPNORMAL;
2576 if (PL_lex_dojoin) {
2577 PL_nextval[PL_nexttoke].ival = 0;
79072805 2578 force_next(',');
a0d0e21e 2579 force_ident("\"", '$');
3280af22 2580 PL_nextval[PL_nexttoke].ival = 0;
79072805 2581 force_next('$');
3280af22 2582 PL_nextval[PL_nexttoke].ival = 0;
79072805 2583 force_next('(');
3280af22 2584 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2585 force_next(FUNC);
2586 }
3280af22
NIS
2587 if (PL_lex_starts++) {
2588 s = PL_bufptr;
131b3ad0
DM
2589 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2590 if (!PL_lex_casemods && PL_lex_inpat)
2591 OPERATOR(',');
2592 else
2593 Aop(OP_CONCAT);
79072805 2594 }
cea2e8a9 2595 return yylex();
79072805
LW
2596
2597 case LEX_INTERPENDMAYBE:
3280af22
NIS
2598 if (intuit_more(PL_bufptr)) {
2599 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2600 break;
2601 }
2602 /* FALL THROUGH */
2603
2604 case LEX_INTERPEND:
3280af22
NIS
2605 if (PL_lex_dojoin) {
2606 PL_lex_dojoin = FALSE;
2607 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2608 return REPORT(')');
79072805 2609 }
43a16006 2610 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2611 && SvEVALED(PL_lex_repl))
43a16006 2612 {
e9fa98b2 2613 if (PL_bufptr != PL_bufend)
cea2e8a9 2614 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2615 PL_lex_repl = Nullsv;
2616 }
79072805
LW
2617 /* FALLTHROUGH */
2618 case LEX_INTERPCONCAT:
2619#ifdef DEBUGGING
3280af22 2620 if (PL_lex_brackets)
cea2e8a9 2621 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2622#endif
3280af22 2623 if (PL_bufptr == PL_bufend)
bbf60fe6 2624 return REPORT(sublex_done());
79072805 2625
3280af22
NIS
2626 if (SvIVX(PL_linestr) == '\'') {
2627 SV *sv = newSVsv(PL_linestr);
2628 if (!PL_lex_inpat)
76e3520e 2629 sv = tokeq(sv);
3280af22 2630 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2631 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2632 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2633 s = PL_bufend;
79072805
LW
2634 }
2635 else {
3280af22 2636 s = scan_const(PL_bufptr);
79072805 2637 if (*s == '\\')
3280af22 2638 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2639 else
3280af22 2640 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2641 }
2642
3280af22
NIS
2643 if (s != PL_bufptr) {
2644 PL_nextval[PL_nexttoke] = yylval;
2645 PL_expect = XTERM;
79072805 2646 force_next(THING);
131b3ad0
DM
2647 if (PL_lex_starts++) {
2648 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2649 if (!PL_lex_casemods && PL_lex_inpat)
2650 OPERATOR(',');
2651 else
2652 Aop(OP_CONCAT);
2653 }
79072805 2654 else {
3280af22 2655 PL_bufptr = s;
cea2e8a9 2656 return yylex();
79072805
LW
2657 }
2658 }
2659
cea2e8a9 2660 return yylex();
a0d0e21e 2661 case LEX_FORMLINE:
3280af22
NIS
2662 PL_lex_state = LEX_NORMAL;
2663 s = scan_formline(PL_bufptr);
2664 if (!PL_lex_formbrack)
a0d0e21e
LW
2665 goto rightbracket;
2666 OPERATOR(';');
79072805
LW
2667 }
2668
3280af22
NIS
2669 s = PL_bufptr;
2670 PL_oldoldbufptr = PL_oldbufptr;
2671 PL_oldbufptr = s;
463ee0b2
LW
2672
2673 retry:
378cc40b
LW
2674 switch (*s) {
2675 default:
7e2040f0 2676 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2677 goto keylookup;
cea2e8a9 2678 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2679 case 4:
2680 case 26:
2681 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2682 case 0:
3280af22
NIS
2683 if (!PL_rsfp) {
2684 PL_last_uni = 0;
2685 PL_last_lop = 0;
c5ee2135 2686 if (PL_lex_brackets) {
0bd48802
AL
2687 yyerror(PL_lex_formbrack
2688 ? "Format not terminated"
2689 : "Missing right curly or square bracket");
c5ee2135 2690 }
4e553d73 2691 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2692 "### Tokener got EOF\n");
5f80b19c 2693 } );
79072805 2694 TOKEN(0);
463ee0b2 2695 }
3280af22 2696 if (s++ < PL_bufend)
a687059c 2697 goto retry; /* ignore stray nulls */
3280af22
NIS
2698 PL_last_uni = 0;
2699 PL_last_lop = 0;
2700 if (!PL_in_eval && !PL_preambled) {
2701 PL_preambled = TRUE;
2702 sv_setpv(PL_linestr,incl_perldb());
2703 if (SvCUR(PL_linestr))
396482e1 2704 sv_catpvs(PL_linestr,";");
3280af22
NIS
2705 if (PL_preambleav){
2706 while(AvFILLp(PL_preambleav) >= 0) {
2707 SV *tmpsv = av_shift(PL_preambleav);
2708 sv_catsv(PL_linestr, tmpsv);
396482e1 2709 sv_catpvs(PL_linestr, ";");
91b7def8
PP
2710 sv_free(tmpsv);
2711 }
3280af22
NIS
2712 sv_free((SV*)PL_preambleav);
2713 PL_preambleav = NULL;
91b7def8 2714 }
3280af22 2715 if (PL_minus_n || PL_minus_p) {
396482e1 2716 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2717 if (PL_minus_l)
396482e1 2718 sv_catpvs(PL_linestr,"chomp;");
3280af22 2719 if (PL_minus_a) {
3280af22 2720 if (PL_minus_F) {
3792a11b
NC
2721 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2722 || *PL_splitstr == '"')
3280af22 2723 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2724 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2725 else {
c8ef6a4b
NC
2726 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2727 bytes can be used as quoting characters. :-) */
dd374669 2728 const char *splits = PL_splitstr;
91d456ae 2729 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2730 do {
2731 /* Need to \ \s */
dd374669
AL
2732 if (*splits == '\\')
2733 sv_catpvn(PL_linestr, splits, 1);
2734 sv_catpvn(PL_linestr, splits, 1);
2735 } while (*splits++);
48c4c863
NC
2736 /* This loop will embed the trailing NUL of
2737 PL_linestr as the last thing it does before
2738 terminating. */
396482e1 2739 sv_catpvs(PL_linestr, ");");
54310121 2740 }
2304df62
AD
2741 }
2742 else
396482e1 2743 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2744 }
79072805 2745 }
bc9b29db 2746 if (PL_minus_E)
396482e1
GA
2747 sv_catpvs(PL_linestr,"use feature ':5.10';");
2748 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2749 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2750 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2751 PL_last_lop = PL_last_uni = Nullch;
3280af22 2752 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2753 SV * const sv = NEWSV(85,0);
a0d0e21e
LW
2754
2755 sv_upgrade(sv, SVt_PVMG);
3280af22 2756 sv_setsv(sv,PL_linestr);
0ac0412a 2757 (void)SvIOK_on(sv);
45977657 2758 SvIV_set(sv, 0);
36c7798d 2759 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2760 }
79072805 2761 goto retry;
a687059c 2762 }
e929a76b 2763 do {
aa7440fb 2764 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2765 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2766 fake_eof:
2767 if (PL_rsfp) {
2768 if (PL_preprocess && !PL_in_eval)
2769 (void)PerlProc_pclose(PL_rsfp);
2770 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2771 PerlIO_clearerr(PL_rsfp);
2772 else
2773 (void)PerlIO_close(PL_rsfp);
2774 PL_rsfp = Nullfp;
2775 PL_doextract = FALSE;
2776 }
2777 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2778 sv_setpv(PL_linestr,PL_minus_p
2779 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2780 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2781 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2782 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2783 PL_minus_n = PL_minus_p = 0;
2784 goto retry;
2785 }
2786 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2787 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2788 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2789 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2790 }
7aa207d6
JH
2791 /* If it looks like the start of a BOM or raw UTF-16,
2792 * check if it in fact is. */
2793 else if (bof &&
2794 (*s == 0 ||
2795 *(U8*)s == 0xEF ||
2796 *(U8*)s >= 0xFE ||
2797 s[1] == 0)) {
226017aa 2798#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2799# ifdef __GNU_LIBRARY__
2800# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2801# define FTELL_FOR_PIPE_IS_BROKEN
2802# endif
e3f494f1
JH
2803# else
2804# ifdef __GLIBC__
2805# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2806# define FTELL_FOR_PIPE_IS_BROKEN
2807# endif
2808# endif
226017aa
DD
2809# endif
2810#endif
2811#ifdef FTELL_FOR_PIPE_IS_BROKEN
2812 /* This loses the possibility to detect the bof
2813 * situation on perl -P when the libc5 is being used.
2814 * Workaround? Maybe attach some extra state to PL_rsfp?
2815 */
2816 if (!PL_preprocess)
7e28d3af 2817 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2818#else
eb160463 2819 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2820#endif
7e28d3af 2821 if (bof) {
3280af22 2822 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2823 s = swallow_bom((U8*)s);
e929a76b 2824 }
378cc40b 2825 }
3280af22 2826 if (PL_doextract) {
a0d0e21e
LW
2827 /* Incest with pod. */
2828 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2829 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2830 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2831 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2832 PL_last_lop = PL_last_uni = Nullch;
3280af22 2833 PL_doextract = FALSE;
a0d0e21e 2834 }
4e553d73 2835 }
463ee0b2 2836 incline(s);
3280af22
NIS
2837 } while (PL_doextract);
2838 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2839 if (PERLDB_LINE && PL_curstash != PL_debstash) {
9d4ba2ae 2840 SV * const sv = NEWSV(85,0);
a687059c 2841
93a17b20 2842 sv_upgrade(sv, SVt_PVMG);
3280af22 2843 sv_setsv(sv,PL_linestr);
0ac0412a 2844 (void)SvIOK_on(sv);
45977657 2845 SvIV_set(sv, 0);
36c7798d 2846 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2847 }
3280af22 2848 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2849 PL_last_lop = PL_last_uni = Nullch;
57843af0 2850 if (CopLINE(PL_curcop) == 1) {
3280af22 2851 while (s < PL_bufend && isSPACE(*s))
79072805 2852 s++;
a0d0e21e 2853 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2854 s++;
44a8e56a 2855 d = Nullch;
3280af22 2856 if (!PL_in_eval) {
44a8e56a
PP
2857 if (*s == '#' && *(s+1) == '!')
2858 d = s + 2;
2859#ifdef ALTERNATE_SHEBANG
2860 else {
bfed75c6 2861 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a
PP
2862 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2863 d = s + (sizeof(as) - 1);
2864 }
2865#endif /* ALTERNATE_SHEBANG */
2866 }
2867 if (d) {
b8378b72 2868 char *ipath;
774d564b 2869 char *ipathend;
b8378b72 2870
774d564b 2871 while (isSPACE(*d))
b8378b72
CS
2872 d++;
2873 ipath = d;
774d564b
PP
2874 while (*d && !isSPACE(*d))
2875 d++;
2876 ipathend = d;
2877
2878#ifdef ARG_ZERO_IS_SCRIPT
2879 if (ipathend > ipath) {
2880 /*
2881 * HP-UX (at least) sets argv[0] to the script name,
2882 * which makes $^X incorrect. And Digital UNIX and Linux,
2883 * at least, set argv[0] to the basename of the Perl
2884 * interpreter. So, having found "#!", we'll set it right.
2885 */
f776e3cd
NC
2886 SV * const x
2887 = GvSV(gv_fetchpv("\030", GV_ADD, SVt_PV)); /* $^X */
774d564b 2888 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2889 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2890 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c
PP
2891 SvSETMAGIC(x);
2892 }
556c1dec
JH
2893 else {
2894 STRLEN blen;
2895 STRLEN llen;
cfd0369c 2896 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2897 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2898 if (llen < blen) {
2899 bstart += blen - llen;
2900 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2901 sv_setpvn(x, ipath, ipathend - ipath);
2902 SvSETMAGIC(x);
2903 }
2904 }
2905 }
774d564b 2906 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2907 }
774d564b 2908#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2909
2910 /*
2911 * Look for options.
2912 */
748a9306 2913 d = instr(s,"perl -");
84e30d1a 2914 if (!d) {
748a9306 2915 d = instr(s,"perl");
84e30d1a
GS
2916#if defined(DOSISH)
2917 /* avoid getting into infinite loops when shebang
2918 * line contains "Perl" rather than "perl" */
2919 if (!d) {
2920 for (d = ipathend-4; d >= ipath; --d) {
2921 if ((*d == 'p' || *d == 'P')
2922 && !ibcmp(d, "perl", 4))
2923 {
2924 break;
2925 }
2926 }
2927 if (d < ipath)
2928 d = Nullch;
2929 }
2930#endif
2931 }
44a8e56a
PP
2932#ifdef ALTERNATE_SHEBANG
2933 /*
2934 * If the ALTERNATE_SHEBANG on this system starts with a
2935 * character that can be part of a Perl expression, then if
2936 * we see it but not "perl", we're probably looking at the
2937 * start of Perl code, not a request to hand off to some
2938 * other interpreter. Similarly, if "perl" is there, but
2939 * not in the first 'word' of the line, we assume the line
2940 * contains the start of the Perl program.
44a8e56a
PP
2941 */
2942 if (d && *s != '#') {
f54cb97a 2943 const char *c = ipath;
44a8e56a
PP
2944 while (*c && !strchr("; \t\r\n\f\v#", *c))
2945 c++;
2946 if (c < d)
2947 d = Nullch; /* "perl" not in first word; ignore */
2948 else
2949 *s = '#'; /* Don't try to parse shebang line */
2950 }
774d564b 2951#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2952#ifndef MACOS_TRADITIONAL
748a9306 2953 if (!d &&
44a8e56a 2954 *s == '#' &&
774d564b 2955 ipathend > ipath &&
3280af22 2956 !PL_minus_c &&
748a9306 2957 !instr(s,"indir") &&
3280af22 2958 instr(PL_origargv[0],"perl"))
748a9306 2959 {
27da23d5 2960 dVAR;
9f68db38 2961 char **newargv;
9f68db38 2962
774d564b
PP
2963 *ipathend = '\0';
2964 s = ipathend + 1;
3280af22 2965 while (s < PL_bufend && isSPACE(*s))
9f68db38 2966 s++;
3280af22 2967 if (s < PL_bufend) {
a02a5408 2968 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2969 newargv[1] = s;
3280af22 2970 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2971 s++;
2972 *s = '\0';
3280af22 2973 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2974 }
2975 else
3280af22 2976 newargv = PL_origargv;
774d564b 2977 newargv[0] = ipath;
b35112e7 2978 PERL_FPU_PRE_EXEC
b4748376 2979 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2980 PERL_FPU_POST_EXEC
cea2e8a9 2981 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2982 }
bf4acbe4 2983#endif
748a9306 2984 if (d) {
f54cb97a
AL
2985 const U32 oldpdb = PL_perldb;
2986 const bool oldn = PL_minus_n;
2987 const bool oldp = PL_minus_p;
748a9306
LW
2988
2989 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2990 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2991
2992 if (*d++ == '-') {
f54cb97a 2993 const bool switches_done = PL_doswitches;
8cc95fdb 2994 do {
3ffe3ee4 2995 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 2996 const char * const m = d;
8cc95fdb 2997 while (*d && !isSPACE(*d)) d++;
cea2e8a9 2998 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb
PP
2999 (int)(d - m), m);
3000 }
3001 d = moreswitches(d);
3002 } while (d);
f0b2cf55
YST
3003 if (PL_doswitches && !switches_done) {
3004 int argc = PL_origargc;
3005 char **argv = PL_origargv;
3006 do {
3007 argc--,argv++;
3008 } while (argc && argv[0][0] == '-' && argv[0][1]);
3009 init_argv_symbols(argc,argv);
3010 }
155aba94
GS
3011 if ((PERLDB_LINE && !oldpdb) ||
3012 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b
PP
3013 /* if we have already added "LINE: while (<>) {",
3014 we must not do it again */
748a9306 3015 {
c69006e4 3016 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3017 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3018 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 3019 PL_last_lop = PL_last_uni = Nullch;
3280af22 3020 PL_preambled = FALSE;
84902520 3021 if (PERLDB_LINE)
3280af22 3022 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3023 goto retry;
3024 }
a0d0e21e 3025 }
79072805 3026 }
9f68db38 3027 }
79072805 3028 }
3280af22
NIS
3029 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3030 PL_bufptr = s;
3031 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3032 return yylex();
ae986130 3033 }
378cc40b 3034 goto retry;
4fdae800 3035 case '\r':
6a27c188 3036#ifdef PERL_STRICT_CR
cea2e8a9 3037 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3038 Perl_croak(aTHX_
cc507455 3039 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3040#endif
4fdae800 3041 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3042#ifdef MACOS_TRADITIONAL
3043 case '\312':
3044#endif
378cc40b
LW
3045 s++;
3046 goto retry;
378cc40b 3047 case '#':
e929a76b 3048 case '\n':
3280af22 3049 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3050 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3051 /* handle eval qq[#line 1 "foo"\n ...] */
3052 CopLINE_dec(PL_curcop);
3053 incline(s);
3054 }
3280af22 3055 d = PL_bufend;
a687059c 3056 while (s < d && *s != '\n')
378cc40b 3057 s++;
0f85fab0 3058 if (s < d)
378cc40b 3059 s++;
78c267c1 3060 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3061 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3062 incline(s);
3280af22
NIS
3063 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3064 PL_bufptr = s;
3065 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3066 return yylex();
a687059c 3067 }
378cc40b 3068 }
a687059c 3069 else {
378cc40b 3070 *s = '\0';
3280af22 3071 PL_bufend = s;
a687059c 3072 }
378cc40b
LW
3073 goto retry;
3074 case '-':
79072805 3075 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3076 I32 ftst = 0;
90771dc0 3077 char tmp;
e5edeb50 3078
378cc40b 3079 s++;
3280af22 3080 PL_bufptr = s;
748a9306
LW
3081 tmp = *s++;
3082
bf4acbe4 3083 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3084 s++;
3085
3086 if (strnEQ(s,"=>",2)) {
3280af22 3087 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3088 DEBUG_T( { S_printbuf(aTHX_
3089 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3090 } );
748a9306
LW
3091 OPERATOR('-'); /* unary minus */
3092 }
3280af22 3093 PL_last_uni = PL_oldbufptr;
748a9306 3094 switch (tmp) {
e5edeb50
JH
3095 case 'r': ftst = OP_FTEREAD; break;
3096 case 'w': ftst = OP_FTEWRITE; break;
3097 case 'x': ftst = OP_FTEEXEC; break;
3098 case 'o': ftst = OP_FTEOWNED; break;
3099 case 'R': ftst = OP_FTRREAD; break;
3100 case 'W': ftst = OP_FTRWRITE; break;
3101 case 'X': ftst = OP_FTREXEC; break;
3102 case 'O': ftst = OP_FTROWNED; break;
3103 case 'e': ftst = OP_FTIS; break;
3104 case 'z': ftst = OP_FTZERO; break;
3105 case 's': ftst = OP_FTSIZE; break;
3106 case 'f': ftst = OP_FTFILE; break;
3107 case 'd': ftst = OP_FTDIR; break;
3108 case 'l': ftst = OP_FTLINK; break;
3109 case 'p': ftst = OP_FTPIPE; break;
3110 case 'S': ftst = OP_FTSOCK; break;
3111 case 'u': ftst = OP_FTSUID; break;
3112 case 'g': ftst = OP_FTSGID; break;
3113 case 'k': ftst = OP_FTSVTX; break;
3114 case 'b': ftst = OP_FTBLK; break;
3115 case 'c': ftst = OP_FTCHR; break;
3116 case 't': ftst = OP_FTTTY; break;
3117 case 'T': ftst = OP_FTTEXT; break;
3118 case 'B': ftst = OP_FTBINARY; break;
3119 case 'M': case 'A': case 'C':
f776e3cd 3120 gv_fetchpv("\024",GV_ADD, SVt_PV);
e5edeb50
JH
3121 switch (tmp) {
3122 case 'M': ftst = OP_FTMTIME; break;
3123 case 'A': ftst = OP_FTATIME; break;
3124 case 'C': ftst = OP_FTCTIME; break;
3125 default: break;
3126 }
3127 break;
378cc40b 3128 default:
378cc40b
LW
3129 break;
3130 }
e5edeb50 3131 if (ftst) {
eb160463 3132 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3133 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3134 "### Saw file test %c\n", (int)tmp);
5f80b19c 3135 } );
e5edeb50
JH
3136 FTST(ftst);
3137 }
3138 else {
3139 /* Assume it was a minus followed by a one-letter named
3140 * subroutine call (or a -bareword), then. */
95c31fe3 3141 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3142 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3143 (int) tmp);
5f80b19c 3144 } );
3cf7b4c4 3145 s = --PL_bufptr;
e5edeb50 3146 }
378cc40b 3147 }
90771dc0
NC
3148 {
3149 const char tmp = *s++;
3150 if (*s == tmp) {
3151 s++;
3152 if (PL_expect == XOPERATOR)
3153 TERM(POSTDEC);
3154 else
3155 OPERATOR(PREDEC);
3156 }
3157 else if (*s == '>') {
3158 s++;
3159 s = skipspace(s);
3160 if (isIDFIRST_lazy_if(s,UTF)) {
3161 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3162 TOKEN(ARROW);
3163 }
3164 else if (*s == '$')
3165 OPERATOR(ARROW);
3166 else
3167 TERM(ARROW);
3168 }
3280af22 3169 if (PL_expect == XOPERATOR)
90771dc0
NC
3170 Aop(OP_SUBTRACT);
3171 else {
3172 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3173 check_uni();
3174 OPERATOR('-'); /* unary minus */
79072805 3175 }
2f3197b3 3176 }
79072805 3177
378cc40b 3178 case '+':
90771dc0
NC
3179 {
3180 const char tmp = *s++;
3181 if (*s == tmp) {
3182 s++;
3183 if (PL_expect == XOPERATOR)
3184 TERM(POSTINC);
3185 else
3186 OPERATOR(PREINC);
3187 }
3280af22 3188 if (PL_expect == XOPERATOR)
90771dc0
NC
3189 Aop(OP_ADD);
3190 else {
3191 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3192 check_uni();
3193 OPERATOR('+');
3194 }
2f3197b3 3195 }
a687059c 3196
378cc40b 3197 case '*':
3280af22
NIS
3198 if (PL_expect != XOPERATOR) {
3199 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3200 PL_expect = XOPERATOR;
3201 force_ident(PL_tokenbuf, '*');
3202 if (!*PL_tokenbuf)
a0d0e21e 3203 PREREF('*');
79072805 3204 TERM('*');
a687059c 3205 }
79072805
LW
3206 s++;
3207 if (*s == '*') {
a687059c 3208 s++;
79072805 3209 PWop(OP_POW);
a687059c 3210 }
79072805
LW
3211 Mop(OP_MULTIPLY);
3212
378cc40b 3213 case '%':
3280af22 3214 if (PL_expect == XOPERATOR) {
bbce6d69
PP
3215 ++s;
3216 Mop(OP_MODULO);
a687059c 3217 }
3280af22
NIS
3218 PL_tokenbuf[0] = '%';
3219 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3220 if (!PL_tokenbuf[1]) {
bbce6d69 3221 PREREF('%');
a687059c 3222 }
3280af22 3223 PL_pending_ident = '%';
bbce6d69 3224 TERM('%');
a687059c 3225
378cc40b 3226 case '^':
79072805 3227 s++;
a0d0e21e 3228 BOop(OP_BIT_XOR);
79072805 3229 case '[':
3280af22 3230 PL_lex_brackets++;
79072805 3231 /* FALL THROUGH */
378cc40b 3232 case '~':
0d863452
RH
3233 if (s[1] == '~'
3234 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3235 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3236 {
3237 s += 2;
3238 Eop(OP_SMARTMATCH);
3239 }
378cc40b 3240 case ',':
90771dc0
NC
3241 {
3242 const char tmp = *s++;
3243 OPERATOR(tmp);
3244 }
a0d0e21e
LW
3245 case ':':
3246 if (s[1] == ':') {
3247 len = 0;
0bfa2a8a 3248 goto just_a_word_zero_gv;
a0d0e21e
LW
3249 }
3250 s++;
09bef843
SB
3251 switch (PL_expect) {
3252 OP *attrs;
3253 case XOPERATOR:
3254 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3255 break;
3256 PL_bufptr = s; /* update in case we back off */
3257 goto grabattrs;
3258 case XATTRBLOCK:
3259 PL_expect = XBLOCK;
3260 goto grabattrs;
3261 case XATTRTERM:
3262 PL_expect = XTERMBLOCK;
3263 grabattrs:
3264 s = skipspace(s);
3265 attrs = Nullop;
7e2040f0 3266 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3267 I32 tmp;
09bef843 3268 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3269 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3270 if (tmp < 0) tmp = -tmp;
3271 switch (tmp) {
3272 case KEY_or:
3273 case KEY_and:
c963b151 3274 case KEY_err:
f9829d6b
GS
3275 case KEY_for:
3276 case KEY_unless:
3277 case KEY_if:
3278 case KEY_while:
3279 case KEY_until:
3280 goto got_attrs;
3281 default:
3282 break;
3283 }
3284 }
09bef843
SB
3285 if (*d == '(') {
3286 d = scan_str(d,TRUE,TRUE);
3287 if (!d) {
09bef843
SB
3288 /* MUST advance bufptr here to avoid bogus
3289 "at end of line" context messages from yyerror().
3290 */
3291 PL_bufptr = s + len;
3292 yyerror("Unterminated attribute parameter in attribute list");
3293 if (attrs)
3294 op_free(attrs);
bbf60fe6 3295 return REPORT(0); /* EOF indicator */
09bef843
SB
3296 }
3297 }
3298 if (PL_lex_stuff) {
3299 SV *sv = newSVpvn(s, len);
3300 sv_catsv(sv, PL_lex_stuff);
3301 attrs = append_elem(OP_LIST, attrs,
3302 newSVOP(OP_CONST, 0, sv));
3303 SvREFCNT_dec(PL_lex_stuff);
3304 PL_lex_stuff = Nullsv;
3305 }
3306 else {
371fce9b
DM
3307 if (len == 6 && strnEQ(s, "unique", len)) {
3308 if (PL_in_my == KEY_our)
3309#ifdef USE_ITHREADS
3310 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3311#else
3312 ; /* skip to avoid loading attributes.pm */
3313#endif
bfed75c6 3314 else
371fce9b
DM
3315 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3316 }
3317
d3cea301
SB
3318 /* NOTE: any CV attrs applied here need to be part of
3319 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3320 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3321 CvLVALUE_on(PL_compcv);
3322 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3323 CvLOCKED_on(PL_compcv);
3324 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3325 CvMETHOD_on(PL_compcv);
06492da6
SF
3326 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3327 CvASSERTION_on(PL_compcv);
78f9721b
SM
3328 /* After we've set the flags, it could be argued that
3329 we don't need to do the attributes.pm-based setting
3330 process, and shouldn't bother appending recognized
d3cea301
SB
3331 flags. To experiment with that, uncomment the
3332 following "else". (Note that's already been
3333 uncommented. That keeps the above-applied built-in
3334 attributes from being intercepted (and possibly
3335 rejected) by a package's attribute routines, but is
3336 justified by the performance win for the common case
3337 of applying only built-in attributes.) */
0256094b 3338 else
78f9721b
SM
3339 attrs = append_elem(OP_LIST, attrs,
3340 newSVOP(OP_CONST, 0,
3341 newSVpvn(s, len)));
09bef843
SB
3342 }
3343 s = skipspace(d);
0120eecf 3344 if (*s == ':' && s[1] != ':')
09bef843 3345 s = skipspace(s+1);
0120eecf
GS
3346 else if (s == d)
3347 break; /* require real whitespace or :'s */
09bef843 3348 }
90771dc0
NC
3349 {
3350 const char tmp
3351 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3352 if (*s != ';' && *s != '}' && *s != tmp
3353 && (tmp != '=' || *s != ')')) {
3354 const char q = ((*s == '\'') ? '"' : '\'');
3355 /* If here for an expression, and parsed no attrs, back
3356 off. */
3357 if (tmp == '=' && !attrs) {
3358 s = PL_bufptr;
3359 break;
3360 }
3361 /* MUST advance bufptr here to avoid bogus "at end of line"
3362 context messages from yyerror().
3363 */
3364 PL_bufptr = s;
3365 yyerror( *s
3366 ? Perl_form(aTHX_ "Invalid separator character "
3367 "%c%c%c in attribute list", q, *s, q)
3368 : "Unterminated attribute list" );
3369 if (attrs)
3370 op_free(attrs);
3371 OPERATOR(':');
09bef843 3372 }
09bef843 3373 }
f9829d6b 3374 got_attrs:
09bef843
SB
3375 if (attrs) {
3376 PL_nextval[PL_nexttoke].opval = attrs;
3377 force_next(THING);
3378 }
3379 TOKEN(COLONATTR);
3380 }
a0d0e21e 3381 OPERATOR(':');
8990e307
LW
3382 case '(':
3383 s++;
3280af22
NIS
3384 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3385 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3386 else
3280af22 3387 PL_expect = XTERM;
4a202259 3388 s = skipspace(s);
a0d0e21e 3389 TOKEN('(');
378cc40b 3390 case ';':
f4dd75d9 3391 CLINE;
90771dc0
NC
3392 {
3393 const char tmp = *s++;
3394 OPERATOR(tmp);
3395 }
378cc40b 3396 case ')':
90771dc0
NC
3397 {
3398 const char tmp = *s++;
3399 s = skipspace(s);
3400 if (*s == '{')
3401 PREBLOCK(tmp);
3402 TERM(tmp);
3403 }
79072805
LW
3404 case ']':
3405 s++;
3280af22 3406 if (PL_lex_brackets <= 0)
d98d5fff 3407 yyerror("Unmatched right square bracket");
463ee0b2 3408 else
3280af22
NIS
3409 --PL_lex_brackets;
3410 if (PL_lex_state == LEX_INTERPNORMAL) {
3411 if (PL_lex_brackets == 0) {
a0d0e21e 3412 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3413 PL_lex_state = LEX_INTERPEND;
79072805
LW
3414 }
3415 }
4633a7c4 3416 TERM(']');
79072805
LW
3417 case '{':
3418 leftbracket:
79072805 3419 s++;
3280af22 3420 if (PL_lex_brackets > 100) {
8edd5f42 3421 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3422 }
3280af22 3423 switch (PL_expect) {
a0d0e21e 3424 case XTERM:
3280af22 3425 if (PL_lex_formbrack) {
a0d0e21e
LW
3426 s--;
3427 PRETERMBLOCK(DO);
3428 }
3280af22
NIS
3429 if (PL_oldoldbufptr == PL_last_lop)
3430 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3431 else
3280af22 3432 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3433 OPERATOR(HASHBRACK);
a0d0e21e 3434 case XOPERATOR:
bf4acbe4 3435 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3436 s++;
44a8e56a 3437 d = s;
3280af22
NIS
3438 PL_tokenbuf[0] = '\0';
3439 if (d < PL_bufend && *d == '-') {
3440 PL_tokenbuf[0] = '-';
44a8e56a 3441 d++;
bf4acbe4 3442 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a
PP
3443 d++;
3444 }
7e2040f0 3445 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3446 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3447 FALSE, &len);
bf4acbe4 3448 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3449 d++;
3450 if (*d == '}') {
f54cb97a 3451 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a
PP
3452 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3453 if (minus)
3454 force_next('-');
748a9306
LW
3455 }
3456 }
3457 /* FALL THROUGH */
09bef843 3458 case XATTRBLOCK:
748a9306 3459 case XBLOCK:
3280af22
NIS
3460 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3461 PL_expect = XSTATE;
a0d0e21e 3462 break;
09bef843 3463 case XATTRTERM:
a0d0e21e 3464 case XTERMBLOCK:
3280af22
NIS
3465 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3466 PL_expect = XSTATE;
a0d0e21e
LW
3467 break;
3468 default: {
f54cb97a 3469 const char *t;
3280af22
NIS
3470 if (PL_oldoldbufptr == PL_last_lop)
3471 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3472 else
3280af22 3473 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3474 s = skipspace(s);
8452ff4b
SB
3475 if (*s == '}') {
3476 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3477 PL_expect = XTERM;
3478 /* This hack is to get the ${} in the message. */
3479 PL_bufptr = s+1;
3480 yyerror("syntax error");
3481 break;
3482 }
a0d0e21e 3483 OPERATOR(HASHBRACK);
8452ff4b 3484 }
b8a4b1be
GS
3485 /* This hack serves to disambiguate a pair of curlies
3486 * as being a block or an anon hash. Normally, expectation
3487 * determines that, but in cases where we're not in a
3488 * position to expect anything in particular (like inside
3489 * eval"") we have to resolve the ambiguity. This code
3490 * covers the case where the first term in the curlies is a
3491 * quoted string. Most other cases need to be explicitly
a0288114 3492 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3493 * curly in order to force resolution as an anon hash.
3494 *
3495 * XXX should probably propagate the outer expectation
3496 * into eval"" to rely less on this hack, but that could
3497 * potentially break current behavior of eval"".
3498 * GSAR 97-07-21
3499 */
3500 t = s;
3501 if (*s == '\'' || *s == '"' || *s == '`') {
3502 /* common case: get past first string, handling escapes */
3280af22 3503 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3504 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3505 t++;
3506 t++;
a0d0e21e 3507 }
b8a4b1be 3508 else if (*s == 'q') {
3280af22 3509 if (++t < PL_bufend
b8a4b1be 3510 && (!isALNUM(*t)
3280af22 3511 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3512 && !isALNUM(*t))))
3513 {
abc667d1 3514 /* skip q//-like construct */
f54cb97a 3515 const char *tmps;
b8a4b1be
GS
3516 char open, close, term;
3517 I32 brackets = 1;
3518
3280af22 3519 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 3520 t++;
abc667d1
DM
3521 /* check for q => */
3522 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
3523 OPERATOR(HASHBRACK);
3524 }
b8a4b1be
GS
3525 term = *t;