This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Avoid a warning from the Irix C compiler.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
9059aa12
LW
38#define XFAKEBRACK 128
39#define XENUMMASK 127
40
39e02b42
JH
41#ifdef USE_UTF8_SCRIPTS
42# define UTF (!IN_BYTES)
2b9d42f0 43#else
746b446a 44# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 45#endif
a0ed51b3 46
61f0cdd9 47/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
48 * 1999-02-27 mjd-perl-patch@plover.com */
49#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
50
bf4acbe4
GS
51/* On MacOS, respect nonbreaking spaces */
52#ifdef MACOS_TRADITIONAL
53#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
54#else
55#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
56#endif
57
ffb4593c
NT
58/* LEX_* are values for PL_lex_state, the state of the lexer.
59 * They are arranged oddly so that the guard on the switch statement
79072805
LW
60 * can get by with a single comparison (if the compiler is smart enough).
61 */
62
fb73857a 63/* #define LEX_NOTPARSING 11 is done in perl.h. */
64
b6007c36
DM
65#define LEX_NORMAL 10 /* normal code (ie not within "...") */
66#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
67#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
68#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
69#define LEX_INTERPSTART 6 /* expecting the start of a $var */
70
71 /* at end of code, eg "$x" followed by: */
72#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
73#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
74
75#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
76 string or after \E, $foo, etc */
77#define LEX_INTERPCONST 2 /* NOT USED */
78#define LEX_FORMLINE 1 /* expecting a format line */
79#define LEX_KNOWNEXT 0 /* next token known; just return it */
80
79072805 81
bbf60fe6 82#ifdef DEBUGGING
27da23d5 83static const char* const lex_state_names[] = {
bbf60fe6
DM
84 "KNOWNEXT",
85 "FORMLINE",
86 "INTERPCONST",
87 "INTERPCONCAT",
88 "INTERPENDMAYBE",
89 "INTERPEND",
90 "INTERPSTART",
91 "INTERPPUSH",
92 "INTERPCASEMOD",
93 "INTERPNORMAL",
94 "NORMAL"
95};
96#endif
97
79072805
LW
98#ifdef ff_next
99#undef ff_next
d48672a2
LW
100#endif
101
79072805 102#include "keywords.h"
fe14fcc3 103
ffb4593c
NT
104/* CLINE is a macro that ensures PL_copline has a sane value */
105
ae986130
LW
106#ifdef CLINE
107#undef CLINE
108#endif
57843af0 109#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 110
ffb4593c
NT
111/*
112 * Convenience functions to return different tokens and prime the
9cbb5ea2 113 * lexer for the next token. They all take an argument.
ffb4593c
NT
114 *
115 * TOKEN : generic token (used for '(', DOLSHARP, etc)
116 * OPERATOR : generic operator
117 * AOPERATOR : assignment operator
118 * PREBLOCK : beginning the block after an if, while, foreach, ...
119 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
120 * PREREF : *EXPR where EXPR is not a simple identifier
121 * TERM : expression term
122 * LOOPX : loop exiting command (goto, last, dump, etc)
123 * FTST : file test operator
124 * FUN0 : zero-argument function
2d2e263d 125 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
126 * BOop : bitwise or or xor
127 * BAop : bitwise and
128 * SHop : shift operator
129 * PWop : power operator
9cbb5ea2 130 * PMop : pattern-matching operator
ffb4593c
NT
131 * Aop : addition-level operator
132 * Mop : multiplication-level operator
133 * Eop : equality-testing operator
e5edeb50 134 * Rop : relational operator <= != gt
ffb4593c
NT
135 *
136 * Also see LOP and lop() below.
137 */
138
998054bd 139#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 140# define REPORT(retval) tokereport((I32)retval)
998054bd 141#else
bbf60fe6 142# define REPORT(retval) (retval)
998054bd
SC
143#endif
144
bbf60fe6
DM
145#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
146#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
147#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
148#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
149#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
150#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
151#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
152#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
153#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
154#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
155#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
156#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
157#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
158#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
159#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
160#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
161#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
162#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
163#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
164#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 165
a687059c
LW
166/* This bit of chicanery makes a unary function followed by
167 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
168 * The UNIDOR macro is for unary functions that can be followed by the //
169 * operator (such as C<shift // 0>).
a687059c 170 */
376fcdbf
AL
171#define UNI2(f,x) { \
172 yylval.ival = f; \
173 PL_expect = x; \
174 PL_bufptr = s; \
175 PL_last_uni = PL_oldbufptr; \
176 PL_last_lop_op = f; \
177 if (*s == '(') \
178 return REPORT( (int)FUNC1 ); \
179 s = skipspace(s); \
180 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
181 }
6f33ba73
RGS
182#define UNI(f) UNI2(f,XTERM)
183#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 184
376fcdbf
AL
185#define UNIBRACK(f) { \
186 yylval.ival = f; \
187 PL_bufptr = s; \
188 PL_last_uni = PL_oldbufptr; \
189 if (*s == '(') \
190 return REPORT( (int)FUNC1 ); \
191 s = skipspace(s); \
192 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
193 }
79072805 194
9f68db38 195/* grandfather return to old style */
3280af22 196#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 197
8fa7f367
JH
198#ifdef DEBUGGING
199
bbf60fe6
DM
200/* how to interpret the yylval associated with the token */
201enum token_type {
202 TOKENTYPE_NONE,
203 TOKENTYPE_IVAL,
204 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
205 TOKENTYPE_PVAL,
206 TOKENTYPE_OPVAL,
207 TOKENTYPE_GVVAL
208};
209
27da23d5
JH
210static struct debug_tokens { const int token, type; const char *name; }
211 const debug_tokens[] =
9041c2e3 212{
bbf60fe6
DM
213 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
214 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
215 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
216 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
217 { ARROW, TOKENTYPE_NONE, "ARROW" },
218 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
219 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
220 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
221 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
222 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 223 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
224 { DO, TOKENTYPE_NONE, "DO" },
225 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
226 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
227 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
228 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
229 { ELSE, TOKENTYPE_NONE, "ELSE" },
230 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
231 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
232 { FOR, TOKENTYPE_IVAL, "FOR" },
233 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
234 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
235 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
236 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
237 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
238 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 239 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
240 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
241 { IF, TOKENTYPE_IVAL, "IF" },
242 { LABEL, TOKENTYPE_PVAL, "LABEL" },
243 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
244 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
245 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
246 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
247 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
248 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
249 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
250 { MY, TOKENTYPE_IVAL, "MY" },
251 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
252 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
253 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
254 { OROP, TOKENTYPE_IVAL, "OROP" },
255 { OROR, TOKENTYPE_NONE, "OROR" },
256 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
257 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
258 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
259 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
260 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
261 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
262 { PREINC, TOKENTYPE_NONE, "PREINC" },
263 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
264 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
265 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
266 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
267 { SUB, TOKENTYPE_NONE, "SUB" },
268 { THING, TOKENTYPE_OPVAL, "THING" },
269 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
270 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
271 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
272 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
273 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
274 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 275 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
276 { WHILE, TOKENTYPE_IVAL, "WHILE" },
277 { WORD, TOKENTYPE_OPVAL, "WORD" },
278 { 0, TOKENTYPE_NONE, 0 }
279};
280
281/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 282
bbf60fe6 283STATIC int
f5bd084c 284S_tokereport(pTHX_ I32 rv)
bbf60fe6 285{
97aff369 286 dVAR;
bbf60fe6 287 if (DEBUG_T_TEST) {
bfed75c6 288 const char *name = Nullch;
bbf60fe6 289 enum token_type type = TOKENTYPE_NONE;
f54cb97a 290 const struct debug_tokens *p;
396482e1 291 SV* const report = newSVpvs("<== ");
bbf60fe6 292
f54cb97a 293 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
294 if (p->token == (int)rv) {
295 name = p->name;
296 type = p->type;
297 break;
298 }
299 }
300 if (name)
54667de8 301 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
302 else if ((char)rv > ' ' && (char)rv < '~')
303 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
304 else if (!rv)
396482e1 305 sv_catpvs(report, "EOF");
bbf60fe6
DM
306 else
307 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
308 switch (type) {
309 case TOKENTYPE_NONE:
310 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
311 break;
312 case TOKENTYPE_IVAL:
e4584336 313 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
314 break;
315 case TOKENTYPE_OPNUM:
316 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
317 PL_op_name[yylval.ival]);
318 break;
319 case TOKENTYPE_PVAL:
320 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
321 break;
322 case TOKENTYPE_OPVAL:
b6007c36 323 if (yylval.opval) {
401441c0 324 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 325 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
326 if (yylval.opval->op_type == OP_CONST) {
327 Perl_sv_catpvf(aTHX_ report, " %s",
328 SvPEEK(cSVOPx_sv(yylval.opval)));
329 }
330
331 }
401441c0 332 else
396482e1 333 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
334 break;
335 }
b6007c36 336 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
337 };
338 return (int)rv;
998054bd
SC
339}
340
b6007c36
DM
341
342/* print the buffer with suitable escapes */
343
344STATIC void
345S_printbuf(pTHX_ const char* fmt, const char* s)
346{
396482e1 347 SV* const tmp = newSVpvs("");
b6007c36
DM
348 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
349 SvREFCNT_dec(tmp);
350}
351
8fa7f367
JH
352#endif
353
ffb4593c
NT
354/*
355 * S_ao
356 *
c963b151
BD
357 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
358 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
359 */
360
76e3520e 361STATIC int
cea2e8a9 362S_ao(pTHX_ int toketype)
a0d0e21e 363{
97aff369 364 dVAR;
3280af22
NIS
365 if (*PL_bufptr == '=') {
366 PL_bufptr++;
a0d0e21e
LW
367 if (toketype == ANDAND)
368 yylval.ival = OP_ANDASSIGN;
369 else if (toketype == OROR)
370 yylval.ival = OP_ORASSIGN;
c963b151
BD
371 else if (toketype == DORDOR)
372 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
373 toketype = ASSIGNOP;
374 }
375 return toketype;
376}
377
ffb4593c
NT
378/*
379 * S_no_op
380 * When Perl expects an operator and finds something else, no_op
381 * prints the warning. It always prints "<something> found where
382 * operator expected. It prints "Missing semicolon on previous line?"
383 * if the surprise occurs at the start of the line. "do you need to
384 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
385 * where the compiler doesn't know if foo is a method call or a function.
386 * It prints "Missing operator before end of line" if there's nothing
387 * after the missing operator, or "... before <...>" if there is something
388 * after the missing operator.
389 */
390
76e3520e 391STATIC void
bfed75c6 392S_no_op(pTHX_ const char *what, char *s)
463ee0b2 393{
97aff369 394 dVAR;
9d4ba2ae
AL
395 char * const oldbp = PL_bufptr;
396 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 397
1189a94a
GS
398 if (!s)
399 s = oldbp;
07c798fb 400 else
1189a94a 401 PL_bufptr = s;
cea2e8a9 402 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
403 if (ckWARN_d(WARN_SYNTAX)) {
404 if (is_first)
405 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
406 "\t(Missing semicolon on previous line?)\n");
407 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 408 const char *t;
56da5a46
RGS
409 for (t = PL_oldoldbufptr; *t && (isALNUM_lazy_if(t,UTF) || *t == ':'); t++) ;
410 if (t < PL_bufptr && isSPACE(*t))
411 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
412 "\t(Do you need to predeclare %.*s?)\n",
551405c4 413 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
414 }
415 else {
416 assert(s >= oldbp);
417 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 418 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 419 }
07c798fb 420 }
3280af22 421 PL_bufptr = oldbp;
8990e307
LW
422}
423
ffb4593c
NT
424/*
425 * S_missingterm
426 * Complain about missing quote/regexp/heredoc terminator.
427 * If it's called with (char *)NULL then it cauterizes the line buffer.
428 * If we're in a delimited string and the delimiter is a control
429 * character, it's reformatted into a two-char sequence like ^C.
430 * This is fatal.
431 */
432
76e3520e 433STATIC void
cea2e8a9 434S_missingterm(pTHX_ char *s)
8990e307 435{
97aff369 436 dVAR;
8990e307
LW
437 char tmpbuf[3];
438 char q;
439 if (s) {
9d4ba2ae 440 char * const nl = strrchr(s,'\n');
d2719217 441 if (nl)
8990e307
LW
442 *nl = '\0';
443 }
9d116dd7
JH
444 else if (
445#ifdef EBCDIC
446 iscntrl(PL_multi_close)
447#else
448 PL_multi_close < 32 || PL_multi_close == 127
449#endif
450 ) {
8990e307 451 *tmpbuf = '^';
585ec06d 452 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
453 tmpbuf[2] = '\0';
454 s = tmpbuf;
455 }
456 else {
eb160463 457 *tmpbuf = (char)PL_multi_close;
8990e307
LW
458 tmpbuf[1] = '\0';
459 s = tmpbuf;
460 }
461 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 462 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 463}
79072805 464
ef89dcc3 465#define FEATURE_IS_ENABLED(name) \
0d863452 466 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 467 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
468/*
469 * S_feature_is_enabled
470 * Check whether the named feature is enabled.
471 */
472STATIC bool
473S_feature_is_enabled(pTHX_ char *name, STRLEN namelen)
474{
97aff369 475 dVAR;
0d863452 476 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140
RH
477 char he_name[32] = "feature_";
478 (void) strncpy(&he_name[8], name, 24);
479
480 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
481}
482
ffb4593c
NT
483/*
484 * Perl_deprecate
ffb4593c
NT
485 */
486
79072805 487void
bfed75c6 488Perl_deprecate(pTHX_ const char *s)
a0d0e21e 489{
599cee73 490 if (ckWARN(WARN_DEPRECATED))
9014280d 491 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
492}
493
12bcd1a6 494void
bfed75c6 495Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
496{
497 /* This function should NOT be called for any new deprecated warnings */
498 /* Use Perl_deprecate instead */
499 /* */
500 /* It is here to maintain backward compatibility with the pre-5.8 */
501 /* warnings category hierarchy. The "deprecated" category used to */
502 /* live under the "syntax" category. It is now a top-level category */
503 /* in its own right. */
504
505 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 506 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
507 "Use of %s is deprecated", s);
508}
509
ffb4593c 510/*
9cbb5ea2
GS
511 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
512 * utf16-to-utf8-reversed.
ffb4593c
NT
513 */
514
c39cd008
GS
515#ifdef PERL_CR_FILTER
516static void
517strip_return(SV *sv)
518{
95a20fc0 519 register const char *s = SvPVX_const(sv);
9d4ba2ae 520 register const char * const e = s + SvCUR(sv);
c39cd008
GS
521 /* outer loop optimized to do nothing if there are no CR-LFs */
522 while (s < e) {
523 if (*s++ == '\r' && *s == '\n') {
524 /* hit a CR-LF, need to copy the rest */
525 register char *d = s - 1;
526 *d++ = *s++;
527 while (s < e) {
528 if (*s == '\r' && s[1] == '\n')
529 s++;
530 *d++ = *s++;
531 }
532 SvCUR(sv) -= s - d;
533 return;
534 }
535 }
536}
a868473f 537
76e3520e 538STATIC I32
c39cd008 539S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 540{
f54cb97a 541 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
542 if (count > 0 && !maxlen)
543 strip_return(sv);
544 return count;
a868473f
NIS
545}
546#endif
547
ffb4593c
NT
548/*
549 * Perl_lex_start
9cbb5ea2
GS
550 * Initialize variables. Uses the Perl save_stack to save its state (for
551 * recursive calls to the parser).
ffb4593c
NT
552 */
553
a0d0e21e 554void
864dbfa3 555Perl_lex_start(pTHX_ SV *line)
79072805 556{
97aff369 557 dVAR;
cfd0369c 558 const char *s;
8990e307
LW
559 STRLEN len;
560
3280af22
NIS
561 SAVEI32(PL_lex_dojoin);
562 SAVEI32(PL_lex_brackets);
3280af22
NIS
563 SAVEI32(PL_lex_casemods);
564 SAVEI32(PL_lex_starts);
565 SAVEI32(PL_lex_state);
7766f137 566 SAVEVPTR(PL_lex_inpat);
3280af22 567 SAVEI32(PL_lex_inwhat);
18b09519
GS
568 if (PL_lex_state == LEX_KNOWNEXT) {
569 I32 toke = PL_nexttoke;
570 while (--toke >= 0) {
571 SAVEI32(PL_nexttype[toke]);
572 SAVEVPTR(PL_nextval[toke]);
573 }
574 SAVEI32(PL_nexttoke);
18b09519 575 }
57843af0 576 SAVECOPLINE(PL_curcop);
3280af22
NIS
577 SAVEPPTR(PL_bufptr);
578 SAVEPPTR(PL_bufend);
579 SAVEPPTR(PL_oldbufptr);
580 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
581 SAVEPPTR(PL_last_lop);
582 SAVEPPTR(PL_last_uni);
3280af22
NIS
583 SAVEPPTR(PL_linestart);
584 SAVESPTR(PL_linestr);
8edd5f42
RGS
585 SAVEGENERICPV(PL_lex_brackstack);
586 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 587 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
588 SAVESPTR(PL_lex_stuff);
589 SAVEI32(PL_lex_defer);
09bef843 590 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 591 SAVESPTR(PL_lex_repl);
bebdddfc
GS
592 SAVEINT(PL_expect);
593 SAVEINT(PL_lex_expect);
3280af22
NIS
594
595 PL_lex_state = LEX_NORMAL;
596 PL_lex_defer = 0;
597 PL_expect = XSTATE;
598 PL_lex_brackets = 0;
a02a5408
JC
599 Newx(PL_lex_brackstack, 120, char);
600 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
601 PL_lex_casemods = 0;
602 *PL_lex_casestack = '\0';
603 PL_lex_dojoin = 0;
604 PL_lex_starts = 0;
605 PL_lex_stuff = Nullsv;
606 PL_lex_repl = Nullsv;
607 PL_lex_inpat = 0;
76be56bc 608 PL_nexttoke = 0;
3280af22 609 PL_lex_inwhat = 0;
09bef843 610 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
611 PL_linestr = line;
612 if (SvREADONLY(PL_linestr))
613 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 614 s = SvPV_const(PL_linestr, len);
6f27f9a7 615 if (!len || s[len-1] != ';') {
3280af22
NIS
616 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
617 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 618 sv_catpvs(PL_linestr, "\n;");
8990e307 619 }
3280af22
NIS
620 SvTEMP_off(PL_linestr);
621 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
622 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
207e3d1a 623 PL_last_lop = PL_last_uni = Nullch;
3280af22 624 PL_rsfp = 0;
79072805 625}
a687059c 626
ffb4593c
NT
627/*
628 * Perl_lex_end
9cbb5ea2
GS
629 * Finalizer for lexing operations. Must be called when the parser is
630 * done with the lexer.
ffb4593c
NT
631 */
632
463ee0b2 633void
864dbfa3 634Perl_lex_end(pTHX)
463ee0b2 635{
97aff369 636 dVAR;
3280af22 637 PL_doextract = FALSE;
463ee0b2
LW
638}
639
ffb4593c
NT
640/*
641 * S_incline
642 * This subroutine has nothing to do with tilting, whether at windmills
643 * or pinball tables. Its name is short for "increment line". It
57843af0 644 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 645 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
646 * # line 500 "foo.pm"
647 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
648 */
649
76e3520e 650STATIC void
cea2e8a9 651S_incline(pTHX_ char *s)
463ee0b2 652{
97aff369 653 dVAR;
463ee0b2
LW
654 char *t;
655 char *n;
73659bf1 656 char *e;
463ee0b2 657 char ch;
463ee0b2 658
57843af0 659 CopLINE_inc(PL_curcop);
463ee0b2
LW
660 if (*s++ != '#')
661 return;
bf4acbe4 662 while (SPACE_OR_TAB(*s)) s++;
73659bf1
GS
663 if (strnEQ(s, "line", 4))
664 s += 4;
665 else
666 return;
084592ab 667 if (SPACE_OR_TAB(*s))
73659bf1 668 s++;
4e553d73 669 else
73659bf1 670 return;
bf4acbe4 671 while (SPACE_OR_TAB(*s)) s++;
463ee0b2
LW
672 if (!isDIGIT(*s))
673 return;
674 n = s;
675 while (isDIGIT(*s))
676 s++;
bf4acbe4 677 while (SPACE_OR_TAB(*s))
463ee0b2 678 s++;
73659bf1 679 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 680 s++;
73659bf1
GS
681 e = t + 1;
682 }
463ee0b2 683 else {
463ee0b2 684 for (t = s; !isSPACE(*t); t++) ;
73659bf1 685 e = t;
463ee0b2 686 }
bf4acbe4 687 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
688 e++;
689 if (*e != '\n' && *e != '\0')
690 return; /* false alarm */
691
463ee0b2
LW
692 ch = *t;
693 *t = '\0';
f4dd75d9 694 if (t - s > 0) {
8a5ee598 695#ifndef USE_ITHREADS
c4420975 696 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
697 STRLEN tmplen = cf ? strlen(cf) : 0;
698 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
699 /* must copy *{"::_<(eval N)[oldfilename:L]"}
700 * to *{"::_<newfilename"} */
701 char smallbuf[256], smallbuf2[256];
702 char *tmpbuf, *tmpbuf2;
8a5ee598 703 GV **gvp, *gv2;
e66cf94c
RGS
704 STRLEN tmplen2 = strlen(s);
705 if (tmplen + 3 < sizeof smallbuf)
706 tmpbuf = smallbuf;
707 else
708 Newx(tmpbuf, tmplen + 3, char);
709 if (tmplen2 + 3 < sizeof smallbuf2)
710 tmpbuf2 = smallbuf2;
711 else
712 Newx(tmpbuf2, tmplen2 + 3, char);
713 tmpbuf[0] = tmpbuf2[0] = '_';
714 tmpbuf[1] = tmpbuf2[1] = '<';
715 memcpy(tmpbuf + 2, cf, ++tmplen);
716 memcpy(tmpbuf2 + 2, s, ++tmplen2);
717 ++tmplen; ++tmplen2;
8a5ee598
RGS
718 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
719 if (gvp) {
720 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
721 if (!isGV(gv2))
722 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
723 /* adjust ${"::_<newfilename"} to store the new file name */
724 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
725 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
726 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
727 }
e66cf94c
RGS
728 if (tmpbuf != smallbuf) Safefree(tmpbuf);
729 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
730 }
8a5ee598 731#endif
05ec9bb3 732 CopFILE_free(PL_curcop);
57843af0 733 CopFILE_set(PL_curcop, s);
f4dd75d9 734 }
463ee0b2 735 *t = ch;
57843af0 736 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
737}
738
ffb4593c
NT
739/*
740 * S_skipspace
741 * Called to gobble the appropriate amount and type of whitespace.
742 * Skips comments as well.
743 */
744
76e3520e 745STATIC char *
cea2e8a9 746S_skipspace(pTHX_ register char *s)
a687059c 747{
97aff369 748 dVAR;
3280af22 749 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 750 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2
LW
751 s++;
752 return s;
753 }
754 for (;;) {
fd049845 755 STRLEN prevlen;
09bef843 756 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 757 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
758 while (s < PL_bufend && isSPACE(*s)) {
759 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
760 incline(s);
761 }
ffb4593c
NT
762
763 /* comment */
3280af22
NIS
764 if (s < PL_bufend && *s == '#') {
765 while (s < PL_bufend && *s != '\n')
463ee0b2 766 s++;
60e6418e 767 if (s < PL_bufend) {
463ee0b2 768 s++;
60e6418e
GS
769 if (PL_in_eval && !PL_rsfp) {
770 incline(s);
771 continue;
772 }
773 }
463ee0b2 774 }
ffb4593c
NT
775
776 /* only continue to recharge the buffer if we're at the end
777 * of the buffer, we're not reading from a source filter, and
778 * we're in normal lexing mode
779 */
09bef843
SB
780 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
781 PL_lex_state == LEX_FORMLINE)
463ee0b2 782 return s;
ffb4593c
NT
783
784 /* try to recharge the buffer */
9cbb5ea2
GS
785 if ((s = filter_gets(PL_linestr, PL_rsfp,
786 (prevlen = SvCUR(PL_linestr)))) == Nullch)
787 {
788 /* end of file. Add on the -p or -n magic */
01a19ab0
NC
789 if (PL_minus_p) {
790 sv_setpv(PL_linestr,
791 ";}continue{print or die qq(-p destination: $!\\n);}");
3280af22 792 PL_minus_n = PL_minus_p = 0;
a0d0e21e 793 }
01a19ab0
NC
794 else if (PL_minus_n) {
795 sv_setpvn(PL_linestr, ";}", 2);
796 PL_minus_n = 0;
797 }
a0d0e21e 798 else
4147a61b 799 sv_setpvn(PL_linestr,";", 1);
ffb4593c
NT
800
801 /* reset variables for next time we lex */
9cbb5ea2
GS
802 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
803 = SvPVX(PL_linestr);
3280af22 804 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 805 PL_last_lop = PL_last_uni = Nullch;
ffb4593c
NT
806
807 /* Close the filehandle. Could be from -P preprocessor,
808 * STDIN, or a regular file. If we were reading code from
809 * STDIN (because the commandline held no -e or filename)
810 * then we don't close it, we reset it so the code can
811 * read from STDIN too.
812 */
813
3280af22
NIS
814 if (PL_preprocess && !PL_in_eval)
815 (void)PerlProc_pclose(PL_rsfp);
816 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
817 PerlIO_clearerr(PL_rsfp);
8990e307 818 else
3280af22
NIS
819 (void)PerlIO_close(PL_rsfp);
820 PL_rsfp = Nullfp;
463ee0b2
LW
821 return s;
822 }
ffb4593c
NT
823
824 /* not at end of file, so we only read another line */
09bef843
SB
825 /* make corresponding updates to old pointers, for yyerror() */
826 oldprevlen = PL_oldbufptr - PL_bufend;
827 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
828 if (PL_last_uni)
829 oldunilen = PL_last_uni - PL_bufend;
830 if (PL_last_lop)
831 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
832 PL_linestart = PL_bufptr = s + prevlen;
833 PL_bufend = s + SvCUR(PL_linestr);
834 s = PL_bufptr;
09bef843
SB
835 PL_oldbufptr = s + oldprevlen;
836 PL_oldoldbufptr = s + oldoldprevlen;
837 if (PL_last_uni)
838 PL_last_uni = s + oldunilen;
839 if (PL_last_lop)
840 PL_last_lop = s + oldloplen;
a0d0e21e 841 incline(s);
ffb4593c
NT
842
843 /* debugger active and we're not compiling the debugger code,
844 * so store the line into the debugger's array of lines
845 */
3280af22 846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 847 SV * const sv = newSV(0);
8990e307
LW
848
849 sv_upgrade(sv, SVt_PVMG);
3280af22 850 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 851 (void)SvIOK_on(sv);
45977657 852 SvIV_set(sv, 0);
36c7798d 853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 854 }
463ee0b2 855 }
a687059c 856}
378cc40b 857
ffb4593c
NT
858/*
859 * S_check_uni
860 * Check the unary operators to ensure there's no ambiguity in how they're
861 * used. An ambiguous piece of code would be:
862 * rand + 5
863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
864 * the +5 is its argument.
865 */
866
76e3520e 867STATIC void
cea2e8a9 868S_check_uni(pTHX)
ba106d47 869{
97aff369 870 dVAR;
2f3197b3 871 char *s;
a0d0e21e 872 char *t;
2f3197b3 873
3280af22 874 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 875 return;
3280af22
NIS
876 while (isSPACE(*PL_last_uni))
877 PL_last_uni++;
7e2040f0 878 for (s = PL_last_uni; isALNUM_lazy_if(s,UTF) || *s == '-'; s++) ;
3280af22 879 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 880 return;
6136c704
AL
881
882 /* XXX Things like this are just so nasty. We shouldn't be modifying
883 source code, even if we realquick set it back. */
0453d815 884 if (ckWARN_d(WARN_AMBIGUOUS)){
9d4ba2ae 885 const char ch = *s;
0453d815 886 *s = '\0';
9014280d 887 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2d5ccbba 888 "Warning: Use of \"%s\" without parentheses is ambiguous",
0453d815
PM
889 PL_last_uni);
890 *s = ch;
891 }
2f3197b3
LW
892}
893
ffb4593c
NT
894/*
895 * LOP : macro to build a list operator. Its behaviour has been replaced
896 * with a subroutine, S_lop() for which LOP is just another name.
897 */
898
a0d0e21e
LW
899#define LOP(f,x) return lop(f,x,s)
900
ffb4593c
NT
901/*
902 * S_lop
903 * Build a list operator (or something that might be one). The rules:
904 * - if we have a next token, then it's a list operator [why?]
905 * - if the next thing is an opening paren, then it's a function
906 * - else it's a list operator
907 */
908
76e3520e 909STATIC I32
a0be28da 910S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 911{
97aff369 912 dVAR;
79072805 913 yylval.ival = f;
35c8bce7 914 CLINE;
3280af22
NIS
915 PL_expect = x;
916 PL_bufptr = s;
917 PL_last_lop = PL_oldbufptr;
eb160463 918 PL_last_lop_op = (OPCODE)f;
3280af22 919 if (PL_nexttoke)
bbf60fe6 920 return REPORT(LSTOP);
79072805 921 if (*s == '(')
bbf60fe6 922 return REPORT(FUNC);
79072805
LW
923 s = skipspace(s);
924 if (*s == '(')
bbf60fe6 925 return REPORT(FUNC);
79072805 926 else
bbf60fe6 927 return REPORT(LSTOP);
79072805
LW
928}
929
ffb4593c
NT
930/*
931 * S_force_next
9cbb5ea2 932 * When the lexer realizes it knows the next token (for instance,
ffb4593c 933 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2
GS
934 * to know what token to return the next time the lexer is called. Caller
935 * will need to set PL_nextval[], and possibly PL_expect to ensure the lexer
936 * handles the token correctly.
ffb4593c
NT
937 */
938
4e553d73 939STATIC void
cea2e8a9 940S_force_next(pTHX_ I32 type)
79072805 941{
97aff369 942 dVAR;
3280af22
NIS
943 PL_nexttype[PL_nexttoke] = type;
944 PL_nexttoke++;
945 if (PL_lex_state != LEX_KNOWNEXT) {
946 PL_lex_defer = PL_lex_state;
947 PL_lex_expect = PL_expect;
948 PL_lex_state = LEX_KNOWNEXT;
79072805
LW
949 }
950}
951
d0a148a6
NC
952STATIC SV *
953S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
954{
97aff369 955 dVAR;
9d4ba2ae 956 SV * const sv = newSVpvn(start,len);
bfed75c6 957 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
958 SvUTF8_on(sv);
959 return sv;
960}
961
ffb4593c
NT
962/*
963 * S_force_word
964 * When the lexer knows the next thing is a word (for instance, it has
965 * just seen -> and it knows that the next char is a word char, then
966 * it calls S_force_word to stick the next word into the PL_next lookahead.
967 *
968 * Arguments:
b1b65b59 969 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
970 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
971 * int check_keyword : if true, Perl checks to make sure the word isn't
972 * a keyword (do this if the word is a label, e.g. goto FOO)
973 * int allow_pack : if true, : characters will also be allowed (require,
974 * use, etc. do this)
9cbb5ea2 975 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
976 */
977
76e3520e 978STATIC char *
cea2e8a9 979S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 980{
97aff369 981 dVAR;
463ee0b2
LW
982 register char *s;
983 STRLEN len;
4e553d73 984
463ee0b2
LW
985 start = skipspace(start);
986 s = start;
7e2040f0 987 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 988 (allow_pack && *s == ':') ||
15f0808c 989 (allow_initial_tick && *s == '\'') )
a0d0e21e 990 {
3280af22
NIS
991 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
992 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2
LW
993 return start;
994 if (token == METHOD) {
995 s = skipspace(s);
996 if (*s == '(')
3280af22 997 PL_expect = XTERM;
463ee0b2 998 else {
3280af22 999 PL_expect = XOPERATOR;
463ee0b2 1000 }
79072805 1001 }
d0a148a6
NC
1002 PL_nextval[PL_nexttoke].opval
1003 = (OP*)newSVOP(OP_CONST,0,
1004 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
3280af22 1005 PL_nextval[PL_nexttoke].opval->op_private |= OPpCONST_BARE;
79072805
LW
1006 force_next(token);
1007 }
1008 return s;
1009}
1010
ffb4593c
NT
1011/*
1012 * S_force_ident
9cbb5ea2 1013 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1014 * text only contains the "foo" portion. The first argument is a pointer
1015 * to the "foo", and the second argument is the type symbol to prefix.
1016 * Forces the next token to be a "WORD".
9cbb5ea2 1017 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1018 */
1019
76e3520e 1020STATIC void
bfed75c6 1021S_force_ident(pTHX_ register const char *s, int kind)
79072805 1022{
97aff369 1023 dVAR;
79072805 1024 if (s && *s) {
90e5519e
NC
1025 const STRLEN len = strlen(s);
1026 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
3280af22 1027 PL_nextval[PL_nexttoke].opval = o;
79072805 1028 force_next(WORD);
748a9306 1029 if (kind) {
11343788 1030 o->op_private = OPpCONST_ENTERED;
55497cff 1031 /* XXX see note in pp_entereval() for why we forgo typo
1032 warnings if the symbol must be introduced in an eval.
1033 GSAR 96-10-12 */
90e5519e
NC
1034 gv_fetchpvn_flags(s, len,
1035 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1036 : GV_ADD,
1037 kind == '$' ? SVt_PV :
1038 kind == '@' ? SVt_PVAV :
1039 kind == '%' ? SVt_PVHV :
a0d0e21e 1040 SVt_PVGV
90e5519e 1041 );
748a9306 1042 }
79072805
LW
1043 }
1044}
1045
1571675a
GS
1046NV
1047Perl_str_to_version(pTHX_ SV *sv)
1048{
1049 NV retval = 0.0;
1050 NV nshift = 1.0;
1051 STRLEN len;
cfd0369c 1052 const char *start = SvPV_const(sv,len);
9d4ba2ae 1053 const char * const end = start + len;
504618e9 1054 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1055 while (start < end) {
ba210ebe 1056 STRLEN skip;
1571675a
GS
1057 UV n;
1058 if (utf)
9041c2e3 1059 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1060 else {
1061 n = *(U8*)start;
1062 skip = 1;
1063 }
1064 retval += ((NV)n)/nshift;
1065 start += skip;
1066 nshift *= 1000;
1067 }
1068 return retval;
1069}
1070
4e553d73 1071/*
ffb4593c
NT
1072 * S_force_version
1073 * Forces the next token to be a version number.
e759cc13
RGS
1074 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1075 * and if "guessing" is TRUE, then no new token is created (and the caller
1076 * must use an alternative parsing method).
ffb4593c
NT
1077 */
1078
76e3520e 1079STATIC char *
e759cc13 1080S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1081{
97aff369 1082 dVAR;
89bfa8cd 1083 OP *version = Nullop;
44dcb63b 1084 char *d;
89bfa8cd 1085
1086 s = skipspace(s);
1087
44dcb63b 1088 d = s;
dd629d5b 1089 if (*d == 'v')
44dcb63b 1090 d++;
44dcb63b 1091 if (isDIGIT(*d)) {
e759cc13
RGS
1092 while (isDIGIT(*d) || *d == '_' || *d == '.')
1093 d++;
9f3d182e 1094 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1095 SV *ver;
b73d6f50 1096 s = scan_num(s, &yylval);
89bfa8cd 1097 version = yylval.opval;
dd629d5b
GS
1098 ver = cSVOPx(version)->op_sv;
1099 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1100 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1101 SvNV_set(ver, str_to_version(ver));
1571675a 1102 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1103 }
89bfa8cd 1104 }
e759cc13
RGS
1105 else if (guessing)
1106 return s;
89bfa8cd 1107 }
1108
1109 /* NOTE: The parser sees the package name and the VERSION swapped */
3280af22 1110 PL_nextval[PL_nexttoke].opval = version;
4e553d73 1111 force_next(WORD);
89bfa8cd 1112
e759cc13 1113 return s;
89bfa8cd 1114}
1115
ffb4593c
NT
1116/*
1117 * S_tokeq
1118 * Tokenize a quoted string passed in as an SV. It finds the next
1119 * chunk, up to end of string or a backslash. It may make a new
1120 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1121 * turns \\ into \.
1122 */
1123
76e3520e 1124STATIC SV *
cea2e8a9 1125S_tokeq(pTHX_ SV *sv)
79072805 1126{
97aff369 1127 dVAR;
79072805
LW
1128 register char *s;
1129 register char *send;
1130 register char *d;
b3ac6de7
IZ
1131 STRLEN len = 0;
1132 SV *pv = sv;
79072805
LW
1133
1134 if (!SvLEN(sv))
b3ac6de7 1135 goto finish;
79072805 1136
a0d0e21e 1137 s = SvPV_force(sv, len);
21a311ee 1138 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1139 goto finish;
463ee0b2 1140 send = s + len;
79072805
LW
1141 while (s < send && *s != '\\')
1142 s++;
1143 if (s == send)
b3ac6de7 1144 goto finish;
79072805 1145 d = s;
be4731d2 1146 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1147 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1148 if (SvUTF8(sv))
1149 SvUTF8_on(pv);
1150 }
79072805
LW
1151 while (s < send) {
1152 if (*s == '\\') {
a0d0e21e 1153 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1154 s++; /* all that, just for this */
1155 }
1156 *d++ = *s++;
1157 }
1158 *d = '\0';
95a20fc0 1159 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1160 finish:
3280af22 1161 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1162 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1163 return sv;
1164}
1165
ffb4593c
NT
1166/*
1167 * Now come three functions related to double-quote context,
1168 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1169 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1170 * interact with PL_lex_state, and create fake ( ... ) argument lists
1171 * to handle functions and concatenation.
1172 * They assume that whoever calls them will be setting up a fake
1173 * join call, because each subthing puts a ',' after it. This lets
1174 * "lower \luPpEr"
1175 * become
1176 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1177 *
1178 * (I'm not sure whether the spurious commas at the end of lcfirst's
1179 * arguments and join's arguments are created or not).
1180 */
1181
1182/*
1183 * S_sublex_start
1184 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1185 *
1186 * Pattern matching will set PL_lex_op to the pattern-matching op to
1187 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1188 *
1189 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1190 *
1191 * Everything else becomes a FUNC.
1192 *
1193 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1194 * had an OP_CONST or OP_READLINE). This just sets us up for a
1195 * call to S_sublex_push().
1196 */
1197
76e3520e 1198STATIC I32
cea2e8a9 1199S_sublex_start(pTHX)
79072805 1200{
97aff369 1201 dVAR;
0d46e09a 1202 register const I32 op_type = yylval.ival;
79072805
LW
1203
1204 if (op_type == OP_NULL) {
3280af22
NIS
1205 yylval.opval = PL_lex_op;
1206 PL_lex_op = Nullop;
79072805
LW
1207 return THING;
1208 }
1209 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1210 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1211
1212 if (SvTYPE(sv) == SVt_PVIV) {
1213 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1214 STRLEN len;
cfd0369c 1215 const char *p = SvPV_const(sv, len);
f54cb97a 1216 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1217 if (SvUTF8(sv))
1218 SvUTF8_on(nsv);
b3ac6de7
IZ
1219 SvREFCNT_dec(sv);
1220 sv = nsv;
4e553d73 1221 }
b3ac6de7 1222 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
3280af22 1223 PL_lex_stuff = Nullsv;
6f33ba73
RGS
1224 /* Allow <FH> // "foo" */
1225 if (op_type == OP_READLINE)
1226 PL_expect = XTERMORDORDOR;
79072805
LW
1227 return THING;
1228 }
1229
3280af22
NIS
1230 PL_sublex_info.super_state = PL_lex_state;
1231 PL_sublex_info.sub_inwhat = op_type;
1232 PL_sublex_info.sub_op = PL_lex_op;
1233 PL_lex_state = LEX_INTERPPUSH;
55497cff 1234
3280af22
NIS
1235 PL_expect = XTERM;
1236 if (PL_lex_op) {
1237 yylval.opval = PL_lex_op;
1238 PL_lex_op = Nullop;
55497cff 1239 return PMFUNC;
1240 }
1241 else
1242 return FUNC;
1243}
1244
ffb4593c
NT
1245/*
1246 * S_sublex_push
1247 * Create a new scope to save the lexing state. The scope will be
1248 * ended in S_sublex_done. Returns a '(', starting the function arguments
1249 * to the uc, lc, etc. found before.
1250 * Sets PL_lex_state to LEX_INTERPCONCAT.
1251 */
1252
76e3520e 1253STATIC I32
cea2e8a9 1254S_sublex_push(pTHX)
55497cff 1255{
27da23d5 1256 dVAR;
f46d017c 1257 ENTER;
55497cff 1258
3280af22
NIS
1259 PL_lex_state = PL_sublex_info.super_state;
1260 SAVEI32(PL_lex_dojoin);
1261 SAVEI32(PL_lex_brackets);
3280af22
NIS
1262 SAVEI32(PL_lex_casemods);
1263 SAVEI32(PL_lex_starts);
1264 SAVEI32(PL_lex_state);
7766f137 1265 SAVEVPTR(PL_lex_inpat);
3280af22 1266 SAVEI32(PL_lex_inwhat);
57843af0 1267 SAVECOPLINE(PL_curcop);
3280af22 1268 SAVEPPTR(PL_bufptr);
8452ff4b 1269 SAVEPPTR(PL_bufend);
3280af22
NIS
1270 SAVEPPTR(PL_oldbufptr);
1271 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1272 SAVEPPTR(PL_last_lop);
1273 SAVEPPTR(PL_last_uni);
3280af22
NIS
1274 SAVEPPTR(PL_linestart);
1275 SAVESPTR(PL_linestr);
8edd5f42
RGS
1276 SAVEGENERICPV(PL_lex_brackstack);
1277 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1278
1279 PL_linestr = PL_lex_stuff;
1280 PL_lex_stuff = Nullsv;
1281
9cbb5ea2
GS
1282 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1283 = SvPVX(PL_linestr);
3280af22 1284 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1285 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1286 SAVEFREESV(PL_linestr);
1287
1288 PL_lex_dojoin = FALSE;
1289 PL_lex_brackets = 0;
a02a5408
JC
1290 Newx(PL_lex_brackstack, 120, char);
1291 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1292 PL_lex_casemods = 0;
1293 *PL_lex_casestack = '\0';
1294 PL_lex_starts = 0;
1295 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1296 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1297
1298 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1299 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1300 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1301 else
3280af22 1302 PL_lex_inpat = Nullop;
79072805 1303
55497cff 1304 return '(';
79072805
LW
1305}
1306
ffb4593c
NT
1307/*
1308 * S_sublex_done
1309 * Restores lexer state after a S_sublex_push.
1310 */
1311
76e3520e 1312STATIC I32
cea2e8a9 1313S_sublex_done(pTHX)
79072805 1314{
27da23d5 1315 dVAR;
3280af22 1316 if (!PL_lex_starts++) {
396482e1 1317 SV * const sv = newSVpvs("");
9aa983d2
JH
1318 if (SvUTF8(PL_linestr))
1319 SvUTF8_on(sv);
3280af22 1320 PL_expect = XOPERATOR;
9aa983d2 1321 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1322 return THING;
1323 }
1324
3280af22
NIS
1325 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1326 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1327 return yylex();
79072805
LW
1328 }
1329
ffb4593c 1330 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1331 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1332 PL_linestr = PL_lex_repl;
1333 PL_lex_inpat = 0;
1334 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1335 PL_bufend += SvCUR(PL_linestr);
207e3d1a 1336 PL_last_lop = PL_last_uni = Nullch;
3280af22
NIS
1337 SAVEFREESV(PL_linestr);
1338 PL_lex_dojoin = FALSE;
1339 PL_lex_brackets = 0;
3280af22
NIS
1340 PL_lex_casemods = 0;
1341 *PL_lex_casestack = '\0';
1342 PL_lex_starts = 0;
25da4f38 1343 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1344 PL_lex_state = LEX_INTERPNORMAL;
1345 PL_lex_starts++;
e9fa98b2
HS
1346 /* we don't clear PL_lex_repl here, so that we can check later
1347 whether this is an evalled subst; that means we rely on the
1348 logic to ensure sublex_done() is called again only via the
1349 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1350 }
e9fa98b2 1351 else {
3280af22 1352 PL_lex_state = LEX_INTERPCONCAT;
e9fa98b2
HS
1353 PL_lex_repl = Nullsv;
1354 }
79072805 1355 return ',';
ffed7fef
LW
1356 }
1357 else {
f46d017c 1358 LEAVE;
3280af22
NIS
1359 PL_bufend = SvPVX(PL_linestr);
1360 PL_bufend += SvCUR(PL_linestr);
1361 PL_expect = XOPERATOR;
09bef843 1362 PL_sublex_info.sub_inwhat = 0;
79072805 1363 return ')';
ffed7fef
LW
1364 }
1365}
1366
02aa26ce
NT
1367/*
1368 scan_const
1369
1370 Extracts a pattern, double-quoted string, or transliteration. This
1371 is terrifying code.
1372
3280af22
NIS
1373 It looks at lex_inwhat and PL_lex_inpat to find out whether it's
1374 processing a pattern (PL_lex_inpat is true), a transliteration
02aa26ce
NT
1375 (lex_inwhat & OP_TRANS is true), or a double-quoted string.
1376
9b599b2a
GS
1377 Returns a pointer to the character scanned up to. Iff this is
1378 advanced from the start pointer supplied (ie if anything was
1379 successfully parsed), will leave an OP for the substring scanned
1380 in yylval. Caller must intuit reason for not parsing further
1381 by looking at the next characters herself.
1382
02aa26ce
NT
1383 In patterns:
1384 backslashes:
1385 double-quoted style: \r and \n
1386 regexp special ones: \D \s
1387 constants: \x3
1388 backrefs: \1 (deprecated in substitution replacements)
1389 case and quoting: \U \Q \E
1390 stops on @ and $, but not for $ as tail anchor
1391
1392 In transliterations:
1393 characters are VERY literal, except for - not at the start or end
1394 of the string, which indicates a range. scan_const expands the
1395 range to the full set of intermediate characters.
1396
1397 In double-quoted strings:
1398 backslashes:
1399 double-quoted style: \r and \n
1400 constants: \x3
1401 backrefs: \1 (deprecated)
1402 case and quoting: \U \Q \E
1403 stops on @ and $
1404
1405 scan_const does *not* construct ops to handle interpolated strings.
1406 It stops processing as soon as it finds an embedded $ or @ variable
1407 and leaves it to the caller to work out what's going on.
1408
da6eedaa 1409 @ in pattern could be: @foo, @{foo}, @$foo, @'foo, @::foo.
02aa26ce
NT
1410
1411 $ in pattern could be $foo or could be tail anchor. Assumption:
1412 it's a tail anchor if $ is the last thing in the string, or if it's
1413 followed by one of ")| \n\t"
1414
1415 \1 (backreferences) are turned into $1
1416
1417 The structure of the code is
1418 while (there's a character to process) {
1419 handle transliteration ranges
1420 skip regexp comments
1421 skip # initiated comments in //x patterns
1422 check for embedded @foo
1423 check for embedded scalars
1424 if (backslash) {
1425 leave intact backslashes from leave (below)
1426 deprecate \1 in strings and sub replacements
1427 handle string-changing backslashes \l \U \Q \E, etc.
1428 switch (what was escaped) {
1429 handle - in a transliteration (becomes a literal -)
1430 handle \132 octal characters
1431 handle 0x15 hex characters
1432 handle \cV (control V)
1433 handle printf backslashes (\f, \r, \n, etc)
1434 } (end switch)
1435 } (end if backslash)
1436 } (end while character to read)
4e553d73 1437
02aa26ce
NT
1438*/
1439
76e3520e 1440STATIC char *
cea2e8a9 1441S_scan_const(pTHX_ char *start)
79072805 1442{
97aff369 1443 dVAR;
3280af22 1444 register char *send = PL_bufend; /* end of the constant */
561b68a9 1445 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1446 register char *s = start; /* start of the constant */
1447 register char *d = SvPVX(sv); /* destination for copies */
1448 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1449 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1450 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1451 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1452 UV uv;
4c3a8340
TS
1453#ifdef EBCDIC
1454 UV literal_endpoint = 0;
1455#endif
012bcf8d 1456
dff6d3cd 1457 const char *leaveit = /* set of acceptably-backslashed characters */
3280af22 1458 PL_lex_inpat
b6d5fef8 1459 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1460 : "";
79072805 1461
2b9d42f0
NIS
1462 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1463 /* If we are doing a trans and we know we want UTF8 set expectation */
1464 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1465 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1466 }
1467
1468
79072805 1469 while (s < send || dorange) {
02aa26ce 1470 /* get transliterations out of the way (they're most literal) */
3280af22 1471 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1472 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1473 if (dorange) {
1ba5c669
JH
1474 I32 i; /* current expanded character */
1475 I32 min; /* first character in range */
1476 I32 max; /* last character in range */
02aa26ce 1477
2b9d42f0 1478 if (has_utf8) {
9d4ba2ae 1479 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1480 char *e = d++;
1481 while (e-- > c)
1482 *(e + 1) = *e;
25716404 1483 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1484 /* mark the range as done, and continue */
1485 dorange = FALSE;
1486 didrange = TRUE;
1487 continue;
1488 }
2b9d42f0 1489
95a20fc0 1490 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1491 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1492 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1493 d -= 2; /* eat the first char and the - */
1494
8ada0baa
JH
1495 min = (U8)*d; /* first char in range */
1496 max = (U8)d[1]; /* last char in range */
1497
c2e66d9e 1498 if (min > max) {
01ec43d0 1499 Perl_croak(aTHX_
d1573ac7 1500 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1501 (char)min, (char)max);
c2e66d9e
GS
1502 }
1503
c7f1f016 1504#ifdef EBCDIC
4c3a8340
TS
1505 if (literal_endpoint == 2 &&
1506 ((isLOWER(min) && isLOWER(max)) ||
1507 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1508 if (isLOWER(min)) {
1509 for (i = min; i <= max; i++)
1510 if (isLOWER(i))
db42d148 1511 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1512 } else {
1513 for (i = min; i <= max; i++)
1514 if (isUPPER(i))
db42d148 1515 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1516 }
1517 }
1518 else
1519#endif
1520 for (i = min; i <= max; i++)
eb160463 1521 *d++ = (char)i;
02aa26ce
NT
1522
1523 /* mark the range as done, and continue */
79072805 1524 dorange = FALSE;
01ec43d0 1525 didrange = TRUE;
4c3a8340
TS
1526#ifdef EBCDIC
1527 literal_endpoint = 0;
1528#endif
79072805 1529 continue;
4e553d73 1530 }
02aa26ce
NT
1531
1532 /* range begins (ignore - as first or last char) */
79072805 1533 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1534 if (didrange) {
1fafa243 1535 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1536 }
2b9d42f0 1537 if (has_utf8) {
25716404 1538 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1539 s++;
1540 continue;
1541 }
79072805
LW
1542 dorange = TRUE;
1543 s++;
01ec43d0
GS
1544 }
1545 else {
1546 didrange = FALSE;
4c3a8340
TS
1547#ifdef EBCDIC
1548 literal_endpoint = 0;
1549#endif
01ec43d0 1550 }
79072805 1551 }
02aa26ce
NT
1552
1553 /* if we get here, we're not doing a transliteration */
1554
0f5d15d6
IZ
1555 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1556 except for the last char, which will be done separately. */
3280af22 1557 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1558 if (s[2] == '#') {
e994fd66 1559 while (s+1 < send && *s != ')')
db42d148 1560 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1561 }
1562 else if (s[2] == '{' /* This should match regcomp.c */
1563 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1564 {
cc6b7395 1565 I32 count = 1;
0f5d15d6 1566 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1567 char c;
1568
d9f97599
GS
1569 while (count && (c = *regparse)) {
1570 if (c == '\\' && regparse[1])
1571 regparse++;
4e553d73 1572 else if (c == '{')
cc6b7395 1573 count++;
4e553d73 1574 else if (c == '}')
cc6b7395 1575 count--;
d9f97599 1576 regparse++;
cc6b7395 1577 }
e994fd66 1578 if (*regparse != ')')
5bdf89e7 1579 regparse--; /* Leave one char for continuation. */
0f5d15d6 1580 while (s < regparse)
db42d148 1581 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1582 }
748a9306 1583 }
02aa26ce
NT
1584
1585 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1586 else if (*s == '#' && PL_lex_inpat &&
1587 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1588 while (s+1 < send && *s != '\n')
db42d148 1589 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1590 }
02aa26ce 1591
5d1d4326 1592 /* check for embedded arrays
da6eedaa 1593 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1594 */
7e2040f0 1595 else if (*s == '@' && s[1]
5d1d4326 1596 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1597 break;
02aa26ce
NT
1598
1599 /* check for embedded scalars. only stop if we're sure it's a
1600 variable.
1601 */
79072805 1602 else if (*s == '$') {
3280af22 1603 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1604 break;
6002328a 1605 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1606 break; /* in regexp, $ might be tail anchor */
1607 }
02aa26ce 1608
2b9d42f0
NIS
1609 /* End of else if chain - OP_TRANS rejoin rest */
1610
02aa26ce 1611 /* backslashes */
79072805
LW
1612 if (*s == '\\' && s+1 < send) {
1613 s++;
02aa26ce
NT
1614
1615 /* some backslashes we leave behind */
c9f97d15 1616 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1617 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1618 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1619 continue;
1620 }
02aa26ce
NT
1621
1622 /* deprecate \1 in strings and substitution replacements */
3280af22 1623 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1624 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1625 {
599cee73 1626 if (ckWARN(WARN_SYNTAX))
9014280d 1627 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1628 *--s = '$';
1629 break;
1630 }
02aa26ce
NT
1631
1632 /* string-change backslash escapes */
3280af22 1633 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1634 --s;
1635 break;
1636 }
02aa26ce
NT
1637
1638 /* if we get here, it's either a quoted -, or a digit */
79072805 1639 switch (*s) {
02aa26ce
NT
1640
1641 /* quoted - in transliterations */
79072805 1642 case '-':
3280af22 1643 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1644 *d++ = *s++;
1645 continue;
1646 }
1647 /* FALL THROUGH */
1648 default:
11b8faa4 1649 {
041457d9
DM
1650 if (isALNUM(*s) &&
1651 *s != '_' &&
1652 ckWARN(WARN_MISC))
9014280d 1653 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1654 "Unrecognized escape \\%c passed through",
1655 *s);
1656 /* default action is to copy the quoted character */
f9a63242 1657 goto default_action;
11b8faa4 1658 }
02aa26ce
NT
1659
1660 /* \132 indicates an octal constant */
79072805
LW
1661 case '0': case '1': case '2': case '3':
1662 case '4': case '5': case '6': case '7':
ba210ebe 1663 {
53305cf1
NC
1664 I32 flags = 0;
1665 STRLEN len = 3;
1666 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
1667 s += len;
1668 }
012bcf8d 1669 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
1670
1671 /* \x24 indicates a hex constant */
79072805 1672 case 'x':
a0ed51b3
LW
1673 ++s;
1674 if (*s == '{') {
9d4ba2ae 1675 char* const e = strchr(s, '}');
a4c04bdc
NC
1676 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1677 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1678 STRLEN len;
355860ce 1679
53305cf1 1680 ++s;
adaeee49 1681 if (!e) {
a0ed51b3 1682 yyerror("Missing right brace on \\x{}");
355860ce 1683 continue;
ba210ebe 1684 }
53305cf1
NC
1685 len = e - s;
1686 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 1687 s = e + 1;
a0ed51b3
LW
1688 }
1689 else {
ba210ebe 1690 {
53305cf1 1691 STRLEN len = 2;
a4c04bdc 1692 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 1693 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
1694 s += len;
1695 }
012bcf8d
GS
1696 }
1697
1698 NUM_ESCAPE_INSERT:
1699 /* Insert oct or hex escaped character.
301d3d20 1700 * There will always enough room in sv since such
db42d148 1701 * escapes will be longer than any UTF-8 sequence
301d3d20 1702 * they can end up as. */
ba7cea30 1703
c7f1f016
NIS
1704 /* We need to map to chars to ASCII before doing the tests
1705 to cover EBCDIC
1706 */
c4d5f83a 1707 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 1708 if (!has_utf8 && uv > 255) {
301d3d20
JH
1709 /* Might need to recode whatever we have
1710 * accumulated so far if it contains any
1711 * hibit chars.
1712 *
1713 * (Can't we keep track of that and avoid
1714 * this rescan? --jhi)
012bcf8d 1715 */
c7f1f016 1716 int hicount = 0;
63cd0674
NIS
1717 U8 *c;
1718 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 1719 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 1720 hicount++;
db42d148 1721 }
012bcf8d 1722 }
63cd0674 1723 if (hicount) {
9d4ba2ae 1724 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
1725 U8 *src, *dst;
1726 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
1727 src = (U8 *)d - 1;
1728 dst = src+hicount;
1729 d += hicount;
cfd0369c 1730 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 1731 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 1732 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
1733 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
1734 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
1735 }
1736 else {
63cd0674 1737 *dst-- = *src;
012bcf8d 1738 }
c7f1f016 1739 src--;
012bcf8d
GS
1740 }
1741 }
1742 }
1743
9aa983d2 1744 if (has_utf8 || uv > 255) {
9041c2e3 1745 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 1746 has_utf8 = TRUE;
f9a63242
JH
1747 if (PL_lex_inwhat == OP_TRANS &&
1748 PL_sublex_info.sub_op) {
1749 PL_sublex_info.sub_op->op_private |=
1750 (PL_lex_repl ? OPpTRANS_FROM_UTF
1751 : OPpTRANS_TO_UTF);
f9a63242 1752 }
012bcf8d 1753 }
a0ed51b3 1754 else {
012bcf8d 1755 *d++ = (char)uv;
a0ed51b3 1756 }
012bcf8d
GS
1757 }
1758 else {
c4d5f83a 1759 *d++ = (char) uv;
a0ed51b3 1760 }
79072805 1761 continue;
02aa26ce 1762
b239daa5 1763 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 1764 case 'N':
55eda711 1765 ++s;
423cee85
JH
1766 if (*s == '{') {
1767 char* e = strchr(s, '}');
155aba94 1768 SV *res;
423cee85 1769 STRLEN len;
cfd0369c 1770 const char *str;
4e553d73 1771
423cee85 1772 if (!e) {
5777a3f7 1773 yyerror("Missing right brace on \\N{}");
423cee85
JH
1774 e = s - 1;
1775 goto cont_scan;
1776 }
dbc0d4f2
JH
1777 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
1778 /* \N{U+...} */
1779 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
1780 PERL_SCAN_DISALLOW_PREFIX;
1781 s += 3;
1782 len = e - s;
1783 uv = grok_hex(s, &len, &flags, NULL);
1784 s = e + 1;
1785 goto NUM_ESCAPE_INSERT;
1786 }
55eda711
JH
1787 res = newSVpvn(s + 1, e - s - 1);
1788 res = new_constant( Nullch, 0, "charnames",
1789 res, Nullsv, "\\N{...}" );
f9a63242
JH
1790 if (has_utf8)
1791 sv_utf8_upgrade(res);
cfd0369c 1792 str = SvPV_const(res,len);
1c47067b
JH
1793#ifdef EBCDIC_NEVER_MIND
1794 /* charnames uses pack U and that has been
1795 * recently changed to do the below uni->native
1796 * mapping, so this would be redundant (and wrong,
1797 * the code point would be doubly converted).
1798 * But leave this in just in case the pack U change
1799 * gets revoked, but the semantics is still
1800 * desireable for charnames. --jhi */
cddc7ef4 1801 {
cfd0369c 1802 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
1803
1804 if (uv < 0x100) {
89ebb4a3 1805 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
1806
1807 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
1808 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 1809 str = SvPV_const(res, len);
cddc7ef4
JH
1810 }
1811 }
1812#endif
89491803 1813 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 1814 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
1815 SvCUR_set(sv, d - ostart);
1816 SvPOK_on(sv);
e4f3eed8 1817 *d = '\0';
f08d6ad9 1818 sv_utf8_upgrade(sv);
d2f449dd 1819 /* this just broke our allocation above... */
eb160463 1820 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 1821 d = SvPVX(sv) + SvCUR(sv);
89491803 1822 has_utf8 = TRUE;
f08d6ad9 1823 }
eb160463 1824 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 1825 const char * const odest = SvPVX_const(sv);
423cee85 1826
8973db79 1827 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
1828 d = SvPVX(sv) + (d - odest);
1829 }
1830 Copy(str, d, len, char);
1831 d += len;
1832 SvREFCNT_dec(res);
1833 cont_scan:
1834 s = e + 1;
1835 }
1836 else
5777a3f7 1837 yyerror("Missing braces on \\N{}");
423cee85
JH
1838 continue;
1839
02aa26ce 1840 /* \c is a control character */
79072805
LW
1841 case 'c':
1842 s++;
961ce445 1843 if (s < send) {
ba210ebe 1844 U8 c = *s++;
c7f1f016
NIS
1845#ifdef EBCDIC
1846 if (isLOWER(c))
1847 c = toUPPER(c);
1848#endif
db42d148 1849 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 1850 }
961ce445
RGS
1851 else {
1852 yyerror("Missing control char name in \\c");
1853 }
79072805 1854 continue;
02aa26ce
NT
1855
1856 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 1857 case 'b':
db42d148 1858 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
1859 break;
1860 case 'n':
db42d148 1861 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
1862 break;
1863 case 'r':
db42d148 1864 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
1865 break;
1866 case 'f':
db42d148 1867 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
1868 break;
1869 case 't':
db42d148 1870 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 1871 break;
34a3fe2a 1872 case 'e':
db42d148 1873 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
1874 break;
1875 case 'a':
db42d148 1876 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 1877 break;
02aa26ce
NT
1878 } /* end switch */
1879
79072805
LW
1880 s++;
1881 continue;
02aa26ce 1882 } /* end if (backslash) */
4c3a8340
TS
1883#ifdef EBCDIC
1884 else
1885 literal_endpoint++;
1886#endif
02aa26ce 1887
f9a63242 1888 default_action:
2b9d42f0
NIS
1889 /* If we started with encoded form, or already know we want it
1890 and then encode the next character */
1891 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
1892 STRLEN len = 1;
9d4ba2ae
AL
1893 const UV uv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
1894 const STRLEN need = UNISKIP(NATIVE_TO_UNI(uv));
2b9d42f0
NIS
1895 s += len;
1896 if (need > len) {
1897 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 1898 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
1899 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
1900 }
1901 d = (char*)uvchr_to_utf8((U8*)d, uv);
1902 has_utf8 = TRUE;
1903 }
1904 else {
1905 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
1906 }
02aa26ce
NT
1907 } /* while loop to process each character */
1908
1909 /* terminate the string and set up the sv */
79072805 1910 *d = '\0';
95a20fc0 1911 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 1912 if (SvCUR(sv) >= SvLEN(sv))
d0063567 1913 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 1914
79072805 1915 SvPOK_on(sv);
9f4817db 1916 if (PL_encoding && !has_utf8) {
d0063567
DK
1917 sv_recode_to_utf8(sv, PL_encoding);
1918 if (SvUTF8(sv))
1919 has_utf8 = TRUE;
9f4817db 1920 }
2b9d42f0 1921 if (has_utf8) {
7e2040f0 1922 SvUTF8_on(sv);
2b9d42f0 1923 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 1924 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
1925 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1926 }
1927 }
79072805 1928
02aa26ce 1929 /* shrink the sv if we allocated more than we used */
79072805 1930 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 1931 SvPV_shrink_to_cur(sv);
79072805 1932 }
02aa26ce 1933
9b599b2a 1934 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
1935 if (s > PL_bufptr) {
1936 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 1937 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
b3ac6de7 1938 sv, Nullsv,
4e553d73 1939 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 1940 ? "tr"
3280af22 1941 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
1942 ? "s"
1943 : "qq")));
79072805 1944 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 1945 } else
8990e307 1946 SvREFCNT_dec(sv);
79072805
LW
1947 return s;
1948}
1949
ffb4593c
NT
1950/* S_intuit_more
1951 * Returns TRUE if there's more to the expression (e.g., a subscript),
1952 * FALSE otherwise.
ffb4593c
NT
1953 *
1954 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
1955 *
1956 * ->[ and ->{ return TRUE
1957 * { and [ outside a pattern are always subscripts, so return TRUE
1958 * if we're outside a pattern and it's not { or [, then return FALSE
1959 * if we're in a pattern and the first char is a {
1960 * {4,5} (any digits around the comma) returns FALSE
1961 * if we're in a pattern and the first char is a [
1962 * [] returns FALSE
1963 * [SOMETHING] has a funky algorithm to decide whether it's a
1964 * character class or not. It has to deal with things like
1965 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
1966 * anything else returns TRUE
1967 */
1968
9cbb5ea2
GS
1969/* This is the one truly awful dwimmer necessary to conflate C and sed. */
1970
76e3520e 1971STATIC int
cea2e8a9 1972S_intuit_more(pTHX_ register char *s)
79072805 1973{
97aff369 1974 dVAR;
3280af22 1975 if (PL_lex_brackets)
79072805
LW
1976 return TRUE;
1977 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
1978 return TRUE;
1979 if (*s != '{' && *s != '[')
1980 return FALSE;
3280af22 1981 if (!PL_lex_inpat)
79072805
LW
1982 return TRUE;
1983
1984 /* In a pattern, so maybe we have {n,m}. */
1985 if (*s == '{') {
1986 s++;
1987 if (!isDIGIT(*s))
1988 return TRUE;
1989 while (isDIGIT(*s))
1990 s++;
1991 if (*s == ',')
1992 s++;
1993 while (isDIGIT(*s))
1994 s++;
1995 if (*s == '}')
1996 return FALSE;
1997 return TRUE;
1998
1999 }
2000
2001 /* On the other hand, maybe we have a character class */
2002
2003 s++;
2004 if (*s == ']' || *s == '^')
2005 return FALSE;
2006 else {
ffb4593c 2007 /* this is terrifying, and it works */
79072805
LW
2008 int weight = 2; /* let's weigh the evidence */
2009 char seen[256];
f27ffc4a 2010 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2011 const char * const send = strchr(s,']');
3280af22 2012 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2013
2014 if (!send) /* has to be an expression */
2015 return TRUE;
2016
2017 Zero(seen,256,char);
2018 if (*s == '$')
2019 weight -= 3;
2020 else if (isDIGIT(*s)) {
2021 if (s[1] != ']') {
2022 if (isDIGIT(s[1]) && s[2] == ']')
2023 weight -= 10;
2024 }
2025 else
2026 weight -= 100;
2027 }
2028 for (; s < send; s++) {
2029 last_un_char = un_char;
2030 un_char = (unsigned char)*s;
2031 switch (*s) {
2032 case '@':
2033 case '&':
2034 case '$':
2035 weight -= seen[un_char] * 10;
7e2040f0 2036 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2037 int len;
8903cb82 2038 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2039 len = (int)strlen(tmpbuf);
2040 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2041 weight -= 100;
2042 else
2043 weight -= 10;
2044 }
2045 else if (*s == '$' && s[1] &&
93a17b20
LW
2046 strchr("[#!%*<>()-=",s[1])) {
2047 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2048 weight -= 10;
2049 else
2050 weight -= 1;
2051 }
2052 break;
2053 case '\\':
2054 un_char = 254;
2055 if (s[1]) {
93a17b20 2056 if (strchr("wds]",s[1]))
79072805
LW
2057 weight += 100;
2058 else if (seen['\''] || seen['"'])
2059 weight += 1;
93a17b20 2060 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2061 weight += 40;
2062 else if (isDIGIT(s[1])) {
2063 weight += 40;
2064 while (s[1] && isDIGIT(s[1]))
2065 s++;
2066 }
2067 }
2068 else
2069 weight += 100;
2070 break;
2071 case '-':
2072 if (s[1] == '\\')
2073 weight += 50;
93a17b20 2074 if (strchr("aA01! ",last_un_char))
79072805 2075 weight += 30;
93a17b20 2076 if (strchr("zZ79~",s[1]))
79072805 2077 weight += 30;
f27ffc4a
GS
2078 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2079 weight -= 5; /* cope with negative subscript */
79072805
LW
2080 break;
2081 default:
3792a11b
NC
2082 if (!isALNUM(last_un_char)
2083 && !(last_un_char == '$' || last_un_char == '@'
2084 || last_un_char == '&')
2085 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2086 char *d = tmpbuf;
2087 while (isALPHA(*s))
2088 *d++ = *s++;
2089 *d = '\0';
2090 if (keyword(tmpbuf, d - tmpbuf))
2091 weight -= 150;
2092 }
2093 if (un_char == last_un_char + 1)
2094 weight += 5;
2095 weight -= seen[un_char];
2096 break;
2097 }
2098 seen[un_char]++;
2099 }
2100 if (weight >= 0) /* probably a character class */
2101 return FALSE;
2102 }
2103
2104 return TRUE;
2105}
ffed7fef 2106
ffb4593c
NT
2107/*
2108 * S_intuit_method
2109 *
2110 * Does all the checking to disambiguate
2111 * foo bar
2112 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2113 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2114 *
2115 * First argument is the stuff after the first token, e.g. "bar".
2116 *
2117 * Not a method if bar is a filehandle.
2118 * Not a method if foo is a subroutine prototyped to take a filehandle.
2119 * Not a method if it's really "Foo $bar"
2120 * Method if it's "foo $bar"
2121 * Not a method if it's really "print foo $bar"
2122 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2123 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2124 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2125 * =>
2126 */
2127
76e3520e 2128STATIC int
62d55b22 2129S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2130{
97aff369 2131 dVAR;
a0d0e21e 2132 char *s = start + (*start == '$');
3280af22 2133 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2134 STRLEN len;
2135 GV* indirgv;
2136
2137 if (gv) {
62d55b22 2138 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2139 return 0;
62d55b22
NC
2140 if (cv) {
2141 if (SvPOK(cv)) {
2142 const char *proto = SvPVX_const(cv);
2143 if (proto) {
2144 if (*proto == ';')
2145 proto++;
2146 if (*proto == '*')
2147 return 0;
2148 }
b6c543e3
IZ
2149 }
2150 } else
a0d0e21e
LW
2151 gv = 0;
2152 }
8903cb82 2153 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2154 /* start is the beginning of the possible filehandle/object,
2155 * and s is the end of it
2156 * tmpbuf is a copy of it
2157 */
2158
a0d0e21e 2159 if (*start == '$') {
3280af22 2160 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e
LW
2161 return 0;
2162 s = skipspace(s);
3280af22
NIS
2163 PL_bufptr = start;
2164 PL_expect = XREF;
a0d0e21e
LW
2165 return *s == '(' ? FUNCMETH : METHOD;
2166 }
2167 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2168 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2169 len -= 2;
2170 tmpbuf[len] = '\0';
2171 goto bare_package;
2172 }
90e5519e 2173 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2174 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2175 return 0;
2176 /* filehandle or package name makes it a method */
89bfa8cd 2177 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
a0d0e21e 2178 s = skipspace(s);
3280af22 2179 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2180 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2181 bare_package:
3280af22 2182 PL_nextval[PL_nexttoke].opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2183 newSVpvn(tmpbuf,len));
3280af22
NIS
2184 PL_nextval[PL_nexttoke].opval->op_private = OPpCONST_BARE;
2185 PL_expect = XTERM;
a0d0e21e 2186 force_next(WORD);
3280af22 2187 PL_bufptr = s;
a0d0e21e
LW
2188 return *s == '(' ? FUNCMETH : METHOD;
2189 }
2190 }
2191 return 0;
2192}
2193
ffb4593c
NT
2194/*
2195 * S_incl_perldb
2196 * Return a string of Perl code to load the debugger. If PERL5DB
2197 * is set, it will return the contents of that, otherwise a
2198 * compile-time require of perl5db.pl.
2199 */
2200
bfed75c6 2201STATIC const char*
cea2e8a9 2202S_incl_perldb(pTHX)
a0d0e21e 2203{
97aff369 2204 dVAR;
3280af22 2205 if (PL_perldb) {
9d4ba2ae 2206 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2207
2208 if (pdb)
2209 return pdb;
93189314 2210 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2211 return "BEGIN { require 'perl5db.pl' }";
2212 }
2213 return "";
2214}
2215
2216
16d20bd9 2217/* Encoded script support. filter_add() effectively inserts a
4e553d73 2218 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2219 * Note that the filter function only applies to the current source file
2220 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2221 *
2222 * The datasv parameter (which may be NULL) can be used to pass
2223 * private data to this instance of the filter. The filter function
2224 * can recover the SV using the FILTER_DATA macro and use it to
2225 * store private buffers and state information.
2226 *
2227 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2228 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2229 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2230 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2231 * private use must be set using malloc'd pointers.
2232 */
16d20bd9
AD
2233
2234SV *
864dbfa3 2235Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2236{
97aff369 2237 dVAR;
f4c556ac
GS
2238 if (!funcp)
2239 return Nullsv;
2240
3280af22
NIS
2241 if (!PL_rsfp_filters)
2242 PL_rsfp_filters = newAV();
16d20bd9 2243 if (!datasv)
561b68a9 2244 datasv = newSV(0);
862a34c6 2245 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2246 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2247 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2248 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
8141890a 2249 IoANY(datasv), SvPV_nolen(datasv)));
3280af22
NIS
2250 av_unshift(PL_rsfp_filters, 1);
2251 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2252 return(datasv);
2253}
4e553d73 2254
16d20bd9
AD
2255
2256/* Delete most recently added instance of this filter function. */
a0d0e21e 2257void
864dbfa3 2258Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2259{
97aff369 2260 dVAR;
e0c19803 2261 SV *datasv;
24801a4b 2262
33073adb 2263#ifdef DEBUGGING
8141890a 2264 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p", FPTR2DPTR(XPVIO *, funcp)));
33073adb 2265#endif
3280af22 2266 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2267 return;
2268 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2269 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2270 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2271 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2272 IoANY(datasv) = (void *)NULL;
3280af22 2273 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2274
16d20bd9
AD
2275 return;
2276 }
2277 /* we need to search for the correct entry and clear it */
cea2e8a9 2278 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2279}
2280
2281
1de9afcd
RGS
2282/* Invoke the idxth filter function for the current rsfp. */
2283/* maxlen 0 = read one text line */
16d20bd9 2284I32
864dbfa3 2285Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2286{
97aff369 2287 dVAR;
16d20bd9
AD
2288 filter_t funcp;
2289 SV *datasv = NULL;
e50aee73 2290
3280af22 2291 if (!PL_rsfp_filters)
16d20bd9 2292 return -1;
1de9afcd 2293 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2294 /* Provide a default input filter to make life easy. */
2295 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2296 DEBUG_P(PerlIO_printf(Perl_debug_log,
2297 "filter_read %d: from rsfp\n", idx));
4e553d73 2298 if (maxlen) {
16d20bd9
AD
2299 /* Want a block */
2300 int len ;
f54cb97a 2301 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2302
2303 /* ensure buf_sv is large enough */
eb160463 2304 SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
3280af22
NIS
2305 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
2306 if (PerlIO_error(PL_rsfp))
37120919
AD
2307 return -1; /* error */
2308 else
2309 return 0 ; /* end of file */
2310 }
16d20bd9
AD
2311 SvCUR_set(buf_sv, old_len + len) ;
2312 } else {
2313 /* Want a line */
3280af22
NIS
2314 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2315 if (PerlIO_error(PL_rsfp))
37120919
AD
2316 return -1; /* error */
2317 else
2318 return 0 ; /* end of file */
2319 }
16d20bd9
AD
2320 }
2321 return SvCUR(buf_sv);
2322 }
2323 /* Skip this filter slot if filter has been deleted */
1de9afcd 2324 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2325 DEBUG_P(PerlIO_printf(Perl_debug_log,
2326 "filter_read %d: skipped (filter deleted)\n",
2327 idx));
16d20bd9
AD
2328 return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
2329 }
2330 /* Get function pointer hidden within datasv */
8141890a 2331 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2332 DEBUG_P(PerlIO_printf(Perl_debug_log,
2333 "filter_read %d: via function %p (%s)\n",
cfd0369c 2334 idx, datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2335 /* Call function. The function is expected to */
2336 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2337 /* Return: <0:error, =0:eof, >0:not eof */
acfe0abc 2338 return (*funcp)(aTHX_ idx, buf_sv, maxlen);
16d20bd9
AD
2339}
2340
76e3520e 2341STATIC char *
cea2e8a9 2342S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2343{
97aff369 2344 dVAR;
c39cd008 2345#ifdef PERL_CR_FILTER
3280af22 2346 if (!PL_rsfp_filters) {
c39cd008 2347 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2348 }
2349#endif
3280af22 2350 if (PL_rsfp_filters) {
55497cff 2351 if (!append)
2352 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2353 if (FILTER_READ(0, sv, 0) > 0)
2354 return ( SvPVX(sv) ) ;
2355 else
2356 return Nullch ;
2357 }
9d116dd7 2358 else
fd049845 2359 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2360}
2361
01ec43d0 2362STATIC HV *
7fc63493 2363S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2364{
97aff369 2365 dVAR;
def3634b
GS
2366 GV *gv;
2367
01ec43d0 2368 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2369 return PL_curstash;
2370
2371 if (len > 2 &&
2372 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2373 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2374 {
2375 return GvHV(gv); /* Foo:: */
def3634b
GS
2376 }
2377
2378 /* use constant CLASS => 'MyClass' */
90e5519e 2379 if ((gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV))) {
def3634b
GS
2380 SV *sv;
2381 if (GvCV(gv) && (sv = cv_const_sv(GvCV(gv)))) {
83003860 2382 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2383 }
2384 }
2385
2386 return gv_stashpv(pkgname, FALSE);
2387}
a0d0e21e 2388
468aa647 2389STATIC char *
cc6ed77d 2390S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2391 dVAR;
468aa647
RGS
2392 if (PL_expect != XSTATE)
2393 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2394 is_use ? "use" : "no"));
2395 s = skipspace(s);
2396 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2397 s = force_version(s, TRUE);
2398 if (*s == ';' || (s = skipspace(s), *s == ';')) {
2399 PL_nextval[PL_nexttoke].opval = Nullop;
2400 force_next(WORD);
2401 }
2402 else if (*s == 'v') {
2403 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2404 s = force_version(s, FALSE);
2405 }
2406 }
2407 else {
2408 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2409 s = force_version(s, FALSE);
2410 }
2411 yylval.ival = is_use;
2412 return s;
2413}
748a9306 2414#ifdef DEBUGGING
27da23d5 2415 static const char* const exp_name[] =
09bef843 2416 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2417 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2418 };
748a9306 2419#endif
463ee0b2 2420
02aa26ce
NT
2421/*
2422 yylex
2423
2424 Works out what to call the token just pulled out of the input
2425 stream. The yacc parser takes care of taking the ops we return and
2426 stitching them into a tree.
2427
2428 Returns:
2429 PRIVATEREF
2430
2431 Structure:
2432 if read an identifier
2433 if we're in a my declaration
2434 croak if they tried to say my($foo::bar)
2435 build the ops for a my() declaration
2436 if it's an access to a my() variable
2437 are we in a sort block?
2438 croak if my($a); $a <=> $b
2439 build ops for access to a my() variable
2440 if in a dq string, and they've said @foo and we can't find @foo
2441 croak
2442 build ops for a bareword
2443 if we already built the token before, use it.
2444*/
2445
20141f0e 2446
dba4d153
JH
2447#ifdef __SC__
2448#pragma segment Perl_yylex
2449#endif
dba4d153 2450int
dba4d153 2451Perl_yylex(pTHX)
20141f0e 2452{
97aff369 2453 dVAR;
3afc138a 2454 register char *s = PL_bufptr;
378cc40b 2455 register char *d;
463ee0b2 2456 STRLEN len;
aa7440fb 2457 bool bof = FALSE;
a687059c 2458
bbf60fe6 2459 DEBUG_T( {
396482e1 2460 SV* tmp = newSVpvs("");
b6007c36
DM
2461 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
2462 (IV)CopLINE(PL_curcop),
2463 lex_state_names[PL_lex_state],
2464 exp_name[PL_expect],
2465 pv_display(tmp, s, strlen(s), 0, 60));
2466 SvREFCNT_dec(tmp);
bbf60fe6 2467 } );
02aa26ce 2468 /* check if there's an identifier for us to look at */
ba979b31 2469 if (PL_pending_ident)
bbf60fe6 2470 return REPORT(S_pending_ident(aTHX));
bbce6d69 2471
02aa26ce
NT
2472 /* no identifier pending identification */
2473
3280af22 2474 switch (PL_lex_state) {
79072805
LW
2475#ifdef COMMENTARY
2476 case LEX_NORMAL: /* Some compilers will produce faster */
2477 case LEX_INTERPNORMAL: /* code if we comment these out. */
2478 break;
2479#endif
2480
09bef843 2481 /* when we've already built the next token, just pull it out of the queue */
79072805 2482 case LEX_KNOWNEXT:
3280af22
NIS
2483 PL_nexttoke--;
2484 yylval = PL_nextval[PL_nexttoke];
2485 if (!PL_nexttoke) {
2486 PL_lex_state = PL_lex_defer;
2487 PL_expect = PL_lex_expect;
2488 PL_lex_defer = LEX_NORMAL;
463ee0b2 2489 }
bbf60fe6 2490 return REPORT(PL_nexttype[PL_nexttoke]);
79072805 2491
02aa26ce 2492 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 2493 when we get here, PL_bufptr is at the \
02aa26ce 2494 */
79072805
LW
2495 case LEX_INTERPCASEMOD:
2496#ifdef DEBUGGING
3280af22 2497 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 2498 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 2499#endif
02aa26ce 2500 /* handle \E or end of string */
3280af22 2501 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 2502 /* if at a \E */
3280af22 2503 if (PL_lex_casemods) {
f54cb97a 2504 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 2505 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 2506
3792a11b
NC
2507 if (PL_bufptr != PL_bufend
2508 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
2509 PL_bufptr += 2;
2510 PL_lex_state = LEX_INTERPCONCAT;
a0d0e21e 2511 }
bbf60fe6 2512 return REPORT(')');
79072805 2513 }
3280af22
NIS
2514 if (PL_bufptr != PL_bufend)
2515 PL_bufptr += 2;
2516 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 2517 return yylex();
79072805
LW
2518 }
2519 else {
607df283 2520 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2521 "### Saw case modifier\n"); });
3280af22 2522 s = PL_bufptr + 1;
6e909404
JH
2523 if (s[1] == '\\' && s[2] == 'E') {
2524 PL_bufptr = s + 3;
2525 PL_lex_state = LEX_INTERPCONCAT;
2526 return yylex();
a0d0e21e 2527 }
6e909404 2528 else {
90771dc0 2529 I32 tmp;
6e909404
JH
2530 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
2531 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 2532 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
2533 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
2534 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 2535 return REPORT(')');
6e909404
JH
2536 }
2537 if (PL_lex_casemods > 10)
2538 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
2539 PL_lex_casestack[PL_lex_casemods++] = *s;
2540 PL_lex_casestack[PL_lex_casemods] = '\0';
2541 PL_lex_state = LEX_INTERPCONCAT;
2542 PL_nextval[PL_nexttoke].ival = 0;
2543 force_next('(');
2544 if (*s == 'l')
2545 PL_nextval[PL_nexttoke].ival = OP_LCFIRST;
2546 else if (*s == 'u')
2547 PL_nextval[PL_nexttoke].ival = OP_UCFIRST;
2548 else if (*s == 'L')
2549 PL_nextval[PL_nexttoke].ival = OP_LC;
2550 else if (*s == 'U')
2551 PL_nextval[PL_nexttoke].ival = OP_UC;
2552 else if (*s == 'Q')
2553 PL_nextval[PL_nexttoke].ival = OP_QUOTEMETA;
2554 else
2555 Perl_croak(aTHX_ "panic: yylex");
2556 PL_bufptr = s + 1;
a0d0e21e 2557 }
79072805 2558 force_next(FUNC);
3280af22
NIS
2559 if (PL_lex_starts) {
2560 s = PL_bufptr;
2561 PL_lex_starts = 0;
131b3ad0
DM
2562 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2563 if (PL_lex_casemods == 1 && PL_lex_inpat)
2564 OPERATOR(',');
2565 else
2566 Aop(OP_CONCAT);
79072805
LW
2567 }
2568 else
cea2e8a9 2569 return yylex();
79072805
LW
2570 }
2571
55497cff 2572 case LEX_INTERPPUSH:
bbf60fe6 2573 return REPORT(sublex_push());
55497cff 2574
79072805 2575 case LEX_INTERPSTART:
3280af22 2576 if (PL_bufptr == PL_bufend)
bbf60fe6 2577 return REPORT(sublex_done());
607df283 2578 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 2579 "### Interpolated variable\n"); });
3280af22
NIS
2580 PL_expect = XTERM;
2581 PL_lex_dojoin = (*PL_bufptr == '@');
2582 PL_lex_state = LEX_INTERPNORMAL;
2583 if (PL_lex_dojoin) {
2584 PL_nextval[PL_nexttoke].ival = 0;
79072805 2585 force_next(',');
a0d0e21e 2586 force_ident("\"", '$');
3280af22 2587 PL_nextval[PL_nexttoke].ival = 0;
79072805 2588 force_next('$');
3280af22 2589 PL_nextval[PL_nexttoke].ival = 0;
79072805 2590 force_next('(');
3280af22 2591 PL_nextval[PL_nexttoke].ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
2592 force_next(FUNC);
2593 }
3280af22
NIS
2594 if (PL_lex_starts++) {
2595 s = PL_bufptr;
131b3ad0
DM
2596 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2597 if (!PL_lex_casemods && PL_lex_inpat)
2598 OPERATOR(',');
2599 else
2600 Aop(OP_CONCAT);
79072805 2601 }
cea2e8a9 2602 return yylex();
79072805
LW
2603
2604 case LEX_INTERPENDMAYBE:
3280af22
NIS
2605 if (intuit_more(PL_bufptr)) {
2606 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
2607 break;
2608 }
2609 /* FALL THROUGH */
2610
2611 case LEX_INTERPEND:
3280af22
NIS
2612 if (PL_lex_dojoin) {
2613 PL_lex_dojoin = FALSE;
2614 PL_lex_state = LEX_INTERPCONCAT;
bbf60fe6 2615 return REPORT(')');
79072805 2616 }
43a16006 2617 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 2618 && SvEVALED(PL_lex_repl))
43a16006 2619 {
e9fa98b2 2620 if (PL_bufptr != PL_bufend)
cea2e8a9 2621 Perl_croak(aTHX_ "Bad evalled substitution pattern");
e9fa98b2
HS
2622 PL_lex_repl = Nullsv;
2623 }
79072805
LW
2624 /* FALLTHROUGH */
2625 case LEX_INTERPCONCAT:
2626#ifdef DEBUGGING
3280af22 2627 if (PL_lex_brackets)
cea2e8a9 2628 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 2629#endif
3280af22 2630 if (PL_bufptr == PL_bufend)
bbf60fe6 2631 return REPORT(sublex_done());
79072805 2632
3280af22
NIS
2633 if (SvIVX(PL_linestr) == '\'') {
2634 SV *sv = newSVsv(PL_linestr);
2635 if (!PL_lex_inpat)
76e3520e 2636 sv = tokeq(sv);
3280af22 2637 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 2638 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 2639 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 2640 s = PL_bufend;
79072805
LW
2641 }
2642 else {
3280af22 2643 s = scan_const(PL_bufptr);
79072805 2644 if (*s == '\\')
3280af22 2645 PL_lex_state = LEX_INTERPCASEMOD;
79072805 2646 else
3280af22 2647 PL_lex_state = LEX_INTERPSTART;
79072805
LW
2648 }
2649
3280af22
NIS
2650 if (s != PL_bufptr) {
2651 PL_nextval[PL_nexttoke] = yylval;
2652 PL_expect = XTERM;
79072805 2653 force_next(THING);
131b3ad0
DM
2654 if (PL_lex_starts++) {
2655 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
2656 if (!PL_lex_casemods && PL_lex_inpat)
2657 OPERATOR(',');
2658 else
2659 Aop(OP_CONCAT);
2660 }
79072805 2661 else {
3280af22 2662 PL_bufptr = s;
cea2e8a9 2663 return yylex();
79072805
LW
2664 }
2665 }
2666
cea2e8a9 2667 return yylex();
a0d0e21e 2668 case LEX_FORMLINE:
3280af22
NIS
2669 PL_lex_state = LEX_NORMAL;
2670 s = scan_formline(PL_bufptr);
2671 if (!PL_lex_formbrack)
a0d0e21e
LW
2672 goto rightbracket;
2673 OPERATOR(';');
79072805
LW
2674 }
2675
3280af22
NIS
2676 s = PL_bufptr;
2677 PL_oldoldbufptr = PL_oldbufptr;
2678 PL_oldbufptr = s;
463ee0b2
LW
2679
2680 retry:
378cc40b
LW
2681 switch (*s) {
2682 default:
7e2040f0 2683 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 2684 goto keylookup;
cea2e8a9 2685 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
2686 case 4:
2687 case 26:
2688 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 2689 case 0:
3280af22
NIS
2690 if (!PL_rsfp) {
2691 PL_last_uni = 0;
2692 PL_last_lop = 0;
c5ee2135 2693 if (PL_lex_brackets) {
0bd48802
AL
2694 yyerror(PL_lex_formbrack
2695 ? "Format not terminated"
2696 : "Missing right curly or square bracket");
c5ee2135 2697 }
4e553d73 2698 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 2699 "### Tokener got EOF\n");
5f80b19c 2700 } );
79072805 2701 TOKEN(0);
463ee0b2 2702 }
3280af22 2703 if (s++ < PL_bufend)
a687059c 2704 goto retry; /* ignore stray nulls */
3280af22
NIS
2705 PL_last_uni = 0;
2706 PL_last_lop = 0;
2707 if (!PL_in_eval && !PL_preambled) {
2708 PL_preambled = TRUE;
2709 sv_setpv(PL_linestr,incl_perldb());
2710 if (SvCUR(PL_linestr))
396482e1 2711 sv_catpvs(PL_linestr,";");
3280af22
NIS
2712 if (PL_preambleav){
2713 while(AvFILLp(PL_preambleav) >= 0) {
2714 SV *tmpsv = av_shift(PL_preambleav);
2715 sv_catsv(PL_linestr, tmpsv);
396482e1 2716 sv_catpvs(PL_linestr, ";");
91b7def8 2717 sv_free(tmpsv);
2718 }
3280af22
NIS
2719 sv_free((SV*)PL_preambleav);
2720 PL_preambleav = NULL;
91b7def8 2721 }
3280af22 2722 if (PL_minus_n || PL_minus_p) {
396482e1 2723 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 2724 if (PL_minus_l)
396482e1 2725 sv_catpvs(PL_linestr,"chomp;");
3280af22 2726 if (PL_minus_a) {
3280af22 2727 if (PL_minus_F) {
3792a11b
NC
2728 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
2729 || *PL_splitstr == '"')
3280af22 2730 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 2731 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 2732 else {
c8ef6a4b
NC
2733 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
2734 bytes can be used as quoting characters. :-) */
dd374669 2735 const char *splits = PL_splitstr;
91d456ae 2736 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
2737 do {
2738 /* Need to \ \s */
dd374669
AL
2739 if (*splits == '\\')
2740 sv_catpvn(PL_linestr, splits, 1);
2741 sv_catpvn(PL_linestr, splits, 1);
2742 } while (*splits++);
48c4c863
NC
2743 /* This loop will embed the trailing NUL of
2744 PL_linestr as the last thing it does before
2745 terminating. */
396482e1 2746 sv_catpvs(PL_linestr, ");");
54310121 2747 }
2304df62
AD
2748 }
2749 else
396482e1 2750 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 2751 }
79072805 2752 }
bc9b29db 2753 if (PL_minus_E)
396482e1
GA
2754 sv_catpvs(PL_linestr,"use feature ':5.10';");
2755 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
2756 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2757 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2758 PL_last_lop = PL_last_uni = Nullch;
3280af22 2759 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2760 SV * const sv = newSV(0);
a0d0e21e
LW
2761
2762 sv_upgrade(sv, SVt_PVMG);
3280af22 2763 sv_setsv(sv,PL_linestr);
0ac0412a 2764 (void)SvIOK_on(sv);
45977657 2765 SvIV_set(sv, 0);
36c7798d 2766 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 2767 }
79072805 2768 goto retry;
a687059c 2769 }
e929a76b 2770 do {
aa7440fb 2771 bof = PL_rsfp ? TRUE : FALSE;
7e28d3af
JH
2772 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == Nullch) {
2773 fake_eof:
2774 if (PL_rsfp) {
2775 if (PL_preprocess && !PL_in_eval)
2776 (void)PerlProc_pclose(PL_rsfp);
2777 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
2778 PerlIO_clearerr(PL_rsfp);
2779 else
2780 (void)PerlIO_close(PL_rsfp);
2781 PL_rsfp = Nullfp;
2782 PL_doextract = FALSE;
2783 }
2784 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
a23c4656
NC
2785 sv_setpv(PL_linestr,PL_minus_p
2786 ? ";}continue{print;}" : ";}");
7e28d3af
JH
2787 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2788 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2789 PL_last_lop = PL_last_uni = Nullch;
7e28d3af
JH
2790 PL_minus_n = PL_minus_p = 0;
2791 goto retry;
2792 }
2793 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
207e3d1a 2794 PL_last_lop = PL_last_uni = Nullch;
c69006e4 2795 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
2796 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
2797 }
7aa207d6
JH
2798 /* If it looks like the start of a BOM or raw UTF-16,
2799 * check if it in fact is. */
2800 else if (bof &&
2801 (*s == 0 ||
2802 *(U8*)s == 0xEF ||
2803 *(U8*)s >= 0xFE ||
2804 s[1] == 0)) {
226017aa 2805#ifdef PERLIO_IS_STDIO
e3f494f1
JH
2806# ifdef __GNU_LIBRARY__
2807# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
2808# define FTELL_FOR_PIPE_IS_BROKEN
2809# endif
e3f494f1
JH
2810# else
2811# ifdef __GLIBC__
2812# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
2813# define FTELL_FOR_PIPE_IS_BROKEN
2814# endif
2815# endif
226017aa
DD
2816# endif
2817#endif
2818#ifdef FTELL_FOR_PIPE_IS_BROKEN
2819 /* This loses the possibility to detect the bof
2820 * situation on perl -P when the libc5 is being used.
2821 * Workaround? Maybe attach some extra state to PL_rsfp?
2822 */
2823 if (!PL_preprocess)
7e28d3af 2824 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 2825#else
eb160463 2826 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 2827#endif
7e28d3af 2828 if (bof) {
3280af22 2829 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 2830 s = swallow_bom((U8*)s);
e929a76b 2831 }
378cc40b 2832 }
3280af22 2833 if (PL_doextract) {
a0d0e21e
LW
2834 /* Incest with pod. */
2835 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 2836 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
2837 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
2838 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2839 PL_last_lop = PL_last_uni = Nullch;
3280af22 2840 PL_doextract = FALSE;
a0d0e21e 2841 }
4e553d73 2842 }
463ee0b2 2843 incline(s);
3280af22
NIS
2844 } while (PL_doextract);
2845 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
2846 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 2847 SV * const sv = newSV(0);
a687059c 2848
93a17b20 2849 sv_upgrade(sv, SVt_PVMG);
3280af22 2850 sv_setsv(sv,PL_linestr);
0ac0412a 2851 (void)SvIOK_on(sv);
45977657 2852 SvIV_set(sv, 0);
36c7798d 2853 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a687059c 2854 }
3280af22 2855 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 2856 PL_last_lop = PL_last_uni = Nullch;
57843af0 2857 if (CopLINE(PL_curcop) == 1) {
3280af22 2858 while (s < PL_bufend && isSPACE(*s))
79072805 2859 s++;
a0d0e21e 2860 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 2861 s++;
44a8e56a 2862 d = Nullch;
3280af22 2863 if (!PL_in_eval) {
44a8e56a 2864 if (*s == '#' && *(s+1) == '!')
2865 d = s + 2;
2866#ifdef ALTERNATE_SHEBANG
2867 else {
bfed75c6 2868 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 2869 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
2870 d = s + (sizeof(as) - 1);
2871 }
2872#endif /* ALTERNATE_SHEBANG */
2873 }
2874 if (d) {
b8378b72 2875 char *ipath;
774d564b 2876 char *ipathend;
b8378b72 2877
774d564b 2878 while (isSPACE(*d))
b8378b72
CS
2879 d++;
2880 ipath = d;
774d564b 2881 while (*d && !isSPACE(*d))
2882 d++;
2883 ipathend = d;
2884
2885#ifdef ARG_ZERO_IS_SCRIPT
2886 if (ipathend > ipath) {
2887 /*
2888 * HP-UX (at least) sets argv[0] to the script name,
2889 * which makes $^X incorrect. And Digital UNIX and Linux,
2890 * at least, set argv[0] to the basename of the Perl
2891 * interpreter. So, having found "#!", we'll set it right.
2892 */
f776e3cd 2893 SV * const x
5c1737d1 2894 = GvSV(gv_fetchpvs("\030", GV_ADD, SVt_PV)); /* $^X */
774d564b 2895 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 2896 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 2897 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 2898 SvSETMAGIC(x);
2899 }
556c1dec
JH
2900 else {
2901 STRLEN blen;
2902 STRLEN llen;
cfd0369c 2903 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 2904 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
2905 if (llen < blen) {
2906 bstart += blen - llen;
2907 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
2908 sv_setpvn(x, ipath, ipathend - ipath);
2909 SvSETMAGIC(x);
2910 }
2911 }
2912 }
774d564b 2913 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 2914 }
774d564b 2915#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
2916
2917 /*
2918 * Look for options.
2919 */
748a9306 2920 d = instr(s,"perl -");
84e30d1a 2921 if (!d) {
748a9306 2922 d = instr(s,"perl");
84e30d1a
GS
2923#if defined(DOSISH)
2924 /* avoid getting into infinite loops when shebang
2925 * line contains "Perl" rather than "perl" */
2926 if (!d) {
2927 for (d = ipathend-4; d >= ipath; --d) {
2928 if ((*d == 'p' || *d == 'P')
2929 && !ibcmp(d, "perl", 4))
2930 {
2931 break;
2932 }
2933 }
2934 if (d < ipath)
2935 d = Nullch;
2936 }
2937#endif
2938 }
44a8e56a 2939#ifdef ALTERNATE_SHEBANG
2940 /*
2941 * If the ALTERNATE_SHEBANG on this system starts with a
2942 * character that can be part of a Perl expression, then if
2943 * we see it but not "perl", we're probably looking at the
2944 * start of Perl code, not a request to hand off to some
2945 * other interpreter. Similarly, if "perl" is there, but
2946 * not in the first 'word' of the line, we assume the line
2947 * contains the start of the Perl program.
44a8e56a 2948 */
2949 if (d && *s != '#') {
f54cb97a 2950 const char *c = ipath;
44a8e56a 2951 while (*c && !strchr("; \t\r\n\f\v#", *c))
2952 c++;
2953 if (c < d)
2954 d = Nullch; /* "perl" not in first word; ignore */
2955 else
2956 *s = '#'; /* Don't try to parse shebang line */
2957 }
774d564b 2958#endif /* ALTERNATE_SHEBANG */
bf4acbe4 2959#ifndef MACOS_TRADITIONAL
748a9306 2960 if (!d &&
44a8e56a 2961 *s == '#' &&
774d564b 2962 ipathend > ipath &&
3280af22 2963 !PL_minus_c &&
748a9306 2964 !instr(s,"indir") &&
3280af22 2965 instr(PL_origargv[0],"perl"))
748a9306 2966 {
27da23d5 2967 dVAR;
9f68db38 2968 char **newargv;
9f68db38 2969
774d564b 2970 *ipathend = '\0';
2971 s = ipathend + 1;
3280af22 2972 while (s < PL_bufend && isSPACE(*s))
9f68db38 2973 s++;
3280af22 2974 if (s < PL_bufend) {
a02a5408 2975 Newxz(newargv,PL_origargc+3,char*);
9f68db38 2976 newargv[1] = s;
3280af22 2977 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
2978 s++;
2979 *s = '\0';
3280af22 2980 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
2981 }
2982 else
3280af22 2983 newargv = PL_origargv;
774d564b 2984 newargv[0] = ipath;
b35112e7 2985 PERL_FPU_PRE_EXEC
b4748376 2986 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 2987 PERL_FPU_POST_EXEC
cea2e8a9 2988 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 2989 }
bf4acbe4 2990#endif
748a9306 2991 if (d) {
748a9306 2992 while (*d && !isSPACE(*d)) d++;
bf4acbe4 2993 while (SPACE_OR_TAB(*d)) d++;
748a9306
LW
2994
2995 if (*d++ == '-') {
f54cb97a 2996 const bool switches_done = PL_doswitches;
fb993905
GA
2997 const U32 oldpdb = PL_perldb;
2998 const bool oldn = PL_minus_n;
2999 const bool oldp = PL_minus_p;
3000
8cc95fdb 3001 do {
3ffe3ee4 3002 if (*d == 'M' || *d == 'm' || *d == 'C') {
9d4ba2ae 3003 const char * const m = d;
8cc95fdb 3004 while (*d && !isSPACE(*d)) d++;
cea2e8a9 3005 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
8cc95fdb 3006 (int)(d - m), m);
3007 }
3008 d = moreswitches(d);
3009 } while (d);
f0b2cf55
YST
3010 if (PL_doswitches && !switches_done) {
3011 int argc = PL_origargc;
3012 char **argv = PL_origargv;
3013 do {
3014 argc--,argv++;
3015 } while (argc && argv[0][0] == '-' && argv[0][1]);
3016 init_argv_symbols(argc,argv);
3017 }
155aba94
GS
3018 if ((PERLDB_LINE && !oldpdb) ||
3019 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 3020 /* if we have already added "LINE: while (<>) {",
3021 we must not do it again */
748a9306 3022 {
c69006e4 3023 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3024 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3025 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
207e3d1a 3026 PL_last_lop = PL_last_uni = Nullch;
3280af22 3027 PL_preambled = FALSE;
84902520 3028 if (PERLDB_LINE)
3280af22 3029 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
3030 goto retry;
3031 }
a0d0e21e 3032 }
79072805 3033 }
9f68db38 3034 }
79072805 3035 }
3280af22
NIS
3036 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3037 PL_bufptr = s;
3038 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3039 return yylex();
ae986130 3040 }
378cc40b 3041 goto retry;
4fdae800 3042 case '\r':
6a27c188 3043#ifdef PERL_STRICT_CR
cea2e8a9 3044 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 3045 Perl_croak(aTHX_
cc507455 3046 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 3047#endif
4fdae800 3048 case ' ': case '\t': case '\f': case 013:
bf4acbe4
GS
3049#ifdef MACOS_TRADITIONAL
3050 case '\312':
3051#endif
378cc40b
LW
3052 s++;
3053 goto retry;
378cc40b 3054 case '#':
e929a76b 3055 case '\n':
3280af22 3056 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
3057 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
3058 /* handle eval qq[#line 1 "foo"\n ...] */
3059 CopLINE_dec(PL_curcop);
3060 incline(s);
3061 }
3280af22 3062 d = PL_bufend;
a687059c 3063 while (s < d && *s != '\n')
378cc40b 3064 s++;
0f85fab0 3065 if (s < d)
378cc40b 3066 s++;
78c267c1 3067 else if (s > d) /* Found by Ilya: feed random input to Perl. */
a8406387 3068 Perl_croak(aTHX_ "panic: input overflow");
463ee0b2 3069 incline(s);
3280af22
NIS
3070 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3071 PL_bufptr = s;
3072 PL_lex_state = LEX_FORMLINE;
cea2e8a9 3073 return yylex();
a687059c 3074 }
378cc40b 3075 }
a687059c 3076 else {
378cc40b 3077 *s = '\0';
3280af22 3078 PL_bufend = s;
a687059c 3079 }
378cc40b
LW
3080 goto retry;
3081 case '-':
79072805 3082 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 3083 I32 ftst = 0;
90771dc0 3084 char tmp;
e5edeb50 3085
378cc40b 3086 s++;
3280af22 3087 PL_bufptr = s;
748a9306
LW
3088 tmp = *s++;
3089
bf4acbe4 3090 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
3091 s++;
3092
3093 if (strnEQ(s,"=>",2)) {
3280af22 3094 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
b6007c36
DM
3095 DEBUG_T( { S_printbuf(aTHX_
3096 "### Saw unary minus before =>, forcing word %s\n", s);
5f80b19c 3097 } );
748a9306
LW
3098 OPERATOR('-'); /* unary minus */
3099 }
3280af22 3100 PL_last_uni = PL_oldbufptr;
748a9306 3101 switch (tmp) {
e5edeb50
JH
3102 case 'r': ftst = OP_FTEREAD; break;
3103 case 'w': ftst = OP_FTEWRITE; break;
3104 case 'x': ftst = OP_FTEEXEC; break;
3105 case 'o': ftst = OP_FTEOWNED; break;
3106 case 'R': ftst = OP_FTRREAD; break;
3107 case 'W': ftst = OP_FTRWRITE; break;
3108 case 'X': ftst = OP_FTREXEC; break;
3109 case 'O': ftst = OP_FTROWNED; break;
3110 case 'e': ftst = OP_FTIS; break;
3111 case 'z': ftst = OP_FTZERO; break;
3112 case 's': ftst = OP_FTSIZE; break;
3113 case 'f': ftst = OP_FTFILE; break;
3114 case 'd': ftst = OP_FTDIR; break;
3115 case 'l': ftst = OP_FTLINK; break;
3116 case 'p': ftst = OP_FTPIPE; break;
3117 case 'S': ftst = OP_FTSOCK; break;
3118 case 'u': ftst = OP_FTSUID; break;
3119 case 'g': ftst = OP_FTSGID; break;
3120 case 'k': ftst = OP_FTSVTX; break;
3121 case 'b': ftst = OP_FTBLK; break;
3122 case 'c': ftst = OP_FTCHR; break;
3123 case 't': ftst = OP_FTTTY; break;
3124 case 'T': ftst = OP_FTTEXT; break;
3125 case 'B': ftst = OP_FTBINARY; break;
3126 case 'M': case 'A': case 'C':
5c1737d1 3127 gv_fetchpvs("\024",GV_ADD, SVt_PV);
e5edeb50
JH
3128 switch (tmp) {
3129 case 'M': ftst = OP_FTMTIME; break;
3130 case 'A': ftst = OP_FTATIME; break;
3131 case 'C': ftst = OP_FTCTIME; break;
3132 default: break;
3133 }
3134 break;
378cc40b 3135 default:
378cc40b
LW
3136 break;
3137 }
e5edeb50 3138 if (ftst) {
eb160463 3139 PL_last_lop_op = (OPCODE)ftst;
4e553d73 3140 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 3141 "### Saw file test %c\n", (int)tmp);
5f80b19c 3142 } );
e5edeb50
JH
3143 FTST(ftst);
3144 }
3145 else {
3146 /* Assume it was a minus followed by a one-letter named
3147 * subroutine call (or a -bareword), then. */
95c31fe3 3148 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 3149 "### '-%c' looked like a file test but was not\n",
4fccd7c6 3150 (int) tmp);
5f80b19c 3151 } );
3cf7b4c4 3152 s = --PL_bufptr;
e5edeb50 3153 }
378cc40b 3154 }
90771dc0
NC
3155 {
3156 const char tmp = *s++;
3157 if (*s == tmp) {
3158 s++;
3159 if (PL_expect == XOPERATOR)
3160 TERM(POSTDEC);
3161 else
3162 OPERATOR(PREDEC);
3163 }
3164 else if (*s == '>') {
3165 s++;
3166 s = skipspace(s);
3167 if (isIDFIRST_lazy_if(s,UTF)) {
3168 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
3169 TOKEN(ARROW);
3170 }
3171 else if (*s == '$')
3172 OPERATOR(ARROW);
3173 else
3174 TERM(ARROW);
3175 }
3280af22 3176 if (PL_expect == XOPERATOR)
90771dc0
NC
3177 Aop(OP_SUBTRACT);
3178 else {
3179 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3180 check_uni();
3181 OPERATOR('-'); /* unary minus */
79072805 3182 }
2f3197b3 3183 }
79072805 3184
378cc40b 3185 case '+':
90771dc0
NC
3186 {
3187 const char tmp = *s++;
3188 if (*s == tmp) {
3189 s++;
3190 if (PL_expect == XOPERATOR)
3191 TERM(POSTINC);
3192 else
3193 OPERATOR(PREINC);
3194 }
3280af22 3195 if (PL_expect == XOPERATOR)
90771dc0
NC
3196 Aop(OP_ADD);
3197 else {
3198 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
3199 check_uni();
3200 OPERATOR('+');
3201 }
2f3197b3 3202 }
a687059c 3203
378cc40b 3204 case '*':
3280af22
NIS
3205 if (PL_expect != XOPERATOR) {
3206 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
3207 PL_expect = XOPERATOR;
3208 force_ident(PL_tokenbuf, '*');
3209 if (!*PL_tokenbuf)
a0d0e21e 3210 PREREF('*');
79072805 3211 TERM('*');
a687059c 3212 }
79072805
LW
3213 s++;
3214 if (*s == '*') {
a687059c 3215 s++;
79072805 3216 PWop(OP_POW);
a687059c 3217 }
79072805
LW
3218 Mop(OP_MULTIPLY);
3219
378cc40b 3220 case '%':
3280af22 3221 if (PL_expect == XOPERATOR) {
bbce6d69 3222 ++s;
3223 Mop(OP_MODULO);
a687059c 3224 }
3280af22
NIS
3225 PL_tokenbuf[0] = '%';
3226 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
3227 if (!PL_tokenbuf[1]) {
bbce6d69 3228 PREREF('%');
a687059c 3229 }
3280af22 3230 PL_pending_ident = '%';
bbce6d69 3231 TERM('%');
a687059c 3232
378cc40b 3233 case '^':
79072805 3234 s++;
a0d0e21e 3235 BOop(OP_BIT_XOR);
79072805 3236 case '[':
3280af22 3237 PL_lex_brackets++;
79072805 3238 /* FALL THROUGH */
378cc40b 3239 case '~':
0d863452
RH
3240 if (s[1] == '~'
3241 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)
ef89dcc3 3242 && FEATURE_IS_ENABLED("~~"))
0d863452
RH
3243 {
3244 s += 2;
3245 Eop(OP_SMARTMATCH);
3246 }
378cc40b 3247 case ',':
90771dc0
NC
3248 {
3249 const char tmp = *s++;
3250 OPERATOR(tmp);
3251 }
a0d0e21e
LW
3252 case ':':
3253 if (s[1] == ':') {
3254 len = 0;
0bfa2a8a 3255 goto just_a_word_zero_gv;
a0d0e21e
LW
3256 }
3257 s++;
09bef843
SB
3258 switch (PL_expect) {
3259 OP *attrs;
3260 case XOPERATOR:
3261 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
3262 break;
3263 PL_bufptr = s; /* update in case we back off */
3264 goto grabattrs;
3265 case XATTRBLOCK:
3266 PL_expect = XBLOCK;
3267 goto grabattrs;
3268 case XATTRTERM:
3269 PL_expect = XTERMBLOCK;
3270 grabattrs:
3271 s = skipspace(s);
3272 attrs = Nullop;
7e2040f0 3273 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 3274 I32 tmp;
09bef843 3275 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
f9829d6b
GS
3276 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len))) {
3277 if (tmp < 0) tmp = -tmp;
3278 switch (tmp) {
3279 case KEY_or:
3280 case KEY_and:
c963b151 3281 case KEY_err:
f9829d6b
GS
3282 case KEY_for:
3283 case KEY_unless:
3284 case KEY_if:
3285 case KEY_while:
3286 case KEY_until:
3287 goto got_attrs;
3288 default:
3289 break;
3290 }
3291 }
09bef843
SB
3292 if (*d == '(') {
3293 d = scan_str(d,TRUE,TRUE);
3294 if (!d) {
09bef843
SB
3295 /* MUST advance bufptr here to avoid bogus
3296 "at end of line" context messages from yyerror().
3297 */
3298 PL_bufptr = s + len;
3299 yyerror("Unterminated attribute parameter in attribute list");
3300 if (attrs)
3301 op_free(attrs);
bbf60fe6 3302 return REPORT(0); /* EOF indicator */
09bef843
SB
3303 }
3304 }
3305 if (PL_lex_stuff) {
3306 SV *sv = newSVpvn(s, len);
3307 sv_catsv(sv, PL_lex_stuff);
3308 attrs = append_elem(OP_LIST, attrs,
3309 newSVOP(OP_CONST, 0, sv));
3310 SvREFCNT_dec(PL_lex_stuff);
3311 PL_lex_stuff = Nullsv;
3312 }
3313 else {
371fce9b
DM
3314 if (len == 6 && strnEQ(s, "unique", len)) {
3315 if (PL_in_my == KEY_our)
3316#ifdef USE_ITHREADS
3317 GvUNIQUE_on(cGVOPx_gv(yylval.opval));
3318#else
3319 ; /* skip to avoid loading attributes.pm */
3320#endif
bfed75c6 3321 else
371fce9b
DM
3322 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
3323 }
3324
d3cea301
SB
3325 /* NOTE: any CV attrs applied here need to be part of
3326 the CVf_BUILTIN_ATTRS define in cv.h! */
371fce9b 3327 else if (!PL_in_my && len == 6 && strnEQ(s, "lvalue", len))
78f9721b
SM
3328 CvLVALUE_on(PL_compcv);
3329 else if (!PL_in_my && len == 6 && strnEQ(s, "locked", len))
3330 CvLOCKED_on(PL_compcv);
3331 else if (!PL_in_my && len == 6 && strnEQ(s, "method", len))
3332 CvMETHOD_on(PL_compcv);
06492da6
SF
3333 else if (!PL_in_my && len == 9 && strnEQ(s, "assertion", len))
3334 CvASSERTION_on(PL_compcv);
78f9721b
SM
3335 /* After we've set the flags, it could be argued that
3336 we don't need to do the attributes.pm-based setting
3337 process, and shouldn't bother appending recognized
d3cea301
SB
3338 flags. To experiment with that, uncomment the
3339 following "else". (Note that's already been
3340 uncommented. That keeps the above-applied built-in
3341 attributes from being intercepted (and possibly
3342 rejected) by a package's attribute routines, but is
3343 justified by the performance win for the common case
3344 of applying only built-in attributes.) */
0256094b 3345 else
78f9721b
SM
3346 attrs = append_elem(OP_LIST, attrs,
3347 newSVOP(OP_CONST, 0,
3348 newSVpvn(s, len)));
09bef843
SB
3349 }
3350 s = skipspace(d);
0120eecf 3351 if (*s == ':' && s[1] != ':')
09bef843 3352 s = skipspace(s+1);
0120eecf
GS
3353 else if (s == d)
3354 break; /* require real whitespace or :'s */
09bef843 3355 }
90771dc0
NC
3356 {
3357 const char tmp
3358 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
3359 if (*s != ';' && *s != '}' && *s != tmp
3360 && (tmp != '=' || *s != ')')) {
3361 const char q = ((*s == '\'') ? '"' : '\'');
3362 /* If here for an expression, and parsed no attrs, back
3363 off. */
3364 if (tmp == '=' && !attrs) {
3365 s = PL_bufptr;
3366 break;
3367 }
3368 /* MUST advance bufptr here to avoid bogus "at end of line"
3369 context messages from yyerror().
3370 */
3371 PL_bufptr = s;
3372 yyerror( *s
3373 ? Perl_form(aTHX_ "Invalid separator character "
3374 "%c%c%c in attribute list", q, *s, q)
3375 : "Unterminated attribute list" );
3376 if (attrs)
3377 op_free(attrs);
3378 OPERATOR(':');
09bef843 3379 }
09bef843 3380 }
f9829d6b 3381 got_attrs:
09bef843
SB
3382 if (attrs) {
3383 PL_nextval[PL_nexttoke].opval = attrs;
3384 force_next(THING);
3385 }
3386 TOKEN(COLONATTR);
3387 }
a0d0e21e 3388 OPERATOR(':');
8990e307
LW
3389 case '(':
3390 s++;
3280af22
NIS
3391 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
3392 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 3393 else
3280af22 3394 PL_expect = XTERM;
4a202259 3395 s = skipspace(s);
a0d0e21e 3396 TOKEN('(');
378cc40b 3397 case ';':
f4dd75d9 3398 CLINE;
90771dc0
NC
3399 {
3400 const char tmp = *s++;
3401 OPERATOR(tmp);
3402 }
378cc40b 3403 case ')':
90771dc0
NC
3404 {
3405 const char tmp = *s++;
3406 s = skipspace(s);
3407 if (*s == '{')
3408 PREBLOCK(tmp);
3409 TERM(tmp);
3410 }
79072805
LW
3411 case ']':
3412 s++;
3280af22 3413 if (PL_lex_brackets <= 0)
d98d5fff 3414 yyerror("Unmatched right square bracket");
463ee0b2 3415 else
3280af22
NIS
3416 --PL_lex_brackets;
3417 if (PL_lex_state == LEX_INTERPNORMAL) {
3418 if (PL_lex_brackets == 0) {
a0d0e21e 3419 if (*s != '[' && *s != '{' && (*s != '-' || s[1] != '>'))
3280af22 3420 PL_lex_state = LEX_INTERPEND;
79072805
LW
3421 }
3422 }
4633a7c4 3423 TERM(']');
79072805
LW
3424 case '{':
3425 leftbracket:
79072805 3426 s++;
3280af22 3427 if (PL_lex_brackets > 100) {
8edd5f42 3428 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 3429 }
3280af22 3430 switch (PL_expect) {
a0d0e21e 3431 case XTERM:
3280af22 3432 if (PL_lex_formbrack) {
a0d0e21e
LW
3433 s--;
3434 PRETERMBLOCK(DO);
3435 }
3280af22
NIS
3436 if (PL_oldoldbufptr == PL_last_lop)
3437 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3438 else
3280af22 3439 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 3440 OPERATOR(HASHBRACK);
a0d0e21e 3441 case XOPERATOR:
bf4acbe4 3442 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 3443 s++;
44a8e56a 3444 d = s;
3280af22
NIS
3445 PL_tokenbuf[0] = '\0';
3446 if (d < PL_bufend && *d == '-') {
3447 PL_tokenbuf[0] = '-';
44a8e56a 3448 d++;
bf4acbe4 3449 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 3450 d++;
3451 }
7e2040f0 3452 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 3453 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 3454 FALSE, &len);
bf4acbe4 3455 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
3456 d++;
3457 if (*d == '}') {
f54cb97a 3458 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 3459 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
3460 if (minus)
3461 force_next('-');
748a9306
LW
3462 }
3463 }
3464 /* FALL THROUGH */
09bef843 3465 case XATTRBLOCK:
748a9306 3466 case XBLOCK:
3280af22
NIS
3467 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
3468 PL_expect = XSTATE;
a0d0e21e 3469 break;
09bef843 3470 case XATTRTERM:
a0d0e21e 3471 case XTERMBLOCK:
3280af22
NIS
3472 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
3473 PL_expect = XSTATE;
a0d0e21e
LW
3474 break;
3475 default: {
f54cb97a 3476 const char *t;
3280af22
NIS
3477 if (PL_oldoldbufptr == PL_last_lop)
3478 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 3479 else
3280af22 3480 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
a0d0e21e 3481 s = skipspace(s);
8452ff4b
SB
3482 if (*s == '}') {
3483 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
3484 PL_expect = XTERM;
3485 /* This hack is to get the ${} in the message. */
3486 PL_bufptr = s+1;
3487 yyerror("syntax error");
3488 break;
3489 }
a0d0e21e 3490 OPERATOR(HASHBRACK);
8452ff4b 3491 }
b8a4b1be
GS
3492 /* This hack serves to disambiguate a pair of curlies
3493 * as being a block or an anon hash. Normally, expectation
3494 * determines that, but in cases where we're not in a
3495 * position to expect anything in particular (like inside
3496 * eval"") we have to resolve the ambiguity. This code
3497 * covers the case where the first term in the curlies is a
3498 * quoted string. Most other cases need to be explicitly
a0288114 3499 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
3500 * curly in order to force resolution as an anon hash.
3501 *
3502 * XXX should probably propagate the outer expectation
3503 * into eval"" to rely less on this hack, but that could
3504 * potentially break current behavior of eval"".
3505 * GSAR 97-07-21
3506 */
3507 t = s;
3508 if (*s == '\'' || *s == '"' || *s == '`') {
3509 /* common case: get past first string, handling escapes */
3280af22 3510 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
3511 if (*t++ == '\\' && (*t == '\\' || *t == *s))
3512 t++;
3513 t++;
a0d0e21e 3514 }
b8a4b1be 3515 else if (*s == 'q') {
3280af22 3516 if (++t < PL_bufend
b8a4b1be 3517 && (!isALNUM(*t)
3280af22 3518 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
3519 && !isALNUM(*t))))
3520 {
abc667d1 3521 /* skip q//-like construct */
f54cb97a 3522 const char *tmps;
b8a4b1be
GS
3523 char open, close, term;
3524 I32 brackets = 1;
3525
3280af22 3526 while (t < PL_bufend && isSPACE(*t))