This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix Time-HiRes linker error on Win32 introduced by upgrades
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a 71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 301 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
c35e046a
AL
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 NOOP;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 501 char he_name[32] = "feature_";
6fca0082 502 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22 664 PL_linestr = line;
cfd0369c 665 s = SvPV_const(PL_linestr, len);
0eb20fa2
DM
666 if (SvREADONLY(PL_linestr) || !len || s[len-1] != ';') {
667 PL_linestr = sv_2mortal(len ? newSVsv(PL_linestr) : newSVpvn(s, 0));
668 if (!len || s[len-1] != ';')
669 sv_catpvs(PL_linestr, "\n;");
8990e307 670 }
3280af22
NIS
671 SvTEMP_off(PL_linestr);
672 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
673 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 674 PL_last_lop = PL_last_uni = NULL;
3280af22 675 PL_rsfp = 0;
79072805 676}
a687059c 677
ffb4593c
NT
678/*
679 * Perl_lex_end
9cbb5ea2
GS
680 * Finalizer for lexing operations. Must be called when the parser is
681 * done with the lexer.
ffb4593c
NT
682 */
683
463ee0b2 684void
864dbfa3 685Perl_lex_end(pTHX)
463ee0b2 686{
97aff369 687 dVAR;
3280af22 688 PL_doextract = FALSE;
463ee0b2
LW
689}
690
ffb4593c
NT
691/*
692 * S_incline
693 * This subroutine has nothing to do with tilting, whether at windmills
694 * or pinball tables. Its name is short for "increment line". It
57843af0 695 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 696 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
697 * # line 500 "foo.pm"
698 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
699 */
700
76e3520e 701STATIC void
cea2e8a9 702S_incline(pTHX_ char *s)
463ee0b2 703{
97aff369 704 dVAR;
463ee0b2
LW
705 char *t;
706 char *n;
73659bf1 707 char *e;
463ee0b2 708 char ch;
463ee0b2 709
57843af0 710 CopLINE_inc(PL_curcop);
463ee0b2
LW
711 if (*s++ != '#')
712 return;
d4c19fe8
AL
713 while (SPACE_OR_TAB(*s))
714 s++;
73659bf1
GS
715 if (strnEQ(s, "line", 4))
716 s += 4;
717 else
718 return;
084592ab 719 if (SPACE_OR_TAB(*s))
73659bf1 720 s++;
4e553d73 721 else
73659bf1 722 return;
d4c19fe8
AL
723 while (SPACE_OR_TAB(*s))
724 s++;
463ee0b2
LW
725 if (!isDIGIT(*s))
726 return;
d4c19fe8 727
463ee0b2
LW
728 n = s;
729 while (isDIGIT(*s))
730 s++;
bf4acbe4 731 while (SPACE_OR_TAB(*s))
463ee0b2 732 s++;
73659bf1 733 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 734 s++;
73659bf1
GS
735 e = t + 1;
736 }
463ee0b2 737 else {
c35e046a
AL
738 t = s;
739 while (!isSPACE(*t))
740 t++;
73659bf1 741 e = t;
463ee0b2 742 }
bf4acbe4 743 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
744 e++;
745 if (*e != '\n' && *e != '\0')
746 return; /* false alarm */
747
463ee0b2
LW
748 ch = *t;
749 *t = '\0';
f4dd75d9 750 if (t - s > 0) {
8a5ee598 751#ifndef USE_ITHREADS
c4420975 752 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
753 STRLEN tmplen = cf ? strlen(cf) : 0;
754 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
755 /* must copy *{"::_<(eval N)[oldfilename:L]"}
756 * to *{"::_<newfilename"} */
757 char smallbuf[256], smallbuf2[256];
758 char *tmpbuf, *tmpbuf2;
8a5ee598 759 GV **gvp, *gv2;
e66cf94c
RGS
760 STRLEN tmplen2 = strlen(s);
761 if (tmplen + 3 < sizeof smallbuf)
762 tmpbuf = smallbuf;
763 else
764 Newx(tmpbuf, tmplen + 3, char);
765 if (tmplen2 + 3 < sizeof smallbuf2)
766 tmpbuf2 = smallbuf2;
767 else
768 Newx(tmpbuf2, tmplen2 + 3, char);
769 tmpbuf[0] = tmpbuf2[0] = '_';
770 tmpbuf[1] = tmpbuf2[1] = '<';
771 memcpy(tmpbuf + 2, cf, ++tmplen);
772 memcpy(tmpbuf2 + 2, s, ++tmplen2);
773 ++tmplen; ++tmplen2;
8a5ee598
RGS
774 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
775 if (gvp) {
776 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 777 if (!isGV(gv2)) {
8a5ee598 778 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
779 /* adjust ${"::_<newfilename"} to store the new file name */
780 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
781 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
782 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
783 }
8a5ee598 784 }
e66cf94c
RGS
785 if (tmpbuf != smallbuf) Safefree(tmpbuf);
786 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
787 }
8a5ee598 788#endif
05ec9bb3 789 CopFILE_free(PL_curcop);
57843af0 790 CopFILE_set(PL_curcop, s);
f4dd75d9 791 }
463ee0b2 792 *t = ch;
57843af0 793 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
794}
795
29595ff2 796#ifdef PERL_MAD
cd81e915 797/* skip space before PL_thistoken */
29595ff2
NC
798
799STATIC char *
800S_skipspace0(pTHX_ register char *s)
801{
802 s = skipspace(s);
803 if (!PL_madskills)
804 return s;
cd81e915
NC
805 if (PL_skipwhite) {
806 if (!PL_thiswhite)
6b29d1f5 807 PL_thiswhite = newSVpvs("");
cd81e915
NC
808 sv_catsv(PL_thiswhite, PL_skipwhite);
809 sv_free(PL_skipwhite);
810 PL_skipwhite = 0;
811 }
812 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
813 return s;
814}
815
cd81e915 816/* skip space after PL_thistoken */
29595ff2
NC
817
818STATIC char *
819S_skipspace1(pTHX_ register char *s)
820{
d4c19fe8 821 const char *start = s;
29595ff2
NC
822 I32 startoff = start - SvPVX(PL_linestr);
823
824 s = skipspace(s);
825 if (!PL_madskills)
826 return s;
827 start = SvPVX(PL_linestr) + startoff;
cd81e915 828 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 829 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
830 PL_thistoken = newSVpvn(tstart, start - tstart);
831 }
832 PL_realtokenstart = -1;
833 if (PL_skipwhite) {
834 if (!PL_nextwhite)
6b29d1f5 835 PL_nextwhite = newSVpvs("");
cd81e915
NC
836 sv_catsv(PL_nextwhite, PL_skipwhite);
837 sv_free(PL_skipwhite);
838 PL_skipwhite = 0;
29595ff2
NC
839 }
840 return s;
841}
842
843STATIC char *
844S_skipspace2(pTHX_ register char *s, SV **svp)
845{
c35e046a
AL
846 char *start;
847 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
848 const I32 startoff = s - SvPVX(PL_linestr);
849
29595ff2
NC
850 s = skipspace(s);
851 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
852 if (!PL_madskills || !svp)
853 return s;
854 start = SvPVX(PL_linestr) + startoff;
cd81e915 855 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 856 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
857 PL_thistoken = newSVpvn(tstart, start - tstart);
858 PL_realtokenstart = -1;
29595ff2 859 }
cd81e915 860 if (PL_skipwhite) {
29595ff2 861 if (!*svp)
6b29d1f5 862 *svp = newSVpvs("");
cd81e915
NC
863 sv_setsv(*svp, PL_skipwhite);
864 sv_free(PL_skipwhite);
865 PL_skipwhite = 0;
29595ff2
NC
866 }
867
868 return s;
869}
870#endif
871
ffb4593c
NT
872/*
873 * S_skipspace
874 * Called to gobble the appropriate amount and type of whitespace.
875 * Skips comments as well.
876 */
877
76e3520e 878STATIC char *
cea2e8a9 879S_skipspace(pTHX_ register char *s)
a687059c 880{
97aff369 881 dVAR;
5db06880
NC
882#ifdef PERL_MAD
883 int curoff;
884 int startoff = s - SvPVX(PL_linestr);
885
cd81e915
NC
886 if (PL_skipwhite) {
887 sv_free(PL_skipwhite);
888 PL_skipwhite = 0;
5db06880
NC
889 }
890#endif
891
3280af22 892 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 893 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 894 s++;
5db06880
NC
895#ifdef PERL_MAD
896 goto done;
897#else
463ee0b2 898 return s;
5db06880 899#endif
463ee0b2
LW
900 }
901 for (;;) {
fd049845 902 STRLEN prevlen;
09bef843 903 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 904 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
905 while (s < PL_bufend && isSPACE(*s)) {
906 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
907 incline(s);
908 }
ffb4593c
NT
909
910 /* comment */
3280af22
NIS
911 if (s < PL_bufend && *s == '#') {
912 while (s < PL_bufend && *s != '\n')
463ee0b2 913 s++;
60e6418e 914 if (s < PL_bufend) {
463ee0b2 915 s++;
60e6418e
GS
916 if (PL_in_eval && !PL_rsfp) {
917 incline(s);
918 continue;
919 }
920 }
463ee0b2 921 }
ffb4593c
NT
922
923 /* only continue to recharge the buffer if we're at the end
924 * of the buffer, we're not reading from a source filter, and
925 * we're in normal lexing mode
926 */
09bef843
SB
927 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
928 PL_lex_state == LEX_FORMLINE)
5db06880
NC
929#ifdef PERL_MAD
930 goto done;
931#else
463ee0b2 932 return s;
5db06880 933#endif
ffb4593c
NT
934
935 /* try to recharge the buffer */
5db06880
NC
936#ifdef PERL_MAD
937 curoff = s - SvPVX(PL_linestr);
938#endif
939
9cbb5ea2 940 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 941 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 942 {
5db06880
NC
943#ifdef PERL_MAD
944 if (PL_madskills && curoff != startoff) {
cd81e915 945 if (!PL_skipwhite)
6b29d1f5 946 PL_skipwhite = newSVpvs("");
cd81e915 947 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
948 curoff - startoff);
949 }
950
951 /* mustn't throw out old stuff yet if madpropping */
952 SvCUR(PL_linestr) = curoff;
953 s = SvPVX(PL_linestr) + curoff;
954 *s = 0;
955 if (curoff && s[-1] == '\n')
956 s[-1] = ' ';
957#endif
958
9cbb5ea2 959 /* end of file. Add on the -p or -n magic */
cd81e915 960 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 961 if (PL_minus_p) {
5db06880
NC
962#ifdef PERL_MAD
963 sv_catpv(PL_linestr,
964 ";}continue{print or die qq(-p destination: $!\\n);}");
965#else
01a19ab0
NC
966 sv_setpv(PL_linestr,
967 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 968#endif
3280af22 969 PL_minus_n = PL_minus_p = 0;
a0d0e21e 970 }
01a19ab0 971 else if (PL_minus_n) {
5db06880
NC
972#ifdef PERL_MAD
973 sv_catpvn(PL_linestr, ";}", 2);
974#else
01a19ab0 975 sv_setpvn(PL_linestr, ";}", 2);
5db06880 976#endif
01a19ab0
NC
977 PL_minus_n = 0;
978 }
a0d0e21e 979 else
5db06880
NC
980#ifdef PERL_MAD
981 sv_catpvn(PL_linestr,";", 1);
982#else
4147a61b 983 sv_setpvn(PL_linestr,";", 1);
5db06880 984#endif
ffb4593c
NT
985
986 /* reset variables for next time we lex */
9cbb5ea2 987 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
988 = SvPVX(PL_linestr)
989#ifdef PERL_MAD
990 + curoff
991#endif
992 ;
3280af22 993 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 994 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
995
996 /* Close the filehandle. Could be from -P preprocessor,
997 * STDIN, or a regular file. If we were reading code from
998 * STDIN (because the commandline held no -e or filename)
999 * then we don't close it, we reset it so the code can
1000 * read from STDIN too.
1001 */
1002
3280af22
NIS
1003 if (PL_preprocess && !PL_in_eval)
1004 (void)PerlProc_pclose(PL_rsfp);
1005 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1006 PerlIO_clearerr(PL_rsfp);
8990e307 1007 else
3280af22 1008 (void)PerlIO_close(PL_rsfp);
4608196e 1009 PL_rsfp = NULL;
463ee0b2
LW
1010 return s;
1011 }
ffb4593c
NT
1012
1013 /* not at end of file, so we only read another line */
09bef843
SB
1014 /* make corresponding updates to old pointers, for yyerror() */
1015 oldprevlen = PL_oldbufptr - PL_bufend;
1016 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1017 if (PL_last_uni)
1018 oldunilen = PL_last_uni - PL_bufend;
1019 if (PL_last_lop)
1020 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1021 PL_linestart = PL_bufptr = s + prevlen;
1022 PL_bufend = s + SvCUR(PL_linestr);
1023 s = PL_bufptr;
09bef843
SB
1024 PL_oldbufptr = s + oldprevlen;
1025 PL_oldoldbufptr = s + oldoldprevlen;
1026 if (PL_last_uni)
1027 PL_last_uni = s + oldunilen;
1028 if (PL_last_lop)
1029 PL_last_lop = s + oldloplen;
a0d0e21e 1030 incline(s);
ffb4593c
NT
1031
1032 /* debugger active and we're not compiling the debugger code,
1033 * so store the line into the debugger's array of lines
1034 */
3280af22 1035 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1036 SV * const sv = newSV(0);
8990e307
LW
1037
1038 sv_upgrade(sv, SVt_PVMG);
3280af22 1039 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1040 (void)SvIOK_on(sv);
45977657 1041 SvIV_set(sv, 0);
36c7798d 1042 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1043 }
463ee0b2 1044 }
5db06880
NC
1045
1046#ifdef PERL_MAD
1047 done:
1048 if (PL_madskills) {
cd81e915 1049 if (!PL_skipwhite)
6b29d1f5 1050 PL_skipwhite = newSVpvs("");
5db06880
NC
1051 curoff = s - SvPVX(PL_linestr);
1052 if (curoff - startoff)
cd81e915 1053 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1054 curoff - startoff);
1055 }
1056 return s;
1057#endif
a687059c 1058}
378cc40b 1059
ffb4593c
NT
1060/*
1061 * S_check_uni
1062 * Check the unary operators to ensure there's no ambiguity in how they're
1063 * used. An ambiguous piece of code would be:
1064 * rand + 5
1065 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1066 * the +5 is its argument.
1067 */
1068
76e3520e 1069STATIC void
cea2e8a9 1070S_check_uni(pTHX)
ba106d47 1071{
97aff369 1072 dVAR;
d4c19fe8
AL
1073 const char *s;
1074 const char *t;
2f3197b3 1075
3280af22 1076 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1077 return;
3280af22
NIS
1078 while (isSPACE(*PL_last_uni))
1079 PL_last_uni++;
c35e046a
AL
1080 s = PL_last_uni;
1081 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1082 s++;
3280af22 1083 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1084 return;
6136c704 1085
0453d815 1086 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1087 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1088 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1089 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1090 }
2f3197b3
LW
1091}
1092
ffb4593c
NT
1093/*
1094 * LOP : macro to build a list operator. Its behaviour has been replaced
1095 * with a subroutine, S_lop() for which LOP is just another name.
1096 */
1097
a0d0e21e
LW
1098#define LOP(f,x) return lop(f,x,s)
1099
ffb4593c
NT
1100/*
1101 * S_lop
1102 * Build a list operator (or something that might be one). The rules:
1103 * - if we have a next token, then it's a list operator [why?]
1104 * - if the next thing is an opening paren, then it's a function
1105 * - else it's a list operator
1106 */
1107
76e3520e 1108STATIC I32
a0be28da 1109S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1110{
97aff369 1111 dVAR;
79072805 1112 yylval.ival = f;
35c8bce7 1113 CLINE;
3280af22
NIS
1114 PL_expect = x;
1115 PL_bufptr = s;
1116 PL_last_lop = PL_oldbufptr;
eb160463 1117 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1118#ifdef PERL_MAD
1119 if (PL_lasttoke)
1120 return REPORT(LSTOP);
1121#else
3280af22 1122 if (PL_nexttoke)
bbf60fe6 1123 return REPORT(LSTOP);
5db06880 1124#endif
79072805 1125 if (*s == '(')
bbf60fe6 1126 return REPORT(FUNC);
29595ff2 1127 s = PEEKSPACE(s);
79072805 1128 if (*s == '(')
bbf60fe6 1129 return REPORT(FUNC);
79072805 1130 else
bbf60fe6 1131 return REPORT(LSTOP);
79072805
LW
1132}
1133
5db06880
NC
1134#ifdef PERL_MAD
1135 /*
1136 * S_start_force
1137 * Sets up for an eventual force_next(). start_force(0) basically does
1138 * an unshift, while start_force(-1) does a push. yylex removes items
1139 * on the "pop" end.
1140 */
1141
1142STATIC void
1143S_start_force(pTHX_ int where)
1144{
1145 int i;
1146
cd81e915 1147 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1148 where = PL_lasttoke;
cd81e915
NC
1149 assert(PL_curforce < 0 || PL_curforce == where);
1150 if (PL_curforce != where) {
5db06880
NC
1151 for (i = PL_lasttoke; i > where; --i) {
1152 PL_nexttoke[i] = PL_nexttoke[i-1];
1153 }
1154 PL_lasttoke++;
1155 }
cd81e915 1156 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1157 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1158 PL_curforce = where;
1159 if (PL_nextwhite) {
5db06880 1160 if (PL_madskills)
6b29d1f5 1161 curmad('^', newSVpvs(""));
cd81e915 1162 CURMAD('_', PL_nextwhite);
5db06880
NC
1163 }
1164}
1165
1166STATIC void
1167S_curmad(pTHX_ char slot, SV *sv)
1168{
1169 MADPROP **where;
1170
1171 if (!sv)
1172 return;
cd81e915
NC
1173 if (PL_curforce < 0)
1174 where = &PL_thismad;
5db06880 1175 else
cd81e915 1176 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1177
cd81e915 1178 if (PL_faketokens)
5db06880
NC
1179 sv_setpvn(sv, "", 0);
1180 else {
1181 if (!IN_BYTES) {
1182 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1183 SvUTF8_on(sv);
1184 else if (PL_encoding) {
1185 sv_recode_to_utf8(sv, PL_encoding);
1186 }
1187 }
1188 }
1189
1190 /* keep a slot open for the head of the list? */
1191 if (slot != '_' && *where && (*where)->mad_key == '^') {
1192 (*where)->mad_key = slot;
1193 sv_free((*where)->mad_val);
1194 (*where)->mad_val = (void*)sv;
1195 }
1196 else
1197 addmad(newMADsv(slot, sv), where, 0);
1198}
1199#else
b3f24c00
MHM
1200# define start_force(where) NOOP
1201# define curmad(slot, sv) NOOP
5db06880
NC
1202#endif
1203
ffb4593c
NT
1204/*
1205 * S_force_next
9cbb5ea2 1206 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1207 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1208 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1209 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1210 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1211 */
1212
4e553d73 1213STATIC void
cea2e8a9 1214S_force_next(pTHX_ I32 type)
79072805 1215{
97aff369 1216 dVAR;
5db06880 1217#ifdef PERL_MAD
cd81e915 1218 if (PL_curforce < 0)
5db06880 1219 start_force(PL_lasttoke);
cd81e915 1220 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1221 if (PL_lex_state != LEX_KNOWNEXT)
1222 PL_lex_defer = PL_lex_state;
1223 PL_lex_state = LEX_KNOWNEXT;
1224 PL_lex_expect = PL_expect;
cd81e915 1225 PL_curforce = -1;
5db06880 1226#else
3280af22
NIS
1227 PL_nexttype[PL_nexttoke] = type;
1228 PL_nexttoke++;
1229 if (PL_lex_state != LEX_KNOWNEXT) {
1230 PL_lex_defer = PL_lex_state;
1231 PL_lex_expect = PL_expect;
1232 PL_lex_state = LEX_KNOWNEXT;
79072805 1233 }
5db06880 1234#endif
79072805
LW
1235}
1236
d0a148a6
NC
1237STATIC SV *
1238S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1239{
97aff369 1240 dVAR;
9d4ba2ae 1241 SV * const sv = newSVpvn(start,len);
bfed75c6 1242 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1243 SvUTF8_on(sv);
1244 return sv;
1245}
1246
ffb4593c
NT
1247/*
1248 * S_force_word
1249 * When the lexer knows the next thing is a word (for instance, it has
1250 * just seen -> and it knows that the next char is a word char, then
1251 * it calls S_force_word to stick the next word into the PL_next lookahead.
1252 *
1253 * Arguments:
b1b65b59 1254 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1255 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1256 * int check_keyword : if true, Perl checks to make sure the word isn't
1257 * a keyword (do this if the word is a label, e.g. goto FOO)
1258 * int allow_pack : if true, : characters will also be allowed (require,
1259 * use, etc. do this)
9cbb5ea2 1260 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1261 */
1262
76e3520e 1263STATIC char *
cea2e8a9 1264S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1265{
97aff369 1266 dVAR;
463ee0b2
LW
1267 register char *s;
1268 STRLEN len;
4e553d73 1269
29595ff2 1270 start = SKIPSPACE1(start);
463ee0b2 1271 s = start;
7e2040f0 1272 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1273 (allow_pack && *s == ':') ||
15f0808c 1274 (allow_initial_tick && *s == '\'') )
a0d0e21e 1275 {
3280af22 1276 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1277 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1278 return start;
cd81e915 1279 start_force(PL_curforce);
5db06880
NC
1280 if (PL_madskills)
1281 curmad('X', newSVpvn(start,s-start));
463ee0b2 1282 if (token == METHOD) {
29595ff2 1283 s = SKIPSPACE1(s);
463ee0b2 1284 if (*s == '(')
3280af22 1285 PL_expect = XTERM;
463ee0b2 1286 else {
3280af22 1287 PL_expect = XOPERATOR;
463ee0b2 1288 }
79072805 1289 }
9ded7720 1290 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1291 = (OP*)newSVOP(OP_CONST,0,
1292 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1293 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1294 force_next(token);
1295 }
1296 return s;
1297}
1298
ffb4593c
NT
1299/*
1300 * S_force_ident
9cbb5ea2 1301 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1302 * text only contains the "foo" portion. The first argument is a pointer
1303 * to the "foo", and the second argument is the type symbol to prefix.
1304 * Forces the next token to be a "WORD".
9cbb5ea2 1305 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1306 */
1307
76e3520e 1308STATIC void
bfed75c6 1309S_force_ident(pTHX_ register const char *s, int kind)
79072805 1310{
97aff369 1311 dVAR;
c35e046a 1312 if (*s) {
90e5519e
NC
1313 const STRLEN len = strlen(s);
1314 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1315 start_force(PL_curforce);
9ded7720 1316 NEXTVAL_NEXTTOKE.opval = o;
79072805 1317 force_next(WORD);
748a9306 1318 if (kind) {
11343788 1319 o->op_private = OPpCONST_ENTERED;
55497cff 1320 /* XXX see note in pp_entereval() for why we forgo typo
1321 warnings if the symbol must be introduced in an eval.
1322 GSAR 96-10-12 */
90e5519e
NC
1323 gv_fetchpvn_flags(s, len,
1324 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1325 : GV_ADD,
1326 kind == '$' ? SVt_PV :
1327 kind == '@' ? SVt_PVAV :
1328 kind == '%' ? SVt_PVHV :
a0d0e21e 1329 SVt_PVGV
90e5519e 1330 );
748a9306 1331 }
79072805
LW
1332 }
1333}
1334
1571675a
GS
1335NV
1336Perl_str_to_version(pTHX_ SV *sv)
1337{
1338 NV retval = 0.0;
1339 NV nshift = 1.0;
1340 STRLEN len;
cfd0369c 1341 const char *start = SvPV_const(sv,len);
9d4ba2ae 1342 const char * const end = start + len;
504618e9 1343 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1344 while (start < end) {
ba210ebe 1345 STRLEN skip;
1571675a
GS
1346 UV n;
1347 if (utf)
9041c2e3 1348 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1349 else {
1350 n = *(U8*)start;
1351 skip = 1;
1352 }
1353 retval += ((NV)n)/nshift;
1354 start += skip;
1355 nshift *= 1000;
1356 }
1357 return retval;
1358}
1359
4e553d73 1360/*
ffb4593c
NT
1361 * S_force_version
1362 * Forces the next token to be a version number.
e759cc13
RGS
1363 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1364 * and if "guessing" is TRUE, then no new token is created (and the caller
1365 * must use an alternative parsing method).
ffb4593c
NT
1366 */
1367
76e3520e 1368STATIC char *
e759cc13 1369S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1370{
97aff369 1371 dVAR;
5f66b61c 1372 OP *version = NULL;
44dcb63b 1373 char *d;
5db06880
NC
1374#ifdef PERL_MAD
1375 I32 startoff = s - SvPVX(PL_linestr);
1376#endif
89bfa8cd 1377
29595ff2 1378 s = SKIPSPACE1(s);
89bfa8cd 1379
44dcb63b 1380 d = s;
dd629d5b 1381 if (*d == 'v')
44dcb63b 1382 d++;
44dcb63b 1383 if (isDIGIT(*d)) {
e759cc13
RGS
1384 while (isDIGIT(*d) || *d == '_' || *d == '.')
1385 d++;
5db06880
NC
1386#ifdef PERL_MAD
1387 if (PL_madskills) {
cd81e915 1388 start_force(PL_curforce);
5db06880
NC
1389 curmad('X', newSVpvn(s,d-s));
1390 }
1391#endif
9f3d182e 1392 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1393 SV *ver;
b73d6f50 1394 s = scan_num(s, &yylval);
89bfa8cd 1395 version = yylval.opval;
dd629d5b
GS
1396 ver = cSVOPx(version)->op_sv;
1397 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1398 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1399 SvNV_set(ver, str_to_version(ver));
1571675a 1400 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1401 }
89bfa8cd 1402 }
5db06880
NC
1403 else if (guessing) {
1404#ifdef PERL_MAD
1405 if (PL_madskills) {
cd81e915
NC
1406 sv_free(PL_nextwhite); /* let next token collect whitespace */
1407 PL_nextwhite = 0;
5db06880
NC
1408 s = SvPVX(PL_linestr) + startoff;
1409 }
1410#endif
e759cc13 1411 return s;
5db06880 1412 }
89bfa8cd 1413 }
1414
5db06880
NC
1415#ifdef PERL_MAD
1416 if (PL_madskills && !version) {
cd81e915
NC
1417 sv_free(PL_nextwhite); /* let next token collect whitespace */
1418 PL_nextwhite = 0;
5db06880
NC
1419 s = SvPVX(PL_linestr) + startoff;
1420 }
1421#endif
89bfa8cd 1422 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1423 start_force(PL_curforce);
9ded7720 1424 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1425 force_next(WORD);
89bfa8cd 1426
e759cc13 1427 return s;
89bfa8cd 1428}
1429
ffb4593c
NT
1430/*
1431 * S_tokeq
1432 * Tokenize a quoted string passed in as an SV. It finds the next
1433 * chunk, up to end of string or a backslash. It may make a new
1434 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1435 * turns \\ into \.
1436 */
1437
76e3520e 1438STATIC SV *
cea2e8a9 1439S_tokeq(pTHX_ SV *sv)
79072805 1440{
97aff369 1441 dVAR;
79072805
LW
1442 register char *s;
1443 register char *send;
1444 register char *d;
b3ac6de7
IZ
1445 STRLEN len = 0;
1446 SV *pv = sv;
79072805
LW
1447
1448 if (!SvLEN(sv))
b3ac6de7 1449 goto finish;
79072805 1450
a0d0e21e 1451 s = SvPV_force(sv, len);
21a311ee 1452 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1453 goto finish;
463ee0b2 1454 send = s + len;
79072805
LW
1455 while (s < send && *s != '\\')
1456 s++;
1457 if (s == send)
b3ac6de7 1458 goto finish;
79072805 1459 d = s;
be4731d2 1460 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1461 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1462 if (SvUTF8(sv))
1463 SvUTF8_on(pv);
1464 }
79072805
LW
1465 while (s < send) {
1466 if (*s == '\\') {
a0d0e21e 1467 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1468 s++; /* all that, just for this */
1469 }
1470 *d++ = *s++;
1471 }
1472 *d = '\0';
95a20fc0 1473 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1474 finish:
3280af22 1475 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1476 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1477 return sv;
1478}
1479
ffb4593c
NT
1480/*
1481 * Now come three functions related to double-quote context,
1482 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1483 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1484 * interact with PL_lex_state, and create fake ( ... ) argument lists
1485 * to handle functions and concatenation.
1486 * They assume that whoever calls them will be setting up a fake
1487 * join call, because each subthing puts a ',' after it. This lets
1488 * "lower \luPpEr"
1489 * become
1490 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1491 *
1492 * (I'm not sure whether the spurious commas at the end of lcfirst's
1493 * arguments and join's arguments are created or not).
1494 */
1495
1496/*
1497 * S_sublex_start
1498 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1499 *
1500 * Pattern matching will set PL_lex_op to the pattern-matching op to
1501 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1502 *
1503 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1504 *
1505 * Everything else becomes a FUNC.
1506 *
1507 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1508 * had an OP_CONST or OP_READLINE). This just sets us up for a
1509 * call to S_sublex_push().
1510 */
1511
76e3520e 1512STATIC I32
cea2e8a9 1513S_sublex_start(pTHX)
79072805 1514{
97aff369 1515 dVAR;
0d46e09a 1516 register const I32 op_type = yylval.ival;
79072805
LW
1517
1518 if (op_type == OP_NULL) {
3280af22 1519 yylval.opval = PL_lex_op;
5f66b61c 1520 PL_lex_op = NULL;
79072805
LW
1521 return THING;
1522 }
1523 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1524 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1525
1526 if (SvTYPE(sv) == SVt_PVIV) {
1527 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1528 STRLEN len;
96a5add6 1529 const char * const p = SvPV_const(sv, len);
f54cb97a 1530 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1531 if (SvUTF8(sv))
1532 SvUTF8_on(nsv);
b3ac6de7
IZ
1533 SvREFCNT_dec(sv);
1534 sv = nsv;
4e553d73 1535 }
b3ac6de7 1536 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1537 PL_lex_stuff = NULL;
6f33ba73
RGS
1538 /* Allow <FH> // "foo" */
1539 if (op_type == OP_READLINE)
1540 PL_expect = XTERMORDORDOR;
79072805
LW
1541 return THING;
1542 }
e3f73d4e
RGS
1543 else if (op_type == OP_BACKTICK && PL_lex_op) {
1544 /* readpipe() vas overriden */
1545 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
1546 yylval.opval = PL_lex_op;
9b201d7d 1547 PL_lex_op = NULL;
e3f73d4e
RGS
1548 PL_lex_stuff = NULL;
1549 return THING;
1550 }
79072805 1551
3280af22
NIS
1552 PL_sublex_info.super_state = PL_lex_state;
1553 PL_sublex_info.sub_inwhat = op_type;
1554 PL_sublex_info.sub_op = PL_lex_op;
1555 PL_lex_state = LEX_INTERPPUSH;
55497cff 1556
3280af22
NIS
1557 PL_expect = XTERM;
1558 if (PL_lex_op) {
1559 yylval.opval = PL_lex_op;
5f66b61c 1560 PL_lex_op = NULL;
55497cff 1561 return PMFUNC;
1562 }
1563 else
1564 return FUNC;
1565}
1566
ffb4593c
NT
1567/*
1568 * S_sublex_push
1569 * Create a new scope to save the lexing state. The scope will be
1570 * ended in S_sublex_done. Returns a '(', starting the function arguments
1571 * to the uc, lc, etc. found before.
1572 * Sets PL_lex_state to LEX_INTERPCONCAT.
1573 */
1574
76e3520e 1575STATIC I32
cea2e8a9 1576S_sublex_push(pTHX)
55497cff 1577{
27da23d5 1578 dVAR;
f46d017c 1579 ENTER;
55497cff 1580
3280af22
NIS
1581 PL_lex_state = PL_sublex_info.super_state;
1582 SAVEI32(PL_lex_dojoin);
1583 SAVEI32(PL_lex_brackets);
3280af22
NIS
1584 SAVEI32(PL_lex_casemods);
1585 SAVEI32(PL_lex_starts);
1586 SAVEI32(PL_lex_state);
7766f137 1587 SAVEVPTR(PL_lex_inpat);
3280af22 1588 SAVEI32(PL_lex_inwhat);
57843af0 1589 SAVECOPLINE(PL_curcop);
3280af22 1590 SAVEPPTR(PL_bufptr);
8452ff4b 1591 SAVEPPTR(PL_bufend);
3280af22
NIS
1592 SAVEPPTR(PL_oldbufptr);
1593 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1594 SAVEPPTR(PL_last_lop);
1595 SAVEPPTR(PL_last_uni);
3280af22
NIS
1596 SAVEPPTR(PL_linestart);
1597 SAVESPTR(PL_linestr);
8edd5f42
RGS
1598 SAVEGENERICPV(PL_lex_brackstack);
1599 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1600
1601 PL_linestr = PL_lex_stuff;
a0714e2c 1602 PL_lex_stuff = NULL;
3280af22 1603
9cbb5ea2
GS
1604 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1605 = SvPVX(PL_linestr);
3280af22 1606 PL_bufend += SvCUR(PL_linestr);
bd61b366 1607 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1608 SAVEFREESV(PL_linestr);
1609
1610 PL_lex_dojoin = FALSE;
1611 PL_lex_brackets = 0;
a02a5408
JC
1612 Newx(PL_lex_brackstack, 120, char);
1613 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1614 PL_lex_casemods = 0;
1615 *PL_lex_casestack = '\0';
1616 PL_lex_starts = 0;
1617 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1618 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1619
1620 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1621 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1622 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1623 else
5f66b61c 1624 PL_lex_inpat = NULL;
79072805 1625
55497cff 1626 return '(';
79072805
LW
1627}
1628
ffb4593c
NT
1629/*
1630 * S_sublex_done
1631 * Restores lexer state after a S_sublex_push.
1632 */
1633
76e3520e 1634STATIC I32
cea2e8a9 1635S_sublex_done(pTHX)
79072805 1636{
27da23d5 1637 dVAR;
3280af22 1638 if (!PL_lex_starts++) {
396482e1 1639 SV * const sv = newSVpvs("");
9aa983d2
JH
1640 if (SvUTF8(PL_linestr))
1641 SvUTF8_on(sv);
3280af22 1642 PL_expect = XOPERATOR;
9aa983d2 1643 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1644 return THING;
1645 }
1646
3280af22
NIS
1647 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1648 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1649 return yylex();
79072805
LW
1650 }
1651
ffb4593c 1652 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1653 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1654 PL_linestr = PL_lex_repl;
1655 PL_lex_inpat = 0;
1656 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1657 PL_bufend += SvCUR(PL_linestr);
bd61b366 1658 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1659 SAVEFREESV(PL_linestr);
1660 PL_lex_dojoin = FALSE;
1661 PL_lex_brackets = 0;
3280af22
NIS
1662 PL_lex_casemods = 0;
1663 *PL_lex_casestack = '\0';
1664 PL_lex_starts = 0;
25da4f38 1665 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1666 PL_lex_state = LEX_INTERPNORMAL;
1667 PL_lex_starts++;
e9fa98b2
HS
1668 /* we don't clear PL_lex_repl here, so that we can check later
1669 whether this is an evalled subst; that means we rely on the
1670 logic to ensure sublex_done() is called again only via the
1671 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1672 }
e9fa98b2 1673 else {
3280af22 1674 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1675 PL_lex_repl = NULL;
e9fa98b2 1676 }
79072805 1677 return ',';
ffed7fef
LW
1678 }
1679 else {
5db06880
NC
1680#ifdef PERL_MAD
1681 if (PL_madskills) {
cd81e915
NC
1682 if (PL_thiswhite) {
1683 if (!PL_endwhite)
6b29d1f5 1684 PL_endwhite = newSVpvs("");
cd81e915
NC
1685 sv_catsv(PL_endwhite, PL_thiswhite);
1686 PL_thiswhite = 0;
1687 }
1688 if (PL_thistoken)
1689 sv_setpvn(PL_thistoken,"",0);
5db06880 1690 else
cd81e915 1691 PL_realtokenstart = -1;
5db06880
NC
1692 }
1693#endif
f46d017c 1694 LEAVE;
3280af22
NIS
1695 PL_bufend = SvPVX(PL_linestr);
1696 PL_bufend += SvCUR(PL_linestr);
1697 PL_expect = XOPERATOR;
09bef843 1698 PL_sublex_info.sub_inwhat = 0;
79072805 1699 return ')';
ffed7fef
LW
1700 }
1701}
1702
02aa26ce
NT
1703/*
1704 scan_const
1705
1706 Extracts a pattern, double-quoted string, or transliteration. This
1707 is terrifying code.
1708
94def140 1709 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1710 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1711 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1712
94def140
TS
1713 Returns a pointer to the character scanned up to. If this is
1714 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1715 successfully parsed), will leave an OP for the substring scanned
1716 in yylval. Caller must intuit reason for not parsing further
1717 by looking at the next characters herself.
1718
02aa26ce
NT
1719 In patterns:
1720 backslashes:
1721 double-quoted style: \r and \n
1722 regexp special ones: \D \s
94def140
TS
1723 constants: \x31
1724 backrefs: \1
02aa26ce
NT
1725 case and quoting: \U \Q \E
1726 stops on @ and $, but not for $ as tail anchor
1727
1728 In transliterations:
1729 characters are VERY literal, except for - not at the start or end
94def140
TS
1730 of the string, which indicates a range. If the range is in bytes,
1731 scan_const expands the range to the full set of intermediate
1732 characters. If the range is in utf8, the hyphen is replaced with
1733 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1734
1735 In double-quoted strings:
1736 backslashes:
1737 double-quoted style: \r and \n
94def140
TS
1738 constants: \x31
1739 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1740 case and quoting: \U \Q \E
1741 stops on @ and $
1742
1743 scan_const does *not* construct ops to handle interpolated strings.
1744 It stops processing as soon as it finds an embedded $ or @ variable
1745 and leaves it to the caller to work out what's going on.
1746
94def140
TS
1747 embedded arrays (whether in pattern or not) could be:
1748 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1749
1750 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1751
1752 $ in pattern could be $foo or could be tail anchor. Assumption:
1753 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1754 followed by one of "()| \r\n\t"
02aa26ce
NT
1755
1756 \1 (backreferences) are turned into $1
1757
1758 The structure of the code is
1759 while (there's a character to process) {
94def140
TS
1760 handle transliteration ranges
1761 skip regexp comments /(?#comment)/ and codes /(?{code})/
1762 skip #-initiated comments in //x patterns
1763 check for embedded arrays
02aa26ce
NT
1764 check for embedded scalars
1765 if (backslash) {
94def140
TS
1766 leave intact backslashes from leaveit (below)
1767 deprecate \1 in substitution replacements
02aa26ce
NT
1768 handle string-changing backslashes \l \U \Q \E, etc.
1769 switch (what was escaped) {
94def140
TS
1770 handle \- in a transliteration (becomes a literal -)
1771 handle \132 (octal characters)
1772 handle \x15 and \x{1234} (hex characters)
1773 handle \N{name} (named characters)
1774 handle \cV (control characters)
1775 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1776 } (end switch)
1777 } (end if backslash)
1778 } (end while character to read)
4e553d73 1779
02aa26ce
NT
1780*/
1781
76e3520e 1782STATIC char *
cea2e8a9 1783S_scan_const(pTHX_ char *start)
79072805 1784{
97aff369 1785 dVAR;
3280af22 1786 register char *send = PL_bufend; /* end of the constant */
561b68a9 1787 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1788 register char *s = start; /* start of the constant */
1789 register char *d = SvPVX(sv); /* destination for copies */
1790 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1791 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1792 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1793 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1794 UV uv;
4c3a8340
TS
1795#ifdef EBCDIC
1796 UV literal_endpoint = 0;
e294cc5d 1797 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 1798#endif
012bcf8d 1799
2b9d42f0
NIS
1800 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1801 /* If we are doing a trans and we know we want UTF8 set expectation */
1802 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1803 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1804 }
1805
1806
79072805 1807 while (s < send || dorange) {
02aa26ce 1808 /* get transliterations out of the way (they're most literal) */
3280af22 1809 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1810 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1811 if (dorange) {
1ba5c669
JH
1812 I32 i; /* current expanded character */
1813 I32 min; /* first character in range */
1814 I32 max; /* last character in range */
02aa26ce 1815
e294cc5d
JH
1816#ifdef EBCDIC
1817 UV uvmax = 0;
1818#endif
1819
1820 if (has_utf8
1821#ifdef EBCDIC
1822 && !native_range
1823#endif
1824 ) {
9d4ba2ae 1825 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1826 char *e = d++;
1827 while (e-- > c)
1828 *(e + 1) = *e;
25716404 1829 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1830 /* mark the range as done, and continue */
1831 dorange = FALSE;
1832 didrange = TRUE;
1833 continue;
1834 }
2b9d42f0 1835
95a20fc0 1836 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
1837#ifdef EBCDIC
1838 SvGROW(sv,
1839 SvLEN(sv) + (has_utf8 ?
1840 (512 - UTF_CONTINUATION_MARK +
1841 UNISKIP(0x100))
1842 : 256));
1843 /* How many two-byte within 0..255: 128 in UTF-8,
1844 * 96 in UTF-8-mod. */
1845#else
9cbb5ea2 1846 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 1847#endif
9cbb5ea2 1848 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
1849#ifdef EBCDIC
1850 if (has_utf8) {
1851 int j;
1852 for (j = 0; j <= 1; j++) {
1853 char * const c = (char*)utf8_hop((U8*)d, -1);
1854 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
1855 if (j)
1856 min = (U8)uv;
1857 else if (uv < 256)
1858 max = (U8)uv;
1859 else {
1860 max = (U8)0xff; /* only to \xff */
1861 uvmax = uv; /* \x{100} to uvmax */
1862 }
1863 d = c; /* eat endpoint chars */
1864 }
1865 }
1866 else {
1867#endif
1868 d -= 2; /* eat the first char and the - */
1869 min = (U8)*d; /* first char in range */
1870 max = (U8)d[1]; /* last char in range */
1871#ifdef EBCDIC
1872 }
1873#endif
8ada0baa 1874
c2e66d9e 1875 if (min > max) {
01ec43d0 1876 Perl_croak(aTHX_
d1573ac7 1877 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1878 (char)min, (char)max);
c2e66d9e
GS
1879 }
1880
c7f1f016 1881#ifdef EBCDIC
4c3a8340
TS
1882 if (literal_endpoint == 2 &&
1883 ((isLOWER(min) && isLOWER(max)) ||
1884 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1885 if (isLOWER(min)) {
1886 for (i = min; i <= max; i++)
1887 if (isLOWER(i))
db42d148 1888 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1889 } else {
1890 for (i = min; i <= max; i++)
1891 if (isUPPER(i))
db42d148 1892 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1893 }
1894 }
1895 else
1896#endif
1897 for (i = min; i <= max; i++)
e294cc5d
JH
1898#ifdef EBCDIC
1899 if (has_utf8) {
1900 const U8 ch = (U8)NATIVE_TO_UTF(i);
1901 if (UNI_IS_INVARIANT(ch))
1902 *d++ = (U8)i;
1903 else {
1904 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
1905 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
1906 }
1907 }
1908 else
1909#endif
1910 *d++ = (char)i;
1911
1912#ifdef EBCDIC
1913 if (uvmax) {
1914 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
1915 if (uvmax > 0x101)
1916 *d++ = (char)UTF_TO_NATIVE(0xff);
1917 if (uvmax > 0x100)
1918 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
1919 }
1920#endif
02aa26ce
NT
1921
1922 /* mark the range as done, and continue */
79072805 1923 dorange = FALSE;
01ec43d0 1924 didrange = TRUE;
4c3a8340
TS
1925#ifdef EBCDIC
1926 literal_endpoint = 0;
1927#endif
79072805 1928 continue;
4e553d73 1929 }
02aa26ce
NT
1930
1931 /* range begins (ignore - as first or last char) */
79072805 1932 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1933 if (didrange) {
1fafa243 1934 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1935 }
e294cc5d
JH
1936 if (has_utf8
1937#ifdef EBCDIC
1938 && !native_range
1939#endif
1940 ) {
25716404 1941 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1942 s++;
1943 continue;
1944 }
79072805
LW
1945 dorange = TRUE;
1946 s++;
01ec43d0
GS
1947 }
1948 else {
1949 didrange = FALSE;
4c3a8340
TS
1950#ifdef EBCDIC
1951 literal_endpoint = 0;
e294cc5d 1952 native_range = TRUE;
4c3a8340 1953#endif
01ec43d0 1954 }
79072805 1955 }
02aa26ce
NT
1956
1957 /* if we get here, we're not doing a transliteration */
1958
0f5d15d6
IZ
1959 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1960 except for the last char, which will be done separately. */
3280af22 1961 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1962 if (s[2] == '#') {
e994fd66 1963 while (s+1 < send && *s != ')')
db42d148 1964 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1965 }
1966 else if (s[2] == '{' /* This should match regcomp.c */
1967 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1968 {
cc6b7395 1969 I32 count = 1;
0f5d15d6 1970 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1971 char c;
1972
d9f97599
GS
1973 while (count && (c = *regparse)) {
1974 if (c == '\\' && regparse[1])
1975 regparse++;
4e553d73 1976 else if (c == '{')
cc6b7395 1977 count++;
4e553d73 1978 else if (c == '}')
cc6b7395 1979 count--;
d9f97599 1980 regparse++;
cc6b7395 1981 }
e994fd66 1982 if (*regparse != ')')
5bdf89e7 1983 regparse--; /* Leave one char for continuation. */
0f5d15d6 1984 while (s < regparse)
db42d148 1985 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1986 }
748a9306 1987 }
02aa26ce
NT
1988
1989 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1990 else if (*s == '#' && PL_lex_inpat &&
1991 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1992 while (s+1 < send && *s != '\n')
db42d148 1993 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1994 }
02aa26ce 1995
5d1d4326 1996 /* check for embedded arrays
da6eedaa 1997 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1998 */
1749ea0d
TS
1999 else if (*s == '@' && s[1]) {
2000 if (isALNUM_lazy_if(s+1,UTF))
2001 break;
2002 if (strchr(":'{$", s[1]))
2003 break;
2004 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2005 break; /* in regexp, neither @+ nor @- are interpolated */
2006 }
02aa26ce
NT
2007
2008 /* check for embedded scalars. only stop if we're sure it's a
2009 variable.
2010 */
79072805 2011 else if (*s == '$') {
3280af22 2012 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2013 break;
6002328a 2014 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
2015 break; /* in regexp, $ might be tail anchor */
2016 }
02aa26ce 2017
2b9d42f0
NIS
2018 /* End of else if chain - OP_TRANS rejoin rest */
2019
02aa26ce 2020 /* backslashes */
79072805
LW
2021 if (*s == '\\' && s+1 < send) {
2022 s++;
02aa26ce 2023
02aa26ce 2024 /* deprecate \1 in strings and substitution replacements */
3280af22 2025 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2026 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2027 {
599cee73 2028 if (ckWARN(WARN_SYNTAX))
9014280d 2029 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2030 *--s = '$';
2031 break;
2032 }
02aa26ce
NT
2033
2034 /* string-change backslash escapes */
3280af22 2035 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2036 --s;
2037 break;
2038 }
cc74c5bd
TS
2039 /* skip any other backslash escapes in a pattern */
2040 else if (PL_lex_inpat) {
2041 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2042 goto default_action;
2043 }
02aa26ce
NT
2044
2045 /* if we get here, it's either a quoted -, or a digit */
79072805 2046 switch (*s) {
02aa26ce
NT
2047
2048 /* quoted - in transliterations */
79072805 2049 case '-':
3280af22 2050 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2051 *d++ = *s++;
2052 continue;
2053 }
2054 /* FALL THROUGH */
2055 default:
11b8faa4 2056 {
86f97054 2057 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 2058 ckWARN(WARN_MISC))
9014280d 2059 Perl_warner(aTHX_ packWARN(WARN_MISC),
e294cc5d
JH
2060 "Unrecognized escape \\%c passed through",
2061 *s);
11b8faa4 2062 /* default action is to copy the quoted character */
f9a63242 2063 goto default_action;
11b8faa4 2064 }
02aa26ce
NT
2065
2066 /* \132 indicates an octal constant */
79072805
LW
2067 case '0': case '1': case '2': case '3':
2068 case '4': case '5': case '6': case '7':
ba210ebe 2069 {
53305cf1
NC
2070 I32 flags = 0;
2071 STRLEN len = 3;
2072 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2073 s += len;
2074 }
012bcf8d 2075 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2076
2077 /* \x24 indicates a hex constant */
79072805 2078 case 'x':
a0ed51b3
LW
2079 ++s;
2080 if (*s == '{') {
9d4ba2ae 2081 char* const e = strchr(s, '}');
a4c04bdc
NC
2082 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2083 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2084 STRLEN len;
355860ce 2085
53305cf1 2086 ++s;
adaeee49 2087 if (!e) {
a0ed51b3 2088 yyerror("Missing right brace on \\x{}");
355860ce 2089 continue;
ba210ebe 2090 }
53305cf1
NC
2091 len = e - s;
2092 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2093 s = e + 1;
a0ed51b3
LW
2094 }
2095 else {
ba210ebe 2096 {
53305cf1 2097 STRLEN len = 2;
a4c04bdc 2098 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2099 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2100 s += len;
2101 }
012bcf8d
GS
2102 }
2103
2104 NUM_ESCAPE_INSERT:
2105 /* Insert oct or hex escaped character.
301d3d20 2106 * There will always enough room in sv since such
db42d148 2107 * escapes will be longer than any UTF-8 sequence
301d3d20 2108 * they can end up as. */
ba7cea30 2109
c7f1f016
NIS
2110 /* We need to map to chars to ASCII before doing the tests
2111 to cover EBCDIC
2112 */
c4d5f83a 2113 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2114 if (!has_utf8 && uv > 255) {
301d3d20
JH
2115 /* Might need to recode whatever we have
2116 * accumulated so far if it contains any
2117 * hibit chars.
2118 *
2119 * (Can't we keep track of that and avoid
2120 * this rescan? --jhi)
012bcf8d 2121 */
c7f1f016 2122 int hicount = 0;
63cd0674
NIS
2123 U8 *c;
2124 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2125 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2126 hicount++;
db42d148 2127 }
012bcf8d 2128 }
63cd0674 2129 if (hicount) {
9d4ba2ae 2130 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2131 U8 *src, *dst;
2132 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2133 src = (U8 *)d - 1;
2134 dst = src+hicount;
2135 d += hicount;
cfd0369c 2136 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2137 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2138 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2139 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2140 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2141 }
2142 else {
63cd0674 2143 *dst-- = *src;
012bcf8d 2144 }
c7f1f016 2145 src--;
012bcf8d
GS
2146 }
2147 }
2148 }
2149
9aa983d2 2150 if (has_utf8 || uv > 255) {
9041c2e3 2151 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2152 has_utf8 = TRUE;
f9a63242
JH
2153 if (PL_lex_inwhat == OP_TRANS &&
2154 PL_sublex_info.sub_op) {
2155 PL_sublex_info.sub_op->op_private |=
2156 (PL_lex_repl ? OPpTRANS_FROM_UTF
2157 : OPpTRANS_TO_UTF);
f9a63242 2158 }
e294cc5d
JH
2159#ifdef EBCDIC
2160 if (uv > 255 && !dorange)
2161 native_range = FALSE;
2162#endif
012bcf8d 2163 }
a0ed51b3 2164 else {
012bcf8d 2165 *d++ = (char)uv;
a0ed51b3 2166 }
012bcf8d
GS
2167 }
2168 else {
c4d5f83a 2169 *d++ = (char) uv;
a0ed51b3 2170 }
79072805 2171 continue;
02aa26ce 2172
b239daa5 2173 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2174 case 'N':
55eda711 2175 ++s;
423cee85
JH
2176 if (*s == '{') {
2177 char* e = strchr(s, '}');
155aba94 2178 SV *res;
423cee85 2179 STRLEN len;
cfd0369c 2180 const char *str;
fc8cd66c 2181 SV *type;
4e553d73 2182
423cee85 2183 if (!e) {
5777a3f7 2184 yyerror("Missing right brace on \\N{}");
423cee85
JH
2185 e = s - 1;
2186 goto cont_scan;
2187 }
dbc0d4f2
JH
2188 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2189 /* \N{U+...} */
2190 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2191 PERL_SCAN_DISALLOW_PREFIX;
2192 s += 3;
2193 len = e - s;
2194 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2195 if ( e > s && len != (STRLEN)(e - s) ) {
2196 uv = 0xFFFD;
fc8cd66c 2197 }
dbc0d4f2
JH
2198 s = e + 1;
2199 goto NUM_ESCAPE_INSERT;
2200 }
55eda711 2201 res = newSVpvn(s + 1, e - s - 1);
fc8cd66c 2202 type = newSVpvn(s - 2,e - s + 3);
bd61b366 2203 res = new_constant( NULL, 0, "charnames",
fc8cd66c
YO
2204 res, NULL, SvPVX(type) );
2205 SvREFCNT_dec(type);
f9a63242
JH
2206 if (has_utf8)
2207 sv_utf8_upgrade(res);
cfd0369c 2208 str = SvPV_const(res,len);
1c47067b
JH
2209#ifdef EBCDIC_NEVER_MIND
2210 /* charnames uses pack U and that has been
2211 * recently changed to do the below uni->native
2212 * mapping, so this would be redundant (and wrong,
2213 * the code point would be doubly converted).
2214 * But leave this in just in case the pack U change
2215 * gets revoked, but the semantics is still
2216 * desireable for charnames. --jhi */
cddc7ef4 2217 {
cfd0369c 2218 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2219
2220 if (uv < 0x100) {
89ebb4a3 2221 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2222
2223 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2224 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2225 str = SvPV_const(res, len);
cddc7ef4
JH
2226 }
2227 }
2228#endif
89491803 2229 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2230 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2231 SvCUR_set(sv, d - ostart);
2232 SvPOK_on(sv);
e4f3eed8 2233 *d = '\0';
f08d6ad9 2234 sv_utf8_upgrade(sv);
d2f449dd 2235 /* this just broke our allocation above... */
eb160463 2236 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2237 d = SvPVX(sv) + SvCUR(sv);
89491803 2238 has_utf8 = TRUE;
f08d6ad9 2239 }
eb160463 2240 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2241 const char * const odest = SvPVX_const(sv);
423cee85 2242
8973db79 2243 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2244 d = SvPVX(sv) + (d - odest);
2245 }
e294cc5d
JH
2246#ifdef EBCDIC
2247 if (!dorange)
2248 native_range = FALSE; /* \N{} is guessed to be Unicode */
2249#endif
423cee85
JH
2250 Copy(str, d, len, char);
2251 d += len;
2252 SvREFCNT_dec(res);
2253 cont_scan:
2254 s = e + 1;
2255 }
2256 else
5777a3f7 2257 yyerror("Missing braces on \\N{}");
423cee85
JH
2258 continue;
2259
02aa26ce 2260 /* \c is a control character */
79072805
LW
2261 case 'c':
2262 s++;
961ce445 2263 if (s < send) {
ba210ebe 2264 U8 c = *s++;
c7f1f016
NIS
2265#ifdef EBCDIC
2266 if (isLOWER(c))
2267 c = toUPPER(c);
2268#endif
db42d148 2269 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2270 }
961ce445
RGS
2271 else {
2272 yyerror("Missing control char name in \\c");
2273 }
79072805 2274 continue;
02aa26ce
NT
2275
2276 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2277 case 'b':
db42d148 2278 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2279 break;
2280 case 'n':
db42d148 2281 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2282 break;
2283 case 'r':
db42d148 2284 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2285 break;
2286 case 'f':
db42d148 2287 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2288 break;
2289 case 't':
db42d148 2290 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2291 break;
34a3fe2a 2292 case 'e':
db42d148 2293 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2294 break;
2295 case 'a':
db42d148 2296 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2297 break;
02aa26ce
NT
2298 } /* end switch */
2299
79072805
LW
2300 s++;
2301 continue;
02aa26ce 2302 } /* end if (backslash) */
4c3a8340
TS
2303#ifdef EBCDIC
2304 else
2305 literal_endpoint++;
2306#endif
02aa26ce 2307
f9a63242 2308 default_action:
2b9d42f0
NIS
2309 /* If we started with encoded form, or already know we want it
2310 and then encode the next character */
2311 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2312 STRLEN len = 1;
5f66b61c
AL
2313 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2314 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2315 s += len;
2316 if (need > len) {
2317 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2318 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2319 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2320 }
5f66b61c 2321 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0 2322 has_utf8 = TRUE;
e294cc5d
JH
2323#ifdef EBCDIC
2324 if (uv > 255 && !dorange)
2325 native_range = FALSE;
2326#endif
2b9d42f0
NIS
2327 }
2328 else {
2329 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2330 }
02aa26ce
NT
2331 } /* while loop to process each character */
2332
2333 /* terminate the string and set up the sv */
79072805 2334 *d = '\0';
95a20fc0 2335 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2336 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2337 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2338
79072805 2339 SvPOK_on(sv);
9f4817db 2340 if (PL_encoding && !has_utf8) {
d0063567
DK
2341 sv_recode_to_utf8(sv, PL_encoding);
2342 if (SvUTF8(sv))
2343 has_utf8 = TRUE;
9f4817db 2344 }
2b9d42f0 2345 if (has_utf8) {
7e2040f0 2346 SvUTF8_on(sv);
2b9d42f0 2347 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2348 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2349 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2350 }
2351 }
79072805 2352
02aa26ce 2353 /* shrink the sv if we allocated more than we used */
79072805 2354 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2355 SvPV_shrink_to_cur(sv);
79072805 2356 }
02aa26ce 2357
9b599b2a 2358 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2359 if (s > PL_bufptr) {
2360 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
10edeb5d
JH
2361 sv = new_constant(start, s - start,
2362 (const char *)(PL_lex_inpat ? "qr" : "q"),
a0714e2c 2363 sv, NULL,
10edeb5d
JH
2364 (const char *)
2365 (( PL_lex_inwhat == OP_TRANS
2366 ? "tr"
2367 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
2368 ? "s"
2369 : "qq"))));
79072805 2370 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2371 } else
8990e307 2372 SvREFCNT_dec(sv);
79072805
LW
2373 return s;
2374}
2375
ffb4593c
NT
2376/* S_intuit_more
2377 * Returns TRUE if there's more to the expression (e.g., a subscript),
2378 * FALSE otherwise.
ffb4593c
NT
2379 *
2380 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2381 *
2382 * ->[ and ->{ return TRUE
2383 * { and [ outside a pattern are always subscripts, so return TRUE
2384 * if we're outside a pattern and it's not { or [, then return FALSE
2385 * if we're in a pattern and the first char is a {
2386 * {4,5} (any digits around the comma) returns FALSE
2387 * if we're in a pattern and the first char is a [
2388 * [] returns FALSE
2389 * [SOMETHING] has a funky algorithm to decide whether it's a
2390 * character class or not. It has to deal with things like
2391 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2392 * anything else returns TRUE
2393 */
2394
9cbb5ea2
GS
2395/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2396
76e3520e 2397STATIC int
cea2e8a9 2398S_intuit_more(pTHX_ register char *s)
79072805 2399{
97aff369 2400 dVAR;
3280af22 2401 if (PL_lex_brackets)
79072805
LW
2402 return TRUE;
2403 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2404 return TRUE;
2405 if (*s != '{' && *s != '[')
2406 return FALSE;
3280af22 2407 if (!PL_lex_inpat)
79072805
LW
2408 return TRUE;
2409
2410 /* In a pattern, so maybe we have {n,m}. */
2411 if (*s == '{') {
2412 s++;
2413 if (!isDIGIT(*s))
2414 return TRUE;
2415 while (isDIGIT(*s))
2416 s++;
2417 if (*s == ',')
2418 s++;
2419 while (isDIGIT(*s))
2420 s++;
2421 if (*s == '}')
2422 return FALSE;
2423 return TRUE;
2424
2425 }
2426
2427 /* On the other hand, maybe we have a character class */
2428
2429 s++;
2430 if (*s == ']' || *s == '^')
2431 return FALSE;
2432 else {
ffb4593c 2433 /* this is terrifying, and it works */
79072805
LW
2434 int weight = 2; /* let's weigh the evidence */
2435 char seen[256];
f27ffc4a 2436 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2437 const char * const send = strchr(s,']');
3280af22 2438 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2439
2440 if (!send) /* has to be an expression */
2441 return TRUE;
2442
2443 Zero(seen,256,char);
2444 if (*s == '$')
2445 weight -= 3;
2446 else if (isDIGIT(*s)) {
2447 if (s[1] != ']') {
2448 if (isDIGIT(s[1]) && s[2] == ']')
2449 weight -= 10;
2450 }
2451 else
2452 weight -= 100;
2453 }
2454 for (; s < send; s++) {
2455 last_un_char = un_char;
2456 un_char = (unsigned char)*s;
2457 switch (*s) {
2458 case '@':
2459 case '&':
2460 case '$':
2461 weight -= seen[un_char] * 10;
7e2040f0 2462 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2463 int len;
8903cb82 2464 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2465 len = (int)strlen(tmpbuf);
2466 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2467 weight -= 100;
2468 else
2469 weight -= 10;
2470 }
2471 else if (*s == '$' && s[1] &&
93a17b20
LW
2472 strchr("[#!%*<>()-=",s[1])) {
2473 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2474 weight -= 10;
2475 else
2476 weight -= 1;
2477 }
2478 break;
2479 case '\\':
2480 un_char = 254;
2481 if (s[1]) {
93a17b20 2482 if (strchr("wds]",s[1]))
79072805 2483 weight += 100;
10edeb5d 2484 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 2485 weight += 1;
93a17b20 2486 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2487 weight += 40;
2488 else if (isDIGIT(s[1])) {
2489 weight += 40;
2490 while (s[1] && isDIGIT(s[1]))
2491 s++;
2492 }
2493 }
2494 else
2495 weight += 100;
2496 break;
2497 case '-':
2498 if (s[1] == '\\')
2499 weight += 50;
93a17b20 2500 if (strchr("aA01! ",last_un_char))
79072805 2501 weight += 30;
93a17b20 2502 if (strchr("zZ79~",s[1]))
79072805 2503 weight += 30;
f27ffc4a
GS
2504 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2505 weight -= 5; /* cope with negative subscript */
79072805
LW
2506 break;
2507 default:
3792a11b
NC
2508 if (!isALNUM(last_un_char)
2509 && !(last_un_char == '$' || last_un_char == '@'
2510 || last_un_char == '&')
2511 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2512 char *d = tmpbuf;
2513 while (isALPHA(*s))
2514 *d++ = *s++;
2515 *d = '\0';
5458a98a 2516 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
2517 weight -= 150;
2518 }
2519 if (un_char == last_un_char + 1)
2520 weight += 5;
2521 weight -= seen[un_char];
2522 break;
2523 }
2524 seen[un_char]++;
2525 }
2526 if (weight >= 0) /* probably a character class */
2527 return FALSE;
2528 }
2529
2530 return TRUE;
2531}
ffed7fef 2532
ffb4593c
NT
2533/*
2534 * S_intuit_method
2535 *
2536 * Does all the checking to disambiguate
2537 * foo bar
2538 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2539 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2540 *
2541 * First argument is the stuff after the first token, e.g. "bar".
2542 *
2543 * Not a method if bar is a filehandle.
2544 * Not a method if foo is a subroutine prototyped to take a filehandle.
2545 * Not a method if it's really "Foo $bar"
2546 * Method if it's "foo $bar"
2547 * Not a method if it's really "print foo $bar"
2548 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2549 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2550 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2551 * =>
2552 */
2553
76e3520e 2554STATIC int
62d55b22 2555S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2556{
97aff369 2557 dVAR;
a0d0e21e 2558 char *s = start + (*start == '$');
3280af22 2559 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2560 STRLEN len;
2561 GV* indirgv;
5db06880
NC
2562#ifdef PERL_MAD
2563 int soff;
2564#endif
a0d0e21e
LW
2565
2566 if (gv) {
62d55b22 2567 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2568 return 0;
62d55b22
NC
2569 if (cv) {
2570 if (SvPOK(cv)) {
2571 const char *proto = SvPVX_const(cv);
2572 if (proto) {
2573 if (*proto == ';')
2574 proto++;
2575 if (*proto == '*')
2576 return 0;
2577 }
b6c543e3
IZ
2578 }
2579 } else
c35e046a 2580 gv = NULL;
a0d0e21e 2581 }
8903cb82 2582 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2583 /* start is the beginning of the possible filehandle/object,
2584 * and s is the end of it
2585 * tmpbuf is a copy of it
2586 */
2587
a0d0e21e 2588 if (*start == '$') {
3280af22 2589 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2590 return 0;
5db06880
NC
2591#ifdef PERL_MAD
2592 len = start - SvPVX(PL_linestr);
2593#endif
29595ff2 2594 s = PEEKSPACE(s);
f0092767 2595#ifdef PERL_MAD
5db06880
NC
2596 start = SvPVX(PL_linestr) + len;
2597#endif
3280af22
NIS
2598 PL_bufptr = start;
2599 PL_expect = XREF;
a0d0e21e
LW
2600 return *s == '(' ? FUNCMETH : METHOD;
2601 }
5458a98a 2602 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
2603 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2604 len -= 2;
2605 tmpbuf[len] = '\0';
5db06880
NC
2606#ifdef PERL_MAD
2607 soff = s - SvPVX(PL_linestr);
2608#endif
c3e0f903
GS
2609 goto bare_package;
2610 }
90e5519e 2611 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2612 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2613 return 0;
2614 /* filehandle or package name makes it a method */
89bfa8cd 2615 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2616#ifdef PERL_MAD
2617 soff = s - SvPVX(PL_linestr);
2618#endif
29595ff2 2619 s = PEEKSPACE(s);
3280af22 2620 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2621 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2622 bare_package:
cd81e915 2623 start_force(PL_curforce);
9ded7720 2624 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2625 newSVpvn(tmpbuf,len));
9ded7720 2626 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2627 if (PL_madskills)
2628 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2629 PL_expect = XTERM;
a0d0e21e 2630 force_next(WORD);
3280af22 2631 PL_bufptr = s;
5db06880
NC
2632#ifdef PERL_MAD
2633 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2634#endif
a0d0e21e
LW
2635 return *s == '(' ? FUNCMETH : METHOD;
2636 }
2637 }
2638 return 0;
2639}
2640
ffb4593c
NT
2641/*
2642 * S_incl_perldb
2643 * Return a string of Perl code to load the debugger. If PERL5DB
2644 * is set, it will return the contents of that, otherwise a
2645 * compile-time require of perl5db.pl.
2646 */
2647
bfed75c6 2648STATIC const char*
cea2e8a9 2649S_incl_perldb(pTHX)
a0d0e21e 2650{
97aff369 2651 dVAR;
3280af22 2652 if (PL_perldb) {
9d4ba2ae 2653 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2654
2655 if (pdb)
2656 return pdb;
93189314 2657 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2658 return "BEGIN { require 'perl5db.pl' }";
2659 }
2660 return "";
2661}
2662
2663
16d20bd9 2664/* Encoded script support. filter_add() effectively inserts a
4e553d73 2665 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2666 * Note that the filter function only applies to the current source file
2667 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2668 *
2669 * The datasv parameter (which may be NULL) can be used to pass
2670 * private data to this instance of the filter. The filter function
2671 * can recover the SV using the FILTER_DATA macro and use it to
2672 * store private buffers and state information.
2673 *
2674 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2675 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2676 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2677 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2678 * private use must be set using malloc'd pointers.
2679 */
16d20bd9
AD
2680
2681SV *
864dbfa3 2682Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2683{
97aff369 2684 dVAR;
f4c556ac 2685 if (!funcp)
a0714e2c 2686 return NULL;
f4c556ac 2687
3280af22
NIS
2688 if (!PL_rsfp_filters)
2689 PL_rsfp_filters = newAV();
16d20bd9 2690 if (!datasv)
561b68a9 2691 datasv = newSV(0);
862a34c6 2692 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2693 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2694 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2695 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2696 FPTR2DPTR(void *, IoANY(datasv)),
2697 SvPV_nolen(datasv)));
3280af22
NIS
2698 av_unshift(PL_rsfp_filters, 1);
2699 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2700 return(datasv);
2701}
4e553d73 2702
16d20bd9
AD
2703
2704/* Delete most recently added instance of this filter function. */
a0d0e21e 2705void
864dbfa3 2706Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2707{
97aff369 2708 dVAR;
e0c19803 2709 SV *datasv;
24801a4b 2710
33073adb 2711#ifdef DEBUGGING
55662e27
JH
2712 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2713 FPTR2DPTR(void*, funcp)));
33073adb 2714#endif
3280af22 2715 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2716 return;
2717 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2718 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2719 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2720 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2721 IoANY(datasv) = (void *)NULL;
3280af22 2722 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2723
16d20bd9
AD
2724 return;
2725 }
2726 /* we need to search for the correct entry and clear it */
cea2e8a9 2727 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2728}
2729
2730
1de9afcd
RGS
2731/* Invoke the idxth filter function for the current rsfp. */
2732/* maxlen 0 = read one text line */
16d20bd9 2733I32
864dbfa3 2734Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2735{
97aff369 2736 dVAR;
16d20bd9
AD
2737 filter_t funcp;
2738 SV *datasv = NULL;
f482118e
NC
2739 /* This API is bad. It should have been using unsigned int for maxlen.
2740 Not sure if we want to change the API, but if not we should sanity
2741 check the value here. */
39cd7a59
NC
2742 const unsigned int correct_length
2743 = maxlen < 0 ?
2744#ifdef PERL_MICRO
2745 0x7FFFFFFF
2746#else
2747 INT_MAX
2748#endif
2749 : maxlen;
e50aee73 2750
3280af22 2751 if (!PL_rsfp_filters)
16d20bd9 2752 return -1;
1de9afcd 2753 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2754 /* Provide a default input filter to make life easy. */
2755 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2756 DEBUG_P(PerlIO_printf(Perl_debug_log,
2757 "filter_read %d: from rsfp\n", idx));
f482118e 2758 if (correct_length) {
16d20bd9
AD
2759 /* Want a block */
2760 int len ;
f54cb97a 2761 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2762
2763 /* ensure buf_sv is large enough */
f482118e
NC
2764 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2765 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2766 correct_length)) <= 0) {
3280af22 2767 if (PerlIO_error(PL_rsfp))
37120919
AD
2768 return -1; /* error */
2769 else
2770 return 0 ; /* end of file */
2771 }
16d20bd9
AD
2772 SvCUR_set(buf_sv, old_len + len) ;
2773 } else {
2774 /* Want a line */
3280af22
NIS
2775 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2776 if (PerlIO_error(PL_rsfp))
37120919
AD
2777 return -1; /* error */
2778 else
2779 return 0 ; /* end of file */
2780 }
16d20bd9
AD
2781 }
2782 return SvCUR(buf_sv);
2783 }
2784 /* Skip this filter slot if filter has been deleted */
1de9afcd 2785 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2786 DEBUG_P(PerlIO_printf(Perl_debug_log,
2787 "filter_read %d: skipped (filter deleted)\n",
2788 idx));
f482118e 2789 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2790 }
2791 /* Get function pointer hidden within datasv */
8141890a 2792 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2793 DEBUG_P(PerlIO_printf(Perl_debug_log,
2794 "filter_read %d: via function %p (%s)\n",
ca0270c4 2795 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2796 /* Call function. The function is expected to */
2797 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2798 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2799 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2800}
2801
76e3520e 2802STATIC char *
cea2e8a9 2803S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2804{
97aff369 2805 dVAR;
c39cd008 2806#ifdef PERL_CR_FILTER
3280af22 2807 if (!PL_rsfp_filters) {
c39cd008 2808 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2809 }
2810#endif
3280af22 2811 if (PL_rsfp_filters) {
55497cff 2812 if (!append)
2813 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2814 if (FILTER_READ(0, sv, 0) > 0)
2815 return ( SvPVX(sv) ) ;
2816 else
bd61b366 2817 return NULL ;
16d20bd9 2818 }
9d116dd7 2819 else
fd049845 2820 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2821}
2822
01ec43d0 2823STATIC HV *
7fc63493 2824S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2825{
97aff369 2826 dVAR;
def3634b
GS
2827 GV *gv;
2828
01ec43d0 2829 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2830 return PL_curstash;
2831
2832 if (len > 2 &&
2833 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2834 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2835 {
2836 return GvHV(gv); /* Foo:: */
def3634b
GS
2837 }
2838
2839 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2840 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2841 if (gv && GvCV(gv)) {
2842 SV * const sv = cv_const_sv(GvCV(gv));
2843 if (sv)
83003860 2844 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2845 }
2846
2847 return gv_stashpv(pkgname, FALSE);
2848}
a0d0e21e 2849
e3f73d4e
RGS
2850/*
2851 * S_readpipe_override
2852 * Check whether readpipe() is overriden, and generates the appropriate
2853 * optree, provided sublex_start() is called afterwards.
2854 */
2855STATIC void
1d51329b 2856S_readpipe_override(pTHX)
e3f73d4e
RGS
2857{
2858 GV **gvp;
2859 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
2860 yylval.ival = OP_BACKTICK;
2861 if ((gv_readpipe
2862 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
2863 ||
2864 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
2865 && (gv_readpipe = *gvp) != (GV*)&PL_sv_undef
2866 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
2867 {
2868 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2869 append_elem(OP_LIST,
2870 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
2871 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
2872 }
2873 else {
2874 set_csh();
2875 }
2876}
2877
5db06880
NC
2878#ifdef PERL_MAD
2879 /*
2880 * Perl_madlex
2881 * The intent of this yylex wrapper is to minimize the changes to the
2882 * tokener when we aren't interested in collecting madprops. It remains
2883 * to be seen how successful this strategy will be...
2884 */
2885
2886int
2887Perl_madlex(pTHX)
2888{
2889 int optype;
2890 char *s = PL_bufptr;
2891
cd81e915
NC
2892 /* make sure PL_thiswhite is initialized */
2893 PL_thiswhite = 0;
2894 PL_thismad = 0;
5db06880 2895
cd81e915 2896 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2897 if (PL_pending_ident)
2898 return S_pending_ident(aTHX);
2899
2900 /* previous token ate up our whitespace? */
cd81e915
NC
2901 if (!PL_lasttoke && PL_nextwhite) {
2902 PL_thiswhite = PL_nextwhite;
2903 PL_nextwhite = 0;
5db06880
NC
2904 }
2905
2906 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2907 PL_realtokenstart = -1;
2908 PL_thistoken = 0;
5db06880
NC
2909 optype = yylex();
2910 s = PL_bufptr;
cd81e915 2911 assert(PL_curforce < 0);
5db06880 2912
cd81e915
NC
2913 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2914 if (!PL_thistoken) {
2915 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 2916 PL_thistoken = newSVpvs("");
5db06880 2917 else {
c35e046a 2918 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2919 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2920 }
2921 }
cd81e915
NC
2922 if (PL_thismad) /* install head */
2923 CURMAD('X', PL_thistoken);
5db06880
NC
2924 }
2925
2926 /* last whitespace of a sublex? */
cd81e915
NC
2927 if (optype == ')' && PL_endwhite) {
2928 CURMAD('X', PL_endwhite);
5db06880
NC
2929 }
2930
cd81e915 2931 if (!PL_thismad) {
5db06880
NC
2932
2933 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2934 if (!PL_thiswhite && !PL_endwhite && !optype) {
2935 sv_free(PL_thistoken);
2936 PL_thistoken = 0;
5db06880
NC
2937 return 0;
2938 }
2939
2940 /* put off final whitespace till peg */
2941 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2942 PL_nextwhite = PL_thiswhite;
2943 PL_thiswhite = 0;
5db06880 2944 }
cd81e915
NC
2945 else if (PL_thisopen) {
2946 CURMAD('q', PL_thisopen);
2947 if (PL_thistoken)
2948 sv_free(PL_thistoken);
2949 PL_thistoken = 0;
5db06880
NC
2950 }
2951 else {
2952 /* Store actual token text as madprop X */
cd81e915 2953 CURMAD('X', PL_thistoken);
5db06880
NC
2954 }
2955
cd81e915 2956 if (PL_thiswhite) {
5db06880 2957 /* add preceding whitespace as madprop _ */
cd81e915 2958 CURMAD('_', PL_thiswhite);
5db06880
NC
2959 }
2960
cd81e915 2961 if (PL_thisstuff) {
5db06880 2962 /* add quoted material as madprop = */
cd81e915 2963 CURMAD('=', PL_thisstuff);
5db06880
NC
2964 }
2965
cd81e915 2966 if (PL_thisclose) {
5db06880 2967 /* add terminating quote as madprop Q */
cd81e915 2968 CURMAD('Q', PL_thisclose);
5db06880
NC
2969 }
2970 }
2971
2972 /* special processing based on optype */
2973
2974 switch (optype) {
2975
2976 /* opval doesn't need a TOKEN since it can already store mp */
2977 case WORD:
2978 case METHOD:
2979 case FUNCMETH:
2980 case THING:
2981 case PMFUNC:
2982 case PRIVATEREF:
2983 case FUNC0SUB:
2984 case UNIOPSUB:
2985 case LSTOPSUB:
2986 if (yylval.opval)
cd81e915
NC
2987 append_madprops(PL_thismad, yylval.opval, 0);
2988 PL_thismad = 0;
5db06880
NC
2989 return optype;
2990
2991 /* fake EOF */
2992 case 0:
2993 optype = PEG;
cd81e915
NC
2994 if (PL_endwhite) {
2995 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2996 PL_endwhite = 0;
5db06880
NC
2997 }
2998 break;
2999
3000 case ']':
3001 case '}':
cd81e915 3002 if (PL_faketokens)
5db06880
NC
3003 break;
3004 /* remember any fake bracket that lexer is about to discard */
3005 if (PL_lex_brackets == 1 &&
3006 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3007 {
3008 s = PL_bufptr;
3009 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3010 s++;
3011 if (*s == '}') {
cd81e915
NC
3012 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3013 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3014 PL_thiswhite = 0;
5db06880
NC
3015 PL_bufptr = s - 1;
3016 break; /* don't bother looking for trailing comment */
3017 }
3018 else
3019 s = PL_bufptr;
3020 }
3021 if (optype == ']')
3022 break;
3023 /* FALLTHROUGH */
3024
3025 /* attach a trailing comment to its statement instead of next token */
3026 case ';':
cd81e915 3027 if (PL_faketokens)
5db06880
NC
3028 break;
3029 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3030 s = PL_bufptr;
3031 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3032 s++;
3033 if (*s == '\n' || *s == '#') {
3034 while (s < PL_bufend && *s != '\n')
3035 s++;
3036 if (s < PL_bufend)
3037 s++;
cd81e915
NC
3038 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3039 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3040 PL_thiswhite = 0;
5db06880
NC
3041 PL_bufptr = s;
3042 }
3043 }
3044 break;
3045
3046 /* pval */
3047 case LABEL:
3048 break;
3049
3050 /* ival */
3051 default:
3052 break;
3053
3054 }
3055
3056 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
3057 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
3058 PL_thismad = 0;
5db06880
NC
3059 return optype;
3060}
3061#endif
3062
468aa647 3063STATIC char *
cc6ed77d 3064S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3065 dVAR;
468aa647
RGS
3066 if (PL_expect != XSTATE)
3067 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3068 is_use ? "use" : "no"));
29595ff2 3069 s = SKIPSPACE1(s);
468aa647
RGS
3070 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3071 s = force_version(s, TRUE);
29595ff2 3072 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 3073 start_force(PL_curforce);
9ded7720 3074 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3075 force_next(WORD);
3076 }
3077 else if (*s == 'v') {
3078 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3079 s = force_version(s, FALSE);
3080 }
3081 }
3082 else {
3083 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3084 s = force_version(s, FALSE);
3085 }
3086 yylval.ival = is_use;
3087 return s;
3088}
748a9306 3089#ifdef DEBUGGING
27da23d5 3090 static const char* const exp_name[] =
09bef843 3091 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3092 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3093 };
748a9306 3094#endif
463ee0b2 3095
02aa26ce
NT
3096/*
3097 yylex
3098
3099 Works out what to call the token just pulled out of the input
3100 stream. The yacc parser takes care of taking the ops we return and
3101 stitching them into a tree.
3102
3103 Returns:
3104 PRIVATEREF
3105
3106 Structure:
3107 if read an identifier
3108 if we're in a my declaration
3109 croak if they tried to say my($foo::bar)
3110 build the ops for a my() declaration
3111 if it's an access to a my() variable
3112 are we in a sort block?
3113 croak if my($a); $a <=> $b
3114 build ops for access to a my() variable
3115 if in a dq string, and they've said @foo and we can't find @foo
3116 croak
3117 build ops for a bareword
3118 if we already built the token before, use it.
3119*/
3120
20141f0e 3121
dba4d153
JH
3122#ifdef __SC__
3123#pragma segment Perl_yylex
3124#endif
dba4d153 3125int
dba4d153 3126Perl_yylex(pTHX)
20141f0e 3127{
97aff369 3128 dVAR;
3afc138a 3129 register char *s = PL_bufptr;
378cc40b 3130 register char *d;
463ee0b2 3131 STRLEN len;
aa7440fb 3132 bool bof = FALSE;
a687059c 3133
10edeb5d
JH
3134 /* orig_keyword, gvp, and gv are initialized here because
3135 * jump to the label just_a_word_zero can bypass their
3136 * initialization later. */
3137 I32 orig_keyword = 0;
3138 GV *gv = NULL;
3139 GV **gvp = NULL;
3140
bbf60fe6 3141 DEBUG_T( {
396482e1 3142 SV* tmp = newSVpvs("");
b6007c36
DM
3143 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3144 (IV)CopLINE(PL_curcop),
3145 lex_state_names[PL_lex_state],
3146 exp_name[PL_expect],
3147 pv_display(tmp, s, strlen(s), 0, 60));
3148 SvREFCNT_dec(tmp);
bbf60fe6 3149 } );
02aa26ce 3150 /* check if there's an identifier for us to look at */
ba979b31 3151 if (PL_pending_ident)
bbf60fe6 3152 return REPORT(S_pending_ident(aTHX));
bbce6d69 3153
02aa26ce
NT
3154 /* no identifier pending identification */
3155
3280af22 3156 switch (PL_lex_state) {
79072805
LW
3157#ifdef COMMENTARY
3158 case LEX_NORMAL: /* Some compilers will produce faster */
3159 case LEX_INTERPNORMAL: /* code if we comment these out. */
3160 break;
3161#endif
3162
09bef843 3163 /* when we've already built the next token, just pull it out of the queue */
79072805 3164 case LEX_KNOWNEXT:
5db06880
NC
3165#ifdef PERL_MAD
3166 PL_lasttoke--;
3167 yylval = PL_nexttoke[PL_lasttoke].next_val;
3168 if (PL_madskills) {
cd81e915 3169 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3170 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3171 if (PL_thismad && PL_thismad->mad_key == '_') {
3172 PL_thiswhite = (SV*)PL_thismad->mad_val;
3173 PL_thismad->mad_val = 0;
3174 mad_free(PL_thismad);
3175 PL_thismad = 0;
5db06880
NC
3176 }
3177 }
3178 if (!PL_lasttoke) {
3179 PL_lex_state = PL_lex_defer;
3180 PL_expect = PL_lex_expect;
3181 PL_lex_defer = LEX_NORMAL;
3182 if (!PL_nexttoke[PL_lasttoke].next_type)
3183 return yylex();
3184 }
3185#else
3280af22 3186 PL_nexttoke--;
5db06880 3187 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3188 if (!PL_nexttoke) {
3189 PL_lex_state = PL_lex_defer;
3190 PL_expect = PL_lex_expect;
3191 PL_lex_defer = LEX_NORMAL;
463ee0b2 3192 }
5db06880
NC
3193#endif
3194#ifdef PERL_MAD
3195 /* FIXME - can these be merged? */
3196 return(PL_nexttoke[PL_lasttoke].next_type);
3197#else
bbf60fe6 3198 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3199#endif
79072805 3200
02aa26ce 3201 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3202 when we get here, PL_bufptr is at the \
02aa26ce 3203 */
79072805
LW
3204 case LEX_INTERPCASEMOD:
3205#ifdef DEBUGGING
3280af22 3206 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3207 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3208#endif
02aa26ce 3209 /* handle \E or end of string */
3280af22 3210 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3211 /* if at a \E */
3280af22 3212 if (PL_lex_casemods) {
f54cb97a 3213 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3214 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3215
3792a11b
NC
3216 if (PL_bufptr != PL_bufend
3217 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3218 PL_bufptr += 2;
3219 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3220#ifdef PERL_MAD
3221 if (PL_madskills)
6b29d1f5 3222 PL_thistoken = newSVpvs("\\E");
5db06880 3223#endif
a0d0e21e 3224 }
bbf60fe6 3225 return REPORT(')');
79072805 3226 }
5db06880
NC
3227#ifdef PERL_MAD
3228 while (PL_bufptr != PL_bufend &&
3229 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3230 if (!PL_thiswhite)
6b29d1f5 3231 PL_thiswhite = newSVpvs("");
cd81e915 3232 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3233 PL_bufptr += 2;
3234 }
3235#else
3280af22
NIS
3236 if (PL_bufptr != PL_bufend)
3237 PL_bufptr += 2;
5db06880 3238#endif
3280af22 3239 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3240 return yylex();
79072805
LW
3241 }
3242 else {
607df283 3243 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3244 "### Saw case modifier\n"); });
3280af22 3245 s = PL_bufptr + 1;
6e909404 3246 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3247#ifdef PERL_MAD
cd81e915 3248 if (!PL_thiswhite)
6b29d1f5 3249 PL_thiswhite = newSVpvs("");
cd81e915 3250 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3251#endif
89122651 3252 PL_bufptr = s + 3;
6e909404
JH
3253 PL_lex_state = LEX_INTERPCONCAT;
3254 return yylex();
a0d0e21e 3255 }
6e909404 3256 else {
90771dc0 3257 I32 tmp;
5db06880
NC
3258 if (!PL_madskills) /* when just compiling don't need correct */
3259 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3260 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3261 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3262 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3263 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3264 return REPORT(')');
6e909404
JH
3265 }
3266 if (PL_lex_casemods > 10)
3267 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3268 PL_lex_casestack[PL_lex_casemods++] = *s;
3269 PL_lex_casestack[PL_lex_casemods] = '\0';
3270 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3271 start_force(PL_curforce);
9ded7720 3272 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3273 force_next('(');
cd81e915 3274 start_force(PL_curforce);
6e909404 3275 if (*s == 'l')
9ded7720 3276 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3277 else if (*s == 'u')
9ded7720 3278 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3279 else if (*s == 'L')
9ded7720 3280 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3281 else if (*s == 'U')
9ded7720 3282 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3283 else if (*s == 'Q')
9ded7720 3284 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3285 else
3286 Perl_croak(aTHX_ "panic: yylex");
5db06880 3287 if (PL_madskills) {
6b29d1f5 3288 SV* const tmpsv = newSVpvs("");
5db06880
NC
3289 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3290 curmad('_', tmpsv);
3291 }
6e909404 3292 PL_bufptr = s + 1;
a0d0e21e 3293 }
79072805 3294 force_next(FUNC);
3280af22
NIS
3295 if (PL_lex_starts) {
3296 s = PL_bufptr;
3297 PL_lex_starts = 0;
5db06880
NC
3298#ifdef PERL_MAD
3299 if (PL_madskills) {
cd81e915
NC
3300 if (PL_thistoken)
3301 sv_free(PL_thistoken);
6b29d1f5 3302 PL_thistoken = newSVpvs("");
5db06880
NC
3303 }
3304#endif
131b3ad0
DM
3305 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3306 if (PL_lex_casemods == 1 && PL_lex_inpat)
3307 OPERATOR(',');
3308 else
3309 Aop(OP_CONCAT);
79072805
LW
3310 }
3311 else
cea2e8a9 3312 return yylex();
79072805
LW
3313 }
3314
55497cff 3315 case LEX_INTERPPUSH:
bbf60fe6 3316 return REPORT(sublex_push());
55497cff 3317
79072805 3318 case LEX_INTERPSTART:
3280af22 3319 if (PL_bufptr == PL_bufend)
bbf60fe6 3320 return REPORT(sublex_done());
607df283 3321 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3322 "### Interpolated variable\n"); });
3280af22
NIS
3323 PL_expect = XTERM;
3324 PL_lex_dojoin = (*PL_bufptr == '@');
3325 PL_lex_state = LEX_INTERPNORMAL;
3326 if (PL_lex_dojoin) {
cd81e915 3327 start_force(PL_curforce);
9ded7720 3328 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3329 force_next(',');
cd81e915 3330 start_force(PL_curforce);
a0d0e21e 3331 force_ident("\"", '$');
cd81e915 3332 start_force(PL_curforce);
9ded7720 3333 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3334 force_next('$');
cd81e915 3335 start_force(PL_curforce);
9ded7720 3336 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3337 force_next('(');
cd81e915 3338 start_force(PL_curforce);
9ded7720 3339 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3340 force_next(FUNC);
3341 }
3280af22
NIS
3342 if (PL_lex_starts++) {
3343 s = PL_bufptr;
5db06880
NC
3344#ifdef PERL_MAD
3345 if (PL_madskills) {
cd81e915
NC
3346 if (PL_thistoken)
3347 sv_free(PL_thistoken);
6b29d1f5 3348 PL_thistoken = newSVpvs("");
5db06880
NC
3349 }
3350#endif
131b3ad0
DM
3351 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3352 if (!PL_lex_casemods && PL_lex_inpat)
3353 OPERATOR(',');
3354 else
3355 Aop(OP_CONCAT);
79072805 3356 }
cea2e8a9 3357 return yylex();
79072805
LW
3358
3359 case LEX_INTERPENDMAYBE:
3280af22
NIS
3360 if (intuit_more(PL_bufptr)) {
3361 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3362 break;
3363 }
3364 /* FALL THROUGH */
3365
3366 case LEX_INTERPEND:
3280af22
NIS
3367 if (PL_lex_dojoin) {
3368 PL_lex_dojoin = FALSE;
3369 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3370#ifdef PERL_MAD
3371 if (PL_madskills) {
cd81e915
NC
3372 if (PL_thistoken)
3373 sv_free(PL_thistoken);
6b29d1f5 3374 PL_thistoken = newSVpvs("");
5db06880
NC
3375 }
3376#endif
bbf60fe6 3377 return REPORT(')');
79072805 3378 }
43a16006 3379 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3380 && SvEVALED(PL_lex_repl))
43a16006 3381 {
e9fa98b2 3382 if (PL_bufptr != PL_bufend)
cea2e8a9 3383 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3384 PL_lex_repl = NULL;
e9fa98b2 3385 }
79072805
LW
3386 /* FALLTHROUGH */
3387 case LEX_INTERPCONCAT:
3388#ifdef DEBUGGING
3280af22 3389 if (PL_lex_brackets)
cea2e8a9 3390 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3391#endif
3280af22 3392 if (PL_bufptr == PL_bufend)
bbf60fe6 3393 return REPORT(sublex_done());
79072805 3394
3280af22
NIS
3395 if (SvIVX(PL_linestr) == '\'') {
3396 SV *sv = newSVsv(PL_linestr);
3397 if (!PL_lex_inpat)
76e3520e 3398 sv = tokeq(sv);
3280af22 3399 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3400 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3401 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3402 s = PL_bufend;
79072805
LW
3403 }
3404 else {
3280af22 3405 s = scan_const(PL_bufptr);
79072805 3406 if (*s == '\\')
3280af22 3407 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3408 else
3280af22 3409 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3410 }
3411
3280af22 3412 if (s != PL_bufptr) {
cd81e915 3413 start_force(PL_curforce);
5db06880
NC
3414 if (PL_madskills) {
3415 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3416 }
9ded7720 3417 NEXTVAL_NEXTTOKE = yylval;
3280af22 3418 PL_expect = XTERM;
79072805 3419 force_next(THING);
131b3ad0 3420 if (PL_lex_starts++) {
5db06880
NC
3421#ifdef PERL_MAD
3422 if (PL_madskills) {
cd81e915
NC
3423 if (PL_thistoken)
3424 sv_free(PL_thistoken);
6b29d1f5 3425 PL_thistoken = newSVpvs("");
5db06880
NC
3426 }
3427#endif
131b3ad0
DM
3428 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3429 if (!PL_lex_casemods && PL_lex_inpat)
3430 OPERATOR(',');
3431 else
3432 Aop(OP_CONCAT);
3433 }
79072805 3434 else {
3280af22 3435 PL_bufptr = s;
cea2e8a9 3436 return yylex();
79072805
LW
3437 }
3438 }
3439
cea2e8a9 3440 return yylex();
a0d0e21e 3441 case LEX_FORMLINE:
3280af22
NIS
3442 PL_lex_state = LEX_NORMAL;
3443 s = scan_formline(PL_bufptr);
3444 if (!PL_lex_formbrack)
a0d0e21e
LW
3445 goto rightbracket;
3446 OPERATOR(';');
79072805
LW
3447 }
3448
3280af22
NIS
3449 s = PL_bufptr;
3450 PL_oldoldbufptr = PL_oldbufptr;
3451 PL_oldbufptr = s;
463ee0b2
LW
3452
3453 retry:
5db06880 3454#ifdef PERL_MAD
cd81e915
NC
3455 if (PL_thistoken) {
3456 sv_free(PL_thistoken);
3457 PL_thistoken = 0;
5db06880 3458 }
cd81e915 3459 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3460#endif
378cc40b
LW
3461 switch (*s) {
3462 default:
7e2040f0 3463 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3464 goto keylookup;
cea2e8a9 3465 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3466 case 4:
3467 case 26:
3468 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3469 case 0:
5db06880
NC
3470#ifdef PERL_MAD
3471 if (PL_madskills)
cd81e915 3472 PL_faketokens = 0;
5db06880 3473#endif
3280af22
NIS
3474 if (!PL_rsfp) {
3475 PL_last_uni = 0;
3476 PL_last_lop = 0;
c5ee2135 3477 if (PL_lex_brackets) {
10edeb5d
JH
3478 yyerror((const char *)
3479 (PL_lex_formbrack
3480 ? "Format not terminated"
3481 : "Missing right curly or square bracket"));
c5ee2135 3482 }
4e553d73 3483 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3484 "### Tokener got EOF\n");
5f80b19c 3485 } );
79072805 3486 TOKEN(0);
463ee0b2 3487 }
3280af22 3488 if (s++ < PL_bufend)
a687059c 3489 goto retry; /* ignore stray nulls */
3280af22
NIS
3490 PL_last_uni = 0;
3491 PL_last_lop = 0;
3492 if (!PL_in_eval && !PL_preambled) {
3493 PL_preambled = TRUE;
5db06880
NC
3494#ifdef PERL_MAD
3495 if (PL_madskills)
cd81e915 3496 PL_faketokens = 1;
5db06880 3497#endif
3280af22
NIS
3498 sv_setpv(PL_linestr,incl_perldb());
3499 if (SvCUR(PL_linestr))
396482e1 3500 sv_catpvs(PL_linestr,";");
3280af22
NIS
3501 if (PL_preambleav){
3502 while(AvFILLp(PL_preambleav) >= 0) {
3503 SV *tmpsv = av_shift(PL_preambleav);
3504 sv_catsv(PL_linestr, tmpsv);
396482e1 3505 sv_catpvs(PL_linestr, ";");
91b7def8 3506 sv_free(tmpsv);
3507 }
3280af22
NIS
3508 sv_free((SV*)PL_preambleav);
3509 PL_preambleav = NULL;
91b7def8 3510 }
3280af22 3511 if (PL_minus_n || PL_minus_p) {
396482e1 3512 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3513 if (PL_minus_l)
396482e1 3514 sv_catpvs(PL_linestr,"chomp;");
3280af22 3515 if (PL_minus_a) {
3280af22 3516 if (PL_minus_F) {
3792a11b
NC
3517 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3518 || *PL_splitstr == '"')
3280af22 3519 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3520 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3521 else {
c8ef6a4b
NC
3522 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3523 bytes can be used as quoting characters. :-) */
dd374669 3524 const char *splits = PL_splitstr;
91d456ae 3525 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3526 do {
3527 /* Need to \ \s */
dd374669
AL
3528 if (*splits == '\\')
3529 sv_catpvn(PL_linestr, splits, 1);
3530 sv_catpvn(PL_linestr, splits, 1);
3531 } while (*splits++);
48c4c863
NC
3532 /* This loop will embed the trailing NUL of
3533 PL_linestr as the last thing it does before
3534 terminating. */
396482e1 3535 sv_catpvs(PL_linestr, ");");
54310121 3536 }
2304df62
AD
3537 }
3538 else
396482e1 3539 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3540 }
79072805 3541 }
bc9b29db