This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add another volatile modifier to protect against longjmp clobbering
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
4bb101f2 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
b94e2f88 4 * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
12 * "It all comes from here, the stench and the peril." --Frodo
378cc40b
LW
13 */
14
9cbb5ea2
GS
15/*
16 * This file is the lexer for Perl. It's closely linked to the
4e553d73 17 * parser, perly.y.
ffb4593c
NT
18 *
19 * The main routine is yylex(), which returns the next token.
20 */
21
378cc40b 22#include "EXTERN.h"
864dbfa3 23#define PERL_IN_TOKE_C
378cc40b 24#include "perl.h"
378cc40b 25
12fbd33b
DM
26#define yychar (*PL_yycharp)
27#define yylval (*PL_yylvalp)
d3b6f988 28
0bd48802 29static const char ident_too_long[] = "Identifier too long";
c445ea15 30static const char commaless_variable_list[] = "comma-less variable list";
8903cb82 31
acfe0abc 32static void restore_rsfp(pTHX_ void *f);
6e3aabd6 33#ifndef PERL_NO_UTF16_FILTER
acfe0abc
GS
34static I32 utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen);
35static I32 utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen);
6e3aabd6 36#endif
51371543 37
29595ff2 38#ifdef PERL_MAD
29595ff2 39# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 40# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 41#else
5db06880 42# define CURMAD(slot,sv)
9ded7720 43# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
44#endif
45
9059aa12
LW
46#define XFAKEBRACK 128
47#define XENUMMASK 127
48
39e02b42
JH
49#ifdef USE_UTF8_SCRIPTS
50# define UTF (!IN_BYTES)
2b9d42f0 51#else
746b446a 52# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 53#endif
a0ed51b3 54
61f0cdd9 55/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
56 * 1999-02-27 mjd-perl-patch@plover.com */
57#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
58
bf4acbe4
GS
59/* On MacOS, respect nonbreaking spaces */
60#ifdef MACOS_TRADITIONAL
61#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
62#else
63#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
64#endif
65
ffb4593c
NT
66/* LEX_* are values for PL_lex_state, the state of the lexer.
67 * They are arranged oddly so that the guard on the switch statement
79072805
LW
68 * can get by with a single comparison (if the compiler is smart enough).
69 */
70
fb73857a 71/* #define LEX_NOTPARSING 11 is done in perl.h. */
72
b6007c36
DM
73#define LEX_NORMAL 10 /* normal code (ie not within "...") */
74#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
75#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
76#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
77#define LEX_INTERPSTART 6 /* expecting the start of a $var */
78
79 /* at end of code, eg "$x" followed by: */
80#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
81#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
82
83#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
84 string or after \E, $foo, etc */
85#define LEX_INTERPCONST 2 /* NOT USED */
86#define LEX_FORMLINE 1 /* expecting a format line */
87#define LEX_KNOWNEXT 0 /* next token known; just return it */
88
79072805 89
bbf60fe6 90#ifdef DEBUGGING
27da23d5 91static const char* const lex_state_names[] = {
bbf60fe6
DM
92 "KNOWNEXT",
93 "FORMLINE",
94 "INTERPCONST",
95 "INTERPCONCAT",
96 "INTERPENDMAYBE",
97 "INTERPEND",
98 "INTERPSTART",
99 "INTERPPUSH",
100 "INTERPCASEMOD",
101 "INTERPNORMAL",
102 "NORMAL"
103};
104#endif
105
79072805
LW
106#ifdef ff_next
107#undef ff_next
d48672a2
LW
108#endif
109
79072805 110#include "keywords.h"
fe14fcc3 111
ffb4593c
NT
112/* CLINE is a macro that ensures PL_copline has a sane value */
113
ae986130
LW
114#ifdef CLINE
115#undef CLINE
116#endif
57843af0 117#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 118
5db06880 119#ifdef PERL_MAD
29595ff2
NC
120# define SKIPSPACE0(s) skipspace0(s)
121# define SKIPSPACE1(s) skipspace1(s)
122# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
123# define PEEKSPACE(s) skipspace2(s,0)
124#else
125# define SKIPSPACE0(s) skipspace(s)
126# define SKIPSPACE1(s) skipspace(s)
127# define SKIPSPACE2(s,tsv) skipspace(s)
128# define PEEKSPACE(s) skipspace(s)
129#endif
130
ffb4593c
NT
131/*
132 * Convenience functions to return different tokens and prime the
9cbb5ea2 133 * lexer for the next token. They all take an argument.
ffb4593c
NT
134 *
135 * TOKEN : generic token (used for '(', DOLSHARP, etc)
136 * OPERATOR : generic operator
137 * AOPERATOR : assignment operator
138 * PREBLOCK : beginning the block after an if, while, foreach, ...
139 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
140 * PREREF : *EXPR where EXPR is not a simple identifier
141 * TERM : expression term
142 * LOOPX : loop exiting command (goto, last, dump, etc)
143 * FTST : file test operator
144 * FUN0 : zero-argument function
2d2e263d 145 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
146 * BOop : bitwise or or xor
147 * BAop : bitwise and
148 * SHop : shift operator
149 * PWop : power operator
9cbb5ea2 150 * PMop : pattern-matching operator
ffb4593c
NT
151 * Aop : addition-level operator
152 * Mop : multiplication-level operator
153 * Eop : equality-testing operator
e5edeb50 154 * Rop : relational operator <= != gt
ffb4593c
NT
155 *
156 * Also see LOP and lop() below.
157 */
158
998054bd 159#ifdef DEBUGGING /* Serve -DT. */
f5bd084c 160# define REPORT(retval) tokereport((I32)retval)
998054bd 161#else
bbf60fe6 162# define REPORT(retval) (retval)
998054bd
SC
163#endif
164
bbf60fe6
DM
165#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
166#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
167#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
168#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
169#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
170#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
171#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
172#define LOOPX(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
173#define FTST(f) return (yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
174#define FUN0(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
175#define FUN1(f) return (yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
176#define BOop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
177#define BAop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
178#define SHop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
179#define PWop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
180#define PMop(f) return(yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
181#define Aop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
182#define Mop(f) return ao((yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
183#define Eop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
184#define Rop(f) return (yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 185
a687059c
LW
186/* This bit of chicanery makes a unary function followed by
187 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
188 * The UNIDOR macro is for unary functions that can be followed by the //
189 * operator (such as C<shift // 0>).
a687059c 190 */
376fcdbf
AL
191#define UNI2(f,x) { \
192 yylval.ival = f; \
193 PL_expect = x; \
194 PL_bufptr = s; \
195 PL_last_uni = PL_oldbufptr; \
196 PL_last_lop_op = f; \
197 if (*s == '(') \
198 return REPORT( (int)FUNC1 ); \
29595ff2 199 s = PEEKSPACE(s); \
376fcdbf
AL
200 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
201 }
6f33ba73
RGS
202#define UNI(f) UNI2(f,XTERM)
203#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 204
376fcdbf
AL
205#define UNIBRACK(f) { \
206 yylval.ival = f; \
207 PL_bufptr = s; \
208 PL_last_uni = PL_oldbufptr; \
209 if (*s == '(') \
210 return REPORT( (int)FUNC1 ); \
29595ff2 211 s = PEEKSPACE(s); \
376fcdbf
AL
212 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
213 }
79072805 214
9f68db38 215/* grandfather return to old style */
3280af22 216#define OLDLOP(f) return(yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 217
8fa7f367
JH
218#ifdef DEBUGGING
219
bbf60fe6
DM
220/* how to interpret the yylval associated with the token */
221enum token_type {
222 TOKENTYPE_NONE,
223 TOKENTYPE_IVAL,
224 TOKENTYPE_OPNUM, /* yylval.ival contains an opcode number */
225 TOKENTYPE_PVAL,
226 TOKENTYPE_OPVAL,
227 TOKENTYPE_GVVAL
228};
229
6d4a66ac
NC
230static struct debug_tokens {
231 const int token;
232 enum token_type type;
233 const char *name;
234} const debug_tokens[] =
9041c2e3 235{
bbf60fe6
DM
236 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
237 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
238 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
239 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
240 { ARROW, TOKENTYPE_NONE, "ARROW" },
241 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
242 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
243 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
244 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
245 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 246 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
247 { DO, TOKENTYPE_NONE, "DO" },
248 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
249 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
250 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
251 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
252 { ELSE, TOKENTYPE_NONE, "ELSE" },
253 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
254 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
255 { FOR, TOKENTYPE_IVAL, "FOR" },
256 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
257 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
258 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
259 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
260 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
261 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 262 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
263 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
264 { IF, TOKENTYPE_IVAL, "IF" },
265 { LABEL, TOKENTYPE_PVAL, "LABEL" },
266 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
267 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
268 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
269 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
270 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
271 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
272 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
273 { MY, TOKENTYPE_IVAL, "MY" },
274 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
275 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
276 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
277 { OROP, TOKENTYPE_IVAL, "OROP" },
278 { OROR, TOKENTYPE_NONE, "OROR" },
279 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
280 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
281 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
282 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
283 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
284 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
285 { PREINC, TOKENTYPE_NONE, "PREINC" },
286 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
287 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
288 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
289 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
290 { SUB, TOKENTYPE_NONE, "SUB" },
291 { THING, TOKENTYPE_OPVAL, "THING" },
292 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
293 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
294 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
295 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
296 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
297 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 298 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
299 { WHILE, TOKENTYPE_IVAL, "WHILE" },
300 { WORD, TOKENTYPE_OPVAL, "WORD" },
c35e046a 301 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
302};
303
304/* dump the returned token in rv, plus any optional arg in yylval */
998054bd 305
bbf60fe6 306STATIC int
f5bd084c 307S_tokereport(pTHX_ I32 rv)
bbf60fe6 308{
97aff369 309 dVAR;
bbf60fe6 310 if (DEBUG_T_TEST) {
bd61b366 311 const char *name = NULL;
bbf60fe6 312 enum token_type type = TOKENTYPE_NONE;
f54cb97a 313 const struct debug_tokens *p;
396482e1 314 SV* const report = newSVpvs("<== ");
bbf60fe6 315
f54cb97a 316 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
317 if (p->token == (int)rv) {
318 name = p->name;
319 type = p->type;
320 break;
321 }
322 }
323 if (name)
54667de8 324 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
325 else if ((char)rv > ' ' && (char)rv < '~')
326 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
327 else if (!rv)
396482e1 328 sv_catpvs(report, "EOF");
bbf60fe6
DM
329 else
330 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
331 switch (type) {
332 case TOKENTYPE_NONE:
333 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
334 break;
335 case TOKENTYPE_IVAL:
e4584336 336 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)yylval.ival);
bbf60fe6
DM
337 break;
338 case TOKENTYPE_OPNUM:
339 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
340 PL_op_name[yylval.ival]);
341 break;
342 case TOKENTYPE_PVAL:
343 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", yylval.pval);
344 break;
345 case TOKENTYPE_OPVAL:
b6007c36 346 if (yylval.opval) {
401441c0 347 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
bbf60fe6 348 PL_op_name[yylval.opval->op_type]);
b6007c36
DM
349 if (yylval.opval->op_type == OP_CONST) {
350 Perl_sv_catpvf(aTHX_ report, " %s",
351 SvPEEK(cSVOPx_sv(yylval.opval)));
352 }
353
354 }
401441c0 355 else
396482e1 356 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
357 break;
358 }
b6007c36 359 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
360 };
361 return (int)rv;
998054bd
SC
362}
363
b6007c36
DM
364
365/* print the buffer with suitable escapes */
366
367STATIC void
368S_printbuf(pTHX_ const char* fmt, const char* s)
369{
396482e1 370 SV* const tmp = newSVpvs("");
b6007c36
DM
371 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
372 SvREFCNT_dec(tmp);
373}
374
8fa7f367
JH
375#endif
376
ffb4593c
NT
377/*
378 * S_ao
379 *
c963b151
BD
380 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
381 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
382 */
383
76e3520e 384STATIC int
cea2e8a9 385S_ao(pTHX_ int toketype)
a0d0e21e 386{
97aff369 387 dVAR;
3280af22
NIS
388 if (*PL_bufptr == '=') {
389 PL_bufptr++;
a0d0e21e
LW
390 if (toketype == ANDAND)
391 yylval.ival = OP_ANDASSIGN;
392 else if (toketype == OROR)
393 yylval.ival = OP_ORASSIGN;
c963b151
BD
394 else if (toketype == DORDOR)
395 yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
396 toketype = ASSIGNOP;
397 }
398 return toketype;
399}
400
ffb4593c
NT
401/*
402 * S_no_op
403 * When Perl expects an operator and finds something else, no_op
404 * prints the warning. It always prints "<something> found where
405 * operator expected. It prints "Missing semicolon on previous line?"
406 * if the surprise occurs at the start of the line. "do you need to
407 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
408 * where the compiler doesn't know if foo is a method call or a function.
409 * It prints "Missing operator before end of line" if there's nothing
410 * after the missing operator, or "... before <...>" if there is something
411 * after the missing operator.
412 */
413
76e3520e 414STATIC void
bfed75c6 415S_no_op(pTHX_ const char *what, char *s)
463ee0b2 416{
97aff369 417 dVAR;
9d4ba2ae
AL
418 char * const oldbp = PL_bufptr;
419 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 420
1189a94a
GS
421 if (!s)
422 s = oldbp;
07c798fb 423 else
1189a94a 424 PL_bufptr = s;
cea2e8a9 425 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
426 if (ckWARN_d(WARN_SYNTAX)) {
427 if (is_first)
428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
429 "\t(Missing semicolon on previous line?)\n");
430 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 431 const char *t;
c35e046a
AL
432 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
433 NOOP;
56da5a46
RGS
434 if (t < PL_bufptr && isSPACE(*t))
435 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
436 "\t(Do you need to predeclare %.*s?)\n",
551405c4 437 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
438 }
439 else {
440 assert(s >= oldbp);
441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 442 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 443 }
07c798fb 444 }
3280af22 445 PL_bufptr = oldbp;
8990e307
LW
446}
447
ffb4593c
NT
448/*
449 * S_missingterm
450 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 451 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
452 * If we're in a delimited string and the delimiter is a control
453 * character, it's reformatted into a two-char sequence like ^C.
454 * This is fatal.
455 */
456
76e3520e 457STATIC void
cea2e8a9 458S_missingterm(pTHX_ char *s)
8990e307 459{
97aff369 460 dVAR;
8990e307
LW
461 char tmpbuf[3];
462 char q;
463 if (s) {
9d4ba2ae 464 char * const nl = strrchr(s,'\n');
d2719217 465 if (nl)
8990e307
LW
466 *nl = '\0';
467 }
9d116dd7
JH
468 else if (
469#ifdef EBCDIC
470 iscntrl(PL_multi_close)
471#else
472 PL_multi_close < 32 || PL_multi_close == 127
473#endif
474 ) {
8990e307 475 *tmpbuf = '^';
585ec06d 476 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
477 tmpbuf[2] = '\0';
478 s = tmpbuf;
479 }
480 else {
eb160463 481 *tmpbuf = (char)PL_multi_close;
8990e307
LW
482 tmpbuf[1] = '\0';
483 s = tmpbuf;
484 }
485 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 486 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 487}
79072805 488
ef89dcc3 489#define FEATURE_IS_ENABLED(name) \
0d863452 490 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 491 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
0d863452
RH
492/*
493 * S_feature_is_enabled
494 * Check whether the named feature is enabled.
495 */
496STATIC bool
d4c19fe8 497S_feature_is_enabled(pTHX_ const char *name, STRLEN namelen)
0d863452 498{
97aff369 499 dVAR;
0d863452 500 HV * const hinthv = GvHV(PL_hintgv);
7b9ef140 501 char he_name[32] = "feature_";
6fca0082 502 (void) my_strlcpy(&he_name[8], name, 24);
d4c19fe8 503
7b9ef140 504 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
505}
506
ffb4593c
NT
507/*
508 * Perl_deprecate
ffb4593c
NT
509 */
510
79072805 511void
bfed75c6 512Perl_deprecate(pTHX_ const char *s)
a0d0e21e 513{
599cee73 514 if (ckWARN(WARN_DEPRECATED))
9014280d 515 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), "Use of %s is deprecated", s);
a0d0e21e
LW
516}
517
12bcd1a6 518void
bfed75c6 519Perl_deprecate_old(pTHX_ const char *s)
12bcd1a6
PM
520{
521 /* This function should NOT be called for any new deprecated warnings */
522 /* Use Perl_deprecate instead */
523 /* */
524 /* It is here to maintain backward compatibility with the pre-5.8 */
525 /* warnings category hierarchy. The "deprecated" category used to */
526 /* live under the "syntax" category. It is now a top-level category */
527 /* in its own right. */
528
529 if (ckWARN2(WARN_DEPRECATED, WARN_SYNTAX))
bfed75c6 530 Perl_warner(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
12bcd1a6
PM
531 "Use of %s is deprecated", s);
532}
533
ffb4593c 534/*
9cbb5ea2
GS
535 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
536 * utf16-to-utf8-reversed.
ffb4593c
NT
537 */
538
c39cd008
GS
539#ifdef PERL_CR_FILTER
540static void
541strip_return(SV *sv)
542{
95a20fc0 543 register const char *s = SvPVX_const(sv);
9d4ba2ae 544 register const char * const e = s + SvCUR(sv);
c39cd008
GS
545 /* outer loop optimized to do nothing if there are no CR-LFs */
546 while (s < e) {
547 if (*s++ == '\r' && *s == '\n') {
548 /* hit a CR-LF, need to copy the rest */
549 register char *d = s - 1;
550 *d++ = *s++;
551 while (s < e) {
552 if (*s == '\r' && s[1] == '\n')
553 s++;
554 *d++ = *s++;
555 }
556 SvCUR(sv) -= s - d;
557 return;
558 }
559 }
560}
a868473f 561
76e3520e 562STATIC I32
c39cd008 563S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 564{
f54cb97a 565 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
566 if (count > 0 && !maxlen)
567 strip_return(sv);
568 return count;
a868473f
NIS
569}
570#endif
571
ffb4593c
NT
572/*
573 * Perl_lex_start
9cbb5ea2
GS
574 * Initialize variables. Uses the Perl save_stack to save its state (for
575 * recursive calls to the parser).
ffb4593c
NT
576 */
577
a0d0e21e 578void
864dbfa3 579Perl_lex_start(pTHX_ SV *line)
79072805 580{
97aff369 581 dVAR;
cfd0369c 582 const char *s;
8990e307
LW
583 STRLEN len;
584
3280af22
NIS
585 SAVEI32(PL_lex_dojoin);
586 SAVEI32(PL_lex_brackets);
3280af22
NIS
587 SAVEI32(PL_lex_casemods);
588 SAVEI32(PL_lex_starts);
589 SAVEI32(PL_lex_state);
7766f137 590 SAVEVPTR(PL_lex_inpat);
3280af22 591 SAVEI32(PL_lex_inwhat);
5db06880
NC
592#ifdef PERL_MAD
593 if (PL_lex_state == LEX_KNOWNEXT) {
594 I32 toke = PL_lasttoke;
595 while (--toke >= 0) {
596 SAVEI32(PL_nexttoke[toke].next_type);
597 SAVEVPTR(PL_nexttoke[toke].next_val);
598 if (PL_madskills)
599 SAVEVPTR(PL_nexttoke[toke].next_mad);
600 }
601 SAVEI32(PL_lasttoke);
602 }
603 if (PL_madskills) {
cd81e915
NC
604 SAVESPTR(PL_thistoken);
605 SAVESPTR(PL_thiswhite);
606 SAVESPTR(PL_nextwhite);
607 SAVESPTR(PL_thisopen);
608 SAVESPTR(PL_thisclose);
609 SAVESPTR(PL_thisstuff);
610 SAVEVPTR(PL_thismad);
611 SAVEI32(PL_realtokenstart);
612 SAVEI32(PL_faketokens);
613 }
614 SAVEI32(PL_curforce);
5db06880 615#else
18b09519
GS
616 if (PL_lex_state == LEX_KNOWNEXT) {
617 I32 toke = PL_nexttoke;
618 while (--toke >= 0) {
619 SAVEI32(PL_nexttype[toke]);
620 SAVEVPTR(PL_nextval[toke]);
621 }
622 SAVEI32(PL_nexttoke);
18b09519 623 }
5db06880 624#endif
57843af0 625 SAVECOPLINE(PL_curcop);
3280af22
NIS
626 SAVEPPTR(PL_bufptr);
627 SAVEPPTR(PL_bufend);
628 SAVEPPTR(PL_oldbufptr);
629 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
630 SAVEPPTR(PL_last_lop);
631 SAVEPPTR(PL_last_uni);
3280af22
NIS
632 SAVEPPTR(PL_linestart);
633 SAVESPTR(PL_linestr);
8edd5f42
RGS
634 SAVEGENERICPV(PL_lex_brackstack);
635 SAVEGENERICPV(PL_lex_casestack);
c76ac1ee 636 SAVEDESTRUCTOR_X(restore_rsfp, PL_rsfp);
3280af22
NIS
637 SAVESPTR(PL_lex_stuff);
638 SAVEI32(PL_lex_defer);
09bef843 639 SAVEI32(PL_sublex_info.sub_inwhat);
3280af22 640 SAVESPTR(PL_lex_repl);
bebdddfc
GS
641 SAVEINT(PL_expect);
642 SAVEINT(PL_lex_expect);
3280af22
NIS
643
644 PL_lex_state = LEX_NORMAL;
645 PL_lex_defer = 0;
646 PL_expect = XSTATE;
647 PL_lex_brackets = 0;
a02a5408
JC
648 Newx(PL_lex_brackstack, 120, char);
649 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
650 PL_lex_casemods = 0;
651 *PL_lex_casestack = '\0';
652 PL_lex_dojoin = 0;
653 PL_lex_starts = 0;
a0714e2c
SS
654 PL_lex_stuff = NULL;
655 PL_lex_repl = NULL;
3280af22 656 PL_lex_inpat = 0;
5db06880
NC
657#ifdef PERL_MAD
658 PL_lasttoke = 0;
659#else
76be56bc 660 PL_nexttoke = 0;
5db06880 661#endif
3280af22 662 PL_lex_inwhat = 0;
09bef843 663 PL_sublex_info.sub_inwhat = 0;
3280af22
NIS
664 PL_linestr = line;
665 if (SvREADONLY(PL_linestr))
666 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
cfd0369c 667 s = SvPV_const(PL_linestr, len);
6f27f9a7 668 if (!len || s[len-1] != ';') {
3280af22
NIS
669 if (!(SvFLAGS(PL_linestr) & SVs_TEMP))
670 PL_linestr = sv_2mortal(newSVsv(PL_linestr));
396482e1 671 sv_catpvs(PL_linestr, "\n;");
8990e307 672 }
3280af22
NIS
673 SvTEMP_off(PL_linestr);
674 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = SvPVX(PL_linestr);
675 PL_bufend = PL_bufptr + SvCUR(PL_linestr);
bd61b366 676 PL_last_lop = PL_last_uni = NULL;
3280af22 677 PL_rsfp = 0;
79072805 678}
a687059c 679
ffb4593c
NT
680/*
681 * Perl_lex_end
9cbb5ea2
GS
682 * Finalizer for lexing operations. Must be called when the parser is
683 * done with the lexer.
ffb4593c
NT
684 */
685
463ee0b2 686void
864dbfa3 687Perl_lex_end(pTHX)
463ee0b2 688{
97aff369 689 dVAR;
3280af22 690 PL_doextract = FALSE;
463ee0b2
LW
691}
692
ffb4593c
NT
693/*
694 * S_incline
695 * This subroutine has nothing to do with tilting, whether at windmills
696 * or pinball tables. Its name is short for "increment line". It
57843af0 697 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 698 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
699 * # line 500 "foo.pm"
700 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
701 */
702
76e3520e 703STATIC void
cea2e8a9 704S_incline(pTHX_ char *s)
463ee0b2 705{
97aff369 706 dVAR;
463ee0b2
LW
707 char *t;
708 char *n;
73659bf1 709 char *e;
463ee0b2 710 char ch;
463ee0b2 711
57843af0 712 CopLINE_inc(PL_curcop);
463ee0b2
LW
713 if (*s++ != '#')
714 return;
d4c19fe8
AL
715 while (SPACE_OR_TAB(*s))
716 s++;
73659bf1
GS
717 if (strnEQ(s, "line", 4))
718 s += 4;
719 else
720 return;
084592ab 721 if (SPACE_OR_TAB(*s))
73659bf1 722 s++;
4e553d73 723 else
73659bf1 724 return;
d4c19fe8
AL
725 while (SPACE_OR_TAB(*s))
726 s++;
463ee0b2
LW
727 if (!isDIGIT(*s))
728 return;
d4c19fe8 729
463ee0b2
LW
730 n = s;
731 while (isDIGIT(*s))
732 s++;
bf4acbe4 733 while (SPACE_OR_TAB(*s))
463ee0b2 734 s++;
73659bf1 735 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 736 s++;
73659bf1
GS
737 e = t + 1;
738 }
463ee0b2 739 else {
c35e046a
AL
740 t = s;
741 while (!isSPACE(*t))
742 t++;
73659bf1 743 e = t;
463ee0b2 744 }
bf4acbe4 745 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
746 e++;
747 if (*e != '\n' && *e != '\0')
748 return; /* false alarm */
749
463ee0b2
LW
750 ch = *t;
751 *t = '\0';
f4dd75d9 752 if (t - s > 0) {
8a5ee598 753#ifndef USE_ITHREADS
c4420975 754 const char * const cf = CopFILE(PL_curcop);
42d9b98d
NC
755 STRLEN tmplen = cf ? strlen(cf) : 0;
756 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
757 /* must copy *{"::_<(eval N)[oldfilename:L]"}
758 * to *{"::_<newfilename"} */
759 char smallbuf[256], smallbuf2[256];
760 char *tmpbuf, *tmpbuf2;
8a5ee598 761 GV **gvp, *gv2;
e66cf94c
RGS
762 STRLEN tmplen2 = strlen(s);
763 if (tmplen + 3 < sizeof smallbuf)
764 tmpbuf = smallbuf;
765 else
766 Newx(tmpbuf, tmplen + 3, char);
767 if (tmplen2 + 3 < sizeof smallbuf2)
768 tmpbuf2 = smallbuf2;
769 else
770 Newx(tmpbuf2, tmplen2 + 3, char);
771 tmpbuf[0] = tmpbuf2[0] = '_';
772 tmpbuf[1] = tmpbuf2[1] = '<';
773 memcpy(tmpbuf + 2, cf, ++tmplen);
774 memcpy(tmpbuf2 + 2, s, ++tmplen2);
775 ++tmplen; ++tmplen2;
8a5ee598
RGS
776 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
777 if (gvp) {
778 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
779 if (!isGV(gv2))
780 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
781 /* adjust ${"::_<newfilename"} to store the new file name */
782 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
783 GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
784 GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
785 }
e66cf94c
RGS
786 if (tmpbuf != smallbuf) Safefree(tmpbuf);
787 if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
788 }
8a5ee598 789#endif
05ec9bb3 790 CopFILE_free(PL_curcop);
57843af0 791 CopFILE_set(PL_curcop, s);
f4dd75d9 792 }
463ee0b2 793 *t = ch;
57843af0 794 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
795}
796
29595ff2 797#ifdef PERL_MAD
cd81e915 798/* skip space before PL_thistoken */
29595ff2
NC
799
800STATIC char *
801S_skipspace0(pTHX_ register char *s)
802{
803 s = skipspace(s);
804 if (!PL_madskills)
805 return s;
cd81e915
NC
806 if (PL_skipwhite) {
807 if (!PL_thiswhite)
808 PL_thiswhite = newSVpvn("",0);
809 sv_catsv(PL_thiswhite, PL_skipwhite);
810 sv_free(PL_skipwhite);
811 PL_skipwhite = 0;
812 }
813 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
814 return s;
815}
816
cd81e915 817/* skip space after PL_thistoken */
29595ff2
NC
818
819STATIC char *
820S_skipspace1(pTHX_ register char *s)
821{
d4c19fe8 822 const char *start = s;
29595ff2
NC
823 I32 startoff = start - SvPVX(PL_linestr);
824
825 s = skipspace(s);
826 if (!PL_madskills)
827 return s;
828 start = SvPVX(PL_linestr) + startoff;
cd81e915 829 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 830 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
831 PL_thistoken = newSVpvn(tstart, start - tstart);
832 }
833 PL_realtokenstart = -1;
834 if (PL_skipwhite) {
835 if (!PL_nextwhite)
836 PL_nextwhite = newSVpvn("",0);
837 sv_catsv(PL_nextwhite, PL_skipwhite);
838 sv_free(PL_skipwhite);
839 PL_skipwhite = 0;
29595ff2
NC
840 }
841 return s;
842}
843
844STATIC char *
845S_skipspace2(pTHX_ register char *s, SV **svp)
846{
c35e046a
AL
847 char *start;
848 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
849 const I32 startoff = s - SvPVX(PL_linestr);
850
29595ff2
NC
851 s = skipspace(s);
852 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
853 if (!PL_madskills || !svp)
854 return s;
855 start = SvPVX(PL_linestr) + startoff;
cd81e915 856 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 857 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
858 PL_thistoken = newSVpvn(tstart, start - tstart);
859 PL_realtokenstart = -1;
29595ff2 860 }
cd81e915 861 if (PL_skipwhite) {
29595ff2
NC
862 if (!*svp)
863 *svp = newSVpvn("",0);
cd81e915
NC
864 sv_setsv(*svp, PL_skipwhite);
865 sv_free(PL_skipwhite);
866 PL_skipwhite = 0;
29595ff2
NC
867 }
868
869 return s;
870}
871#endif
872
ffb4593c
NT
873/*
874 * S_skipspace
875 * Called to gobble the appropriate amount and type of whitespace.
876 * Skips comments as well.
877 */
878
76e3520e 879STATIC char *
cea2e8a9 880S_skipspace(pTHX_ register char *s)
a687059c 881{
97aff369 882 dVAR;
5db06880
NC
883#ifdef PERL_MAD
884 int curoff;
885 int startoff = s - SvPVX(PL_linestr);
886
cd81e915
NC
887 if (PL_skipwhite) {
888 sv_free(PL_skipwhite);
889 PL_skipwhite = 0;
5db06880
NC
890 }
891#endif
892
3280af22 893 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 894 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 895 s++;
5db06880
NC
896#ifdef PERL_MAD
897 goto done;
898#else
463ee0b2 899 return s;
5db06880 900#endif
463ee0b2
LW
901 }
902 for (;;) {
fd049845 903 STRLEN prevlen;
09bef843 904 SSize_t oldprevlen, oldoldprevlen;
9c5ffd7c 905 SSize_t oldloplen = 0, oldunilen = 0;
60e6418e
GS
906 while (s < PL_bufend && isSPACE(*s)) {
907 if (*s++ == '\n' && PL_in_eval && !PL_rsfp)
908 incline(s);
909 }
ffb4593c
NT
910
911 /* comment */
3280af22
NIS
912 if (s < PL_bufend && *s == '#') {
913 while (s < PL_bufend && *s != '\n')
463ee0b2 914 s++;
60e6418e 915 if (s < PL_bufend) {
463ee0b2 916 s++;
60e6418e
GS
917 if (PL_in_eval && !PL_rsfp) {
918 incline(s);
919 continue;
920 }
921 }
463ee0b2 922 }
ffb4593c
NT
923
924 /* only continue to recharge the buffer if we're at the end
925 * of the buffer, we're not reading from a source filter, and
926 * we're in normal lexing mode
927 */
09bef843
SB
928 if (s < PL_bufend || !PL_rsfp || PL_sublex_info.sub_inwhat ||
929 PL_lex_state == LEX_FORMLINE)
5db06880
NC
930#ifdef PERL_MAD
931 goto done;
932#else
463ee0b2 933 return s;
5db06880 934#endif
ffb4593c
NT
935
936 /* try to recharge the buffer */
5db06880
NC
937#ifdef PERL_MAD
938 curoff = s - SvPVX(PL_linestr);
939#endif
940
9cbb5ea2 941 if ((s = filter_gets(PL_linestr, PL_rsfp,
bd61b366 942 (prevlen = SvCUR(PL_linestr)))) == NULL)
9cbb5ea2 943 {
5db06880
NC
944#ifdef PERL_MAD
945 if (PL_madskills && curoff != startoff) {
cd81e915
NC
946 if (!PL_skipwhite)
947 PL_skipwhite = newSVpvn("",0);
948 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
949 curoff - startoff);
950 }
951
952 /* mustn't throw out old stuff yet if madpropping */
953 SvCUR(PL_linestr) = curoff;
954 s = SvPVX(PL_linestr) + curoff;
955 *s = 0;
956 if (curoff && s[-1] == '\n')
957 s[-1] = ' ';
958#endif
959
9cbb5ea2 960 /* end of file. Add on the -p or -n magic */
cd81e915 961 /* XXX these shouldn't really be added here, can't set PL_faketokens */
01a19ab0 962 if (PL_minus_p) {
5db06880
NC
963#ifdef PERL_MAD
964 sv_catpv(PL_linestr,
965 ";}continue{print or die qq(-p destination: $!\\n);}");
966#else
01a19ab0
NC
967 sv_setpv(PL_linestr,
968 ";}continue{print or die qq(-p destination: $!\\n);}");
5db06880 969#endif
3280af22 970 PL_minus_n = PL_minus_p = 0;
a0d0e21e 971 }
01a19ab0 972 else if (PL_minus_n) {
5db06880
NC
973#ifdef PERL_MAD
974 sv_catpvn(PL_linestr, ";}", 2);
975#else
01a19ab0 976 sv_setpvn(PL_linestr, ";}", 2);
5db06880 977#endif
01a19ab0
NC
978 PL_minus_n = 0;
979 }
a0d0e21e 980 else
5db06880
NC
981#ifdef PERL_MAD
982 sv_catpvn(PL_linestr,";", 1);
983#else
4147a61b 984 sv_setpvn(PL_linestr,";", 1);
5db06880 985#endif
ffb4593c
NT
986
987 /* reset variables for next time we lex */
9cbb5ea2 988 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
89122651
NC
989 = SvPVX(PL_linestr)
990#ifdef PERL_MAD
991 + curoff
992#endif
993 ;
3280af22 994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 995 PL_last_lop = PL_last_uni = NULL;
ffb4593c
NT
996
997 /* Close the filehandle. Could be from -P preprocessor,
998 * STDIN, or a regular file. If we were reading code from
999 * STDIN (because the commandline held no -e or filename)
1000 * then we don't close it, we reset it so the code can
1001 * read from STDIN too.
1002 */
1003
3280af22
NIS
1004 if (PL_preprocess && !PL_in_eval)
1005 (void)PerlProc_pclose(PL_rsfp);
1006 else if ((PerlIO*)PL_rsfp == PerlIO_stdin())
1007 PerlIO_clearerr(PL_rsfp);
8990e307 1008 else
3280af22 1009 (void)PerlIO_close(PL_rsfp);
4608196e 1010 PL_rsfp = NULL;
463ee0b2
LW
1011 return s;
1012 }
ffb4593c
NT
1013
1014 /* not at end of file, so we only read another line */
09bef843
SB
1015 /* make corresponding updates to old pointers, for yyerror() */
1016 oldprevlen = PL_oldbufptr - PL_bufend;
1017 oldoldprevlen = PL_oldoldbufptr - PL_bufend;
1018 if (PL_last_uni)
1019 oldunilen = PL_last_uni - PL_bufend;
1020 if (PL_last_lop)
1021 oldloplen = PL_last_lop - PL_bufend;
3280af22
NIS
1022 PL_linestart = PL_bufptr = s + prevlen;
1023 PL_bufend = s + SvCUR(PL_linestr);
1024 s = PL_bufptr;
09bef843
SB
1025 PL_oldbufptr = s + oldprevlen;
1026 PL_oldoldbufptr = s + oldoldprevlen;
1027 if (PL_last_uni)
1028 PL_last_uni = s + oldunilen;
1029 if (PL_last_lop)
1030 PL_last_lop = s + oldloplen;
a0d0e21e 1031 incline(s);
ffb4593c
NT
1032
1033 /* debugger active and we're not compiling the debugger code,
1034 * so store the line into the debugger's array of lines
1035 */
3280af22 1036 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 1037 SV * const sv = newSV(0);
8990e307
LW
1038
1039 sv_upgrade(sv, SVt_PVMG);
3280af22 1040 sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
0ac0412a 1041 (void)SvIOK_on(sv);
45977657 1042 SvIV_set(sv, 0);
36c7798d 1043 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
8990e307 1044 }
463ee0b2 1045 }
5db06880
NC
1046
1047#ifdef PERL_MAD
1048 done:
1049 if (PL_madskills) {
cd81e915
NC
1050 if (!PL_skipwhite)
1051 PL_skipwhite = newSVpvn("",0);
5db06880
NC
1052 curoff = s - SvPVX(PL_linestr);
1053 if (curoff - startoff)
cd81e915 1054 sv_catpvn(PL_skipwhite, SvPVX(PL_linestr) + startoff,
5db06880
NC
1055 curoff - startoff);
1056 }
1057 return s;
1058#endif
a687059c 1059}
378cc40b 1060
ffb4593c
NT
1061/*
1062 * S_check_uni
1063 * Check the unary operators to ensure there's no ambiguity in how they're
1064 * used. An ambiguous piece of code would be:
1065 * rand + 5
1066 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1067 * the +5 is its argument.
1068 */
1069
76e3520e 1070STATIC void
cea2e8a9 1071S_check_uni(pTHX)
ba106d47 1072{
97aff369 1073 dVAR;
d4c19fe8
AL
1074 const char *s;
1075 const char *t;
2f3197b3 1076
3280af22 1077 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1078 return;
3280af22
NIS
1079 while (isSPACE(*PL_last_uni))
1080 PL_last_uni++;
c35e046a
AL
1081 s = PL_last_uni;
1082 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1083 s++;
3280af22 1084 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1085 return;
6136c704 1086
0453d815 1087 if (ckWARN_d(WARN_AMBIGUOUS)){
9014280d 1088 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
32d45c1d
NC
1089 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1090 (int)(s - PL_last_uni), PL_last_uni);
0453d815 1091 }
2f3197b3
LW
1092}
1093
ffb4593c
NT
1094/*
1095 * LOP : macro to build a list operator. Its behaviour has been replaced
1096 * with a subroutine, S_lop() for which LOP is just another name.
1097 */
1098
a0d0e21e
LW
1099#define LOP(f,x) return lop(f,x,s)
1100
ffb4593c
NT
1101/*
1102 * S_lop
1103 * Build a list operator (or something that might be one). The rules:
1104 * - if we have a next token, then it's a list operator [why?]
1105 * - if the next thing is an opening paren, then it's a function
1106 * - else it's a list operator
1107 */
1108
76e3520e 1109STATIC I32
a0be28da 1110S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1111{
97aff369 1112 dVAR;
79072805 1113 yylval.ival = f;
35c8bce7 1114 CLINE;
3280af22
NIS
1115 PL_expect = x;
1116 PL_bufptr = s;
1117 PL_last_lop = PL_oldbufptr;
eb160463 1118 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1119#ifdef PERL_MAD
1120 if (PL_lasttoke)
1121 return REPORT(LSTOP);
1122#else
3280af22 1123 if (PL_nexttoke)
bbf60fe6 1124 return REPORT(LSTOP);
5db06880 1125#endif
79072805 1126 if (*s == '(')
bbf60fe6 1127 return REPORT(FUNC);
29595ff2 1128 s = PEEKSPACE(s);
79072805 1129 if (*s == '(')
bbf60fe6 1130 return REPORT(FUNC);
79072805 1131 else
bbf60fe6 1132 return REPORT(LSTOP);
79072805
LW
1133}
1134
5db06880
NC
1135#ifdef PERL_MAD
1136 /*
1137 * S_start_force
1138 * Sets up for an eventual force_next(). start_force(0) basically does
1139 * an unshift, while start_force(-1) does a push. yylex removes items
1140 * on the "pop" end.
1141 */
1142
1143STATIC void
1144S_start_force(pTHX_ int where)
1145{
1146 int i;
1147
cd81e915 1148 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1149 where = PL_lasttoke;
cd81e915
NC
1150 assert(PL_curforce < 0 || PL_curforce == where);
1151 if (PL_curforce != where) {
5db06880
NC
1152 for (i = PL_lasttoke; i > where; --i) {
1153 PL_nexttoke[i] = PL_nexttoke[i-1];
1154 }
1155 PL_lasttoke++;
1156 }
cd81e915 1157 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1158 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1159 PL_curforce = where;
1160 if (PL_nextwhite) {
5db06880
NC
1161 if (PL_madskills)
1162 curmad('^', newSVpvn("",0));
cd81e915 1163 CURMAD('_', PL_nextwhite);
5db06880
NC
1164 }
1165}
1166
1167STATIC void
1168S_curmad(pTHX_ char slot, SV *sv)
1169{
1170 MADPROP **where;
1171
1172 if (!sv)
1173 return;
cd81e915
NC
1174 if (PL_curforce < 0)
1175 where = &PL_thismad;
5db06880 1176 else
cd81e915 1177 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1178
cd81e915 1179 if (PL_faketokens)
5db06880
NC
1180 sv_setpvn(sv, "", 0);
1181 else {
1182 if (!IN_BYTES) {
1183 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1184 SvUTF8_on(sv);
1185 else if (PL_encoding) {
1186 sv_recode_to_utf8(sv, PL_encoding);
1187 }
1188 }
1189 }
1190
1191 /* keep a slot open for the head of the list? */
1192 if (slot != '_' && *where && (*where)->mad_key == '^') {
1193 (*where)->mad_key = slot;
1194 sv_free((*where)->mad_val);
1195 (*where)->mad_val = (void*)sv;
1196 }
1197 else
1198 addmad(newMADsv(slot, sv), where, 0);
1199}
1200#else
b3f24c00
MHM
1201# define start_force(where) NOOP
1202# define curmad(slot, sv) NOOP
5db06880
NC
1203#endif
1204
ffb4593c
NT
1205/*
1206 * S_force_next
9cbb5ea2 1207 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1208 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1209 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1210 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1211 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1212 */
1213
4e553d73 1214STATIC void
cea2e8a9 1215S_force_next(pTHX_ I32 type)
79072805 1216{
97aff369 1217 dVAR;
5db06880 1218#ifdef PERL_MAD
cd81e915 1219 if (PL_curforce < 0)
5db06880 1220 start_force(PL_lasttoke);
cd81e915 1221 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1222 if (PL_lex_state != LEX_KNOWNEXT)
1223 PL_lex_defer = PL_lex_state;
1224 PL_lex_state = LEX_KNOWNEXT;
1225 PL_lex_expect = PL_expect;
cd81e915 1226 PL_curforce = -1;
5db06880 1227#else
3280af22
NIS
1228 PL_nexttype[PL_nexttoke] = type;
1229 PL_nexttoke++;
1230 if (PL_lex_state != LEX_KNOWNEXT) {
1231 PL_lex_defer = PL_lex_state;
1232 PL_lex_expect = PL_expect;
1233 PL_lex_state = LEX_KNOWNEXT;
79072805 1234 }
5db06880 1235#endif
79072805
LW
1236}
1237
d0a148a6
NC
1238STATIC SV *
1239S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
1240{
97aff369 1241 dVAR;
9d4ba2ae 1242 SV * const sv = newSVpvn(start,len);
bfed75c6 1243 if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
d0a148a6
NC
1244 SvUTF8_on(sv);
1245 return sv;
1246}
1247
ffb4593c
NT
1248/*
1249 * S_force_word
1250 * When the lexer knows the next thing is a word (for instance, it has
1251 * just seen -> and it knows that the next char is a word char, then
1252 * it calls S_force_word to stick the next word into the PL_next lookahead.
1253 *
1254 * Arguments:
b1b65b59 1255 * char *start : buffer position (must be within PL_linestr)
ffb4593c
NT
1256 * int token : PL_next will be this type of bare word (e.g., METHOD,WORD)
1257 * int check_keyword : if true, Perl checks to make sure the word isn't
1258 * a keyword (do this if the word is a label, e.g. goto FOO)
1259 * int allow_pack : if true, : characters will also be allowed (require,
1260 * use, etc. do this)
9cbb5ea2 1261 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1262 */
1263
76e3520e 1264STATIC char *
cea2e8a9 1265S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1266{
97aff369 1267 dVAR;
463ee0b2
LW
1268 register char *s;
1269 STRLEN len;
4e553d73 1270
29595ff2 1271 start = SKIPSPACE1(start);
463ee0b2 1272 s = start;
7e2040f0 1273 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1274 (allow_pack && *s == ':') ||
15f0808c 1275 (allow_initial_tick && *s == '\'') )
a0d0e21e 1276 {
3280af22
NIS
1277 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
1278 if (check_keyword && keyword(PL_tokenbuf, len))
463ee0b2 1279 return start;
cd81e915 1280 start_force(PL_curforce);
5db06880
NC
1281 if (PL_madskills)
1282 curmad('X', newSVpvn(start,s-start));
463ee0b2 1283 if (token == METHOD) {
29595ff2 1284 s = SKIPSPACE1(s);
463ee0b2 1285 if (*s == '(')
3280af22 1286 PL_expect = XTERM;
463ee0b2 1287 else {
3280af22 1288 PL_expect = XOPERATOR;
463ee0b2 1289 }
79072805 1290 }
9ded7720 1291 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1292 = (OP*)newSVOP(OP_CONST,0,
1293 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1294 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1295 force_next(token);
1296 }
1297 return s;
1298}
1299
ffb4593c
NT
1300/*
1301 * S_force_ident
9cbb5ea2 1302 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1303 * text only contains the "foo" portion. The first argument is a pointer
1304 * to the "foo", and the second argument is the type symbol to prefix.
1305 * Forces the next token to be a "WORD".
9cbb5ea2 1306 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
1307 */
1308
76e3520e 1309STATIC void
bfed75c6 1310S_force_ident(pTHX_ register const char *s, int kind)
79072805 1311{
97aff369 1312 dVAR;
c35e046a 1313 if (*s) {
90e5519e
NC
1314 const STRLEN len = strlen(s);
1315 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 1316 start_force(PL_curforce);
9ded7720 1317 NEXTVAL_NEXTTOKE.opval = o;
79072805 1318 force_next(WORD);
748a9306 1319 if (kind) {
11343788 1320 o->op_private = OPpCONST_ENTERED;
55497cff 1321 /* XXX see note in pp_entereval() for why we forgo typo
1322 warnings if the symbol must be introduced in an eval.
1323 GSAR 96-10-12 */
90e5519e
NC
1324 gv_fetchpvn_flags(s, len,
1325 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
1326 : GV_ADD,
1327 kind == '$' ? SVt_PV :
1328 kind == '@' ? SVt_PVAV :
1329 kind == '%' ? SVt_PVHV :
a0d0e21e 1330 SVt_PVGV
90e5519e 1331 );
748a9306 1332 }
79072805
LW
1333 }
1334}
1335
1571675a
GS
1336NV
1337Perl_str_to_version(pTHX_ SV *sv)
1338{
1339 NV retval = 0.0;
1340 NV nshift = 1.0;
1341 STRLEN len;
cfd0369c 1342 const char *start = SvPV_const(sv,len);
9d4ba2ae 1343 const char * const end = start + len;
504618e9 1344 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
1571675a 1345 while (start < end) {
ba210ebe 1346 STRLEN skip;
1571675a
GS
1347 UV n;
1348 if (utf)
9041c2e3 1349 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
1350 else {
1351 n = *(U8*)start;
1352 skip = 1;
1353 }
1354 retval += ((NV)n)/nshift;
1355 start += skip;
1356 nshift *= 1000;
1357 }
1358 return retval;
1359}
1360
4e553d73 1361/*
ffb4593c
NT
1362 * S_force_version
1363 * Forces the next token to be a version number.
e759cc13
RGS
1364 * If the next token appears to be an invalid version number, (e.g. "v2b"),
1365 * and if "guessing" is TRUE, then no new token is created (and the caller
1366 * must use an alternative parsing method).
ffb4593c
NT
1367 */
1368
76e3520e 1369STATIC char *
e759cc13 1370S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 1371{
97aff369 1372 dVAR;
5f66b61c 1373 OP *version = NULL;
44dcb63b 1374 char *d;
5db06880
NC
1375#ifdef PERL_MAD
1376 I32 startoff = s - SvPVX(PL_linestr);
1377#endif
89bfa8cd 1378
29595ff2 1379 s = SKIPSPACE1(s);
89bfa8cd 1380
44dcb63b 1381 d = s;
dd629d5b 1382 if (*d == 'v')
44dcb63b 1383 d++;
44dcb63b 1384 if (isDIGIT(*d)) {
e759cc13
RGS
1385 while (isDIGIT(*d) || *d == '_' || *d == '.')
1386 d++;
5db06880
NC
1387#ifdef PERL_MAD
1388 if (PL_madskills) {
cd81e915 1389 start_force(PL_curforce);
5db06880
NC
1390 curmad('X', newSVpvn(s,d-s));
1391 }
1392#endif
9f3d182e 1393 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 1394 SV *ver;
b73d6f50 1395 s = scan_num(s, &yylval);
89bfa8cd 1396 version = yylval.opval;
dd629d5b
GS
1397 ver = cSVOPx(version)->op_sv;
1398 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 1399 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 1400 SvNV_set(ver, str_to_version(ver));
1571675a 1401 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 1402 }
89bfa8cd 1403 }
5db06880
NC
1404 else if (guessing) {
1405#ifdef PERL_MAD
1406 if (PL_madskills) {
cd81e915
NC
1407 sv_free(PL_nextwhite); /* let next token collect whitespace */
1408 PL_nextwhite = 0;
5db06880
NC
1409 s = SvPVX(PL_linestr) + startoff;
1410 }
1411#endif
e759cc13 1412 return s;
5db06880 1413 }
89bfa8cd 1414 }
1415
5db06880
NC
1416#ifdef PERL_MAD
1417 if (PL_madskills && !version) {
cd81e915
NC
1418 sv_free(PL_nextwhite); /* let next token collect whitespace */
1419 PL_nextwhite = 0;
5db06880
NC
1420 s = SvPVX(PL_linestr) + startoff;
1421 }
1422#endif
89bfa8cd 1423 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 1424 start_force(PL_curforce);
9ded7720 1425 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 1426 force_next(WORD);
89bfa8cd 1427
e759cc13 1428 return s;
89bfa8cd 1429}
1430
ffb4593c
NT
1431/*
1432 * S_tokeq
1433 * Tokenize a quoted string passed in as an SV. It finds the next
1434 * chunk, up to end of string or a backslash. It may make a new
1435 * SV containing that chunk (if HINT_NEW_STRING is on). It also
1436 * turns \\ into \.
1437 */
1438
76e3520e 1439STATIC SV *
cea2e8a9 1440S_tokeq(pTHX_ SV *sv)
79072805 1441{
97aff369 1442 dVAR;
79072805
LW
1443 register char *s;
1444 register char *send;
1445 register char *d;
b3ac6de7
IZ
1446 STRLEN len = 0;
1447 SV *pv = sv;
79072805
LW
1448
1449 if (!SvLEN(sv))
b3ac6de7 1450 goto finish;
79072805 1451
a0d0e21e 1452 s = SvPV_force(sv, len);
21a311ee 1453 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 1454 goto finish;
463ee0b2 1455 send = s + len;
79072805
LW
1456 while (s < send && *s != '\\')
1457 s++;
1458 if (s == send)
b3ac6de7 1459 goto finish;
79072805 1460 d = s;
be4731d2 1461 if ( PL_hints & HINT_NEW_STRING ) {
95a20fc0 1462 pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
be4731d2
NIS
1463 if (SvUTF8(sv))
1464 SvUTF8_on(pv);
1465 }
79072805
LW
1466 while (s < send) {
1467 if (*s == '\\') {
a0d0e21e 1468 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
1469 s++; /* all that, just for this */
1470 }
1471 *d++ = *s++;
1472 }
1473 *d = '\0';
95a20fc0 1474 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 1475 finish:
3280af22 1476 if ( PL_hints & HINT_NEW_STRING )
b3ac6de7 1477 return new_constant(NULL, 0, "q", sv, pv, "q");
79072805
LW
1478 return sv;
1479}
1480
ffb4593c
NT
1481/*
1482 * Now come three functions related to double-quote context,
1483 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
1484 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
1485 * interact with PL_lex_state, and create fake ( ... ) argument lists
1486 * to handle functions and concatenation.
1487 * They assume that whoever calls them will be setting up a fake
1488 * join call, because each subthing puts a ',' after it. This lets
1489 * "lower \luPpEr"
1490 * become
1491 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
1492 *
1493 * (I'm not sure whether the spurious commas at the end of lcfirst's
1494 * arguments and join's arguments are created or not).
1495 */
1496
1497/*
1498 * S_sublex_start
1499 * Assumes that yylval.ival is the op we're creating (e.g. OP_LCFIRST).
1500 *
1501 * Pattern matching will set PL_lex_op to the pattern-matching op to
1502 * make (we return THING if yylval.ival is OP_NULL, PMFUNC otherwise).
1503 *
1504 * OP_CONST and OP_READLINE are easy--just make the new op and return.
1505 *
1506 * Everything else becomes a FUNC.
1507 *
1508 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
1509 * had an OP_CONST or OP_READLINE). This just sets us up for a
1510 * call to S_sublex_push().
1511 */
1512
76e3520e 1513STATIC I32
cea2e8a9 1514S_sublex_start(pTHX)
79072805 1515{
97aff369 1516 dVAR;
0d46e09a 1517 register const I32 op_type = yylval.ival;
79072805
LW
1518
1519 if (op_type == OP_NULL) {
3280af22 1520 yylval.opval = PL_lex_op;
5f66b61c 1521 PL_lex_op = NULL;
79072805
LW
1522 return THING;
1523 }
1524 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 1525 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
1526
1527 if (SvTYPE(sv) == SVt_PVIV) {
1528 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
1529 STRLEN len;
96a5add6 1530 const char * const p = SvPV_const(sv, len);
f54cb97a 1531 SV * const nsv = newSVpvn(p, len);
01ec43d0
GS
1532 if (SvUTF8(sv))
1533 SvUTF8_on(nsv);
b3ac6de7
IZ
1534 SvREFCNT_dec(sv);
1535 sv = nsv;
4e553d73 1536 }
b3ac6de7 1537 yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 1538 PL_lex_stuff = NULL;
6f33ba73
RGS
1539 /* Allow <FH> // "foo" */
1540 if (op_type == OP_READLINE)
1541 PL_expect = XTERMORDORDOR;
79072805
LW
1542 return THING;
1543 }
1544
3280af22
NIS
1545 PL_sublex_info.super_state = PL_lex_state;
1546 PL_sublex_info.sub_inwhat = op_type;
1547 PL_sublex_info.sub_op = PL_lex_op;
1548 PL_lex_state = LEX_INTERPPUSH;
55497cff 1549
3280af22
NIS
1550 PL_expect = XTERM;
1551 if (PL_lex_op) {
1552 yylval.opval = PL_lex_op;
5f66b61c 1553 PL_lex_op = NULL;
55497cff 1554 return PMFUNC;
1555 }
1556 else
1557 return FUNC;
1558}
1559
ffb4593c
NT
1560/*
1561 * S_sublex_push
1562 * Create a new scope to save the lexing state. The scope will be
1563 * ended in S_sublex_done. Returns a '(', starting the function arguments
1564 * to the uc, lc, etc. found before.
1565 * Sets PL_lex_state to LEX_INTERPCONCAT.
1566 */
1567
76e3520e 1568STATIC I32
cea2e8a9 1569S_sublex_push(pTHX)
55497cff 1570{
27da23d5 1571 dVAR;
f46d017c 1572 ENTER;
55497cff 1573
3280af22
NIS
1574 PL_lex_state = PL_sublex_info.super_state;
1575 SAVEI32(PL_lex_dojoin);
1576 SAVEI32(PL_lex_brackets);
3280af22
NIS
1577 SAVEI32(PL_lex_casemods);
1578 SAVEI32(PL_lex_starts);
1579 SAVEI32(PL_lex_state);
7766f137 1580 SAVEVPTR(PL_lex_inpat);
3280af22 1581 SAVEI32(PL_lex_inwhat);
57843af0 1582 SAVECOPLINE(PL_curcop);
3280af22 1583 SAVEPPTR(PL_bufptr);
8452ff4b 1584 SAVEPPTR(PL_bufend);
3280af22
NIS
1585 SAVEPPTR(PL_oldbufptr);
1586 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
1587 SAVEPPTR(PL_last_lop);
1588 SAVEPPTR(PL_last_uni);
3280af22
NIS
1589 SAVEPPTR(PL_linestart);
1590 SAVESPTR(PL_linestr);
8edd5f42
RGS
1591 SAVEGENERICPV(PL_lex_brackstack);
1592 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
1593
1594 PL_linestr = PL_lex_stuff;
a0714e2c 1595 PL_lex_stuff = NULL;
3280af22 1596
9cbb5ea2
GS
1597 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
1598 = SvPVX(PL_linestr);
3280af22 1599 PL_bufend += SvCUR(PL_linestr);
bd61b366 1600 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1601 SAVEFREESV(PL_linestr);
1602
1603 PL_lex_dojoin = FALSE;
1604 PL_lex_brackets = 0;
a02a5408
JC
1605 Newx(PL_lex_brackstack, 120, char);
1606 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
1607 PL_lex_casemods = 0;
1608 *PL_lex_casestack = '\0';
1609 PL_lex_starts = 0;
1610 PL_lex_state = LEX_INTERPCONCAT;
eb160463 1611 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
1612
1613 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
1614 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
1615 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 1616 else
5f66b61c 1617 PL_lex_inpat = NULL;
79072805 1618
55497cff 1619 return '(';
79072805
LW
1620}
1621
ffb4593c
NT
1622/*
1623 * S_sublex_done
1624 * Restores lexer state after a S_sublex_push.
1625 */
1626
76e3520e 1627STATIC I32
cea2e8a9 1628S_sublex_done(pTHX)
79072805 1629{
27da23d5 1630 dVAR;
3280af22 1631 if (!PL_lex_starts++) {
396482e1 1632 SV * const sv = newSVpvs("");
9aa983d2
JH
1633 if (SvUTF8(PL_linestr))
1634 SvUTF8_on(sv);
3280af22 1635 PL_expect = XOPERATOR;
9aa983d2 1636 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
1637 return THING;
1638 }
1639
3280af22
NIS
1640 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
1641 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 1642 return yylex();
79072805
LW
1643 }
1644
ffb4593c 1645 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
1646 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
1647 PL_linestr = PL_lex_repl;
1648 PL_lex_inpat = 0;
1649 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
1650 PL_bufend += SvCUR(PL_linestr);
bd61b366 1651 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
1652 SAVEFREESV(PL_linestr);
1653 PL_lex_dojoin = FALSE;
1654 PL_lex_brackets = 0;
3280af22
NIS
1655 PL_lex_casemods = 0;
1656 *PL_lex_casestack = '\0';
1657 PL_lex_starts = 0;
25da4f38 1658 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
1659 PL_lex_state = LEX_INTERPNORMAL;
1660 PL_lex_starts++;
e9fa98b2
HS
1661 /* we don't clear PL_lex_repl here, so that we can check later
1662 whether this is an evalled subst; that means we rely on the
1663 logic to ensure sublex_done() is called again only via the
1664 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 1665 }
e9fa98b2 1666 else {
3280af22 1667 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 1668 PL_lex_repl = NULL;
e9fa98b2 1669 }
79072805 1670 return ',';
ffed7fef
LW
1671 }
1672 else {
5db06880
NC
1673#ifdef PERL_MAD
1674 if (PL_madskills) {
cd81e915
NC
1675 if (PL_thiswhite) {
1676 if (!PL_endwhite)
1677 PL_endwhite = newSVpvn("",0);
1678 sv_catsv(PL_endwhite, PL_thiswhite);
1679 PL_thiswhite = 0;
1680 }
1681 if (PL_thistoken)
1682 sv_setpvn(PL_thistoken,"",0);
5db06880 1683 else
cd81e915 1684 PL_realtokenstart = -1;
5db06880
NC
1685 }
1686#endif
f46d017c 1687 LEAVE;
3280af22
NIS
1688 PL_bufend = SvPVX(PL_linestr);
1689 PL_bufend += SvCUR(PL_linestr);
1690 PL_expect = XOPERATOR;
09bef843 1691 PL_sublex_info.sub_inwhat = 0;
79072805 1692 return ')';
ffed7fef
LW
1693 }
1694}
1695
02aa26ce
NT
1696/*
1697 scan_const
1698
1699 Extracts a pattern, double-quoted string, or transliteration. This
1700 is terrifying code.
1701
94def140 1702 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 1703 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 1704 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 1705
94def140
TS
1706 Returns a pointer to the character scanned up to. If this is
1707 advanced from the start pointer supplied (i.e. if anything was
9b599b2a
GS
1708 successfully parsed), will leave an OP for the substring scanned
1709 in yylval. Caller must intuit reason for not parsing further
1710 by looking at the next characters herself.
1711
02aa26ce
NT
1712 In patterns:
1713 backslashes:
1714 double-quoted style: \r and \n
1715 regexp special ones: \D \s
94def140
TS
1716 constants: \x31
1717 backrefs: \1
02aa26ce
NT
1718 case and quoting: \U \Q \E
1719 stops on @ and $, but not for $ as tail anchor
1720
1721 In transliterations:
1722 characters are VERY literal, except for - not at the start or end
94def140
TS
1723 of the string, which indicates a range. If the range is in bytes,
1724 scan_const expands the range to the full set of intermediate
1725 characters. If the range is in utf8, the hyphen is replaced with
1726 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
1727
1728 In double-quoted strings:
1729 backslashes:
1730 double-quoted style: \r and \n
94def140
TS
1731 constants: \x31
1732 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
1733 case and quoting: \U \Q \E
1734 stops on @ and $
1735
1736 scan_const does *not* construct ops to handle interpolated strings.
1737 It stops processing as soon as it finds an embedded $ or @ variable
1738 and leaves it to the caller to work out what's going on.
1739
94def140
TS
1740 embedded arrays (whether in pattern or not) could be:
1741 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
1742
1743 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
1744
1745 $ in pattern could be $foo or could be tail anchor. Assumption:
1746 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 1747 followed by one of "()| \r\n\t"
02aa26ce
NT
1748
1749 \1 (backreferences) are turned into $1
1750
1751 The structure of the code is
1752 while (there's a character to process) {
94def140
TS
1753 handle transliteration ranges
1754 skip regexp comments /(?#comment)/ and codes /(?{code})/
1755 skip #-initiated comments in //x patterns
1756 check for embedded arrays
02aa26ce
NT
1757 check for embedded scalars
1758 if (backslash) {
94def140
TS
1759 leave intact backslashes from leaveit (below)
1760 deprecate \1 in substitution replacements
02aa26ce
NT
1761 handle string-changing backslashes \l \U \Q \E, etc.
1762 switch (what was escaped) {
94def140
TS
1763 handle \- in a transliteration (becomes a literal -)
1764 handle \132 (octal characters)
1765 handle \x15 and \x{1234} (hex characters)
1766 handle \N{name} (named characters)
1767 handle \cV (control characters)
1768 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce
NT
1769 } (end switch)
1770 } (end if backslash)
1771 } (end while character to read)
4e553d73 1772
02aa26ce
NT
1773*/
1774
76e3520e 1775STATIC char *
cea2e8a9 1776S_scan_const(pTHX_ char *start)
79072805 1777{
97aff369 1778 dVAR;
3280af22 1779 register char *send = PL_bufend; /* end of the constant */
561b68a9 1780 SV *sv = newSV(send - start); /* sv for the constant */
02aa26ce
NT
1781 register char *s = start; /* start of the constant */
1782 register char *d = SvPVX(sv); /* destination for copies */
1783 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 1784 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0
NIS
1785 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
1786 I32 this_utf8 = UTF; /* The source string is assumed to be UTF8 */
012bcf8d 1787 UV uv;
4c3a8340
TS
1788#ifdef EBCDIC
1789 UV literal_endpoint = 0;
1790#endif
012bcf8d 1791
d4c19fe8 1792 const char * const leaveit = /* set of acceptably-backslashed characters */
3280af22 1793 PL_lex_inpat
25f684f7 1794 ? "\\.^$@AGZdDwWsSbBpPXC+*?|()-nrtfeaxcz0123456789[{]} \t\n\r\f\v#"
9b599b2a 1795 : "";
79072805 1796
2b9d42f0
NIS
1797 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
1798 /* If we are doing a trans and we know we want UTF8 set expectation */
1799 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
1800 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
1801 }
1802
1803
79072805 1804 while (s < send || dorange) {
02aa26ce 1805 /* get transliterations out of the way (they're most literal) */
3280af22 1806 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 1807 /* expand a range A-Z to the full set of characters. AIE! */
79072805 1808 if (dorange) {
1ba5c669
JH
1809 I32 i; /* current expanded character */
1810 I32 min; /* first character in range */
1811 I32 max; /* last character in range */
02aa26ce 1812
2b9d42f0 1813 if (has_utf8) {
9d4ba2ae 1814 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
1815 char *e = d++;
1816 while (e-- > c)
1817 *(e + 1) = *e;
25716404 1818 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
1819 /* mark the range as done, and continue */
1820 dorange = FALSE;
1821 didrange = TRUE;
1822 continue;
1823 }
2b9d42f0 1824
95a20fc0 1825 i = d - SvPVX_const(sv); /* remember current offset */
9cbb5ea2
GS
1826 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
1827 d = SvPVX(sv) + i; /* refresh d after realloc */
02aa26ce
NT
1828 d -= 2; /* eat the first char and the - */
1829
8ada0baa
JH
1830 min = (U8)*d; /* first char in range */
1831 max = (U8)d[1]; /* last char in range */
1832
c2e66d9e 1833 if (min > max) {
01ec43d0 1834 Perl_croak(aTHX_
d1573ac7 1835 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 1836 (char)min, (char)max);
c2e66d9e
GS
1837 }
1838
c7f1f016 1839#ifdef EBCDIC
4c3a8340
TS
1840 if (literal_endpoint == 2 &&
1841 ((isLOWER(min) && isLOWER(max)) ||
1842 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
1843 if (isLOWER(min)) {
1844 for (i = min; i <= max; i++)
1845 if (isLOWER(i))
db42d148 1846 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1847 } else {
1848 for (i = min; i <= max; i++)
1849 if (isUPPER(i))
db42d148 1850 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
1851 }
1852 }
1853 else
1854#endif
1855 for (i = min; i <= max; i++)
eb160463 1856 *d++ = (char)i;
02aa26ce
NT
1857
1858 /* mark the range as done, and continue */
79072805 1859 dorange = FALSE;
01ec43d0 1860 didrange = TRUE;
4c3a8340
TS
1861#ifdef EBCDIC
1862 literal_endpoint = 0;
1863#endif
79072805 1864 continue;
4e553d73 1865 }
02aa26ce
NT
1866
1867 /* range begins (ignore - as first or last char) */
79072805 1868 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 1869 if (didrange) {
1fafa243 1870 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 1871 }
2b9d42f0 1872 if (has_utf8) {
25716404 1873 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
1874 s++;
1875 continue;
1876 }
79072805
LW
1877 dorange = TRUE;
1878 s++;
01ec43d0
GS
1879 }
1880 else {
1881 didrange = FALSE;
4c3a8340
TS
1882#ifdef EBCDIC
1883 literal_endpoint = 0;
1884#endif
01ec43d0 1885 }
79072805 1886 }
02aa26ce
NT
1887
1888 /* if we get here, we're not doing a transliteration */
1889
0f5d15d6
IZ
1890 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
1891 except for the last char, which will be done separately. */
3280af22 1892 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 1893 if (s[2] == '#') {
e994fd66 1894 while (s+1 < send && *s != ')')
db42d148 1895 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
1896 }
1897 else if (s[2] == '{' /* This should match regcomp.c */
1898 || ((s[2] == 'p' || s[2] == '?') && s[3] == '{'))
1899 {
cc6b7395 1900 I32 count = 1;
0f5d15d6 1901 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
1902 char c;
1903
d9f97599
GS
1904 while (count && (c = *regparse)) {
1905 if (c == '\\' && regparse[1])
1906 regparse++;
4e553d73 1907 else if (c == '{')
cc6b7395 1908 count++;
4e553d73 1909 else if (c == '}')
cc6b7395 1910 count--;
d9f97599 1911 regparse++;
cc6b7395 1912 }
e994fd66 1913 if (*regparse != ')')
5bdf89e7 1914 regparse--; /* Leave one char for continuation. */
0f5d15d6 1915 while (s < regparse)
db42d148 1916 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 1917 }
748a9306 1918 }
02aa26ce
NT
1919
1920 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
1921 else if (*s == '#' && PL_lex_inpat &&
1922 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 1923 while (s+1 < send && *s != '\n')
db42d148 1924 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 1925 }
02aa26ce 1926
5d1d4326 1927 /* check for embedded arrays
da6eedaa 1928 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 1929 */
7e2040f0 1930 else if (*s == '@' && s[1]
5d1d4326 1931 && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1])))
79072805 1932 break;
02aa26ce
NT
1933
1934 /* check for embedded scalars. only stop if we're sure it's a
1935 variable.
1936 */
79072805 1937 else if (*s == '$') {
3280af22 1938 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 1939 break;
6002328a 1940 if (s + 1 < send && !strchr("()| \r\n\t", s[1]))
79072805
LW
1941 break; /* in regexp, $ might be tail anchor */
1942 }
02aa26ce 1943
2b9d42f0
NIS
1944 /* End of else if chain - OP_TRANS rejoin rest */
1945
02aa26ce 1946 /* backslashes */
79072805
LW
1947 if (*s == '\\' && s+1 < send) {
1948 s++;
02aa26ce
NT
1949
1950 /* some backslashes we leave behind */
c9f97d15 1951 if (*leaveit && *s && strchr(leaveit, *s)) {
db42d148
NIS
1952 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
1953 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
79072805
LW
1954 continue;
1955 }
02aa26ce
NT
1956
1957 /* deprecate \1 in strings and substitution replacements */
3280af22 1958 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 1959 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 1960 {
599cee73 1961 if (ckWARN(WARN_SYNTAX))
9014280d 1962 Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
1963 *--s = '$';
1964 break;
1965 }
02aa26ce
NT
1966
1967 /* string-change backslash escapes */
3280af22 1968 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
1969 --s;
1970 break;
1971 }
02aa26ce
NT
1972
1973 /* if we get here, it's either a quoted -, or a digit */
79072805 1974 switch (*s) {
02aa26ce
NT
1975
1976 /* quoted - in transliterations */
79072805 1977 case '-':
3280af22 1978 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
1979 *d++ = *s++;
1980 continue;
1981 }
1982 /* FALL THROUGH */
1983 default:
11b8faa4 1984 {
86f97054 1985 if ((isALPHA(*s) || isDIGIT(*s)) &&
041457d9 1986 ckWARN(WARN_MISC))
9014280d 1987 Perl_warner(aTHX_ packWARN(WARN_MISC),
11b8faa4
JH
1988 "Unrecognized escape \\%c passed through",
1989 *s);
1990 /* default action is to copy the quoted character */
f9a63242 1991 goto default_action;
11b8faa4 1992 }
02aa26ce
NT
1993
1994 /* \132 indicates an octal constant */
79072805
LW
1995 case '0': case '1': case '2': case '3':
1996 case '4': case '5': case '6': case '7':
ba210ebe 1997 {
53305cf1
NC
1998 I32 flags = 0;
1999 STRLEN len = 3;
2000 uv = grok_oct(s, &len, &flags, NULL);
ba210ebe
JH
2001 s += len;
2002 }
012bcf8d 2003 goto NUM_ESCAPE_INSERT;
02aa26ce
NT
2004
2005 /* \x24 indicates a hex constant */
79072805 2006 case 'x':
a0ed51b3
LW
2007 ++s;
2008 if (*s == '{') {
9d4ba2ae 2009 char* const e = strchr(s, '}');
a4c04bdc
NC
2010 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2011 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2012 STRLEN len;
355860ce 2013
53305cf1 2014 ++s;
adaeee49 2015 if (!e) {
a0ed51b3 2016 yyerror("Missing right brace on \\x{}");
355860ce 2017 continue;
ba210ebe 2018 }
53305cf1
NC
2019 len = e - s;
2020 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe 2021 s = e + 1;
a0ed51b3
LW
2022 }
2023 else {
ba210ebe 2024 {
53305cf1 2025 STRLEN len = 2;
a4c04bdc 2026 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2027 uv = grok_hex(s, &len, &flags, NULL);
ba210ebe
JH
2028 s += len;
2029 }
012bcf8d
GS
2030 }
2031
2032 NUM_ESCAPE_INSERT:
2033 /* Insert oct or hex escaped character.
301d3d20 2034 * There will always enough room in sv since such
db42d148 2035 * escapes will be longer than any UTF-8 sequence
301d3d20 2036 * they can end up as. */
ba7cea30 2037
c7f1f016
NIS
2038 /* We need to map to chars to ASCII before doing the tests
2039 to cover EBCDIC
2040 */
c4d5f83a 2041 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(uv))) {
9aa983d2 2042 if (!has_utf8 && uv > 255) {
301d3d20
JH
2043 /* Might need to recode whatever we have
2044 * accumulated so far if it contains any
2045 * hibit chars.
2046 *
2047 * (Can't we keep track of that and avoid
2048 * this rescan? --jhi)
012bcf8d 2049 */
c7f1f016 2050 int hicount = 0;
63cd0674
NIS
2051 U8 *c;
2052 for (c = (U8 *) SvPVX(sv); c < (U8 *)d; c++) {
c4d5f83a 2053 if (!NATIVE_IS_INVARIANT(*c)) {
012bcf8d 2054 hicount++;
db42d148 2055 }
012bcf8d 2056 }
63cd0674 2057 if (hicount) {
9d4ba2ae 2058 const STRLEN offset = d - SvPVX_const(sv);
db42d148
NIS
2059 U8 *src, *dst;
2060 d = SvGROW(sv, SvLEN(sv) + hicount + 1) + offset;
2061 src = (U8 *)d - 1;
2062 dst = src+hicount;
2063 d += hicount;
cfd0369c 2064 while (src >= (const U8 *)SvPVX_const(sv)) {
c4d5f83a 2065 if (!NATIVE_IS_INVARIANT(*src)) {
9d4ba2ae 2066 const U8 ch = NATIVE_TO_ASCII(*src);
eb160463
GS
2067 *dst-- = (U8)UTF8_EIGHT_BIT_LO(ch);
2068 *dst-- = (U8)UTF8_EIGHT_BIT_HI(ch);
012bcf8d
GS
2069 }
2070 else {
63cd0674 2071 *dst-- = *src;
012bcf8d 2072 }
c7f1f016 2073 src--;
012bcf8d
GS
2074 }
2075 }
2076 }
2077
9aa983d2 2078 if (has_utf8 || uv > 255) {
9041c2e3 2079 d = (char*)uvchr_to_utf8((U8*)d, uv);
4e553d73 2080 has_utf8 = TRUE;
f9a63242
JH
2081 if (PL_lex_inwhat == OP_TRANS &&
2082 PL_sublex_info.sub_op) {
2083 PL_sublex_info.sub_op->op_private |=
2084 (PL_lex_repl ? OPpTRANS_FROM_UTF
2085 : OPpTRANS_TO_UTF);
f9a63242 2086 }
012bcf8d 2087 }
a0ed51b3 2088 else {
012bcf8d 2089 *d++ = (char)uv;
a0ed51b3 2090 }
012bcf8d
GS
2091 }
2092 else {
c4d5f83a 2093 *d++ = (char) uv;
a0ed51b3 2094 }
79072805 2095 continue;
02aa26ce 2096
b239daa5 2097 /* \N{LATIN SMALL LETTER A} is a named character */
4a2d328f 2098 case 'N':
55eda711 2099 ++s;
423cee85
JH
2100 if (*s == '{') {
2101 char* e = strchr(s, '}');
155aba94 2102 SV *res;
423cee85 2103 STRLEN len;
cfd0369c 2104 const char *str;
4e553d73 2105
423cee85 2106 if (!e) {
5777a3f7 2107 yyerror("Missing right brace on \\N{}");
423cee85
JH
2108 e = s - 1;
2109 goto cont_scan;
2110 }
dbc0d4f2
JH
2111 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
2112 /* \N{U+...} */
2113 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2114 PERL_SCAN_DISALLOW_PREFIX;
2115 s += 3;
2116 len = e - s;
2117 uv = grok_hex(s, &len, &flags, NULL);
2118 s = e + 1;
2119 goto NUM_ESCAPE_INSERT;
2120 }
55eda711 2121 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2122 res = new_constant( NULL, 0, "charnames",
a0714e2c 2123 res, NULL, "\\N{...}" );
f9a63242
JH
2124 if (has_utf8)
2125 sv_utf8_upgrade(res);
cfd0369c 2126 str = SvPV_const(res,len);
1c47067b
JH
2127#ifdef EBCDIC_NEVER_MIND
2128 /* charnames uses pack U and that has been
2129 * recently changed to do the below uni->native
2130 * mapping, so this would be redundant (and wrong,
2131 * the code point would be doubly converted).
2132 * But leave this in just in case the pack U change
2133 * gets revoked, but the semantics is still
2134 * desireable for charnames. --jhi */
cddc7ef4 2135 {
cfd0369c 2136 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2137
2138 if (uv < 0x100) {
89ebb4a3 2139 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2140
2141 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2142 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2143 str = SvPV_const(res, len);
cddc7ef4
JH
2144 }
2145 }
2146#endif
89491803 2147 if (!has_utf8 && SvUTF8(res)) {
9d4ba2ae 2148 const char * const ostart = SvPVX_const(sv);
f08d6ad9
GS
2149 SvCUR_set(sv, d - ostart);
2150 SvPOK_on(sv);
e4f3eed8 2151 *d = '\0';
f08d6ad9 2152 sv_utf8_upgrade(sv);
d2f449dd 2153 /* this just broke our allocation above... */
eb160463 2154 SvGROW(sv, (STRLEN)(send - start));
f08d6ad9 2155 d = SvPVX(sv) + SvCUR(sv);
89491803 2156 has_utf8 = TRUE;
f08d6ad9 2157 }
eb160463 2158 if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
9d4ba2ae 2159 const char * const odest = SvPVX_const(sv);
423cee85 2160
8973db79 2161 SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
423cee85
JH
2162 d = SvPVX(sv) + (d - odest);
2163 }
2164 Copy(str, d, len, char);
2165 d += len;
2166 SvREFCNT_dec(res);
2167 cont_scan:
2168 s = e + 1;
2169 }
2170 else
5777a3f7 2171 yyerror("Missing braces on \\N{}");
423cee85
JH
2172 continue;
2173
02aa26ce 2174 /* \c is a control character */
79072805
LW
2175 case 'c':
2176 s++;
961ce445 2177 if (s < send) {
ba210ebe 2178 U8 c = *s++;
c7f1f016
NIS
2179#ifdef EBCDIC
2180 if (isLOWER(c))
2181 c = toUPPER(c);
2182#endif
db42d148 2183 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2184 }
961ce445
RGS
2185 else {
2186 yyerror("Missing control char name in \\c");
2187 }
79072805 2188 continue;
02aa26ce
NT
2189
2190 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2191 case 'b':
db42d148 2192 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2193 break;
2194 case 'n':
db42d148 2195 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2196 break;
2197 case 'r':
db42d148 2198 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2199 break;
2200 case 'f':
db42d148 2201 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2202 break;
2203 case 't':
db42d148 2204 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 2205 break;
34a3fe2a 2206 case 'e':
db42d148 2207 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
2208 break;
2209 case 'a':
db42d148 2210 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 2211 break;
02aa26ce
NT
2212 } /* end switch */
2213
79072805
LW
2214 s++;
2215 continue;
02aa26ce 2216 } /* end if (backslash) */
4c3a8340
TS
2217#ifdef EBCDIC
2218 else
2219 literal_endpoint++;
2220#endif
02aa26ce 2221
f9a63242 2222 default_action:
2b9d42f0
NIS
2223 /* If we started with encoded form, or already know we want it
2224 and then encode the next character */
2225 if ((has_utf8 || this_utf8) && !NATIVE_IS_INVARIANT((U8)(*s))) {
2226 STRLEN len = 1;
5f66b61c
AL
2227 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
2228 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
2b9d42f0
NIS
2229 s += len;
2230 if (need > len) {
2231 /* encoded value larger than old, need extra space (NOTE: SvCUR() not set here) */
9d4ba2ae 2232 const STRLEN off = d - SvPVX_const(sv);
2b9d42f0
NIS
2233 d = SvGROW(sv, SvLEN(sv) + (need-len)) + off;
2234 }
5f66b61c 2235 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
2b9d42f0
NIS
2236 has_utf8 = TRUE;
2237 }
2238 else {
2239 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2240 }
02aa26ce
NT
2241 } /* while loop to process each character */
2242
2243 /* terminate the string and set up the sv */
79072805 2244 *d = '\0';
95a20fc0 2245 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 2246 if (SvCUR(sv) >= SvLEN(sv))
d0063567 2247 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 2248
79072805 2249 SvPOK_on(sv);
9f4817db 2250 if (PL_encoding && !has_utf8) {
d0063567
DK
2251 sv_recode_to_utf8(sv, PL_encoding);
2252 if (SvUTF8(sv))
2253 has_utf8 = TRUE;
9f4817db 2254 }
2b9d42f0 2255 if (has_utf8) {
7e2040f0 2256 SvUTF8_on(sv);
2b9d42f0 2257 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 2258 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
2259 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2260 }
2261 }
79072805 2262
02aa26ce 2263 /* shrink the sv if we allocated more than we used */
79072805 2264 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 2265 SvPV_shrink_to_cur(sv);
79072805 2266 }
02aa26ce 2267
9b599b2a 2268 /* return the substring (via yylval) only if we parsed anything */
3280af22
NIS
2269 if (s > PL_bufptr) {
2270 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
4e553d73 2271 sv = new_constant(start, s - start, (PL_lex_inpat ? "qr" : "q"),
a0714e2c 2272 sv, NULL,
4e553d73 2273 ( PL_lex_inwhat == OP_TRANS
b3ac6de7 2274 ? "tr"
3280af22 2275 : ( (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat)
b3ac6de7
IZ
2276 ? "s"
2277 : "qq")));
79072805 2278 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 2279 } else
8990e307 2280 SvREFCNT_dec(sv);
79072805
LW
2281 return s;
2282}
2283
ffb4593c
NT
2284/* S_intuit_more
2285 * Returns TRUE if there's more to the expression (e.g., a subscript),
2286 * FALSE otherwise.
ffb4593c
NT
2287 *
2288 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
2289 *
2290 * ->[ and ->{ return TRUE
2291 * { and [ outside a pattern are always subscripts, so return TRUE
2292 * if we're outside a pattern and it's not { or [, then return FALSE
2293 * if we're in a pattern and the first char is a {
2294 * {4,5} (any digits around the comma) returns FALSE
2295 * if we're in a pattern and the first char is a [
2296 * [] returns FALSE
2297 * [SOMETHING] has a funky algorithm to decide whether it's a
2298 * character class or not. It has to deal with things like
2299 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
2300 * anything else returns TRUE
2301 */
2302
9cbb5ea2
GS
2303/* This is the one truly awful dwimmer necessary to conflate C and sed. */
2304
76e3520e 2305STATIC int
cea2e8a9 2306S_intuit_more(pTHX_ register char *s)
79072805 2307{
97aff369 2308 dVAR;
3280af22 2309 if (PL_lex_brackets)
79072805
LW
2310 return TRUE;
2311 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
2312 return TRUE;
2313 if (*s != '{' && *s != '[')
2314 return FALSE;
3280af22 2315 if (!PL_lex_inpat)
79072805
LW
2316 return TRUE;
2317
2318 /* In a pattern, so maybe we have {n,m}. */
2319 if (*s == '{') {
2320 s++;
2321 if (!isDIGIT(*s))
2322 return TRUE;
2323 while (isDIGIT(*s))
2324 s++;
2325 if (*s == ',')
2326 s++;
2327 while (isDIGIT(*s))
2328 s++;
2329 if (*s == '}')
2330 return FALSE;
2331 return TRUE;
2332
2333 }
2334
2335 /* On the other hand, maybe we have a character class */
2336
2337 s++;
2338 if (*s == ']' || *s == '^')
2339 return FALSE;
2340 else {
ffb4593c 2341 /* this is terrifying, and it works */
79072805
LW
2342 int weight = 2; /* let's weigh the evidence */
2343 char seen[256];
f27ffc4a 2344 unsigned char un_char = 255, last_un_char;
9d4ba2ae 2345 const char * const send = strchr(s,']');
3280af22 2346 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
2347
2348 if (!send) /* has to be an expression */
2349 return TRUE;
2350
2351 Zero(seen,256,char);
2352 if (*s == '$')
2353 weight -= 3;
2354 else if (isDIGIT(*s)) {
2355 if (s[1] != ']') {
2356 if (isDIGIT(s[1]) && s[2] == ']')
2357 weight -= 10;
2358 }
2359 else
2360 weight -= 100;
2361 }
2362 for (; s < send; s++) {
2363 last_un_char = un_char;
2364 un_char = (unsigned char)*s;
2365 switch (*s) {
2366 case '@':
2367 case '&':
2368 case '$':
2369 weight -= seen[un_char] * 10;
7e2040f0 2370 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 2371 int len;
8903cb82 2372 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
2373 len = (int)strlen(tmpbuf);
2374 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
2375 weight -= 100;
2376 else
2377 weight -= 10;
2378 }
2379 else if (*s == '$' && s[1] &&
93a17b20
LW
2380 strchr("[#!%*<>()-=",s[1])) {
2381 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
2382 weight -= 10;
2383 else
2384 weight -= 1;
2385 }
2386 break;
2387 case '\\':
2388 un_char = 254;
2389 if (s[1]) {
93a17b20 2390 if (strchr("wds]",s[1]))
79072805
LW
2391 weight += 100;
2392 else if (seen['\''] || seen['"'])
2393 weight += 1;
93a17b20 2394 else if (strchr("rnftbxcav",s[1]))
79072805
LW
2395 weight += 40;
2396 else if (isDIGIT(s[1])) {
2397 weight += 40;
2398 while (s[1] && isDIGIT(s[1]))
2399 s++;
2400 }
2401 }
2402 else
2403 weight += 100;
2404 break;
2405 case '-':
2406 if (s[1] == '\\')
2407 weight += 50;
93a17b20 2408 if (strchr("aA01! ",last_un_char))
79072805 2409 weight += 30;
93a17b20 2410 if (strchr("zZ79~",s[1]))
79072805 2411 weight += 30;
f27ffc4a
GS
2412 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
2413 weight -= 5; /* cope with negative subscript */
79072805
LW
2414 break;
2415 default:
3792a11b
NC
2416 if (!isALNUM(last_un_char)
2417 && !(last_un_char == '$' || last_un_char == '@'
2418 || last_un_char == '&')
2419 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
2420 char *d = tmpbuf;
2421 while (isALPHA(*s))
2422 *d++ = *s++;
2423 *d = '\0';
2424 if (keyword(tmpbuf, d - tmpbuf))
2425 weight -= 150;
2426 }
2427 if (un_char == last_un_char + 1)
2428 weight += 5;
2429 weight -= seen[un_char];
2430 break;
2431 }
2432 seen[un_char]++;
2433 }
2434 if (weight >= 0) /* probably a character class */
2435 return FALSE;
2436 }
2437
2438 return TRUE;
2439}
ffed7fef 2440
ffb4593c
NT
2441/*
2442 * S_intuit_method
2443 *
2444 * Does all the checking to disambiguate
2445 * foo bar
2446 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
2447 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
2448 *
2449 * First argument is the stuff after the first token, e.g. "bar".
2450 *
2451 * Not a method if bar is a filehandle.
2452 * Not a method if foo is a subroutine prototyped to take a filehandle.
2453 * Not a method if it's really "Foo $bar"
2454 * Method if it's "foo $bar"
2455 * Not a method if it's really "print foo $bar"
2456 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 2457 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 2458 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
2459 * =>
2460 */
2461
76e3520e 2462STATIC int
62d55b22 2463S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 2464{
97aff369 2465 dVAR;
a0d0e21e 2466 char *s = start + (*start == '$');
3280af22 2467 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
2468 STRLEN len;
2469 GV* indirgv;
5db06880
NC
2470#ifdef PERL_MAD
2471 int soff;
2472#endif
a0d0e21e
LW
2473
2474 if (gv) {
62d55b22 2475 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 2476 return 0;
62d55b22
NC
2477 if (cv) {
2478 if (SvPOK(cv)) {
2479 const char *proto = SvPVX_const(cv);
2480 if (proto) {
2481 if (*proto == ';')
2482 proto++;
2483 if (*proto == '*')
2484 return 0;
2485 }
b6c543e3
IZ
2486 }
2487 } else
c35e046a 2488 gv = NULL;
a0d0e21e 2489 }
8903cb82 2490 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
2491 /* start is the beginning of the possible filehandle/object,
2492 * and s is the end of it
2493 * tmpbuf is a copy of it
2494 */
2495
a0d0e21e 2496 if (*start == '$') {
3280af22 2497 if (gv || PL_last_lop_op == OP_PRINT || isUPPER(*PL_tokenbuf))
a0d0e21e 2498 return 0;
5db06880
NC
2499#ifdef PERL_MAD
2500 len = start - SvPVX(PL_linestr);
2501#endif
29595ff2 2502 s = PEEKSPACE(s);
5db06880
NC
2503#ifdef PERLMAD
2504 start = SvPVX(PL_linestr) + len;
2505#endif
3280af22
NIS
2506 PL_bufptr = start;
2507 PL_expect = XREF;
a0d0e21e
LW
2508 return *s == '(' ? FUNCMETH : METHOD;
2509 }
2510 if (!keyword(tmpbuf, len)) {
c3e0f903
GS
2511 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
2512 len -= 2;
2513 tmpbuf[len] = '\0';
5db06880
NC
2514#ifdef PERL_MAD
2515 soff = s - SvPVX(PL_linestr);
2516#endif
c3e0f903
GS
2517 goto bare_package;
2518 }
90e5519e 2519 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 2520 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
2521 return 0;
2522 /* filehandle or package name makes it a method */
89bfa8cd 2523 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, FALSE)) {
5db06880
NC
2524#ifdef PERL_MAD
2525 soff = s - SvPVX(PL_linestr);
2526#endif
29595ff2 2527 s = PEEKSPACE(s);
3280af22 2528 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 2529 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 2530 bare_package:
cd81e915 2531 start_force(PL_curforce);
9ded7720 2532 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
79cb57f6 2533 newSVpvn(tmpbuf,len));
9ded7720 2534 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
2535 if (PL_madskills)
2536 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 2537 PL_expect = XTERM;
a0d0e21e 2538 force_next(WORD);
3280af22 2539 PL_bufptr = s;
5db06880
NC
2540#ifdef PERL_MAD
2541 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
2542#endif
a0d0e21e
LW
2543 return *s == '(' ? FUNCMETH : METHOD;
2544 }
2545 }
2546 return 0;
2547}
2548
ffb4593c
NT
2549/*
2550 * S_incl_perldb
2551 * Return a string of Perl code to load the debugger. If PERL5DB
2552 * is set, it will return the contents of that, otherwise a
2553 * compile-time require of perl5db.pl.
2554 */
2555
bfed75c6 2556STATIC const char*
cea2e8a9 2557S_incl_perldb(pTHX)
a0d0e21e 2558{
97aff369 2559 dVAR;
3280af22 2560 if (PL_perldb) {
9d4ba2ae 2561 const char * const pdb = PerlEnv_getenv("PERL5DB");
a0d0e21e
LW
2562
2563 if (pdb)
2564 return pdb;
93189314 2565 SETERRNO(0,SS_NORMAL);
a0d0e21e
LW
2566 return "BEGIN { require 'perl5db.pl' }";
2567 }
2568 return "";
2569}
2570
2571
16d20bd9 2572/* Encoded script support. filter_add() effectively inserts a
4e553d73 2573 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
2574 * Note that the filter function only applies to the current source file
2575 * (e.g., it will not affect files 'require'd or 'use'd by this one).
2576 *
2577 * The datasv parameter (which may be NULL) can be used to pass
2578 * private data to this instance of the filter. The filter function
2579 * can recover the SV using the FILTER_DATA macro and use it to
2580 * store private buffers and state information.
2581 *
2582 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 2583 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 2584 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
2585 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
2586 * private use must be set using malloc'd pointers.
2587 */
16d20bd9
AD
2588
2589SV *
864dbfa3 2590Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 2591{
97aff369 2592 dVAR;
f4c556ac 2593 if (!funcp)
a0714e2c 2594 return NULL;
f4c556ac 2595
3280af22
NIS
2596 if (!PL_rsfp_filters)
2597 PL_rsfp_filters = newAV();
16d20bd9 2598 if (!datasv)
561b68a9 2599 datasv = newSV(0);
862a34c6 2600 SvUPGRADE(datasv, SVt_PVIO);
8141890a 2601 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 2602 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 2603 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
2604 FPTR2DPTR(void *, IoANY(datasv)),
2605 SvPV_nolen(datasv)));
3280af22
NIS
2606 av_unshift(PL_rsfp_filters, 1);
2607 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
2608 return(datasv);
2609}
4e553d73 2610
16d20bd9
AD
2611
2612/* Delete most recently added instance of this filter function. */
a0d0e21e 2613void
864dbfa3 2614Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 2615{
97aff369 2616 dVAR;
e0c19803 2617 SV *datasv;
24801a4b 2618
33073adb 2619#ifdef DEBUGGING
55662e27
JH
2620 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
2621 FPTR2DPTR(void*, funcp)));
33073adb 2622#endif
3280af22 2623 if (!PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
2624 return;
2625 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 2626 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 2627 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 2628 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 2629 IoANY(datasv) = (void *)NULL;
3280af22 2630 sv_free(av_pop(PL_rsfp_filters));
e50aee73 2631
16d20bd9
AD
2632 return;
2633 }
2634 /* we need to search for the correct entry and clear it */
cea2e8a9 2635 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
2636}
2637
2638
1de9afcd
RGS
2639/* Invoke the idxth filter function for the current rsfp. */
2640/* maxlen 0 = read one text line */
16d20bd9 2641I32
864dbfa3 2642Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 2643{
97aff369 2644 dVAR;
16d20bd9
AD
2645 filter_t funcp;
2646 SV *datasv = NULL;
f482118e
NC
2647 /* This API is bad. It should have been using unsigned int for maxlen.
2648 Not sure if we want to change the API, but if not we should sanity
2649 check the value here. */
39cd7a59
NC
2650 const unsigned int correct_length
2651 = maxlen < 0 ?
2652#ifdef PERL_MICRO
2653 0x7FFFFFFF
2654#else
2655 INT_MAX
2656#endif
2657 : maxlen;
e50aee73 2658
3280af22 2659 if (!PL_rsfp_filters)
16d20bd9 2660 return -1;
1de9afcd 2661 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
2662 /* Provide a default input filter to make life easy. */
2663 /* Note that we append to the line. This is handy. */
f4c556ac
GS
2664 DEBUG_P(PerlIO_printf(Perl_debug_log,
2665 "filter_read %d: from rsfp\n", idx));
f482118e 2666 if (correct_length) {
16d20bd9
AD
2667 /* Want a block */
2668 int len ;
f54cb97a 2669 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
2670
2671 /* ensure buf_sv is large enough */
f482118e
NC
2672 SvGROW(buf_sv, (STRLEN)(old_len + correct_length)) ;
2673 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
2674 correct_length)) <= 0) {
3280af22 2675 if (PerlIO_error(PL_rsfp))
37120919
AD
2676 return -1; /* error */
2677 else
2678 return 0 ; /* end of file */
2679 }
16d20bd9
AD
2680 SvCUR_set(buf_sv, old_len + len) ;
2681 } else {
2682 /* Want a line */
3280af22
NIS
2683 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
2684 if (PerlIO_error(PL_rsfp))
37120919
AD
2685 return -1; /* error */
2686 else
2687 return 0 ; /* end of file */
2688 }
16d20bd9
AD
2689 }
2690 return SvCUR(buf_sv);
2691 }
2692 /* Skip this filter slot if filter has been deleted */
1de9afcd 2693 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
2694 DEBUG_P(PerlIO_printf(Perl_debug_log,
2695 "filter_read %d: skipped (filter deleted)\n",
2696 idx));
f482118e 2697 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
2698 }
2699 /* Get function pointer hidden within datasv */
8141890a 2700 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
2701 DEBUG_P(PerlIO_printf(Perl_debug_log,
2702 "filter_read %d: via function %p (%s)\n",
ca0270c4 2703 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
2704 /* Call function. The function is expected to */
2705 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 2706 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 2707 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
2708}
2709
76e3520e 2710STATIC char *
cea2e8a9 2711S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
16d20bd9 2712{
97aff369 2713 dVAR;
c39cd008 2714#ifdef PERL_CR_FILTER
3280af22 2715 if (!PL_rsfp_filters) {
c39cd008 2716 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
2717 }
2718#endif
3280af22 2719 if (PL_rsfp_filters) {
55497cff 2720 if (!append)
2721 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
2722 if (FILTER_READ(0, sv, 0) > 0)
2723 return ( SvPVX(sv) ) ;
2724 else
bd61b366 2725 return NULL ;
16d20bd9 2726 }
9d116dd7 2727 else
fd049845 2728 return (sv_gets(sv, fp, append));
a0d0e21e
LW
2729}
2730
01ec43d0 2731STATIC HV *
7fc63493 2732S_find_in_my_stash(pTHX_ const char *pkgname, I32 len)
def3634b 2733{
97aff369 2734 dVAR;
def3634b
GS
2735 GV *gv;
2736
01ec43d0 2737 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
2738 return PL_curstash;
2739
2740 if (len > 2 &&
2741 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 2742 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
2743 {
2744 return GvHV(gv); /* Foo:: */
def3634b
GS
2745 }
2746
2747 /* use constant CLASS => 'MyClass' */
c35e046a
AL
2748 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
2749 if (gv && GvCV(gv)) {
2750 SV * const sv = cv_const_sv(GvCV(gv));
2751 if (sv)
83003860 2752 pkgname = SvPV_nolen_const(sv);
def3634b
GS
2753 }
2754
2755 return gv_stashpv(pkgname, FALSE);
2756}
a0d0e21e 2757
5db06880
NC
2758#ifdef PERL_MAD
2759 /*
2760 * Perl_madlex
2761 * The intent of this yylex wrapper is to minimize the changes to the
2762 * tokener when we aren't interested in collecting madprops. It remains
2763 * to be seen how successful this strategy will be...
2764 */
2765
2766int
2767Perl_madlex(pTHX)
2768{
2769 int optype;
2770 char *s = PL_bufptr;
2771
cd81e915
NC
2772 /* make sure PL_thiswhite is initialized */
2773 PL_thiswhite = 0;
2774 PL_thismad = 0;
5db06880 2775
cd81e915 2776 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
2777 if (PL_pending_ident)
2778 return S_pending_ident(aTHX);
2779
2780 /* previous token ate up our whitespace? */
cd81e915
NC
2781 if (!PL_lasttoke && PL_nextwhite) {
2782 PL_thiswhite = PL_nextwhite;
2783 PL_nextwhite = 0;
5db06880
NC
2784 }
2785
2786 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
2787 PL_realtokenstart = -1;
2788 PL_thistoken = 0;
5db06880
NC
2789 optype = yylex();
2790 s = PL_bufptr;
cd81e915 2791 assert(PL_curforce < 0);
5db06880 2792
cd81e915
NC
2793 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
2794 if (!PL_thistoken) {
2795 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
2796 PL_thistoken = newSVpvn("",0);
5db06880 2797 else {
c35e046a 2798 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 2799 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
2800 }
2801 }
cd81e915
NC
2802 if (PL_thismad) /* install head */
2803 CURMAD('X', PL_thistoken);
5db06880
NC
2804 }
2805
2806 /* last whitespace of a sublex? */
cd81e915
NC
2807 if (optype == ')' && PL_endwhite) {
2808 CURMAD('X', PL_endwhite);
5db06880
NC
2809 }
2810
cd81e915 2811 if (!PL_thismad) {
5db06880
NC
2812
2813 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
2814 if (!PL_thiswhite && !PL_endwhite && !optype) {
2815 sv_free(PL_thistoken);
2816 PL_thistoken = 0;
5db06880
NC
2817 return 0;
2818 }
2819
2820 /* put off final whitespace till peg */
2821 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
2822 PL_nextwhite = PL_thiswhite;
2823 PL_thiswhite = 0;
5db06880 2824 }
cd81e915
NC
2825 else if (PL_thisopen) {
2826 CURMAD('q', PL_thisopen);
2827 if (PL_thistoken)
2828 sv_free(PL_thistoken);
2829 PL_thistoken = 0;
5db06880
NC
2830 }
2831 else {
2832 /* Store actual token text as madprop X */
cd81e915 2833 CURMAD('X', PL_thistoken);
5db06880
NC
2834 }
2835
cd81e915 2836 if (PL_thiswhite) {
5db06880 2837 /* add preceding whitespace as madprop _ */
cd81e915 2838 CURMAD('_', PL_thiswhite);
5db06880
NC
2839 }
2840
cd81e915 2841 if (PL_thisstuff) {
5db06880 2842 /* add quoted material as madprop = */
cd81e915 2843 CURMAD('=', PL_thisstuff);
5db06880
NC
2844 }
2845
cd81e915 2846 if (PL_thisclose) {
5db06880 2847 /* add terminating quote as madprop Q */
cd81e915 2848 CURMAD('Q', PL_thisclose);
5db06880
NC
2849 }
2850 }
2851
2852 /* special processing based on optype */
2853
2854 switch (optype) {
2855
2856 /* opval doesn't need a TOKEN since it can already store mp */
2857 case WORD:
2858 case METHOD:
2859 case FUNCMETH:
2860 case THING:
2861 case PMFUNC:
2862 case PRIVATEREF:
2863 case FUNC0SUB:
2864 case UNIOPSUB:
2865 case LSTOPSUB:
2866 if (yylval.opval)
cd81e915
NC
2867 append_madprops(PL_thismad, yylval.opval, 0);
2868 PL_thismad = 0;
5db06880
NC
2869 return optype;
2870
2871 /* fake EOF */
2872 case 0:
2873 optype = PEG;
cd81e915
NC
2874 if (PL_endwhite) {
2875 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
2876 PL_endwhite = 0;
5db06880
NC
2877 }
2878 break;
2879
2880 case ']':
2881 case '}':
cd81e915 2882 if (PL_faketokens)
5db06880
NC
2883 break;
2884 /* remember any fake bracket that lexer is about to discard */
2885 if (PL_lex_brackets == 1 &&
2886 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
2887 {
2888 s = PL_bufptr;
2889 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2890 s++;
2891 if (*s == '}') {
cd81e915
NC
2892 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
2893 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2894 PL_thiswhite = 0;
5db06880
NC
2895 PL_bufptr = s - 1;
2896 break; /* don't bother looking for trailing comment */
2897 }
2898 else
2899 s = PL_bufptr;
2900 }
2901 if (optype == ']')
2902 break;
2903 /* FALLTHROUGH */
2904
2905 /* attach a trailing comment to its statement instead of next token */
2906 case ';':
cd81e915 2907 if (PL_faketokens)
5db06880
NC
2908 break;
2909 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
2910 s = PL_bufptr;
2911 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
2912 s++;
2913 if (*s == '\n' || *s == '#') {
2914 while (s < PL_bufend && *s != '\n')
2915 s++;
2916 if (s < PL_bufend)
2917 s++;
cd81e915
NC
2918 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
2919 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
2920 PL_thiswhite = 0;
5db06880
NC
2921 PL_bufptr = s;
2922 }
2923 }
2924 break;
2925
2926 /* pval */
2927 case LABEL:
2928 break;
2929
2930 /* ival */
2931 default:
2932 break;
2933
2934 }
2935
2936 /* Create new token struct. Note: opvals return early above. */
cd81e915
NC
2937 yylval.tkval = newTOKEN(optype, yylval, PL_thismad);
2938 PL_thismad = 0;
5db06880
NC
2939 return optype;
2940}
2941#endif
2942
468aa647 2943STATIC char *
cc6ed77d 2944S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 2945 dVAR;
468aa647
RGS
2946 if (PL_expect != XSTATE)
2947 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
2948 is_use ? "use" : "no"));
29595ff2 2949 s = SKIPSPACE1(s);
468aa647
RGS
2950 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
2951 s = force_version(s, TRUE);
29595ff2 2952 if (*s == ';' || (s = SKIPSPACE1(s), *s == ';')) {
cd81e915 2953 start_force(PL_curforce);
9ded7720 2954 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
2955 force_next(WORD);
2956 }
2957 else if (*s == 'v') {
2958 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2959 s = force_version(s, FALSE);
2960 }
2961 }
2962 else {
2963 s = force_word(s,WORD,FALSE,TRUE,FALSE);
2964 s = force_version(s, FALSE);
2965 }
2966 yylval.ival = is_use;
2967 return s;
2968}
748a9306 2969#ifdef DEBUGGING
27da23d5 2970 static const char* const exp_name[] =
09bef843 2971 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 2972 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 2973 };
748a9306 2974#endif
463ee0b2 2975
02aa26ce
NT
2976/*
2977 yylex
2978
2979 Works out what to call the token just pulled out of the input
2980 stream. The yacc parser takes care of taking the ops we return and
2981 stitching them into a tree.
2982
2983 Returns:
2984 PRIVATEREF
2985
2986 Structure:
2987 if read an identifier
2988 if we're in a my declaration
2989 croak if they tried to say my($foo::bar)
2990 build the ops for a my() declaration
2991 if it's an access to a my() variable
2992 are we in a sort block?
2993 croak if my($a); $a <=> $b
2994 build ops for access to a my() variable
2995 if in a dq string, and they've said @foo and we can't find @foo
2996 croak
2997 build ops for a bareword
2998 if we already built the token before, use it.
2999*/
3000
20141f0e 3001
dba4d153
JH
3002#ifdef __SC__
3003#pragma segment Perl_yylex
3004#endif
dba4d153 3005int
dba4d153 3006Perl_yylex(pTHX)
20141f0e 3007{
97aff369 3008 dVAR;
3afc138a 3009 register char *s = PL_bufptr;
378cc40b 3010 register char *d;
463ee0b2 3011 STRLEN len;
aa7440fb 3012 bool bof = FALSE;
a687059c 3013
bbf60fe6 3014 DEBUG_T( {
396482e1 3015 SV* tmp = newSVpvs("");
b6007c36
DM
3016 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3017 (IV)CopLINE(PL_curcop),
3018 lex_state_names[PL_lex_state],
3019 exp_name[PL_expect],
3020 pv_display(tmp, s, strlen(s), 0, 60));
3021 SvREFCNT_dec(tmp);
bbf60fe6 3022 } );
02aa26ce 3023 /* check if there's an identifier for us to look at */
ba979b31 3024 if (PL_pending_ident)
bbf60fe6 3025 return REPORT(S_pending_ident(aTHX));
bbce6d69 3026
02aa26ce
NT
3027 /* no identifier pending identification */
3028
3280af22 3029 switch (PL_lex_state) {
79072805
LW
3030#ifdef COMMENTARY
3031 case LEX_NORMAL: /* Some compilers will produce faster */
3032 case LEX_INTERPNORMAL: /* code if we comment these out. */
3033 break;
3034#endif
3035
09bef843 3036 /* when we've already built the next token, just pull it out of the queue */
79072805 3037 case LEX_KNOWNEXT:
5db06880
NC
3038#ifdef PERL_MAD
3039 PL_lasttoke--;
3040 yylval = PL_nexttoke[PL_lasttoke].next_val;
3041 if (PL_madskills) {
cd81e915 3042 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3043 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915
NC
3044 if (PL_thismad && PL_thismad->mad_key == '_') {
3045 PL_thiswhite = (SV*)PL_thismad->mad_val;
3046 PL_thismad->mad_val = 0;
3047 mad_free(PL_thismad);
3048 PL_thismad = 0;
5db06880
NC
3049 }
3050 }
3051 if (!PL_lasttoke) {
3052 PL_lex_state = PL_lex_defer;
3053 PL_expect = PL_lex_expect;
3054 PL_lex_defer = LEX_NORMAL;
3055 if (!PL_nexttoke[PL_lasttoke].next_type)
3056 return yylex();
3057 }
3058#else
3280af22 3059 PL_nexttoke--;
5db06880 3060 yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3061 if (!PL_nexttoke) {
3062 PL_lex_state = PL_lex_defer;
3063 PL_expect = PL_lex_expect;
3064 PL_lex_defer = LEX_NORMAL;
463ee0b2 3065 }
5db06880
NC
3066#endif
3067#ifdef PERL_MAD
3068 /* FIXME - can these be merged? */
3069 return(PL_nexttoke[PL_lasttoke].next_type);
3070#else
bbf60fe6 3071 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3072#endif
79072805 3073
02aa26ce 3074 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3075 when we get here, PL_bufptr is at the \
02aa26ce 3076 */
79072805
LW
3077 case LEX_INTERPCASEMOD:
3078#ifdef DEBUGGING
3280af22 3079 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3080 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3081#endif
02aa26ce 3082 /* handle \E or end of string */
3280af22 3083 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3084 /* if at a \E */
3280af22 3085 if (PL_lex_casemods) {
f54cb97a 3086 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3087 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3088
3792a11b
NC
3089 if (PL_bufptr != PL_bufend
3090 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3091 PL_bufptr += 2;
3092 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3093#ifdef PERL_MAD
3094 if (PL_madskills)
cd81e915 3095 PL_thistoken = newSVpvn("\\E",2);
5db06880 3096#endif
a0d0e21e 3097 }
bbf60fe6 3098 return REPORT(')');
79072805 3099 }
5db06880
NC
3100#ifdef PERL_MAD
3101 while (PL_bufptr != PL_bufend &&
3102 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915
NC
3103 if (!PL_thiswhite)
3104 PL_thiswhite = newSVpvn("",0);
3105 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3106 PL_bufptr += 2;
3107 }
3108#else
3280af22
NIS
3109 if (PL_bufptr != PL_bufend)
3110 PL_bufptr += 2;
5db06880 3111#endif
3280af22 3112 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3113 return yylex();
79072805
LW
3114 }
3115 else {
607df283 3116 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3117 "### Saw case modifier\n"); });
3280af22 3118 s = PL_bufptr + 1;
6e909404 3119 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3120#ifdef PERL_MAD
cd81e915
NC
3121 if (!PL_thiswhite)
3122 PL_thiswhite = newSVpvn("",0);
3123 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3124#endif
89122651 3125 PL_bufptr = s + 3;
6e909404
JH
3126 PL_lex_state = LEX_INTERPCONCAT;
3127 return yylex();
a0d0e21e 3128 }
6e909404 3129 else {
90771dc0 3130 I32 tmp;
5db06880
NC
3131 if (!PL_madskills) /* when just compiling don't need correct */
3132 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
3133 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 3134 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
3135 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
3136 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 3137 return REPORT(')');
6e909404
JH
3138 }
3139 if (PL_lex_casemods > 10)
3140 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
3141 PL_lex_casestack[PL_lex_casemods++] = *s;
3142 PL_lex_casestack[PL_lex_casemods] = '\0';
3143 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 3144 start_force(PL_curforce);
9ded7720 3145 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 3146 force_next('(');
cd81e915 3147 start_force(PL_curforce);
6e909404 3148 if (*s == 'l')
9ded7720 3149 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 3150 else if (*s == 'u')
9ded7720 3151 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 3152 else if (*s == 'L')
9ded7720 3153 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 3154 else if (*s == 'U')
9ded7720 3155 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 3156 else if (*s == 'Q')
9ded7720 3157 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
3158 else
3159 Perl_croak(aTHX_ "panic: yylex");
5db06880 3160 if (PL_madskills) {
d4c19fe8 3161 SV* const tmpsv = newSVpvn("",0);
5db06880
NC
3162 Perl_sv_catpvf(aTHX_ tmpsv, "\\%c", *s);
3163 curmad('_', tmpsv);
3164 }
6e909404 3165 PL_bufptr = s + 1;
a0d0e21e 3166 }
79072805 3167 force_next(FUNC);
3280af22
NIS
3168 if (PL_lex_starts) {
3169 s = PL_bufptr;
3170 PL_lex_starts = 0;
5db06880
NC
3171#ifdef PERL_MAD
3172 if (PL_madskills) {
cd81e915
NC
3173 if (PL_thistoken)
3174 sv_free(PL_thistoken);
3175 PL_thistoken = newSVpvn("",0);
5db06880
NC
3176 }
3177#endif
131b3ad0
DM
3178 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3179 if (PL_lex_casemods == 1 && PL_lex_inpat)
3180 OPERATOR(',');
3181 else
3182 Aop(OP_CONCAT);
79072805
LW
3183 }
3184 else
cea2e8a9 3185 return yylex();
79072805
LW
3186 }
3187
55497cff 3188 case LEX_INTERPPUSH:
bbf60fe6 3189 return REPORT(sublex_push());
55497cff 3190
79072805 3191 case LEX_INTERPSTART:
3280af22 3192 if (PL_bufptr == PL_bufend)
bbf60fe6 3193 return REPORT(sublex_done());
607df283 3194 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3195 "### Interpolated variable\n"); });
3280af22
NIS
3196 PL_expect = XTERM;
3197 PL_lex_dojoin = (*PL_bufptr == '@');
3198 PL_lex_state = LEX_INTERPNORMAL;
3199 if (PL_lex_dojoin) {
cd81e915 3200 start_force(PL_curforce);
9ded7720 3201 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3202 force_next(',');
cd81e915 3203 start_force(PL_curforce);
a0d0e21e 3204 force_ident("\"", '$');
cd81e915 3205 start_force(PL_curforce);
9ded7720 3206 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3207 force_next('$');
cd81e915 3208 start_force(PL_curforce);
9ded7720 3209 NEXTVAL_NEXTTOKE.ival = 0;
79072805 3210 force_next('(');
cd81e915 3211 start_force(PL_curforce);
9ded7720 3212 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
3213 force_next(FUNC);
3214 }
3280af22
NIS
3215 if (PL_lex_starts++) {
3216 s = PL_bufptr;
5db06880
NC
3217#ifdef PERL_MAD
3218 if (PL_madskills) {
cd81e915
NC
3219 if (PL_thistoken)
3220 sv_free(PL_thistoken);
3221 PL_thistoken = newSVpvn("",0);
5db06880
NC
3222 }
3223#endif
131b3ad0
DM
3224 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3225 if (!PL_lex_casemods && PL_lex_inpat)
3226 OPERATOR(',');
3227 else
3228 Aop(OP_CONCAT);
79072805 3229 }
cea2e8a9 3230 return yylex();
79072805
LW
3231
3232 case LEX_INTERPENDMAYBE:
3280af22
NIS
3233 if (intuit_more(PL_bufptr)) {
3234 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
3235 break;
3236 }
3237 /* FALL THROUGH */
3238
3239 case LEX_INTERPEND:
3280af22
NIS
3240 if (PL_lex_dojoin) {
3241 PL_lex_dojoin = FALSE;
3242 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3243#ifdef PERL_MAD
3244 if (PL_madskills) {
cd81e915
NC
3245 if (PL_thistoken)
3246 sv_free(PL_thistoken);
3247 PL_thistoken = newSVpvn("",0);
5db06880
NC
3248 }
3249#endif
bbf60fe6 3250 return REPORT(')');
79072805 3251 }
43a16006 3252 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 3253 && SvEVALED(PL_lex_repl))
43a16006 3254 {
e9fa98b2 3255 if (PL_bufptr != PL_bufend)
cea2e8a9 3256 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 3257 PL_lex_repl = NULL;
e9fa98b2 3258 }
79072805
LW
3259 /* FALLTHROUGH */
3260 case LEX_INTERPCONCAT:
3261#ifdef DEBUGGING
3280af22 3262 if (PL_lex_brackets)
cea2e8a9 3263 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 3264#endif
3280af22 3265 if (PL_bufptr == PL_bufend)
bbf60fe6 3266 return REPORT(sublex_done());
79072805 3267
3280af22
NIS
3268 if (SvIVX(PL_linestr) == '\'') {
3269 SV *sv = newSVsv(PL_linestr);
3270 if (!PL_lex_inpat)
76e3520e 3271 sv = tokeq(sv);
3280af22 3272 else if ( PL_hints & HINT_NEW_RE )
b3ac6de7 3273 sv = new_constant(NULL, 0, "qr", sv, sv, "q");
79072805 3274 yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 3275 s = PL_bufend;
79072805
LW
3276 }
3277 else {
3280af22 3278 s = scan_const(PL_bufptr);
79072805 3279 if (*s == '\\')
3280af22 3280 PL_lex_state = LEX_INTERPCASEMOD;
79072805 3281 else
3280af22 3282 PL_lex_state = LEX_INTERPSTART;
79072805
LW
3283 }
3284
3280af22 3285 if (s != PL_bufptr) {
cd81e915 3286 start_force(PL_curforce);
5db06880
NC
3287 if (PL_madskills) {
3288 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
3289 }
9ded7720 3290 NEXTVAL_NEXTTOKE = yylval;
3280af22 3291 PL_expect = XTERM;
79072805 3292 force_next(THING);
131b3ad0 3293 if (PL_lex_starts++) {
5db06880
NC
3294#ifdef PERL_MAD
3295 if (PL_madskills) {
cd81e915
NC
3296 if (PL_thistoken)
3297 sv_free(PL_thistoken);
3298 PL_thistoken = newSVpvn("",0);
5db06880
NC
3299 }
3300#endif
131b3ad0
DM
3301 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
3302 if (!PL_lex_casemods && PL_lex_inpat)
3303 OPERATOR(',');
3304 else
3305 Aop(OP_CONCAT);
3306 }
79072805 3307 else {
3280af22 3308 PL_bufptr = s;
cea2e8a9 3309 return yylex();
79072805
LW
3310 }
3311 }
3312
cea2e8a9 3313 return yylex();
a0d0e21e 3314 case LEX_FORMLINE:
3280af22
NIS
3315 PL_lex_state = LEX_NORMAL;
3316 s = scan_formline(PL_bufptr);
3317 if (!PL_lex_formbrack)
a0d0e21e
LW
3318 goto rightbracket;
3319 OPERATOR(';');
79072805
LW
3320 }
3321
3280af22
NIS
3322 s = PL_bufptr;
3323 PL_oldoldbufptr = PL_oldbufptr;
3324 PL_oldbufptr = s;
463ee0b2
LW
3325
3326 retry:
5db06880 3327#ifdef PERL_MAD
cd81e915
NC
3328 if (PL_thistoken) {
3329 sv_free(PL_thistoken);
3330 PL_thistoken = 0;
5db06880 3331 }
cd81e915 3332 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 3333#endif
378cc40b
LW
3334 switch (*s) {
3335 default:
7e2040f0 3336 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 3337 goto keylookup;
cea2e8a9 3338 Perl_croak(aTHX_ "Unrecognized character \\x%02X", *s & 255);
e929a76b
LW
3339 case 4:
3340 case 26:
3341 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 3342 case 0:
5db06880
NC
3343#ifdef PERL_MAD
3344 if (PL_madskills)
cd81e915 3345 PL_faketokens = 0;
5db06880 3346#endif
3280af22
NIS
3347 if (!PL_rsfp) {
3348 PL_last_uni = 0;
3349 PL_last_lop = 0;
c5ee2135 3350 if (PL_lex_brackets) {
0bd48802
AL
3351 yyerror(PL_lex_formbrack
3352 ? "Format not terminated"
3353 : "Missing right curly or square bracket");
c5ee2135 3354 }
4e553d73 3355 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 3356 "### Tokener got EOF\n");
5f80b19c 3357 } );
79072805 3358 TOKEN(0);
463ee0b2 3359 }
3280af22 3360 if (s++ < PL_bufend)
a687059c 3361 goto retry; /* ignore stray nulls */
3280af22
NIS
3362 PL_last_uni = 0;
3363 PL_last_lop = 0;
3364 if (!PL_in_eval && !PL_preambled) {
3365 PL_preambled = TRUE;
5db06880
NC
3366#ifdef PERL_MAD
3367 if (PL_madskills)
cd81e915 3368 PL_faketokens = 1;
5db06880 3369#endif
3280af22
NIS
3370 sv_setpv(PL_linestr,incl_perldb());
3371 if (SvCUR(PL_linestr))
396482e1 3372 sv_catpvs(PL_linestr,";");
3280af22
NIS
3373 if (PL_preambleav){
3374 while(AvFILLp(PL_preambleav) >= 0) {
3375 SV *tmpsv = av_shift(PL_preambleav);
3376 sv_catsv(PL_linestr, tmpsv);
396482e1 3377 sv_catpvs(PL_linestr, ";");
91b7def8 3378 sv_free(tmpsv);
3379 }
3280af22
NIS
3380 sv_free((SV*)PL_preambleav);
3381 PL_preambleav = NULL;
91b7def8 3382 }
3280af22 3383 if (PL_minus_n || PL_minus_p) {
396482e1 3384 sv_catpvs(PL_linestr, "LINE: while (<>) {");
3280af22 3385 if (PL_minus_l)
396482e1 3386 sv_catpvs(PL_linestr,"chomp;");
3280af22 3387 if (PL_minus_a) {
3280af22 3388 if (PL_minus_F) {
3792a11b
NC
3389 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
3390 || *PL_splitstr == '"')
3280af22 3391 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 3392 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 3393 else {
c8ef6a4b
NC
3394 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
3395 bytes can be used as quoting characters. :-) */
dd374669 3396 const char *splits = PL_splitstr;
91d456ae 3397 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
3398 do {
3399 /* Need to \ \s */
dd374669
AL
3400 if (*splits == '\\')
3401 sv_catpvn(PL_linestr, splits, 1);
3402 sv_catpvn(PL_linestr, splits, 1);
3403 } while (*splits++);
48c4c863
NC
3404 /* This loop will embed the trailing NUL of
3405 PL_linestr as the last thing it does before
3406 terminating. */
396482e1 3407 sv_catpvs(PL_linestr, ");");
54310121 3408 }
2304df62
AD
3409 }
3410 else
396482e1 3411 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 3412 }
79072805 3413 }
bc9b29db 3414 if (PL_minus_E)
396482e1
GA
3415 sv_catpvs(PL_linestr,"use feature ':5.10';");
3416 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
3417 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3418 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3419 PL_last_lop = PL_last_uni = NULL;
3280af22 3420 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3421 SV * const sv = newSV(0);
a0d0e21e
LW
3422
3423 sv_upgrade(sv, SVt_PVMG);
3280af22 3424 sv_setsv(sv,PL_linestr);
0ac0412a 3425 (void)SvIOK_on(sv);
45977657 3426 SvIV_set(sv, 0);
36c7798d 3427 av_store(CopFILEAVx(PL_curcop),(I32)CopLINE(PL_curcop),sv);
a0d0e21e 3428 }
79072805 3429 goto retry;
a687059c 3430 }
e929a76b 3431 do {
aa7440fb 3432 bof = PL_rsfp ? TRUE : FALSE;
bd61b366 3433 if ((s = filter_gets(PL_linestr, PL_rsfp, 0)) == NULL) {
7e28d3af 3434 fake_eof:
5db06880 3435#ifdef PERL_MAD
cd81e915 3436 PL_realtokenstart = -1;
5db06880 3437#endif
7e28d3af
JH
3438 if (PL_rsfp) {
3439 if (PL_preprocess && !PL_in_eval)
3440 (void)PerlProc_pclose(PL_rsfp);
3441 else if ((PerlIO *)PL_rsfp == PerlIO_stdin())
3442 PerlIO_clearerr(PL_rsfp);
3443 else
3444 (void)PerlIO_close(PL_rsfp);
4608196e 3445 PL_rsfp = NULL;
7e28d3af
JH
3446 PL_doextract = FALSE;
3447 }
3448 if (!PL_in_eval && (PL_minus_n || PL_minus_p)) {
5db06880
NC
3449#ifdef PERL_MAD
3450 if (PL_madskills)
cd81e915 3451 PL_faketokens = 1;
5db06880 3452#endif
a23c4656
NC
3453 sv_setpv(PL_linestr,PL_minus_p
3454 ? ";}continue{print;}" : ";}");
7e28d3af
JH
3455 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3456 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3457 PL_last_lop = PL_last_uni = NULL;
7e28d3af
JH
3458 PL_minus_n = PL_minus_p = 0;
3459 goto retry;
3460 }
3461 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
bd61b366 3462 PL_last_lop = PL_last_uni = NULL;
c69006e4 3463 sv_setpvn(PL_linestr,"",0);
7e28d3af
JH
3464 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
3465 }
7aa207d6
JH
3466 /* If it looks like the start of a BOM or raw UTF-16,
3467 * check if it in fact is. */
3468 else if (bof &&
3469 (*s == 0 ||
3470 *(U8*)s == 0xEF ||
3471 *(U8*)s >= 0xFE ||
3472 s[1] == 0)) {
226017aa 3473#ifdef PERLIO_IS_STDIO
e3f494f1
JH
3474# ifdef __GNU_LIBRARY__
3475# if __GNU_LIBRARY__ == 1 /* Linux glibc5 */
226017aa
DD
3476# define FTELL_FOR_PIPE_IS_BROKEN
3477# endif
e3f494f1
JH
3478# else
3479# ifdef __GLIBC__
3480# if __GLIBC__ == 1 /* maybe some glibc5 release had it like this? */
3481# define FTELL_FOR_PIPE_IS_BROKEN
3482# endif
3483# endif
226017aa
DD
3484# endif
3485#endif
3486#ifdef FTELL_FOR_PIPE_IS_BROKEN
3487 /* This loses the possibility to detect the bof
3488 * situation on perl -P when the libc5 is being used.
3489 * Workaround? Maybe attach some extra state to PL_rsfp?
3490 */
3491 if (!PL_preprocess)
7e28d3af 3492 bof = PerlIO_tell(PL_rsfp) == SvCUR(PL_linestr);
226017aa 3493#else
eb160463 3494 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
226017aa 3495#endif
7e28d3af 3496 if (bof) {
3280af22 3497 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 3498 s = swallow_bom((U8*)s);
e929a76b 3499 }
378cc40b 3500 }
3280af22 3501 if (PL_doextract) {
a0d0e21e 3502 /* Incest with pod. */
5db06880
NC
3503#ifdef PERL_MAD
3504 if (PL_madskills)
cd81e915 3505 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 3506#endif
a0d0e21e 3507 if (*s == '=' && strnEQ(s, "=cut", 4)) {
c69006e4 3508 sv_setpvn(PL_linestr, "", 0);
3280af22
NIS
3509 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
3510 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 3511 PL_last_lop = PL_last_uni = NULL;
3280af22 3512 PL_doextract = FALSE;
a0d0e21e 3513 }
4e553d73 3514 }
463ee0b2 3515 incline(s);
3280af22
NIS
3516 } while (PL_doextract);
3517 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3518 if (PERLDB_LINE && PL_curstash != PL_debstash) {
561b68a9 3519 SV * const sv = newSV(0);
a687059c 3520
93a17b20 3521 sv_upgrade(sv, SVt_PVMG);
3280af22 3522 sv_setsv(sv,PL_linestr);
0ac0412a