This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Allow "{sub f}" to compile
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a
PP
152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
917=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
939Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
940{
941 char *bufptr;
942 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
943 if (flags & ~(LEX_STUFF_UTF8))
944 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
945 if (UTF) {
946 if (flags & LEX_STUFF_UTF8) {
947 goto plain_copy;
948 } else {
949 STRLEN highhalf = 0;
950 char *p, *e = pv+len;
951 for (p = pv; p != e; p++)
952 highhalf += !!(((U8)*p) & 0x80);
953 if (!highhalf)
954 goto plain_copy;
955 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
956 bufptr = PL_parser->bufptr;
957 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
958 PL_parser->bufend += len+highhalf;
959 for (p = pv; p != e; p++) {
960 U8 c = (U8)*p;
961 if (c & 0x80) {
962 *bufptr++ = (char)(0xc0 | (c >> 6));
963 *bufptr++ = (char)(0x80 | (c & 0x3f));
964 } else {
965 *bufptr++ = (char)c;
966 }
967 }
968 }
969 } else {
970 if (flags & LEX_STUFF_UTF8) {
971 STRLEN highhalf = 0;
972 char *p, *e = pv+len;
973 for (p = pv; p != e; p++) {
974 U8 c = (U8)*p;
975 if (c >= 0xc4) {
976 Perl_croak(aTHX_ "Lexing code attempted to stuff "
977 "non-Latin-1 character into Latin-1 input");
978 } else if (c >= 0xc2 && p+1 != e &&
979 (((U8)p[1]) & 0xc0) == 0x80) {
980 p++;
981 highhalf++;
982 } else if (c >= 0x80) {
983 /* malformed UTF-8 */
984 ENTER;
985 SAVESPTR(PL_warnhook);
986 PL_warnhook = PERL_WARNHOOK_FATAL;
987 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
988 LEAVE;
989 }
990 }
991 if (!highhalf)
992 goto plain_copy;
993 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
994 bufptr = PL_parser->bufptr;
995 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
996 PL_parser->bufend += len-highhalf;
997 for (p = pv; p != e; p++) {
998 U8 c = (U8)*p;
999 if (c & 0x80) {
1000 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001 p++;
1002 } else {
1003 *bufptr++ = (char)c;
1004 }
1005 }
1006 } else {
1007 plain_copy:
1008 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009 bufptr = PL_parser->bufptr;
1010 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1013 }
1014 }
1015}
1016
1017/*
1018=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022reallocating the buffer if necessary. This means that lexing code that
1023runs later will see the characters as if they had appeared in the input.
1024It is not recommended to do this as part of normal parsing, and most
1025uses of this facility run the risk of the inserted characters being
1026interpreted in an unintended manner.
1027
1028The string to be inserted is the string value of I<sv>. The characters
1029are recoded for the lexer buffer, according to how the buffer is currently
1030being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1031not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032need to construct a scalar.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039{
1040 char *pv;
1041 STRLEN len;
1042 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043 if (flags)
1044 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045 pv = SvPV(sv, len);
1046 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047}
1048
1049/*
1050=for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1054This hides the discarded text from any lexing code that runs later,
1055as if the text had never appeared.
1056
1057This is not the normal way to consume lexed text. For that, use
1058L</lex_read_to>.
1059
1060=cut
1061*/
1062
1063void
1064Perl_lex_unstuff(pTHX_ char *ptr)
1065{
1066 char *buf, *bufend;
1067 STRLEN unstuff_len;
1068 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069 buf = PL_parser->bufptr;
1070 if (ptr < buf)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072 if (ptr == buf)
1073 return;
1074 bufend = PL_parser->bufend;
1075 if (ptr > bufend)
1076 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077 unstuff_len = ptr - buf;
1078 Move(ptr, buf, bufend+1-ptr, char);
1079 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080 PL_parser->bufend = bufend - unstuff_len;
1081}
1082
1083/*
1084=for apidoc Amx|void|lex_read_to|char *ptr
1085
1086Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088performing the correct bookkeeping whenever a newline character is passed.
1089This is the normal way to consume lexed text.
1090
1091Interpretation of the buffer's octets can be abstracted out by
1092using the slightly higher-level functions L</lex_peek_unichar> and
1093L</lex_read_unichar>.
1094
1095=cut
1096*/
1097
1098void
1099Perl_lex_read_to(pTHX_ char *ptr)
1100{
1101 char *s;
1102 PERL_ARGS_ASSERT_LEX_READ_TO;
1103 s = PL_parser->bufptr;
1104 if (ptr < s || ptr > PL_parser->bufend)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106 for (; s != ptr; s++)
1107 if (*s == '\n') {
1108 CopLINE_inc(PL_curcop);
1109 PL_parser->linestart = s+1;
1110 }
1111 PL_parser->bufptr = ptr;
1112}
1113
1114/*
1115=for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118up to I<ptr>. The remaining content of the buffer will be moved, and
1119all pointers into the buffer updated appropriately. I<ptr> must not
1120be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121it is not permitted to discard text that has yet to be lexed.
1122
1123Normally it is not necessarily to do this directly, because it suffices to
1124use the implicit discarding behaviour of L</lex_next_chunk> and things
1125based on it. However, if a token stretches across multiple lines,
1126and the lexing code has kept multiple lines of text in the buffer fof
1127that purpose, then after completion of the token it would be wise to
1128explicitly discard the now-unneeded earlier lines, to avoid future
1129multi-line tokens growing the buffer without bound.
1130
1131=cut
1132*/
1133
1134void
1135Perl_lex_discard_to(pTHX_ char *ptr)
1136{
1137 char *buf;
1138 STRLEN discard_len;
1139 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140 buf = SvPVX(PL_parser->linestr);
1141 if (ptr < buf)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143 if (ptr == buf)
1144 return;
1145 if (ptr > PL_parser->bufptr)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147 discard_len = ptr - buf;
1148 if (PL_parser->oldbufptr < ptr)
1149 PL_parser->oldbufptr = ptr;
1150 if (PL_parser->oldoldbufptr < ptr)
1151 PL_parser->oldoldbufptr = ptr;
1152 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153 PL_parser->last_uni = NULL;
1154 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155 PL_parser->last_lop = NULL;
1156 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158 PL_parser->bufend -= discard_len;
1159 PL_parser->bufptr -= discard_len;
1160 PL_parser->oldbufptr -= discard_len;
1161 PL_parser->oldoldbufptr -= discard_len;
1162 if (PL_parser->last_uni)
1163 PL_parser->last_uni -= discard_len;
1164 if (PL_parser->last_lop)
1165 PL_parser->last_lop -= discard_len;
1166}
1167
1168/*
1169=for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171Reads in the next chunk of text to be lexed, appending it to
1172L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1173looked to the end of the current chunk and wants to know more. It is
1174usual, but not necessary, for lexing to have consumed the entirety of
1175the current chunk at this time.
1176
1177If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178chunk (i.e., the current chunk has been entirely consumed), normally the
1179current chunk will be discarded at the same time that the new chunk is
1180read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181will not be discarded. If the current chunk has not been entirely
1182consumed, then it will not be discarded regardless of the flag.
1183
1184Returns true if some new text was added to the buffer, or false if the
1185buffer has reached the end of the input text.
1186
1187=cut
1188*/
1189
1190#define LEX_FAKE_EOF 0x80000000
1191
1192bool
1193Perl_lex_next_chunk(pTHX_ U32 flags)
1194{
1195 SV *linestr;
1196 char *buf;
1197 STRLEN old_bufend_pos, new_bufend_pos;
1198 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1200 bool got_some_for_debugger = 0;
f0e67a1d
Z
1201 bool got_some;
1202 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1204 linestr = PL_parser->linestr;
1205 buf = SvPVX(linestr);
1206 if (!(flags & LEX_KEEP_PREVIOUS) &&
1207 PL_parser->bufptr == PL_parser->bufend) {
1208 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209 linestart_pos = 0;
1210 if (PL_parser->last_uni != PL_parser->bufend)
1211 PL_parser->last_uni = NULL;
1212 if (PL_parser->last_lop != PL_parser->bufend)
1213 PL_parser->last_lop = NULL;
1214 last_uni_pos = last_lop_pos = 0;
1215 *buf = 0;
1216 SvCUR(linestr) = 0;
1217 } else {
1218 old_bufend_pos = PL_parser->bufend - buf;
1219 bufptr_pos = PL_parser->bufptr - buf;
1220 oldbufptr_pos = PL_parser->oldbufptr - buf;
1221 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222 linestart_pos = PL_parser->linestart - buf;
1223 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225 }
1226 if (flags & LEX_FAKE_EOF) {
1227 goto eof;
1228 } else if (!PL_parser->rsfp) {
1229 got_some = 0;
1230 } else if (filter_gets(linestr, old_bufend_pos)) {
1231 got_some = 1;
17cc9359 1232 got_some_for_debugger = 1;
f0e67a1d 1233 } else {
580561a3
Z
1234 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1235 sv_setpvs(linestr, "");
f0e67a1d
Z
1236 eof:
1237 /* End of real input. Close filehandle (unless it was STDIN),
1238 * then add implicit termination.
1239 */
1240 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241 PerlIO_clearerr(PL_parser->rsfp);
1242 else if (PL_parser->rsfp)
1243 (void)PerlIO_close(PL_parser->rsfp);
1244 PL_parser->rsfp = NULL;
1245 PL_doextract = FALSE;
1246#ifdef PERL_MAD
1247 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248 PL_faketokens = 1;
1249#endif
1250 if (!PL_in_eval && PL_minus_p) {
1251 sv_catpvs(linestr,
1252 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253 PL_minus_n = PL_minus_p = 0;
1254 } else if (!PL_in_eval && PL_minus_n) {
1255 sv_catpvs(linestr, /*{*/";}");
1256 PL_minus_n = 0;
1257 } else
1258 sv_catpvs(linestr, ";");
1259 got_some = 1;
1260 }
1261 buf = SvPVX(linestr);
1262 new_bufend_pos = SvCUR(linestr);
1263 PL_parser->bufend = buf + new_bufend_pos;
1264 PL_parser->bufptr = buf + bufptr_pos;
1265 PL_parser->oldbufptr = buf + oldbufptr_pos;
1266 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267 PL_parser->linestart = buf + linestart_pos;
1268 if (PL_parser->last_uni)
1269 PL_parser->last_uni = buf + last_uni_pos;
1270 if (PL_parser->last_lop)
1271 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1272 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1273 PL_curstash != PL_debstash) {
1274 /* debugger active and we're not compiling the debugger code,
1275 * so store the line into the debugger's array of lines
1276 */
1277 update_debugger_info(NULL, buf+old_bufend_pos,
1278 new_bufend_pos-old_bufend_pos);
1279 }
1280 return got_some;
1281}
1282
1283/*
1284=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286Looks ahead one (Unicode) character in the text currently being lexed.
1287Returns the codepoint (unsigned integer value) of the next character,
1288or -1 if lexing has reached the end of the input text. To consume the
1289peeked character, use L</lex_read_unichar>.
1290
1291If the next character is in (or extends into) the next chunk of input
1292text, the next chunk will be read in. Normally the current chunk will be
1293discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294then the current chunk will not be discarded.
1295
1296If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297is encountered, an exception is generated.
1298
1299=cut
1300*/
1301
1302I32
1303Perl_lex_peek_unichar(pTHX_ U32 flags)
1304{
1305 char *s, *bufend;
1306 if (flags & ~(LEX_KEEP_PREVIOUS))
1307 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308 s = PL_parser->bufptr;
1309 bufend = PL_parser->bufend;
1310 if (UTF) {
1311 U8 head;
1312 I32 unichar;
1313 STRLEN len, retlen;
1314 if (s == bufend) {
1315 if (!lex_next_chunk(flags))
1316 return -1;
1317 s = PL_parser->bufptr;
1318 bufend = PL_parser->bufend;
1319 }
1320 head = (U8)*s;
1321 if (!(head & 0x80))
1322 return head;
1323 if (head & 0x40) {
1324 len = PL_utf8skip[head];
1325 while ((STRLEN)(bufend-s) < len) {
1326 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327 break;
1328 s = PL_parser->bufptr;
1329 bufend = PL_parser->bufend;
1330 }
1331 }
1332 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333 if (retlen == (STRLEN)-1) {
1334 /* malformed UTF-8 */
1335 ENTER;
1336 SAVESPTR(PL_warnhook);
1337 PL_warnhook = PERL_WARNHOOK_FATAL;
1338 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339 LEAVE;
1340 }
1341 return unichar;
1342 } else {
1343 if (s == bufend) {
1344 if (!lex_next_chunk(flags))
1345 return -1;
1346 s = PL_parser->bufptr;
1347 }
1348 return (U8)*s;
1349 }
1350}
1351
1352/*
1353=for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355Reads the next (Unicode) character in the text currently being lexed.
1356Returns the codepoint (unsigned integer value) of the character read,
1357and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358if lexing has reached the end of the input text. To non-destructively
1359examine the next character, use L</lex_peek_unichar> instead.
1360
1361If the next character is in (or extends into) the next chunk of input
1362text, the next chunk will be read in. Normally the current chunk will be
1363discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364then the current chunk will not be discarded.
1365
1366If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367is encountered, an exception is generated.
1368
1369=cut
1370*/
1371
1372I32
1373Perl_lex_read_unichar(pTHX_ U32 flags)
1374{
1375 I32 c;
1376 if (flags & ~(LEX_KEEP_PREVIOUS))
1377 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378 c = lex_peek_unichar(flags);
1379 if (c != -1) {
1380 if (c == '\n')
1381 CopLINE_inc(PL_curcop);
1382 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383 }
1384 return c;
1385}
1386
1387/*
1388=for apidoc Amx|void|lex_read_space|U32 flags
1389
1390Reads optional spaces, in Perl style, in the text currently being
1391lexed. The spaces may include ordinary whitespace characters and
1392Perl-style comments. C<#line> directives are processed if encountered.
1393L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394at a non-space character (or the end of the input text).
1395
1396If spaces extend into the next chunk of input text, the next chunk will
1397be read in. Normally the current chunk will be discarded at the same
1398time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399chunk will not be discarded.
1400
1401=cut
1402*/
1403
1404void
1405Perl_lex_read_space(pTHX_ U32 flags)
1406{
1407 char *s, *bufend;
1408 bool need_incline = 0;
1409 if (flags & ~(LEX_KEEP_PREVIOUS))
1410 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1411#ifdef PERL_MAD
1412 if (PL_skipwhite) {
1413 sv_free(PL_skipwhite);
1414 PL_skipwhite = NULL;
1415 }
1416 if (PL_madskills)
1417 PL_skipwhite = newSVpvs("");
1418#endif /* PERL_MAD */
1419 s = PL_parser->bufptr;
1420 bufend = PL_parser->bufend;
1421 while (1) {
1422 char c = *s;
1423 if (c == '#') {
1424 do {
1425 c = *++s;
1426 } while (!(c == '\n' || (c == 0 && s == bufend)));
1427 } else if (c == '\n') {
1428 s++;
1429 PL_parser->linestart = s;
1430 if (s == bufend)
1431 need_incline = 1;
1432 else
1433 incline(s);
1434 } else if (isSPACE(c)) {
1435 s++;
1436 } else if (c == 0 && s == bufend) {
1437 bool got_more;
1438#ifdef PERL_MAD
1439 if (PL_madskills)
1440 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1441#endif /* PERL_MAD */
1442 PL_parser->bufptr = s;
1443 CopLINE_inc(PL_curcop);
1444 got_more = lex_next_chunk(flags);
1445 CopLINE_dec(PL_curcop);
1446 s = PL_parser->bufptr;
1447 bufend = PL_parser->bufend;
1448 if (!got_more)
1449 break;
1450 if (need_incline && PL_parser->rsfp) {
1451 incline(s);
1452 need_incline = 0;
1453 }
1454 } else {
1455 break;
1456 }
1457 }
1458#ifdef PERL_MAD
1459 if (PL_madskills)
1460 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1461#endif /* PERL_MAD */
1462 PL_parser->bufptr = s;
1463}
1464
1465/*
ffb4593c
NT
1466 * S_incline
1467 * This subroutine has nothing to do with tilting, whether at windmills
1468 * or pinball tables. Its name is short for "increment line". It
57843af0 1469 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1470 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1471 * # line 500 "foo.pm"
1472 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1473 */
1474
76e3520e 1475STATIC void
d9095cec 1476S_incline(pTHX_ const char *s)
463ee0b2 1477{
97aff369 1478 dVAR;
d9095cec
NC
1479 const char *t;
1480 const char *n;
1481 const char *e;
463ee0b2 1482
7918f24d
NC
1483 PERL_ARGS_ASSERT_INCLINE;
1484
57843af0 1485 CopLINE_inc(PL_curcop);
463ee0b2
LW
1486 if (*s++ != '#')
1487 return;
d4c19fe8
AL
1488 while (SPACE_OR_TAB(*s))
1489 s++;
73659bf1
GS
1490 if (strnEQ(s, "line", 4))
1491 s += 4;
1492 else
1493 return;
084592ab 1494 if (SPACE_OR_TAB(*s))
73659bf1 1495 s++;
4e553d73 1496 else
73659bf1 1497 return;
d4c19fe8
AL
1498 while (SPACE_OR_TAB(*s))
1499 s++;
463ee0b2
LW
1500 if (!isDIGIT(*s))
1501 return;
d4c19fe8 1502
463ee0b2
LW
1503 n = s;
1504 while (isDIGIT(*s))
1505 s++;
07714eb4 1506 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1507 return;
bf4acbe4 1508 while (SPACE_OR_TAB(*s))
463ee0b2 1509 s++;
73659bf1 1510 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1511 s++;
73659bf1
GS
1512 e = t + 1;
1513 }
463ee0b2 1514 else {
c35e046a
AL
1515 t = s;
1516 while (!isSPACE(*t))
1517 t++;
73659bf1 1518 e = t;
463ee0b2 1519 }
bf4acbe4 1520 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1521 e++;
1522 if (*e != '\n' && *e != '\0')
1523 return; /* false alarm */
1524
f4dd75d9 1525 if (t - s > 0) {
d9095cec 1526 const STRLEN len = t - s;
8a5ee598 1527#ifndef USE_ITHREADS
19bad673
NC
1528 SV *const temp_sv = CopFILESV(PL_curcop);
1529 const char *cf;
1530 STRLEN tmplen;
1531
1532 if (temp_sv) {
1533 cf = SvPVX(temp_sv);
1534 tmplen = SvCUR(temp_sv);
1535 } else {
1536 cf = NULL;
1537 tmplen = 0;
1538 }
1539
42d9b98d 1540 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1541 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1542 * to *{"::_<newfilename"} */
44867030
NC
1543 /* However, the long form of evals is only turned on by the
1544 debugger - usually they're "(eval %lu)" */
1545 char smallbuf[128];
1546 char *tmpbuf;
1547 GV **gvp;
d9095cec 1548 STRLEN tmplen2 = len;
798b63bc 1549 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1550 tmpbuf = smallbuf;
1551 else
2ae0db35 1552 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1553 tmpbuf[0] = '_';
1554 tmpbuf[1] = '<';
2ae0db35 1555 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1556 tmplen += 2;
8a5ee598
RGS
1557 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1558 if (gvp) {
44867030
NC
1559 char *tmpbuf2;
1560 GV *gv2;
1561
1562 if (tmplen2 + 2 <= sizeof smallbuf)
1563 tmpbuf2 = smallbuf;
1564 else
1565 Newx(tmpbuf2, tmplen2 + 2, char);
1566
1567 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1568 /* Either they malloc'd it, or we malloc'd it,
1569 so no prefix is present in ours. */
1570 tmpbuf2[0] = '_';
1571 tmpbuf2[1] = '<';
1572 }
1573
1574 memcpy(tmpbuf2 + 2, s, tmplen2);
1575 tmplen2 += 2;
1576
8a5ee598 1577 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1578 if (!isGV(gv2)) {
8a5ee598 1579 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1580 /* adjust ${"::_<newfilename"} to store the new file name */
1581 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1582 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1583 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1584 }
44867030
NC
1585
1586 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1587 }
e66cf94c 1588 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1589 }
8a5ee598 1590#endif
05ec9bb3 1591 CopFILE_free(PL_curcop);
d9095cec 1592 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1593 }
57843af0 1594 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1595}
1596
29595ff2 1597#ifdef PERL_MAD
cd81e915 1598/* skip space before PL_thistoken */
29595ff2
NC
1599
1600STATIC char *
1601S_skipspace0(pTHX_ register char *s)
1602{
7918f24d
NC
1603 PERL_ARGS_ASSERT_SKIPSPACE0;
1604
29595ff2
NC
1605 s = skipspace(s);
1606 if (!PL_madskills)
1607 return s;
cd81e915
NC
1608 if (PL_skipwhite) {
1609 if (!PL_thiswhite)
6b29d1f5 1610 PL_thiswhite = newSVpvs("");
cd81e915
NC
1611 sv_catsv(PL_thiswhite, PL_skipwhite);
1612 sv_free(PL_skipwhite);
1613 PL_skipwhite = 0;
1614 }
1615 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1616 return s;
1617}
1618
cd81e915 1619/* skip space after PL_thistoken */
29595ff2
NC
1620
1621STATIC char *
1622S_skipspace1(pTHX_ register char *s)
1623{
d4c19fe8 1624 const char *start = s;
29595ff2
NC
1625 I32 startoff = start - SvPVX(PL_linestr);
1626
7918f24d
NC
1627 PERL_ARGS_ASSERT_SKIPSPACE1;
1628
29595ff2
NC
1629 s = skipspace(s);
1630 if (!PL_madskills)
1631 return s;
1632 start = SvPVX(PL_linestr) + startoff;
cd81e915 1633 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1634 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1635 PL_thistoken = newSVpvn(tstart, start - tstart);
1636 }
1637 PL_realtokenstart = -1;
1638 if (PL_skipwhite) {
1639 if (!PL_nextwhite)
6b29d1f5 1640 PL_nextwhite = newSVpvs("");
cd81e915
NC
1641 sv_catsv(PL_nextwhite, PL_skipwhite);
1642 sv_free(PL_skipwhite);
1643 PL_skipwhite = 0;
29595ff2
NC
1644 }
1645 return s;
1646}
1647
1648STATIC char *
1649S_skipspace2(pTHX_ register char *s, SV **svp)
1650{
c35e046a
AL
1651 char *start;
1652 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1653 const I32 startoff = s - SvPVX(PL_linestr);
1654
7918f24d
NC
1655 PERL_ARGS_ASSERT_SKIPSPACE2;
1656
29595ff2
NC
1657 s = skipspace(s);
1658 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1659 if (!PL_madskills || !svp)
1660 return s;
1661 start = SvPVX(PL_linestr) + startoff;
cd81e915 1662 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1663 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1664 PL_thistoken = newSVpvn(tstart, start - tstart);
1665 PL_realtokenstart = -1;
29595ff2 1666 }
cd81e915 1667 if (PL_skipwhite) {
29595ff2 1668 if (!*svp)
6b29d1f5 1669 *svp = newSVpvs("");
cd81e915
NC
1670 sv_setsv(*svp, PL_skipwhite);
1671 sv_free(PL_skipwhite);
1672 PL_skipwhite = 0;
29595ff2
NC
1673 }
1674
1675 return s;
1676}
1677#endif
1678
80a702cd 1679STATIC void
15f169a1 1680S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1681{
1682 AV *av = CopFILEAVx(PL_curcop);
1683 if (av) {
b9f83d2f 1684 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1685 if (orig_sv)
1686 sv_setsv(sv, orig_sv);
1687 else
1688 sv_setpvn(sv, buf, len);
80a702cd
RGS
1689 (void)SvIOK_on(sv);
1690 SvIV_set(sv, 0);
1691 av_store(av, (I32)CopLINE(PL_curcop), sv);
1692 }
1693}
1694
ffb4593c
NT
1695/*
1696 * S_skipspace
1697 * Called to gobble the appropriate amount and type of whitespace.
1698 * Skips comments as well.
1699 */
1700
76e3520e 1701STATIC char *
cea2e8a9 1702S_skipspace(pTHX_ register char *s)
a687059c 1703{
5db06880 1704#ifdef PERL_MAD
f0e67a1d
Z
1705 char *start = s;
1706#endif /* PERL_MAD */
7918f24d 1707 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1708#ifdef PERL_MAD
cd81e915
NC
1709 if (PL_skipwhite) {
1710 sv_free(PL_skipwhite);
f0e67a1d 1711 PL_skipwhite = NULL;
5db06880 1712 }
f0e67a1d 1713#endif /* PERL_MAD */
3280af22 1714 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1715 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1716 s++;
f0e67a1d
Z
1717 } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1718 while (isSPACE(*s) && *s != '\n')
1719 s++;
1720 if (*s == '#') {
1721 do {
463ee0b2 1722 s++;
f0e67a1d 1723 } while (s != PL_bufend && *s != '\n');
463ee0b2 1724 }
f0e67a1d
Z
1725 if (*s == '\n')
1726 s++;
1727 } else {
1728 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1729 PL_bufptr = s;
1730 lex_read_space(LEX_KEEP_PREVIOUS);
3280af22 1731 s = PL_bufptr;
f0e67a1d
Z
1732 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1733 if (PL_linestart > PL_bufptr)
1734 PL_bufptr = PL_linestart;
1735 return s;
463ee0b2 1736 }
5db06880 1737#ifdef PERL_MAD
f0e67a1d
Z
1738 if (PL_madskills)
1739 PL_skipwhite = newSVpvn(start, s-start);
1740#endif /* PERL_MAD */
5db06880 1741 return s;
a687059c 1742}
378cc40b 1743
ffb4593c
NT
1744/*
1745 * S_check_uni
1746 * Check the unary operators to ensure there's no ambiguity in how they're
1747 * used. An ambiguous piece of code would be:
1748 * rand + 5
1749 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1750 * the +5 is its argument.
1751 */
1752
76e3520e 1753STATIC void
cea2e8a9 1754S_check_uni(pTHX)
ba106d47 1755{
97aff369 1756 dVAR;
d4c19fe8
AL
1757 const char *s;
1758 const char *t;
2f3197b3 1759
3280af22 1760 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1761 return;
3280af22
NIS
1762 while (isSPACE(*PL_last_uni))
1763 PL_last_uni++;
c35e046a
AL
1764 s = PL_last_uni;
1765 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1766 s++;
3280af22 1767 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1768 return;
6136c704 1769
9b387841
NC
1770 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1771 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1772 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1773}
1774
ffb4593c
NT
1775/*
1776 * LOP : macro to build a list operator. Its behaviour has been replaced
1777 * with a subroutine, S_lop() for which LOP is just another name.
1778 */
1779
a0d0e21e
LW
1780#define LOP(f,x) return lop(f,x,s)
1781
ffb4593c
NT
1782/*
1783 * S_lop
1784 * Build a list operator (or something that might be one). The rules:
1785 * - if we have a next token, then it's a list operator [why?]
1786 * - if the next thing is an opening paren, then it's a function
1787 * - else it's a list operator
1788 */
1789
76e3520e 1790STATIC I32
a0be28da 1791S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1792{
97aff369 1793 dVAR;
7918f24d
NC
1794
1795 PERL_ARGS_ASSERT_LOP;
1796
6154021b 1797 pl_yylval.ival = f;
35c8bce7 1798 CLINE;
3280af22
NIS
1799 PL_expect = x;
1800 PL_bufptr = s;
1801 PL_last_lop = PL_oldbufptr;
eb160463 1802 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1803#ifdef PERL_MAD
1804 if (PL_lasttoke)
1805 return REPORT(LSTOP);
1806#else
3280af22 1807 if (PL_nexttoke)
bbf60fe6 1808 return REPORT(LSTOP);
5db06880 1809#endif
79072805 1810 if (*s == '(')
bbf60fe6 1811 return REPORT(FUNC);
29595ff2 1812 s = PEEKSPACE(s);
79072805 1813 if (*s == '(')
bbf60fe6 1814 return REPORT(FUNC);
79072805 1815 else
bbf60fe6 1816 return REPORT(LSTOP);
79072805
LW
1817}
1818
5db06880
NC
1819#ifdef PERL_MAD
1820 /*
1821 * S_start_force
1822 * Sets up for an eventual force_next(). start_force(0) basically does
1823 * an unshift, while start_force(-1) does a push. yylex removes items
1824 * on the "pop" end.
1825 */
1826
1827STATIC void
1828S_start_force(pTHX_ int where)
1829{
1830 int i;
1831
cd81e915 1832 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1833 where = PL_lasttoke;
cd81e915
NC
1834 assert(PL_curforce < 0 || PL_curforce == where);
1835 if (PL_curforce != where) {
5db06880
NC
1836 for (i = PL_lasttoke; i > where; --i) {
1837 PL_nexttoke[i] = PL_nexttoke[i-1];
1838 }
1839 PL_lasttoke++;
1840 }
cd81e915 1841 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1842 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1843 PL_curforce = where;
1844 if (PL_nextwhite) {
5db06880 1845 if (PL_madskills)
6b29d1f5 1846 curmad('^', newSVpvs(""));
cd81e915 1847 CURMAD('_', PL_nextwhite);
5db06880
NC
1848 }
1849}
1850
1851STATIC void
1852S_curmad(pTHX_ char slot, SV *sv)
1853{
1854 MADPROP **where;
1855
1856 if (!sv)
1857 return;
cd81e915
NC
1858 if (PL_curforce < 0)
1859 where = &PL_thismad;
5db06880 1860 else
cd81e915 1861 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1862
cd81e915 1863 if (PL_faketokens)
76f68e9b 1864 sv_setpvs(sv, "");
5db06880
NC
1865 else {
1866 if (!IN_BYTES) {
1867 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1868 SvUTF8_on(sv);
1869 else if (PL_encoding) {
1870 sv_recode_to_utf8(sv, PL_encoding);
1871 }
1872 }
1873 }
1874
1875 /* keep a slot open for the head of the list? */
1876 if (slot != '_' && *where && (*where)->mad_key == '^') {
1877 (*where)->mad_key = slot;
daba3364 1878 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1879 (*where)->mad_val = (void*)sv;
1880 }
1881 else
1882 addmad(newMADsv(slot, sv), where, 0);
1883}
1884#else
b3f24c00
MHM
1885# define start_force(where) NOOP
1886# define curmad(slot, sv) NOOP
5db06880
NC
1887#endif
1888
ffb4593c
NT
1889/*
1890 * S_force_next
9cbb5ea2 1891 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1892 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1893 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1894 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1895 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1896 */
1897
4e553d73 1898STATIC void
cea2e8a9 1899S_force_next(pTHX_ I32 type)
79072805 1900{
97aff369 1901 dVAR;
704d4215
GG
1902#ifdef DEBUGGING
1903 if (DEBUG_T_TEST) {
1904 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1905 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1906 }
1907#endif
5db06880 1908#ifdef PERL_MAD
cd81e915 1909 if (PL_curforce < 0)
5db06880 1910 start_force(PL_lasttoke);
cd81e915 1911 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1912 if (PL_lex_state != LEX_KNOWNEXT)
1913 PL_lex_defer = PL_lex_state;
1914 PL_lex_state = LEX_KNOWNEXT;
1915 PL_lex_expect = PL_expect;
cd81e915 1916 PL_curforce = -1;
5db06880 1917#else
3280af22
NIS
1918 PL_nexttype[PL_nexttoke] = type;
1919 PL_nexttoke++;
1920 if (PL_lex_state != LEX_KNOWNEXT) {
1921 PL_lex_defer = PL_lex_state;
1922 PL_lex_expect = PL_expect;
1923 PL_lex_state = LEX_KNOWNEXT;
79072805 1924 }
5db06880 1925#endif
79072805
LW
1926}
1927
d0a148a6 1928STATIC SV *
15f169a1 1929S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1930{
97aff369 1931 dVAR;
740cce10 1932 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1933 !IN_BYTES
1934 && UTF
1935 && !is_ascii_string((const U8*)start, len)
740cce10 1936 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1937 return sv;
1938}
1939
ffb4593c
NT
1940/*
1941 * S_force_word
1942 * When the lexer knows the next thing is a word (for instance, it has
1943 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1944 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1945 * lookahead.
ffb4593c
NT
1946 *
1947 * Arguments:
b1b65b59 1948 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1949 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1950 * int check_keyword : if true, Perl checks to make sure the word isn't
1951 * a keyword (do this if the word is a label, e.g. goto FOO)
1952 * int allow_pack : if true, : characters will also be allowed (require,
1953 * use, etc. do this)
9cbb5ea2 1954 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1955 */
1956
76e3520e 1957STATIC char *
cea2e8a9 1958S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1959{
97aff369 1960 dVAR;
463ee0b2
LW
1961 register char *s;
1962 STRLEN len;
4e553d73 1963
7918f24d
NC
1964 PERL_ARGS_ASSERT_FORCE_WORD;
1965
29595ff2 1966 start = SKIPSPACE1(start);
463ee0b2 1967 s = start;
7e2040f0 1968 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1969 (allow_pack && *s == ':') ||
15f0808c 1970 (allow_initial_tick && *s == '\'') )
a0d0e21e 1971 {
3280af22 1972 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1973 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1974 return start;
cd81e915 1975 start_force(PL_curforce);
5db06880
NC
1976 if (PL_madskills)
1977 curmad('X', newSVpvn(start,s-start));
463ee0b2 1978 if (token == METHOD) {
29595ff2 1979 s = SKIPSPACE1(s);
463ee0b2 1980 if (*s == '(')
3280af22 1981 PL_expect = XTERM;
463ee0b2 1982 else {
3280af22 1983 PL_expect = XOPERATOR;
463ee0b2 1984 }
79072805 1985 }
e74e6b3d 1986 if (PL_madskills)
63575281 1987 curmad('g', newSVpvs( "forced" ));
9ded7720 1988 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1989 = (OP*)newSVOP(OP_CONST,0,
1990 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1991 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1992 force_next(token);
1993 }
1994 return s;
1995}
1996
ffb4593c
NT
1997/*
1998 * S_force_ident
9cbb5ea2 1999 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2000 * text only contains the "foo" portion. The first argument is a pointer
2001 * to the "foo", and the second argument is the type symbol to prefix.
2002 * Forces the next token to be a "WORD".
9cbb5ea2 2003 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2004 */
2005
76e3520e 2006STATIC void
bfed75c6 2007S_force_ident(pTHX_ register const char *s, int kind)
79072805 2008{
97aff369 2009 dVAR;
7918f24d
NC
2010
2011 PERL_ARGS_ASSERT_FORCE_IDENT;
2012
c35e046a 2013 if (*s) {
90e5519e
NC
2014 const STRLEN len = strlen(s);
2015 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2016 start_force(PL_curforce);
9ded7720 2017 NEXTVAL_NEXTTOKE.opval = o;
79072805 2018 force_next(WORD);
748a9306 2019 if (kind) {
11343788 2020 o->op_private = OPpCONST_ENTERED;
55497cff
PP
2021 /* XXX see note in pp_entereval() for why we forgo typo
2022 warnings if the symbol must be introduced in an eval.
2023 GSAR 96-10-12 */
90e5519e
NC
2024 gv_fetchpvn_flags(s, len,
2025 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2026 : GV_ADD,
2027 kind == '$' ? SVt_PV :
2028 kind == '@' ? SVt_PVAV :
2029 kind == '%' ? SVt_PVHV :
a0d0e21e 2030 SVt_PVGV
90e5519e 2031 );
748a9306 2032 }
79072805
LW
2033 }
2034}
2035
1571675a
GS
2036NV
2037Perl_str_to_version(pTHX_ SV *sv)
2038{
2039 NV retval = 0.0;
2040 NV nshift = 1.0;
2041 STRLEN len;
cfd0369c 2042 const char *start = SvPV_const(sv,len);
9d4ba2ae 2043 const char * const end = start + len;
504618e9 2044 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2045
2046 PERL_ARGS_ASSERT_STR_TO_VERSION;
2047
1571675a 2048 while (start < end) {
ba210ebe 2049 STRLEN skip;
1571675a
GS
2050 UV n;
2051 if (utf)
9041c2e3 2052 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2053 else {
2054 n = *(U8*)start;
2055 skip = 1;
2056 }
2057 retval += ((NV)n)/nshift;
2058 start += skip;
2059 nshift *= 1000;
2060 }
2061 return retval;
2062}
2063
4e553d73 2064/*
ffb4593c
NT
2065 * S_force_version
2066 * Forces the next token to be a version number.
e759cc13
RGS
2067 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2068 * and if "guessing" is TRUE, then no new token is created (and the caller
2069 * must use an alternative parsing method).
ffb4593c
NT
2070 */
2071
76e3520e 2072STATIC char *
e759cc13 2073S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2074{
97aff369 2075 dVAR;
5f66b61c 2076 OP *version = NULL;
44dcb63b 2077 char *d;
5db06880
NC
2078#ifdef PERL_MAD
2079 I32 startoff = s - SvPVX(PL_linestr);
2080#endif
89bfa8cd 2081
7918f24d
NC
2082 PERL_ARGS_ASSERT_FORCE_VERSION;
2083
29595ff2 2084 s = SKIPSPACE1(s);
89bfa8cd 2085
44dcb63b 2086 d = s;
dd629d5b 2087 if (*d == 'v')
44dcb63b 2088 d++;
44dcb63b 2089 if (isDIGIT(*d)) {
e759cc13
RGS
2090 while (isDIGIT(*d) || *d == '_' || *d == '.')
2091 d++;
5db06880
NC
2092#ifdef PERL_MAD
2093 if (PL_madskills) {
cd81e915 2094 start_force(PL_curforce);
5db06880
NC
2095 curmad('X', newSVpvn(s,d-s));
2096 }
2097#endif
9f3d182e 2098 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 2099 SV *ver;
6154021b
RGS
2100 s = scan_num(s, &pl_yylval);
2101 version = pl_yylval.opval;
dd629d5b
GS
2102 ver = cSVOPx(version)->op_sv;
2103 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2104 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2105 SvNV_set(ver, str_to_version(ver));
1571675a 2106 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2107 }
89bfa8cd 2108 }
5db06880
NC
2109 else if (guessing) {
2110#ifdef PERL_MAD
2111 if (PL_madskills) {
cd81e915
NC
2112 sv_free(PL_nextwhite); /* let next token collect whitespace */
2113 PL_nextwhite = 0;
5db06880
NC
2114 s = SvPVX(PL_linestr) + startoff;
2115 }
2116#endif
e759cc13 2117 return s;
5db06880 2118 }
89bfa8cd
PP
2119 }
2120
5db06880
NC
2121#ifdef PERL_MAD
2122 if (PL_madskills && !version) {
cd81e915
NC
2123 sv_free(PL_nextwhite); /* let next token collect whitespace */
2124 PL_nextwhite = 0;
5db06880
NC
2125 s = SvPVX(PL_linestr) + startoff;
2126 }
2127#endif
89bfa8cd 2128 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2129 start_force(PL_curforce);
9ded7720 2130 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2131 force_next(WORD);
89bfa8cd 2132
e759cc13 2133 return s;
89bfa8cd
PP
2134}
2135
ffb4593c
NT
2136/*
2137 * S_tokeq
2138 * Tokenize a quoted string passed in as an SV. It finds the next
2139 * chunk, up to end of string or a backslash. It may make a new
2140 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2141 * turns \\ into \.
2142 */
2143
76e3520e 2144STATIC SV *
cea2e8a9 2145S_tokeq(pTHX_ SV *sv)
79072805 2146{
97aff369 2147 dVAR;
79072805
LW
2148 register char *s;
2149 register char *send;
2150 register char *d;
b3ac6de7
IZ
2151 STRLEN len = 0;
2152 SV *pv = sv;
79072805 2153
7918f24d
NC
2154 PERL_ARGS_ASSERT_TOKEQ;
2155
79072805 2156 if (!SvLEN(sv))
b3ac6de7 2157 goto finish;
79072805 2158
a0d0e21e 2159 s = SvPV_force(sv, len);
21a311ee 2160 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2161 goto finish;
463ee0b2 2162 send = s + len;
79072805
LW
2163 while (s < send && *s != '\\')
2164 s++;
2165 if (s == send)
b3ac6de7 2166 goto finish;
79072805 2167 d = s;
be4731d2 2168 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2169 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2170 }
79072805
LW
2171 while (s < send) {
2172 if (*s == '\\') {
a0d0e21e 2173 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2174 s++; /* all that, just for this */
2175 }
2176 *d++ = *s++;
2177 }
2178 *d = '\0';
95a20fc0 2179 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2180 finish:
3280af22 2181 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2182 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2183 return sv;
2184}
2185
ffb4593c
NT
2186/*
2187 * Now come three functions related to double-quote context,
2188 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2189 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2190 * interact with PL_lex_state, and create fake ( ... ) argument lists
2191 * to handle functions and concatenation.
2192 * They assume that whoever calls them will be setting up a fake
2193 * join call, because each subthing puts a ',' after it. This lets
2194 * "lower \luPpEr"
2195 * become
2196 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2197 *
2198 * (I'm not sure whether the spurious commas at the end of lcfirst's
2199 * arguments and join's arguments are created or not).
2200 */
2201
2202/*
2203 * S_sublex_start
6154021b 2204 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2205 *
2206 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2207 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2208 *
2209 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2210 *
2211 * Everything else becomes a FUNC.
2212 *
2213 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2214 * had an OP_CONST or OP_READLINE). This just sets us up for a
2215 * call to S_sublex_push().
2216 */
2217
76e3520e 2218STATIC I32
cea2e8a9 2219S_sublex_start(pTHX)
79072805 2220{
97aff369 2221 dVAR;
6154021b 2222 register const I32 op_type = pl_yylval.ival;
79072805
LW
2223
2224 if (op_type == OP_NULL) {
6154021b 2225 pl_yylval.opval = PL_lex_op;
5f66b61c 2226 PL_lex_op = NULL;
79072805
LW
2227 return THING;
2228 }
2229 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2230 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2231
2232 if (SvTYPE(sv) == SVt_PVIV) {
2233 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2234 STRLEN len;
96a5add6 2235 const char * const p = SvPV_const(sv, len);
740cce10 2236 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2237 SvREFCNT_dec(sv);
2238 sv = nsv;
4e553d73 2239 }
6154021b 2240 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2241 PL_lex_stuff = NULL;
6f33ba73
RGS
2242 /* Allow <FH> // "foo" */
2243 if (op_type == OP_READLINE)
2244 PL_expect = XTERMORDORDOR;
79072805
LW
2245 return THING;
2246 }
e3f73d4e
RGS
2247 else if (op_type == OP_BACKTICK && PL_lex_op) {
2248 /* readpipe() vas overriden */
2249 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2250 pl_yylval.opval = PL_lex_op;
9b201d7d 2251 PL_lex_op = NULL;
e3f73d4e
RGS
2252 PL_lex_stuff = NULL;
2253 return THING;
2254 }
79072805 2255
3280af22 2256 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2257 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2258 PL_sublex_info.sub_op = PL_lex_op;
2259 PL_lex_state = LEX_INTERPPUSH;
55497cff 2260
3280af22
NIS
2261 PL_expect = XTERM;
2262 if (PL_lex_op) {
6154021b 2263 pl_yylval.opval = PL_lex_op;
5f66b61c 2264 PL_lex_op = NULL;
55497cff
PP
2265 return PMFUNC;
2266 }
2267 else
2268 return FUNC;
2269}
2270
ffb4593c
NT
2271/*
2272 * S_sublex_push
2273 * Create a new scope to save the lexing state. The scope will be
2274 * ended in S_sublex_done. Returns a '(', starting the function arguments
2275 * to the uc, lc, etc. found before.
2276 * Sets PL_lex_state to LEX_INTERPCONCAT.
2277 */
2278
76e3520e 2279STATIC I32
cea2e8a9 2280S_sublex_push(pTHX)
55497cff 2281{
27da23d5 2282 dVAR;
f46d017c 2283 ENTER;
55497cff 2284
3280af22 2285 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2286 SAVEBOOL(PL_lex_dojoin);
3280af22 2287 SAVEI32(PL_lex_brackets);
3280af22
NIS
2288 SAVEI32(PL_lex_casemods);
2289 SAVEI32(PL_lex_starts);
651b5b28 2290 SAVEI8(PL_lex_state);
7766f137 2291 SAVEVPTR(PL_lex_inpat);
98246f1e 2292 SAVEI16(PL_lex_inwhat);
57843af0 2293 SAVECOPLINE(PL_curcop);
3280af22 2294 SAVEPPTR(PL_bufptr);
8452ff4b 2295 SAVEPPTR(PL_bufend);
3280af22
NIS
2296 SAVEPPTR(PL_oldbufptr);
2297 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2298 SAVEPPTR(PL_last_lop);
2299 SAVEPPTR(PL_last_uni);
3280af22
NIS
2300 SAVEPPTR(PL_linestart);
2301 SAVESPTR(PL_linestr);
8edd5f42
RGS
2302 SAVEGENERICPV(PL_lex_brackstack);
2303 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2304
2305 PL_linestr = PL_lex_stuff;
a0714e2c 2306 PL_lex_stuff = NULL;
3280af22 2307
9cbb5ea2
GS
2308 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2309 = SvPVX(PL_linestr);
3280af22 2310 PL_bufend += SvCUR(PL_linestr);
bd61b366 2311 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2312 SAVEFREESV(PL_linestr);
2313
2314 PL_lex_dojoin = FALSE;
2315 PL_lex_brackets = 0;
a02a5408
JC
2316 Newx(PL_lex_brackstack, 120, char);
2317 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2318 PL_lex_casemods = 0;
2319 *PL_lex_casestack = '\0';
2320 PL_lex_starts = 0;
2321 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2322 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2323
2324 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2325 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2326 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2327 else
5f66b61c 2328 PL_lex_inpat = NULL;
79072805 2329
55497cff 2330 return '(';
79072805
LW
2331}
2332
ffb4593c
NT
2333/*
2334 * S_sublex_done
2335 * Restores lexer state after a S_sublex_push.
2336 */
2337
76e3520e 2338STATIC I32
cea2e8a9 2339S_sublex_done(pTHX)
79072805 2340{
27da23d5 2341 dVAR;
3280af22 2342 if (!PL_lex_starts++) {
396482e1 2343 SV * const sv = newSVpvs("");
9aa983d2
JH
2344 if (SvUTF8(PL_linestr))
2345 SvUTF8_on(sv);
3280af22 2346 PL_expect = XOPERATOR;
6154021b 2347 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2348 return THING;
2349 }
2350
3280af22
NIS
2351 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2352 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2353 return yylex();
79072805
LW
2354 }
2355
ffb4593c 2356 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2357 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2358 PL_linestr = PL_lex_repl;
2359 PL_lex_inpat = 0;
2360 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2361 PL_bufend += SvCUR(PL_linestr);
bd61b366 2362 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2363 SAVEFREESV(PL_linestr);
2364 PL_lex_dojoin = FALSE;
2365 PL_lex_brackets = 0;
3280af22
NIS
2366 PL_lex_casemods = 0;
2367 *PL_lex_casestack = '\0';
2368 PL_lex_starts = 0;
25da4f38 2369 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2370 PL_lex_state = LEX_INTERPNORMAL;
2371 PL_lex_starts++;
e9fa98b2
HS
2372 /* we don't clear PL_lex_repl here, so that we can check later
2373 whether this is an evalled subst; that means we rely on the
2374 logic to ensure sublex_done() is called again only via the
2375 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2376 }
e9fa98b2 2377 else {
3280af22 2378 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2379 PL_lex_repl = NULL;
e9fa98b2 2380 }
79072805 2381 return ',';
ffed7fef
LW
2382 }
2383 else {
5db06880
NC
2384#ifdef PERL_MAD
2385 if (PL_madskills) {
cd81e915
NC
2386 if (PL_thiswhite) {
2387 if (!PL_endwhite)
6b29d1f5 2388 PL_endwhite = newSVpvs("");
cd81e915
NC
2389 sv_catsv(PL_endwhite, PL_thiswhite);
2390 PL_thiswhite = 0;
2391 }
2392 if (PL_thistoken)
76f68e9b 2393 sv_setpvs(PL_thistoken,"");
5db06880 2394 else
cd81e915 2395 PL_realtokenstart = -1;
5db06880
NC
2396 }
2397#endif
f46d017c 2398 LEAVE;
3280af22
NIS
2399 PL_bufend = SvPVX(PL_linestr);
2400 PL_bufend += SvCUR(PL_linestr);
2401 PL_expect = XOPERATOR;
09bef843 2402 PL_sublex_info.sub_inwhat = 0;
79072805 2403 return ')';
ffed7fef
LW
2404 }
2405}
2406
02aa26ce
NT
2407/*
2408 scan_const
2409
2410 Extracts a pattern, double-quoted string, or transliteration. This
2411 is terrifying code.
2412
94def140 2413 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2414 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2415 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2416
94def140
ST
2417 Returns a pointer to the character scanned up to. If this is
2418 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2419 successfully parsed), will leave an OP for the substring scanned
6154021b 2420 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2421 by looking at the next characters herself.
2422
02aa26ce
NT
2423 In patterns:
2424 backslashes:
2425 double-quoted style: \r and \n
2426 regexp special ones: \D \s
94def140
ST
2427 constants: \x31
2428 backrefs: \1
02aa26ce
NT
2429 case and quoting: \U \Q \E
2430 stops on @ and $, but not for $ as tail anchor
2431
2432 In transliterations:
2433 characters are VERY literal, except for - not at the start or end
94def140
ST
2434 of the string, which indicates a range. If the range is in bytes,
2435 scan_const expands the range to the full set of intermediate
2436 characters. If the range is in utf8, the hyphen is replaced with
2437 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2438
2439 In double-quoted strings:
2440 backslashes:
2441 double-quoted style: \r and \n
94def140
ST
2442 constants: \x31
2443 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2444 case and quoting: \U \Q \E
2445 stops on @ and $
2446
2447 scan_const does *not* construct ops to handle interpolated strings.
2448 It stops processing as soon as it finds an embedded $ or @ variable
2449 and leaves it to the caller to work out what's going on.
2450
94def140
ST
2451 embedded arrays (whether in pattern or not) could be:
2452 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2453
2454 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2455
2456 $ in pattern could be $foo or could be tail anchor. Assumption:
2457 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2458 followed by one of "()| \r\n\t"
02aa26ce
NT
2459
2460 \1 (backreferences) are turned into $1
2461
2462 The structure of the code is
2463 while (there's a character to process) {
94def140
ST
2464 handle transliteration ranges
2465 skip regexp comments /(?#comment)/ and codes /(?{code})/
2466 skip #-initiated comments in //x patterns
2467 check for embedded arrays
02aa26ce
NT
2468 check for embedded scalars
2469 if (backslash) {
94def140
ST
2470 leave intact backslashes from leaveit (below)
2471 deprecate \1 in substitution replacements
02aa26ce
NT
2472 handle string-changing backslashes \l \U \Q \E, etc.
2473 switch (what was escaped) {
94def140
ST
2474 handle \- in a transliteration (becomes a literal -)
2475 handle \132 (octal characters)
2476 handle \x15 and \x{1234} (hex characters)
2477 handle \N{name} (named characters)
2478 handle \cV (control characters)
2479 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2480 } (end switch)
77a135fe 2481 continue
02aa26ce 2482 } (end if backslash)
77a135fe 2483 handle regular character
02aa26ce 2484 } (end while character to read)
4e553d73 2485
02aa26ce
NT
2486*/
2487
76e3520e 2488STATIC char *
cea2e8a9 2489S_scan_const(pTHX_ char *start)
79072805 2490{
97aff369 2491 dVAR;
3280af22 2492 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2493 SV *sv = newSV(send - start); /* sv for the constant. See
2494 note below on sizing. */
02aa26ce
NT
2495 register char *s = start; /* start of the constant */
2496 register char *d = SvPVX(sv); /* destination for copies */
2497 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2498 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2499 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2500 I32 this_utf8 = UTF; /* Is the source string assumed
2501 to be UTF8? But, this can
2502 show as true when the source
2503 isn't utf8, as for example
2504 when it is entirely composed
2505 of hex constants */
2506
2507 /* Note on sizing: The scanned constant is placed into sv, which is
2508 * initialized by newSV() assuming one byte of output for every byte of
2509 * input. This routine expects newSV() to allocate an extra byte for a
2510 * trailing NUL, which this routine will append if it gets to the end of
2511 * the input. There may be more bytes of input than output (eg., \N{LATIN
2512 * CAPITAL LETTER A}), or more output than input if the constant ends up
2513 * recoded to utf8, but each time a construct is found that might increase
2514 * the needed size, SvGROW() is called. Its size parameter each time is
2515 * based on the best guess estimate at the time, namely the length used so
2516 * far, plus the length the current construct will occupy, plus room for
2517 * the trailing NUL, plus one byte for every input byte still unscanned */
2518
012bcf8d 2519 UV uv;
4c3a8340
ST
2520#ifdef EBCDIC
2521 UV literal_endpoint = 0;
e294cc5d 2522 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2523#endif
012bcf8d 2524
7918f24d
NC
2525 PERL_ARGS_ASSERT_SCAN_CONST;
2526
2b9d42f0
NIS
2527 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2528 /* If we are doing a trans and we know we want UTF8 set expectation */
2529 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2530 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2531 }
2532
2533
79072805 2534 while (s < send || dorange) {
02aa26ce 2535 /* get transliterations out of the way (they're most literal) */
3280af22 2536 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2537 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2538 if (dorange) {
1ba5c669
JH
2539 I32 i; /* current expanded character */
2540 I32 min; /* first character in range */
2541 I32 max; /* last character in range */
02aa26ce 2542
e294cc5d
JH
2543#ifdef EBCDIC
2544 UV uvmax = 0;
2545#endif
2546
2547 if (has_utf8
2548#ifdef EBCDIC
2549 && !native_range
2550#endif
2551 ) {
9d4ba2ae 2552 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2553 char *e = d++;
2554 while (e-- > c)
2555 *(e + 1) = *e;
25716404 2556 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2557 /* mark the range as done, and continue */
2558 dorange = FALSE;
2559 didrange = TRUE;
2560 continue;
2561 }
2b9d42f0 2562
95a20fc0 2563 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2564#ifdef EBCDIC
2565 SvGROW(sv,
2566 SvLEN(sv) + (has_utf8 ?
2567 (512 - UTF_CONTINUATION_MARK +
2568 UNISKIP(0x100))
2569 : 256));
2570 /* How many two-byte within 0..255: 128 in UTF-8,
2571 * 96 in UTF-8-mod. */
2572#else
9cbb5ea2 2573 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2574#endif
9cbb5ea2 2575 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2576#ifdef EBCDIC
2577 if (has_utf8) {
2578 int j;
2579 for (j = 0; j <= 1; j++) {
2580 char * const c = (char*)utf8_hop((U8*)d, -1);
2581 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2582 if (j)
2583 min = (U8)uv;
2584 else if (uv < 256)
2585 max = (U8)uv;
2586 else {
2587 max = (U8)0xff; /* only to \xff */
2588 uvmax = uv; /* \x{100} to uvmax */
2589 }
2590 d = c; /* eat endpoint chars */
2591 }
2592 }
2593 else {
2594#endif
2595 d -= 2; /* eat the first char and the - */
2596 min = (U8)*d; /* first char in range */
2597 max = (U8)d[1]; /* last char in range */
2598#ifdef EBCDIC
2599 }
2600#endif
8ada0baa 2601
c2e66d9e 2602 if (min > max) {
01ec43d0 2603 Perl_croak(aTHX_
d1573ac7 2604 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2605 (char)min, (char)max);
c2e66d9e
GS
2606 }
2607
c7f1f016 2608#ifdef EBCDIC
4c3a8340
ST
2609 if (literal_endpoint == 2 &&
2610 ((isLOWER(min) && isLOWER(max)) ||
2611 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2612 if (isLOWER(min)) {
2613 for (i = min; i <= max; i++)
2614 if (isLOWER(i))
db42d148 2615 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2616 } else {
2617 for (i = min; i <= max; i++)
2618 if (isUPPER(i))
db42d148 2619 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2620 }
2621 }
2622 else
2623#endif
2624 for (i = min; i <= max; i++)
e294cc5d
JH
2625#ifdef EBCDIC
2626 if (has_utf8) {
2627 const U8 ch = (U8)NATIVE_TO_UTF(i);
2628 if (UNI_IS_INVARIANT(ch))
2629 *d++ = (U8)i;
2630 else {
2631 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2632 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2633 }
2634 }
2635 else
2636#endif
2637 *d++ = (char)i;
2638
2639#ifdef EBCDIC
2640 if (uvmax) {
2641 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2642 if (uvmax > 0x101)
2643 *d++ = (char)UTF_TO_NATIVE(0xff);
2644 if (uvmax > 0x100)
2645 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2646 }
2647#endif
02aa26ce
NT
2648
2649 /* mark the range as done, and continue */
79072805 2650 dorange = FALSE;
01ec43d0 2651 didrange = TRUE;
4c3a8340
ST
2652#ifdef EBCDIC
2653 literal_endpoint = 0;
2654#endif
79072805 2655 continue;
4e553d73 2656 }
02aa26ce
NT
2657
2658 /* range begins (ignore - as first or last char) */
79072805 2659 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2660 if (didrange) {
1fafa243 2661 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2662 }
e294cc5d
JH
2663 if (has_utf8
2664#ifdef EBCDIC
2665 && !native_range
2666#endif
2667 ) {
25716404 2668 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2669 s++;
2670 continue;
2671 }
79072805
LW
2672 dorange = TRUE;
2673 s++;
01ec43d0
GS
2674 }
2675 else {
2676 didrange = FALSE;
4c3a8340
ST
2677#ifdef EBCDIC
2678 literal_endpoint = 0;
e294cc5d 2679 native_range = TRUE;
4c3a8340 2680#endif
01ec43d0 2681 }
79072805 2682 }
02aa26ce
NT
2683
2684 /* if we get here, we're not doing a transliteration */
2685
0f5d15d6
IZ
2686 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2687 except for the last char, which will be done separately. */
3280af22 2688 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2689 if (s[2] == '#') {
e994fd66 2690 while (s+1 < send && *s != ')')
db42d148 2691 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2692 }
2693 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2694 || (s[2] == '?' && s[3] == '{'))
155aba94 2695 {
cc6b7395 2696 I32 count = 1;
0f5d15d6 2697 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2698 char c;
2699
d9f97599
GS
2700 while (count && (c = *regparse)) {
2701 if (c == '\\' && regparse[1])
2702 regparse++;
4e553d73 2703 else if (c == '{')
cc6b7395 2704 count++;
4e553d73 2705 else if (c == '}')
cc6b7395 2706 count--;
d9f97599 2707 regparse++;
cc6b7395 2708 }
e994fd66 2709 if (*regparse != ')')
5bdf89e7 2710 regparse--; /* Leave one char for continuation. */
0f5d15d6 2711 while (s < regparse)
db42d148 2712 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2713 }
748a9306 2714 }
02aa26ce
NT
2715
2716 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2717 else if (*s == '#' && PL_lex_inpat &&
2718 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2719 while (s+1 < send && *s != '\n')
db42d148 2720 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2721 }
02aa26ce 2722
5d1d4326 2723 /* check for embedded arrays
da6eedaa 2724 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2725 */
1749ea0d
ST
2726 else if (*s == '@' && s[1]) {
2727 if (isALNUM_lazy_if(s+1,UTF))
2728 break;
2729 if (strchr(":'{$", s[1]))
2730 break;
2731 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2732 break; /* in regexp, neither @+ nor @- are interpolated */
2733 }
02aa26ce
NT
2734
2735 /* check for embedded scalars. only stop if we're sure it's a
2736 variable.
2737 */
79072805 2738 else if (*s == '$') {
3280af22 2739 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2740 break;
77772344 2741 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2742 if (s[1] == '\\') {
2743 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2744 "Possible unintended interpolation of $\\ in regex");
77772344 2745 }
79072805 2746 break; /* in regexp, $ might be tail anchor */
77772344 2747 }
79072805 2748 }
02aa26ce 2749
2b9d42f0
NIS
2750 /* End of else if chain - OP_TRANS rejoin rest */
2751
02aa26ce 2752 /* backslashes */
79072805
LW
2753 if (*s == '\\' && s+1 < send) {
2754 s++;
02aa26ce 2755
02aa26ce 2756 /* deprecate \1 in strings and substitution replacements */
3280af22 2757 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2758 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2759 {
a2a5de95 2760 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2761 *--s = '$';
2762 break;
2763 }
02aa26ce
NT
2764
2765 /* string-change backslash escapes */
3280af22 2766 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2767 --s;
2768 break;
2769 }
cc74c5bd
ST
2770 /* skip any other backslash escapes in a pattern */
2771 else if (PL_lex_inpat) {
2772 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2773 goto default_action;
2774 }
02aa26ce
NT
2775
2776 /* if we get here, it's either a quoted -, or a digit */
79072805 2777 switch (*s) {
02aa26ce
NT
2778
2779 /* quoted - in transliterations */
79072805 2780 case '-':
3280af22 2781 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2782 *d++ = *s++;
2783 continue;
2784 }
2785 /* FALL THROUGH */
2786 default:
11b8faa4 2787 {
a2a5de95
NC
2788 if ((isALPHA(*s) || isDIGIT(*s)))
2789 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2790 "Unrecognized escape \\%c passed through",
2791 *s);
11b8faa4 2792 /* default action is to copy the quoted character */
f9a63242 2793 goto default_action;
11b8faa4 2794 }
02aa26ce 2795
77a135fe 2796 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2797 case '0': case '1': case '2': case '3':
2798 case '4': case '5': case '6': case '7':
ba210ebe 2799 {
53305cf1
NC
2800 I32 flags = 0;
2801 STRLEN len = 3;
77a135fe 2802 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2803 s += len;
2804 }
012bcf8d 2805 goto NUM_ESCAPE_INSERT;
02aa26ce 2806
77a135fe 2807 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2808 case 'x':
a0ed51b3
LW
2809 ++s;
2810 if (*s == '{') {
9d4ba2ae 2811 char* const e = strchr(s, '}');
a4c04bdc
NC
2812 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2813 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2814 STRLEN len;
355860ce 2815
53305cf1 2816 ++s;
adaeee49 2817 if (!e) {
a0ed51b3 2818 yyerror("Missing right brace on \\x{}");
355860ce 2819 continue;
ba210ebe 2820 }
53305cf1 2821 len = e - s;
77a135fe 2822 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2823 s = e + 1;
a0ed51b3
LW
2824 }
2825 else {
ba210ebe 2826 {
53305cf1 2827 STRLEN len = 2;
a4c04bdc 2828 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2829 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2830 s += len;
2831 }
012bcf8d
GS
2832 }
2833
2834 NUM_ESCAPE_INSERT:
77a135fe
KW
2835 /* Insert oct, hex, or \N{U+...} escaped character. There will
2836 * always be enough room in sv since such escapes will be
2837 * longer than any UTF-8 sequence they can end up as, except if
2838 * they force us to recode the rest of the string into utf8 */
ba7cea30 2839
77a135fe
KW
2840 /* Here uv is the ordinal of the next character being added in
2841 * unicode (converted from native). (It has to be done before
2842 * here because \N is interpreted as unicode, and oct and hex
2843 * as native.) */
2844 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2845 if (!has_utf8 && uv > 255) {
77a135fe
KW
2846 /* Might need to recode whatever we have accumulated so
2847 * far if it contains any chars variant in utf8 or
2848 * utf-ebcdic. */
2849
2850 SvCUR_set(sv, d - SvPVX_const(sv));
2851 SvPOK_on(sv);
2852 *d = '\0';
77a135fe 2853 /* See Note on sizing above. */
7bf79863
KW
2854 sv_utf8_upgrade_flags_grow(sv,
2855 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2856 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2857 d = SvPVX(sv) + SvCUR(sv);
2858 has_utf8 = TRUE;
012bcf8d
GS
2859 }
2860
77a135fe
KW
2861 if (has_utf8) {
2862 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2863 if (PL_lex_inwhat == OP_TRANS &&
2864 PL_sublex_info.sub_op) {
2865 PL_sublex_info.sub_op->op_private |=
2866 (PL_lex_repl ? OPpTRANS_FROM_UTF
2867 : OPpTRANS_TO_UTF);
f9a63242 2868 }
e294cc5d
JH
2869#ifdef EBCDIC
2870 if (uv > 255 && !dorange)
2871 native_range = FALSE;
2872#endif
012bcf8d 2873 }
a0ed51b3 2874 else {
012bcf8d 2875 *d++ = (char)uv;
a0ed51b3 2876 }
012bcf8d
GS
2877 }
2878 else {
c4d5f83a 2879 *d++ = (char) uv;
a0ed51b3 2880 }
79072805 2881 continue;
02aa26ce 2882
77a135fe
KW
2883 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2884 * \N{U+0041} */
4a2d328f 2885 case 'N':
55eda711 2886 ++s;
423cee85
JH
2887 if (*s == '{') {
2888 char* e = strchr(s, '}');
155aba94 2889 SV *res;
423cee85 2890 STRLEN len;
cfd0369c 2891 const char *str;
4e553d73 2892
423cee85 2893 if (!e) {
5777a3f7 2894 yyerror("Missing right brace on \\N{}");
423cee85
JH
2895 e = s - 1;
2896 goto cont_scan;
2897 }
dbc0d4f2 2898 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2899 /* \N{U+...} The ... is a unicode value even on EBCDIC
2900 * machines */
dbc0d4f2
JH
2901 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2902 PERL_SCAN_DISALLOW_PREFIX;
2903 s += 3;
2904 len = e - s;
2905 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2906 if ( e > s && len != (STRLEN)(e - s) ) {
2907 uv = 0xFFFD;
fc8cd66c 2908 }
dbc0d4f2
JH
2909 s = e + 1;
2910 goto NUM_ESCAPE_INSERT;
2911 }
55eda711 2912 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2913 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2914 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2915 if (has_utf8)
2916 sv_utf8_upgrade(res);
cfd0369c 2917 str = SvPV_const(res,len);
1c47067b
JH
2918#ifdef EBCDIC_NEVER_MIND
2919 /* charnames uses pack U and that has been
2920 * recently changed to do the below uni->native
2921 * mapping, so this would be redundant (and wrong,
2922 * the code point would be doubly converted).
2923 * But leave this in just in case the pack U change
2924 * gets revoked, but the semantics is still
2925 * desireable for charnames. --jhi */
cddc7ef4 2926 {
cfd0369c 2927 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2928
2929 if (uv < 0x100) {
89ebb4a3 2930 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2931
2932 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2933 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2934 str = SvPV_const(res, len);
cddc7ef4
JH
2935 }
2936 }
2937#endif
77a135fe
KW
2938 /* If destination is not in utf8 but this new character is,
2939 * recode the dest to utf8 */
89491803 2940 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2941 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2942 SvPOK_on(sv);
e4f3eed8 2943 *d = '\0';
77a135fe 2944 /* See Note on sizing above. */
7bf79863
KW
2945 sv_utf8_upgrade_flags_grow(sv,
2946 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947 len + (STRLEN)(send - s) + 1);
f08d6ad9 2948 d = SvPVX(sv) + SvCUR(sv);
89491803 2949 has_utf8 = TRUE;
77a135fe 2950 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2951
77a135fe
KW
2952 /* See Note on sizing above. (NOTE: SvCUR() is not set
2953 * correctly here). */
2954 const STRLEN off = d - SvPVX_const(sv);
2955 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2956 }
e294cc5d
JH
2957#ifdef EBCDIC
2958 if (!dorange)
2959 native_range = FALSE; /* \N{} is guessed to be Unicode */
2960#endif
423cee85
JH
2961 Copy(str, d, len, char);
2962 d += len;
2963 SvREFCNT_dec(res);
2964 cont_scan:
2965 s = e + 1;
2966 }
2967 else
5777a3f7 2968 yyerror("Missing braces on \\N{}");
423cee85
JH
2969 continue;
2970
02aa26ce 2971 /* \c is a control character */
79072805
LW
2972 case 'c':
2973 s++;
961ce445 2974 if (s < send) {
ba210ebe 2975 U8 c = *s++;
c7f1f016
NIS
2976#ifdef EBCDIC
2977 if (isLOWER(c))
2978 c = toUPPER(c);
2979#endif
db42d148 2980 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2981 }
961ce445
RGS
2982 else {
2983 yyerror("Missing control char name in \\c");
2984 }
79072805 2985 continue;
02aa26ce
NT
2986
2987 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2988 case 'b':
db42d148 2989 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2990 break;
2991 case 'n':
db42d148 2992 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2993 break;
2994 case 'r':
db42d148 2995 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2996 break;
2997 case 'f':
db42d148 2998 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2999 break;
3000 case 't':
db42d148 3001 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3002 break;
34a3fe2a 3003 case 'e':
db42d148 3004 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3005 break;
3006 case 'a':
db42d148 3007 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3008 break;
02aa26ce
NT
3009 } /* end switch */
3010
79072805
LW
3011 s++;
3012 continue;
02aa26ce 3013 } /* end if (backslash) */
4c3a8340
ST
3014#ifdef EBCDIC
3015 else
3016 literal_endpoint++;
3017#endif
02aa26ce 3018
f9a63242 3019 default_action:
77a135fe
KW
3020 /* If we started with encoded form, or already know we want it,
3021 then encode the next character */
3022 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3023 STRLEN len = 1;
77a135fe
KW
3024
3025
3026 /* One might think that it is wasted effort in the case of the
3027 * source being utf8 (this_utf8 == TRUE) to take the next character
3028 * in the source, convert it to an unsigned value, and then convert
3029 * it back again. But the source has not been validated here. The
3030 * routine that does the conversion checks for errors like
3031 * malformed utf8 */
3032
5f66b61c
AL
3033 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3034 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3035 if (!has_utf8) {
3036 SvCUR_set(sv, d - SvPVX_const(sv));
3037 SvPOK_on(sv);
3038 *d = '\0';
77a135fe 3039 /* See Note on sizing above. */
7bf79863
KW
3040 sv_utf8_upgrade_flags_grow(sv,
3041 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3043 d = SvPVX(sv) + SvCUR(sv);
3044 has_utf8 = TRUE;
3045 } else if (need > len) {
3046 /* encoded value larger than old, may need extra space (NOTE:
3047 * SvCUR() is not set correctly here). See Note on sizing
3048 * above. */
9d4ba2ae 3049 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3050 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3051 }
77a135fe
KW
3052 s += len;
3053
5f66b61c 3054 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3055#ifdef EBCDIC
3056 if (uv > 255 && !dorange)
3057 native_range = FALSE;
3058#endif
2b9d42f0
NIS
3059 }
3060 else {
3061 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3062 }
02aa26ce
NT
3063 } /* while loop to process each character */
3064
3065 /* terminate the string and set up the sv */
79072805 3066 *d = '\0';
95a20fc0 3067 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3068 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3069 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3070
79072805 3071 SvPOK_on(sv);
9f4817db 3072 if (PL_encoding && !has_utf8) {
d0063567
DK
3073 sv_recode_to_utf8(sv, PL_encoding);
3074 if (SvUTF8(sv))
3075 has_utf8 = TRUE;
9f4817db 3076 }
2b9d42f0 3077 if (has_utf8) {
7e2040f0 3078 SvUTF8_on(sv);
2b9d42f0 3079 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3080 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3081 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3082 }
3083 }
79072805 3084
02aa26ce 3085 /* shrink the sv if we allocated more than we used */
79072805 3086 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3087 SvPV_shrink_to_cur(sv);
79072805 3088 }
02aa26ce 3089
6154021b 3090 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3091 if (s > PL_bufptr) {
eb0d8d16
NC
3092 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3093 const char *const key = PL_lex_inpat ? "qr" : "q";
3094 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3095 const char *type;
3096 STRLEN typelen;
3097
3098 if (PL_lex_inwhat == OP_TRANS) {
3099 type = "tr";
3100 typelen = 2;
3101 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3102 type = "s";
3103 typelen = 1;
3104 } else {
3105 type = "qq";
3106 typelen = 2;
3107 }
3108
3109 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3110 type, typelen);
3111 }
6154021b 3112 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3113 } else
8990e307 3114 SvREFCNT_dec(sv);
79072805
LW
3115 return s;
3116}
3117
ffb4593c
NT
3118/* S_intuit_more
3119 * Returns TRUE if there's more to the expression (e.g., a subscript),
3120 * FALSE otherwise.
ffb4593c
NT
3121 *
3122 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3123 *
3124 * ->[ and ->{ return TRUE
3125 * { and [ outside a pattern are always subscripts, so return TRUE
3126 * if we're outside a pattern and it's not { or [, then return FALSE
3127 * if we're in a pattern and the first char is a {
3128 * {4,5} (any digits around the comma) returns FALSE
3129 * if we're in a pattern and the first char is a [
3130 * [] returns FALSE
3131 * [SOMETHING] has a funky algorithm to decide whether it's a
3132 * character class or not. It has to deal with things like
3133 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3134 * anything else returns TRUE
3135 */
3136
9cbb5ea2
GS
3137/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3138
76e3520e 3139STATIC int
cea2e8a9 3140S_intuit_more(pTHX_ register char *s)
79072805 3141{
97aff369 3142 dVAR;
7918f24d
NC
3143
3144 PERL_ARGS_ASSERT_INTUIT_MORE;
3145
3280af22 3146 if (PL_lex_brackets)
79072805
LW
3147 return TRUE;
3148 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3149 return TRUE;
3150 if (*s != '{' && *s != '[')
3151 return FALSE;
3280af22 3152 if (!PL_lex_inpat)
79072805
LW
3153 return TRUE;
3154
3155 /* In a pattern, so maybe we have {n,m}. */
3156 if (*s == '{') {
3157 s++;
3158 if (!isDIGIT(*s))
3159 return TRUE;
3160 while (isDIGIT(*s))
3161 s++;
3162 if (*s == ',')
3163 s++;
3164 while (isDIGIT(*s))
3165 s++;
3166 if (*s == '}')
3167 return FALSE;
3168 return TRUE;
3169
3170 }
3171
3172 /* On the other hand, maybe we have a character class */
3173
3174 s++;
3175 if (*s == ']' || *s == '^')
3176 return FALSE;
3177 else {
ffb4593c 3178 /* this is terrifying, and it works */
79072805
LW
3179 int weight = 2; /* let's weigh the evidence */
3180 char seen[256];
f27ffc4a 3181 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3182 const char * const send = strchr(s,']');
3280af22 3183 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3184
3185 if (!send) /* has to be an expression */
3186 return TRUE;
3187
3188 Zero(seen,256,char);
3189 if (*s == '$')
3190 weight -= 3;
3191 else if (isDIGIT(*s)) {
3192 if (s[1] != ']') {
3193 if (isDIGIT(s[1]) && s[2] == ']')
3194 weight -= 10;
3195 }
3196 else
3197 weight -= 100;
3198 }
3199 for (; s < send; s++) {
3200 last_un_char = un_char;
3201 un_char = (unsigned char)*s;
3202 switch (*s) {
3203 case '@':
3204 case '&':
3205 case '$':
3206 weight -= seen[un_char] * 10;
7e2040f0 3207 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3208 int len;
8903cb82 3209 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3210 len = (int)strlen(tmpbuf);
3211 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3212 weight -= 100;
3213 else
3214 weight -= 10;
3215 }
3216 else if (*s == '$' && s[1] &&
93a17b20
LW
3217 strchr("[#!%*<>()-=",s[1])) {
3218 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3219 weight -= 10;
3220 else
3221 weight -= 1;
3222 }
3223 break;
3224 case '\\':
3225 un_char = 254;
3226 if (s[1]) {
93a17b20 3227 if (strchr("wds]",s[1]))
79072805 3228 weight += 100;
10edeb5d 3229 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3230 weight += 1;
93a17b20 3231 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3232 weight += 40;
3233 else if (isDIGIT(s[1])) {
3234 weight += 40;
3235 while (s[1] && isDIGIT(s[1]))
3236 s++;
3237 }
3238 }
3239 else
3240 weight += 100;
3241 break;
3242 case '-':
3243 if (s[1] == '\\')
3244 weight += 50;
93a17b20 3245 if (strchr("aA01! ",last_un_char))
79072805 3246 weight += 30;
93a17b20 3247 if (strchr("zZ79~",s[1]))
79072805 3248 weight += 30;
f27ffc4a
GS
3249 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3250 weight -= 5; /* cope with negative subscript */
79072805
LW
3251 break;
3252 default:
3792a11b
NC
3253 if (!isALNUM(last_un_char)
3254 && !(last_un_char == '$' || last_un_char == '@'
3255 || last_un_char == '&')
3256 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3257 char *d = tmpbuf;
3258 while (isALPHA(*s))
3259 *d++ = *s++;
3260 *d = '\0';
5458a98a 3261 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3262 weight -= 150;
3263 }
3264 if (un_char == last_un_char + 1)
3265 weight += 5;
3266 weight -= seen[un_char];
3267 break;
3268 }
3269 seen[un_char]++;
3270 }
3271 if (weight >= 0) /* probably a character class */
3272 return FALSE;
3273 }
3274
3275 return TRUE;
3276}
ffed7fef 3277
ffb4593c
NT
3278/*
3279 * S_intuit_method
3280 *
3281 * Does all the checking to disambiguate
3282 * foo bar
3283 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3284 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3285 *
3286 * First argument is the stuff after the first token, e.g. "bar".
3287 *
3288 * Not a method if bar is a filehandle.
3289 * Not a method if foo is a subroutine prototyped to take a filehandle.
3290 * Not a method if it's really "Foo $bar"
3291 * Method if it's "foo $bar"
3292 * Not a method if it's really "print foo $bar"
3293 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3294 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3295 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3296 * =>
3297 */
3298
76e3520e 3299STATIC int
62d55b22 3300S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3301{
97aff369 3302 dVAR;
a0d0e21e 3303 char *s = start + (*start == '$');
3280af22 3304 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3305 STRLEN len;
3306 GV* indirgv;
5db06880
NC
3307#ifdef PERL_MAD
3308 int soff;
3309#endif
a0d0e21e 3310
7918f24d
NC
3311 PERL_ARGS_ASSERT_INTUIT_METHOD;
3312
a0d0e21e 3313 if (gv) {
62d55b22 3314 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3315 return 0;
62d55b22
NC
3316 if (cv) {
3317 if (SvPOK(cv)) {
3318 const char *proto = SvPVX_const(cv);
3319 if (proto) {
3320 if (*proto == ';')
3321 proto++;
3322 if (*proto == '*')
3323 return 0;
3324 }
b6c543e3
IZ
3325 }
3326 } else
c35e046a 3327 gv = NULL;
a0d0e21e 3328 }
8903cb82 3329 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3330 /* start is the beginning of the possible filehandle/object,
3331 * and s is the end of it
3332 * tmpbuf is a copy of it
3333 */
3334
a0d0e21e 3335 if (*start == '$') {
3ef1310e
RGS
3336 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3337 isUPPER(*PL_tokenbuf))
a0d0e21e 3338 return 0;
5db06880
NC
3339#ifdef PERL_MAD
3340 len = start - SvPVX(PL_linestr);
3341#endif
29595ff2 3342 s = PEEKSPACE(s);
f0092767 3343#ifdef PERL_MAD
5db06880
NC
3344 start = SvPVX(PL_linestr) + len;
3345#endif
3280af22
NIS
3346 PL_bufptr = start;
3347 PL_expect = XREF;
a0d0e21e
LW
3348 return *s == '(' ? FUNCMETH : METHOD;
3349 }
5458a98a 3350 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3351 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3352 len -= 2;
3353 tmpbuf[len] = '\0';
5db06880
NC
3354#ifdef PERL_MAD
3355 soff = s - SvPVX(PL_linestr);
3356#endif
c3e0f903
GS
3357 goto bare_package;
3358 }
90e5519e 3359 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3360 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3361 return 0;
3362 /* filehandle or package name makes it a method */
da51bb9b 3363 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3364#ifdef PERL_MAD
3365 soff = s - SvPVX(PL_linestr);
3366#endif
29595ff2 3367 s = PEEKSPACE(s);
3280af22 3368 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3369 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3370 bare_package:
cd81e915 3371 start_force(PL_curforce);
9ded7720 3372 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3373 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3374 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3375 if (PL_madskills)
3376 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3377 PL_expect = XTERM;
a0d0e21e 3378 force_next(WORD);
3280af22 3379 PL_bufptr = s;
5db06880
NC
3380#ifdef PERL_MAD
3381 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3382#endif
a0d0e21e
LW
3383 return *s == '(' ? FUNCMETH : METHOD;
3384 }
3385 }
3386 return 0;
3387}
3388
16d20bd9 3389/* Encoded script support. filter_add() effectively inserts a
4e553d73 3390 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3391 * Note that the filter function only applies to the current source file
3392 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3393 *
3394 * The datasv parameter (which may be NULL) can be used to pass
3395 * private data to this instance of the filter. The filter function
3396 * can recover the SV using the FILTER_DATA macro and use it to
3397 * store private buffers and state information.
3398 *
3399 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3400 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3401 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3402 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3403 * private use must be set using malloc'd pointers.
3404 */
16d20bd9
AD
3405
3406SV *
864dbfa3 3407Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3408{
97aff369 3409 dVAR;
f4c556ac 3410 if (!funcp)
a0714e2c 3411 return NULL;
f4c556ac 3412
5486870f
DM
3413 if (!PL_parser)
3414 return NULL;
3415
3280af22
NIS
3416 if (!PL_rsfp_filters)
3417 PL_rsfp_filters = newAV();
16d20bd9 3418 if (!datasv)
561b68a9 3419 datasv = newSV(0);
862a34c6 3420 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3421 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3422 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3423 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3424 FPTR2DPTR(void *, IoANY(datasv)),
3425 SvPV_nolen(datasv)));
3280af22
NIS
3426 av_unshift(PL_rsfp_filters, 1);
3427 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3428 return(datasv);
3429}
4e553d73 3430
16d20bd9
AD
3431
3432/* Delete most recently added instance of this filter function. */
a0d0e21e 3433void
864dbfa3 3434Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3435{
97aff369 3436 dVAR;
e0c19803 3437 SV *datasv;
24801a4b 3438
7918f24d
NC
3439 PERL_ARGS_ASSERT_FILTER_DEL;
3440
33073adb 3441#ifdef DEBUGGING
55662e27
JH
3442 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3443 FPTR2DPTR(void*, funcp)));
33073adb 3444#endif
5486870f 3445 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3446 return;
3447 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3448 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3449 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3450 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3451 IoANY(datasv) = (void *)NULL;
3280af22 3452 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3453
16d20bd9
AD
3454 return;
3455 }
3456 /* we need to search for the correct entry and clear it */
cea2e8a9 3457 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3458}
3459
3460
1de9afcd
RGS
3461/* Invoke the idxth filter function for the current rsfp. */
3462/* maxlen 0 = read one text line */
16d20bd9 3463I32
864dbfa3 3464Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3465{
97aff369 3466 dVAR;
16d20bd9
AD
3467 filter_t funcp;
3468 SV *datasv = NULL;
f482118e
NC
3469 /* This API is bad. It should have been using unsigned int for maxlen.
3470 Not sure if we want to change the API, but if not we should sanity
3471 check the value here. */
39cd7a59
NC
3472 const unsigned int correct_length
3473 = maxlen < 0 ?
3474#ifdef PERL_MICRO
3475 0x7FFFFFFF
3476#else
3477 INT_MAX
3478#endif
3479 : maxlen;
e50aee73 3480
7918f24d
NC
3481 PERL_ARGS_ASSERT_FILTER_READ;
3482
5486870f 3483 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3484 return -1;
1de9afcd 3485 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3486 /* Provide a default input filter to make life easy. */
3487 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3488 DEBUG_P(PerlIO_printf(Perl_debug_log,
3489 "filter_read %d: from rsfp\n", idx));
f482118e 3490 if (correct_length) {
16d20bd9
AD
3491 /* Want a block */
3492 int len ;
f54cb97a 3493 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3494
3495 /* ensure buf_sv is large enough */
881d8f0a 3496 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3497 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3498 correct_length)) <= 0) {
3280af22 3499 if (PerlIO_error(PL_rsfp))
37120919
AD
3500 return -1; /* error */
3501 else
3502 return 0 ; /* end of file */
3503 }
16d20bd9 3504 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3505 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3506 } else {
3507 /* Want a line */
3280af22
NIS
3508 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3509 if (PerlIO_error(PL_rsfp))
37120919
AD
3510 return -1; /* error */
3511 else
3512 return 0 ; /* end of file */
3513 }
16d20bd9
AD
3514 }
3515 return SvCUR(buf_sv);
3516 }
3517 /* Skip this filter slot if filter has been deleted */
1de9afcd 3518 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3519 DEBUG_P(PerlIO_printf(Perl_debug_log,
3520 "filter_read %d: skipped (filter deleted)\n",
3521 idx));
f482118e 3522 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3523 }
3524 /* Get function pointer hidden within datasv */
8141890a 3525 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3526 DEBUG_P(PerlIO_printf(Perl_debug_log,
3527 "filter_read %d: via function %p (%s)\n",
ca0270c4 3528 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3529 /* Call function. The function is expected to */
3530 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3531 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3532 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3533}
3534
76e3520e 3535STATIC char *
5cc814fd 3536S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3537{
97aff369 3538 dVAR;
7918f24d
NC
3539
3540 PERL_ARGS_ASSERT_FILTER_GETS;
3541
c39cd008 3542#ifdef PERL_CR_FILTER
3280af22 3543 if (!PL_rsfp_filters) {
c39cd008 3544 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3545 }
3546#endif
3280af22 3547 if (PL_rsfp_filters) {
55497cff
PP
3548 if (!append)
3549 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3550 if (FILTER_READ(0, sv, 0) > 0)
3551 return ( SvPVX(sv) ) ;
3552 else
bd61b366 3553 return NULL ;
16d20bd9 3554 }
9d116dd7 3555 else
5cc814fd 3556 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3557}
3558
01ec43d0 3559STATIC HV *
9bde8eb0 3560S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3561{
97aff369 3562 dVAR;
def3634b
GS
3563 GV *gv;
3564
7918f24d
NC
3565 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3566
01ec43d0 3567 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3568 return PL_curstash;
3569
3570 if (len > 2 &&
3571 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3572 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3573 {
3574 return GvHV(gv); /* Foo:: */
def3634b
GS
3575 }
3576
3577 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3578 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3579 if (gv && GvCV(gv)) {
3580 SV * const sv = cv_const_sv(GvCV(gv));
3581 if (sv)
9bde8eb0 3582 pkgname = SvPV_const(sv, len);
def3634b
GS
3583 }
3584
9bde8eb0 3585 return gv_stashpvn(pkgname, len, 0);
def3634b 3586}
a0d0e21e 3587
e3f73d4e
RGS
3588/*
3589 * S_readpipe_override
3590 * Check whether readpipe() is overriden, and generates the appropriate
3591 * optree, provided sublex_start() is called afterwards.
3592 */
3593STATIC void
1d51329b 3594S_readpipe_override(pTHX)
e3f73d4e
RGS
3595{
3596 GV **gvp;
3597 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3598 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3599 if ((gv_readpipe
3600 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3601 ||
3602 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3603 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3604 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3605 {
3606 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3607 append_elem(OP_LIST,
3608 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3609 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3610 }
e3f73d4e
RGS
3611}
3612
5db06880
NC
3613#ifdef PERL_MAD
3614 /*
3615 * Perl_madlex
3616 * The intent of this yylex wrapper is to minimize the changes to the
3617 * tokener when we aren't interested in collecting madprops. It remains
3618 * to be seen how successful this strategy will be...
3619 */
3620
3621int
3622Perl_madlex(pTHX)
3623{
3624 int optype;
3625 char *s = PL_bufptr;
3626
cd81e915
NC
3627 /* make sure PL_thiswhite is initialized */
3628 PL_thiswhite = 0;
3629 PL_thismad = 0;
5db06880 3630
cd81e915 3631 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3632 if (PL_pending_ident)
3633 return S_pending_ident(aTHX);
3634
3635 /* previous token ate up our whitespace? */
cd81e915
NC
3636 if (!PL_lasttoke && PL_nextwhite) {
3637 PL_thiswhite = PL_nextwhite;
3638 PL_nextwhite = 0;
5db06880
NC
3639 }
3640
3641 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3642 PL_realtokenstart = -1;
3643 PL_thistoken = 0;
5db06880
NC
3644 optype = yylex();
3645 s = PL_bufptr;
cd81e915 3646 assert(PL_curforce < 0);
5db06880 3647
cd81e915
NC
3648 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3649 if (!PL_thistoken) {
3650 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3651 PL_thistoken = newSVpvs("");
5db06880 3652 else {
c35e046a 3653 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3654 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3655 }
3656 }
cd81e915
NC
3657 if (PL_thismad) /* install head */
3658 CURMAD('X', PL_thistoken);
5db06880
NC
3659 }
3660
3661 /* last whitespace of a sublex? */
cd81e915
NC
3662 if (optype == ')' && PL_endwhite) {
3663 CURMAD('X', PL_endwhite);
5db06880
NC
3664 }
3665
cd81e915 3666 if (!PL_thismad) {
5db06880
NC
3667
3668 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3669 if (!PL_thiswhite && !PL_endwhite && !optype) {
3670 sv_free(PL_thistoken);
3671 PL_thistoken = 0;
5db06880
NC
3672 return 0;
3673 }
3674
3675 /* put off final whitespace till peg */
3676 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3677 PL_nextwhite = PL_thiswhite;
3678 PL_thiswhite = 0;
5db06880 3679 }
cd81e915
NC
3680 else if (PL_thisopen) {
3681 CURMAD('q', PL_thisopen);
3682 if (PL_thistoken)
3683 sv_free(PL_thistoken);
3684 PL_thistoken = 0;
5db06880
NC
3685 }
3686 else {
3687 /* Store actual token text as madprop X */
cd81e915 3688 CURMAD('X', PL_thistoken);
5db06880
NC
3689 }
3690
cd81e915 3691 if (PL_thiswhite) {
5db06880 3692 /* add preceding whitespace as madprop _ */
cd81e915 3693 CURMAD('_', PL_thiswhite);
5db06880
NC
3694 }
3695
cd81e915 3696 if (PL_thisstuff) {
5db06880 3697 /* add quoted material as madprop = */
cd81e915 3698 CURMAD('=', PL_thisstuff);
5db06880
NC
3699 }
3700
cd81e915 3701 if (PL_thisclose) {
5db06880 3702 /* add terminating quote as madprop Q */
cd81e915 3703 CURMAD('Q', PL_thisclose);
5db06880
NC
3704 }
3705 }
3706