This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
lexer API fixes
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a
PP
152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b
NC
585/* The longest string we pass in. */
586#define MAX_FEATURE_LEN (sizeof("switch")-1)
587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
917=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
939Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
940{
941 char *bufptr;
942 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
943 if (flags & ~(LEX_STUFF_UTF8))
944 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
945 if (UTF) {
946 if (flags & LEX_STUFF_UTF8) {
947 goto plain_copy;
948 } else {
949 STRLEN highhalf = 0;
950 char *p, *e = pv+len;
951 for (p = pv; p != e; p++)
952 highhalf += !!(((U8)*p) & 0x80);
953 if (!highhalf)
954 goto plain_copy;
955 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
956 bufptr = PL_parser->bufptr;
957 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
958 PL_parser->bufend += len+highhalf;
959 for (p = pv; p != e; p++) {
960 U8 c = (U8)*p;
961 if (c & 0x80) {
962 *bufptr++ = (char)(0xc0 | (c >> 6));
963 *bufptr++ = (char)(0x80 | (c & 0x3f));
964 } else {
965 *bufptr++ = (char)c;
966 }
967 }
968 }
969 } else {
970 if (flags & LEX_STUFF_UTF8) {
971 STRLEN highhalf = 0;
972 char *p, *e = pv+len;
973 for (p = pv; p != e; p++) {
974 U8 c = (U8)*p;
975 if (c >= 0xc4) {
976 Perl_croak(aTHX_ "Lexing code attempted to stuff "
977 "non-Latin-1 character into Latin-1 input");
978 } else if (c >= 0xc2 && p+1 != e &&
979 (((U8)p[1]) & 0xc0) == 0x80) {
980 p++;
981 highhalf++;
982 } else if (c >= 0x80) {
983 /* malformed UTF-8 */
984 ENTER;
985 SAVESPTR(PL_warnhook);
986 PL_warnhook = PERL_WARNHOOK_FATAL;
987 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
988 LEAVE;
989 }
990 }
991 if (!highhalf)
992 goto plain_copy;
993 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
994 bufptr = PL_parser->bufptr;
995 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
996 PL_parser->bufend += len-highhalf;
997 for (p = pv; p != e; p++) {
998 U8 c = (U8)*p;
999 if (c & 0x80) {
1000 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001 p++;
1002 } else {
1003 *bufptr++ = (char)c;
1004 }
1005 }
1006 } else {
1007 plain_copy:
1008 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009 bufptr = PL_parser->bufptr;
1010 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1013 }
1014 }
1015}
1016
1017/*
1018=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022reallocating the buffer if necessary. This means that lexing code that
1023runs later will see the characters as if they had appeared in the input.
1024It is not recommended to do this as part of normal parsing, and most
1025uses of this facility run the risk of the inserted characters being
1026interpreted in an unintended manner.
1027
1028The string to be inserted is the string value of I<sv>. The characters
1029are recoded for the lexer buffer, according to how the buffer is currently
1030being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1031not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032need to construct a scalar.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039{
1040 char *pv;
1041 STRLEN len;
1042 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043 if (flags)
1044 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045 pv = SvPV(sv, len);
1046 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047}
1048
1049/*
1050=for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1054This hides the discarded text from any lexing code that runs later,
1055as if the text had never appeared.
1056
1057This is not the normal way to consume lexed text. For that, use
1058L</lex_read_to>.
1059
1060=cut
1061*/
1062
1063void
1064Perl_lex_unstuff(pTHX_ char *ptr)
1065{
1066 char *buf, *bufend;
1067 STRLEN unstuff_len;
1068 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069 buf = PL_parser->bufptr;
1070 if (ptr < buf)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072 if (ptr == buf)
1073 return;
1074 bufend = PL_parser->bufend;
1075 if (ptr > bufend)
1076 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077 unstuff_len = ptr - buf;
1078 Move(ptr, buf, bufend+1-ptr, char);
1079 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080 PL_parser->bufend = bufend - unstuff_len;
1081}
1082
1083/*
1084=for apidoc Amx|void|lex_read_to|char *ptr
1085
1086Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088performing the correct bookkeeping whenever a newline character is passed.
1089This is the normal way to consume lexed text.
1090
1091Interpretation of the buffer's octets can be abstracted out by
1092using the slightly higher-level functions L</lex_peek_unichar> and
1093L</lex_read_unichar>.
1094
1095=cut
1096*/
1097
1098void
1099Perl_lex_read_to(pTHX_ char *ptr)
1100{
1101 char *s;
1102 PERL_ARGS_ASSERT_LEX_READ_TO;
1103 s = PL_parser->bufptr;
1104 if (ptr < s || ptr > PL_parser->bufend)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106 for (; s != ptr; s++)
1107 if (*s == '\n') {
1108 CopLINE_inc(PL_curcop);
1109 PL_parser->linestart = s+1;
1110 }
1111 PL_parser->bufptr = ptr;
1112}
1113
1114/*
1115=for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118up to I<ptr>. The remaining content of the buffer will be moved, and
1119all pointers into the buffer updated appropriately. I<ptr> must not
1120be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121it is not permitted to discard text that has yet to be lexed.
1122
1123Normally it is not necessarily to do this directly, because it suffices to
1124use the implicit discarding behaviour of L</lex_next_chunk> and things
1125based on it. However, if a token stretches across multiple lines,
1126and the lexing code has kept multiple lines of text in the buffer fof
1127that purpose, then after completion of the token it would be wise to
1128explicitly discard the now-unneeded earlier lines, to avoid future
1129multi-line tokens growing the buffer without bound.
1130
1131=cut
1132*/
1133
1134void
1135Perl_lex_discard_to(pTHX_ char *ptr)
1136{
1137 char *buf;
1138 STRLEN discard_len;
1139 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140 buf = SvPVX(PL_parser->linestr);
1141 if (ptr < buf)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143 if (ptr == buf)
1144 return;
1145 if (ptr > PL_parser->bufptr)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147 discard_len = ptr - buf;
1148 if (PL_parser->oldbufptr < ptr)
1149 PL_parser->oldbufptr = ptr;
1150 if (PL_parser->oldoldbufptr < ptr)
1151 PL_parser->oldoldbufptr = ptr;
1152 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153 PL_parser->last_uni = NULL;
1154 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155 PL_parser->last_lop = NULL;
1156 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158 PL_parser->bufend -= discard_len;
1159 PL_parser->bufptr -= discard_len;
1160 PL_parser->oldbufptr -= discard_len;
1161 PL_parser->oldoldbufptr -= discard_len;
1162 if (PL_parser->last_uni)
1163 PL_parser->last_uni -= discard_len;
1164 if (PL_parser->last_lop)
1165 PL_parser->last_lop -= discard_len;
1166}
1167
1168/*
1169=for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171Reads in the next chunk of text to be lexed, appending it to
1172L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1173looked to the end of the current chunk and wants to know more. It is
1174usual, but not necessary, for lexing to have consumed the entirety of
1175the current chunk at this time.
1176
1177If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178chunk (i.e., the current chunk has been entirely consumed), normally the
1179current chunk will be discarded at the same time that the new chunk is
1180read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181will not be discarded. If the current chunk has not been entirely
1182consumed, then it will not be discarded regardless of the flag.
1183
1184Returns true if some new text was added to the buffer, or false if the
1185buffer has reached the end of the input text.
1186
1187=cut
1188*/
1189
1190#define LEX_FAKE_EOF 0x80000000
1191
1192bool
1193Perl_lex_next_chunk(pTHX_ U32 flags)
1194{
1195 SV *linestr;
1196 char *buf;
1197 STRLEN old_bufend_pos, new_bufend_pos;
1198 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1200 bool got_some;
1201 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1202 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1203#ifdef PERL_MAD
1204 flags |= LEX_KEEP_PREVIOUS;
1205#endif /* PERL_MAD */
1206 linestr = PL_parser->linestr;
1207 buf = SvPVX(linestr);
1208 if (!(flags & LEX_KEEP_PREVIOUS) &&
1209 PL_parser->bufptr == PL_parser->bufend) {
1210 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1211 linestart_pos = 0;
1212 if (PL_parser->last_uni != PL_parser->bufend)
1213 PL_parser->last_uni = NULL;
1214 if (PL_parser->last_lop != PL_parser->bufend)
1215 PL_parser->last_lop = NULL;
1216 last_uni_pos = last_lop_pos = 0;
1217 *buf = 0;
1218 SvCUR(linestr) = 0;
1219 } else {
1220 old_bufend_pos = PL_parser->bufend - buf;
1221 bufptr_pos = PL_parser->bufptr - buf;
1222 oldbufptr_pos = PL_parser->oldbufptr - buf;
1223 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1224 linestart_pos = PL_parser->linestart - buf;
1225 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1226 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1227 }
1228 if (flags & LEX_FAKE_EOF) {
1229 goto eof;
1230 } else if (!PL_parser->rsfp) {
1231 got_some = 0;
1232 } else if (filter_gets(linestr, old_bufend_pos)) {
1233 got_some = 1;
1234 } else {
580561a3
Z
1235 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1236 sv_setpvs(linestr, "");
f0e67a1d
Z
1237 eof:
1238 /* End of real input. Close filehandle (unless it was STDIN),
1239 * then add implicit termination.
1240 */
1241 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1242 PerlIO_clearerr(PL_parser->rsfp);
1243 else if (PL_parser->rsfp)
1244 (void)PerlIO_close(PL_parser->rsfp);
1245 PL_parser->rsfp = NULL;
1246 PL_doextract = FALSE;
1247#ifdef PERL_MAD
1248 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1249 PL_faketokens = 1;
1250#endif
1251 if (!PL_in_eval && PL_minus_p) {
1252 sv_catpvs(linestr,
1253 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1254 PL_minus_n = PL_minus_p = 0;
1255 } else if (!PL_in_eval && PL_minus_n) {
1256 sv_catpvs(linestr, /*{*/";}");
1257 PL_minus_n = 0;
1258 } else
1259 sv_catpvs(linestr, ";");
1260 got_some = 1;
1261 }
1262 buf = SvPVX(linestr);
1263 new_bufend_pos = SvCUR(linestr);
1264 PL_parser->bufend = buf + new_bufend_pos;
1265 PL_parser->bufptr = buf + bufptr_pos;
1266 PL_parser->oldbufptr = buf + oldbufptr_pos;
1267 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1268 PL_parser->linestart = buf + linestart_pos;
1269 if (PL_parser->last_uni)
1270 PL_parser->last_uni = buf + last_uni_pos;
1271 if (PL_parser->last_lop)
1272 PL_parser->last_lop = buf + last_lop_pos;
1273 if (got_some && (PERLDB_LINE || PERLDB_SAVESRC) &&
1274 PL_curstash != PL_debstash) {
1275 /* debugger active and we're not compiling the debugger code,
1276 * so store the line into the debugger's array of lines
1277 */
1278 update_debugger_info(NULL, buf+old_bufend_pos,
1279 new_bufend_pos-old_bufend_pos);
1280 }
1281 return got_some;
1282}
1283
1284/*
1285=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1286
1287Looks ahead one (Unicode) character in the text currently being lexed.
1288Returns the codepoint (unsigned integer value) of the next character,
1289or -1 if lexing has reached the end of the input text. To consume the
1290peeked character, use L</lex_read_unichar>.
1291
1292If the next character is in (or extends into) the next chunk of input
1293text, the next chunk will be read in. Normally the current chunk will be
1294discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1295then the current chunk will not be discarded.
1296
1297If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1298is encountered, an exception is generated.
1299
1300=cut
1301*/
1302
1303I32
1304Perl_lex_peek_unichar(pTHX_ U32 flags)
1305{
1306 char *s, *bufend;
1307 if (flags & ~(LEX_KEEP_PREVIOUS))
1308 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1309 s = PL_parser->bufptr;
1310 bufend = PL_parser->bufend;
1311 if (UTF) {
1312 U8 head;
1313 I32 unichar;
1314 STRLEN len, retlen;
1315 if (s == bufend) {
1316 if (!lex_next_chunk(flags))
1317 return -1;
1318 s = PL_parser->bufptr;
1319 bufend = PL_parser->bufend;
1320 }
1321 head = (U8)*s;
1322 if (!(head & 0x80))
1323 return head;
1324 if (head & 0x40) {
1325 len = PL_utf8skip[head];
1326 while ((STRLEN)(bufend-s) < len) {
1327 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1328 break;
1329 s = PL_parser->bufptr;
1330 bufend = PL_parser->bufend;
1331 }
1332 }
1333 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1334 if (retlen == (STRLEN)-1) {
1335 /* malformed UTF-8 */
1336 ENTER;
1337 SAVESPTR(PL_warnhook);
1338 PL_warnhook = PERL_WARNHOOK_FATAL;
1339 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1340 LEAVE;
1341 }
1342 return unichar;
1343 } else {
1344 if (s == bufend) {
1345 if (!lex_next_chunk(flags))
1346 return -1;
1347 s = PL_parser->bufptr;
1348 }
1349 return (U8)*s;
1350 }
1351}
1352
1353/*
1354=for apidoc Amx|I32|lex_read_unichar|U32 flags
1355
1356Reads the next (Unicode) character in the text currently being lexed.
1357Returns the codepoint (unsigned integer value) of the character read,
1358and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1359if lexing has reached the end of the input text. To non-destructively
1360examine the next character, use L</lex_peek_unichar> instead.
1361
1362If the next character is in (or extends into) the next chunk of input
1363text, the next chunk will be read in. Normally the current chunk will be
1364discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1365then the current chunk will not be discarded.
1366
1367If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1368is encountered, an exception is generated.
1369
1370=cut
1371*/
1372
1373I32
1374Perl_lex_read_unichar(pTHX_ U32 flags)
1375{
1376 I32 c;
1377 if (flags & ~(LEX_KEEP_PREVIOUS))
1378 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1379 c = lex_peek_unichar(flags);
1380 if (c != -1) {
1381 if (c == '\n')
1382 CopLINE_inc(PL_curcop);
1383 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1384 }
1385 return c;
1386}
1387
1388/*
1389=for apidoc Amx|void|lex_read_space|U32 flags
1390
1391Reads optional spaces, in Perl style, in the text currently being
1392lexed. The spaces may include ordinary whitespace characters and
1393Perl-style comments. C<#line> directives are processed if encountered.
1394L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1395at a non-space character (or the end of the input text).
1396
1397If spaces extend into the next chunk of input text, the next chunk will
1398be read in. Normally the current chunk will be discarded at the same
1399time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1400chunk will not be discarded.
1401
1402=cut
1403*/
1404
1405void
1406Perl_lex_read_space(pTHX_ U32 flags)
1407{
1408 char *s, *bufend;
1409 bool need_incline = 0;
1410 if (flags & ~(LEX_KEEP_PREVIOUS))
1411 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1412#ifdef PERL_MAD
1413 if (PL_skipwhite) {
1414 sv_free(PL_skipwhite);
1415 PL_skipwhite = NULL;
1416 }
1417 if (PL_madskills)
1418 PL_skipwhite = newSVpvs("");
1419#endif /* PERL_MAD */
1420 s = PL_parser->bufptr;
1421 bufend = PL_parser->bufend;
1422 while (1) {
1423 char c = *s;
1424 if (c == '#') {
1425 do {
1426 c = *++s;
1427 } while (!(c == '\n' || (c == 0 && s == bufend)));
1428 } else if (c == '\n') {
1429 s++;
1430 PL_parser->linestart = s;
1431 if (s == bufend)
1432 need_incline = 1;
1433 else
1434 incline(s);
1435 } else if (isSPACE(c)) {
1436 s++;
1437 } else if (c == 0 && s == bufend) {
1438 bool got_more;
1439#ifdef PERL_MAD
1440 if (PL_madskills)
1441 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1442#endif /* PERL_MAD */
1443 PL_parser->bufptr = s;
1444 CopLINE_inc(PL_curcop);
1445 got_more = lex_next_chunk(flags);
1446 CopLINE_dec(PL_curcop);
1447 s = PL_parser->bufptr;
1448 bufend = PL_parser->bufend;
1449 if (!got_more)
1450 break;
1451 if (need_incline && PL_parser->rsfp) {
1452 incline(s);
1453 need_incline = 0;
1454 }
1455 } else {
1456 break;
1457 }
1458 }
1459#ifdef PERL_MAD
1460 if (PL_madskills)
1461 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1462#endif /* PERL_MAD */
1463 PL_parser->bufptr = s;
1464}
1465
1466/*
ffb4593c
NT
1467 * S_incline
1468 * This subroutine has nothing to do with tilting, whether at windmills
1469 * or pinball tables. Its name is short for "increment line". It
57843af0 1470 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1471 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1472 * # line 500 "foo.pm"
1473 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1474 */
1475
76e3520e 1476STATIC void
d9095cec 1477S_incline(pTHX_ const char *s)
463ee0b2 1478{
97aff369 1479 dVAR;
d9095cec
NC
1480 const char *t;
1481 const char *n;
1482 const char *e;
463ee0b2 1483
7918f24d
NC
1484 PERL_ARGS_ASSERT_INCLINE;
1485
57843af0 1486 CopLINE_inc(PL_curcop);
463ee0b2
LW
1487 if (*s++ != '#')
1488 return;
d4c19fe8
AL
1489 while (SPACE_OR_TAB(*s))
1490 s++;
73659bf1
GS
1491 if (strnEQ(s, "line", 4))
1492 s += 4;
1493 else
1494 return;
084592ab 1495 if (SPACE_OR_TAB(*s))
73659bf1 1496 s++;
4e553d73 1497 else
73659bf1 1498 return;
d4c19fe8
AL
1499 while (SPACE_OR_TAB(*s))
1500 s++;
463ee0b2
LW
1501 if (!isDIGIT(*s))
1502 return;
d4c19fe8 1503
463ee0b2
LW
1504 n = s;
1505 while (isDIGIT(*s))
1506 s++;
07714eb4 1507 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1508 return;
bf4acbe4 1509 while (SPACE_OR_TAB(*s))
463ee0b2 1510 s++;
73659bf1 1511 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1512 s++;
73659bf1
GS
1513 e = t + 1;
1514 }
463ee0b2 1515 else {
c35e046a
AL
1516 t = s;
1517 while (!isSPACE(*t))
1518 t++;
73659bf1 1519 e = t;
463ee0b2 1520 }
bf4acbe4 1521 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1522 e++;
1523 if (*e != '\n' && *e != '\0')
1524 return; /* false alarm */
1525
f4dd75d9 1526 if (t - s > 0) {
d9095cec 1527 const STRLEN len = t - s;
8a5ee598 1528#ifndef USE_ITHREADS
19bad673
NC
1529 SV *const temp_sv = CopFILESV(PL_curcop);
1530 const char *cf;
1531 STRLEN tmplen;
1532
1533 if (temp_sv) {
1534 cf = SvPVX(temp_sv);
1535 tmplen = SvCUR(temp_sv);
1536 } else {
1537 cf = NULL;
1538 tmplen = 0;
1539 }
1540
42d9b98d 1541 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1542 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1543 * to *{"::_<newfilename"} */
44867030
NC
1544 /* However, the long form of evals is only turned on by the
1545 debugger - usually they're "(eval %lu)" */
1546 char smallbuf[128];
1547 char *tmpbuf;
1548 GV **gvp;
d9095cec 1549 STRLEN tmplen2 = len;
798b63bc 1550 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1551 tmpbuf = smallbuf;
1552 else
2ae0db35 1553 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1554 tmpbuf[0] = '_';
1555 tmpbuf[1] = '<';
2ae0db35 1556 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1557 tmplen += 2;
8a5ee598
RGS
1558 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1559 if (gvp) {
44867030
NC
1560 char *tmpbuf2;
1561 GV *gv2;
1562
1563 if (tmplen2 + 2 <= sizeof smallbuf)
1564 tmpbuf2 = smallbuf;
1565 else
1566 Newx(tmpbuf2, tmplen2 + 2, char);
1567
1568 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1569 /* Either they malloc'd it, or we malloc'd it,
1570 so no prefix is present in ours. */
1571 tmpbuf2[0] = '_';
1572 tmpbuf2[1] = '<';
1573 }
1574
1575 memcpy(tmpbuf2 + 2, s, tmplen2);
1576 tmplen2 += 2;
1577
8a5ee598 1578 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1579 if (!isGV(gv2)) {
8a5ee598 1580 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1581 /* adjust ${"::_<newfilename"} to store the new file name */
1582 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1583 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1584 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1585 }
44867030
NC
1586
1587 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1588 }
e66cf94c 1589 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1590 }
8a5ee598 1591#endif
05ec9bb3 1592 CopFILE_free(PL_curcop);
d9095cec 1593 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1594 }
57843af0 1595 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1596}
1597
29595ff2 1598#ifdef PERL_MAD
cd81e915 1599/* skip space before PL_thistoken */
29595ff2
NC
1600
1601STATIC char *
1602S_skipspace0(pTHX_ register char *s)
1603{
7918f24d
NC
1604 PERL_ARGS_ASSERT_SKIPSPACE0;
1605
29595ff2
NC
1606 s = skipspace(s);
1607 if (!PL_madskills)
1608 return s;
cd81e915
NC
1609 if (PL_skipwhite) {
1610 if (!PL_thiswhite)
6b29d1f5 1611 PL_thiswhite = newSVpvs("");
cd81e915
NC
1612 sv_catsv(PL_thiswhite, PL_skipwhite);
1613 sv_free(PL_skipwhite);
1614 PL_skipwhite = 0;
1615 }
1616 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1617 return s;
1618}
1619
cd81e915 1620/* skip space after PL_thistoken */
29595ff2
NC
1621
1622STATIC char *
1623S_skipspace1(pTHX_ register char *s)
1624{
d4c19fe8 1625 const char *start = s;
29595ff2
NC
1626 I32 startoff = start - SvPVX(PL_linestr);
1627
7918f24d
NC
1628 PERL_ARGS_ASSERT_SKIPSPACE1;
1629
29595ff2
NC
1630 s = skipspace(s);
1631 if (!PL_madskills)
1632 return s;
1633 start = SvPVX(PL_linestr) + startoff;
cd81e915 1634 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1635 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1636 PL_thistoken = newSVpvn(tstart, start - tstart);
1637 }
1638 PL_realtokenstart = -1;
1639 if (PL_skipwhite) {
1640 if (!PL_nextwhite)
6b29d1f5 1641 PL_nextwhite = newSVpvs("");
cd81e915
NC
1642 sv_catsv(PL_nextwhite, PL_skipwhite);
1643 sv_free(PL_skipwhite);
1644 PL_skipwhite = 0;
29595ff2
NC
1645 }
1646 return s;
1647}
1648
1649STATIC char *
1650S_skipspace2(pTHX_ register char *s, SV **svp)
1651{
c35e046a
AL
1652 char *start;
1653 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1654 const I32 startoff = s - SvPVX(PL_linestr);
1655
7918f24d
NC
1656 PERL_ARGS_ASSERT_SKIPSPACE2;
1657
29595ff2
NC
1658 s = skipspace(s);
1659 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1660 if (!PL_madskills || !svp)
1661 return s;
1662 start = SvPVX(PL_linestr) + startoff;
cd81e915 1663 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1664 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1665 PL_thistoken = newSVpvn(tstart, start - tstart);
1666 PL_realtokenstart = -1;
29595ff2 1667 }
cd81e915 1668 if (PL_skipwhite) {
29595ff2 1669 if (!*svp)
6b29d1f5 1670 *svp = newSVpvs("");
cd81e915
NC
1671 sv_setsv(*svp, PL_skipwhite);
1672 sv_free(PL_skipwhite);
1673 PL_skipwhite = 0;
29595ff2
NC
1674 }
1675
1676 return s;
1677}
1678#endif
1679
80a702cd 1680STATIC void
15f169a1 1681S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1682{
1683 AV *av = CopFILEAVx(PL_curcop);
1684 if (av) {
b9f83d2f 1685 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1686 if (orig_sv)
1687 sv_setsv(sv, orig_sv);
1688 else
1689 sv_setpvn(sv, buf, len);
80a702cd
RGS
1690 (void)SvIOK_on(sv);
1691 SvIV_set(sv, 0);
1692 av_store(av, (I32)CopLINE(PL_curcop), sv);
1693 }
1694}
1695
ffb4593c
NT
1696/*
1697 * S_skipspace
1698 * Called to gobble the appropriate amount and type of whitespace.
1699 * Skips comments as well.
1700 */
1701
76e3520e 1702STATIC char *
cea2e8a9 1703S_skipspace(pTHX_ register char *s)
a687059c 1704{
5db06880 1705#ifdef PERL_MAD
f0e67a1d
Z
1706 char *start = s;
1707#endif /* PERL_MAD */
7918f24d 1708 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1709#ifdef PERL_MAD
cd81e915
NC
1710 if (PL_skipwhite) {
1711 sv_free(PL_skipwhite);
f0e67a1d 1712 PL_skipwhite = NULL;
5db06880 1713 }
f0e67a1d 1714#endif /* PERL_MAD */
3280af22 1715 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1716 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1717 s++;
f0e67a1d
Z
1718 } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1719 while (isSPACE(*s) && *s != '\n')
1720 s++;
1721 if (*s == '#') {
1722 do {
463ee0b2 1723 s++;
f0e67a1d 1724 } while (s != PL_bufend && *s != '\n');
463ee0b2 1725 }
f0e67a1d
Z
1726 if (*s == '\n')
1727 s++;
1728 } else {
1729 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1730 PL_bufptr = s;
1731 lex_read_space(LEX_KEEP_PREVIOUS);
3280af22 1732 s = PL_bufptr;
f0e67a1d
Z
1733 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1734 if (PL_linestart > PL_bufptr)
1735 PL_bufptr = PL_linestart;
1736 return s;
463ee0b2 1737 }
5db06880 1738#ifdef PERL_MAD
f0e67a1d
Z
1739 if (PL_madskills)
1740 PL_skipwhite = newSVpvn(start, s-start);
1741#endif /* PERL_MAD */
5db06880 1742 return s;
a687059c 1743}
378cc40b 1744
ffb4593c
NT
1745/*
1746 * S_check_uni
1747 * Check the unary operators to ensure there's no ambiguity in how they're
1748 * used. An ambiguous piece of code would be:
1749 * rand + 5
1750 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1751 * the +5 is its argument.
1752 */
1753
76e3520e 1754STATIC void
cea2e8a9 1755S_check_uni(pTHX)
ba106d47 1756{
97aff369 1757 dVAR;
d4c19fe8
AL
1758 const char *s;
1759 const char *t;
2f3197b3 1760
3280af22 1761 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1762 return;
3280af22
NIS
1763 while (isSPACE(*PL_last_uni))
1764 PL_last_uni++;
c35e046a
AL
1765 s = PL_last_uni;
1766 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1767 s++;
3280af22 1768 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1769 return;
6136c704 1770
9b387841
NC
1771 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1772 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1773 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1774}
1775
ffb4593c
NT
1776/*
1777 * LOP : macro to build a list operator. Its behaviour has been replaced
1778 * with a subroutine, S_lop() for which LOP is just another name.
1779 */
1780
a0d0e21e
LW
1781#define LOP(f,x) return lop(f,x,s)
1782
ffb4593c
NT
1783/*
1784 * S_lop
1785 * Build a list operator (or something that might be one). The rules:
1786 * - if we have a next token, then it's a list operator [why?]
1787 * - if the next thing is an opening paren, then it's a function
1788 * - else it's a list operator
1789 */
1790
76e3520e 1791STATIC I32
a0be28da 1792S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1793{
97aff369 1794 dVAR;
7918f24d
NC
1795
1796 PERL_ARGS_ASSERT_LOP;
1797
6154021b 1798 pl_yylval.ival = f;
35c8bce7 1799 CLINE;
3280af22
NIS
1800 PL_expect = x;
1801 PL_bufptr = s;
1802 PL_last_lop = PL_oldbufptr;
eb160463 1803 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1804#ifdef PERL_MAD
1805 if (PL_lasttoke)
1806 return REPORT(LSTOP);
1807#else
3280af22 1808 if (PL_nexttoke)
bbf60fe6 1809 return REPORT(LSTOP);
5db06880 1810#endif
79072805 1811 if (*s == '(')
bbf60fe6 1812 return REPORT(FUNC);
29595ff2 1813 s = PEEKSPACE(s);
79072805 1814 if (*s == '(')
bbf60fe6 1815 return REPORT(FUNC);
79072805 1816 else
bbf60fe6 1817 return REPORT(LSTOP);
79072805
LW
1818}
1819
5db06880
NC
1820#ifdef PERL_MAD
1821 /*
1822 * S_start_force
1823 * Sets up for an eventual force_next(). start_force(0) basically does
1824 * an unshift, while start_force(-1) does a push. yylex removes items
1825 * on the "pop" end.
1826 */
1827
1828STATIC void
1829S_start_force(pTHX_ int where)
1830{
1831 int i;
1832
cd81e915 1833 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1834 where = PL_lasttoke;
cd81e915
NC
1835 assert(PL_curforce < 0 || PL_curforce == where);
1836 if (PL_curforce != where) {
5db06880
NC
1837 for (i = PL_lasttoke; i > where; --i) {
1838 PL_nexttoke[i] = PL_nexttoke[i-1];
1839 }
1840 PL_lasttoke++;
1841 }
cd81e915 1842 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1843 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1844 PL_curforce = where;
1845 if (PL_nextwhite) {
5db06880 1846 if (PL_madskills)
6b29d1f5 1847 curmad('^', newSVpvs(""));
cd81e915 1848 CURMAD('_', PL_nextwhite);
5db06880
NC
1849 }
1850}
1851
1852STATIC void
1853S_curmad(pTHX_ char slot, SV *sv)
1854{
1855 MADPROP **where;
1856
1857 if (!sv)
1858 return;
cd81e915
NC
1859 if (PL_curforce < 0)
1860 where = &PL_thismad;
5db06880 1861 else
cd81e915 1862 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1863
cd81e915 1864 if (PL_faketokens)
76f68e9b 1865 sv_setpvs(sv, "");
5db06880
NC
1866 else {
1867 if (!IN_BYTES) {
1868 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1869 SvUTF8_on(sv);
1870 else if (PL_encoding) {
1871 sv_recode_to_utf8(sv, PL_encoding);
1872 }
1873 }
1874 }
1875
1876 /* keep a slot open for the head of the list? */
1877 if (slot != '_' && *where && (*where)->mad_key == '^') {
1878 (*where)->mad_key = slot;
daba3364 1879 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1880 (*where)->mad_val = (void*)sv;
1881 }
1882 else
1883 addmad(newMADsv(slot, sv), where, 0);
1884}
1885#else
b3f24c00
MHM
1886# define start_force(where) NOOP
1887# define curmad(slot, sv) NOOP
5db06880
NC
1888#endif
1889
ffb4593c
NT
1890/*
1891 * S_force_next
9cbb5ea2 1892 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1893 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1894 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1895 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1896 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1897 */
1898
4e553d73 1899STATIC void
cea2e8a9 1900S_force_next(pTHX_ I32 type)
79072805 1901{
97aff369 1902 dVAR;
704d4215
GG
1903#ifdef DEBUGGING
1904 if (DEBUG_T_TEST) {
1905 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1906 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1907 }
1908#endif
5db06880 1909#ifdef PERL_MAD
cd81e915 1910 if (PL_curforce < 0)
5db06880 1911 start_force(PL_lasttoke);
cd81e915 1912 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1913 if (PL_lex_state != LEX_KNOWNEXT)
1914 PL_lex_defer = PL_lex_state;
1915 PL_lex_state = LEX_KNOWNEXT;
1916 PL_lex_expect = PL_expect;
cd81e915 1917 PL_curforce = -1;
5db06880 1918#else
3280af22
NIS
1919 PL_nexttype[PL_nexttoke] = type;
1920 PL_nexttoke++;
1921 if (PL_lex_state != LEX_KNOWNEXT) {
1922 PL_lex_defer = PL_lex_state;
1923 PL_lex_expect = PL_expect;
1924 PL_lex_state = LEX_KNOWNEXT;
79072805 1925 }
5db06880 1926#endif
79072805
LW
1927}
1928
d0a148a6 1929STATIC SV *
15f169a1 1930S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1931{
97aff369 1932 dVAR;
740cce10 1933 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1934 !IN_BYTES
1935 && UTF
1936 && !is_ascii_string((const U8*)start, len)
740cce10 1937 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1938 return sv;
1939}
1940
ffb4593c
NT
1941/*
1942 * S_force_word
1943 * When the lexer knows the next thing is a word (for instance, it has
1944 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1945 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1946 * lookahead.
ffb4593c
NT
1947 *
1948 * Arguments:
b1b65b59 1949 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1950 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1951 * int check_keyword : if true, Perl checks to make sure the word isn't
1952 * a keyword (do this if the word is a label, e.g. goto FOO)
1953 * int allow_pack : if true, : characters will also be allowed (require,
1954 * use, etc. do this)
9cbb5ea2 1955 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1956 */
1957
76e3520e 1958STATIC char *
cea2e8a9 1959S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1960{
97aff369 1961 dVAR;
463ee0b2
LW
1962 register char *s;
1963 STRLEN len;
4e553d73 1964
7918f24d
NC
1965 PERL_ARGS_ASSERT_FORCE_WORD;
1966
29595ff2 1967 start = SKIPSPACE1(start);
463ee0b2 1968 s = start;
7e2040f0 1969 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1970 (allow_pack && *s == ':') ||
15f0808c 1971 (allow_initial_tick && *s == '\'') )
a0d0e21e 1972 {
3280af22 1973 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1974 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1975 return start;
cd81e915 1976 start_force(PL_curforce);
5db06880
NC
1977 if (PL_madskills)
1978 curmad('X', newSVpvn(start,s-start));
463ee0b2 1979 if (token == METHOD) {
29595ff2 1980 s = SKIPSPACE1(s);
463ee0b2 1981 if (*s == '(')
3280af22 1982 PL_expect = XTERM;
463ee0b2 1983 else {
3280af22 1984 PL_expect = XOPERATOR;
463ee0b2 1985 }
79072805 1986 }
e74e6b3d 1987 if (PL_madskills)
63575281 1988 curmad('g', newSVpvs( "forced" ));
9ded7720 1989 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1990 = (OP*)newSVOP(OP_CONST,0,
1991 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1992 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1993 force_next(token);
1994 }
1995 return s;
1996}
1997
ffb4593c
NT
1998/*
1999 * S_force_ident
9cbb5ea2 2000 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2001 * text only contains the "foo" portion. The first argument is a pointer
2002 * to the "foo", and the second argument is the type symbol to prefix.
2003 * Forces the next token to be a "WORD".
9cbb5ea2 2004 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2005 */
2006
76e3520e 2007STATIC void
bfed75c6 2008S_force_ident(pTHX_ register const char *s, int kind)
79072805 2009{
97aff369 2010 dVAR;
7918f24d
NC
2011
2012 PERL_ARGS_ASSERT_FORCE_IDENT;
2013
c35e046a 2014 if (*s) {
90e5519e
NC
2015 const STRLEN len = strlen(s);
2016 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2017 start_force(PL_curforce);
9ded7720 2018 NEXTVAL_NEXTTOKE.opval = o;
79072805 2019 force_next(WORD);
748a9306 2020 if (kind) {
11343788 2021 o->op_private = OPpCONST_ENTERED;
55497cff
PP
2022 /* XXX see note in pp_entereval() for why we forgo typo
2023 warnings if the symbol must be introduced in an eval.
2024 GSAR 96-10-12 */
90e5519e
NC
2025 gv_fetchpvn_flags(s, len,
2026 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2027 : GV_ADD,
2028 kind == '$' ? SVt_PV :
2029 kind == '@' ? SVt_PVAV :
2030 kind == '%' ? SVt_PVHV :
a0d0e21e 2031 SVt_PVGV
90e5519e 2032 );
748a9306 2033 }
79072805
LW
2034 }
2035}
2036
1571675a
GS
2037NV
2038Perl_str_to_version(pTHX_ SV *sv)
2039{
2040 NV retval = 0.0;
2041 NV nshift = 1.0;
2042 STRLEN len;
cfd0369c 2043 const char *start = SvPV_const(sv,len);
9d4ba2ae 2044 const char * const end = start + len;
504618e9 2045 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2046
2047 PERL_ARGS_ASSERT_STR_TO_VERSION;
2048
1571675a 2049 while (start < end) {
ba210ebe 2050 STRLEN skip;
1571675a
GS
2051 UV n;
2052 if (utf)
9041c2e3 2053 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2054 else {
2055 n = *(U8*)start;
2056 skip = 1;
2057 }
2058 retval += ((NV)n)/nshift;
2059 start += skip;
2060 nshift *= 1000;
2061 }
2062 return retval;
2063}
2064
4e553d73 2065/*
ffb4593c
NT
2066 * S_force_version
2067 * Forces the next token to be a version number.
e759cc13
RGS
2068 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2069 * and if "guessing" is TRUE, then no new token is created (and the caller
2070 * must use an alternative parsing method).
ffb4593c
NT
2071 */
2072
76e3520e 2073STATIC char *
e759cc13 2074S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2075{
97aff369 2076 dVAR;
5f66b61c 2077 OP *version = NULL;
44dcb63b 2078 char *d;
5db06880
NC
2079#ifdef PERL_MAD
2080 I32 startoff = s - SvPVX(PL_linestr);
2081#endif
89bfa8cd 2082
7918f24d
NC
2083 PERL_ARGS_ASSERT_FORCE_VERSION;
2084
29595ff2 2085 s = SKIPSPACE1(s);
89bfa8cd 2086
44dcb63b 2087 d = s;
dd629d5b 2088 if (*d == 'v')
44dcb63b 2089 d++;
44dcb63b 2090 if (isDIGIT(*d)) {
e759cc13
RGS
2091 while (isDIGIT(*d) || *d == '_' || *d == '.')
2092 d++;
5db06880
NC
2093#ifdef PERL_MAD
2094 if (PL_madskills) {
cd81e915 2095 start_force(PL_curforce);
5db06880
NC
2096 curmad('X', newSVpvn(s,d-s));
2097 }
2098#endif
9f3d182e 2099 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 2100 SV *ver;
6154021b
RGS
2101 s = scan_num(s, &pl_yylval);
2102 version = pl_yylval.opval;
dd629d5b
GS
2103 ver = cSVOPx(version)->op_sv;
2104 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2105 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2106 SvNV_set(ver, str_to_version(ver));
1571675a 2107 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2108 }
89bfa8cd 2109 }
5db06880
NC
2110 else if (guessing) {
2111#ifdef PERL_MAD
2112 if (PL_madskills) {
cd81e915
NC
2113 sv_free(PL_nextwhite); /* let next token collect whitespace */
2114 PL_nextwhite = 0;
5db06880
NC
2115 s = SvPVX(PL_linestr) + startoff;
2116 }
2117#endif
e759cc13 2118 return s;
5db06880 2119 }
89bfa8cd
PP
2120 }
2121
5db06880
NC
2122#ifdef PERL_MAD
2123 if (PL_madskills && !version) {
cd81e915
NC
2124 sv_free(PL_nextwhite); /* let next token collect whitespace */
2125 PL_nextwhite = 0;
5db06880
NC
2126 s = SvPVX(PL_linestr) + startoff;
2127 }
2128#endif
89bfa8cd 2129 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2130 start_force(PL_curforce);
9ded7720 2131 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2132 force_next(WORD);
89bfa8cd 2133
e759cc13 2134 return s;
89bfa8cd
PP
2135}
2136
ffb4593c
NT
2137/*
2138 * S_tokeq
2139 * Tokenize a quoted string passed in as an SV. It finds the next
2140 * chunk, up to end of string or a backslash. It may make a new
2141 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2142 * turns \\ into \.
2143 */
2144
76e3520e 2145STATIC SV *
cea2e8a9 2146S_tokeq(pTHX_ SV *sv)
79072805 2147{
97aff369 2148 dVAR;
79072805
LW
2149 register char *s;
2150 register char *send;
2151 register char *d;
b3ac6de7
IZ
2152 STRLEN len = 0;
2153 SV *pv = sv;
79072805 2154
7918f24d
NC
2155 PERL_ARGS_ASSERT_TOKEQ;
2156
79072805 2157 if (!SvLEN(sv))
b3ac6de7 2158 goto finish;
79072805 2159
a0d0e21e 2160 s = SvPV_force(sv, len);
21a311ee 2161 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2162 goto finish;
463ee0b2 2163 send = s + len;
79072805
LW
2164 while (s < send && *s != '\\')
2165 s++;
2166 if (s == send)
b3ac6de7 2167 goto finish;
79072805 2168 d = s;
be4731d2 2169 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2170 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2171 }
79072805
LW
2172 while (s < send) {
2173 if (*s == '\\') {
a0d0e21e 2174 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2175 s++; /* all that, just for this */
2176 }
2177 *d++ = *s++;
2178 }
2179 *d = '\0';
95a20fc0 2180 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2181 finish:
3280af22 2182 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2183 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2184 return sv;
2185}
2186
ffb4593c
NT
2187/*
2188 * Now come three functions related to double-quote context,
2189 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2190 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2191 * interact with PL_lex_state, and create fake ( ... ) argument lists
2192 * to handle functions and concatenation.
2193 * They assume that whoever calls them will be setting up a fake
2194 * join call, because each subthing puts a ',' after it. This lets
2195 * "lower \luPpEr"
2196 * become
2197 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2198 *
2199 * (I'm not sure whether the spurious commas at the end of lcfirst's
2200 * arguments and join's arguments are created or not).
2201 */
2202
2203/*
2204 * S_sublex_start
6154021b 2205 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2206 *
2207 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2208 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2209 *
2210 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2211 *
2212 * Everything else becomes a FUNC.
2213 *
2214 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2215 * had an OP_CONST or OP_READLINE). This just sets us up for a
2216 * call to S_sublex_push().
2217 */
2218
76e3520e 2219STATIC I32
cea2e8a9 2220S_sublex_start(pTHX)
79072805 2221{
97aff369 2222 dVAR;
6154021b 2223 register const I32 op_type = pl_yylval.ival;
79072805
LW
2224
2225 if (op_type == OP_NULL) {
6154021b 2226 pl_yylval.opval = PL_lex_op;
5f66b61c 2227 PL_lex_op = NULL;
79072805
LW
2228 return THING;
2229 }
2230 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2231 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2232
2233 if (SvTYPE(sv) == SVt_PVIV) {
2234 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2235 STRLEN len;
96a5add6 2236 const char * const p = SvPV_const(sv, len);
740cce10 2237 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2238 SvREFCNT_dec(sv);
2239 sv = nsv;
4e553d73 2240 }
6154021b 2241 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2242 PL_lex_stuff = NULL;
6f33ba73
RGS
2243 /* Allow <FH> // "foo" */
2244 if (op_type == OP_READLINE)
2245 PL_expect = XTERMORDORDOR;
79072805
LW
2246 return THING;
2247 }
e3f73d4e
RGS
2248 else if (op_type == OP_BACKTICK && PL_lex_op) {
2249 /* readpipe() vas overriden */
2250 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2251 pl_yylval.opval = PL_lex_op;
9b201d7d 2252 PL_lex_op = NULL;
e3f73d4e
RGS
2253 PL_lex_stuff = NULL;
2254 return THING;
2255 }
79072805 2256
3280af22 2257 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2258 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2259 PL_sublex_info.sub_op = PL_lex_op;
2260 PL_lex_state = LEX_INTERPPUSH;
55497cff 2261
3280af22
NIS
2262 PL_expect = XTERM;
2263 if (PL_lex_op) {
6154021b 2264 pl_yylval.opval = PL_lex_op;
5f66b61c 2265 PL_lex_op = NULL;
55497cff
PP
2266 return PMFUNC;
2267 }
2268 else
2269 return FUNC;
2270}
2271
ffb4593c
NT
2272/*
2273 * S_sublex_push
2274 * Create a new scope to save the lexing state. The scope will be
2275 * ended in S_sublex_done. Returns a '(', starting the function arguments
2276 * to the uc, lc, etc. found before.
2277 * Sets PL_lex_state to LEX_INTERPCONCAT.
2278 */
2279
76e3520e 2280STATIC I32
cea2e8a9 2281S_sublex_push(pTHX)
55497cff 2282{
27da23d5 2283 dVAR;
f46d017c 2284 ENTER;
55497cff 2285
3280af22 2286 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2287 SAVEBOOL(PL_lex_dojoin);
3280af22 2288 SAVEI32(PL_lex_brackets);
3280af22
NIS
2289 SAVEI32(PL_lex_casemods);
2290 SAVEI32(PL_lex_starts);
651b5b28 2291 SAVEI8(PL_lex_state);
7766f137 2292 SAVEVPTR(PL_lex_inpat);
98246f1e 2293 SAVEI16(PL_lex_inwhat);
57843af0 2294 SAVECOPLINE(PL_curcop);
3280af22 2295 SAVEPPTR(PL_bufptr);
8452ff4b 2296 SAVEPPTR(PL_bufend);
3280af22
NIS
2297 SAVEPPTR(PL_oldbufptr);
2298 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2299 SAVEPPTR(PL_last_lop);
2300 SAVEPPTR(PL_last_uni);
3280af22
NIS
2301 SAVEPPTR(PL_linestart);
2302 SAVESPTR(PL_linestr);
8edd5f42
RGS
2303 SAVEGENERICPV(PL_lex_brackstack);
2304 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2305
2306 PL_linestr = PL_lex_stuff;
a0714e2c 2307 PL_lex_stuff = NULL;
3280af22 2308
9cbb5ea2
GS
2309 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2310 = SvPVX(PL_linestr);
3280af22 2311 PL_bufend += SvCUR(PL_linestr);
bd61b366 2312 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2313 SAVEFREESV(PL_linestr);
2314
2315 PL_lex_dojoin = FALSE;
2316 PL_lex_brackets = 0;
a02a5408
JC
2317 Newx(PL_lex_brackstack, 120, char);
2318 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2319 PL_lex_casemods = 0;
2320 *PL_lex_casestack = '\0';
2321 PL_lex_starts = 0;
2322 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2323 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2324
2325 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2326 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2327 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2328 else
5f66b61c 2329 PL_lex_inpat = NULL;
79072805 2330
55497cff 2331 return '(';
79072805
LW
2332}
2333
ffb4593c
NT
2334/*
2335 * S_sublex_done
2336 * Restores lexer state after a S_sublex_push.
2337 */
2338
76e3520e 2339STATIC I32
cea2e8a9 2340S_sublex_done(pTHX)
79072805 2341{
27da23d5 2342 dVAR;
3280af22 2343 if (!PL_lex_starts++) {
396482e1 2344 SV * const sv = newSVpvs("");
9aa983d2
JH
2345 if (SvUTF8(PL_linestr))
2346 SvUTF8_on(sv);
3280af22 2347 PL_expect = XOPERATOR;
6154021b 2348 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2349 return THING;
2350 }
2351
3280af22
NIS
2352 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2353 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2354 return yylex();
79072805
LW
2355 }
2356
ffb4593c 2357 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2358 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2359 PL_linestr = PL_lex_repl;
2360 PL_lex_inpat = 0;
2361 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2362 PL_bufend += SvCUR(PL_linestr);
bd61b366 2363 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2364 SAVEFREESV(PL_linestr);
2365 PL_lex_dojoin = FALSE;
2366 PL_lex_brackets = 0;
3280af22
NIS
2367 PL_lex_casemods = 0;
2368 *PL_lex_casestack = '\0';
2369 PL_lex_starts = 0;
25da4f38 2370 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2371 PL_lex_state = LEX_INTERPNORMAL;
2372 PL_lex_starts++;
e9fa98b2
HS
2373 /* we don't clear PL_lex_repl here, so that we can check later
2374 whether this is an evalled subst; that means we rely on the
2375 logic to ensure sublex_done() is called again only via the
2376 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2377 }
e9fa98b2 2378 else {
3280af22 2379 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2380 PL_lex_repl = NULL;
e9fa98b2 2381 }
79072805 2382 return ',';
ffed7fef
LW
2383 }
2384 else {
5db06880
NC
2385#ifdef PERL_MAD
2386 if (PL_madskills) {
cd81e915
NC
2387 if (PL_thiswhite) {
2388 if (!PL_endwhite)
6b29d1f5 2389 PL_endwhite = newSVpvs("");
cd81e915
NC
2390 sv_catsv(PL_endwhite, PL_thiswhite);
2391 PL_thiswhite = 0;
2392 }
2393 if (PL_thistoken)
76f68e9b 2394 sv_setpvs(PL_thistoken,"");
5db06880 2395 else
cd81e915 2396 PL_realtokenstart = -1;
5db06880
NC
2397 }
2398#endif
f46d017c 2399 LEAVE;
3280af22
NIS
2400 PL_bufend = SvPVX(PL_linestr);
2401 PL_bufend += SvCUR(PL_linestr);
2402 PL_expect = XOPERATOR;
09bef843 2403 PL_sublex_info.sub_inwhat = 0;
79072805 2404 return ')';
ffed7fef
LW
2405 }
2406}
2407
02aa26ce
NT
2408/*
2409 scan_const
2410
2411 Extracts a pattern, double-quoted string, or transliteration. This
2412 is terrifying code.
2413
94def140 2414 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2415 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2416 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2417
94def140
ST
2418 Returns a pointer to the character scanned up to. If this is
2419 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2420 successfully parsed), will leave an OP for the substring scanned
6154021b 2421 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2422 by looking at the next characters herself.
2423
02aa26ce
NT
2424 In patterns:
2425 backslashes:
2426 double-quoted style: \r and \n
2427 regexp special ones: \D \s
94def140
ST
2428 constants: \x31
2429 backrefs: \1
02aa26ce
NT
2430 case and quoting: \U \Q \E
2431 stops on @ and $, but not for $ as tail anchor
2432
2433 In transliterations:
2434 characters are VERY literal, except for - not at the start or end
94def140
ST
2435 of the string, which indicates a range. If the range is in bytes,
2436 scan_const expands the range to the full set of intermediate
2437 characters. If the range is in utf8, the hyphen is replaced with
2438 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2439
2440 In double-quoted strings:
2441 backslashes:
2442 double-quoted style: \r and \n
94def140
ST
2443 constants: \x31
2444 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2445 case and quoting: \U \Q \E
2446 stops on @ and $
2447
2448 scan_const does *not* construct ops to handle interpolated strings.
2449 It stops processing as soon as it finds an embedded $ or @ variable
2450 and leaves it to the caller to work out what's going on.
2451
94def140
ST
2452 embedded arrays (whether in pattern or not) could be:
2453 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2454
2455 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2456
2457 $ in pattern could be $foo or could be tail anchor. Assumption:
2458 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2459 followed by one of "()| \r\n\t"
02aa26ce
NT
2460
2461 \1 (backreferences) are turned into $1
2462
2463 The structure of the code is
2464 while (there's a character to process) {
94def140
ST
2465 handle transliteration ranges
2466 skip regexp comments /(?#comment)/ and codes /(?{code})/
2467 skip #-initiated comments in //x patterns
2468 check for embedded arrays
02aa26ce
NT
2469 check for embedded scalars
2470 if (backslash) {
94def140
ST
2471 leave intact backslashes from leaveit (below)
2472 deprecate \1 in substitution replacements
02aa26ce
NT
2473 handle string-changing backslashes \l \U \Q \E, etc.
2474 switch (what was escaped) {
94def140
ST
2475 handle \- in a transliteration (becomes a literal -)
2476 handle \132 (octal characters)
2477 handle \x15 and \x{1234} (hex characters)
2478 handle \N{name} (named characters)
2479 handle \cV (control characters)
2480 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2481 } (end switch)
77a135fe 2482 continue
02aa26ce 2483 } (end if backslash)
77a135fe 2484 handle regular character
02aa26ce 2485 } (end while character to read)
4e553d73 2486
02aa26ce
NT
2487*/
2488
76e3520e 2489STATIC char *
cea2e8a9 2490S_scan_const(pTHX_ char *start)
79072805 2491{
97aff369 2492 dVAR;
3280af22 2493 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2494 SV *sv = newSV(send - start); /* sv for the constant. See
2495 note below on sizing. */
02aa26ce
NT
2496 register char *s = start; /* start of the constant */
2497 register char *d = SvPVX(sv); /* destination for copies */
2498 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2499 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2500 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2501 I32 this_utf8 = UTF; /* Is the source string assumed
2502 to be UTF8? But, this can
2503 show as true when the source
2504 isn't utf8, as for example
2505 when it is entirely composed
2506 of hex constants */
2507
2508 /* Note on sizing: The scanned constant is placed into sv, which is
2509 * initialized by newSV() assuming one byte of output for every byte of
2510 * input. This routine expects newSV() to allocate an extra byte for a
2511 * trailing NUL, which this routine will append if it gets to the end of
2512 * the input. There may be more bytes of input than output (eg., \N{LATIN
2513 * CAPITAL LETTER A}), or more output than input if the constant ends up
2514 * recoded to utf8, but each time a construct is found that might increase
2515 * the needed size, SvGROW() is called. Its size parameter each time is
2516 * based on the best guess estimate at the time, namely the length used so
2517 * far, plus the length the current construct will occupy, plus room for
2518 * the trailing NUL, plus one byte for every input byte still unscanned */
2519
012bcf8d 2520 UV uv;
4c3a8340
ST
2521#ifdef EBCDIC
2522 UV literal_endpoint = 0;
e294cc5d 2523 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2524#endif
012bcf8d 2525
7918f24d
NC
2526 PERL_ARGS_ASSERT_SCAN_CONST;
2527
2b9d42f0
NIS
2528 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2529 /* If we are doing a trans and we know we want UTF8 set expectation */
2530 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2531 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2532 }
2533
2534
79072805 2535 while (s < send || dorange) {
02aa26ce 2536 /* get transliterations out of the way (they're most literal) */
3280af22 2537 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2538 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2539 if (dorange) {
1ba5c669
JH
2540 I32 i; /* current expanded character */
2541 I32 min; /* first character in range */
2542 I32 max; /* last character in range */
02aa26ce 2543
e294cc5d
JH
2544#ifdef EBCDIC
2545 UV uvmax = 0;
2546#endif
2547
2548 if (has_utf8
2549#ifdef EBCDIC
2550 && !native_range
2551#endif
2552 ) {
9d4ba2ae 2553 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2554 char *e = d++;
2555 while (e-- > c)
2556 *(e + 1) = *e;
25716404 2557 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2558 /* mark the range as done, and continue */
2559 dorange = FALSE;
2560 didrange = TRUE;
2561 continue;
2562 }
2b9d42f0 2563
95a20fc0 2564 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2565#ifdef EBCDIC
2566 SvGROW(sv,
2567 SvLEN(sv) + (has_utf8 ?
2568 (512 - UTF_CONTINUATION_MARK +
2569 UNISKIP(0x100))
2570 : 256));
2571 /* How many two-byte within 0..255: 128 in UTF-8,
2572 * 96 in UTF-8-mod. */
2573#else
9cbb5ea2 2574 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2575#endif
9cbb5ea2 2576 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2577#ifdef EBCDIC
2578 if (has_utf8) {
2579 int j;
2580 for (j = 0; j <= 1; j++) {
2581 char * const c = (char*)utf8_hop((U8*)d, -1);
2582 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2583 if (j)
2584 min = (U8)uv;
2585 else if (uv < 256)
2586 max = (U8)uv;
2587 else {
2588 max = (U8)0xff; /* only to \xff */
2589 uvmax = uv; /* \x{100} to uvmax */
2590 }
2591 d = c; /* eat endpoint chars */
2592 }
2593 }
2594 else {
2595#endif
2596 d -= 2; /* eat the first char and the - */
2597 min = (U8)*d; /* first char in range */
2598 max = (U8)d[1]; /* last char in range */
2599#ifdef EBCDIC
2600 }
2601#endif
8ada0baa 2602
c2e66d9e 2603 if (min > max) {
01ec43d0 2604 Perl_croak(aTHX_
d1573ac7 2605 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2606 (char)min, (char)max);
c2e66d9e
GS
2607 }
2608
c7f1f016 2609#ifdef EBCDIC
4c3a8340
ST
2610 if (literal_endpoint == 2 &&
2611 ((isLOWER(min) && isLOWER(max)) ||
2612 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2613 if (isLOWER(min)) {
2614 for (i = min; i <= max; i++)
2615 if (isLOWER(i))
db42d148 2616 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2617 } else {
2618 for (i = min; i <= max; i++)
2619 if (isUPPER(i))
db42d148 2620 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2621 }
2622 }
2623 else
2624#endif
2625 for (i = min; i <= max; i++)
e294cc5d
JH
2626#ifdef EBCDIC
2627 if (has_utf8) {
2628 const U8 ch = (U8)NATIVE_TO_UTF(i);
2629 if (UNI_IS_INVARIANT(ch))
2630 *d++ = (U8)i;
2631 else {
2632 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2633 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2634 }
2635 }
2636 else
2637#endif
2638 *d++ = (char)i;
2639
2640#ifdef EBCDIC
2641 if (uvmax) {
2642 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2643 if (uvmax > 0x101)
2644 *d++ = (char)UTF_TO_NATIVE(0xff);
2645 if (uvmax > 0x100)
2646 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2647 }
2648#endif
02aa26ce
NT
2649
2650 /* mark the range as done, and continue */
79072805 2651 dorange = FALSE;
01ec43d0 2652 didrange = TRUE;
4c3a8340
ST
2653#ifdef EBCDIC
2654 literal_endpoint = 0;
2655#endif
79072805 2656 continue;
4e553d73 2657 }
02aa26ce
NT
2658
2659 /* range begins (ignore - as first or last char) */
79072805 2660 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2661 if (didrange) {
1fafa243 2662 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2663 }
e294cc5d
JH
2664 if (has_utf8
2665#ifdef EBCDIC
2666 && !native_range
2667#endif
2668 ) {
25716404 2669 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2670 s++;
2671 continue;
2672 }
79072805
LW
2673 dorange = TRUE;
2674 s++;
01ec43d0
GS
2675 }
2676 else {
2677 didrange = FALSE;
4c3a8340
ST
2678#ifdef EBCDIC
2679 literal_endpoint = 0;
e294cc5d 2680 native_range = TRUE;
4c3a8340 2681#endif
01ec43d0 2682 }
79072805 2683 }
02aa26ce
NT
2684
2685 /* if we get here, we're not doing a transliteration */
2686
0f5d15d6
IZ
2687 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2688 except for the last char, which will be done separately. */
3280af22 2689 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2690 if (s[2] == '#') {
e994fd66 2691 while (s+1 < send && *s != ')')
db42d148 2692 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2693 }
2694 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2695 || (s[2] == '?' && s[3] == '{'))
155aba94 2696 {
cc6b7395 2697 I32 count = 1;
0f5d15d6 2698 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2699 char c;
2700
d9f97599
GS
2701 while (count && (c = *regparse)) {
2702 if (c == '\\' && regparse[1])
2703 regparse++;
4e553d73 2704 else if (c == '{')
cc6b7395 2705 count++;
4e553d73 2706 else if (c == '}')
cc6b7395 2707 count--;
d9f97599 2708 regparse++;
cc6b7395 2709 }
e994fd66 2710 if (*regparse != ')')
5bdf89e7 2711 regparse--; /* Leave one char for continuation. */
0f5d15d6 2712 while (s < regparse)
db42d148 2713 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2714 }
748a9306 2715 }
02aa26ce
NT
2716
2717 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2718 else if (*s == '#' && PL_lex_inpat &&
2719 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2720 while (s+1 < send && *s != '\n')
db42d148 2721 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2722 }
02aa26ce 2723
5d1d4326 2724 /* check for embedded arrays
da6eedaa 2725 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2726 */
1749ea0d
ST
2727 else if (*s == '@' && s[1]) {
2728 if (isALNUM_lazy_if(s+1,UTF))
2729 break;
2730 if (strchr(":'{$", s[1]))
2731 break;
2732 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2733 break; /* in regexp, neither @+ nor @- are interpolated */
2734 }
02aa26ce
NT
2735
2736 /* check for embedded scalars. only stop if we're sure it's a
2737 variable.
2738 */
79072805 2739 else if (*s == '$') {
3280af22 2740 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2741 break;
77772344 2742 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2743 if (s[1] == '\\') {
2744 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2745 "Possible unintended interpolation of $\\ in regex");
77772344 2746 }
79072805 2747 break; /* in regexp, $ might be tail anchor */
77772344 2748 }
79072805 2749 }
02aa26ce 2750
2b9d42f0
NIS
2751 /* End of else if chain - OP_TRANS rejoin rest */
2752
02aa26ce 2753 /* backslashes */
79072805
LW
2754 if (*s == '\\' && s+1 < send) {
2755 s++;
02aa26ce 2756
02aa26ce 2757 /* deprecate \1 in strings and substitution replacements */
3280af22 2758 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2759 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2760 {
a2a5de95 2761 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2762 *--s = '$';
2763 break;
2764 }
02aa26ce
NT
2765
2766 /* string-change backslash escapes */
3280af22 2767 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2768 --s;
2769 break;
2770 }
cc74c5bd
ST
2771 /* skip any other backslash escapes in a pattern */
2772 else if (PL_lex_inpat) {
2773 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2774 goto default_action;
2775 }
02aa26ce
NT
2776
2777 /* if we get here, it's either a quoted -, or a digit */
79072805 2778 switch (*s) {
02aa26ce
NT
2779
2780 /* quoted - in transliterations */
79072805 2781 case '-':
3280af22 2782 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2783 *d++ = *s++;
2784 continue;
2785 }
2786 /* FALL THROUGH */
2787 default:
11b8faa4 2788 {
a2a5de95
NC
2789 if ((isALPHA(*s) || isDIGIT(*s)))
2790 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2791 "Unrecognized escape \\%c passed through",
2792 *s);
11b8faa4 2793 /* default action is to copy the quoted character */
f9a63242 2794 goto default_action;
11b8faa4 2795 }
02aa26ce 2796
77a135fe 2797 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2798 case '0': case '1': case '2': case '3':
2799 case '4': case '5': case '6': case '7':
ba210ebe 2800 {
53305cf1
NC
2801 I32 flags = 0;
2802 STRLEN len = 3;
77a135fe 2803 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2804 s += len;
2805 }
012bcf8d 2806 goto NUM_ESCAPE_INSERT;
02aa26ce 2807
77a135fe 2808 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2809 case 'x':
a0ed51b3
LW
2810 ++s;
2811 if (*s == '{') {
9d4ba2ae 2812 char* const e = strchr(s, '}');
a4c04bdc
NC
2813 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2814 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2815 STRLEN len;
355860ce 2816
53305cf1 2817 ++s;
adaeee49 2818 if (!e) {
a0ed51b3 2819 yyerror("Missing right brace on \\x{}");
355860ce 2820 continue;
ba210ebe 2821 }
53305cf1 2822 len = e - s;
77a135fe 2823 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2824 s = e + 1;
a0ed51b3
LW
2825 }
2826 else {
ba210ebe 2827 {
53305cf1 2828 STRLEN len = 2;
a4c04bdc 2829 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2830 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2831 s += len;
2832 }
012bcf8d
GS
2833 }
2834
2835 NUM_ESCAPE_INSERT:
77a135fe
KW
2836 /* Insert oct, hex, or \N{U+...} escaped character. There will
2837 * always be enough room in sv since such escapes will be
2838 * longer than any UTF-8 sequence they can end up as, except if
2839 * they force us to recode the rest of the string into utf8 */
ba7cea30 2840
77a135fe
KW
2841 /* Here uv is the ordinal of the next character being added in
2842 * unicode (converted from native). (It has to be done before
2843 * here because \N is interpreted as unicode, and oct and hex
2844 * as native.) */
2845 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2846 if (!has_utf8 && uv > 255) {
77a135fe
KW
2847 /* Might need to recode whatever we have accumulated so
2848 * far if it contains any chars variant in utf8 or
2849 * utf-ebcdic. */
2850
2851 SvCUR_set(sv, d - SvPVX_const(sv));
2852 SvPOK_on(sv);
2853 *d = '\0';
77a135fe 2854 /* See Note on sizing above. */
7bf79863
KW
2855 sv_utf8_upgrade_flags_grow(sv,
2856 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2857 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2858 d = SvPVX(sv) + SvCUR(sv);
2859 has_utf8 = TRUE;
012bcf8d
GS
2860 }
2861
77a135fe
KW
2862 if (has_utf8) {
2863 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2864 if (PL_lex_inwhat == OP_TRANS &&
2865 PL_sublex_info.sub_op) {
2866 PL_sublex_info.sub_op->op_private |=
2867 (PL_lex_repl ? OPpTRANS_FROM_UTF
2868 : OPpTRANS_TO_UTF);
f9a63242 2869 }
e294cc5d
JH
2870#ifdef EBCDIC
2871 if (uv > 255 && !dorange)
2872 native_range = FALSE;
2873#endif
012bcf8d 2874 }
a0ed51b3 2875 else {
012bcf8d 2876 *d++ = (char)uv;
a0ed51b3 2877 }
012bcf8d
GS
2878 }
2879 else {
c4d5f83a 2880 *d++ = (char) uv;
a0ed51b3 2881 }
79072805 2882 continue;
02aa26ce 2883
77a135fe
KW
2884 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2885 * \N{U+0041} */
4a2d328f 2886 case 'N':
55eda711 2887 ++s;
423cee85
JH
2888 if (*s == '{') {
2889 char* e = strchr(s, '}');
155aba94 2890 SV *res;
423cee85 2891 STRLEN len;
cfd0369c 2892 const char *str;
4e553d73 2893
423cee85 2894 if (!e) {
5777a3f7 2895 yyerror("Missing right brace on \\N{}");
423cee85
JH
2896 e = s - 1;
2897 goto cont_scan;
2898 }
dbc0d4f2 2899 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2900 /* \N{U+...} The ... is a unicode value even on EBCDIC
2901 * machines */
dbc0d4f2
JH
2902 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2903 PERL_SCAN_DISALLOW_PREFIX;
2904 s += 3;
2905 len = e - s;
2906 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2907 if ( e > s && len != (STRLEN)(e - s) ) {
2908 uv = 0xFFFD;
fc8cd66c 2909 }
dbc0d4f2
JH
2910 s = e + 1;
2911 goto NUM_ESCAPE_INSERT;
2912 }
55eda711 2913 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2914 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2915 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2916 if (has_utf8)
2917 sv_utf8_upgrade(res);
cfd0369c 2918 str = SvPV_const(res,len);
1c47067b
JH
2919#ifdef EBCDIC_NEVER_MIND
2920 /* charnames uses pack U and that has been
2921 * recently changed to do the below uni->native
2922 * mapping, so this would be redundant (and wrong,
2923 * the code point would be doubly converted).
2924 * But leave this in just in case the pack U change
2925 * gets revoked, but the semantics is still
2926 * desireable for charnames. --jhi */
cddc7ef4 2927 {
cfd0369c 2928 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2929
2930 if (uv < 0x100) {
89ebb4a3 2931 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2932
2933 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2934 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2935 str = SvPV_const(res, len);
cddc7ef4
JH
2936 }
2937 }
2938#endif
77a135fe
KW
2939 /* If destination is not in utf8 but this new character is,
2940 * recode the dest to utf8 */
89491803 2941 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2942 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2943 SvPOK_on(sv);
e4f3eed8 2944 *d = '\0';
77a135fe 2945 /* See Note on sizing above. */
7bf79863
KW
2946 sv_utf8_upgrade_flags_grow(sv,
2947 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2948 len + (STRLEN)(send - s) + 1);
f08d6ad9 2949 d = SvPVX(sv) + SvCUR(sv);
89491803 2950 has_utf8 = TRUE;
77a135fe 2951 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2952
77a135fe
KW
2953 /* See Note on sizing above. (NOTE: SvCUR() is not set
2954 * correctly here). */
2955 const STRLEN off = d - SvPVX_const(sv);
2956 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2957 }
e294cc5d
JH
2958#ifdef EBCDIC
2959 if (!dorange)
2960 native_range = FALSE; /* \N{} is guessed to be Unicode */
2961#endif
423cee85
JH
2962 Copy(str, d, len, char);
2963 d += len;
2964 SvREFCNT_dec(res);
2965 cont_scan:
2966 s = e + 1;
2967 }
2968 else
5777a3f7 2969 yyerror("Missing braces on \\N{}");
423cee85
JH
2970 continue;
2971
02aa26ce 2972 /* \c is a control character */
79072805
LW
2973 case 'c':
2974 s++;
961ce445 2975 if (s < send) {
ba210ebe 2976 U8 c = *s++;
c7f1f016
NIS
2977#ifdef EBCDIC
2978 if (isLOWER(c))
2979 c = toUPPER(c);
2980#endif
db42d148 2981 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2982 }
961ce445
RGS
2983 else {
2984 yyerror("Missing control char name in \\c");
2985 }
79072805 2986 continue;
02aa26ce
NT
2987
2988 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2989 case 'b':
db42d148 2990 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2991 break;
2992 case 'n':
db42d148 2993 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2994 break;
2995 case 'r':
db42d148 2996 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2997 break;
2998 case 'f':
db42d148 2999 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3000 break;
3001 case 't':
db42d148 3002 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3003 break;
34a3fe2a 3004 case 'e':
db42d148 3005 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3006 break;
3007 case 'a':
db42d148 3008 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3009 break;
02aa26ce
NT
3010 } /* end switch */
3011
79072805
LW
3012 s++;
3013 continue;
02aa26ce 3014 } /* end if (backslash) */
4c3a8340
ST
3015#ifdef EBCDIC
3016 else
3017 literal_endpoint++;
3018#endif
02aa26ce 3019
f9a63242 3020 default_action:
77a135fe
KW
3021 /* If we started with encoded form, or already know we want it,
3022 then encode the next character */
3023 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3024 STRLEN len = 1;
77a135fe
KW
3025
3026
3027 /* One might think that it is wasted effort in the case of the
3028 * source being utf8 (this_utf8 == TRUE) to take the next character
3029 * in the source, convert it to an unsigned value, and then convert
3030 * it back again. But the source has not been validated here. The
3031 * routine that does the conversion checks for errors like
3032 * malformed utf8 */
3033
5f66b61c
AL
3034 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3035 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3036 if (!has_utf8) {
3037 SvCUR_set(sv, d - SvPVX_const(sv));
3038 SvPOK_on(sv);
3039 *d = '\0';
77a135fe 3040 /* See Note on sizing above. */
7bf79863
KW
3041 sv_utf8_upgrade_flags_grow(sv,
3042 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3043 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3044 d = SvPVX(sv) + SvCUR(sv);
3045 has_utf8 = TRUE;
3046 } else if (need > len) {
3047 /* encoded value larger than old, may need extra space (NOTE:
3048 * SvCUR() is not set correctly here). See Note on sizing
3049 * above. */
9d4ba2ae 3050 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3051 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3052 }
77a135fe
KW
3053 s += len;
3054
5f66b61c 3055 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3056#ifdef EBCDIC
3057 if (uv > 255 && !dorange)
3058 native_range = FALSE;
3059#endif
2b9d42f0
NIS
3060 }
3061 else {
3062 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3063 }
02aa26ce
NT
3064 } /* while loop to process each character */
3065
3066 /* terminate the string and set up the sv */
79072805 3067 *d = '\0';
95a20fc0 3068 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3069 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3070 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3071
79072805 3072 SvPOK_on(sv);
9f4817db 3073 if (PL_encoding && !has_utf8) {
d0063567
DK
3074 sv_recode_to_utf8(sv, PL_encoding);
3075 if (SvUTF8(sv))
3076 has_utf8 = TRUE;
9f4817db 3077 }
2b9d42f0 3078 if (has_utf8) {
7e2040f0 3079 SvUTF8_on(sv);
2b9d42f0 3080 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3081 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3082 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3083 }
3084 }
79072805 3085
02aa26ce 3086 /* shrink the sv if we allocated more than we used */
79072805 3087 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3088 SvPV_shrink_to_cur(sv);
79072805 3089 }
02aa26ce 3090
6154021b 3091 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3092 if (s > PL_bufptr) {
eb0d8d16
NC
3093 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3094 const char *const key = PL_lex_inpat ? "qr" : "q";
3095 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3096 const char *type;
3097 STRLEN typelen;
3098
3099 if (PL_lex_inwhat == OP_TRANS) {
3100 type = "tr";
3101 typelen = 2;
3102 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3103 type = "s";
3104 typelen = 1;
3105 } else {
3106 type = "qq";
3107 typelen = 2;
3108 }
3109
3110 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3111 type, typelen);
3112 }
6154021b 3113 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3114 } else
8990e307 3115 SvREFCNT_dec(sv);
79072805
LW
3116 return s;
3117}
3118
ffb4593c
NT
3119/* S_intuit_more
3120 * Returns TRUE if there's more to the expression (e.g., a subscript),
3121 * FALSE otherwise.
ffb4593c
NT
3122 *
3123 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3124 *
3125 * ->[ and ->{ return TRUE
3126 * { and [ outside a pattern are always subscripts, so return TRUE
3127 * if we're outside a pattern and it's not { or [, then return FALSE
3128 * if we're in a pattern and the first char is a {
3129 * {4,5} (any digits around the comma) returns FALSE
3130 * if we're in a pattern and the first char is a [
3131 * [] returns FALSE
3132 * [SOMETHING] has a funky algorithm to decide whether it's a
3133 * character class or not. It has to deal with things like
3134 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3135 * anything else returns TRUE
3136 */
3137
9cbb5ea2
GS
3138/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3139
76e3520e 3140STATIC int
cea2e8a9 3141S_intuit_more(pTHX_ register char *s)
79072805 3142{
97aff369 3143 dVAR;
7918f24d
NC
3144
3145 PERL_ARGS_ASSERT_INTUIT_MORE;
3146
3280af22 3147 if (PL_lex_brackets)
79072805
LW
3148 return TRUE;
3149 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3150 return TRUE;
3151 if (*s != '{' && *s != '[')
3152 return FALSE;
3280af22 3153 if (!PL_lex_inpat)
79072805
LW
3154 return TRUE;
3155
3156 /* In a pattern, so maybe we have {n,m}. */
3157 if (*s == '{') {
3158 s++;
3159 if (!isDIGIT(*s))
3160 return TRUE;
3161 while (isDIGIT(*s))
3162 s++;
3163 if (*s == ',')
3164 s++;
3165 while (isDIGIT(*s))
3166 s++;
3167 if (*s == '}')
3168 return FALSE;
3169 return TRUE;
3170
3171 }
3172
3173 /* On the other hand, maybe we have a character class */
3174
3175 s++;
3176 if (*s == ']' || *s == '^')
3177 return FALSE;
3178 else {
ffb4593c 3179 /* this is terrifying, and it works */
79072805
LW
3180 int weight = 2; /* let's weigh the evidence */
3181 char seen[256];
f27ffc4a 3182 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3183 const char * const send = strchr(s,']');
3280af22 3184 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3185
3186 if (!send) /* has to be an expression */
3187 return TRUE;
3188
3189 Zero(seen,256,char);
3190 if (*s == '$')
3191 weight -= 3;
3192 else if (isDIGIT(*s)) {
3193 if (s[1] != ']') {
3194 if (isDIGIT(s[1]) && s[2] == ']')
3195 weight -= 10;
3196 }
3197 else
3198 weight -= 100;
3199 }
3200 for (; s < send; s++) {
3201 last_un_char = un_char;
3202 un_char = (unsigned char)*s;
3203 switch (*s) {
3204 case '@':
3205 case '&':
3206 case '$':
3207 weight -= seen[un_char] * 10;
7e2040f0 3208 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3209 int len;
8903cb82 3210 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3211 len = (int)strlen(tmpbuf);
3212 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3213 weight -= 100;
3214 else
3215 weight -= 10;
3216 }
3217 else if (*s == '$' && s[1] &&
93a17b20
LW
3218 strchr("[#!%*<>()-=",s[1])) {
3219 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3220 weight -= 10;
3221 else
3222 weight -= 1;
3223 }
3224 break;
3225 case '\\':
3226 un_char = 254;
3227 if (s[1]) {
93a17b20 3228 if (strchr("wds]",s[1]))
79072805 3229 weight += 100;
10edeb5d 3230 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3231 weight += 1;
93a17b20 3232 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3233 weight += 40;
3234 else if (isDIGIT(s[1])) {
3235 weight += 40;
3236 while (s[1] && isDIGIT(s[1]))
3237 s++;
3238 }
3239 }
3240 else
3241 weight += 100;
3242 break;
3243 case '-':
3244 if (s[1] == '\\')
3245 weight += 50;
93a17b20 3246 if (strchr("aA01! ",last_un_char))
79072805 3247 weight += 30;
93a17b20 3248 if (strchr("zZ79~",s[1]))
79072805 3249 weight += 30;
f27ffc4a
GS
3250 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3251 weight -= 5; /* cope with negative subscript */
79072805
LW
3252 break;
3253 default:
3792a11b
NC
3254 if (!isALNUM(last_un_char)
3255 && !(last_un_char == '$' || last_un_char == '@'
3256 || last_un_char == '&')
3257 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3258 char *d = tmpbuf;
3259 while (isALPHA(*s))
3260 *d++ = *s++;
3261 *d = '\0';
5458a98a 3262 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3263 weight -= 150;
3264 }
3265 if (un_char == last_un_char + 1)
3266 weight += 5;
3267 weight -= seen[un_char];
3268 break;
3269 }
3270 seen[un_char]++;
3271 }
3272 if (weight >= 0) /* probably a character class */
3273 return FALSE;
3274 }
3275
3276 return TRUE;
3277}
ffed7fef 3278
ffb4593c
NT
3279/*
3280 * S_intuit_method
3281 *
3282 * Does all the checking to disambiguate
3283 * foo bar
3284 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3285 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3286 *
3287 * First argument is the stuff after the first token, e.g. "bar".
3288 *
3289 * Not a method if bar is a filehandle.
3290 * Not a method if foo is a subroutine prototyped to take a filehandle.
3291 * Not a method if it's really "Foo $bar"
3292 * Method if it's "foo $bar"
3293 * Not a method if it's really "print foo $bar"
3294 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3295 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3296 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3297 * =>
3298 */
3299
76e3520e 3300STATIC int
62d55b22 3301S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3302{
97aff369 3303 dVAR;
a0d0e21e 3304 char *s = start + (*start == '$');
3280af22 3305 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3306 STRLEN len;
3307 GV* indirgv;
5db06880
NC
3308#ifdef PERL_MAD
3309 int soff;
3310#endif
a0d0e21e 3311
7918f24d
NC
3312 PERL_ARGS_ASSERT_INTUIT_METHOD;
3313
a0d0e21e 3314 if (gv) {
62d55b22 3315 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3316 return 0;
62d55b22
NC
3317 if (cv) {
3318 if (SvPOK(cv)) {
3319 const char *proto = SvPVX_const(cv);
3320 if (proto) {
3321 if (*proto == ';')
3322 proto++;
3323 if (*proto == '*')
3324 return 0;
3325 }
b6c543e3
IZ
3326 }
3327 } else
c35e046a 3328 gv = NULL;
a0d0e21e 3329 }
8903cb82 3330 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3331 /* start is the beginning of the possible filehandle/object,
3332 * and s is the end of it
3333 * tmpbuf is a copy of it
3334 */
3335
a0d0e21e 3336 if (*start == '$') {
3ef1310e
RGS
3337 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3338 isUPPER(*PL_tokenbuf))
a0d0e21e 3339 return 0;
5db06880
NC
3340#ifdef PERL_MAD
3341 len = start - SvPVX(PL_linestr);
3342#endif
29595ff2 3343 s = PEEKSPACE(s);
f0092767 3344#ifdef PERL_MAD
5db06880
NC
3345 start = SvPVX(PL_linestr) + len;
3346#endif
3280af22
NIS
3347 PL_bufptr = start;
3348 PL_expect = XREF;
a0d0e21e
LW
3349 return *s == '(' ? FUNCMETH : METHOD;
3350 }
5458a98a 3351 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3352 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3353 len -= 2;
3354 tmpbuf[len] = '\0';
5db06880
NC
3355#ifdef PERL_MAD
3356 soff = s - SvPVX(PL_linestr);
3357#endif
c3e0f903
GS
3358 goto bare_package;
3359 }
90e5519e 3360 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3361 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3362 return 0;
3363 /* filehandle or package name makes it a method */
da51bb9b 3364 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3365#ifdef PERL_MAD
3366 soff = s - SvPVX(PL_linestr);
3367#endif
29595ff2 3368 s = PEEKSPACE(s);
3280af22 3369 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3370 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3371 bare_package:
cd81e915 3372 start_force(PL_curforce);
9ded7720 3373 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3374 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3375 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3376 if (PL_madskills)
3377 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3378 PL_expect = XTERM;
a0d0e21e 3379 force_next(WORD);
3280af22 3380 PL_bufptr = s;
5db06880
NC
3381#ifdef PERL_MAD
3382 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3383#endif
a0d0e21e
LW
3384 return *s == '(' ? FUNCMETH : METHOD;
3385 }
3386 }
3387 return 0;
3388}
3389
16d20bd9 3390/* Encoded script support. filter_add() effectively inserts a
4e553d73 3391 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3392 * Note that the filter function only applies to the current source file
3393 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3394 *
3395 * The datasv parameter (which may be NULL) can be used to pass
3396 * private data to this instance of the filter. The filter function
3397 * can recover the SV using the FILTER_DATA macro and use it to
3398 * store private buffers and state information.
3399 *
3400 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3401 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3402 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3403 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3404 * private use must be set using malloc'd pointers.
3405 */
16d20bd9
AD
3406
3407SV *
864dbfa3 3408Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3409{
97aff369 3410 dVAR;
f4c556ac 3411 if (!funcp)
a0714e2c 3412 return NULL;
f4c556ac 3413
5486870f
DM
3414 if (!PL_parser)
3415 return NULL;
3416
3280af22
NIS
3417 if (!PL_rsfp_filters)
3418 PL_rsfp_filters = newAV();
16d20bd9 3419 if (!datasv)
561b68a9 3420 datasv = newSV(0);
862a34c6 3421 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3422 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3423 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3424 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3425 FPTR2DPTR(void *, IoANY(datasv)),
3426 SvPV_nolen(datasv)));
3280af22
NIS
3427 av_unshift(PL_rsfp_filters, 1);
3428 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3429 return(datasv);
3430}
4e553d73 3431
16d20bd9
AD
3432
3433/* Delete most recently added instance of this filter function. */
a0d0e21e 3434void
864dbfa3 3435Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3436{
97aff369 3437 dVAR;
e0c19803 3438 SV *datasv;
24801a4b 3439
7918f24d
NC
3440 PERL_ARGS_ASSERT_FILTER_DEL;
3441
33073adb 3442#ifdef DEBUGGING
55662e27
JH
3443 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3444 FPTR2DPTR(void*, funcp)));
33073adb 3445#endif
5486870f 3446 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3447 return;
3448 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3449 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3450 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3451 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3452 IoANY(datasv) = (void *)NULL;
3280af22 3453 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3454
16d20bd9
AD
3455 return;
3456 }
3457 /* we need to search for the correct entry and clear it */
cea2e8a9 3458 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3459}
3460
3461
1de9afcd
RGS
3462/* Invoke the idxth filter function for the current rsfp. */
3463/* maxlen 0 = read one text line */
16d20bd9 3464I32
864dbfa3 3465Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3466{
97aff369 3467 dVAR;
16d20bd9
AD
3468 filter_t funcp;
3469 SV *datasv = NULL;
f482118e
NC
3470 /* This API is bad. It should have been using unsigned int for maxlen.
3471 Not sure if we want to change the API, but if not we should sanity
3472 check the value here. */
39cd7a59
NC
3473 const unsigned int correct_length
3474 = maxlen < 0 ?
3475#ifdef PERL_MICRO
3476 0x7FFFFFFF
3477#else
3478 INT_MAX
3479#endif
3480 : maxlen;
e50aee73 3481
7918f24d
NC
3482 PERL_ARGS_ASSERT_FILTER_READ;
3483
5486870f 3484 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3485 return -1;
1de9afcd 3486 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3487 /* Provide a default input filter to make life easy. */
3488 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3489 DEBUG_P(PerlIO_printf(Perl_debug_log,
3490 "filter_read %d: from rsfp\n", idx));
f482118e 3491 if (correct_length) {
16d20bd9
AD
3492 /* Want a block */
3493 int len ;
f54cb97a 3494 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3495
3496 /* ensure buf_sv is large enough */
881d8f0a 3497 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3498 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3499 correct_length)) <= 0) {
3280af22 3500 if (PerlIO_error(PL_rsfp))
37120919
AD
3501 return -1; /* error */
3502 else
3503 return 0 ; /* end of file */
3504 }
16d20bd9 3505 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3506 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3507 } else {
3508 /* Want a line */
3280af22
NIS
3509 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3510 if (PerlIO_error(PL_rsfp))
37120919
AD
3511 return -1; /* error */
3512 else
3513 return 0 ; /* end of file */
3514 }
16d20bd9
AD
3515 }
3516 return SvCUR(buf_sv);
3517 }
3518 /* Skip this filter slot if filter has been deleted */
1de9afcd 3519 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3520 DEBUG_P(PerlIO_printf(Perl_debug_log,
3521 "filter_read %d: skipped (filter deleted)\n",
3522 idx));
f482118e 3523 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3524 }
3525 /* Get function pointer hidden within datasv */
8141890a 3526 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3527 DEBUG_P(PerlIO_printf(Perl_debug_log,
3528 "filter_read %d: via function %p (%s)\n",
ca0270c4 3529 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3530 /* Call function. The function is expected to */
3531 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3532 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3533 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3534}
3535
76e3520e 3536STATIC char *
5cc814fd 3537S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3538{
97aff369 3539 dVAR;
7918f24d
NC
3540
3541 PERL_ARGS_ASSERT_FILTER_GETS;
3542
c39cd008 3543#ifdef PERL_CR_FILTER
3280af22 3544 if (!PL_rsfp_filters) {
c39cd008 3545 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3546 }
3547#endif
3280af22 3548 if (PL_rsfp_filters) {
55497cff
PP
3549 if (!append)
3550 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3551 if (FILTER_READ(0, sv, 0) > 0)
3552 return ( SvPVX(sv) ) ;
3553 else
bd61b366 3554 return NULL ;
16d20bd9 3555 }
9d116dd7 3556 else
5cc814fd 3557 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3558}
3559
01ec43d0 3560STATIC HV *
9bde8eb0 3561S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3562{
97aff369 3563 dVAR;
def3634b
GS
3564 GV *gv;
3565
7918f24d
NC
3566 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3567
01ec43d0 3568 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3569 return PL_curstash;
3570
3571 if (len > 2 &&
3572 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3573 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3574 {
3575 return GvHV(gv); /* Foo:: */
def3634b
GS
3576 }
3577
3578 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3579 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3580 if (gv && GvCV(gv)) {
3581 SV * const sv = cv_const_sv(GvCV(gv));
3582 if (sv)
9bde8eb0 3583 pkgname = SvPV_const(sv, len);
def3634b
GS
3584 }
3585
9bde8eb0 3586 return gv_stashpvn(pkgname, len, 0);
def3634b 3587}
a0d0e21e 3588
e3f73d4e
RGS
3589/*
3590 * S_readpipe_override
3591 * Check whether readpipe() is overriden, and generates the appropriate
3592 * optree, provided sublex_start() is called afterwards.
3593 */
3594STATIC void
1d51329b 3595S_readpipe_override(pTHX)
e3f73d4e
RGS
3596{
3597 GV **gvp;
3598 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3599 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3600 if ((gv_readpipe
3601 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3602 ||
3603 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3604 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3605 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3606 {
3607 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3608 append_elem(OP_LIST,
3609 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3610 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3611 }
e3f73d4e
RGS
3612}
3613
5db06880
NC
3614#ifdef PERL_MAD
3615 /*
3616 * Perl_madlex
3617 * The intent of this yylex wrapper is to minimize the changes to the
3618 * tokener when we aren't interested in collecting madprops. It remains
3619 * to be seen how successful this strategy will be...
3620 */
3621
3622int
3623Perl_madlex(pTHX)
3624{
3625 int optype;
3626 char *s = PL_bufptr;
3627
cd81e915
NC
3628 /* make sure PL_thiswhite is initialized */
3629 PL_thiswhite = 0;
3630 PL_thismad = 0;
5db06880 3631
cd81e915 3632 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3633 if (PL_pending_ident)
3634 return S_pending_ident(aTHX);
3635
3636 /* previous token ate up our whitespace? */
cd81e915
NC
3637 if (!PL_lasttoke && PL_nextwhite) {
3638 PL_thiswhite = PL_nextwhite;
3639 PL_nextwhite = 0;
5db06880
NC
3640 }
3641
3642 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3643 PL_realtokenstart = -1;
3644 PL_thistoken = 0;
5db06880
NC
3645 optype = yylex();
3646 s = PL_bufptr;
cd81e915 3647 assert(PL_curforce < 0);
5db06880 3648
cd81e915
NC
3649 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3650 if (!PL_thistoken) {
3651 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3652 PL_thistoken = newSVpvs("");
5db06880 3653 else {
c35e046a 3654 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3655 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3656 }
3657 }
cd81e915
NC
3658 if (PL_thismad) /* install head */
3659 CURMAD('X', PL_thistoken);
5db06880
NC
3660 }
3661
3662 /* last whitespace of a sublex? */
cd81e915
NC
3663 if (optype == ')' && PL_endwhite) {
3664 CURMAD('X', PL_endwhite);
5db06880
NC
3665 }
3666
cd81e915 3667 if (!PL_thismad) {
5db06880
NC
3668
3669 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3670 if (!PL_thiswhite && !PL_endwhite && !optype) {
3671 sv_free(PL_thistoken);
3672 PL_thistoken = 0;
5db06880
NC
3673 return 0;
3674 }
3675
3676 /* put off final whitespace till peg */
3677 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3678 PL_nextwhite = PL_thiswhite;
3679 PL_thiswhite = 0;
5db06880 3680 }
cd81e915
NC
3681 else if (PL_thisopen) {
3682 CURMAD('q', PL_thisopen);
3683 if (PL_thistoken)
3684 sv_free(PL_thistoken);
3685 PL_thistoken = 0;
5db06880
NC
3686 }
3687 else {
3688 /* Store actual token text as madprop X */
cd81e915 3689 CURMAD('X', PL_thistoken);
5db06880
NC
3690 }
3691
cd81e915 3692 if (PL_thiswhite) {
5db06880 3693 /* add preceding whitespace as madprop _ */
cd81e915 3694 CURMAD('_', PL_thiswhite);
5db06880
NC
3695 }
3696
cd81e915 3697 if (PL_thisstuff) {
5db06880 3698 /* add quoted material as madprop = */
cd81e915 3699 CURMAD('=', PL_thisstuff);
5db06880
NC
3700 }
3701
cd81e915 3702 if (PL_thisclose) {
5db06880 3703 /* add terminating quote as madprop Q */
cd81e915 3704 CURMAD('Q', PL_thisclose);
5db06880
NC
3705 }
3706 }
3707
3708 /* special processing based on optype */
3709
3710 switch (optype) {
3711