This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re-instate the use of gv_stashpvn_flags(), and the correct non-boolean argument.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
83aa740e 917=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
83aa740e 939Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 940{
749123ff 941 dVAR;
f0e67a1d
Z
942 char *bufptr;
943 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
944 if (flags & ~(LEX_STUFF_UTF8))
945 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
946 if (UTF) {
947 if (flags & LEX_STUFF_UTF8) {
948 goto plain_copy;
949 } else {
950 STRLEN highhalf = 0;
83aa740e 951 const char *p, *e = pv+len;
f0e67a1d
Z
952 for (p = pv; p != e; p++)
953 highhalf += !!(((U8)*p) & 0x80);
954 if (!highhalf)
955 goto plain_copy;
956 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
957 bufptr = PL_parser->bufptr;
958 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
959 SvCUR_set(PL_parser->linestr,
960 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
961 PL_parser->bufend += len+highhalf;
962 for (p = pv; p != e; p++) {
963 U8 c = (U8)*p;
964 if (c & 0x80) {
965 *bufptr++ = (char)(0xc0 | (c >> 6));
966 *bufptr++ = (char)(0x80 | (c & 0x3f));
967 } else {
968 *bufptr++ = (char)c;
969 }
970 }
971 }
972 } else {
973 if (flags & LEX_STUFF_UTF8) {
974 STRLEN highhalf = 0;
83aa740e 975 const char *p, *e = pv+len;
f0e67a1d
Z
976 for (p = pv; p != e; p++) {
977 U8 c = (U8)*p;
978 if (c >= 0xc4) {
979 Perl_croak(aTHX_ "Lexing code attempted to stuff "
980 "non-Latin-1 character into Latin-1 input");
981 } else if (c >= 0xc2 && p+1 != e &&
982 (((U8)p[1]) & 0xc0) == 0x80) {
983 p++;
984 highhalf++;
985 } else if (c >= 0x80) {
986 /* malformed UTF-8 */
987 ENTER;
988 SAVESPTR(PL_warnhook);
989 PL_warnhook = PERL_WARNHOOK_FATAL;
990 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
991 LEAVE;
992 }
993 }
994 if (!highhalf)
995 goto plain_copy;
996 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
997 bufptr = PL_parser->bufptr;
998 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
999 SvCUR_set(PL_parser->linestr,
1000 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1001 PL_parser->bufend += len-highhalf;
1002 for (p = pv; p != e; p++) {
1003 U8 c = (U8)*p;
1004 if (c & 0x80) {
1005 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006 p++;
1007 } else {
1008 *bufptr++ = (char)c;
1009 }
1010 }
1011 } else {
1012 plain_copy:
1013 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014 bufptr = PL_parser->bufptr;
1015 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1016 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1017 PL_parser->bufend += len;
1018 Copy(pv, bufptr, len, char);
1019 }
1020 }
1021}
1022
1023/*
1024=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1025
1026Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028reallocating the buffer if necessary. This means that lexing code that
1029runs later will see the characters as if they had appeared in the input.
1030It is not recommended to do this as part of normal parsing, and most
1031uses of this facility run the risk of the inserted characters being
1032interpreted in an unintended manner.
1033
1034The string to be inserted is the string value of I<sv>. The characters
1035are recoded for the lexer buffer, according to how the buffer is currently
1036being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1037not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1038need to construct a scalar.
1039
1040=cut
1041*/
1042
1043void
1044Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1045{
1046 char *pv;
1047 STRLEN len;
1048 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1049 if (flags)
1050 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1051 pv = SvPV(sv, len);
1052 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1053}
1054
1055/*
1056=for apidoc Amx|void|lex_unstuff|char *ptr
1057
1058Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1059I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1060This hides the discarded text from any lexing code that runs later,
1061as if the text had never appeared.
1062
1063This is not the normal way to consume lexed text. For that, use
1064L</lex_read_to>.
1065
1066=cut
1067*/
1068
1069void
1070Perl_lex_unstuff(pTHX_ char *ptr)
1071{
1072 char *buf, *bufend;
1073 STRLEN unstuff_len;
1074 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1075 buf = PL_parser->bufptr;
1076 if (ptr < buf)
1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078 if (ptr == buf)
1079 return;
1080 bufend = PL_parser->bufend;
1081 if (ptr > bufend)
1082 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1083 unstuff_len = ptr - buf;
1084 Move(ptr, buf, bufend+1-ptr, char);
1085 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1086 PL_parser->bufend = bufend - unstuff_len;
1087}
1088
1089/*
1090=for apidoc Amx|void|lex_read_to|char *ptr
1091
1092Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1093to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1094performing the correct bookkeeping whenever a newline character is passed.
1095This is the normal way to consume lexed text.
1096
1097Interpretation of the buffer's octets can be abstracted out by
1098using the slightly higher-level functions L</lex_peek_unichar> and
1099L</lex_read_unichar>.
1100
1101=cut
1102*/
1103
1104void
1105Perl_lex_read_to(pTHX_ char *ptr)
1106{
1107 char *s;
1108 PERL_ARGS_ASSERT_LEX_READ_TO;
1109 s = PL_parser->bufptr;
1110 if (ptr < s || ptr > PL_parser->bufend)
1111 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1112 for (; s != ptr; s++)
1113 if (*s == '\n') {
1114 CopLINE_inc(PL_curcop);
1115 PL_parser->linestart = s+1;
1116 }
1117 PL_parser->bufptr = ptr;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_discard_to|char *ptr
1122
1123Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1124up to I<ptr>. The remaining content of the buffer will be moved, and
1125all pointers into the buffer updated appropriately. I<ptr> must not
1126be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1127it is not permitted to discard text that has yet to be lexed.
1128
1129Normally it is not necessarily to do this directly, because it suffices to
1130use the implicit discarding behaviour of L</lex_next_chunk> and things
1131based on it. However, if a token stretches across multiple lines,
1f317c95 1132and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1133that purpose, then after completion of the token it would be wise to
1134explicitly discard the now-unneeded earlier lines, to avoid future
1135multi-line tokens growing the buffer without bound.
1136
1137=cut
1138*/
1139
1140void
1141Perl_lex_discard_to(pTHX_ char *ptr)
1142{
1143 char *buf;
1144 STRLEN discard_len;
1145 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1146 buf = SvPVX(PL_parser->linestr);
1147 if (ptr < buf)
1148 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1149 if (ptr == buf)
1150 return;
1151 if (ptr > PL_parser->bufptr)
1152 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1153 discard_len = ptr - buf;
1154 if (PL_parser->oldbufptr < ptr)
1155 PL_parser->oldbufptr = ptr;
1156 if (PL_parser->oldoldbufptr < ptr)
1157 PL_parser->oldoldbufptr = ptr;
1158 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1159 PL_parser->last_uni = NULL;
1160 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1161 PL_parser->last_lop = NULL;
1162 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1163 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1164 PL_parser->bufend -= discard_len;
1165 PL_parser->bufptr -= discard_len;
1166 PL_parser->oldbufptr -= discard_len;
1167 PL_parser->oldoldbufptr -= discard_len;
1168 if (PL_parser->last_uni)
1169 PL_parser->last_uni -= discard_len;
1170 if (PL_parser->last_lop)
1171 PL_parser->last_lop -= discard_len;
1172}
1173
1174/*
1175=for apidoc Amx|bool|lex_next_chunk|U32 flags
1176
1177Reads in the next chunk of text to be lexed, appending it to
1178L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1179looked to the end of the current chunk and wants to know more. It is
1180usual, but not necessary, for lexing to have consumed the entirety of
1181the current chunk at this time.
1182
1183If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1184chunk (i.e., the current chunk has been entirely consumed), normally the
1185current chunk will be discarded at the same time that the new chunk is
1186read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1187will not be discarded. If the current chunk has not been entirely
1188consumed, then it will not be discarded regardless of the flag.
1189
1190Returns true if some new text was added to the buffer, or false if the
1191buffer has reached the end of the input text.
1192
1193=cut
1194*/
1195
1196#define LEX_FAKE_EOF 0x80000000
1197
1198bool
1199Perl_lex_next_chunk(pTHX_ U32 flags)
1200{
1201 SV *linestr;
1202 char *buf;
1203 STRLEN old_bufend_pos, new_bufend_pos;
1204 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1205 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1206 bool got_some_for_debugger = 0;
f0e67a1d
Z
1207 bool got_some;
1208 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1209 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1210 linestr = PL_parser->linestr;
1211 buf = SvPVX(linestr);
1212 if (!(flags & LEX_KEEP_PREVIOUS) &&
1213 PL_parser->bufptr == PL_parser->bufend) {
1214 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1215 linestart_pos = 0;
1216 if (PL_parser->last_uni != PL_parser->bufend)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop != PL_parser->bufend)
1219 PL_parser->last_lop = NULL;
1220 last_uni_pos = last_lop_pos = 0;
1221 *buf = 0;
1222 SvCUR(linestr) = 0;
1223 } else {
1224 old_bufend_pos = PL_parser->bufend - buf;
1225 bufptr_pos = PL_parser->bufptr - buf;
1226 oldbufptr_pos = PL_parser->oldbufptr - buf;
1227 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1228 linestart_pos = PL_parser->linestart - buf;
1229 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1230 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1231 }
1232 if (flags & LEX_FAKE_EOF) {
1233 goto eof;
1234 } else if (!PL_parser->rsfp) {
1235 got_some = 0;
1236 } else if (filter_gets(linestr, old_bufend_pos)) {
1237 got_some = 1;
17cc9359 1238 got_some_for_debugger = 1;
f0e67a1d 1239 } else {
580561a3
Z
1240 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1241 sv_setpvs(linestr, "");
f0e67a1d
Z
1242 eof:
1243 /* End of real input. Close filehandle (unless it was STDIN),
1244 * then add implicit termination.
1245 */
1246 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1247 PerlIO_clearerr(PL_parser->rsfp);
1248 else if (PL_parser->rsfp)
1249 (void)PerlIO_close(PL_parser->rsfp);
1250 PL_parser->rsfp = NULL;
1251 PL_doextract = FALSE;
1252#ifdef PERL_MAD
1253 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1254 PL_faketokens = 1;
1255#endif
1256 if (!PL_in_eval && PL_minus_p) {
1257 sv_catpvs(linestr,
1258 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1259 PL_minus_n = PL_minus_p = 0;
1260 } else if (!PL_in_eval && PL_minus_n) {
1261 sv_catpvs(linestr, /*{*/";}");
1262 PL_minus_n = 0;
1263 } else
1264 sv_catpvs(linestr, ";");
1265 got_some = 1;
1266 }
1267 buf = SvPVX(linestr);
1268 new_bufend_pos = SvCUR(linestr);
1269 PL_parser->bufend = buf + new_bufend_pos;
1270 PL_parser->bufptr = buf + bufptr_pos;
1271 PL_parser->oldbufptr = buf + oldbufptr_pos;
1272 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1273 PL_parser->linestart = buf + linestart_pos;
1274 if (PL_parser->last_uni)
1275 PL_parser->last_uni = buf + last_uni_pos;
1276 if (PL_parser->last_lop)
1277 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1278 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1279 PL_curstash != PL_debstash) {
1280 /* debugger active and we're not compiling the debugger code,
1281 * so store the line into the debugger's array of lines
1282 */
1283 update_debugger_info(NULL, buf+old_bufend_pos,
1284 new_bufend_pos-old_bufend_pos);
1285 }
1286 return got_some;
1287}
1288
1289/*
1290=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1291
1292Looks ahead one (Unicode) character in the text currently being lexed.
1293Returns the codepoint (unsigned integer value) of the next character,
1294or -1 if lexing has reached the end of the input text. To consume the
1295peeked character, use L</lex_read_unichar>.
1296
1297If the next character is in (or extends into) the next chunk of input
1298text, the next chunk will be read in. Normally the current chunk will be
1299discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1300then the current chunk will not be discarded.
1301
1302If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1303is encountered, an exception is generated.
1304
1305=cut
1306*/
1307
1308I32
1309Perl_lex_peek_unichar(pTHX_ U32 flags)
1310{
749123ff 1311 dVAR;
f0e67a1d
Z
1312 char *s, *bufend;
1313 if (flags & ~(LEX_KEEP_PREVIOUS))
1314 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1315 s = PL_parser->bufptr;
1316 bufend = PL_parser->bufend;
1317 if (UTF) {
1318 U8 head;
1319 I32 unichar;
1320 STRLEN len, retlen;
1321 if (s == bufend) {
1322 if (!lex_next_chunk(flags))
1323 return -1;
1324 s = PL_parser->bufptr;
1325 bufend = PL_parser->bufend;
1326 }
1327 head = (U8)*s;
1328 if (!(head & 0x80))
1329 return head;
1330 if (head & 0x40) {
1331 len = PL_utf8skip[head];
1332 while ((STRLEN)(bufend-s) < len) {
1333 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1334 break;
1335 s = PL_parser->bufptr;
1336 bufend = PL_parser->bufend;
1337 }
1338 }
1339 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1340 if (retlen == (STRLEN)-1) {
1341 /* malformed UTF-8 */
1342 ENTER;
1343 SAVESPTR(PL_warnhook);
1344 PL_warnhook = PERL_WARNHOOK_FATAL;
1345 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1346 LEAVE;
1347 }
1348 return unichar;
1349 } else {
1350 if (s == bufend) {
1351 if (!lex_next_chunk(flags))
1352 return -1;
1353 s = PL_parser->bufptr;
1354 }
1355 return (U8)*s;
1356 }
1357}
1358
1359/*
1360=for apidoc Amx|I32|lex_read_unichar|U32 flags
1361
1362Reads the next (Unicode) character in the text currently being lexed.
1363Returns the codepoint (unsigned integer value) of the character read,
1364and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1365if lexing has reached the end of the input text. To non-destructively
1366examine the next character, use L</lex_peek_unichar> instead.
1367
1368If the next character is in (or extends into) the next chunk of input
1369text, the next chunk will be read in. Normally the current chunk will be
1370discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1371then the current chunk will not be discarded.
1372
1373If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1374is encountered, an exception is generated.
1375
1376=cut
1377*/
1378
1379I32
1380Perl_lex_read_unichar(pTHX_ U32 flags)
1381{
1382 I32 c;
1383 if (flags & ~(LEX_KEEP_PREVIOUS))
1384 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1385 c = lex_peek_unichar(flags);
1386 if (c != -1) {
1387 if (c == '\n')
1388 CopLINE_inc(PL_curcop);
1389 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1390 }
1391 return c;
1392}
1393
1394/*
1395=for apidoc Amx|void|lex_read_space|U32 flags
1396
1397Reads optional spaces, in Perl style, in the text currently being
1398lexed. The spaces may include ordinary whitespace characters and
1399Perl-style comments. C<#line> directives are processed if encountered.
1400L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1401at a non-space character (or the end of the input text).
1402
1403If spaces extend into the next chunk of input text, the next chunk will
1404be read in. Normally the current chunk will be discarded at the same
1405time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1406chunk will not be discarded.
1407
1408=cut
1409*/
1410
f0998909
Z
1411#define LEX_NO_NEXT_CHUNK 0x80000000
1412
f0e67a1d
Z
1413void
1414Perl_lex_read_space(pTHX_ U32 flags)
1415{
1416 char *s, *bufend;
1417 bool need_incline = 0;
f0998909 1418 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1419 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1420#ifdef PERL_MAD
1421 if (PL_skipwhite) {
1422 sv_free(PL_skipwhite);
1423 PL_skipwhite = NULL;
1424 }
1425 if (PL_madskills)
1426 PL_skipwhite = newSVpvs("");
1427#endif /* PERL_MAD */
1428 s = PL_parser->bufptr;
1429 bufend = PL_parser->bufend;
1430 while (1) {
1431 char c = *s;
1432 if (c == '#') {
1433 do {
1434 c = *++s;
1435 } while (!(c == '\n' || (c == 0 && s == bufend)));
1436 } else if (c == '\n') {
1437 s++;
1438 PL_parser->linestart = s;
1439 if (s == bufend)
1440 need_incline = 1;
1441 else
1442 incline(s);
1443 } else if (isSPACE(c)) {
1444 s++;
1445 } else if (c == 0 && s == bufend) {
1446 bool got_more;
1447#ifdef PERL_MAD
1448 if (PL_madskills)
1449 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1450#endif /* PERL_MAD */
f0998909
Z
1451 if (flags & LEX_NO_NEXT_CHUNK)
1452 break;
f0e67a1d
Z
1453 PL_parser->bufptr = s;
1454 CopLINE_inc(PL_curcop);
1455 got_more = lex_next_chunk(flags);
1456 CopLINE_dec(PL_curcop);
1457 s = PL_parser->bufptr;
1458 bufend = PL_parser->bufend;
1459 if (!got_more)
1460 break;
1461 if (need_incline && PL_parser->rsfp) {
1462 incline(s);
1463 need_incline = 0;
1464 }
1465 } else {
1466 break;
1467 }
1468 }
1469#ifdef PERL_MAD
1470 if (PL_madskills)
1471 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1472#endif /* PERL_MAD */
1473 PL_parser->bufptr = s;
1474}
1475
1476/*
ffb4593c
NT
1477 * S_incline
1478 * This subroutine has nothing to do with tilting, whether at windmills
1479 * or pinball tables. Its name is short for "increment line". It
57843af0 1480 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1481 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1482 * # line 500 "foo.pm"
1483 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1484 */
1485
76e3520e 1486STATIC void
d9095cec 1487S_incline(pTHX_ const char *s)
463ee0b2 1488{
97aff369 1489 dVAR;
d9095cec
NC
1490 const char *t;
1491 const char *n;
1492 const char *e;
463ee0b2 1493
7918f24d
NC
1494 PERL_ARGS_ASSERT_INCLINE;
1495
57843af0 1496 CopLINE_inc(PL_curcop);
463ee0b2
LW
1497 if (*s++ != '#')
1498 return;
d4c19fe8
AL
1499 while (SPACE_OR_TAB(*s))
1500 s++;
73659bf1
GS
1501 if (strnEQ(s, "line", 4))
1502 s += 4;
1503 else
1504 return;
084592ab 1505 if (SPACE_OR_TAB(*s))
73659bf1 1506 s++;
4e553d73 1507 else
73659bf1 1508 return;
d4c19fe8
AL
1509 while (SPACE_OR_TAB(*s))
1510 s++;
463ee0b2
LW
1511 if (!isDIGIT(*s))
1512 return;
d4c19fe8 1513
463ee0b2
LW
1514 n = s;
1515 while (isDIGIT(*s))
1516 s++;
07714eb4 1517 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1518 return;
bf4acbe4 1519 while (SPACE_OR_TAB(*s))
463ee0b2 1520 s++;
73659bf1 1521 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1522 s++;
73659bf1
GS
1523 e = t + 1;
1524 }
463ee0b2 1525 else {
c35e046a
AL
1526 t = s;
1527 while (!isSPACE(*t))
1528 t++;
73659bf1 1529 e = t;
463ee0b2 1530 }
bf4acbe4 1531 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1532 e++;
1533 if (*e != '\n' && *e != '\0')
1534 return; /* false alarm */
1535
f4dd75d9 1536 if (t - s > 0) {
d9095cec 1537 const STRLEN len = t - s;
8a5ee598 1538#ifndef USE_ITHREADS
19bad673
NC
1539 SV *const temp_sv = CopFILESV(PL_curcop);
1540 const char *cf;
1541 STRLEN tmplen;
1542
1543 if (temp_sv) {
1544 cf = SvPVX(temp_sv);
1545 tmplen = SvCUR(temp_sv);
1546 } else {
1547 cf = NULL;
1548 tmplen = 0;
1549 }
1550
42d9b98d 1551 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1552 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1553 * to *{"::_<newfilename"} */
44867030
NC
1554 /* However, the long form of evals is only turned on by the
1555 debugger - usually they're "(eval %lu)" */
1556 char smallbuf[128];
1557 char *tmpbuf;
1558 GV **gvp;
d9095cec 1559 STRLEN tmplen2 = len;
798b63bc 1560 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1561 tmpbuf = smallbuf;
1562 else
2ae0db35 1563 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1564 tmpbuf[0] = '_';
1565 tmpbuf[1] = '<';
2ae0db35 1566 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1567 tmplen += 2;
8a5ee598
RGS
1568 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1569 if (gvp) {
44867030
NC
1570 char *tmpbuf2;
1571 GV *gv2;
1572
1573 if (tmplen2 + 2 <= sizeof smallbuf)
1574 tmpbuf2 = smallbuf;
1575 else
1576 Newx(tmpbuf2, tmplen2 + 2, char);
1577
1578 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1579 /* Either they malloc'd it, or we malloc'd it,
1580 so no prefix is present in ours. */
1581 tmpbuf2[0] = '_';
1582 tmpbuf2[1] = '<';
1583 }
1584
1585 memcpy(tmpbuf2 + 2, s, tmplen2);
1586 tmplen2 += 2;
1587
8a5ee598 1588 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1589 if (!isGV(gv2)) {
8a5ee598 1590 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1591 /* adjust ${"::_<newfilename"} to store the new file name */
1592 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1593 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1594 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1595 }
44867030
NC
1596
1597 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1598 }
e66cf94c 1599 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1600 }
8a5ee598 1601#endif
05ec9bb3 1602 CopFILE_free(PL_curcop);
d9095cec 1603 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1604 }
57843af0 1605 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1606}
1607
29595ff2 1608#ifdef PERL_MAD
cd81e915 1609/* skip space before PL_thistoken */
29595ff2
NC
1610
1611STATIC char *
1612S_skipspace0(pTHX_ register char *s)
1613{
7918f24d
NC
1614 PERL_ARGS_ASSERT_SKIPSPACE0;
1615
29595ff2
NC
1616 s = skipspace(s);
1617 if (!PL_madskills)
1618 return s;
cd81e915
NC
1619 if (PL_skipwhite) {
1620 if (!PL_thiswhite)
6b29d1f5 1621 PL_thiswhite = newSVpvs("");
cd81e915
NC
1622 sv_catsv(PL_thiswhite, PL_skipwhite);
1623 sv_free(PL_skipwhite);
1624 PL_skipwhite = 0;
1625 }
1626 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1627 return s;
1628}
1629
cd81e915 1630/* skip space after PL_thistoken */
29595ff2
NC
1631
1632STATIC char *
1633S_skipspace1(pTHX_ register char *s)
1634{
d4c19fe8 1635 const char *start = s;
29595ff2
NC
1636 I32 startoff = start - SvPVX(PL_linestr);
1637
7918f24d
NC
1638 PERL_ARGS_ASSERT_SKIPSPACE1;
1639
29595ff2
NC
1640 s = skipspace(s);
1641 if (!PL_madskills)
1642 return s;
1643 start = SvPVX(PL_linestr) + startoff;
cd81e915 1644 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1645 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1646 PL_thistoken = newSVpvn(tstart, start - tstart);
1647 }
1648 PL_realtokenstart = -1;
1649 if (PL_skipwhite) {
1650 if (!PL_nextwhite)
6b29d1f5 1651 PL_nextwhite = newSVpvs("");
cd81e915
NC
1652 sv_catsv(PL_nextwhite, PL_skipwhite);
1653 sv_free(PL_skipwhite);
1654 PL_skipwhite = 0;
29595ff2
NC
1655 }
1656 return s;
1657}
1658
1659STATIC char *
1660S_skipspace2(pTHX_ register char *s, SV **svp)
1661{
c35e046a
AL
1662 char *start;
1663 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1664 const I32 startoff = s - SvPVX(PL_linestr);
1665
7918f24d
NC
1666 PERL_ARGS_ASSERT_SKIPSPACE2;
1667
29595ff2
NC
1668 s = skipspace(s);
1669 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1670 if (!PL_madskills || !svp)
1671 return s;
1672 start = SvPVX(PL_linestr) + startoff;
cd81e915 1673 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1674 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1675 PL_thistoken = newSVpvn(tstart, start - tstart);
1676 PL_realtokenstart = -1;
29595ff2 1677 }
cd81e915 1678 if (PL_skipwhite) {
29595ff2 1679 if (!*svp)
6b29d1f5 1680 *svp = newSVpvs("");
cd81e915
NC
1681 sv_setsv(*svp, PL_skipwhite);
1682 sv_free(PL_skipwhite);
1683 PL_skipwhite = 0;
29595ff2
NC
1684 }
1685
1686 return s;
1687}
1688#endif
1689
80a702cd 1690STATIC void
15f169a1 1691S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1692{
1693 AV *av = CopFILEAVx(PL_curcop);
1694 if (av) {
b9f83d2f 1695 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1696 if (orig_sv)
1697 sv_setsv(sv, orig_sv);
1698 else
1699 sv_setpvn(sv, buf, len);
80a702cd
RGS
1700 (void)SvIOK_on(sv);
1701 SvIV_set(sv, 0);
1702 av_store(av, (I32)CopLINE(PL_curcop), sv);
1703 }
1704}
1705
ffb4593c
NT
1706/*
1707 * S_skipspace
1708 * Called to gobble the appropriate amount and type of whitespace.
1709 * Skips comments as well.
1710 */
1711
76e3520e 1712STATIC char *
cea2e8a9 1713S_skipspace(pTHX_ register char *s)
a687059c 1714{
5db06880 1715#ifdef PERL_MAD
f0e67a1d
Z
1716 char *start = s;
1717#endif /* PERL_MAD */
7918f24d 1718 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1719#ifdef PERL_MAD
cd81e915
NC
1720 if (PL_skipwhite) {
1721 sv_free(PL_skipwhite);
f0e67a1d 1722 PL_skipwhite = NULL;
5db06880 1723 }
f0e67a1d 1724#endif /* PERL_MAD */
3280af22 1725 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1726 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1727 s++;
f0e67a1d
Z
1728 } else {
1729 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1730 PL_bufptr = s;
f0998909
Z
1731 lex_read_space(LEX_KEEP_PREVIOUS |
1732 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1733 LEX_NO_NEXT_CHUNK : 0));
3280af22 1734 s = PL_bufptr;
f0e67a1d
Z
1735 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736 if (PL_linestart > PL_bufptr)
1737 PL_bufptr = PL_linestart;
1738 return s;
463ee0b2 1739 }
5db06880 1740#ifdef PERL_MAD
f0e67a1d
Z
1741 if (PL_madskills)
1742 PL_skipwhite = newSVpvn(start, s-start);
1743#endif /* PERL_MAD */
5db06880 1744 return s;
a687059c 1745}
378cc40b 1746
ffb4593c
NT
1747/*
1748 * S_check_uni
1749 * Check the unary operators to ensure there's no ambiguity in how they're
1750 * used. An ambiguous piece of code would be:
1751 * rand + 5
1752 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1753 * the +5 is its argument.
1754 */
1755
76e3520e 1756STATIC void
cea2e8a9 1757S_check_uni(pTHX)
ba106d47 1758{
97aff369 1759 dVAR;
d4c19fe8
AL
1760 const char *s;
1761 const char *t;
2f3197b3 1762
3280af22 1763 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1764 return;
3280af22
NIS
1765 while (isSPACE(*PL_last_uni))
1766 PL_last_uni++;
c35e046a
AL
1767 s = PL_last_uni;
1768 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1769 s++;
3280af22 1770 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1771 return;
6136c704 1772
9b387841
NC
1773 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1776}
1777
ffb4593c
NT
1778/*
1779 * LOP : macro to build a list operator. Its behaviour has been replaced
1780 * with a subroutine, S_lop() for which LOP is just another name.
1781 */
1782
a0d0e21e
LW
1783#define LOP(f,x) return lop(f,x,s)
1784
ffb4593c
NT
1785/*
1786 * S_lop
1787 * Build a list operator (or something that might be one). The rules:
1788 * - if we have a next token, then it's a list operator [why?]
1789 * - if the next thing is an opening paren, then it's a function
1790 * - else it's a list operator
1791 */
1792
76e3520e 1793STATIC I32
a0be28da 1794S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1795{
97aff369 1796 dVAR;
7918f24d
NC
1797
1798 PERL_ARGS_ASSERT_LOP;
1799
6154021b 1800 pl_yylval.ival = f;
35c8bce7 1801 CLINE;
3280af22
NIS
1802 PL_expect = x;
1803 PL_bufptr = s;
1804 PL_last_lop = PL_oldbufptr;
eb160463 1805 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1806#ifdef PERL_MAD
1807 if (PL_lasttoke)
1808 return REPORT(LSTOP);
1809#else
3280af22 1810 if (PL_nexttoke)
bbf60fe6 1811 return REPORT(LSTOP);
5db06880 1812#endif
79072805 1813 if (*s == '(')
bbf60fe6 1814 return REPORT(FUNC);
29595ff2 1815 s = PEEKSPACE(s);
79072805 1816 if (*s == '(')
bbf60fe6 1817 return REPORT(FUNC);
79072805 1818 else
bbf60fe6 1819 return REPORT(LSTOP);
79072805
LW
1820}
1821
5db06880
NC
1822#ifdef PERL_MAD
1823 /*
1824 * S_start_force
1825 * Sets up for an eventual force_next(). start_force(0) basically does
1826 * an unshift, while start_force(-1) does a push. yylex removes items
1827 * on the "pop" end.
1828 */
1829
1830STATIC void
1831S_start_force(pTHX_ int where)
1832{
1833 int i;
1834
cd81e915 1835 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1836 where = PL_lasttoke;
cd81e915
NC
1837 assert(PL_curforce < 0 || PL_curforce == where);
1838 if (PL_curforce != where) {
5db06880
NC
1839 for (i = PL_lasttoke; i > where; --i) {
1840 PL_nexttoke[i] = PL_nexttoke[i-1];
1841 }
1842 PL_lasttoke++;
1843 }
cd81e915 1844 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1845 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1846 PL_curforce = where;
1847 if (PL_nextwhite) {
5db06880 1848 if (PL_madskills)
6b29d1f5 1849 curmad('^', newSVpvs(""));
cd81e915 1850 CURMAD('_', PL_nextwhite);
5db06880
NC
1851 }
1852}
1853
1854STATIC void
1855S_curmad(pTHX_ char slot, SV *sv)
1856{
1857 MADPROP **where;
1858
1859 if (!sv)
1860 return;
cd81e915
NC
1861 if (PL_curforce < 0)
1862 where = &PL_thismad;
5db06880 1863 else
cd81e915 1864 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1865
cd81e915 1866 if (PL_faketokens)
76f68e9b 1867 sv_setpvs(sv, "");
5db06880
NC
1868 else {
1869 if (!IN_BYTES) {
1870 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871 SvUTF8_on(sv);
1872 else if (PL_encoding) {
1873 sv_recode_to_utf8(sv, PL_encoding);
1874 }
1875 }
1876 }
1877
1878 /* keep a slot open for the head of the list? */
1879 if (slot != '_' && *where && (*where)->mad_key == '^') {
1880 (*where)->mad_key = slot;
daba3364 1881 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1882 (*where)->mad_val = (void*)sv;
1883 }
1884 else
1885 addmad(newMADsv(slot, sv), where, 0);
1886}
1887#else
b3f24c00
MHM
1888# define start_force(where) NOOP
1889# define curmad(slot, sv) NOOP
5db06880
NC
1890#endif
1891
ffb4593c
NT
1892/*
1893 * S_force_next
9cbb5ea2 1894 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1895 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1896 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1897 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1899 */
1900
4e553d73 1901STATIC void
cea2e8a9 1902S_force_next(pTHX_ I32 type)
79072805 1903{
97aff369 1904 dVAR;
704d4215
GG
1905#ifdef DEBUGGING
1906 if (DEBUG_T_TEST) {
1907 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1908 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1909 }
1910#endif
5db06880 1911#ifdef PERL_MAD
cd81e915 1912 if (PL_curforce < 0)
5db06880 1913 start_force(PL_lasttoke);
cd81e915 1914 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1915 if (PL_lex_state != LEX_KNOWNEXT)
1916 PL_lex_defer = PL_lex_state;
1917 PL_lex_state = LEX_KNOWNEXT;
1918 PL_lex_expect = PL_expect;
cd81e915 1919 PL_curforce = -1;
5db06880 1920#else
3280af22
NIS
1921 PL_nexttype[PL_nexttoke] = type;
1922 PL_nexttoke++;
1923 if (PL_lex_state != LEX_KNOWNEXT) {
1924 PL_lex_defer = PL_lex_state;
1925 PL_lex_expect = PL_expect;
1926 PL_lex_state = LEX_KNOWNEXT;
79072805 1927 }
5db06880 1928#endif
79072805
LW
1929}
1930
d0a148a6 1931STATIC SV *
15f169a1 1932S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1933{
97aff369 1934 dVAR;
740cce10 1935 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1936 !IN_BYTES
1937 && UTF
1938 && !is_ascii_string((const U8*)start, len)
740cce10 1939 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1940 return sv;
1941}
1942
ffb4593c
NT
1943/*
1944 * S_force_word
1945 * When the lexer knows the next thing is a word (for instance, it has
1946 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1947 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1948 * lookahead.
ffb4593c
NT
1949 *
1950 * Arguments:
b1b65b59 1951 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1952 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1953 * int check_keyword : if true, Perl checks to make sure the word isn't
1954 * a keyword (do this if the word is a label, e.g. goto FOO)
1955 * int allow_pack : if true, : characters will also be allowed (require,
1956 * use, etc. do this)
9cbb5ea2 1957 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1958 */
1959
76e3520e 1960STATIC char *
cea2e8a9 1961S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1962{
97aff369 1963 dVAR;
463ee0b2
LW
1964 register char *s;
1965 STRLEN len;
4e553d73 1966
7918f24d
NC
1967 PERL_ARGS_ASSERT_FORCE_WORD;
1968
29595ff2 1969 start = SKIPSPACE1(start);
463ee0b2 1970 s = start;
7e2040f0 1971 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1972 (allow_pack && *s == ':') ||
15f0808c 1973 (allow_initial_tick && *s == '\'') )
a0d0e21e 1974 {
3280af22 1975 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1976 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1977 return start;
cd81e915 1978 start_force(PL_curforce);
5db06880
NC
1979 if (PL_madskills)
1980 curmad('X', newSVpvn(start,s-start));
463ee0b2 1981 if (token == METHOD) {
29595ff2 1982 s = SKIPSPACE1(s);
463ee0b2 1983 if (*s == '(')
3280af22 1984 PL_expect = XTERM;
463ee0b2 1985 else {
3280af22 1986 PL_expect = XOPERATOR;
463ee0b2 1987 }
79072805 1988 }
e74e6b3d 1989 if (PL_madskills)
63575281 1990 curmad('g', newSVpvs( "forced" ));
9ded7720 1991 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1992 = (OP*)newSVOP(OP_CONST,0,
1993 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1994 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1995 force_next(token);
1996 }
1997 return s;
1998}
1999
ffb4593c
NT
2000/*
2001 * S_force_ident
9cbb5ea2 2002 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2003 * text only contains the "foo" portion. The first argument is a pointer
2004 * to the "foo", and the second argument is the type symbol to prefix.
2005 * Forces the next token to be a "WORD".
9cbb5ea2 2006 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2007 */
2008
76e3520e 2009STATIC void
bfed75c6 2010S_force_ident(pTHX_ register const char *s, int kind)
79072805 2011{
97aff369 2012 dVAR;
7918f24d
NC
2013
2014 PERL_ARGS_ASSERT_FORCE_IDENT;
2015
c35e046a 2016 if (*s) {
90e5519e
NC
2017 const STRLEN len = strlen(s);
2018 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2019 start_force(PL_curforce);
9ded7720 2020 NEXTVAL_NEXTTOKE.opval = o;
79072805 2021 force_next(WORD);
748a9306 2022 if (kind) {
11343788 2023 o->op_private = OPpCONST_ENTERED;
55497cff 2024 /* XXX see note in pp_entereval() for why we forgo typo
2025 warnings if the symbol must be introduced in an eval.
2026 GSAR 96-10-12 */
90e5519e
NC
2027 gv_fetchpvn_flags(s, len,
2028 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2029 : GV_ADD,
2030 kind == '$' ? SVt_PV :
2031 kind == '@' ? SVt_PVAV :
2032 kind == '%' ? SVt_PVHV :
a0d0e21e 2033 SVt_PVGV
90e5519e 2034 );
748a9306 2035 }
79072805
LW
2036 }
2037}
2038
1571675a
GS
2039NV
2040Perl_str_to_version(pTHX_ SV *sv)
2041{
2042 NV retval = 0.0;
2043 NV nshift = 1.0;
2044 STRLEN len;
cfd0369c 2045 const char *start = SvPV_const(sv,len);
9d4ba2ae 2046 const char * const end = start + len;
504618e9 2047 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_STR_TO_VERSION;
2050
1571675a 2051 while (start < end) {
ba210ebe 2052 STRLEN skip;
1571675a
GS
2053 UV n;
2054 if (utf)
9041c2e3 2055 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2056 else {
2057 n = *(U8*)start;
2058 skip = 1;
2059 }
2060 retval += ((NV)n)/nshift;
2061 start += skip;
2062 nshift *= 1000;
2063 }
2064 return retval;
2065}
2066
4e553d73 2067/*
ffb4593c
NT
2068 * S_force_version
2069 * Forces the next token to be a version number.
e759cc13
RGS
2070 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071 * and if "guessing" is TRUE, then no new token is created (and the caller
2072 * must use an alternative parsing method).
ffb4593c
NT
2073 */
2074
76e3520e 2075STATIC char *
e759cc13 2076S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2077{
97aff369 2078 dVAR;
5f66b61c 2079 OP *version = NULL;
44dcb63b 2080 char *d;
5db06880
NC
2081#ifdef PERL_MAD
2082 I32 startoff = s - SvPVX(PL_linestr);
2083#endif
89bfa8cd 2084
7918f24d
NC
2085 PERL_ARGS_ASSERT_FORCE_VERSION;
2086
29595ff2 2087 s = SKIPSPACE1(s);
89bfa8cd 2088
44dcb63b 2089 d = s;
dd629d5b 2090 if (*d == 'v')
44dcb63b 2091 d++;
44dcb63b 2092 if (isDIGIT(*d)) {
e759cc13
RGS
2093 while (isDIGIT(*d) || *d == '_' || *d == '.')
2094 d++;
5db06880
NC
2095#ifdef PERL_MAD
2096 if (PL_madskills) {
cd81e915 2097 start_force(PL_curforce);
5db06880
NC
2098 curmad('X', newSVpvn(s,d-s));
2099 }
2100#endif
4e4da3ac 2101 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2102 SV *ver;
8d08d9ba
DG
2103#ifdef USE_LOCALE_NUMERIC
2104 char *loc = setlocale(LC_NUMERIC, "C");
2105#endif
6154021b 2106 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2107#ifdef USE_LOCALE_NUMERIC
2108 setlocale(LC_NUMERIC, loc);
2109#endif
6154021b 2110 version = pl_yylval.opval;
dd629d5b
GS
2111 ver = cSVOPx(version)->op_sv;
2112 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2113 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2114 SvNV_set(ver, str_to_version(ver));
1571675a 2115 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2116 }
89bfa8cd 2117 }
5db06880
NC
2118 else if (guessing) {
2119#ifdef PERL_MAD
2120 if (PL_madskills) {
cd81e915
NC
2121 sv_free(PL_nextwhite); /* let next token collect whitespace */
2122 PL_nextwhite = 0;
5db06880
NC
2123 s = SvPVX(PL_linestr) + startoff;
2124 }
2125#endif
e759cc13 2126 return s;
5db06880 2127 }
89bfa8cd 2128 }
2129
5db06880
NC
2130#ifdef PERL_MAD
2131 if (PL_madskills && !version) {
cd81e915
NC
2132 sv_free(PL_nextwhite); /* let next token collect whitespace */
2133 PL_nextwhite = 0;
5db06880
NC
2134 s = SvPVX(PL_linestr) + startoff;
2135 }
2136#endif
89bfa8cd 2137 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2138 start_force(PL_curforce);
9ded7720 2139 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2140 force_next(WORD);
89bfa8cd 2141
e759cc13 2142 return s;
89bfa8cd 2143}
2144
ffb4593c 2145/*
91152fc1
DG
2146 * S_force_strict_version
2147 * Forces the next token to be a version number using strict syntax rules.
2148 */
2149
2150STATIC char *
2151S_force_strict_version(pTHX_ char *s)
2152{
2153 dVAR;
2154 OP *version = NULL;
2155#ifdef PERL_MAD
2156 I32 startoff = s - SvPVX(PL_linestr);
2157#endif
2158 const char *errstr = NULL;
2159
2160 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2161
2162 while (isSPACE(*s)) /* leading whitespace */
2163 s++;
2164
2165 if (is_STRICT_VERSION(s,&errstr)) {
2166 SV *ver = newSV(0);
2167 s = (char *)scan_version(s, ver, 0);
2168 version = newSVOP(OP_CONST, 0, ver);
2169 }
4e4da3ac
Z
2170 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2171 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2172 {
91152fc1
DG
2173 PL_bufptr = s;
2174 if (errstr)
2175 yyerror(errstr); /* version required */
2176 return s;
2177 }
2178
2179#ifdef PERL_MAD
2180 if (PL_madskills && !version) {
2181 sv_free(PL_nextwhite); /* let next token collect whitespace */
2182 PL_nextwhite = 0;
2183 s = SvPVX(PL_linestr) + startoff;
2184 }
2185#endif
2186 /* NOTE: The parser sees the package name and the VERSION swapped */
2187 start_force(PL_curforce);
2188 NEXTVAL_NEXTTOKE.opval = version;
2189 force_next(WORD);
2190
2191 return s;
2192}
2193
2194/*
ffb4593c
NT
2195 * S_tokeq
2196 * Tokenize a quoted string passed in as an SV. It finds the next
2197 * chunk, up to end of string or a backslash. It may make a new
2198 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2199 * turns \\ into \.
2200 */
2201
76e3520e 2202STATIC SV *
cea2e8a9 2203S_tokeq(pTHX_ SV *sv)
79072805 2204{
97aff369 2205 dVAR;
79072805
LW
2206 register char *s;
2207 register char *send;
2208 register char *d;
b3ac6de7
IZ
2209 STRLEN len = 0;
2210 SV *pv = sv;
79072805 2211
7918f24d
NC
2212 PERL_ARGS_ASSERT_TOKEQ;
2213
79072805 2214 if (!SvLEN(sv))
b3ac6de7 2215 goto finish;
79072805 2216
a0d0e21e 2217 s = SvPV_force(sv, len);
21a311ee 2218 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2219 goto finish;
463ee0b2 2220 send = s + len;
79072805
LW
2221 while (s < send && *s != '\\')
2222 s++;
2223 if (s == send)
b3ac6de7 2224 goto finish;
79072805 2225 d = s;
be4731d2 2226 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2227 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2228 }
79072805
LW
2229 while (s < send) {
2230 if (*s == '\\') {
a0d0e21e 2231 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2232 s++; /* all that, just for this */
2233 }
2234 *d++ = *s++;
2235 }
2236 *d = '\0';
95a20fc0 2237 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2238 finish:
3280af22 2239 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2240 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2241 return sv;
2242}
2243
ffb4593c
NT
2244/*
2245 * Now come three functions related to double-quote context,
2246 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2247 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2248 * interact with PL_lex_state, and create fake ( ... ) argument lists
2249 * to handle functions and concatenation.
2250 * They assume that whoever calls them will be setting up a fake
2251 * join call, because each subthing puts a ',' after it. This lets
2252 * "lower \luPpEr"
2253 * become
2254 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2255 *
2256 * (I'm not sure whether the spurious commas at the end of lcfirst's
2257 * arguments and join's arguments are created or not).
2258 */
2259
2260/*
2261 * S_sublex_start
6154021b 2262 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2263 *
2264 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2265 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2266 *
2267 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2268 *
2269 * Everything else becomes a FUNC.
2270 *
2271 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2272 * had an OP_CONST or OP_READLINE). This just sets us up for a
2273 * call to S_sublex_push().
2274 */
2275
76e3520e 2276STATIC I32
cea2e8a9 2277S_sublex_start(pTHX)
79072805 2278{
97aff369 2279 dVAR;
6154021b 2280 register const I32 op_type = pl_yylval.ival;
79072805
LW
2281
2282 if (op_type == OP_NULL) {
6154021b 2283 pl_yylval.opval = PL_lex_op;
5f66b61c 2284 PL_lex_op = NULL;
79072805
LW
2285 return THING;
2286 }
2287 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2288 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2289
2290 if (SvTYPE(sv) == SVt_PVIV) {
2291 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2292 STRLEN len;
96a5add6 2293 const char * const p = SvPV_const(sv, len);
740cce10 2294 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2295 SvREFCNT_dec(sv);
2296 sv = nsv;
4e553d73 2297 }
6154021b 2298 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2299 PL_lex_stuff = NULL;
6f33ba73
RGS
2300 /* Allow <FH> // "foo" */
2301 if (op_type == OP_READLINE)
2302 PL_expect = XTERMORDORDOR;
79072805
LW
2303 return THING;
2304 }
e3f73d4e
RGS
2305 else if (op_type == OP_BACKTICK && PL_lex_op) {
2306 /* readpipe() vas overriden */
2307 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2308 pl_yylval.opval = PL_lex_op;
9b201d7d 2309 PL_lex_op = NULL;
e3f73d4e
RGS
2310 PL_lex_stuff = NULL;
2311 return THING;
2312 }
79072805 2313
3280af22 2314 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2315 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2316 PL_sublex_info.sub_op = PL_lex_op;
2317 PL_lex_state = LEX_INTERPPUSH;
55497cff 2318
3280af22
NIS
2319 PL_expect = XTERM;
2320 if (PL_lex_op) {
6154021b 2321 pl_yylval.opval = PL_lex_op;
5f66b61c 2322 PL_lex_op = NULL;
55497cff 2323 return PMFUNC;
2324 }
2325 else
2326 return FUNC;
2327}
2328
ffb4593c
NT
2329/*
2330 * S_sublex_push
2331 * Create a new scope to save the lexing state. The scope will be
2332 * ended in S_sublex_done. Returns a '(', starting the function arguments
2333 * to the uc, lc, etc. found before.
2334 * Sets PL_lex_state to LEX_INTERPCONCAT.
2335 */
2336
76e3520e 2337STATIC I32
cea2e8a9 2338S_sublex_push(pTHX)
55497cff 2339{
27da23d5 2340 dVAR;
f46d017c 2341 ENTER;
55497cff 2342
3280af22 2343 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2344 SAVEBOOL(PL_lex_dojoin);
3280af22 2345 SAVEI32(PL_lex_brackets);
3280af22
NIS
2346 SAVEI32(PL_lex_casemods);
2347 SAVEI32(PL_lex_starts);
651b5b28 2348 SAVEI8(PL_lex_state);
7766f137 2349 SAVEVPTR(PL_lex_inpat);
98246f1e 2350 SAVEI16(PL_lex_inwhat);
57843af0 2351 SAVECOPLINE(PL_curcop);
3280af22 2352 SAVEPPTR(PL_bufptr);
8452ff4b 2353 SAVEPPTR(PL_bufend);
3280af22
NIS
2354 SAVEPPTR(PL_oldbufptr);
2355 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2356 SAVEPPTR(PL_last_lop);
2357 SAVEPPTR(PL_last_uni);
3280af22
NIS
2358 SAVEPPTR(PL_linestart);
2359 SAVESPTR(PL_linestr);
8edd5f42
RGS
2360 SAVEGENERICPV(PL_lex_brackstack);
2361 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2362
2363 PL_linestr = PL_lex_stuff;
a0714e2c 2364 PL_lex_stuff = NULL;
3280af22 2365
9cbb5ea2
GS
2366 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2367 = SvPVX(PL_linestr);
3280af22 2368 PL_bufend += SvCUR(PL_linestr);
bd61b366 2369 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2370 SAVEFREESV(PL_linestr);
2371
2372 PL_lex_dojoin = FALSE;
2373 PL_lex_brackets = 0;
a02a5408
JC
2374 Newx(PL_lex_brackstack, 120, char);
2375 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2376 PL_lex_casemods = 0;
2377 *PL_lex_casestack = '\0';
2378 PL_lex_starts = 0;
2379 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2380 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2381
2382 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2383 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2384 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2385 else
5f66b61c 2386 PL_lex_inpat = NULL;
79072805 2387
55497cff 2388 return '(';
79072805
LW
2389}
2390
ffb4593c
NT
2391/*
2392 * S_sublex_done
2393 * Restores lexer state after a S_sublex_push.
2394 */
2395
76e3520e 2396STATIC I32
cea2e8a9 2397S_sublex_done(pTHX)
79072805 2398{
27da23d5 2399 dVAR;
3280af22 2400 if (!PL_lex_starts++) {
396482e1 2401 SV * const sv = newSVpvs("");
9aa983d2
JH
2402 if (SvUTF8(PL_linestr))
2403 SvUTF8_on(sv);
3280af22 2404 PL_expect = XOPERATOR;
6154021b 2405 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2406 return THING;
2407 }
2408
3280af22
NIS
2409 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2410 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2411 return yylex();
79072805
LW
2412 }
2413
ffb4593c 2414 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2415 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2416 PL_linestr = PL_lex_repl;
2417 PL_lex_inpat = 0;
2418 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2419 PL_bufend += SvCUR(PL_linestr);
bd61b366 2420 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2421 SAVEFREESV(PL_linestr);
2422 PL_lex_dojoin = FALSE;
2423 PL_lex_brackets = 0;
3280af22
NIS
2424 PL_lex_casemods = 0;
2425 *PL_lex_casestack = '\0';
2426 PL_lex_starts = 0;
25da4f38 2427 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2428 PL_lex_state = LEX_INTERPNORMAL;
2429 PL_lex_starts++;
e9fa98b2
HS
2430 /* we don't clear PL_lex_repl here, so that we can check later
2431 whether this is an evalled subst; that means we rely on the
2432 logic to ensure sublex_done() is called again only via the
2433 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2434 }
e9fa98b2 2435 else {
3280af22 2436 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2437 PL_lex_repl = NULL;
e9fa98b2 2438 }
79072805 2439 return ',';
ffed7fef
LW
2440 }
2441 else {
5db06880
NC
2442#ifdef PERL_MAD
2443 if (PL_madskills) {
cd81e915
NC
2444 if (PL_thiswhite) {
2445 if (!PL_endwhite)
6b29d1f5 2446 PL_endwhite = newSVpvs("");
cd81e915
NC
2447 sv_catsv(PL_endwhite, PL_thiswhite);
2448 PL_thiswhite = 0;
2449 }
2450 if (PL_thistoken)
76f68e9b 2451 sv_setpvs(PL_thistoken,"");
5db06880 2452 else
cd81e915 2453 PL_realtokenstart = -1;
5db06880
NC
2454 }
2455#endif
f46d017c 2456 LEAVE;
3280af22
NIS
2457 PL_bufend = SvPVX(PL_linestr);
2458 PL_bufend += SvCUR(PL_linestr);
2459 PL_expect = XOPERATOR;
09bef843 2460 PL_sublex_info.sub_inwhat = 0;
79072805 2461 return ')';
ffed7fef
LW
2462 }
2463}
2464
02aa26ce
NT
2465/*
2466 scan_const
2467
2468 Extracts a pattern, double-quoted string, or transliteration. This
2469 is terrifying code.
2470
94def140 2471 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2472 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2473 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2474
94def140
TS
2475 Returns a pointer to the character scanned up to. If this is
2476 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2477 successfully parsed), will leave an OP for the substring scanned
6154021b 2478 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2479 by looking at the next characters herself.
2480
02aa26ce
NT
2481 In patterns:
2482 backslashes:
ff3f963a 2483 constants: \N{NAME} only
02aa26ce
NT
2484 case and quoting: \U \Q \E
2485 stops on @ and $, but not for $ as tail anchor
2486
2487 In transliterations:
2488 characters are VERY literal, except for - not at the start or end
94def140
TS
2489 of the string, which indicates a range. If the range is in bytes,
2490 scan_const expands the range to the full set of intermediate
2491 characters. If the range is in utf8, the hyphen is replaced with
2492 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2493
2494 In double-quoted strings:
2495 backslashes:
2496 double-quoted style: \r and \n
ff3f963a 2497 constants: \x31, etc.
94def140 2498 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2499 case and quoting: \U \Q \E
2500 stops on @ and $
2501
2502 scan_const does *not* construct ops to handle interpolated strings.
2503 It stops processing as soon as it finds an embedded $ or @ variable
2504 and leaves it to the caller to work out what's going on.
2505
94def140
TS
2506 embedded arrays (whether in pattern or not) could be:
2507 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2508
2509 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2510
2511 $ in pattern could be $foo or could be tail anchor. Assumption:
2512 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2513 followed by one of "()| \r\n\t"
02aa26ce
NT
2514
2515 \1 (backreferences) are turned into $1
2516
2517 The structure of the code is
2518 while (there's a character to process) {
94def140
TS
2519 handle transliteration ranges
2520 skip regexp comments /(?#comment)/ and codes /(?{code})/
2521 skip #-initiated comments in //x patterns
2522 check for embedded arrays
02aa26ce
NT
2523 check for embedded scalars
2524 if (backslash) {
94def140 2525 deprecate \1 in substitution replacements
02aa26ce
NT
2526 handle string-changing backslashes \l \U \Q \E, etc.
2527 switch (what was escaped) {
94def140 2528 handle \- in a transliteration (becomes a literal -)
ff3f963a 2529 if a pattern and not \N{, go treat as regular character
94def140
TS
2530 handle \132 (octal characters)
2531 handle \x15 and \x{1234} (hex characters)
ff3f963a 2532 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2533 handle \cV (control characters)
2534 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2535 } (end switch)
77a135fe 2536 continue
02aa26ce 2537 } (end if backslash)
77a135fe 2538 handle regular character
02aa26ce 2539 } (end while character to read)
4e553d73 2540
02aa26ce
NT
2541*/
2542
76e3520e 2543STATIC char *
cea2e8a9 2544S_scan_const(pTHX_ char *start)
79072805 2545{
97aff369 2546 dVAR;
3280af22 2547 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2548 SV *sv = newSV(send - start); /* sv for the constant. See
2549 note below on sizing. */
02aa26ce
NT
2550 register char *s = start; /* start of the constant */
2551 register char *d = SvPVX(sv); /* destination for copies */
2552 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2553 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2554 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2555 I32 this_utf8 = UTF; /* Is the source string assumed
2556 to be UTF8? But, this can
2557 show as true when the source
2558 isn't utf8, as for example
2559 when it is entirely composed
2560 of hex constants */
2561
2562 /* Note on sizing: The scanned constant is placed into sv, which is
2563 * initialized by newSV() assuming one byte of output for every byte of
2564 * input. This routine expects newSV() to allocate an extra byte for a
2565 * trailing NUL, which this routine will append if it gets to the end of
2566 * the input. There may be more bytes of input than output (eg., \N{LATIN
2567 * CAPITAL LETTER A}), or more output than input if the constant ends up
2568 * recoded to utf8, but each time a construct is found that might increase
2569 * the needed size, SvGROW() is called. Its size parameter each time is
2570 * based on the best guess estimate at the time, namely the length used so
2571 * far, plus the length the current construct will occupy, plus room for
2572 * the trailing NUL, plus one byte for every input byte still unscanned */
2573
012bcf8d 2574 UV uv;
4c3a8340
TS
2575#ifdef EBCDIC
2576 UV literal_endpoint = 0;
e294cc5d 2577 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2578#endif
012bcf8d 2579
7918f24d
NC
2580 PERL_ARGS_ASSERT_SCAN_CONST;
2581
2b9d42f0
NIS
2582 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2583 /* If we are doing a trans and we know we want UTF8 set expectation */
2584 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2585 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2586 }
2587
2588
79072805 2589 while (s < send || dorange) {
ff3f963a 2590
02aa26ce 2591 /* get transliterations out of the way (they're most literal) */
3280af22 2592 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2593 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2594 if (dorange) {
1ba5c669
JH
2595 I32 i; /* current expanded character */
2596 I32 min; /* first character in range */
2597 I32 max; /* last character in range */
02aa26ce 2598
e294cc5d
JH
2599#ifdef EBCDIC
2600 UV uvmax = 0;
2601#endif
2602
2603 if (has_utf8
2604#ifdef EBCDIC
2605 && !native_range
2606#endif
2607 ) {
9d4ba2ae 2608 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2609 char *e = d++;
2610 while (e-- > c)
2611 *(e + 1) = *e;
25716404 2612 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2613 /* mark the range as done, and continue */
2614 dorange = FALSE;
2615 didrange = TRUE;
2616 continue;
2617 }
2b9d42f0 2618
95a20fc0 2619 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2620#ifdef EBCDIC
2621 SvGROW(sv,
2622 SvLEN(sv) + (has_utf8 ?
2623 (512 - UTF_CONTINUATION_MARK +
2624 UNISKIP(0x100))
2625 : 256));
2626 /* How many two-byte within 0..255: 128 in UTF-8,
2627 * 96 in UTF-8-mod. */
2628#else
9cbb5ea2 2629 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2630#endif
9cbb5ea2 2631 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2632#ifdef EBCDIC
2633 if (has_utf8) {
2634 int j;
2635 for (j = 0; j <= 1; j++) {
2636 char * const c = (char*)utf8_hop((U8*)d, -1);
2637 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2638 if (j)
2639 min = (U8)uv;
2640 else if (uv < 256)
2641 max = (U8)uv;
2642 else {
2643 max = (U8)0xff; /* only to \xff */
2644 uvmax = uv; /* \x{100} to uvmax */
2645 }
2646 d = c; /* eat endpoint chars */
2647 }
2648 }
2649 else {
2650#endif
2651 d -= 2; /* eat the first char and the - */
2652 min = (U8)*d; /* first char in range */
2653 max = (U8)d[1]; /* last char in range */
2654#ifdef EBCDIC
2655 }
2656#endif
8ada0baa 2657
c2e66d9e 2658 if (min > max) {
01ec43d0 2659 Perl_croak(aTHX_
d1573ac7 2660 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2661 (char)min, (char)max);
c2e66d9e
GS
2662 }
2663
c7f1f016 2664#ifdef EBCDIC
4c3a8340
TS
2665 if (literal_endpoint == 2 &&
2666 ((isLOWER(min) && isLOWER(max)) ||
2667 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2668 if (isLOWER(min)) {
2669 for (i = min; i <= max; i++)
2670 if (isLOWER(i))
db42d148 2671 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2672 } else {
2673 for (i = min; i <= max; i++)
2674 if (isUPPER(i))
db42d148 2675 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2676 }
2677 }
2678 else
2679#endif
2680 for (i = min; i <= max; i++)
e294cc5d
JH
2681#ifdef EBCDIC
2682 if (has_utf8) {
2683 const U8 ch = (U8)NATIVE_TO_UTF(i);
2684 if (UNI_IS_INVARIANT(ch))
2685 *d++ = (U8)i;
2686 else {
2687 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2688 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2689 }
2690 }
2691 else
2692#endif
2693 *d++ = (char)i;
2694
2695#ifdef EBCDIC
2696 if (uvmax) {
2697 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2698 if (uvmax > 0x101)
2699 *d++ = (char)UTF_TO_NATIVE(0xff);
2700 if (uvmax > 0x100)
2701 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2702 }
2703#endif
02aa26ce
NT
2704
2705 /* mark the range as done, and continue */
79072805 2706 dorange = FALSE;
01ec43d0 2707 didrange = TRUE;
4c3a8340
TS
2708#ifdef EBCDIC
2709 literal_endpoint = 0;
2710#endif
79072805 2711 continue;
4e553d73 2712 }
02aa26ce
NT
2713
2714 /* range begins (ignore - as first or last char) */
79072805 2715 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2716 if (didrange) {
1fafa243 2717 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2718 }
e294cc5d
JH
2719 if (has_utf8
2720#ifdef EBCDIC
2721 && !native_range
2722#endif
2723 ) {
25716404 2724 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2725 s++;
2726 continue;
2727 }
79072805
LW
2728 dorange = TRUE;
2729 s++;
01ec43d0
GS
2730 }
2731 else {
2732 didrange = FALSE;
4c3a8340
TS
2733#ifdef EBCDIC
2734 literal_endpoint = 0;
e294cc5d 2735 native_range = TRUE;
4c3a8340 2736#endif
01ec43d0 2737 }
79072805 2738 }
02aa26ce
NT
2739
2740 /* if we get here, we're not doing a transliteration */
2741
0f5d15d6
IZ
2742 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2743 except for the last char, which will be done separately. */
3280af22 2744 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2745 if (s[2] == '#') {
e994fd66 2746 while (s+1 < send && *s != ')')
db42d148 2747 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2748 }
2749 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2750 || (s[2] == '?' && s[3] == '{'))
155aba94 2751 {
cc6b7395 2752 I32 count = 1;
0f5d15d6 2753 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2754 char c;
2755
d9f97599
GS
2756 while (count && (c = *regparse)) {
2757 if (c == '\\' && regparse[1])
2758 regparse++;
4e553d73 2759 else if (c == '{')
cc6b7395 2760 count++;
4e553d73 2761 else if (c == '}')
cc6b7395 2762 count--;
d9f97599 2763 regparse++;
cc6b7395 2764 }
e994fd66 2765 if (*regparse != ')')
5bdf89e7 2766 regparse--; /* Leave one char for continuation. */
0f5d15d6 2767 while (s < regparse)
db42d148 2768 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2769 }
748a9306 2770 }
02aa26ce
NT
2771
2772 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2773 else if (*s == '#' && PL_lex_inpat &&
2774 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2775 while (s+1 < send && *s != '\n')
db42d148 2776 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2777 }
02aa26ce 2778
5d1d4326 2779 /* check for embedded arrays
da6eedaa 2780 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2781 */
1749ea0d
TS
2782 else if (*s == '@' && s[1]) {
2783 if (isALNUM_lazy_if(s+1,UTF))
2784 break;
2785 if (strchr(":'{$", s[1]))
2786 break;
2787 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2788 break; /* in regexp, neither @+ nor @- are interpolated */
2789 }
02aa26ce
NT
2790
2791 /* check for embedded scalars. only stop if we're sure it's a
2792 variable.
2793 */
79072805 2794 else if (*s == '$') {
3280af22 2795 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2796 break;
77772344 2797 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2798 if (s[1] == '\\') {
2799 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2800 "Possible unintended interpolation of $\\ in regex");
77772344 2801 }
79072805 2802 break; /* in regexp, $ might be tail anchor */
77772344 2803 }
79072805 2804 }
02aa26ce 2805
2b9d42f0
NIS
2806 /* End of else if chain - OP_TRANS rejoin rest */
2807
02aa26ce 2808 /* backslashes */
79072805 2809 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2810 char* e; /* Can be used for ending '}', etc. */
2811
79072805 2812 s++;
02aa26ce 2813
02aa26ce 2814 /* deprecate \1 in strings and substitution replacements */
3280af22 2815 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2816 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2817 {
a2a5de95 2818 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2819 *--s = '$';
2820 break;
2821 }
02aa26ce
NT
2822
2823 /* string-change backslash escapes */
3280af22 2824 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2825 --s;
2826 break;
2827 }
ff3f963a
KW
2828 /* In a pattern, process \N, but skip any other backslash escapes.
2829 * This is because we don't want to translate an escape sequence
2830 * into a meta symbol and have the regex compiler use the meta
2831 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2832 * in spite of this, we do have to process \N here while the proper
2833 * charnames handler is in scope. See bugs #56444 and #62056.
2834 * There is a complication because \N in a pattern may also stand
2835 * for 'match a non-nl', and not mean a charname, in which case its
2836 * processing should be deferred to the regex compiler. To be a
2837 * charname it must be followed immediately by a '{', and not look
2838 * like \N followed by a curly quantifier, i.e., not something like
2839 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2840 * quantifier */
2841 else if (PL_lex_inpat
2842 && (*s != 'N'
2843 || s[1] != '{'
2844 || regcurly(s + 1)))
2845 {
cc74c5bd
TS
2846 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2847 goto default_action;
2848 }
02aa26ce 2849
79072805 2850 switch (*s) {
02aa26ce
NT
2851
2852 /* quoted - in transliterations */
79072805 2853 case '-':
3280af22 2854 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2855 *d++ = *s++;
2856 continue;
2857 }
2858 /* FALL THROUGH */
2859 default:
11b8faa4 2860 {
a2a5de95
NC
2861 if ((isALPHA(*s) || isDIGIT(*s)))
2862 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2863 "Unrecognized escape \\%c passed through",
2864 *s);
11b8faa4 2865 /* default action is to copy the quoted character */
f9a63242 2866 goto default_action;
11b8faa4 2867 }
02aa26ce 2868
77a135fe 2869 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2870 case '0': case '1': case '2': case '3':
2871 case '4': case '5': case '6': case '7':
ba210ebe 2872 {
53305cf1
NC
2873 I32 flags = 0;
2874 STRLEN len = 3;
77a135fe 2875 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2876 s += len;
2877 }
012bcf8d 2878 goto NUM_ESCAPE_INSERT;
02aa26ce 2879
77a135fe 2880 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2881 case 'x':
a0ed51b3
LW
2882 ++s;
2883 if (*s == '{') {
9d4ba2ae 2884 char* const e = strchr(s, '}');
a4c04bdc
NC
2885 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2886 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2887 STRLEN len;
355860ce 2888
53305cf1 2889 ++s;
adaeee49 2890 if (!e) {
a0ed51b3 2891 yyerror("Missing right brace on \\x{}");
355860ce 2892 continue;
ba210ebe 2893 }
53305cf1 2894 len = e - s;
77a135fe 2895 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2896 s = e + 1;
a0ed51b3
LW
2897 }
2898 else {
ba210ebe 2899 {
53305cf1 2900 STRLEN len = 2;
a4c04bdc 2901 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2902 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2903 s += len;
2904 }
012bcf8d
GS
2905 }
2906
2907 NUM_ESCAPE_INSERT:
ff3f963a
KW
2908 /* Insert oct or hex escaped character. There will always be
2909 * enough room in sv since such escapes will be longer than any
2910 * UTF-8 sequence they can end up as, except if they force us
2911 * to recode the rest of the string into utf8 */
ba7cea30 2912
77a135fe 2913 /* Here uv is the ordinal of the next character being added in
ff3f963a 2914 * unicode (converted from native). */
77a135fe 2915 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2916 if (!has_utf8 && uv > 255) {
77a135fe
KW
2917 /* Might need to recode whatever we have accumulated so
2918 * far if it contains any chars variant in utf8 or
2919 * utf-ebcdic. */
2920
2921 SvCUR_set(sv, d - SvPVX_const(sv));
2922 SvPOK_on(sv);
2923 *d = '\0';
77a135fe 2924 /* See Note on sizing above. */
7bf79863
KW
2925 sv_utf8_upgrade_flags_grow(sv,
2926 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2927 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2928 d = SvPVX(sv) + SvCUR(sv);
2929 has_utf8 = TRUE;
012bcf8d
GS
2930 }
2931
77a135fe
KW
2932 if (has_utf8) {
2933 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2934 if (PL_lex_inwhat == OP_TRANS &&
2935 PL_sublex_info.sub_op) {
2936 PL_sublex_info.sub_op->op_private |=
2937 (PL_lex_repl ? OPpTRANS_FROM_UTF
2938 : OPpTRANS_TO_UTF);
f9a63242 2939 }
e294cc5d
JH
2940#ifdef EBCDIC
2941 if (uv > 255 && !dorange)
2942 native_range = FALSE;
2943#endif
012bcf8d 2944 }
a0ed51b3 2945 else {
012bcf8d 2946 *d++ = (char)uv;
a0ed51b3 2947 }
012bcf8d
GS
2948 }
2949 else {
c4d5f83a 2950 *d++ = (char) uv;
a0ed51b3 2951 }
79072805 2952 continue;
02aa26ce 2953
4a2d328f 2954 case 'N':
ff3f963a
KW
2955 /* In a non-pattern \N must be a named character, like \N{LATIN
2956 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2957 * mean to match a non-newline. For non-patterns, named
2958 * characters are converted to their string equivalents. In
2959 * patterns, named characters are not converted to their
2960 * ultimate forms for the same reasons that other escapes
2961 * aren't. Instead, they are converted to the \N{U+...} form
2962 * to get the value from the charnames that is in effect right
2963 * now, while preserving the fact that it was a named character
2964 * so that the regex compiler knows this */
2965
2966 /* This section of code doesn't generally use the
2967 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2968 * a close examination of this macro and determined it is a
2969 * no-op except on utfebcdic variant characters. Every
2970 * character generated by this that would normally need to be
2971 * enclosed by this macro is invariant, so the macro is not
2972 * needed, and would complicate use of copy(). There are other
2973 * parts of this file where the macro is used inconsistently,
2974 * but are saved by it being a no-op */
2975
2976 /* The structure of this section of code (besides checking for
2977 * errors and upgrading to utf8) is:
2978 * Further disambiguate between the two meanings of \N, and if
2979 * not a charname, go process it elsewhere
0a96133f
KW
2980 * If of form \N{U+...}, pass it through if a pattern;
2981 * otherwise convert to utf8
2982 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2983 * pattern; otherwise convert to utf8 */
ff3f963a
KW
2984
2985 /* Here, s points to the 'N'; the test below is guaranteed to
2986 * succeed if we are being called on a pattern as we already
2987 * know from a test above that the next character is a '{'.
2988 * On a non-pattern \N must mean 'named sequence, which
2989 * requires braces */
2990 s++;
2991 if (*s != '{') {
2992 yyerror("Missing braces on \\N{}");
2993 continue;
2994 }
2995 s++;
2996
0a96133f 2997 /* If there is no matching '}', it is an error. */
ff3f963a
KW
2998 if (! (e = strchr(s, '}'))) {
2999 if (! PL_lex_inpat) {
5777a3f7 3000 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3001 } else {
3002 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3003 }
0a96133f 3004 continue;
ff3f963a 3005 }
cddc7ef4 3006
ff3f963a 3007 /* Here it looks like a named character */
cddc7ef4 3008
ff3f963a
KW
3009 if (PL_lex_inpat) {
3010
3011 /* XXX This block is temporary code. \N{} implies that the
3012 * pattern is to have Unicode semantics, and therefore
3013 * currently has to be encoded in utf8. By putting it in
3014 * utf8 now, we save a whole pass in the regular expression
3015 * compiler. Once that code is changed so Unicode
3016 * semantics doesn't necessarily have to be in utf8, this
3017 * block should be removed */
3018 if (!has_utf8) {
77a135fe 3019 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3020 SvPOK_on(sv);
e4f3eed8 3021 *d = '\0';
77a135fe 3022 /* See Note on sizing above. */
7bf79863 3023 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3024 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3025 /* 5 = '\N{' + cur char + NUL */
3026 (STRLEN)(send - s) + 5);
f08d6ad9 3027 d = SvPVX(sv) + SvCUR(sv);
89491803 3028 has_utf8 = TRUE;
ff3f963a
KW
3029 }
3030 }
423cee85 3031
ff3f963a
KW
3032 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3033 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3034 | PERL_SCAN_DISALLOW_PREFIX;
3035 STRLEN len;
3036
3037 /* For \N{U+...}, the '...' is a unicode value even on
3038 * EBCDIC machines */
3039 s += 2; /* Skip to next char after the 'U+' */
3040 len = e - s;
3041 uv = grok_hex(s, &len, &flags, NULL);
3042 if (len == 0 || len != (STRLEN)(e - s)) {
3043 yyerror("Invalid hexadecimal number in \\N{U+...}");
3044 s = e + 1;
3045 continue;
3046 }
3047
3048 if (PL_lex_inpat) {
3049
3050 /* Pass through to the regex compiler unchanged. The
3051 * reason we evaluated the number above is to make sure
0a96133f 3052 * there wasn't a syntax error. */
ff3f963a
KW
3053 s -= 5; /* Include the '\N{U+' */
3054 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3055 d += e - s + 1;
3056 }
3057 else { /* Not a pattern: convert the hex to string */
3058
3059 /* If destination is not in utf8, unconditionally
3060 * recode it to be so. This is because \N{} implies
3061 * Unicode semantics, and scalars have to be in utf8
3062 * to guarantee those semantics */
3063 if (! has_utf8) {
3064 SvCUR_set(sv, d - SvPVX_const(sv));
3065 SvPOK_on(sv);
3066 *d = '\0';
3067 /* See Note on sizing above. */
3068 sv_utf8_upgrade_flags_grow(
3069 sv,
3070 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3071 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3072 d = SvPVX(sv) + SvCUR(sv);
3073 has_utf8 = TRUE;
3074 }
3075
3076 /* Add the string to the output */
3077 if (UNI_IS_INVARIANT(uv)) {
3078 *d++ = (char) uv;
3079 }
3080 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3081 }
3082 }
3083 else { /* Here is \N{NAME} but not \N{U+...}. */
3084
3085 SV *res; /* result from charnames */
3086 const char *str; /* the string in 'res' */
3087 STRLEN len; /* its length */
3088
3089 /* Get the value for NAME */
3090 res = newSVpvn(s, e - s);
3091 res = new_constant( NULL, 0, "charnames",
3092 /* includes all of: \N{...} */
3093 res, NULL, s - 3, e - s + 4 );
3094
3095 /* Most likely res will be in utf8 already since the
3096 * standard charnames uses pack U, but a custom translator
3097 * can leave it otherwise, so make sure. XXX This can be
3098 * revisited to not have charnames use utf8 for characters
3099 * that don't need it when regexes don't have to be in utf8
3100 * for Unicode semantics. If doing so, remember EBCDIC */
3101 sv_utf8_upgrade(res);
3102 str = SvPV_const(res, len);
3103
3104 /* Don't accept malformed input */
3105 if (! is_utf8_string((U8 *) str, len)) {
3106 yyerror("Malformed UTF-8 returned by \\N");
3107 }
3108 else if (PL_lex_inpat) {
3109
3110 if (! len) { /* The name resolved to an empty string */
3111 Copy("\\N{}", d, 4, char);
3112 d += 4;
3113 }
3114 else {
3115 /* In order to not lose information for the regex
3116 * compiler, pass the result in the specially made
3117 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3118 * the code points in hex of each character
3119 * returned by charnames */
3120
3121 const char *str_end = str + len;
3122 STRLEN char_length; /* cur char's byte length */
3123 STRLEN output_length; /* and the number of bytes
3124 after this is translated
3125 into hex digits */
3126 const STRLEN off = d - SvPVX_const(sv);
3127
3128 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3129 * max('U+', '.'); and 1 for NUL */
3130 char hex_string[2 * UTF8_MAXBYTES + 5];
3131
3132 /* Get the first character of the result. */
3133 U32 uv = utf8n_to_uvuni((U8 *) str,
3134 len,
3135 &char_length,
3136 UTF8_ALLOW_ANYUV);
3137
3138 /* The call to is_utf8_string() above hopefully
3139 * guarantees that there won't be an error. But
3140 * it's easy here to make sure. The function just
3141 * above warns and returns 0 if invalid utf8, but
3142 * it can also return 0 if the input is validly a
3143 * NUL. Disambiguate */
3144 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3145 uv = UNICODE_REPLACEMENT;
3146 }
3147
3148 /* Convert first code point to hex, including the
3149 * boiler plate before it */
3150 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3151 output_length = strlen(hex_string);
3152
3153 /* Make sure there is enough space to hold it */
3154 d = off + SvGROW(sv, off
3155 + output_length
3156 + (STRLEN)(send - e)
3157 + 2); /* '}' + NUL */
3158 /* And output it */
3159 Copy(hex_string, d, output_length, char);
3160 d += output_length;
3161
3162 /* For each subsequent character, append dot and
3163 * its ordinal in hex */
3164 while ((str += char_length) < str_end) {
3165 const STRLEN off = d - SvPVX_const(sv);
3166 U32 uv = utf8n_to_uvuni((U8 *) str,
3167 str_end - str,
3168 &char_length,
3169 UTF8_ALLOW_ANYUV);
3170 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3171 uv = UNICODE_REPLACEMENT;
3172 }
3173
3174 sprintf(hex_string, ".%X", (unsigned int) uv);
3175 output_length = strlen(hex_string);
3176
3177 d = off + SvGROW(sv, off
3178 + output_length
3179 + (STRLEN)(send - e)
3180 + 2); /* '}' + NUL */
3181 Copy(hex_string, d, output_length, char);
3182 d += output_length;
3183 }
3184
3185 *d++ = '}'; /* Done. Add the trailing brace */
3186 }
3187 }
3188 else { /* Here, not in a pattern. Convert the name to a
3189 * string. */
3190
3191 /* If destination is not in utf8, unconditionally
3192 * recode it to be so. This is because \N{} implies
3193 * Unicode semantics, and scalars have to be in utf8
3194 * to guarantee those semantics */
3195 if (! has_utf8) {
3196 SvCUR_set(sv, d - SvPVX_const(sv));
3197 SvPOK_on(sv);
3198 *d = '\0';
3199 /* See Note on sizing above. */
3200 sv_utf8_upgrade_flags_grow(sv,
3201 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3202 len + (STRLEN)(send - s) + 1);
3203 d = SvPVX(sv) + SvCUR(sv);
3204 has_utf8 = TRUE;
3205 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3206
3207 /* See Note on sizing above. (NOTE: SvCUR() is not
3208 * set correctly here). */
3209 const STRLEN off = d - SvPVX_const(sv);
3210 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3211 }
3212 Copy(str, d, len, char);
3213 d += len;
423cee85 3214 }
423cee85 3215 SvREFCNT_dec(res);
cb233ae3
KW
3216
3217 /* Deprecate non-approved name syntax */
3218 if (ckWARN_d(WARN_DEPRECATED)) {
3219 bool problematic = FALSE;
3220 char* i = s;
3221
3222 /* For non-ut8 input, look to see that the first
3223 * character is an alpha, then loop through the rest
3224 * checking that each is a continuation */
3225 if (! this_utf8) {
3226 if (! isALPHAU(*i)) problematic = TRUE;
3227 else for (i = s + 1; i < e; i++) {
3228 if (isCHARNAME_CONT(*i)) continue;
3229 problematic = TRUE;
3230 break;
3231 }
3232 }
3233 else {
3234 /* Similarly for utf8. For invariants can check
3235 * directly. We accept anything above the latin1
3236 * range because it is immaterial to Perl if it is
3237 * correct or not, and is expensive to check. But
3238 * it is fairly easy in the latin1 range to convert
3239 * the variants into a single character and check
3240 * those */
3241 if (UTF8_IS_INVARIANT(*i)) {
3242 if (! isALPHAU(*i)) problematic = TRUE;
3243 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3244 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3245 *(i+1)))))
3246 {
3247 problematic = TRUE;
3248 }
3249 }
3250 if (! problematic) for (i = s + UTF8SKIP(s);
3251 i < e;
3252 i+= UTF8SKIP(i))
3253 {
3254 if (UTF8_IS_INVARIANT(*i)) {
3255 if (isCHARNAME_CONT(*i)) continue;
3256 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3257 continue;
3258 } else if (isCHARNAME_CONT(
3259 UNI_TO_NATIVE(
3260 UTF8_ACCUMULATE(*i, *(i+1)))))
3261 {
3262 continue;
3263 }
3264 problematic = TRUE;
3265 break;
3266 }
3267 }
3268 if (problematic) {
6e1bad6c
KW
3269 /* The e-i passed to the final %.*s makes sure that
3270 * should the trailing NUL be missing that this
3271 * print won't run off the end of the string */
cb233ae3 3272 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
6e1bad6c 3273 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s", i - s + 1, s, e - i, i + 1);
cb233ae3
KW
3274 }
3275 }
3276 } /* End \N{NAME} */
ff3f963a
KW
3277#ifdef EBCDIC
3278 if (!dorange)
3279 native_range = FALSE; /* \N{} is defined to be Unicode */
3280#endif
3281 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3282 continue;
3283
02aa26ce 3284 /* \c is a control character */
79072805
LW
3285 case 'c':
3286 s++;
961ce445 3287 if (s < send) {
f9d13529 3288 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3289 }
961ce445
RGS
3290 else {
3291 yyerror("Missing control char name in \\c");
3292 }
79072805 3293 continue;
02aa26ce
NT
3294
3295 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3296 case 'b':
db42d148 3297 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3298 break;
3299 case 'n':
db42d148 3300 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3301 break;
3302 case 'r':
db42d148 3303 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3304 break;
3305 case 'f':
db42d148 3306 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3307 break;
3308 case 't':
db42d148 3309 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3310 break;
34a3fe2a 3311 case 'e':
db42d148 3312 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3313 break;
3314 case 'a':
db42d148 3315 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3316 break;
02aa26ce
NT
3317 } /* end switch */
3318
79072805
LW
3319 s++;
3320 continue;
02aa26ce 3321 } /* end if (backslash) */
4c3a8340
TS
3322#ifdef EBCDIC
3323 else
3324 literal_endpoint++;
3325#endif
02aa26ce 3326
f9a63242 3327 default_action:
77a135fe
KW
3328 /* If we started with encoded form, or already know we want it,
3329 then encode the next character */
3330 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3331 STRLEN len = 1;
77a135fe
KW
3332
3333
3334 /* One might think that it is wasted effort in the case of the
3335 * source being utf8 (this_utf8 == TRUE) to take the next character
3336 * in the source, convert it to an unsigned value, and then convert
3337 * it back again. But the source has not been validated here. The
3338 * routine that does the conversion checks for errors like
3339 * malformed utf8 */
3340
5f66b61c
AL
3341 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3342 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3343 if (!has_utf8) {
3344 SvCUR_set(sv, d - SvPVX_const(sv));
3345 SvPOK_on(sv);
3346 *d = '\0';
77a135fe 3347 /* See Note on sizing above. */
7bf79863
KW
3348 sv_utf8_upgrade_flags_grow(sv,
3349 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3350 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3351 d = SvPVX(sv) + SvCUR(sv);
3352 has_utf8 = TRUE;
3353 } else if (need > len) {
3354 /* encoded value larger than old, may need extra space (NOTE:
3355 * SvCUR() is not set correctly here). See Note on sizing
3356 * above. */
9d4ba2ae 3357 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3358 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3359 }
77a135fe
KW
3360 s += len;
3361
5f66b61c 3362 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3363#ifdef EBCDIC
3364 if (uv > 255 && !dorange)
3365 native_range = FALSE;
3366#endif
2b9d42f0
NIS
3367 }
3368 else {
3369 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3370 }
02aa26ce
NT
3371 } /* while loop to process each character */
3372
3373 /* terminate the string and set up the sv */
79072805 3374 *d = '\0';
95a20fc0 3375 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3376 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3377 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3378
79072805 3379 SvPOK_on(sv);
9f4817db 3380 if (PL_encoding && !has_utf8) {
d0063567
DK
3381 sv_recode_to_utf8(sv, PL_encoding);
3382 if (SvUTF8(sv))
3383 has_utf8 = TRUE;
9f4817db 3384 }
2b9d42f0 3385 if (has_utf8) {
7e2040f0 3386 SvUTF8_on(sv);
2b9d42f0 3387 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3388 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3389 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3390 }
3391 }
79072805 3392
02aa26ce 3393 /* shrink the sv if we allocated more than we used */
79072805 3394 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3395 SvPV_shrink_to_cur(sv);
79072805 3396 }
02aa26ce 3397
6154021b 3398 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3399 if (s > PL_bufptr) {
eb0d8d16
NC
3400 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3401 const char *const key = PL_lex_inpat ? "qr" : "q";
3402 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3403 const char *type;
3404 STRLEN typelen;
3405
3406 if (PL_lex_inwhat == OP_TRANS) {
3407 type = "tr";
3408 typelen = 2;
3409 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3410 type = "s";
3411 typelen = 1;
3412 } else {
3413 type = "qq";
3414 typelen = 2;
3415 }
3416
3417 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3418 type, typelen);
3419 }
6154021b 3420 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3421 } else
8990e307 3422 SvREFCNT_dec(sv);
79072805
LW
3423 return s;
3424}
3425
ffb4593c
NT
3426/* S_intuit_more
3427 * Returns TRUE if there's more to the expression (e.g., a subscript),
3428 * FALSE otherwise.
ffb4593c
NT
3429 *
3430 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3431 *
3432 * ->[ and ->{ return TRUE
3433 * { and [ outside a pattern are always subscripts, so return TRUE
3434 * if we're outside a pattern and it's not { or [, then return FALSE
3435 * if we're in a pattern and the first char is a {
3436 * {4,5} (any digits around the comma) returns FALSE
3437 * if we're in a pattern and the first char is a [
3438 * [] returns FALSE
3439 * [SOMETHING] has a funky algorithm to decide whether it's a
3440 * character class or not. It has to deal with things like
3441 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3442 * anything else returns TRUE
3443 */
3444
9cbb5ea2
GS
3445/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3446
76e3520e 3447STATIC int
cea2e8a9 3448S_intuit_more(pTHX_ register char *s)
79072805 3449{
97aff369 3450 dVAR;
7918f24d
NC
3451
3452 PERL_ARGS_ASSERT_INTUIT_MORE;
3453
3280af22 3454 if (PL_lex_brackets)
79072805
LW
3455 return TRUE;
3456 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3457 return TRUE;
3458 if (*s != '{' && *s != '[')
3459 return FALSE;
3280af22 3460 if (!PL_lex_inpat)
79072805
LW
3461 return TRUE;
3462
3463 /* In a pattern, so maybe we have {n,m}. */
3464 if (*s == '{') {
3465 s++;
3466 if (!isDIGIT(*s))
3467 return TRUE;
3468 while (isDIGIT(*s))
3469 s++;
3470 if (*s == ',')
3471 s++;
3472 while (isDIGIT(*s))
3473 s++;
3474 if (*s == '}')
3475 return FALSE;
3476 return TRUE;
3477
3478 }
3479
3480 /* On the other hand, maybe we have a character class */
3481
3482 s++;
3483 if (*s == ']' || *s == '^')
3484 return FALSE;
3485 else {
ffb4593c 3486 /* this is terrifying, and it works */
79072805
LW
3487 int weight = 2; /* let's weigh the evidence */
3488 char seen[256];
f27ffc4a 3489 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3490 const char * const send = strchr(s,']');
3280af22 3491 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3492
3493 if (!send) /* has to be an expression */
3494 return TRUE;
3495
3496 Zero(seen,256,char);
3497 if (*s == '$')
3498 weight -= 3;
3499 else if (isDIGIT(*s)) {
3500 if (s[1] != ']') {
3501 if (isDIGIT(s[1]) && s[2] == ']')
3502 weight -= 10;
3503 }
3504 else
3505 weight -= 100;
3506 }
3507 for (; s < send; s++) {
3508 last_un_char = un_char;
3509 un_char = (unsigned char)*s;
3510 switch (*s) {
3511 case '@':
3512 case '&':
3513 case '$':
3514 weight -= seen[un_char] * 10;
7e2040f0 3515 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3516 int len;
8903cb82 3517 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3518 len = (int)strlen(tmpbuf);
3519 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3520 weight -= 100;
3521 else
3522 weight -= 10;
3523 }
3524 else if (*s == '$' && s[1] &&
93a17b20
LW
3525 strchr("[#!%*<>()-=",s[1])) {
3526 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3527 weight -= 10;
3528 else
3529 weight -= 1;
3530 }
3531 break;
3532 case '\\':
3533 un_char = 254;
3534 if (s[1]) {
93a17b20 3535 if (strchr("wds]",s[1]))
79072805 3536 weight += 100;
10edeb5d 3537 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3538 weight += 1;
93a17b20 3539 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3540 weight += 40;
3541 else if (isDIGIT(s[1])) {
3542 weight += 40;
3543 while (s[1] && isDIGIT(s[1]))
3544 s++;
3545 }
3546 }
3547 else
3548 weight += 100;
3549 break;
3550 case '-':
3551 if (s[1] == '\\')
3552 weight += 50;
93a17b20 3553 if (strchr("aA01! ",last_un_char))
79072805 3554 weight += 30;
93a17b20 3555 if (strchr("zZ79~",s[1]))
79072805 3556 weight += 30;
f27ffc4a
GS
3557 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3558 weight -= 5; /* cope with negative subscript */
79072805
LW
3559 break;
3560 default:
3792a11b
NC
3561 if (!isALNUM(last_un_char)
3562 && !(last_un_char == '$' || last_un_char == '@'
3563 || last_un_char == '&')
3564 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3565 char *d = tmpbuf;
3566 while (isALPHA(*s))
3567 *d++ = *s++;
3568 *d = '\0';
5458a98a 3569 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3570 weight -= 150;
3571 }
3572 if (un_char == last_un_char + 1)
3573 weight += 5;
3574 weight -= seen[un_char];
3575 break;
3576 }
3577 seen[un_char]++;
3578 }
3579 if (weight >= 0) /* probably a character class */
3580 return FALSE;
3581 }
3582
3583 return TRUE;
3584}
ffed7fef 3585
ffb4593c
NT
3586/*
3587 * S_intuit_method
3588 *
3589 * Does all the checking to disambiguate
3590 * foo bar
3591 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3592 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3593 *
3594 * First argument is the stuff after the first token, e.g. "bar".
3595 *
3596 * Not a method if bar is a filehandle.
3597 * Not a method if foo is a subroutine prototyped to take a filehandle.
3598 * Not a method if it's really "Foo $bar"
3599 * Method if it's "foo $bar"
3600 * Not a method if it's really "print foo $bar"
3601 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3602 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3603 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3604 * =>
3605 */
3606
76e3520e 3607STATIC int
62d55b22 3608S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3609{
97aff369 3610 dVAR;
a0d0e21e 3611 char *s = start + (*start == '$');
3280af22 3612 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3613 STRLEN len;
3614 GV* indirgv;
5db06880
NC
3615#ifdef PERL_MAD
3616 int soff;
3617#endif
a0d0e21e 3618
7918f24d
NC
3619 PERL_ARGS_ASSERT_INTUIT_METHOD;
3620
a0d0e21e 3621 if (gv) {
62d55b22 3622 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3623 return 0;
62d55b22
NC
3624 if (cv) {
3625 if (SvPOK(cv)) {
3626 const char *proto = SvPVX_const(cv);
3627 if (proto) {
3628 if (*proto == ';')
3629 proto++;
3630 if (*proto == '*')
3631 return 0;
3632 }
b6c543e3
IZ
3633 }
3634 } else
c35e046a 3635 gv = NULL;
a0d0e21e 3636 }
8903cb82 3637 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3638 /* start is the beginning of the possible filehandle/object,
3639 * and s is the end of it
3640 * tmpbuf is a copy of it
3641 */
3642
a0d0e21e 3643 if (*start == '$') {
3ef1310e
RGS
3644 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3645 isUPPER(*PL_tokenbuf))
a0d0e21e 3646 return 0;
5db06880
NC
3647#ifdef PERL_MAD
3648 len = start - SvPVX(PL_linestr);
3649#endif
29595ff2 3650 s = PEEKSPACE(s);
f0092767 3651#ifdef PERL_MAD
5db06880
NC
3652 start = SvPVX(PL_linestr) + len;
3653#endif
3280af22
NIS
3654 PL_bufptr = start;
3655 PL_expect = XREF;
a0d0e21e
LW
3656 return *s == '(' ? FUNCMETH : METHOD;
3657 }
5458a98a 3658 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3659 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3660 len -= 2;
3661 tmpbuf[len] = '\0';
5db06880
NC
3662#ifdef PERL_MAD
3663 soff = s - SvPVX(PL_linestr);
3664#endif
c3e0f903
GS
3665 goto bare_package;
3666 }
90e5519e 3667 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3668 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3669 return 0;
3670 /* filehandle or package name makes it a method */
da51bb9b 3671 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3672#ifdef PERL_MAD
3673 soff = s - SvPVX(PL_linestr);
3674#endif
29595ff2 3675 s = PEEKSPACE(s);
3280af22 3676 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3677 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3678 bare_package:
cd81e915 3679 start_force(PL_curforce);
9ded7720 3680 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3681 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3682 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3683 if (PL_madskills)
3684 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3685 PL_expect = XTERM;
a0d0e21e 3686 force_next(WORD);
3280af22 3687 PL_bufptr = s;
5db06880
NC
3688#ifdef PERL_MAD
3689 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3690#endif
a0d0e21e
LW
3691 return *s == '(' ? FUNCMETH : METHOD;
3692 }
3693 }
3694 return 0;
3695}
3696
16d20bd9 3697/* Encoded script support. filter_add() effectively inserts a
4e553d73 3698 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3699 * Note that the filter function only applies to the current source file
3700 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3701 *
3702 * The datasv parameter (which may be NULL) can be used to pass
3703 * private data to this instance of the filter. The filter function
3704 * can recover the SV using the FILTER_DATA macro and use it to
3705 * store private buffers and state information.
3706 *
3707 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3708 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3709 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3710 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3711 * private use must be set using malloc'd pointers.
3712 */
16d20bd9
AD
3713
3714SV *
864dbfa3 3715Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3716{
97aff369 3717 dVAR;
f4c556ac 3718 if (!funcp)
a0714e2c 3719 return NULL;
f4c556ac 3720
5486870f
DM
3721 if (!PL_parser)
3722 return NULL;
3723
3280af22
NIS
3724 if (!PL_rsfp_filters)
3725 PL_rsfp_filters = newAV();
16d20bd9 3726 if (!datasv)
561b68a9 3727 datasv = newSV(0);
862a34c6 3728 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3729 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3730 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3731 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3732 FPTR2DPTR(void *, IoANY(datasv)),
3733 SvPV_nolen(datasv)));
3280af22
NIS
3734 av_unshift(PL_rsfp_filters, 1);
3735 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3736 return(datasv);
3737}
4e553d73 3738
16d20bd9
AD
3739
3740/* Delete most recently added instance of this filter function. */
a0d0e21e 3741void
864dbfa3 3742Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3743{
97aff369 3744 dVAR;
e0c19803 3745 SV *datasv;
24801a4b 3746
7918f24d
NC
3747 PERL_ARGS_ASSERT_FILTER_DEL;
3748
33073adb 3749#ifdef DEBUGGING
55662e27
JH
3750 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3751 FPTR2DPTR(void*, funcp)));
33073adb 3752#endif
5486870f 3753 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3754 return;
3755 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3756 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3757 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3758 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3759 IoANY(datasv) = (void *)NULL;
3280af22 3760 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3761
16d20bd9
AD
3762 return;
3763 }
3764 /* we need to search for the correct entry and clear it */
cea2e8a9 3765 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3766}
3767
3768
1de9afcd
RGS
3769/* Invoke the idxth filter function for the current rsfp. */
3770/* maxlen 0 = read one text line */
16d20bd9 3771I32
864dbfa3 3772Perl_f