This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix MAD breakage caused by qw patch
[perl5.git] / toke.c
</
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
199e78b7
DM
48/* XXX temporary backwards compatibility */
49#define PL_lex_brackets (PL_parser->lex_brackets)
50#define PL_lex_brackstack (PL_parser->lex_brackstack)
51#define PL_lex_casemods (PL_parser->lex_casemods)
52#define PL_lex_casestack (PL_parser->lex_casestack)
53#define PL_lex_defer (PL_parser->lex_defer)
54#define PL_lex_dojoin (PL_parser->lex_dojoin)
55#define PL_lex_expect (PL_parser->lex_expect)
56#define PL_lex_formbrack (PL_parser->lex_formbrack)
57#define PL_lex_inpat (PL_parser->lex_inpat)
58#define PL_lex_inwhat (PL_parser->lex_inwhat)
59#define PL_lex_op (PL_parser->lex_op)
60#define PL_lex_repl (PL_parser->lex_repl)
61#define PL_lex_starts (PL_parser->lex_starts)
62#define PL_lex_stuff (PL_parser->lex_stuff)
63#define PL_multi_start (PL_parser->multi_start)
64#define PL_multi_open (PL_parser->multi_open)
65#define PL_multi_close (PL_parser->multi_close)
66#define PL_pending_ident (PL_parser->pending_ident)
67#define PL_preambled (PL_parser->preambled)
68#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 69#define PL_linestr (PL_parser->linestr)
c2598295
DM
70#define PL_expect (PL_parser->expect)
71#define PL_copline (PL_parser->copline)
f06b5848
DM
72#define PL_bufptr (PL_parser->bufptr)
73#define PL_oldbufptr (PL_parser->oldbufptr)
74#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
75#define PL_linestart (PL_parser->linestart)
76#define PL_bufend (PL_parser->bufend)
77#define PL_last_uni (PL_parser->last_uni)
78#define PL_last_lop (PL_parser->last_lop)
79#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 80#define PL_lex_state (PL_parser->lex_state)
2f9285f8 81#define PL_rsfp (PL_parser->rsfp)
5486870f 82#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
83#define PL_in_my (PL_parser->in_my)
84#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 85#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 86#define PL_multi_end (PL_parser->multi_end)
13765c85 87#define PL_error_count (PL_parser->error_count)
199e78b7
DM
88
89#ifdef PERL_MAD
90# define PL_endwhite (PL_parser->endwhite)
91# define PL_faketokens (PL_parser->faketokens)
92# define PL_lasttoke (PL_parser->lasttoke)
93# define PL_nextwhite (PL_parser->nextwhite)
94# define PL_realtokenstart (PL_parser->realtokenstart)
95# define PL_skipwhite (PL_parser->skipwhite)
96# define PL_thisclose (PL_parser->thisclose)
97# define PL_thismad (PL_parser->thismad)
98# define PL_thisopen (PL_parser->thisopen)
99# define PL_thisstuff (PL_parser->thisstuff)
100# define PL_thistoken (PL_parser->thistoken)
101# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
102# define PL_thiswhite (PL_parser->thiswhite)
103# define PL_nexttoke (PL_parser->nexttoke)
104# define PL_curforce (PL_parser->curforce)
105#else
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_nexttype (PL_parser->nexttype)
108# define PL_nextval (PL_parser->nextval)
199e78b7
DM
109#endif
110
16173588
NC
111/* This can't be done with embed.fnc, because struct yy_parser contains a
112 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
113static int
114S_pending_ident(pTHX);
199e78b7 115
0bd48802 116static const char ident_too_long[] = "Identifier too long";
8903cb82 117
29595ff2 118#ifdef PERL_MAD
29595ff2 119# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 120# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 121#else
5db06880 122# define CURMAD(slot,sv)
9ded7720 123# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
124#endif
125
9059aa12
LW
126#define XFAKEBRACK 128
127#define XENUMMASK 127
128
39e02b42
JH
129#ifdef USE_UTF8_SCRIPTS
130# define UTF (!IN_BYTES)
2b9d42f0 131#else
746b446a 132# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 133#endif
a0ed51b3 134
b1fc3636
CJ
135/* The maximum number of characters preceding the unrecognized one to display */
136#define UNRECOGNIZED_PRECEDE_COUNT 10
137
61f0cdd9 138/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
139 * 1999-02-27 mjd-perl-patch@plover.com */
140#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
141
bf4acbe4 142#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 143
ffb4593c
NT
144/* LEX_* are values for PL_lex_state, the state of the lexer.
145 * They are arranged oddly so that the guard on the switch statement
79072805
LW
146 * can get by with a single comparison (if the compiler is smart enough).
147 */
148
fb73857a
PP
149/* #define LEX_NOTPARSING 11 is done in perl.h. */
150
b6007c36
DM
151#define LEX_NORMAL 10 /* normal code (ie not within "...") */
152#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
153#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
154#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
155#define LEX_INTERPSTART 6 /* expecting the start of a $var */
156
157 /* at end of code, eg "$x" followed by: */
158#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
159#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
160
161#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
162 string or after \E, $foo, etc */
163#define LEX_INTERPCONST 2 /* NOT USED */
164#define LEX_FORMLINE 1 /* expecting a format line */
165#define LEX_KNOWNEXT 0 /* next token known; just return it */
166
79072805 167
bbf60fe6 168#ifdef DEBUGGING
27da23d5 169static const char* const lex_state_names[] = {
bbf60fe6
DM
170 "KNOWNEXT",
171 "FORMLINE",
172 "INTERPCONST",
173 "INTERPCONCAT",
174 "INTERPENDMAYBE",
175 "INTERPEND",
176 "INTERPSTART",
177 "INTERPPUSH",
178 "INTERPCASEMOD",
179 "INTERPNORMAL",
180 "NORMAL"
181};
182#endif
183
79072805
LW
184#ifdef ff_next
185#undef ff_next
d48672a2
LW
186#endif
187
79072805 188#include "keywords.h"
fe14fcc3 189
ffb4593c
NT
190/* CLINE is a macro that ensures PL_copline has a sane value */
191
ae986130
LW
192#ifdef CLINE
193#undef CLINE
194#endif
57843af0 195#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 196
5db06880 197#ifdef PERL_MAD
29595ff2
NC
198# define SKIPSPACE0(s) skipspace0(s)
199# define SKIPSPACE1(s) skipspace1(s)
200# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
201# define PEEKSPACE(s) skipspace2(s,0)
202#else
203# define SKIPSPACE0(s) skipspace(s)
204# define SKIPSPACE1(s) skipspace(s)
205# define SKIPSPACE2(s,tsv) skipspace(s)
206# define PEEKSPACE(s) skipspace(s)
207#endif
208
ffb4593c
NT
209/*
210 * Convenience functions to return different tokens and prime the
9cbb5ea2 211 * lexer for the next token. They all take an argument.
ffb4593c
NT
212 *
213 * TOKEN : generic token (used for '(', DOLSHARP, etc)
214 * OPERATOR : generic operator
215 * AOPERATOR : assignment operator
216 * PREBLOCK : beginning the block after an if, while, foreach, ...
217 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
218 * PREREF : *EXPR where EXPR is not a simple identifier
219 * TERM : expression term
220 * LOOPX : loop exiting command (goto, last, dump, etc)
221 * FTST : file test operator
222 * FUN0 : zero-argument function
2d2e263d 223 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
224 * BOop : bitwise or or xor
225 * BAop : bitwise and
226 * SHop : shift operator
227 * PWop : power operator
9cbb5ea2 228 * PMop : pattern-matching operator
ffb4593c
NT
229 * Aop : addition-level operator
230 * Mop : multiplication-level operator
231 * Eop : equality-testing operator
e5edeb50 232 * Rop : relational operator <= != gt
ffb4593c
NT
233 *
234 * Also see LOP and lop() below.
235 */
236
998054bd 237#ifdef DEBUGGING /* Serve -DT. */
704d4215 238# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 239#else
bbf60fe6 240# define REPORT(retval) (retval)
998054bd
SC
241#endif
242
bbf60fe6
DM
243#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
244#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
245#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
246#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
247#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
248#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
249#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
250#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
251#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
252#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
253#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
254#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
255#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
256#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
257#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
258#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
259#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
260#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
261#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
262#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 263
a687059c
LW
264/* This bit of chicanery makes a unary function followed by
265 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
266 * The UNIDOR macro is for unary functions that can be followed by the //
267 * operator (such as C<shift // 0>).
a687059c 268 */
376fcdbf 269#define UNI2(f,x) { \
6154021b 270 pl_yylval.ival = f; \
376fcdbf
AL
271 PL_expect = x; \
272 PL_bufptr = s; \
273 PL_last_uni = PL_oldbufptr; \
274 PL_last_lop_op = f; \
275 if (*s == '(') \
276 return REPORT( (int)FUNC1 ); \
29595ff2 277 s = PEEKSPACE(s); \
376fcdbf
AL
278 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
279 }
6f33ba73
RGS
280#define UNI(f) UNI2(f,XTERM)
281#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 282
376fcdbf 283#define UNIBRACK(f) { \
6154021b 284 pl_yylval.ival = f; \
376fcdbf
AL
285 PL_bufptr = s; \
286 PL_last_uni = PL_oldbufptr; \
287 if (*s == '(') \
288 return REPORT( (int)FUNC1 ); \
29595ff2 289 s = PEEKSPACE(s); \
376fcdbf
AL
290 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
291 }
79072805 292
9f68db38 293/* grandfather return to old style */
6154021b 294#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 295
8fa7f367
JH
296#ifdef DEBUGGING
297
6154021b 298/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
299enum token_type {
300 TOKENTYPE_NONE,
301 TOKENTYPE_IVAL,
6154021b 302 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
303 TOKENTYPE_PVAL,
304 TOKENTYPE_OPVAL,
305 TOKENTYPE_GVVAL
306};
307
6d4a66ac
NC
308static struct debug_tokens {
309 const int token;
310 enum token_type type;
311 const char *name;
312} const debug_tokens[] =
9041c2e3 313{
bbf60fe6
DM
314 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
315 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
316 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
317 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
318 { ARROW, TOKENTYPE_NONE, "ARROW" },
319 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
320 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
321 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
322 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
323 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 324 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
325 { DO, TOKENTYPE_NONE, "DO" },
326 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
327 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
328 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
329 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
330 { ELSE, TOKENTYPE_NONE, "ELSE" },
331 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
332 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
333 { FOR, TOKENTYPE_IVAL, "FOR" },
334 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
335 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
336 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
337 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
338 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
339 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 340 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
341 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
342 { IF, TOKENTYPE_IVAL, "IF" },
343 { LABEL, TOKENTYPE_PVAL, "LABEL" },
344 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
345 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
346 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
347 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
348 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
349 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
350 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
351 { MY, TOKENTYPE_IVAL, "MY" },
352 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
353 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
354 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
355 { OROP, TOKENTYPE_IVAL, "OROP" },
356 { OROR, TOKENTYPE_NONE, "OROR" },
357 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
358 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
359 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
360 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
361 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
362 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
363 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
364 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
365 { PREINC, TOKENTYPE_NONE, "PREINC" },
366 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
367 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
368 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
369 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
370 { SUB, TOKENTYPE_NONE, "SUB" },
371 { THING, TOKENTYPE_OPVAL, "THING" },
372 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
373 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
374 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
375 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
376 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
377 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 378 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
379 { WHILE, TOKENTYPE_IVAL, "WHILE" },
380 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 381 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 382 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
383};
384
6154021b 385/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 386
bbf60fe6 387STATIC int
704d4215 388S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 389{
97aff369 390 dVAR;
7918f24d
NC
391
392 PERL_ARGS_ASSERT_TOKEREPORT;
393
bbf60fe6 394 if (DEBUG_T_TEST) {
bd61b366 395 const char *name = NULL;
bbf60fe6 396 enum token_type type = TOKENTYPE_NONE;
f54cb97a 397 const struct debug_tokens *p;
396482e1 398 SV* const report = newSVpvs("<== ");
bbf60fe6 399
f54cb97a 400 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
401 if (p->token == (int)rv) {
402 name = p->name;
403 type = p->type;
404 break;
405 }
406 }
407 if (name)
54667de8 408 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
409 else if ((char)rv > ' ' && (char)rv < '~')
410 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
411 else if (!rv)
396482e1 412 sv_catpvs(report, "EOF");
bbf60fe6
DM
413 else
414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
415 switch (type) {
416 case TOKENTYPE_NONE:
417 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
418 break;
419 case TOKENTYPE_IVAL:
704d4215 420 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
421 break;
422 case TOKENTYPE_OPNUM:
423 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 424 PL_op_name[lvalp->ival]);
bbf60fe6
DM
425 break;
426 case TOKENTYPE_PVAL:
704d4215 427 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_OPVAL:
704d4215 430 if (lvalp->opval) {
401441c0 431 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
432 PL_op_name[lvalp->opval->op_type]);
433 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 434 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 435 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
436 }
437
438 }
401441c0 439 else
396482e1 440 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
441 break;
442 }
b6007c36 443 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
444 };
445 return (int)rv;
998054bd
SC
446}
447
b6007c36
DM
448
449/* print the buffer with suitable escapes */
450
451STATIC void
15f169a1 452S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 453{
396482e1 454 SV* const tmp = newSVpvs("");
7918f24d
NC
455
456 PERL_ARGS_ASSERT_PRINTBUF;
457
b6007c36
DM
458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
459 SvREFCNT_dec(tmp);
460}
461
8fa7f367
JH
462#endif
463
8290c323
NC
464static int
465S_deprecate_commaless_var_list(pTHX) {
466 PL_expect = XTERM;
467 deprecate("comma-less variable list");
468 return REPORT(','); /* grandfather non-comma-format format */
469}
470
ffb4593c
NT
471/*
472 * S_ao
473 *
c963b151
BD
474 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
475 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
476 */
477
76e3520e 478STATIC int
cea2e8a9 479S_ao(pTHX_ int toketype)
a0d0e21e 480{
97aff369 481 dVAR;
3280af22
NIS
482 if (*PL_bufptr == '=') {
483 PL_bufptr++;
a0d0e21e 484 if (toketype == ANDAND)
6154021b 485 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 486 else if (toketype == OROR)
6154021b 487 pl_yylval.ival = OP_ORASSIGN;
c963b151 488 else if (toketype == DORDOR)
6154021b 489 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
490 toketype = ASSIGNOP;
491 }
492 return toketype;
493}
494
ffb4593c
NT
495/*
496 * S_no_op
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning. It always prints "<something> found where
499 * operator expected. It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line. "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
506 */
507
76e3520e 508STATIC void
15f169a1 509S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 510{
97aff369 511 dVAR;
9d4ba2ae
AL
512 char * const oldbp = PL_bufptr;
513 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 514
7918f24d
NC
515 PERL_ARGS_ASSERT_NO_OP;
516
1189a94a
GS
517 if (!s)
518 s = oldbp;
07c798fb 519 else
1189a94a 520 PL_bufptr = s;
cea2e8a9 521 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
522 if (ckWARN_d(WARN_SYNTAX)) {
523 if (is_first)
524 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
525 "\t(Missing semicolon on previous line?)\n");
526 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 527 const char *t;
c35e046a
AL
528 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
529 NOOP;
56da5a46
RGS
530 if (t < PL_bufptr && isSPACE(*t))
531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
532 "\t(Do you need to predeclare %.*s?)\n",
551405c4 533 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
534 }
535 else {
536 assert(s >= oldbp);
537 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 538 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 539 }
07c798fb 540 }
3280af22 541 PL_bufptr = oldbp;
8990e307
LW
542}
543
ffb4593c
NT
544/*
545 * S_missingterm
546 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 547 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
548 * If we're in a delimited string and the delimiter is a control
549 * character, it's reformatted into a two-char sequence like ^C.
550 * This is fatal.
551 */
552
76e3520e 553STATIC void
cea2e8a9 554S_missingterm(pTHX_ char *s)
8990e307 555{
97aff369 556 dVAR;
8990e307
LW
557 char tmpbuf[3];
558 char q;
559 if (s) {
9d4ba2ae 560 char * const nl = strrchr(s,'\n');
d2719217 561 if (nl)
8990e307
LW
562 *nl = '\0';
563 }
463559e7 564 else if (isCNTRL(PL_multi_close)) {
8990e307 565 *tmpbuf = '^';
585ec06d 566 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
567 tmpbuf[2] = '\0';
568 s = tmpbuf;
569 }
570 else {
eb160463 571 *tmpbuf = (char)PL_multi_close;
8990e307
LW
572 tmpbuf[1] = '\0';
573 s = tmpbuf;
574 }
575 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 576 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 577}
79072805 578
ef89dcc3 579#define FEATURE_IS_ENABLED(name) \
0d863452 580 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 581 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 582/* The longest string we pass in. */
1863b879 583#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 584
0d863452
RH
585/*
586 * S_feature_is_enabled
587 * Check whether the named feature is enabled.
588 */
589STATIC bool
15f169a1 590S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 591{
97aff369 592 dVAR;
0d863452 593 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 594 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
595
596 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
597
4a731d7b
NC
598 assert(namelen <= MAX_FEATURE_LEN);
599 memcpy(&he_name[8], name, namelen);
d4c19fe8 600
7b9ef140 601 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
602}
603
ffb4593c 604/*
9cbb5ea2
GS
605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606 * utf16-to-utf8-reversed.
ffb4593c
NT
607 */
608
c39cd008
GS
609#ifdef PERL_CR_FILTER
610static void
611strip_return(SV *sv)
612{
95a20fc0 613 register const char *s = SvPVX_const(sv);
9d4ba2ae 614 register const char * const e = s + SvCUR(sv);
7918f24d
NC
615
616 PERL_ARGS_ASSERT_STRIP_RETURN;
617
c39cd008
GS
618 /* outer loop optimized to do nothing if there are no CR-LFs */
619 while (s < e) {
620 if (*s++ == '\r' && *s == '\n') {
621 /* hit a CR-LF, need to copy the rest */
622 register char *d = s - 1;
623 *d++ = *s++;
624 while (s < e) {
625 if (*s == '\r' && s[1] == '\n')
626 s++;
627 *d++ = *s++;
628 }
629 SvCUR(sv) -= s - d;
630 return;
631 }
632 }
633}
a868473f 634
76e3520e 635STATIC I32
c39cd008 636S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 637{
f54cb97a 638 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
639 if (count > 0 && !maxlen)
640 strip_return(sv);
641 return count;
a868473f
NIS
642}
643#endif
644
199e78b7
DM
645
646
ffb4593c
NT
647/*
648 * Perl_lex_start
5486870f 649 *
e3abe207 650 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
651 *
652 * rsfp is the opened file handle to read from (if any),
653 *
654 * line holds any initial content already read from the file (or in
655 * the case of no file, such as an eval, the whole contents);
656 *
657 * new_filter indicates that this is a new file and it shouldn't inherit
658 * the filters from the current parser (ie require).
ffb4593c
NT
659 */
660
a0d0e21e 661void
5486870f 662Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 663{
97aff369 664 dVAR;
6ef55633 665 const char *s = NULL;
8990e307 666 STRLEN len;
5486870f 667 yy_parser *parser, *oparser;
acdf0a21
DM
668
669 /* create and initialise a parser */
670
199e78b7 671 Newxz(parser, 1, yy_parser);
5486870f 672 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
673 PL_parser = parser;
674
28ac2b49
Z
675 parser->stack = NULL;
676 parser->ps = NULL;
677 parser->stack_size = 0;
acdf0a21 678
e3abe207
DM
679 /* on scope exit, free this parser and restore any outer one */
680 SAVEPARSER(parser);
7c4baf47 681 parser->saved_curcop = PL_curcop;
e3abe207 682
acdf0a21 683 /* initialise lexer state */
8990e307 684
fb205e7a
DM
685#ifdef PERL_MAD
686 parser->curforce = -1;
687#else
688 parser->nexttoke = 0;
689#endif
ca4cfd28 690 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 691 parser->copline = NOLINE;
5afb0a62 692 parser->lex_state = LEX_NORMAL;
c2598295 693 parser->expect = XSTATE;
2f9285f8 694 parser->rsfp = rsfp;
56b27c9a 695 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 696 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 697
199e78b7
DM
698 Newx(parser->lex_brackstack, 120, char);
699 Newx(parser->lex_casestack, 12, char);
700 *parser->lex_casestack = '\0';
02b34bbe 701
10efb74f
NC
702 if (line) {
703 s = SvPV_const(line, len);
704 } else {
705 len = 0;
706 }
bdc0bf6f 707
10efb74f 708 if (!len) {
bdc0bf6f 709 parser->linestr = newSVpvs("\n;");
3e5c0189 710 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
719a9bb0
NC
711 /* avoid tie/overload weirdness */
712 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 713 if (s[len-1] != ';')
bdc0bf6f 714 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
715 } else {
716 SvTEMP_off(line);
717 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 718 parser->linestr = line;
8990e307 719 }
f06b5848
DM
720 parser->oldoldbufptr =
721 parser->oldbufptr =
722 parser->bufptr =
723 parser->linestart = SvPVX(parser->linestr);
724 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
725 parser->last_lop = parser->last_uni = NULL;
79072805 726}
a687059c 727
e3abe207
DM
728
729/* delete a parser object */
730
731void
732Perl_parser_free(pTHX_ const yy_parser *parser)
733{
7918f24d
NC
734 PERL_ARGS_ASSERT_PARSER_FREE;
735
7c4baf47 736 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
737 SvREFCNT_dec(parser->linestr);
738
2f9285f8
DM
739 if (parser->rsfp == PerlIO_stdin())
740 PerlIO_clearerr(parser->rsfp);
799361c3
SH
741 else if (parser->rsfp && (!parser->old_parser ||
742 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 743 PerlIO_close(parser->rsfp);
5486870f 744 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 745
e3abe207
DM
746 Safefree(parser->lex_brackstack);
747 Safefree(parser->lex_casestack);
748 PL_parser = parser->old_parser;
749 Safefree(parser);
750}
751
752
ffb4593c
NT
753/*
754 * Perl_lex_end
9cbb5ea2
GS
755 * Finalizer for lexing operations. Must be called when the parser is
756 * done with the lexer.
ffb4593c
NT
757 */
758
463ee0b2 759void
864dbfa3 760Perl_lex_end(pTHX)
463ee0b2 761{
97aff369 762 dVAR;
3280af22 763 PL_doextract = FALSE;
463ee0b2
LW
764}
765
ffb4593c 766/*
f0e67a1d
Z
767=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
768
769Buffer scalar containing the chunk currently under consideration of the
770text currently being lexed. This is always a plain string scalar (for
771which C<SvPOK> is true). It is not intended to be used as a scalar by
772normal scalar means; instead refer to the buffer directly by the pointer
773variables described below.
774
775The lexer maintains various C<char*> pointers to things in the
776C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
777reallocated, all of these pointers must be updated. Don't attempt to
778do this manually, but rather use L</lex_grow_linestr> if you need to
779reallocate the buffer.
780
781The content of the text chunk in the buffer is commonly exactly one
782complete line of input, up to and including a newline terminator,
783but there are situations where it is otherwise. The octets of the
784buffer may be intended to be interpreted as either UTF-8 or Latin-1.
785The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
786flag on this scalar, which may disagree with it.
787
788For direct examination of the buffer, the variable
789L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
790lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
791of these pointers is usually preferable to examination of the scalar
792through normal scalar means.
793
794=for apidoc AmxU|char *|PL_parser-E<gt>bufend
795
796Direct pointer to the end of the chunk of text currently being lexed, the
797end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
798+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
799always located at the end of the buffer, and does not count as part of
800the buffer's contents.
801
802=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
803
804Points to the current position of lexing inside the lexer buffer.
805Characters around this point may be freely examined, within
806the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
807L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
808interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
809
810Lexing code (whether in the Perl core or not) moves this pointer past
811the characters that it consumes. It is also expected to perform some
812bookkeeping whenever a newline character is consumed. This movement
813can be more conveniently performed by the function L</lex_read_to>,
814which handles newlines appropriately.
815
816Interpretation of the buffer's octets can be abstracted out by
817using the slightly higher-level functions L</lex_peek_unichar> and
818L</lex_read_unichar>.
819
820=for apidoc AmxU|char *|PL_parser-E<gt>linestart
821
822Points to the start of the current line inside the lexer buffer.
823This is useful for indicating at which column an error occurred, and
824not much else. This must be updated by any lexing code that consumes
825a newline; the function L</lex_read_to> handles this detail.
826
827=cut
828*/
829
830/*
831=for apidoc Amx|bool|lex_bufutf8
832
833Indicates whether the octets in the lexer buffer
834(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
835of Unicode characters. If not, they should be interpreted as Latin-1
836characters. This is analogous to the C<SvUTF8> flag for scalars.
837
838In UTF-8 mode, it is not guaranteed that the lexer buffer actually
839contains valid UTF-8. Lexing code must be robust in the face of invalid
840encoding.
841
842The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
843is significant, but not the whole story regarding the input character
844encoding. Normally, when a file is being read, the scalar contains octets
845and its C<SvUTF8> flag is off, but the octets should be interpreted as
846UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
847however, the scalar may have the C<SvUTF8> flag on, and in this case its
848octets should be interpreted as UTF-8 unless the C<use bytes> pragma
849is in effect. This logic may change in the future; use this function
850instead of implementing the logic yourself.
851
852=cut
853*/
854
855bool
856Perl_lex_bufutf8(pTHX)
857{
858 return UTF;
859}
860
861/*
862=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
863
864Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
865at least I<len> octets (including terminating NUL). Returns a
866pointer to the reallocated buffer. This is necessary before making
867any direct modification of the buffer that would increase its length.
868L</lex_stuff_pvn> provides a more convenient way to insert text into
869the buffer.
870
871Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
872this function updates all of the lexer's variables that point directly
873into the buffer.
874
875=cut
876*/
877
878char *
879Perl_lex_grow_linestr(pTHX_ STRLEN len)
880{
881 SV *linestr;
882 char *buf;
883 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
884 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
885 linestr = PL_parser->linestr;
886 buf = SvPVX(linestr);
887 if (len <= SvLEN(linestr))
888 return buf;
889 bufend_pos = PL_parser->bufend - buf;
890 bufptr_pos = PL_parser->bufptr - buf;
891 oldbufptr_pos = PL_parser->oldbufptr - buf;
892 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
893 linestart_pos = PL_parser->linestart - buf;
894 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
895 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
896 buf = sv_grow(linestr, len);
897 PL_parser->bufend = buf + bufend_pos;
898 PL_parser->bufptr = buf + bufptr_pos;
899 PL_parser->oldbufptr = buf + oldbufptr_pos;
900 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
901 PL_parser->linestart = buf + linestart_pos;
902 if (PL_parser->last_uni)
903 PL_parser->last_uni = buf + last_uni_pos;
904 if (PL_parser->last_lop)
905 PL_parser->last_lop = buf + last_lop_pos;
906 return buf;
907}
908
909/*
83aa740e 910=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
911
912Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
913immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
914reallocating the buffer if necessary. This means that lexing code that
915runs later will see the characters as if they had appeared in the input.
916It is not recommended to do this as part of normal parsing, and most
917uses of this facility run the risk of the inserted characters being
918interpreted in an unintended manner.
919
920The string to be inserted is represented by I<len> octets starting
921at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
922according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
923The characters are recoded for the lexer buffer, according to how the
924buffer is currently being interpreted (L</lex_bufutf8>). If a string
925to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
926function is more convenient.
927
928=cut
929*/
930
931void
83aa740e 932Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 933{
749123ff 934 dVAR;
f0e67a1d
Z
935 char *bufptr;
936 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
937 if (flags & ~(LEX_STUFF_UTF8))
938 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
939 if (UTF) {
940 if (flags & LEX_STUFF_UTF8) {
941 goto plain_copy;
942 } else {
943 STRLEN highhalf = 0;
83aa740e 944 const char *p, *e = pv+len;
f0e67a1d
Z
945 for (p = pv; p != e; p++)
946 highhalf += !!(((U8)*p) & 0x80);
947 if (!highhalf)
948 goto plain_copy;
949 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
950 bufptr = PL_parser->bufptr;
951 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
952 SvCUR_set(PL_parser->linestr,
953 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
954 PL_parser->bufend += len+highhalf;
955 for (p = pv; p != e; p++) {
956 U8 c = (U8)*p;
957 if (c & 0x80) {
958 *bufptr++ = (char)(0xc0 | (c >> 6));
959 *bufptr++ = (char)(0x80 | (c & 0x3f));
960 } else {
961 *bufptr++ = (char)c;
962 }
963 }
964 }
965 } else {
966 if (flags & LEX_STUFF_UTF8) {
967 STRLEN highhalf = 0;
83aa740e 968 const char *p, *e = pv+len;
f0e67a1d
Z
969 for (p = pv; p != e; p++) {
970 U8 c = (U8)*p;
971 if (c >= 0xc4) {
972 Perl_croak(aTHX_ "Lexing code attempted to stuff "
973 "non-Latin-1 character into Latin-1 input");
974 } else if (c >= 0xc2 && p+1 != e &&
975 (((U8)p[1]) & 0xc0) == 0x80) {
976 p++;
977 highhalf++;
978 } else if (c >= 0x80) {
979 /* malformed UTF-8 */
980 ENTER;
981 SAVESPTR(PL_warnhook);
982 PL_warnhook = PERL_WARNHOOK_FATAL;
983 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
984 LEAVE;
985 }
986 }
987 if (!highhalf)
988 goto plain_copy;
989 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
990 bufptr = PL_parser->bufptr;
991 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
992 SvCUR_set(PL_parser->linestr,
993 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
994 PL_parser->bufend += len-highhalf;
995 for (p = pv; p != e; p++) {
996 U8 c = (U8)*p;
997 if (c & 0x80) {
998 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
999 p++;
1000 } else {
1001 *bufptr++ = (char)c;
1002 }
1003 }
1004 } else {
1005 plain_copy:
1006 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1007 bufptr = PL_parser->bufptr;
1008 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1009 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1010 PL_parser->bufend += len;
1011 Copy(pv, bufptr, len, char);
1012 }
1013 }
1014}
1015
1016/*
1017=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1018
1019Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1020immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1021reallocating the buffer if necessary. This means that lexing code that
1022runs later will see the characters as if they had appeared in the input.
1023It is not recommended to do this as part of normal parsing, and most
1024uses of this facility run the risk of the inserted characters being
1025interpreted in an unintended manner.
1026
1027The string to be inserted is the string value of I<sv>. The characters
1028are recoded for the lexer buffer, according to how the buffer is currently
1029being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1030not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1031need to construct a scalar.
1032
1033=cut
1034*/
1035
1036void
1037Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1038{
1039 char *pv;
1040 STRLEN len;
1041 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1042 if (flags)
1043 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1044 pv = SvPV(sv, len);
1045 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1046}
1047
1048/*
1049=for apidoc Amx|void|lex_unstuff|char *ptr
1050
1051Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1052I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1053This hides the discarded text from any lexing code that runs later,
1054as if the text had never appeared.
1055
1056This is not the normal way to consume lexed text. For that, use
1057L</lex_read_to>.
1058
1059=cut
1060*/
1061
1062void
1063Perl_lex_unstuff(pTHX_ char *ptr)
1064{
1065 char *buf, *bufend;
1066 STRLEN unstuff_len;
1067 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1068 buf = PL_parser->bufptr;
1069 if (ptr < buf)
1070 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1071 if (ptr == buf)
1072 return;
1073 bufend = PL_parser->bufend;
1074 if (ptr > bufend)
1075 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1076 unstuff_len = ptr - buf;
1077 Move(ptr, buf, bufend+1-ptr, char);
1078 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1079 PL_parser->bufend = bufend - unstuff_len;
1080}
1081
1082/*
1083=for apidoc Amx|void|lex_read_to|char *ptr
1084
1085Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1086to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1087performing the correct bookkeeping whenever a newline character is passed.
1088This is the normal way to consume lexed text.
1089
1090Interpretation of the buffer's octets can be abstracted out by
1091using the slightly higher-level functions L</lex_peek_unichar> and
1092L</lex_read_unichar>.
1093
1094=cut
1095*/
1096
1097void
1098Perl_lex_read_to(pTHX_ char *ptr)
1099{
1100 char *s;
1101 PERL_ARGS_ASSERT_LEX_READ_TO;
1102 s = PL_parser->bufptr;
1103 if (ptr < s || ptr > PL_parser->bufend)
1104 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1105 for (; s != ptr; s++)
1106 if (*s == '\n') {
1107 CopLINE_inc(PL_curcop);
1108 PL_parser->linestart = s+1;
1109 }
1110 PL_parser->bufptr = ptr;
1111}
1112
1113/*
1114=for apidoc Amx|void|lex_discard_to|char *ptr
1115
1116Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1117up to I<ptr>. The remaining content of the buffer will be moved, and
1118all pointers into the buffer updated appropriately. I<ptr> must not
1119be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1120it is not permitted to discard text that has yet to be lexed.
1121
1122Normally it is not necessarily to do this directly, because it suffices to
1123use the implicit discarding behaviour of L</lex_next_chunk> and things
1124based on it. However, if a token stretches across multiple lines,
1f317c95 1125and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1126that purpose, then after completion of the token it would be wise to
1127explicitly discard the now-unneeded earlier lines, to avoid future
1128multi-line tokens growing the buffer without bound.
1129
1130=cut
1131*/
1132
1133void
1134Perl_lex_discard_to(pTHX_ char *ptr)
1135{
1136 char *buf;
1137 STRLEN discard_len;
1138 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1139 buf = SvPVX(PL_parser->linestr);
1140 if (ptr < buf)
1141 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1142 if (ptr == buf)
1143 return;
1144 if (ptr > PL_parser->bufptr)
1145 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1146 discard_len = ptr - buf;
1147 if (PL_parser->oldbufptr < ptr)
1148 PL_parser->oldbufptr = ptr;
1149 if (PL_parser->oldoldbufptr < ptr)
1150 PL_parser->oldoldbufptr = ptr;
1151 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1152 PL_parser->last_uni = NULL;
1153 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1154 PL_parser->last_lop = NULL;
1155 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1156 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1157 PL_parser->bufend -= discard_len;
1158 PL_parser->bufptr -= discard_len;
1159 PL_parser->oldbufptr -= discard_len;
1160 PL_parser->oldoldbufptr -= discard_len;
1161 if (PL_parser->last_uni)
1162 PL_parser->last_uni -= discard_len;
1163 if (PL_parser->last_lop)
1164 PL_parser->last_lop -= discard_len;
1165}
1166
1167/*
1168=for apidoc Amx|bool|lex_next_chunk|U32 flags
1169
1170Reads in the next chunk of text to be lexed, appending it to
1171L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1172looked to the end of the current chunk and wants to know more. It is
1173usual, but not necessary, for lexing to have consumed the entirety of
1174the current chunk at this time.
1175
1176If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1177chunk (i.e., the current chunk has been entirely consumed), normally the
1178current chunk will be discarded at the same time that the new chunk is
1179read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1180will not be discarded. If the current chunk has not been entirely
1181consumed, then it will not be discarded regardless of the flag.
1182
1183Returns true if some new text was added to the buffer, or false if the
1184buffer has reached the end of the input text.
1185
1186=cut
1187*/
1188
1189#define LEX_FAKE_EOF 0x80000000
1190
1191bool
1192Perl_lex_next_chunk(pTHX_ U32 flags)
1193{
1194 SV *linestr;
1195 char *buf;
1196 STRLEN old_bufend_pos, new_bufend_pos;
1197 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1198 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1199 bool got_some_for_debugger = 0;
f0e67a1d
Z
1200 bool got_some;
1201 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1202 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1203 linestr = PL_parser->linestr;
1204 buf = SvPVX(linestr);
1205 if (!(flags & LEX_KEEP_PREVIOUS) &&
1206 PL_parser->bufptr == PL_parser->bufend) {
1207 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1208 linestart_pos = 0;
1209 if (PL_parser->last_uni != PL_parser->bufend)
1210 PL_parser->last_uni = NULL;
1211 if (PL_parser->last_lop != PL_parser->bufend)
1212 PL_parser->last_lop = NULL;
1213 last_uni_pos = last_lop_pos = 0;
1214 *buf = 0;
1215 SvCUR(linestr) = 0;
1216 } else {
1217 old_bufend_pos = PL_parser->bufend - buf;
1218 bufptr_pos = PL_parser->bufptr - buf;
1219 oldbufptr_pos = PL_parser->oldbufptr - buf;
1220 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1221 linestart_pos = PL_parser->linestart - buf;
1222 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1223 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1224 }
1225 if (flags & LEX_FAKE_EOF) {
1226 goto eof;
1227 } else if (!PL_parser->rsfp) {
1228 got_some = 0;
1229 } else if (filter_gets(linestr, old_bufend_pos)) {
1230 got_some = 1;
17cc9359 1231 got_some_for_debugger = 1;
f0e67a1d 1232 } else {
580561a3
Z
1233 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1234 sv_setpvs(linestr, "");
f0e67a1d
Z
1235 eof:
1236 /* End of real input. Close filehandle (unless it was STDIN),
1237 * then add implicit termination.
1238 */
1239 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1240 PerlIO_clearerr(PL_parser->rsfp);
1241 else if (PL_parser->rsfp)
1242 (void)PerlIO_close(PL_parser->rsfp);
1243 PL_parser->rsfp = NULL;
1244 PL_doextract = FALSE;
1245#ifdef PERL_MAD
1246 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1247 PL_faketokens = 1;
1248#endif
1249 if (!PL_in_eval && PL_minus_p) {
1250 sv_catpvs(linestr,
1251 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1252 PL_minus_n = PL_minus_p = 0;
1253 } else if (!PL_in_eval && PL_minus_n) {
1254 sv_catpvs(linestr, /*{*/";}");
1255 PL_minus_n = 0;
1256 } else
1257 sv_catpvs(linestr, ";");
1258 got_some = 1;
1259 }
1260 buf = SvPVX(linestr);
1261 new_bufend_pos = SvCUR(linestr);
1262 PL_parser->bufend = buf + new_bufend_pos;
1263 PL_parser->bufptr = buf + bufptr_pos;
1264 PL_parser->oldbufptr = buf + oldbufptr_pos;
1265 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1266 PL_parser->linestart = buf + linestart_pos;
1267 if (PL_parser->last_uni)
1268 PL_parser->last_uni = buf + last_uni_pos;
1269 if (PL_parser->last_lop)
1270 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1271 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1272 PL_curstash != PL_debstash) {
1273 /* debugger active and we're not compiling the debugger code,
1274 * so store the line into the debugger's array of lines
1275 */
1276 update_debugger_info(NULL, buf+old_bufend_pos,
1277 new_bufend_pos-old_bufend_pos);
1278 }
1279 return got_some;
1280}
1281
1282/*
1283=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1284
1285Looks ahead one (Unicode) character in the text currently being lexed.
1286Returns the codepoint (unsigned integer value) of the next character,
1287or -1 if lexing has reached the end of the input text. To consume the
1288peeked character, use L</lex_read_unichar>.
1289
1290If the next character is in (or extends into) the next chunk of input
1291text, the next chunk will be read in. Normally the current chunk will be
1292discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1293then the current chunk will not be discarded.
1294
1295If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1296is encountered, an exception is generated.
1297
1298=cut
1299*/
1300
1301I32
1302Perl_lex_peek_unichar(pTHX_ U32 flags)
1303{
749123ff 1304 dVAR;
f0e67a1d
Z
1305 char *s, *bufend;
1306 if (flags & ~(LEX_KEEP_PREVIOUS))
1307 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308 s = PL_parser->bufptr;
1309 bufend = PL_parser->bufend;
1310 if (UTF) {
1311 U8 head;
1312 I32 unichar;
1313 STRLEN len, retlen;
1314 if (s == bufend) {
1315 if (!lex_next_chunk(flags))
1316 return -1;
1317 s = PL_parser->bufptr;
1318 bufend = PL_parser->bufend;
1319 }
1320 head = (U8)*s;
1321 if (!(head & 0x80))
1322 return head;
1323 if (head & 0x40) {
1324 len = PL_utf8skip[head];
1325 while ((STRLEN)(bufend-s) < len) {
1326 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327 break;
1328 s = PL_parser->bufptr;
1329 bufend = PL_parser->bufend;
1330 }
1331 }
1332 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333 if (retlen == (STRLEN)-1) {
1334 /* malformed UTF-8 */
1335 ENTER;
1336 SAVESPTR(PL_warnhook);
1337 PL_warnhook = PERL_WARNHOOK_FATAL;
1338 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339 LEAVE;
1340 }
1341 return unichar;
1342 } else {
1343 if (s == bufend) {
1344 if (!lex_next_chunk(flags))
1345 return -1;
1346 s = PL_parser->bufptr;
1347 }
1348 return (U8)*s;
1349 }
1350}
1351
1352/*
1353=for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355Reads the next (Unicode) character in the text currently being lexed.
1356Returns the codepoint (unsigned integer value) of the character read,
1357and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358if lexing has reached the end of the input text. To non-destructively
1359examine the next character, use L</lex_peek_unichar> instead.
1360
1361If the next character is in (or extends into) the next chunk of input
1362text, the next chunk will be read in. Normally the current chunk will be
1363discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364then the current chunk will not be discarded.
1365
1366If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367is encountered, an exception is generated.
1368
1369=cut
1370*/
1371
1372I32
1373Perl_lex_read_unichar(pTHX_ U32 flags)
1374{
1375 I32 c;
1376 if (flags & ~(LEX_KEEP_PREVIOUS))
1377 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378 c = lex_peek_unichar(flags);
1379 if (c != -1) {
1380 if (c == '\n')
1381 CopLINE_inc(PL_curcop);
1382 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383 }
1384 return c;
1385}
1386
1387/*
1388=for apidoc Amx|void|lex_read_space|U32 flags
1389
1390Reads optional spaces, in Perl style, in the text currently being
1391lexed. The spaces may include ordinary whitespace characters and
1392Perl-style comments. C<#line> directives are processed if encountered.
1393L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394at a non-space character (or the end of the input text).
1395
1396If spaces extend into the next chunk of input text, the next chunk will
1397be read in. Normally the current chunk will be discarded at the same
1398time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399chunk will not be discarded.
1400
1401=cut
1402*/
1403
f0998909
Z
1404#define LEX_NO_NEXT_CHUNK 0x80000000
1405
f0e67a1d
Z
1406void
1407Perl_lex_read_space(pTHX_ U32 flags)
1408{
1409 char *s, *bufend;
1410 bool need_incline = 0;
f0998909 1411 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1412 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1413#ifdef PERL_MAD
1414 if (PL_skipwhite) {
1415 sv_free(PL_skipwhite);
1416 PL_skipwhite = NULL;
1417 }
1418 if (PL_madskills)
1419 PL_skipwhite = newSVpvs("");
1420#endif /* PERL_MAD */
1421 s = PL_parser->bufptr;
1422 bufend = PL_parser->bufend;
1423 while (1) {
1424 char c = *s;
1425 if (c == '#') {
1426 do {
1427 c = *++s;
1428 } while (!(c == '\n' || (c == 0 && s == bufend)));
1429 } else if (c == '\n') {
1430 s++;
1431 PL_parser->linestart = s;
1432 if (s == bufend)
1433 need_incline = 1;
1434 else
1435 incline(s);
1436 } else if (isSPACE(c)) {
1437 s++;
1438 } else if (c == 0 && s == bufend) {
1439 bool got_more;
1440#ifdef PERL_MAD
1441 if (PL_madskills)
1442 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1443#endif /* PERL_MAD */
f0998909
Z
1444 if (flags & LEX_NO_NEXT_CHUNK)
1445 break;
f0e67a1d
Z
1446 PL_parser->bufptr = s;
1447 CopLINE_inc(PL_curcop);
1448 got_more = lex_next_chunk(flags);
1449 CopLINE_dec(PL_curcop);
1450 s = PL_parser->bufptr;
1451 bufend = PL_parser->bufend;
1452 if (!got_more)
1453 break;
1454 if (need_incline && PL_parser->rsfp) {
1455 incline(s);
1456 need_incline = 0;
1457 }
1458 } else {
1459 break;
1460 }
1461 }
1462#ifdef PERL_MAD
1463 if (PL_madskills)
1464 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1465#endif /* PERL_MAD */
1466 PL_parser->bufptr = s;
1467}
1468
1469/*
ffb4593c
NT
1470 * S_incline
1471 * This subroutine has nothing to do with tilting, whether at windmills
1472 * or pinball tables. Its name is short for "increment line". It
57843af0 1473 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1474 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1475 * # line 500 "foo.pm"
1476 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1477 */
1478
76e3520e 1479STATIC void
d9095cec 1480S_incline(pTHX_ const char *s)
463ee0b2 1481{
97aff369 1482 dVAR;
d9095cec
NC
1483 const char *t;
1484 const char *n;
1485 const char *e;
463ee0b2 1486
7918f24d
NC
1487 PERL_ARGS_ASSERT_INCLINE;
1488
57843af0 1489 CopLINE_inc(PL_curcop);
463ee0b2
LW
1490 if (*s++ != '#')
1491 return;
d4c19fe8
AL
1492 while (SPACE_OR_TAB(*s))
1493 s++;
73659bf1
GS
1494 if (strnEQ(s, "line", 4))
1495 s += 4;
1496 else
1497 return;
084592ab 1498 if (SPACE_OR_TAB(*s))
73659bf1 1499 s++;
4e553d73 1500 else
73659bf1 1501 return;
d4c19fe8
AL
1502 while (SPACE_OR_TAB(*s))
1503 s++;
463ee0b2
LW
1504 if (!isDIGIT(*s))
1505 return;
d4c19fe8 1506
463ee0b2
LW
1507 n = s;
1508 while (isDIGIT(*s))
1509 s++;
07714eb4 1510 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1511 return;
bf4acbe4 1512 while (SPACE_OR_TAB(*s))
463ee0b2 1513 s++;
73659bf1 1514 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1515 s++;
73659bf1
GS
1516 e = t + 1;
1517 }
463ee0b2 1518 else {
c35e046a
AL
1519 t = s;
1520 while (!isSPACE(*t))
1521 t++;
73659bf1 1522 e = t;
463ee0b2 1523 }
bf4acbe4 1524 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1525 e++;
1526 if (*e != '\n' && *e != '\0')
1527 return; /* false alarm */
1528
f4dd75d9 1529 if (t - s > 0) {
d9095cec 1530 const STRLEN len = t - s;
8a5ee598 1531#ifndef USE_ITHREADS
19bad673
NC
1532 SV *const temp_sv = CopFILESV(PL_curcop);
1533 const char *cf;
1534 STRLEN tmplen;
1535
1536 if (temp_sv) {
1537 cf = SvPVX(temp_sv);
1538 tmplen = SvCUR(temp_sv);
1539 } else {
1540 cf = NULL;
1541 tmplen = 0;
1542 }
1543
42d9b98d 1544 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1545 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1546 * to *{"::_<newfilename"} */
44867030
NC
1547 /* However, the long form of evals is only turned on by the
1548 debugger - usually they're "(eval %lu)" */
1549 char smallbuf[128];
1550 char *tmpbuf;
1551 GV **gvp;
d9095cec 1552 STRLEN tmplen2 = len;
798b63bc 1553 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1554 tmpbuf = smallbuf;
1555 else
2ae0db35 1556 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1557 tmpbuf[0] = '_';
1558 tmpbuf[1] = '<';
2ae0db35 1559 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1560 tmplen += 2;
8a5ee598
RGS
1561 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1562 if (gvp) {
44867030
NC
1563 char *tmpbuf2;
1564 GV *gv2;
1565
1566 if (tmplen2 + 2 <= sizeof smallbuf)
1567 tmpbuf2 = smallbuf;
1568 else
1569 Newx(tmpbuf2, tmplen2 + 2, char);
1570
1571 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1572 /* Either they malloc'd it, or we malloc'd it,
1573 so no prefix is present in ours. */
1574 tmpbuf2[0] = '_';
1575 tmpbuf2[1] = '<';
1576 }
1577
1578 memcpy(tmpbuf2 + 2, s, tmplen2);
1579 tmplen2 += 2;
1580
8a5ee598 1581 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1582 if (!isGV(gv2)) {
8a5ee598 1583 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1584 /* adjust ${"::_<newfilename"} to store the new file name */
1585 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1586 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1587 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1588 }
44867030
NC
1589
1590 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1591 }
e66cf94c 1592 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1593 }
8a5ee598 1594#endif
05ec9bb3 1595 CopFILE_free(PL_curcop);
d9095cec 1596 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1597 }
57843af0 1598 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1599}
1600
29595ff2 1601#ifdef PERL_MAD
cd81e915 1602/* skip space before PL_thistoken */
29595ff2
NC
1603
1604STATIC char *
1605S_skipspace0(pTHX_ register char *s)
1606{
7918f24d
NC
1607 PERL_ARGS_ASSERT_SKIPSPACE0;
1608
29595ff2
NC
1609 s = skipspace(s);
1610 if (!PL_madskills)
1611 return s;
cd81e915
NC
1612 if (PL_skipwhite) {
1613 if (!PL_thiswhite)
6b29d1f5 1614 PL_thiswhite = newSVpvs("");
cd81e915
NC
1615 sv_catsv(PL_thiswhite, PL_skipwhite);
1616 sv_free(PL_skipwhite);
1617 PL_skipwhite = 0;
1618 }
1619 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1620 return s;
1621}
1622
cd81e915 1623/* skip space after PL_thistoken */
29595ff2
NC
1624
1625STATIC char *
1626S_skipspace1(pTHX_ register char *s)
1627{
d4c19fe8 1628 const char *start = s;
29595ff2
NC
1629 I32 startoff = start - SvPVX(PL_linestr);
1630
7918f24d
NC
1631 PERL_ARGS_ASSERT_SKIPSPACE1;
1632
29595ff2
NC
1633 s = skipspace(s);
1634 if (!PL_madskills)
1635 return s;
1636 start = SvPVX(PL_linestr) + startoff;
cd81e915 1637 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1638 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1639 PL_thistoken = newSVpvn(tstart, start - tstart);
1640 }
1641 PL_realtokenstart = -1;
1642 if (PL_skipwhite) {
1643 if (!PL_nextwhite)
6b29d1f5 1644 PL_nextwhite = newSVpvs("");
cd81e915
NC
1645 sv_catsv(PL_nextwhite, PL_skipwhite);
1646 sv_free(PL_skipwhite);
1647 PL_skipwhite = 0;
29595ff2
NC
1648 }
1649 return s;
1650}
1651
1652STATIC char *
1653S_skipspace2(pTHX_ register char *s, SV **svp)
1654{
c35e046a
AL
1655 char *start;
1656 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1657 const I32 startoff = s - SvPVX(PL_linestr);
1658
7918f24d
NC
1659 PERL_ARGS_ASSERT_SKIPSPACE2;
1660
29595ff2
NC
1661 s = skipspace(s);
1662 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1663 if (!PL_madskills || !svp)
1664 return s;
1665 start = SvPVX(PL_linestr) + startoff;
cd81e915 1666 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1667 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1668 PL_thistoken = newSVpvn(tstart, start - tstart);
1669 PL_realtokenstart = -1;
29595ff2 1670 }
cd81e915 1671 if (PL_skipwhite) {
29595ff2 1672 if (!*svp)
6b29d1f5 1673 *svp = newSVpvs("");
cd81e915
NC
1674 sv_setsv(*svp, PL_skipwhite);
1675 sv_free(PL_skipwhite);
1676 PL_skipwhite = 0;
29595ff2
NC
1677 }
1678
1679 return s;
1680}
1681#endif
1682
80a702cd 1683STATIC void
15f169a1 1684S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1685{
1686 AV *av = CopFILEAVx(PL_curcop);
1687 if (av) {
b9f83d2f 1688 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1689 if (orig_sv)
1690 sv_setsv(sv, orig_sv);
1691 else
1692 sv_setpvn(sv, buf, len);
80a702cd
RGS
1693 (void)SvIOK_on(sv);
1694 SvIV_set(sv, 0);
1695 av_store(av, (I32)CopLINE(PL_curcop), sv);
1696 }
1697}
1698
ffb4593c
NT
1699/*
1700 * S_skipspace
1701 * Called to gobble the appropriate amount and type of whitespace.
1702 * Skips comments as well.
1703 */
1704
76e3520e 1705STATIC char *
cea2e8a9 1706S_skipspace(pTHX_ register char *s)
a687059c 1707{
5db06880 1708#ifdef PERL_MAD
f0e67a1d
Z
1709 char *start = s;
1710#endif /* PERL_MAD */
7918f24d 1711 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1712#ifdef PERL_MAD
cd81e915
NC
1713 if (PL_skipwhite) {
1714 sv_free(PL_skipwhite);
f0e67a1d 1715 PL_skipwhite = NULL;
5db06880 1716 }
f0e67a1d 1717#endif /* PERL_MAD */
3280af22 1718 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1719 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1720 s++;
f0e67a1d
Z
1721 } else {
1722 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1723 PL_bufptr = s;
f0998909
Z
1724 lex_read_space(LEX_KEEP_PREVIOUS |
1725 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1726 LEX_NO_NEXT_CHUNK : 0));
3280af22 1727 s = PL_bufptr;
f0e67a1d
Z
1728 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1729 if (PL_linestart > PL_bufptr)
1730 PL_bufptr = PL_linestart;
1731 return s;
463ee0b2 1732 }
5db06880 1733#ifdef PERL_MAD
f0e67a1d
Z
1734 if (PL_madskills)
1735 PL_skipwhite = newSVpvn(start, s-start);
1736#endif /* PERL_MAD */
5db06880 1737 return s;
a687059c 1738}
378cc40b 1739
ffb4593c
NT
1740/*
1741 * S_check_uni
1742 * Check the unary operators to ensure there's no ambiguity in how they're
1743 * used. An ambiguous piece of code would be:
1744 * rand + 5
1745 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1746 * the +5 is its argument.
1747 */
1748
76e3520e 1749STATIC void
cea2e8a9 1750S_check_uni(pTHX)
ba106d47 1751{
97aff369 1752 dVAR;
d4c19fe8
AL
1753 const char *s;
1754 const char *t;
2f3197b3 1755
3280af22 1756 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1757 return;
3280af22
NIS
1758 while (isSPACE(*PL_last_uni))
1759 PL_last_uni++;
c35e046a
AL
1760 s = PL_last_uni;
1761 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1762 s++;
3280af22 1763 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1764 return;
6136c704 1765
9b387841
NC
1766 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1767 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1768 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1769}
1770
ffb4593c
NT
1771/*
1772 * LOP : macro to build a list operator. Its behaviour has been replaced
1773 * with a subroutine, S_lop() for which LOP is just another name.
1774 */
1775
a0d0e21e
LW
1776#define LOP(f,x) return lop(f,x,s)
1777
ffb4593c
NT
1778/*
1779 * S_lop
1780 * Build a list operator (or something that might be one). The rules:
1781 * - if we have a next token, then it's a list operator [why?]
1782 * - if the next thing is an opening paren, then it's a function
1783 * - else it's a list operator
1784 */
1785
76e3520e 1786STATIC I32
a0be28da 1787S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1788{
97aff369 1789 dVAR;
7918f24d
NC
1790
1791 PERL_ARGS_ASSERT_LOP;
1792
6154021b 1793 pl_yylval.ival = f;
35c8bce7 1794 CLINE;
3280af22
NIS
1795 PL_expect = x;
1796 PL_bufptr = s;
1797 PL_last_lop = PL_oldbufptr;
eb160463 1798 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1799#ifdef PERL_MAD
1800 if (PL_lasttoke)
1801 return REPORT(LSTOP);
1802#else
3280af22 1803 if (PL_nexttoke)
bbf60fe6 1804 return REPORT(LSTOP);
5db06880 1805#endif
79072805 1806 if (*s == '(')
bbf60fe6 1807 return REPORT(FUNC);
29595ff2 1808 s = PEEKSPACE(s);
79072805 1809 if (*s == '(')
bbf60fe6 1810 return REPORT(FUNC);
79072805 1811 else
bbf60fe6 1812 return REPORT(LSTOP);
79072805
LW
1813}
1814
5db06880
NC
1815#ifdef PERL_MAD
1816 /*
1817 * S_start_force
1818 * Sets up for an eventual force_next(). start_force(0) basically does
1819 * an unshift, while start_force(-1) does a push. yylex removes items
1820 * on the "pop" end.
1821 */
1822
1823STATIC void
1824S_start_force(pTHX_ int where)
1825{
1826 int i;
1827
cd81e915 1828 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1829 where = PL_lasttoke;
cd81e915
NC
1830 assert(PL_curforce < 0 || PL_curforce == where);
1831 if (PL_curforce != where) {
5db06880
NC
1832 for (i = PL_lasttoke; i > where; --i) {
1833 PL_nexttoke[i] = PL_nexttoke[i-1];
1834 }
1835 PL_lasttoke++;
1836 }
cd81e915 1837 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1838 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1839 PL_curforce = where;
1840 if (PL_nextwhite) {
5db06880 1841 if (PL_madskills)
6b29d1f5 1842 curmad('^', newSVpvs(""));
cd81e915 1843 CURMAD('_', PL_nextwhite);
5db06880
NC
1844 }
1845}
1846
1847STATIC void
1848S_curmad(pTHX_ char slot, SV *sv)
1849{
1850 MADPROP **where;
1851
1852 if (!sv)
1853 return;
cd81e915
NC
1854 if (PL_curforce < 0)
1855 where = &PL_thismad;
5db06880 1856 else
cd81e915 1857 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1858
cd81e915 1859 if (PL_faketokens)
76f68e9b 1860 sv_setpvs(sv, "");
5db06880
NC
1861 else {
1862 if (!IN_BYTES) {
1863 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1864 SvUTF8_on(sv);
1865 else if (PL_encoding) {
1866 sv_recode_to_utf8(sv, PL_encoding);
1867 }
1868 }
1869 }
1870
1871 /* keep a slot open for the head of the list? */
1872 if (slot != '_' && *where && (*where)->mad_key == '^') {
1873 (*where)->mad_key = slot;
daba3364 1874 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1875 (*where)->mad_val = (void*)sv;
1876 }
1877 else
1878 addmad(newMADsv(slot, sv), where, 0);
1879}
1880#else
b3f24c00
MHM
1881# define start_force(where) NOOP
1882# define curmad(slot, sv) NOOP
5db06880
NC
1883#endif
1884
ffb4593c
NT
1885/*
1886 * S_force_next
9cbb5ea2 1887 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1888 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1889 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1890 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1891 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1892 */
1893
4e553d73 1894STATIC void
cea2e8a9 1895S_force_next(pTHX_ I32 type)
79072805 1896{
97aff369 1897 dVAR;
704d4215
GG
1898#ifdef DEBUGGING
1899 if (DEBUG_T_TEST) {
1900 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1901 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1902 }
1903#endif
5db06880 1904#ifdef PERL_MAD
cd81e915 1905 if (PL_curforce < 0)
5db06880 1906 start_force(PL_lasttoke);
cd81e915 1907 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1908 if (PL_lex_state != LEX_KNOWNEXT)
1909 PL_lex_defer = PL_lex_state;
1910 PL_lex_state = LEX_KNOWNEXT;
1911 PL_lex_expect = PL_expect;
cd81e915 1912 PL_curforce = -1;
5db06880 1913#else
3280af22
NIS
1914 PL_nexttype[PL_nexttoke] = type;
1915 PL_nexttoke++;
1916 if (PL_lex_state != LEX_KNOWNEXT) {
1917 PL_lex_defer = PL_lex_state;
1918 PL_lex_expect = PL_expect;
1919 PL_lex_state = LEX_KNOWNEXT;
79072805 1920 }
5db06880 1921#endif
79072805
LW
1922}
1923
28ac2b49
Z
1924void
1925Perl_yyunlex(pTHX)
1926{
1927 if (PL_parser->yychar != YYEMPTY) {
1928 start_force(-1);
1929 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1930 force_next(PL_parser->yychar);
1931 PL_parser->yychar = YYEMPTY;
1932 }
1933}
1934
d0a148a6 1935STATIC SV *
15f169a1 1936S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1937{
97aff369 1938 dVAR;
740cce10 1939 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1940 !IN_BYTES
1941 && UTF
1942 && !is_ascii_string((const U8*)start, len)
740cce10 1943 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1944 return sv;
1945}
1946
ffb4593c
NT
1947/*
1948 * S_force_word
1949 * When the lexer knows the next thing is a word (for instance, it has
1950 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1951 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1952 * lookahead.
ffb4593c
NT
1953 *
1954 * Arguments:
b1b65b59 1955 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1956 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1957 * int check_keyword : if true, Perl checks to make sure the word isn't
1958 * a keyword (do this if the word is a label, e.g. goto FOO)
1959 * int allow_pack : if true, : characters will also be allowed (require,
1960 * use, etc. do this)
9cbb5ea2 1961 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1962 */
1963
76e3520e 1964STATIC char *
cea2e8a9 1965S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1966{
97aff369 1967 dVAR;
463ee0b2
LW
1968 register char *s;
1969 STRLEN len;
4e553d73 1970
7918f24d
NC
1971 PERL_ARGS_ASSERT_FORCE_WORD;
1972
29595ff2 1973 start = SKIPSPACE1(start);
463ee0b2 1974 s = start;
7e2040f0 1975 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1976 (allow_pack && *s == ':') ||
15f0808c 1977 (allow_initial_tick && *s == '\'') )
a0d0e21e 1978 {
3280af22 1979 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1980 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1981 return start;
cd81e915 1982 start_force(PL_curforce);
5db06880
NC
1983 if (PL_madskills)
1984 curmad('X', newSVpvn(start,s-start));
463ee0b2 1985 if (token == METHOD) {
29595ff2 1986 s = SKIPSPACE1(s);
463ee0b2 1987 if (*s == '(')
3280af22 1988 PL_expect = XTERM;
463ee0b2 1989 else {
3280af22 1990 PL_expect = XOPERATOR;
463ee0b2 1991 }
79072805 1992 }
e74e6b3d 1993 if (PL_madskills)
63575281 1994 curmad('g', newSVpvs( "forced" ));
9ded7720 1995 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1996 = (OP*)newSVOP(OP_CONST,0,
1997 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1998 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1999 force_next(token);
2000 }
2001 return s;
2002}
2003
ffb4593c
NT
2004/*
2005 * S_force_ident
9cbb5ea2 2006 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2007 * text only contains the "foo" portion. The first argument is a pointer
2008 * to the "foo", and the second argument is the type symbol to prefix.
2009 * Forces the next token to be a "WORD".
9cbb5ea2 2010 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2011 */
2012
76e3520e 2013STATIC void
bfed75c6 2014S_force_ident(pTHX_ register const char *s, int kind)
79072805 2015{
97aff369 2016 dVAR;
7918f24d
NC
2017
2018 PERL_ARGS_ASSERT_FORCE_IDENT;
2019
c35e046a 2020 if (*s) {
90e5519e
NC
2021 const STRLEN len = strlen(s);
2022 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2023 start_force(PL_curforce);
9ded7720 2024 NEXTVAL_NEXTTOKE.opval = o;
79072805 2025 force_next(WORD);
748a9306 2026 if (kind) {
11343788 2027 o->op_private = OPpCONST_ENTERED;
55497cff
PP
2028 /* XXX see note in pp_entereval() for why we forgo typo
2029 warnings if the symbol must be introduced in an eval.
2030 GSAR 96-10-12 */
90e5519e
NC
2031 gv_fetchpvn_flags(s, len,
2032 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2033 : GV_ADD,
2034 kind == '$' ? SVt_PV :
2035 kind == '@' ? SVt_PVAV :
2036 kind == '%' ? SVt_PVHV :
a0d0e21e 2037 SVt_PVGV
90e5519e 2038 );
748a9306 2039 }
79072805
LW
2040 }
2041}
2042
1571675a
GS
2043NV
2044Perl_str_to_version(pTHX_ SV *sv)
2045{
2046 NV retval = 0.0;
2047 NV nshift = 1.0;
2048 STRLEN len;
cfd0369c 2049 const char *start = SvPV_const(sv,len);
9d4ba2ae 2050 const char * const end = start + len;
504618e9 2051 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2052
2053 PERL_ARGS_ASSERT_STR_TO_VERSION;
2054
1571675a 2055 while (start < end) {
ba210ebe 2056 STRLEN skip;
1571675a
GS
2057 UV n;
2058 if (utf)
9041c2e3 2059 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2060 else {
2061 n = *(U8*)start;
2062 skip = 1;
2063 }
2064 retval += ((NV)n)/nshift;
2065 start += skip;
2066 nshift *= 1000;
2067 }
2068 return retval;
2069}
2070
4e553d73 2071/*
ffb4593c
NT
2072 * S_force_version
2073 * Forces the next token to be a version number.
e759cc13
RGS
2074 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2075 * and if "guessing" is TRUE, then no new token is created (and the caller
2076 * must use an alternative parsing method).
ffb4593c
NT
2077 */
2078
76e3520e 2079STATIC char *
e759cc13 2080S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2081{
97aff369 2082 dVAR;
5f66b61c 2083 OP *version = NULL;
44dcb63b 2084 char *d;
5db06880
NC
2085#ifdef PERL_MAD
2086 I32 startoff = s - SvPVX(PL_linestr);
2087#endif
89bfa8cd 2088
7918f24d
NC
2089 PERL_ARGS_ASSERT_FORCE_VERSION;
2090
29595ff2 2091 s = SKIPSPACE1(s);
89bfa8cd 2092
44dcb63b 2093 d = s;
dd629d5b 2094 if (*d == 'v')
44dcb63b 2095 d++;
44dcb63b 2096 if (isDIGIT(*d)) {
e759cc13
RGS
2097 while (isDIGIT(*d) || *d == '_' || *d == '.')
2098 d++;
5db06880
NC
2099#ifdef PERL_MAD
2100 if (PL_madskills) {
cd81e915 2101 start_force(PL_curforce);
5db06880
NC
2102 curmad('X', newSVpvn(s,d-s));
2103 }
2104#endif
4e4da3ac 2105 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2106 SV *ver;
8d08d9ba
DG
2107#ifdef USE_LOCALE_NUMERIC
2108 char *loc = setlocale(LC_NUMERIC, "C");
2109#endif
6154021b 2110 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2111#ifdef USE_LOCALE_NUMERIC
2112 setlocale(LC_NUMERIC, loc);
2113#endif
6154021b 2114 version = pl_yylval.opval;
dd629d5b
GS
2115 ver = cSVOPx(version)->op_sv;
2116 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2117 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2118 SvNV_set(ver, str_to_version(ver));
1571675a 2119 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2120 }
89bfa8cd 2121 }
5db06880
NC
2122 else if (guessing) {
2123#ifdef PERL_MAD
2124 if (PL_madskills) {
cd81e915
NC
2125 sv_free(PL_nextwhite); /* let next token collect whitespace */
2126 PL_nextwhite = 0;
5db06880
NC
2127 s = SvPVX(PL_linestr) + startoff;
2128 }
2129#endif
e759cc13 2130 return s;
5db06880 2131 }
89bfa8cd
PP
2132 }
2133
5db06880
NC
2134#ifdef PERL_MAD
2135 if (PL_madskills && !version) {
cd81e915
NC
2136 sv_free(PL_nextwhite); /* let next token collect whitespace */
2137 PL_nextwhite = 0;
5db06880
NC
2138 s = SvPVX(PL_linestr) + startoff;
2139 }
2140#endif
89bfa8cd 2141 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2142 start_force(PL_curforce);
9ded7720 2143 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2144 force_next(WORD);
89bfa8cd 2145
e759cc13 2146 return s;
89bfa8cd
PP
2147}
2148
ffb4593c 2149/*
91152fc1
DG
2150 * S_force_strict_version
2151 * Forces the next token to be a version number using strict syntax rules.
2152 */
2153
2154STATIC char *
2155S_force_strict_version(pTHX_ char *s)
2156{
2157 dVAR;
2158 OP *version = NULL;
2159#ifdef PERL_MAD
2160 I32 startoff = s - SvPVX(PL_linestr);
2161#endif
2162 const char *errstr = NULL;
2163
2164 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2165
2166 while (isSPACE(*s)) /* leading whitespace */
2167 s++;
2168
2169 if (is_STRICT_VERSION(s,&errstr)) {
2170 SV *ver = newSV(0);
2171 s = (char *)scan_version(s, ver, 0);
2172 version = newSVOP(OP_CONST, 0, ver);
2173 }
4e4da3ac
Z
2174 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2175 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2176 {
91152fc1
DG
2177 PL_bufptr = s;
2178 if (errstr)
2179 yyerror(errstr); /* version required */
2180 return s;
2181 }
2182
2183#ifdef PERL_MAD
2184 if (PL_madskills && !version) {
2185 sv_free(PL_nextwhite); /* let next token collect whitespace */
2186 PL_nextwhite = 0;
2187 s = SvPVX(PL_linestr) + startoff;
2188 }
2189#endif
2190 /* NOTE: The parser sees the package name and the VERSION swapped */
2191 start_force(PL_curforce);
2192 NEXTVAL_NEXTTOKE.opval = version;
2193 force_next(WORD);
2194
2195 return s;
2196}
2197
2198/*
ffb4593c
NT
2199 * S_tokeq
2200 * Tokenize a quoted string passed in as an SV. It finds the next
2201 * chunk, up to end of string or a backslash. It may make a new
2202 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2203 * turns \\ into \.
2204 */
2205
76e3520e 2206STATIC SV *
cea2e8a9 2207S_tokeq(pTHX_ SV *sv)
79072805 2208{
97aff369 2209 dVAR;
79072805
LW
2210 register char *s;
2211 register char *send;
2212 register char *d;
b3ac6de7
IZ
2213 STRLEN len = 0;
2214 SV *pv = sv;
79072805 2215
7918f24d
NC
2216 PERL_ARGS_ASSERT_TOKEQ;
2217
79072805 2218 if (!SvLEN(sv))
b3ac6de7 2219 goto finish;
79072805 2220
a0d0e21e 2221 s = SvPV_force(sv, len);
21a311ee 2222 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2223 goto finish;
463ee0b2 2224 send = s + len;
79072805
LW
2225 while (s < send && *s != '\\')
2226 s++;
2227 if (s == send)
b3ac6de7 2228 goto finish;
79072805 2229 d = s;
be4731d2 2230 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2231 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2232 }
79072805
LW
2233 while (s < send) {
2234 if (*s == '\\') {
a0d0e21e 2235 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2236 s++; /* all that, just for this */
2237 }
2238 *d++ = *s++;
2239 }
2240 *d = '\0';
95a20fc0 2241 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2242 finish:
3280af22 2243 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2244 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2245 return sv;
2246}
2247
ffb4593c
NT
2248/*
2249 * Now come three functions related to double-quote context,
2250 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2251 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2252 * interact with PL_lex_state, and create fake ( ... ) argument lists
2253 * to handle functions and concatenation.
2254 * They assume that whoever calls them will be setting up a fake
2255 * join call, because each subthing puts a ',' after it. This lets
2256 * "lower \luPpEr"
2257 * become
2258 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2259 *
2260 * (I'm not sure whether the spurious commas at the end of lcfirst's
2261 * arguments and join's arguments are created or not).
2262 */
2263
2264/*
2265 * S_sublex_start
6154021b 2266 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2267 *
2268 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2269 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2270 *
2271 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2272 *
2273 * Everything else becomes a FUNC.
2274 *
2275 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2276 * had an OP_CONST or OP_READLINE). This just sets us up for a
2277 * call to S_sublex_push().
2278 */
2279
76e3520e 2280STATIC I32
cea2e8a9 2281S_sublex_start(pTHX)
79072805 2282{
97aff369 2283 dVAR;
6154021b 2284 register const I32 op_type = pl_yylval.ival;
79072805
LW
2285
2286 if (op_type == OP_NULL) {
6154021b 2287 pl_yylval.opval = PL_lex_op;
5f66b61c 2288 PL_lex_op = NULL;
79072805
LW
2289 return THING;
2290 }
2291 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2292 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2293
2294 if (SvTYPE(sv) == SVt_PVIV) {
2295 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2296 STRLEN len;
96a5add6 2297 const char * const p = SvPV_const(sv, len);
740cce10 2298 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2299 SvREFCNT_dec(sv);
2300 sv = nsv;
4e553d73 2301 }
6154021b 2302 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2303 PL_lex_stuff = NULL;
6f33ba73
RGS
2304 /* Allow <FH> // "foo" */
2305 if (op_type == OP_READLINE)
2306 PL_expect = XTERMORDORDOR;
79072805
LW
2307 return THING;
2308 }
e3f73d4e
RGS
2309 else if (op_type == OP_BACKTICK && PL_lex_op) {
2310 /* readpipe() vas overriden */
2311 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2312 pl_yylval.opval = PL_lex_op;
9b201d7d 2313 PL_lex_op = NULL;
e3f73d4e
RGS
2314 PL_lex_stuff = NULL;
2315 return THING;
2316 }
79072805 2317
3280af22 2318 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2319 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2320 PL_sublex_info.sub_op = PL_lex_op;
2321 PL_lex_state = LEX_INTERPPUSH;
55497cff 2322
3280af22
NIS
2323 PL_expect = XTERM;
2324 if (PL_lex_op) {
6154021b 2325 pl_yylval.opval = PL_lex_op;
5f66b61c 2326 PL_lex_op = NULL;
55497cff
PP
2327 return PMFUNC;
2328 }
2329 else
2330 return FUNC;
2331}
2332
ffb4593c
NT
2333/*
2334 * S_sublex_push
2335 * Create a new scope to save the lexing state. The scope will be
2336 * ended in S_sublex_done. Returns a '(', starting the function arguments
2337 * to the uc, lc, etc. found before.
2338 * Sets PL_lex_state to LEX_INTERPCONCAT.
2339 */
2340
76e3520e 2341STATIC I32
cea2e8a9 2342S_sublex_push(pTHX)
55497cff 2343{
27da23d5 2344 dVAR;
f46d017c 2345 ENTER;
55497cff 2346
3280af22 2347 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2348 SAVEBOOL(PL_lex_dojoin);
3280af22 2349 SAVEI32(PL_lex_brackets);
3280af22
NIS
2350 SAVEI32(PL_lex_casemods);
2351 SAVEI32(PL_lex_starts);
651b5b28 2352 SAVEI8(PL_lex_state);
7766f137 2353 SAVEVPTR(PL_lex_inpat);
98246f1e 2354 SAVEI16(PL_lex_inwhat);
57843af0 2355 SAVECOPLINE(PL_curcop);
3280af22 2356 SAVEPPTR(PL_bufptr);
8452ff4b 2357 SAVEPPTR(PL_bufend);
3280af22
NIS
2358 SAVEPPTR(PL_oldbufptr);
2359 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2360 SAVEPPTR(PL_last_lop);
2361 SAVEPPTR(PL_last_uni);
3280af22
NIS
2362 SAVEPPTR(PL_linestart);
2363 SAVESPTR(PL_linestr);
8edd5f42
RGS
2364 SAVEGENERICPV(PL_lex_brackstack);
2365 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2366
2367 PL_linestr = PL_lex_stuff;
a0714e2c 2368 PL_lex_stuff = NULL;
3280af22 2369
9cbb5ea2
GS
2370 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2371 = SvPVX(PL_linestr);
3280af22 2372 PL_bufend += SvCUR(PL_linestr);
bd61b366 2373 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2374 SAVEFREESV(PL_linestr);
2375
2376 PL_lex_dojoin = FALSE;
2377 PL_lex_brackets = 0;
a02a5408
JC
2378 Newx(PL_lex_brackstack, 120, char);
2379 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2380 PL_lex_casemods = 0;
2381 *PL_lex_casestack = '\0';
2382 PL_lex_starts = 0;
2383 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2384 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2385
2386 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2387 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2388 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2389 else
5f66b61c 2390 PL_lex_inpat = NULL;
79072805 2391
55497cff 2392 return '(';
79072805
LW
2393}
2394
ffb4593c
NT
2395/*
2396 * S_sublex_done
2397 * Restores lexer state after a S_sublex_push.
2398 */
2399
76e3520e 2400STATIC I32
cea2e8a9 2401S_sublex_done(pTHX)
79072805 2402{
27da23d5 2403 dVAR;
3280af22 2404 if (!PL_lex_starts++) {
396482e1 2405 SV * const sv = newSVpvs("");
9aa983d2
JH
2406 if (SvUTF8(PL_linestr))
2407 SvUTF8_on(sv);
3280af22 2408 PL_expect = XOPERATOR;
6154021b 2409 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2410 return THING;
2411 }
2412
3280af22
NIS
2413 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2414 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2415 return yylex();
79072805
LW
2416 }
2417
ffb4593c 2418 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2419 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2420 PL_linestr = PL_lex_repl;
2421 PL_lex_inpat = 0;
2422 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2423 PL_bufend += SvCUR(PL_linestr);
bd61b366 2424 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2425 SAVEFREESV(PL_linestr);
2426 PL_lex_dojoin = FALSE;
2427 PL_lex_brackets = 0;
3280af22
NIS
2428 PL_lex_casemods = 0;
2429 *PL_lex_casestack = '\0';
2430 PL_lex_starts = 0;
25da4f38 2431 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2432 PL_lex_state = LEX_INTERPNORMAL;
2433 PL_lex_starts++;
e9fa98b2
HS
2434 /* we don't clear PL_lex_repl here, so that we can check later
2435 whether this is an evalled subst; that means we rely on the
2436 logic to ensure sublex_done() is called again only via the
2437 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2438 }
e9fa98b2 2439 else {
3280af22 2440 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2441 PL_lex_repl = NULL;
e9fa98b2 2442 }
79072805 2443 return ',';
ffed7fef
LW
2444 }
2445 else {
5db06880
NC
2446#ifdef PERL_MAD
2447 if (PL_madskills) {
cd81e915
NC
2448 if (PL_thiswhite) {
2449 if (!PL_endwhite)
6b29d1f5 2450 PL_endwhite = newSVpvs("");
cd81e915
NC
2451 sv_catsv(PL_endwhite, PL_thiswhite);
2452 PL_thiswhite = 0;
2453 }
2454 if (PL_thistoken)
76f68e9b 2455 sv_setpvs(PL_thistoken,"");
5db06880 2456 else
cd81e915 2457 PL_realtokenstart = -1;
5db06880
NC
2458 }
2459#endif
f46d017c 2460 LEAVE;
3280af22
NIS
2461 PL_bufend = SvPVX(PL_linestr);
2462 PL_bufend += SvCUR(PL_linestr);
2463 PL_expect = XOPERATOR;
09bef843 2464 PL_sublex_info.sub_inwhat = 0;
79072805 2465 return ')';
ffed7fef
LW
2466 }
2467}
2468
02aa26ce
NT
2469/*
2470 scan_const
2471
2472 Extracts a pattern, double-quoted string, or transliteration. This
2473 is terrifying code.
2474
94def140 2475 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2476 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2477 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2478
94def140
ST
2479 Returns a pointer to the character scanned up to. If this is
2480 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2481 successfully parsed), will leave an OP for the substring scanned
6154021b 2482 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2483 by looking at the next characters herself.
2484
02aa26ce
NT
2485 In patterns:
2486 backslashes:
ff3f963a 2487 constants: \N{NAME} only
02aa26ce
NT
2488 case and quoting: \U \Q \E
2489 stops on @ and $, but not for $ as tail anchor
2490
2491 In transliterations:
2492 characters are VERY literal, except for - not at the start or end
94def140
ST
2493 of the string, which indicates a range. If the range is in bytes,
2494 scan_const expands the range to the full set of intermediate
2495 characters. If the range is in utf8, the hyphen is replaced with
2496 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2497
2498 In double-quoted strings:
2499 backslashes:
2500 double-quoted style: \r and \n
ff3f963a 2501 constants: \x31, etc.
94def140 2502 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2503 case and quoting: \U \Q \E
2504 stops on @ and $
2505
2506 scan_const does *not* construct ops to handle interpolated strings.
2507 It stops processing as soon as it finds an embedded $ or @ variable
2508 and leaves it to the caller to work out what's going on.
2509
94def140
ST
2510 embedded arrays (whether in pattern or not) could be:
2511 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2512
2513 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2514
2515 $ in pattern could be $foo or could be tail anchor. Assumption:
2516 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2517 followed by one of "()| \r\n\t"
02aa26ce
NT
2518
2519 \1 (backreferences) are turned into $1
2520
2521 The structure of the code is
2522 while (there's a character to process) {
94def140
ST
2523 handle transliteration ranges
2524 skip regexp comments /(?#comment)/ and codes /(?{code})/
2525 skip #-initiated comments in //x patterns
2526 check for embedded arrays
02aa26ce
NT
2527 check for embedded scalars
2528 if (backslash) {
94def140 2529 deprecate \1 in substitution replacements
02aa26ce
NT
2530 handle string-changing backslashes \l \U \Q \E, etc.
2531 switch (what was escaped) {
94def140 2532 handle \- in a transliteration (becomes a literal -)
ff3f963a 2533 if a pattern and not \N{, go treat as regular character
94def140
ST
2534 handle \132 (octal characters)
2535 handle \x15 and \x{1234} (hex characters)
ff3f963a 2536 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
ST
2537 handle \cV (control characters)
2538 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2539 } (end switch)
77a135fe 2540 continue
02aa26ce 2541 } (end if backslash)
77a135fe 2542 handle regular character
02aa26ce 2543 } (end while character to read)
4e553d73 2544
02aa26ce
NT
2545*/
2546
76e3520e 2547STATIC char *
cea2e8a9 2548S_scan_const(pTHX_ char *start)
79072805 2549{
97aff369 2550 dVAR;
3280af22 2551 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2552 SV *sv = newSV(send - start); /* sv for the constant. See
2553 note below on sizing. */
02aa26ce
NT
2554 register char *s = start; /* start of the constant */
2555 register char *d = SvPVX(sv); /* destination for copies */
2556 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2557 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2558 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2559 I32 this_utf8 = UTF; /* Is the source string assumed
2560 to be UTF8? But, this can
2561 show as true when the source
2562 isn't utf8, as for example
2563 when it is entirely composed
2564 of hex constants */
2565
2566 /* Note on sizing: The scanned constant is placed into sv, which is
2567 * initialized by newSV() assuming one byte of output for every byte of
2568 * input. This routine expects newSV() to allocate an extra byte for a
2569 * trailing NUL, which this routine will append if it gets to the end of
2570 * the input. There may be more bytes of input than output (eg., \N{LATIN
2571 * CAPITAL LETTER A}), or more output than input if the constant ends up
2572 * recoded to utf8, but each time a construct is found that might increase
2573 * the needed size, SvGROW() is called. Its size parameter each time is
2574 * based on the best guess estimate at the time, namely the length used so
2575 * far, plus the length the current construct will occupy, plus room for
2576 * the trailing NUL, plus one byte for every input byte still unscanned */
2577
012bcf8d 2578 UV uv;
4c3a8340
ST
2579#ifdef EBCDIC
2580 UV literal_endpoint = 0;
e294cc5d 2581 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2582#endif
012bcf8d 2583
7918f24d
NC
2584 PERL_ARGS_ASSERT_SCAN_CONST;
2585
2b9d42f0
NIS
2586 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2587 /* If we are doing a trans and we know we want UTF8 set expectation */
2588 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2589 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2590 }
2591
2592
79072805 2593 while (s < send || dorange) {
ff3f963a 2594
02aa26ce 2595 /* get transliterations out of the way (they're most literal) */
3280af22 2596 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2597 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2598 if (dorange) {
1ba5c669
JH
2599 I32 i; /* current expanded character */
2600 I32 min; /* first character in range */
2601 I32 max; /* last character in range */
02aa26ce 2602
e294cc5d
JH
2603#ifdef EBCDIC
2604 UV uvmax = 0;
2605#endif
2606
2607 if (has_utf8
2608#ifdef EBCDIC
2609 && !native_range
2610#endif
2611 ) {
9d4ba2ae 2612 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2613 char *e = d++;
2614 while (e-- > c)
2615 *(e + 1) = *e;
25716404 2616 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2617 /* mark the range as done, and continue */
2618 dorange = FALSE;
2619 didrange = TRUE;
2620 continue;
2621 }
2b9d42f0 2622
95a20fc0 2623 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2624#ifdef EBCDIC
2625 SvGROW(sv,
2626 SvLEN(sv) + (has_utf8 ?
2627 (512 - UTF_CONTINUATION_MARK +
2628 UNISKIP(0x100))
2629 : 256));
2630 /* How many two-byte within 0..255: 128 in UTF-8,
2631 * 96 in UTF-8-mod. */
2632#else
9cbb5ea2 2633 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2634#endif
9cbb5ea2 2635 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2636#ifdef EBCDIC
2637 if (has_utf8) {
2638 int j;
2639 for (j = 0; j <= 1; j++) {
2640 char * const c = (char*)utf8_hop((U8*)d, -1);
2641 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2642 if (j)
2643 min = (U8)uv;
2644 else if (uv < 256)
2645 max = (U8)uv;
2646 else {
2647 max = (U8)0xff; /* only to \xff */
2648 uvmax = uv; /* \x{100} to uvmax */
2649 }
2650 d = c; /* eat endpoint chars */
2651 }
2652 }
2653 else {
2654#endif
2655 d -= 2; /* eat the first char and the - */
2656 min = (U8)*d; /* first char in range */
2657 max = (U8)d[1]; /* last char in range */
2658#ifdef EBCDIC
2659 }
2660#endif
8ada0baa 2661
c2e66d9e 2662 if (min > max) {
01ec43d0 2663 Perl_croak(aTHX_
d1573ac7 2664 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2665 (char)min, (char)max);
c2e66d9e
GS
2666 }
2667
c7f1f016 2668#ifdef EBCDIC
4c3a8340
ST
2669 if (literal_endpoint == 2 &&
2670 ((isLOWER(min) && isLOWER(max)) ||
2671 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2672 if (isLOWER(min)) {
2673 for (i = min; i <= max; i++)
2674 if (isLOWER(i))
db42d148 2675 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2676 } else {
2677 for (i = min; i <= max; i++)
2678 if (isUPPER(i))
db42d148 2679 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2680 }
2681 }
2682 else
2683#endif
2684 for (i = min; i <= max; i++)
e294cc5d
JH
2685#ifdef EBCDIC
2686 if (has_utf8) {
2687 const U8 ch = (U8)NATIVE_TO_UTF(i);
2688 if (UNI_IS_INVARIANT(ch))
2689 *d++ = (U8)i;
2690 else {
2691 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2692 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2693 }
2694 }
2695 else
2696#endif
2697 *d++ = (char)i;
2698
2699#ifdef EBCDIC
2700 if (uvmax) {
2701 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2702 if (uvmax > 0x101)
2703 *d++ = (char)UTF_TO_NATIVE(0xff);
2704 if (uvmax > 0x100)
2705 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2706 }
2707#endif
02aa26ce
NT
2708
2709 /* mark the range as done, and continue */
79072805 2710 dorange = FALSE;
01ec43d0 2711 didrange = TRUE;
4c3a8340
ST
2712#ifdef EBCDIC
2713 literal_endpoint = 0;
2714#endif
79072805 2715 continue;
4e553d73 2716 }
02aa26ce
NT
2717
2718 /* range begins (ignore - as first or last char) */
79072805 2719 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2720 if (didrange) {
1fafa243 2721 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2722 }
e294cc5d
JH
2723 if (has_utf8
2724#ifdef EBCDIC
2725 && !native_range
2726#endif
2727 ) {
25716404 2728 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2729 s++;
2730 continue;
2731 }
79072805
LW
2732 dorange = TRUE;
2733 s++;
01ec43d0
GS
2734 }
2735 else {
2736 didrange = FALSE;
4c3a8340
ST
2737#ifdef EBCDIC
2738 literal_endpoint = 0;
e294cc5d 2739 native_range = TRUE;
4c3a8340 2740#endif
01ec43d0 2741 }
79072805 2742 }
02aa26ce
NT
2743
2744 /* if we get here, we're not doing a transliteration */
2745
0f5d15d6
IZ
2746 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2747 except for the last char, which will be done separately. */
3280af22 2748 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2749 if (s[2] == '#') {
e994fd66 2750 while (s+1 < send && *s != ')')
db42d148 2751 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2752 }
2753 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2754 || (s[2] == '?' && s[3] == '{'))
155aba94 2755 {
cc6b7395 2756 I32 count = 1;
0f5d15d6 2757 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2758 char c;
2759
d9f97599
GS
2760 while (count && (c = *regparse)) {
2761 if (c == '\\' && regparse[1])
2762 regparse++;
4e553d73 2763 else if (c == '{')
cc6b7395 2764 count++;
4e553d73 2765 else if (c == '}')
cc6b7395 2766 count--;
d9f97599 2767 regparse++;
cc6b7395 2768 }
e994fd66 2769 if (*regparse != ')')
5bdf89e7 2770 regparse--; /* Leave one char for continuation. */
0f5d15d6 2771 while (s < regparse)
db42d148 2772 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2773 }
748a9306 2774 }
02aa26ce
NT
2775
2776 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2777 else if (*s == '#' && PL_lex_inpat &&
2778 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2779 while (s+1 < send && *s != '\n')
db42d148 2780 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2781 }
02aa26ce 2782
5d1d4326 2783 /* check for embedded arrays
da6eedaa 2784 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2785 */
1749ea0d
ST
2786 else if (*s == '@' && s[1]) {
2787 if (isALNUM_lazy_if(s+1,UTF))
2788 break;
2789 if (strchr(":'{$", s[1]))
2790 break;
2791 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2792 break; /* in regexp, neither @+ nor @- are interpolated */
2793 }
02aa26ce
NT
2794
2795 /* check for embedded scalars. only stop if we're sure it's a
2796 variable.
2797 */
79072805 2798 else if (*s == '$') {
3280af22 2799 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2800 break;
77772344 2801 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2802 if (s[1] == '\\') {
2803 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2804 "Possible unintended interpolation of $\\ in regex");
77772344 2805 }
79072805 2806 break; /* in regexp, $ might be tail anchor */
77772344 2807 }
79072805 2808 }
02aa26ce 2809
2b9d42f0
NIS
2810 /* End of else if chain - OP_TRANS rejoin rest */
2811
02aa26ce 2812 /* backslashes */
79072805 2813 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2814 char* e; /* Can be used for ending '}', etc. */
2815
79072805 2816 s++;
02aa26ce 2817
7d0fc23c
KW
2818 /* warn on \1 - \9 in substitution replacements, but note that \11
2819 * is an octal; and \19 is \1 followed by '9' */
3280af22 2820 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2821 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2822 {
a2a5de95 2823 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2824 *--s = '$';
2825 break;
2826 }
02aa26ce
NT
2827
2828 /* string-change backslash escapes */
3280af22 2829 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2830 --s;
2831 break;
2832 }
ff3f963a
KW
2833 /* In a pattern, process \N, but skip any other backslash escapes.
2834 * This is because we don't want to translate an escape sequence
2835 * into a meta symbol and have the regex compiler use the meta
2836 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2837 * in spite of this, we do have to process \N here while the proper
2838 * charnames handler is in scope. See bugs #56444 and #62056.
2839 * There is a complication because \N in a pattern may also stand
2840 * for 'match a non-nl', and not mean a charname, in which case its
2841 * processing should be deferred to the regex compiler. To be a
2842 * charname it must be followed immediately by a '{', and not look
2843 * like \N followed by a curly quantifier, i.e., not something like
2844 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2845 * quantifier */
2846 else if (PL_lex_inpat
2847 && (*s != 'N'
2848 || s[1] != '{'
2849 || regcurly(s + 1)))
2850 {
cc74c5bd
ST
2851 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2852 goto default_action;
2853 }
02aa26ce 2854
79072805 2855 switch (*s) {
02aa26ce
NT
2856
2857 /* quoted - in transliterations */
79072805 2858 case '-':
3280af22 2859 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2860 *d++ = *s++;
2861 continue;
2862 }
2863 /* FALL THROUGH */
2864 default:
11b8faa4 2865 {
a2a5de95
NC
2866 if ((isALPHA(*s) || isDIGIT(*s)))
2867 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2868 "Unrecognized escape \\%c passed through",
2869 *s);
11b8faa4 2870 /* default action is to copy the quoted character */
f9a63242 2871 goto default_action;
11b8faa4 2872 }
02aa26ce 2873
632403cc 2874 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2875 case '0': case '1': case '2': case '3':
2876 case '4': case '5': case '6': case '7':
ba210ebe 2877 {
53305cf1
NC
2878 I32 flags = 0;
2879 STRLEN len = 3;
77a135fe 2880 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2881 s += len;
2882 }
012bcf8d 2883 goto NUM_ESCAPE_INSERT;
02aa26ce 2884
f0a2b745
KW
2885 /* eg. \o{24} indicates the octal constant \024 */
2886 case 'o':
2887 {
2888 STRLEN len;
454155d9 2889 const char* error;
f0a2b745 2890
454155d9 2891 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2892 s += len;
454155d9 2893 if (! valid) {
f0a2b745
KW
2894 yyerror(error);
2895 continue;
2896 }
2897 goto NUM_ESCAPE_INSERT;
2898 }
2899
77a135fe 2900 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2901 case 'x':
a0ed51b3
LW
2902 ++s;
2903 if (*s == '{') {
9d4ba2ae 2904 char* const e = strchr(s, '}');
a4c04bdc
NC
2905 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2906 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2907 STRLEN len;
355860ce 2908
53305cf1 2909 ++s;
adaeee49 2910 if (!e) {
a0ed51b3 2911 yyerror("Missing right brace on \\x{}");
355860ce 2912 continue;
ba210ebe 2913 }
53305cf1 2914 len = e - s;
77a135fe 2915 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2916 s = e + 1;
a0ed51b3
LW
2917 }
2918 else {
ba210ebe 2919 {
53305cf1 2920 STRLEN len = 2;
a4c04bdc 2921 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2922 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2923 s += len;
2924 }
012bcf8d
GS
2925 }
2926
2927 NUM_ESCAPE_INSERT:
ff3f963a
KW
2928 /* Insert oct or hex escaped character. There will always be
2929 * enough room in sv since such escapes will be longer than any
2930 * UTF-8 sequence they can end up as, except if they force us
2931 * to recode the rest of the string into utf8 */
ba7cea30 2932
77a135fe 2933 /* Here uv is the ordinal of the next character being added in
ff3f963a 2934 * unicode (converted from native). */
77a135fe 2935 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2936 if (!has_utf8 && uv > 255) {
77a135fe
KW
2937 /* Might need to recode whatever we have accumulated so
2938 * far if it contains any chars variant in utf8 or
2939 * utf-ebcdic. */
2940
2941 SvCUR_set(sv, d - SvPVX_const(sv));
2942 SvPOK_on(sv);
2943 *d = '\0';
77a135fe 2944 /* See Note on sizing above. */
7bf79863
KW
2945 sv_utf8_upgrade_flags_grow(sv,
2946 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2948 d = SvPVX(sv) + SvCUR(sv);
2949 has_utf8 = TRUE;
012bcf8d
GS
2950 }
2951
77a135fe
KW
2952 if (has_utf8) {
2953 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2954 if (PL_lex_inwhat == OP_TRANS &&
2955 PL_sublex_info.sub_op) {
2956 PL_sublex_info.sub_op->op_private |=
2957 (PL_lex_repl ? OPpTRANS_FROM_UTF
2958 : OPpTRANS_TO_UTF);
f9a63242 2959 }
e294cc5d
JH
2960#ifdef EBCDIC
2961 if (uv > 255 && !dorange)
2962 native_range = FALSE;
2963#endif
012bcf8d 2964 }
a0ed51b3 2965 else {
012bcf8d 2966 *d++ = (char)uv;
a0ed51b3 2967 }
012bcf8d
GS
2968 }
2969 else {
c4d5f83a 2970 *d++ = (char) uv;
a0ed51b3 2971 }
79072805 2972 continue;
02aa26ce 2973
4a2d328f 2974 case 'N':
ff3f963a
KW
2975 /* In a non-pattern \N must be a named character, like \N{LATIN
2976 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2977 * mean to match a non-newline. For non-patterns, named
2978 * characters are converted to their string equivalents. In
2979 * patterns, named characters are not converted to their
2980 * ultimate forms for the same reasons that other escapes
2981 * aren't. Instead, they are converted to the \N{U+...} form
2982 * to get the value from the charnames that is in effect right
2983 * now, while preserving the fact that it was a named character
2984 * so that the regex compiler knows this */
2985
2986 /* This section of code doesn't generally use the
2987 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2988 * a close examination of this macro and determined it is a
2989 * no-op except on utfebcdic variant characters. Every
2990 * character generated by this that would normally need to be
2991 * enclosed by this macro is invariant, so the macro is not
2992 * needed, and would complicate use of copy(). There are other
2993 * parts of this file where the macro is used inconsistently,
2994 * but are saved by it being a no-op */
2995
2996 /* The structure of this section of code (besides checking for
2997 * errors and upgrading to utf8) is:
2998 * Further disambiguate between the two meanings of \N, and if
2999 * not a charname, go process it elsewhere
0a96133f
KW
3000 * If of form \N{U+...}, pass it through if a pattern;
3001 * otherwise convert to utf8
3002 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3003 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3004
3005 /* Here, s points to the 'N'; the test below is guaranteed to
3006 * succeed if we are being called on a pattern as we already
3007 * know from a test above that the next character is a '{'.
3008 * On a non-pattern \N must mean 'named sequence, which
3009 * requires braces */
3010 s++;
3011 if (*s != '{') {
3012 yyerror("Missing braces on \\N{}");
3013 continue;
3014 }
3015 s++;
3016
0a96133f 3017 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3018 if (! (e = strchr(s, '}'))) {
3019 if (! PL_lex_inpat) {
5777a3f7 3020 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3021 } else {
3022 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3023 }
0a96133f 3024 continue;
ff3f963a 3025 }
cddc7ef4 3026
ff3f963a 3027 /* Here it looks like a named character */
cddc7ef4 3028
ff3f963a
KW
3029 if (PL_lex_inpat) {
3030
3031 /* XXX This block is temporary code. \N{} implies that the
3032 * pattern is to have Unicode semantics, and therefore
3033 * currently has to be encoded in utf8. By putting it in
3034 * utf8 now, we save a whole pass in the regular expression
3035 * compiler. Once that code is changed so Unicode
3036 * semantics doesn't necessarily have to be in utf8, this
3037 * block should be removed */
3038 if (!has_utf8) {
77a135fe 3039 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3040 SvPOK_on(sv);
e4f3eed8 3041 *d = '\0';
77a135fe 3042 /* See Note on sizing above. */
7bf79863 3043 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3044 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3045 /* 5 = '\N{' + cur char + NUL */
3046 (STRLEN)(send - s) + 5);
f08d6ad9 3047 d = SvPVX(sv) + SvCUR(sv);
89491803 3048 has_utf8 = TRUE;
ff3f963a
KW
3049 }
3050 }
423cee85 3051
ff3f963a
KW
3052 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3053 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3054 | PERL_SCAN_DISALLOW_PREFIX;
3055 STRLEN len;
3056
3057 /* For \N{U+...}, the '...' is a unicode value even on
3058 * EBCDIC machines */
3059 s += 2; /* Skip to next char after the 'U+' */
3060 len = e - s;
3061 uv = grok_hex(s, &len, &flags, NULL);
3062 if (len == 0 || len != (STRLEN)(e - s)) {
3063 yyerror("Invalid hexadecimal number in \\N{U+...}");
3064 s = e + 1;
3065 continue;
3066 }
3067
3068 if (PL_lex_inpat) {
3069
3070 /* Pass through to the regex compiler unchanged. The
3071 * reason we evaluated the number above is to make sure
0a96133f 3072 * there wasn't a syntax error. */
ff3f963a
KW
3073 s -= 5; /* Include the '\N{U+' */
3074 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3075 d += e - s + 1;
3076 }
3077 else { /* Not a pattern: convert the hex to string */
3078
3079 /* If destination is not in utf8, unconditionally
3080 * recode it to be so. This is because \N{} implies
3081 * Unicode semantics, and scalars have to be in utf8
3082 * to guarantee those semantics */
3083 if (! has_utf8) {
3084 SvCUR_set(sv, d - SvPVX_const(sv));
3085 SvPOK_on(sv);
3086 *d = '\0';
3087 /* See Note on sizing above. */
3088 sv_utf8_upgrade_flags_grow(
3089 sv,
3090 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3091 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3092 d = SvPVX(sv) + SvCUR(sv);
3093 has_utf8 = TRUE;
3094 }
3095
3096 /* Add the string to the output */
3097 if (UNI_IS_INVARIANT(uv)) {
3098 *d++ = (char) uv;
3099 }
3100 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3101 }
3102 }
3103 else { /* Here is \N{NAME} but not \N{U+...}. */
3104
3105 SV *res; /* result from charnames */
3106 const char *str; /* the string in 'res' */
3107 STRLEN len; /* its length */
3108
3109 /* Get the value for NAME */
3110 res = newSVpvn(s, e - s);
3111 res = new_constant( NULL, 0, "charnames",
3112 /* includes all of: \N{...} */
3113 res, NULL, s - 3, e - s + 4 );
3114
3115 /* Most likely res will be in utf8 already since the
3116 * standard charnames uses pack U, but a custom translator
3117 * can leave it otherwise, so make sure. XXX This can be
3118 * revisited to not have charnames use utf8 for characters
3119 * that don't need it when regexes don't have to be in utf8
3120 * for Unicode semantics. If doing so, remember EBCDIC */
3121 sv_utf8_upgrade(res);
3122 str = SvPV_const(res, len);
3123
3124 /* Don't accept malformed input */
3125 if (! is_utf8_string((U8 *) str, len)) {
3126 yyerror("Malformed UTF-8 returned by \\N");
3127 }
3128 else if (PL_lex_inpat) {
3129
3130 if (! len) { /* The name resolved to an empty string */
3131 Copy("\\N{}", d, 4, char);
3132 d += 4;
3133 }
3134 else {
3135 /* In order to not lose information for the regex
3136 * compiler, pass the result in the specially made
3137 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3138 * the code points in hex of each character
3139 * returned by charnames */
3140
3141 const char *str_end = str + len;
3142 STRLEN char_length; /* cur char's byte length */
3143 STRLEN output_length; /* and the number of bytes
3144 after this is translated
3145 into hex digits */
3146 const STRLEN off = d - SvPVX_const(sv);
3147
3148 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3149 * max('U+', '.'); and 1 for NUL */
3150 char hex_string[2 * UTF8_MAXBYTES + 5];
3151
3152 /* Get the first character of the result. */
3153 U32 uv = utf8n_to_uvuni((U8 *) str,
3154 len,
3155 &char_length,
3156 UTF8_ALLOW_ANYUV);
3157
3158 /* The call to is_utf8_string() above hopefully
3159 * guarantees that there won't be an error. But
3160 * it's easy here to make sure. The function just
3161 * above warns and returns 0 if invalid utf8, but
3162 * it can also return 0 if the input is validly a
3163 * NUL. Disambiguate */
3164 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3165 uv = UNICODE_REPLACEMENT;
3166 }
3167
3168 /* Convert first code point to hex, including the
3169 * boiler plate before it */
3170 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3171 output_length = strlen(hex_string);
3172
3173 /* Make sure there is enough space to hold it */
3174 d = off + SvGROW(sv, off
3175 + output_length
3176 + (STRLEN)(send - e)
3177 + 2); /* '}' + NUL */
3178 /* And output it */
3179 Copy(hex_string, d, output_length, char);
3180 d += output_length;
3181
3182 /* For each subsequent character, append dot and
3183 * its ordinal in hex */
3184 while ((str += char_length) < str_end) {
3185 const STRLEN off = d - SvPVX_const(sv);
3186 U32 uv = utf8n_to_uvuni((U8 *) str,
3187 str_end - str,
3188 &char_length,
3189 UTF8_ALLOW_ANYUV);
3190 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3191 uv = UNICODE_REPLACEMENT;
3192 }
3193
3194 sprintf(hex_string, ".%X", (unsigned int) uv);
3195 output_length = strlen(hex_string);
3196
3197 d = off + SvGROW(sv, off
3198 + output_length
3199 + (STRLEN)(send - e)
3200 + 2); /* '}' + NUL */
3201 Copy(hex_string, d, output_length, char);
3202 d += output_length;
3203 }
3204
3205 *d++ = '}'; /* Done. Add the trailing brace */
3206 }
3207 }
3208 else { /* Here, not in a pattern. Convert the name to a
3209 * string. */
3210
3211 /* If destination is not in utf8, unconditionally
3212 * recode it to be so. This is because \N{} implies
3213 * Unicode semantics, and scalars have to be in utf8
3214 * to guarantee those semantics */
3215 if (! has_utf8) {
3216 SvCUR_set(sv, d - SvPVX_const(sv));
3217 SvPOK_on(sv);
3218 *d = '\0';
3219 /* See Note on sizing above. */
3220 sv_utf8_upgrade_flags_grow(sv,
3221 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3222 len + (STRLEN)(send - s) + 1);
3223 d = SvPVX(sv) + SvCUR(sv);
3224 has_utf8 = TRUE;
3225 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3226
3227 /* See Note on sizing above. (NOTE: SvCUR() is not
3228 * set correctly here). */
3229 const STRLEN off = d - SvPVX_const(sv);
3230 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3231 }
3232 Copy(str, d, len, char);
3233 d += len;
423cee85 3234 }
423cee85 3235 SvREFCNT_dec(res);
cb233ae3
KW
3236
3237 /* Deprecate non-approved name syntax */
3238 if (ckWARN_d(WARN_DEPRECATED)) {
3239 bool problematic = FALSE;
3240 char* i = s;
3241
3242 /* For non-ut8 input, look to see that the first
3243 * character is an alpha, then loop through the rest
3244 * checking that each is a continuation */
3245 if (! this_utf8) {
3246 if (! isALPHAU(*i)) problematic = TRUE;
3247 else for (i = s + 1; i < e; i++) {
3248 if (isCHARNAME_CONT(*i)) continue;
3249 problematic = TRUE;
3250 break;
3251 }
3252 }
3253 else {
3254 /* Similarly for utf8. For invariants can check
3255 * directly. We accept anything above the latin1
3256 * range because it is immaterial to Perl if it is
3257 * correct or not, and is expensive to check. But
3258 * it is fairly easy in the latin1 range to convert
3259 * the variants into a single character and check
3260 * those */
3261 if (UTF8_IS_INVARIANT(*i)) {
3262 if (! isALPHAU(*i)) problematic = TRUE;
3263 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3264 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3265 *(i+1)))))
3266 {
3267 problematic = TRUE;
3268 }
3269 }
3270 if (! problematic) for (i = s + UTF8SKIP(s);
3271 i < e;
3272 i+= UTF8SKIP(i))
3273 {
3274 if (UTF8_IS_INVARIANT(*i)) {
3275 if (isCHARNAME_CONT(*i)) continue;
3276 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3277 continue;
3278 } else if (isCHARNAME_CONT(
3279 UNI_TO_NATIVE(
3280 UTF8_ACCUMULATE(*i, *(i+1)))))
3281 {
3282 continue;
3283 }
3284 problematic = TRUE;
3285 break;
3286 }
3287 }
3288 if (problematic) {
6e1bad6c
KW
3289 /* The e-i passed to the final %.*s makes sure that
3290 * should the trailing NUL be missing that this
3291 * print won't run off the end of the string */
cb233ae3 3292 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3293 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3294 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3295 }
3296 }
3297 } /* End \N{NAME} */
ff3f963a
KW
3298#ifdef EBCDIC
3299 if (!dorange)
3300 native_range = FALSE; /* \N{} is defined to be Unicode */
3301#endif
3302 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3303 continue;
3304
02aa26ce 3305 /* \c is a control character */
79072805
LW
3306 case 'c':
3307 s++;
961ce445 3308 if (s < send) {
f9d13529 3309 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3310 }
961ce445
RGS
3311 else {
3312 yyerror("Missing control char name in \\c");
3313 }
79072805 3314 continue;
02aa26ce
NT
3315
3316 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3317 case 'b':
db42d148 3318 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3319 break;
3320 case 'n':
db42d148 3321 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3322 break;
3323 case 'r':
db42d148 3324 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3325 break;
3326 case 'f':
db42d148 3327 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3328 break;
3329 case 't':
db42d148 3330 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3331 break;
34a3fe2a 3332 case 'e':
db42d148 3333 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3334 break;
3335 case 'a':
db42d148 3336 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3337 break;
02aa26ce
NT
3338 } /* end switch */
3339
79072805
LW
3340 s++;
3341 continue;
02aa26ce 3342 } /* end if (backslash) */
4c3a8340
ST
3343#ifdef EBCDIC
3344 else
3345 literal_endpoint++;
3346#endif
02aa26ce 3347
f9a63242 3348 default_action:
77a135fe
KW
3349 /* If we started with encoded form, or already know we want it,
3350 then encode the next character */
3351 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3352 STRLEN len = 1;
77a135fe
KW
3353
3354
3355 /* One might think that it is wasted effort in the case of the
3356 * source being utf8 (this_utf8 == TRUE) to take the next character
3357 * in the source, convert it to an unsigned value, and then convert
3358 * it back again. But the source has not been validated here. The
3359 * routine that does the conversion checks for errors like
3360 * malformed utf8 */
3361
5f66b61c
AL
3362 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3363 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3364 if (!has_utf8) {
3365 SvCUR_set(sv, d - SvPVX_const(sv));
3366 SvPOK_on(sv);
3367 *d = '\0';
77a135fe 3368 /* See Note on sizing above. */
7bf79863
KW
3369 sv_utf8_upgrade_flags_grow(sv,
3370 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3371 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3372 d = SvPVX(sv) + SvCUR(sv);
3373 has_utf8 = TRUE;
3374 } else if (need > len) {
3375 /* encoded value larger than old, may need extra space (NOTE:
3376 * SvCUR() is not set correctly here). See Note on sizing
3377 * above. */
9d4ba2ae 3378 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3379 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3380 }
77a135fe
KW
3381 s += len;
3382
5f66b61c 3383 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3384#ifdef EBCDIC
3385 if (uv > 255 && !dorange)
3386 native_range = FALSE;
3387#endif
2b9d42f0
NIS
3388 }
3389 else {
3390 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3391 }
02aa26ce
NT
3392 } /* while loop to process each character */
3393
3394 /* terminate the string and set up the sv */
79072805 3395 *d = '\0';
95a20fc0 3396 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3397 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3398 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3399
79072805 3400 SvPOK_on(sv);
9f4817db 3401 if (PL_encoding && !has_utf8) {
d0063567
DK
3402 sv_recode_to_utf8(sv, PL_encoding);
3403 if (SvUTF8(sv))
3404 has_utf8 = TRUE;
9f4817db 3405 }
2b9d42f0 3406 if (has_utf8) {
7e2040f0 3407 SvUTF8_on(sv);
2b9d42f0 3408 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3409 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3410 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3411 }
3412 }
79072805 3413
02aa26ce 3414 /* shrink the sv if we allocated more than we used */
79072805 3415 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3416 SvPV_shrink_to_cur(sv);
79072805 3417 }
02aa26ce 3418
6154021b 3419 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3420 if (s > PL_bufptr) {
eb0d8d16
NC
3421 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3422 const char *const key = PL_lex_inpat ? "qr" : "q";
3423 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3424 const char *type;
3425 STRLEN typelen;
3426
3427 if (PL_lex_inwhat == OP_TRANS) {
3428 type = "tr";
3429 typelen = 2;
3430 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3431 type = "s";
3432 typelen = 1;
3433 } else {
3434 type = "qq";
3435 typelen = 2;
3436 }
3437
3438 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3439 type, typelen);
3440 }
6154021b 3441 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3442 } else
8990e307 3443 SvREFCNT_dec(sv);
79072805
LW
3444 return s;
3445}
3446
ffb4593c
NT
3447/* S_intuit_more
3448 * Returns TRUE if there's more to the expression (e.g., a subscript),
3449 * FALSE otherwise.
ffb4593c
NT
3450 *
3451 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3452 *
3453 * ->[ and ->{ return TRUE
3454 * { and [ outside a pattern are always subscripts, so return TRUE
3455 * if we're outside a pattern and it's not { or [, then return FALSE
3456 * if we're in a pattern and the first char is a {
3457 * {4,5} (any digits around the comma) returns FALSE
3458 * if we're in a pattern and the first char is a [
3459 * [] returns FALSE
3460 * [SOMETHING] has a funky algorithm to decide whether it's a
3461 * character class or not. It has to deal with things like
3462 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3463 * anything else returns TRUE
3464 */
3465
9cbb5ea2
GS
3466/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3467
76e3520e 3468STATIC int
cea2e8a9 3469S_intuit_more(pTHX_ register char *s)
79072805 3470{
97aff369 3471 dVAR;
7918f24d
NC
3472
3473 PERL_ARGS_ASSERT_INTUIT_MORE;
3474
3280af22 3475 if (PL_lex_brackets)
79072805
LW
3476 return TRUE;
3477 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3478 return TRUE;
3479 if (*s != '{' && *s != '[')
3480 return FALSE;
3280af22 3481 if (!PL_lex_inpat)
79072805
LW
3482 return TRUE;
3483
3484 /* In a pattern, so maybe we have {n,m}. */
3485 if (*s == '{') {
3486 s++;
3487 if (!isDIGIT(*s))
3488 return TRUE;
3489 while (isDIGIT(*s))
3490 s++;
3491 if (*s == ',')
3492 s++;
3493 while (isDIGIT(*s))
3494 s++;
3495 if (*s == '}')
3496 return FALSE;
3497 return TRUE;
3498
3499 }
3500
3501 /* On the other hand, maybe we have a character class */
3502
3503 s++;
3504 if (*s == ']' || *s == '^')
3505 return FALSE;
3506 else {
ffb4593c 3507 /* this is terrifying, and it works */
79072805
LW
3508 int weight = 2; /* let's weigh the evidence */
3509 char seen[256];
f27ffc4a 3510 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3511 const char * const send = strchr(s,']');
3280af22 3512 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3513
3514 if (!send) /* has to be an expression */
3515 return TRUE;
3516
3517 Zero(seen,256,char);
3518 if (*s == '$')
3519 weight -= 3;
3520 else if (isDIGIT(*s)) {
3521 if (s[1] != ']') {
3522 if (isDIGIT(s[1]) && s[2] == ']')
3523 weight -= 10;
3524 }
3525 else
3526 weight -= 100;
3527 }
3528 for (; s < send; s++) {
3529 last_un_char = un_char;
3530 un_char = (unsigned char)*s;
3531 switch (*s) {
3532 case '@':
3533 case '&':
3534 case '$':
3535 weight -= seen[un_char] * 10;
7e2040f0 3536 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3537 int len;
8903cb82 3538 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3539 len = (int)strlen(tmpbuf);
3540 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3541 weight -= 100;
3542 else
3543 weight -= 10;
3544 }
3545 else if (*s == '$' && s[1] &&
93a17b20
LW
3546 strchr("[#!%*<>()-=",s[1])) {
3547 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3548 weight -= 10;
3549 else
3550 weight -= 1;
3551 }
3552 break;
3553 case '\\':
3554 un_char = 254;
3555 if (s[1]) {
93a17b20 3556 if (strchr("wds]",s[1]))
79072805 3557 weight += 100;
10edeb5d 3558 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3559 weight += 1;
93a17b20 3560 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3561 weight += 40;
3562 else if (isDIGIT(s[1])) {
3563 weight += 40;
3564 while (s[1] && isDIGIT(s[1]))
3565 s++;
3566 }
3567 }
3568 else
3569 weight += 100;
3570 break;
3571 case '-':
3572 if (s[1] == '\\')
3573 weight += 50;
93a17b20 3574 if (strchr("aA01! ",last_un_char))
79072805 3575 weight += 30;
93a17b20 3576 if (strchr("zZ79~",s[1]))
79072805 3577 weight += 30;
f27ffc4a
GS
3578 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3579 weight -= 5; /* cope with negative subscript */
79072805
LW
3580 break;
3581 default:
3792a11b
NC
3582 if (!isALNUM(last_un_char)
3583 && !(last_un_char == '$' || last_un_char == '@'
3584 || last_un_char == '&')
3585 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3586 char *d = tmpbuf;
3587 while (isALPHA(*s))
3588 *d++ = *s++;
3589 *d = '\0';
5458a98a 3590 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3591 weight -= 150;
3592 }
3593 if (un_char == last_un_char + 1)
3594 weight += 5;
3595 weight -= seen[un_char];
3596 break;
3597 }
3598 seen[un_char]++;
3599 }
3600 if (weight >= 0) /* probably a character class */
3601 return FALSE;
3602 }
3603
3604 return TRUE;
3605}
ffed7fef 3606
ffb4593c
NT
3607/*
3608 * S_intuit_method
3609 *
3610 * Does all the checking to disambiguate
3611 * foo bar
3612 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3613 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3614 *
3615 * First argument is the stuff after the first token, e.g. "bar".
3616 *
3617 * Not a method if bar is a filehandle.
3618 * Not a method if foo is a subroutine prototyped to take a filehandle.
3619 * Not a method if it's really "Foo $bar"
3620 * Method if it's "foo $bar"
3621 * Not a method if it's really "print foo $bar"
3622 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3623 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3624 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3625 * =>
3626 */
3627
76e3520e 3628STATIC int
62d55b22 3629S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3630{
97aff369 3631 dVAR;
a0d0e21e 3632 char *s = start + (*start == '$');
3280af22 3633 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3634 STRLEN len;
3635 GV* indirgv;
5db06880
NC
3636#ifdef PERL_MAD
3637 int soff;
3638#endif
a0d0e21e 3639
7918f24d
NC
3640 PERL_ARGS_ASSERT_INTUIT_METHOD;
3641
a0d0e21e 3642 if (gv) {
62d55b22 3643 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3644 return 0;
62d55b22
NC
3645 if (cv) {
3646 if (SvPOK(cv)) {
3647 const char *proto = SvPVX_const(cv);
3648 if (proto) {
3649 if (*proto == ';')
3650 proto++;
3651 if (*proto == '*')
3652 return 0;
3653 }
b6c543e3
IZ
3654 }
3655 } else
c35e046a 3656 gv = NULL;
a0d0e21e 3657 }
8903cb82 3658 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3659 /* start is the beginning of the possible filehandle/object,
3660 * and s is the end of it
3661 * tmpbuf is a copy of it
3662 */
3663
a0d0e21e 3664 if (*start == '$') {
3ef1310e
RGS
3665 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3666 isUPPER(*PL_tokenbuf))
a0d0e21e 3667 return 0;
5db06880
NC
3668#ifdef PERL_MAD
3669 len = start - SvPVX(PL_linestr);
3670#endif
29595ff2 3671 s = PEEKSPACE(s);
f0092767 3672#ifdef PERL_MAD
5db06880
NC
3673 start = SvPVX(PL_linestr) + len;
3674#endif
3280af22
NIS
3675 PL_bufptr = start;
3676 PL_expect = XREF;
a0d0e21e
LW
3677 return *s == '(' ? FUNCMETH : METHOD;
3678 }
5458a98a 3679 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3680 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3681 len -= 2;
3682 tmpbuf[len] = '\0';
5db06880
NC
3683#ifdef PERL_MAD
3684 soff = s - SvPVX(PL_linestr);
3685#endif
c3e0f903
GS
3686 goto bare_package;
3687 }
90e5519e 3688 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3689 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3690 return 0;
3691 /* filehandle or package name makes it a method */
da51bb9b 3692 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3693#ifdef PERL_MAD
3694 soff = s - SvPVX(PL_linestr);
3695#endif
29595ff2 3696 s = PEEKSPACE(s);
3280af22 3697 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3698 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3699 bare_package:
cd81e915 3700 start_force(PL_curforce);
9ded7720 3701 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3702 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3703 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3704 if (PL_madskills)
3705 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3706 PL_expect = XTERM;
a0d0e21e 3707 force_next(WORD);
3280af22 3708 PL_bufptr = s;
5db06880
NC
3709#ifdef PERL_MAD
3710 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3711#endif
a0d0e21e
LW
3712 return *s == '(' ? FUNCMETH : METHOD;
3713 }
3714 }
3715 return 0;
3716}
3717
16d20bd9 3718/* Encoded script support. filter_add() effectively inserts a
4e553d73 3719 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3720 * Note that the filter function only applies to the current source file
3721 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3722 *
3723 * The datasv parameter (which may be NULL) can be used to pass
3724 * private data to this instance of the filter. The filter function
3725 * can recover the SV using the FILTER_DATA macro and use it to
3726 * store private buffers and state information.
3727 *
3728 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3729 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3730 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3731 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3732 * private use must be set using malloc'd pointers.
3733 */
16d20bd9
AD
3734
3735SV *
864dbfa3 3736Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3737{
97aff369 3738 dVAR;
f4c556ac 3739 if (!funcp)
a0714e2c 3740 return NULL;
f4c556ac 3741
5486870f
DM
3742 if (!PL_parser)
3743 return NULL;
3744
3280af22
NIS
3745 if (!PL_rsfp_filters)
3746 PL_rsfp_filters = newAV();
16d20bd9 3747 if (!datasv)
561b68a9 3748 datasv = newSV(0);
862a34c6 3749 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3750 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3751 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3752 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3753 FPTR2DPTR(void *, IoANY(datasv)),
3754 SvPV_nolen(datasv)));
3280af22
NIS
3755 av_unshift(PL_rsfp_filters, 1);
3756 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3757 return(datasv);
3758}
4e553d73 3759
16d20bd9
AD
3760
3761/* Delete most recently added instance of this filter function. */
a0d0e21e 3762void
864dbfa3 3763Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3764{
97aff369 3765 dVAR;
e0c19803 3766 SV *datasv;
24801a4b 3767
7918f24d
NC
3768 PERL_ARGS_ASSERT_FILTER_DEL;
3769
33073adb 3770#ifdef DEBUGGING
55662e27
JH
3771 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3772 FPTR2DPTR(void*, funcp)));
33073adb 3773#endif
5486870f 3774 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3775 return;